From b8750385cc7a56d73ef874a0be51e66ad16dcef7 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Tue, 16 Sep 2025 14:08:41 +0200
Subject: [PATCH 01/22] adapt hp tuner to unimodal optimizations

---
 .../scuro/drsearch/hyperparameter_tuner.py    | 333 +++++++++++++++++-
 .../python/systemds/scuro/utils/identifier.py |  34 ++
 2 files changed, 362 insertions(+), 5 deletions(-)
 create mode 100644 src/main/python/systemds/scuro/utils/identifier.py

diff --git a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
index 04a3fa4701a..9662734514d 100644
--- a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
+++ b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
@@ -19,19 +19,324 @@
 #
 # -------------------------------------------------------------
 import itertools
+import concurrent.futures
+from typing import Dict, List, Callable, Tuple, Any, Optional
+import numpy as np
+from sklearn.model_selection import ParameterGrid
+import json
+import logging
+from dataclasses import dataclass
+from pathlib import Path
 import time
 
-import numpy as np
+from systemds.scuro.modality.modality import Modality
+from systemds.scuro.drsearch.task import Task
+from systemds.scuro.representations.representation import Representation
+from systemds.scuro.representations.window_aggregation import Window
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
 
 from systemds.scuro.drsearch.optimization_data import OptimizationResult
 from systemds.scuro.representations.context import Context
 
+@dataclass
+class HyperparamResult:
+    """Store hyperparameter tuning results"""
+    representation_name: str
+    best_params: Dict[str, Any]
+    best_score: float
+    all_results: List[Tuple[Dict[str, Any], float]]
+    tuning_time: float
+    modality_id: int
 
 class HyperparameterTuner:
-    def __init__(self, task, n_trials=10, early_stopping_patience=5):
-        self.task = task
-        self.n_trials = n_trials
-        self.early_stopping_patience = early_stopping_patience
+    
+    def __init__(self, modalities, tasks, optimization_results, k: int = 2, n_jobs: int = -1, scoring_metric: str = 'accuracy',
+                 maximize_metric: bool = True, save_results: bool = True):
+        self.tasks = tasks
+        self.optimization_results = optimization_results
+        self.n_jobs = n_jobs
+        self.scoring_metric = scoring_metric
+        self.maximize_metric = maximize_metric
+        self.save_results = save_results
+        self.results = {}
+        self.k = k
+        self.modalities = modalities
+        self.k_best_cache = None
+        self.k_best_modalities = None
+        self.extract_k_best_modalities_per_task()
+    
+    
+    def get_modality_by_id(self, modality_id: int) -> Modality:
+        for mod in self.modalities:
+            if mod.modality_id == modality_id:
+                return mod
+        
+    def extract_k_best_modalities_per_task(self):
+        self.k_best_modalities = {}
+        self.k_best_cache = {}
+        for task in self.tasks:
+            self.k_best_modalities[task.model.name] = []
+            self.k_best_cache[task.model.name] = []
+            for modality in self.modalities:
+                k_best_results, cached_data = (
+                    self.optimization_results.get_k_best_results(
+                        modality, self.k, task
+                    )
+                )
+
+                self.k_best_modalities[task.model.name].extend(k_best_results)
+                self.k_best_cache[task.model.name].extend(cached_data)
+    
+    def evaluate_single_config(self, reps: List[Representation],
+                               params: Dict[str, Any], modality_id: int, task: Task, param_idx: List[int]) -> Tuple[Dict[str, Any], float]:
+        """
+        Evaluate a single hyperparameter configuration
+        """
+        # try:
+        rep_name = ''
+        modality = self.get_modality_by_id(modality_id)
+        start = 0
+        for i, rep in enumerate(reps):
+            rep_name += rep().name
+            len_params = len(rep().parameters)
+            if isinstance(rep(), Window):
+                modality = modality.context(rep(*np.array(list(params.values()))[param_idx[start:start+len_params]]))
+            else:
+                modality = modality.apply_representation(rep(*np.array(list(params.values()))[param_idx[start:start+len_params]]))
+            start += len_params
+    
+        score = task.run(modality.data)[1]
+        logger.debug(f"{rep_name} with params {params}: score = {score}")
+        return params, score
+        # except Exception as e:
+        #         logger.error(f"Error evaluating {rep_name} with params {params}: {e}")
+        #         return params, float('-inf') if self.maximize_metric else float('inf')
+        #
+    def tune_representation(self, reps: List,
+                            hyperparams: List[Dict[str, List]], modality_id: int, task: Task,
+                            max_evals: Optional[int] = None) -> HyperparamResult:
+        """
+        Tune hyperparameters for a single representation
+
+        Args:
+            rep_name: Name of the representation
+            rep_func: Function that takes (task_data, **hyperparams) and returns score
+            hyperparams: Dictionary with parameter names as keys and lists of values as values
+            task_data: Data to pass to the representation function
+            max_evals: Maximum number of evaluations (None for full grid search)
+        """
+        start_time = time.time()
+        rep_name = ''.join([rep().name for rep in reps])
+        logger.info(f"Starting hyperparameter tuning for")
+        
+        # Generate parameter grid
+        hp = merge_multiple_dicts_with_increments(list(hyperparams))
+        param_grid = list(ParameterGrid(hp))
+        idx_params = []
+        for h in hp.keys():
+            for i, p in enumerate(param_grid[0].keys()):
+                if h == p:
+                    idx_params.append(i)
+                    break
+        
+        
+        # Limit evaluations if specified
+        if max_evals and len(param_grid) > max_evals:
+            # Random sampling if too many combinations
+            np.random.shuffle(param_grid)
+            param_grid = param_grid[:max_evals]
+        
+        logger.info(f"Evaluating {len(param_grid)} parameter combinations for")
+        
+        # Parallel evaluation
+        all_results = []
+        if self.n_jobs <= 1:
+            # Sequential execution
+            for params in param_grid:
+                result = self.evaluate_single_config(reps, params, modality_id, task, idx_params)
+                all_results.append(result)
+        else:
+            # Parallel execution
+            with concurrent.futures.ProcessPoolExecutor(max_workers=self.n_jobs) as executor:
+                futures = [executor.submit(self.evaluate_single_config, reps, params, modality_id, task, idx_params)
+                           for params in param_grid]
+                
+                for future in concurrent.futures.as_completed(futures):
+                    try:
+                        result = future.result()
+                        all_results.append(result)
+                    except Exception as e:
+                        logger.error(f"Error in parallel execution: {e}")
+        
+        # Find best parameters
+        if self.maximize_metric:
+            best_params, best_score = max(all_results, key=lambda x: x[1])
+        else:
+            best_params, best_score = min(all_results, key=lambda x: x[1])
+        
+        tuning_time = time.time() - start_time
+        logger.info(f"Best params for {rep_name}: {best_params}, score: {best_score:.4f}, time: {tuning_time:.2f}s")
+        
+        return HyperparamResult(
+            representation_name=rep_name,
+            best_params=best_params,
+            best_score=best_score,
+            all_results=all_results,
+            tuning_time=tuning_time,
+            modality_id=modality_id,
+        )
+    
+    def tune_unimodal_representations(self, max_eval_per_rep: Optional[int] = None):
+        results = {}
+        for task in self.tasks:
+            results[task.model.name] = []
+            for representation in self.k_best_cache[task.model.name]:
+                hyperparams = []
+                reps = []
+                for transformation in representation.transformation:
+                    params = transformation.parameters
+                    rep = transformation.__class__
+                    hyperparams.append(params)
+                    reps.append(rep)
+                result = self.tune_representation(
+                    reps, hyperparams, representation.modality_id, task, max_eval_per_rep
+                )
+                results[task.model.name].append(result)
+            
+        self.results = results
+        
+        if self.save_results:
+            self.save_tuning_results()
+        
+        return results
+    
+    
+    def tune_multimodal_representations(self, optimization_results, task: Task, k: int = 1, optimize_unimodal: bool = True, max_eval_per_rep: Optional[int] = None):
+        best_optimization_results = optimization_results[:k]
+        
+        for result in best_optimization_results:
+            fusion_node_ids = []
+            used_modalities = result.architecture.encoder_choices
+            cached_representations = []
+            modality_ids = []
+            hyperparams = []
+            reps = []
+            for i, fusion_node in enumerate(result.architecture.fusion_nodes):
+                if len(fusion_node.parameters) > 0:
+                    fusion_node_ids.append(i)
+                
+            if len(fusion_node_ids) == 0 and not optimize_unimodal:
+                logger.warning("No fusion nodes with hyperparameters and unimodal optimization disabled. Skipping.")
+                continue
+            
+            for modality in used_modalities:
+                mod_id = modality.modality_id
+                instance_id = modality.modality_instance_id
+                cached_representation = self.get_cached_representation(int(mod_id), int(instance_id), task)
+                cached_representations.append(cached_representation)
+                
+                if optimize_unimodal:
+                    modality_ids.append(int(mod_id))
+                    
+                    for transformation in cached_representation.transformation:
+                        params = transformation.parameters
+                        rep = transformation.__class__
+                        hyperparams.append(params)
+                        reps.append(rep)
+                    
+                
+                
+                
+            
+            
+                
+    def get_cached_representation(self, modality_id: int, instance_id: int, task: Task):
+        counter = -1
+        for cached_representation in self.k_best_cache[task.model.name]:
+            if cached_representation.modality_id == modality_id:
+                counter +=1
+                if counter == instance_id:
+                    return cached_representation
+    
+    def tune_multiple_representations(self, representations: Dict[str, Dict],
+                                      task_data: Any, max_evals_per_rep: Optional[int] = None) -> Dict[
+        str, HyperparamResult]:
+        """
+        Tune hyperparameters for multiple representations
+
+        Args:
+            representations: Dict with structure:
+                {
+                    'rep_name': {
+                        'function': callable,
+                        'hyperparams': dict of param_name -> [values]
+                    }
+                }
+            task_data: Data to pass to representation functions
+            max_evals_per_rep: Maximum evaluations per representation
+        """
+        results = {}
+        
+        for rep_name, rep_config in representations.items():
+            rep_func = rep_config['function']
+            hyperparams = rep_config['hyperparams']
+            
+            result = self.tune_representation(
+                rep_name, rep_func, hyperparams, task_data, max_evals_per_rep
+            )
+            results[rep_name] = result
+        
+        self.results = results
+        
+        if self.save_results:
+            self.save_tuning_results()
+        
+        return results
+    
+    def get_best_representations(self, k: int = None) -> List[Tuple[str, HyperparamResult]]:
+        """
+        Get the k best representations based on their best scores
+        """
+        if not self.results:
+            logger.warning("No tuning results available")
+            return []
+        
+        sorted_results = sorted(
+            self.results.items(),
+            key=lambda x: x[1].best_score,
+            reverse=self.maximize_metric
+        )
+        
+        if k is None:
+            return sorted_results
+        
+        return sorted_results[:k]
+    
+    def save_tuning_results(self, filepath: str = None):
+        """Save tuning results to JSON file"""
+        if not filepath:
+            filepath = f"hyperparameter_results_{int(time.time())}.json"
+        
+        # Convert results to JSON-serializable format
+        json_results = {}
+        for task in self.results.keys():
+            for result in self.results[task]:
+                json_results[result.representation_name] = {
+                    'best_params': result.best_params,
+                    'best_score': result.best_score,
+                    'tuning_time': result.tuning_time,
+                    'num_evaluations': len(result.all_results)
+                }
+            
+        
+        with open(filepath, 'w') as f:
+            json.dump(json_results, f, indent=2)
+        
+        logger.info(f"Results saved to {filepath}")
+        
 
     def tune_operator_chain(self, modality, operator_chain):
         best_result = None
@@ -104,3 +409,21 @@ def _generate_search_space(self, param_grids):
         ]
 
         return parameter_grid
+
+
+def merge_multiple_dicts_with_increments(dicts):
+    result = dicts[0].copy() if dicts else {}
+    
+    for dict_to_merge in dicts[1:]:
+        for key, value in dict_to_merge.items():
+            if key in result:
+                counter = 1
+                new_key = f"{key}{counter}"
+                while new_key in result:
+                    counter += 1
+                    new_key = f"{key}{counter}"
+                result[new_key] = value
+            else:
+                result[key] = value
+    
+    return result
\ No newline at end of file
diff --git a/src/main/python/systemds/scuro/utils/identifier.py b/src/main/python/systemds/scuro/utils/identifier.py
new file mode 100644
index 00000000000..ca352db211e
--- /dev/null
+++ b/src/main/python/systemds/scuro/utils/identifier.py
@@ -0,0 +1,34 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+class Identifier:
+    """ """
+
+    _instance = None
+    id = -1
+
+    def __new__(cls):
+        if not cls._instance:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def new_id(self):  # TODO: make threadsafe when parallelizing
+        self.id += 1
+        return self.id

From 362d02083e0badd637ca961cd7ef98eb8f0e0224 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Wed, 17 Sep 2025 07:57:05 +0200
Subject: [PATCH 02/22] adapt hp tuner to unimodal optimizations

---
 .../scuro/drsearch/hyperparameter_tuner.py    | 36 +++++++++++++++----
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
index 9662734514d..779facd0e9d 100644
--- a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
+++ b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
@@ -29,10 +29,12 @@
 from pathlib import Path
 import time
 
+from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.drsearch.task import Task
 from systemds.scuro.representations.representation import Representation
 from systemds.scuro.representations.window_aggregation import Window
+from systemds.scuro.representations.fusion import Fusion
 
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -91,20 +93,31 @@ def extract_k_best_modalities_per_task(self):
                 self.k_best_cache[task.model.name].extend(cached_data)
     
     def evaluate_single_config(self, reps: List[Representation],
-                               params: Dict[str, Any], modality_id: int, task: Task, param_idx: List[int]) -> Tuple[Dict[str, Any], float]:
+                               params: Dict[str, Any], modality_ids: List[int], task: Task, param_idx: List[int]) -> Tuple[Dict[str, Any], float]:
         """
         Evaluate a single hyperparameter configuration
         """
         # try:
         rep_name = ''
-        modality = self.get_modality_by_id(modality_id)
+        modality_counter = 0
+        modality = None
+        modality_is_initialized = False
         start = 0
+        # if isinstance(rep, Fusion):
+        #     modality = left.combine(right, fusion_method)
         for i, rep in enumerate(reps):
             rep_name += rep().name
-            len_params = len(rep().parameters)
+            len_params = len(rep().parameters) if rep().parameters is not None else 0
             if isinstance(rep(), Window):
                 modality = modality.context(rep(*np.array(list(params.values()))[param_idx[start:start+len_params]]))
+            elif isinstance(rep(), Fusion):
+                modality = modality.combine(rep(*np.array(list(params.values()))[param_idx[start:start+len_params]]))
+                modality_is_initialized = False
             else:
+                if not modality_is_initialized:
+                    modality = self.get_modality_by_id(modality_ids[modality_counter])
+                    modality_is_initialized = True
+                    modality_counter += 1
                 modality = modality.apply_representation(rep(*np.array(list(params.values()))[param_idx[start:start+len_params]]))
             start += len_params
     
@@ -114,9 +127,11 @@ def evaluate_single_config(self, reps: List[Representation],
         # except Exception as e:
         #         logger.error(f"Error evaluating {rep_name} with params {params}: {e}")
         #         return params, float('-inf') if self.maximize_metric else float('inf')
-        #
+    
+    
+    
     def tune_representation(self, reps: List,
-                            hyperparams: List[Dict[str, List]], modality_id: int, task: Task,
+                            hyperparams: List[Dict[str, List]], modality_id: List[int], task: Task,
                             max_evals: Optional[int] = None) -> HyperparamResult:
         """
         Tune hyperparameters for a single representation
@@ -224,6 +239,7 @@ def tune_multimodal_representations(self, optimization_results, task: Task, k: i
             modality_ids = []
             hyperparams = []
             reps = []
+            
             for i, fusion_node in enumerate(result.architecture.fusion_nodes):
                 if len(fusion_node.parameters) > 0:
                     fusion_node_ids.append(i)
@@ -232,7 +248,7 @@ def tune_multimodal_representations(self, optimization_results, task: Task, k: i
                 logger.warning("No fusion nodes with hyperparameters and unimodal optimization disabled. Skipping.")
                 continue
             
-            for modality in used_modalities:
+            for i, modality in enumerate(used_modalities):
                 mod_id = modality.modality_id
                 instance_id = modality.modality_instance_id
                 cached_representation = self.get_cached_representation(int(mod_id), int(instance_id), task)
@@ -246,7 +262,13 @@ def tune_multimodal_representations(self, optimization_results, task: Task, k: i
                         rep = transformation.__class__
                         hyperparams.append(params)
                         reps.append(rep)
-                    
+                        
+                if len(used_modalities) > i + 1:
+                    reps.append(Registry().get_fusion_operator_by_name(result.architecture.fusion_nodes[i].operation))
+                    hyperparams.append(result.architecture.fusion_nodes[i].parameters)
+                        
+            self.tune_representation(reps, hyperparams, modality_ids, task, max_eval_per_rep)
+            
                 
                 
                 

From c2fe75e849d982e64e8a9320e060ab1864b0eff7 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Wed, 17 Sep 2025 07:59:28 +0200
Subject: [PATCH 03/22] fix unimodal optimizer

---
 .../scuro/drsearch/operator_registry.py       |  6 +++
 .../scuro/drsearch/unimodal_optimizer.py      | 52 +++++++++++++++++--
 2 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/src/main/python/systemds/scuro/drsearch/operator_registry.py b/src/main/python/systemds/scuro/drsearch/operator_registry.py
index 699dcad8571..4339028f448 100644
--- a/src/main/python/systemds/scuro/drsearch/operator_registry.py
+++ b/src/main/python/systemds/scuro/drsearch/operator_registry.py
@@ -76,6 +76,12 @@ def get_context_operators(self):
 
     def get_fusion_operators(self):
         return self._fusion_operators
+    
+    def get_fusion_operator_by_name(self, fusion_name):
+        for fusion in self._fusion_operators:
+            if fusion.__name__ == fusion_name:
+                return fusion
+        return None
 
     def get_representation_by_name(self, representation_name, modality_type):
         for representation in self._context_operators:
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
index 86c7ce1e63a..d7b9a78964c 100644
--- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
@@ -28,7 +28,8 @@
 from typing import Union
 
 import numpy as np
-from systemds.scuro.representations.window_aggregation import WindowAggregation
+import wandb
+from systemds.scuro.representations.window_aggregation import Window
 from systemds.scuro.representations.concatenation import Concatenation
 from systemds.scuro.representations.hadamard import Hadamard
 from systemds.scuro.representations.sum import Sum
@@ -48,9 +49,18 @@ class UnimodalOptimizer:
     def __init__(self, modalities, tasks, debug=True):
         self.modalities = modalities
         self.tasks = tasks
+        self.run = None
+        if debug:
+            wandb.login()
+            config = {
+                "representation_type": "unimodal",  # or "unimodal"
+            }
+            self.run = wandb.init(project="multimodal-search", config=config)
+            
+        self.debug = debug
 
         self.operator_registry = Registry()
-        self.operator_performance = UnimodalResults(modalities, tasks, debug)
+        self.operator_performance = UnimodalResults(modalities, tasks, debug, self.run)
 
         self._tasks_require_same_dims = True
         self.expected_dimensions = tasks[0].expected_dim
@@ -91,6 +101,9 @@ def optimize_parallel(self, n_workers=None):
     def optimize(self):
         for modality in self.modalities:
             local_result = self._process_modality(modality, False)
+        
+        if self.debug:
+            wandb.finish()
 
     def _process_modality(self, modality, parallel):
         if parallel:
@@ -134,6 +147,8 @@ def _combine_non_self_contained_representations(
         other_representations,
         local_results,
     ):
+        other_representations = copy.deepcopy(other_representations)
+        other_representations.remove(representation.transformation[0].__class__)
         combined = representation
         context_operators = self.operator_registry.get_context_operators()
         used_representations = representation.transformation
@@ -242,12 +257,13 @@ def _evaluate_local(
 
 
 class UnimodalResults:
-    def __init__(self, modalities, tasks, debug=False):
+    def __init__(self, modalities, tasks, debug=False, run=None):
         self.modality_ids = [modality.modality_id for modality in modalities]
         self.task_names = [task.model.name for task in tasks]
         self.results = {}
         self.debug = debug
         self.cache = {}
+        self.run = run
 
         for modality in self.modality_ids:
             self.results[modality] = {}
@@ -261,6 +277,7 @@ def add_result(
     ):
         parameters = []
         representation_names = []
+        
 
         for rep in representations:
             representation_names.append(type(rep).__name__)
@@ -272,7 +289,7 @@ def add_result(
             for param in list(rep.parameters.keys()):
                 params[param] = getattr(rep, param)
 
-            if isinstance(rep, WindowAggregation):
+            if isinstance(rep, Window):
                 params["aggregation_function"] = (
                     rep.aggregation_function.aggregation_function_name
                 )
@@ -288,6 +305,33 @@ def add_result(
             task_time=task_time,
             combination=combination.name if combination else "",
         )
+        
+        if self.debug:
+            # config = {
+            #     "representation_type": "unimodal",  # or "unimodal"
+            #     "used_modalities": [modality.modality_type.name],
+            #     "representations": representation_names,
+            #     "representation_time": modality.transform_time,
+            #     "fusion_method": combination.name if combination else "",
+            #     "hyperparameters": parameters,
+            #     "task_name": task_name,
+            # }
+            
+            metrics = asdict(entry)
+            table = wandb.Table(columns=["representations"])
+            for m in representation_names:
+                table.add_data(m)
+            metrics["representations"] = table
+            # table = wandb.Table(columns=["parameters"])
+            # for m in parameters:
+            #     table.add_data(m)
+            metrics.pop("params")
+            
+            metrics["used_modalities"] = modality.modality_id
+            metrics["task"] = self.task_names.index(task_name)
+            # Log metric for the multimodal combination
+            self.run.log(metrics)
+            
         self.results[modality.modality_id][task_name].append(entry)
         self.cache[modality.modality_id][task_name][
             (tuple(representation_names), scores[1], modality.transform_time)

From 034e8c5c40254ce2fff77cd9db96944c9164a5ec Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Wed, 17 Sep 2025 20:32:34 +0200
Subject: [PATCH 04/22] adapt unimodal optimizer to dag structure

---
 .../scuro/drsearch/unimodal_optimizer.py      | 263 ++++++++++--------
 .../scuro/modality/unimodal_modality.py       |   4 +-
 .../systemds/scuro/representations/bow.py     |   4 +-
 .../systemds/scuro/representations/lstm.py    |   6 +-
 .../scuro/representations/mel_spectrogram.py  |   6 +-
 .../systemds/scuro/representations/mfcc.py    |   8 +-
 .../scuro/representations/representation.py   |   3 +-
 .../systemds/scuro/representations/resnet.py  |   3 +-
 .../scuro/representations/spectrogram.py      |   4 +-
 .../systemds/scuro/representations/tfidf.py   |   2 +-
 .../representations/window_aggregation.py     |  12 +-
 .../scuro/representations/word2vec.py         |  15 +-
 12 files changed, 173 insertions(+), 157 deletions(-)

diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
index d7b9a78964c..0d7ec8e8350 100644
--- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
@@ -10,7 +10,7 @@
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing,
+# Unless required by applicable law or agreed in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
@@ -23,26 +23,27 @@
 import copy
 from concurrent.futures import ProcessPoolExecutor, as_completed
 from dataclasses import dataclass, field, asdict
-
 import multiprocessing as mp
-from typing import Union
+from typing import Union, Dict, List, Any
 
 import numpy as np
 import wandb
+from systemds.scuro.representations.unimodal import UnimodalRepresentation
 from systemds.scuro.representations.window_aggregation import Window
+from systemds.scuro.representations.context import Context
+from systemds.scuro.representations.fusion import Fusion
 from systemds.scuro.representations.concatenation import Concatenation
 from systemds.scuro.representations.hadamard import Hadamard
 from systemds.scuro.representations.sum import Sum
-
-from systemds.scuro.representations.aggregated_representation import (
-    AggregatedRepresentation,
-)
+from systemds.scuro.representations.aggregated_representation import AggregatedRepresentation
 from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.representations.aggregate import Aggregation
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.utils.schema_helpers import get_shape
+from systemds.scuro.drsearch.unimodal_dag import UnimodalDAGBuilder, UnimodalDAG
+from systemds.scuro.drsearch.unimodal_visualizer import visualize_dag
 
 
 class UnimodalOptimizer:
@@ -50,6 +51,9 @@ def __init__(self, modalities, tasks, debug=True):
         self.modalities = modalities
         self.tasks = tasks
         self.run = None
+        
+        self.builders = {modality.modality_id: UnimodalDAGBuilder() for modality in modalities}
+
         if debug:
             wandb.login()
             config = {
@@ -99,6 +103,7 @@ def optimize_parallel(self, n_workers=None):
                 #     print(f'Modality {modality.modality_id} generated an exception: {exc}')
 
     def optimize(self):
+        """Optimize representations for each modality"""
         for modality in self.modalities:
             local_result = self._process_modality(modality, False)
         
@@ -106,71 +111,82 @@ def optimize(self):
             wandb.finish()
 
     def _process_modality(self, modality, parallel):
+        """Process a single modality using the DAG-based approach"""
         if parallel:
-            local_results = UnimodalResults(
-                modalities=[modality], tasks=self.tasks, debug=False
-            )
+            local_results = UnimodalResults([modality], self.tasks, debug=False)
         else:
             local_results = self.operator_performance
 
-        context_operators = self.operator_registry.get_context_operators()
-        not_self_contained_reps = (
-            self.operator_registry.get_not_self_contained_representations(
-                modality.modality_type
-            )
-        )
-        modality_specific_operators = self.operator_registry.get_representations(
-            modality.modality_type
-        )
-        for modality_specific_operator in modality_specific_operators:
-            mod_op = modality_specific_operator()
-
-            mod = modality.apply_representation(mod_op)
-            self._evaluate_local(mod, [mod_op], local_results)
-
-            if not mod_op.self_contained:
-                self._combine_non_self_contained_representations(
-                    modality, mod, not_self_contained_reps, local_results
-                )
-
-            for context_operator_after in context_operators:
-                con_op_after = context_operator_after()
-                mod_con = mod.context(con_op_after)
-                self._evaluate_local(mod_con, [mod_op, con_op_after], local_results)
+        modality_specific_operators = self.operator_registry.get_representations(modality.modality_type)
+        
+        for operator in modality_specific_operators:
+            # Build DAG for this operator
+            dags = self._build_modality_dag(modality, operator())
+            
+            for dag in dags:
+                # Execute DAG and get all intermediate representations
+                representations = self._execute_dag(dag, modality)
+                node_id = list(representations.keys())[-1]
+                node = dag.get_node_by_id(node_id)
+                if node.operation is None:
+                    continue
+                reps = self._get_representation_chain(node, dag)
+                combination = next((op for op in reps if isinstance(op, Fusion)), None)
+                self._evaluate_local(representations[node_id], local_results, dag, combination)
+                if self.debug:
+                    visualize_dag(dag)
+
+        return local_results
+
+    def _execute_dag(self, dag: UnimodalDAG, modality: Modality) -> Dict[str, TransformedModality]:
+        cache = {}
+
+        def execute_node(node_id: str) -> TransformedModality:
+            if node_id in cache:
+                return cache[node_id]
+
+            node = dag.get_node_by_id(node_id)
+            
+            if not node.inputs:  # Leaf node
+                cache[node_id] = modality
+                return modality
+                
+            input_mods = [execute_node(input_id) for input_id in node.inputs]
+            
+            if len(input_mods) == 1:
+                if isinstance(node.operation(), UnimodalRepresentation):
+                    if isinstance(input_mods[0], TransformedModality) and input_mods[0].transformation[0].__class__ == node.operation:
+                        result = input_mods[0]
+                    else:
+                        result = input_mods[0].apply_representation(node.operation())
+                elif isinstance(node.operation(), Context):
+                    result = input_mods[0].context(node.operation())
+                elif isinstance(node.operation(), AggregatedRepresentation):
+                    result = node.operation().transform(input_mods[0])
+            else:
+                result = input_mods[0].combine(input_mods[1:], node.operation())
+                
+            cache[node_id] = result
+            return result
 
-            return local_results
 
-    def _combine_non_self_contained_representations(
-        self,
-        modality: Modality,
-        representation: TransformedModality,
-        other_representations,
-        local_results,
-    ):
-        other_representations = copy.deepcopy(other_representations)
-        other_representations.remove(representation.transformation[0].__class__)
-        combined = representation
-        context_operators = self.operator_registry.get_context_operators()
-        used_representations = representation.transformation
-        for other_representation in other_representations:
-            used_representations.append(other_representation())
-            for combination in [Concatenation(), Hadamard(), Sum()]:
-                combined = combined.combine(
-                    modality.apply_representation(other_representation()), combination
-                )
-                self._evaluate_local(
-                    combined, used_representations, local_results, combination
-                )
+        execute_node(dag.root_node_id)
+        
+        return cache
 
-                for context_op in context_operators:
-                    con_op = context_op()
-                    mod = combined.context(con_op)
-                    c_t = copy.deepcopy(used_representations)
-                    c_t.append(con_op)
-                    self._evaluate_local(mod, c_t, local_results, combination)
+    def _get_representation_chain(self, node: 'UnimodalNode', dag: UnimodalDAG) -> List[Any]:
+        representations = []
+        if node.operation:
+            representations.append(node.operation)
+            
+        for input_id in node.inputs:
+            input_node = dag.get_node_by_id(input_id)
+            if input_node.operation:
+                representations.extend(self._get_representation_chain(input_node, dag))
+                
+        return representations
 
     def _merge_results(self, local_results):
-        """Merge local results into the main results"""
         for modality_id in local_results.results:
             for task_name in local_results.results[modality_id]:
                 self.operator_performance.results[modality_id][task_name].extend(
@@ -183,28 +199,28 @@ def _merge_results(self, local_results):
                     self.operator_performance.cache[modality][task_name][key] = value
 
     def _evaluate_local(
-        self, modality, representations, local_results, combination=None
+        self, modality, local_results, dag, combination=None
     ):
         if self._tasks_require_same_dims:
             if self.expected_dimensions == 1 and get_shape(modality.metadata) > 1:
-                # for aggregation in Aggregation().get_aggregation_functions():
-                agg_operator = AggregatedRepresentation(Aggregation())
-                agg_modality = agg_operator.transform(modality)
-                reps = representations.copy()
-                reps.append(agg_operator)
-                # agg_modality.pad()
+                builder = self.builders[modality.modality_id]
+                agg_operator = AggregatedRepresentation()
+                rep_node_id = builder.create_operation_node(agg_operator.__class__, [dag.root_node_id], agg_operator.parameters)
+                dag = builder.build(rep_node_id)
+                representations = self._execute_dag(dag, modality)
+                node_id = list(representations.keys())[-1]
                 for task in self.tasks:
                     start = time.time()
-                    scores = task.run(agg_modality.data)
+                    scores = task.run(representations[node_id].data)
                     end = time.time()
 
                     local_results.add_result(
                         scores,
-                        reps,
                         modality,
                         task.model.name,
                         end - start,
                         combination,
+                        dag
                     )
             else:
                 modality.pad()
@@ -214,47 +230,86 @@ def _evaluate_local(
                     end = time.time()
                     local_results.add_result(
                         scores,
-                        representations,
                         modality,
                         task.model.name,
                         end - start,
                         combination,
+                        dag
                     )
         else:
             for task in self.tasks:
                 if task.expected_dim == 1 and get_shape(modality.metadata) > 1:
-                    # for aggregation in Aggregation().get_aggregation_functions():
+                    builder = self.builders[modality.modality_id]
                     agg_operator = AggregatedRepresentation(Aggregation())
-                    agg_modality = agg_operator.transform(modality)
+                    rep_node_id = builder.create_operation_node(operator.__class__, [dag.root_node_id],
+                                                                agg_operator.parameters)
+                    dag = builder.build(rep_node_id)
+                    representations = self._execute_dag(dag, modality)
+                    node_id = list(representations.keys())[-1]
 
-                    reps = representations.copy()
-                    reps.append(agg_operator)
-                    # modality.pad()
                     start = time.time()
-                    scores = task.run(agg_modality.data)
+                    scores = task.run(representations[node_id].data)
                     end = time.time()
                     local_results.add_result(
                         scores,
-                        reps,
                         modality,
                         task.model.name,
                         end - start,
                         combination,
+                        dag
                     )
                 else:
-                    # modality.pad()
                     start = time.time()
                     scores = task.run(modality.data)
                     end = time.time()
                     local_results.add_result(
                         scores,
-                        representations,
                         modality,
                         task.model.name,
                         end - start,
                         combination,
+                        dag
                     )
 
+    def _build_modality_dag(self, modality: Modality, operator: Any) -> UnimodalDAG:
+        dags = []
+        builder = self.builders[modality.modality_id]
+        leaf_id = builder.create_leaf_node(None, modality.modality_id)
+        
+        rep_node_id = builder.create_operation_node(operator.__class__, [leaf_id], operator.parameters)
+        current_node_id = rep_node_id
+        dags.append(builder.build(current_node_id))
+        
+        if not operator.self_contained:
+            not_self_contained_reps = self.operator_registry.get_not_self_contained_representations(modality.modality_type)
+            not_self_contained_reps = [rep for rep in not_self_contained_reps if rep != operator.__class__]
+            
+            for combination in [Concatenation(), Hadamard(), Sum()]:
+                for other_rep in not_self_contained_reps:
+                    # Create node for other representation
+                    other_rep_id = builder.create_operation_node(other_rep, [leaf_id])
+                    
+                    # Create combination nodes
+                    combine_id = builder.create_operation_node(
+                        combination.__class__,
+                        [current_node_id, other_rep_id],
+                        {"combination_type": combination.__class__.__name__}
+                    )
+                    dags.append(builder.build(combine_id))
+                    current_node_id = combine_id
+                
+                    
+        context_operators = self.operator_registry.get_context_operators()
+       
+        for context_op in context_operators:
+            context_node_id = builder.create_operation_node(
+                context_op,
+                [current_node_id],
+                context_op().parameters,
+            )
+            dags.append(builder.build(context_node_id))
+            
+        return dags
 
 class UnimodalResults:
     def __init__(self, modalities, tasks, debug=False, run=None):
@@ -273,58 +328,23 @@ def __init__(self, modalities, tasks, debug=False, run=None):
                 self.results[modality][task_name] = []
 
     def add_result(
-        self, scores, representations, modality, task_name, task_time, combination
+        self, scores, modality, task_name, task_time, combination, dag
     ):
-        parameters = []
-        representation_names = []
-        
-
-        for rep in representations:
-            representation_names.append(type(rep).__name__)
-            if isinstance(rep, AggregatedRepresentation):
-                parameters.append(rep.parameters)
-                continue
-
-            params = {}
-            for param in list(rep.parameters.keys()):
-                params[param] = getattr(rep, param)
-
-            if isinstance(rep, Window):
-                params["aggregation_function"] = (
-                    rep.aggregation_function.aggregation_function_name
-                )
-
-            parameters.append(params)
-
         entry = ResultEntry(
-            representations=representation_names,
-            params=parameters,
             train_score=scores[0],
             val_score=scores[1],
             representation_time=modality.transform_time,
             task_time=task_time,
             combination=combination.name if combination else "",
+            dag=dag
         )
         
         if self.debug:
-            # config = {
-            #     "representation_type": "unimodal",  # or "unimodal"
-            #     "used_modalities": [modality.modality_type.name],
-            #     "representations": representation_names,
-            #     "representation_time": modality.transform_time,
-            #     "fusion_method": combination.name if combination else "",
-            #     "hyperparameters": parameters,
-            #     "task_name": task_name,
-            # }
-            
             metrics = asdict(entry)
             table = wandb.Table(columns=["representations"])
             for m in representation_names:
                 table.add_data(m)
             metrics["representations"] = table
-            # table = wandb.Table(columns=["parameters"])
-            # for m in parameters:
-            #     table.add_data(m)
             metrics.pop("params")
             
             metrics["used_modalities"] = modality.modality_id
@@ -334,7 +354,7 @@ def add_result(
             
         self.results[modality.modality_id][task_name].append(entry)
         self.cache[modality.modality_id][task_name][
-            (tuple(representation_names), scores[1], modality.transform_time)
+            (tuple([rep.operation for rep in dag.nodes]), scores[1], modality.transform_time)
         ] = modality
 
         if self.debug:
@@ -372,9 +392,8 @@ def get_k_best_results(self, modality, k, task):
 @dataclass(frozen=True)
 class ResultEntry:
     val_score: float
-    representations: list
-    params: list
     train_score: float
     representation_time: float
     task_time: float
     combination: str
+    dag: UnimodalDAG
diff --git a/src/main/python/systemds/scuro/modality/unimodal_modality.py b/src/main/python/systemds/scuro/modality/unimodal_modality.py
index dd1674ea85a..48ae3520a79 100644
--- a/src/main/python/systemds/scuro/modality/unimodal_modality.py
+++ b/src/main/python/systemds/scuro/modality/unimodal_modality.py
@@ -27,7 +27,7 @@
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.modality.joined import JoinedModality
 from systemds.scuro.modality.transformed import TransformedModality
-from systemds.scuro.modality.modality_identifier import ModalityIdentifier
+from systemds.scuro.utils.identifier import Identifier
 
 
 class UnimodalModality(Modality):
@@ -40,7 +40,7 @@ def __init__(self, data_loader: BaseLoader):
         """
         super().__init__(
             data_loader.modality_type,
-            ModalityIdentifier().new_id(),
+            Identifier().new_id(),
             {},
             data_loader.data_type,
         )
diff --git a/src/main/python/systemds/scuro/representations/bow.py b/src/main/python/systemds/scuro/representations/bow.py
index 7cfddbb506f..2b338d30ee6 100644
--- a/src/main/python/systemds/scuro/representations/bow.py
+++ b/src/main/python/systemds/scuro/representations/bow.py
@@ -34,8 +34,8 @@ class BoW(UnimodalRepresentation):
     def __init__(self, ngram_range=2, min_df=2, output_file=None):
         parameters = {"ngram_range": [ngram_range], "min_df": [min_df]}
         super().__init__("BoW", ModalityType.EMBEDDING, parameters)
-        self.ngram_range = ngram_range
-        self.min_df = min_df
+        self.ngram_range = int(ngram_range)
+        self.min_df = int(min_df)
         self.output_file = output_file
 
     def transform(self, modality):
diff --git a/src/main/python/systemds/scuro/representations/lstm.py b/src/main/python/systemds/scuro/representations/lstm.py
index 0cfafddefa9..af5fd56e9fa 100644
--- a/src/main/python/systemds/scuro/representations/lstm.py
+++ b/src/main/python/systemds/scuro/representations/lstm.py
@@ -43,9 +43,9 @@ def __init__(self, width=128, depth=1, dropout_rate=0.1):
         Combines modalities using an LSTM
         """
         super().__init__("LSTM")
-        self.depth = depth
-        self.width = width
-        self.dropout_rate = dropout_rate
+        self.depth = int(depth)
+        self.width = int(width)
+        self.dropout_rate = float(dropout_rate)
         self.unimodal_embeddings = {}
         seed = 42
 
diff --git a/src/main/python/systemds/scuro/representations/mel_spectrogram.py b/src/main/python/systemds/scuro/representations/mel_spectrogram.py
index dca1b0eec85..6ea90619013 100644
--- a/src/main/python/systemds/scuro/representations/mel_spectrogram.py
+++ b/src/main/python/systemds/scuro/representations/mel_spectrogram.py
@@ -37,9 +37,9 @@ def __init__(self, n_mels=128, hop_length=512, n_fft=2048):
             "n_fft": [1024, 2048, 4096],
         }
         super().__init__("MelSpectrogram", ModalityType.TIMESERIES, parameters, False)
-        self.n_mels = n_mels
-        self.hop_length = hop_length
-        self.n_fft = n_fft
+        self.n_mels = int(n_mels)
+        self.hop_length = int(hop_length)
+        self.n_fft = int(n_fft)
 
     def transform(self, modality):
         transformed_modality = TransformedModality(
diff --git a/src/main/python/systemds/scuro/representations/mfcc.py b/src/main/python/systemds/scuro/representations/mfcc.py
index c942f3076e7..4d9989add98 100644
--- a/src/main/python/systemds/scuro/representations/mfcc.py
+++ b/src/main/python/systemds/scuro/representations/mfcc.py
@@ -38,10 +38,10 @@ def __init__(self, n_mfcc=12, dct_type=2, n_mels=128, hop_length=512):
             "n_mels": [20, 32, 64, 128],
         }  # TODO
         super().__init__("MFCC", ModalityType.TIMESERIES, parameters, False)
-        self.n_mfcc = n_mfcc
-        self.dct_type = dct_type
-        self.n_mels = n_mels
-        self.hop_length = hop_length
+        self.n_mfcc = int(n_mfcc)
+        self.dct_type = int(dct_type)
+        self.n_mels = int(n_mels)
+        self.hop_length = int(hop_length)
 
     def transform(self, modality):
         transformed_modality = TransformedModality(
diff --git a/src/main/python/systemds/scuro/representations/representation.py b/src/main/python/systemds/scuro/representations/representation.py
index 144b88f34c0..dac3bb2b983 100644
--- a/src/main/python/systemds/scuro/representations/representation.py
+++ b/src/main/python/systemds/scuro/representations/representation.py
@@ -18,7 +18,7 @@
 # under the License.
 #
 # -------------------------------------------------------------
-from abc import abstractmethod
+from systemds.scuro.utils.identifier import Identifier
 
 
 class Representation:
@@ -26,6 +26,7 @@ def __init__(self, name, parameters):
         self.name = name
         self._parameters = parameters
         self.self_contained = True
+        self.representation_id = Identifier().new_id()
 
     @property
     def parameters(self):
diff --git a/src/main/python/systemds/scuro/representations/resnet.py b/src/main/python/systemds/scuro/representations/resnet.py
index f961cb4588a..711d1f39a60 100644
--- a/src/main/python/systemds/scuro/representations/resnet.py
+++ b/src/main/python/systemds/scuro/representations/resnet.py
@@ -36,7 +36,7 @@
     [ModalityType.IMAGE, ModalityType.VIDEO, ModalityType.TIMESERIES]
 )
 class ResNet(UnimodalRepresentation):
-    def __init__(self, layer="avgpool", model_name="ResNet18", output_file=None):
+    def __init__(self, model_name="ResNet18", layer="avgpool", output_file=None):
         self.data_type = torch.bfloat16
         self.model_name = model_name
         parameters = self._get_parameters()
@@ -95,7 +95,6 @@ def model_name(self, model_name):
                 .to(get_device())
                 .to(self.data_type)
             )
-
         else:
             raise NotImplementedError
 
diff --git a/src/main/python/systemds/scuro/representations/spectrogram.py b/src/main/python/systemds/scuro/representations/spectrogram.py
index 51b69d7d87c..f71fe80bb74 100644
--- a/src/main/python/systemds/scuro/representations/spectrogram.py
+++ b/src/main/python/systemds/scuro/representations/spectrogram.py
@@ -33,8 +33,8 @@ class Spectrogram(UnimodalRepresentation):
     def __init__(self, hop_length=512, n_fft=2048):
         parameters = {"hop_length": [256, 512, 1024, 2048], "n_fft": [1024, 2048, 4096]}
         super().__init__("Spectrogram", ModalityType.TIMESERIES, parameters, False)
-        self.hop_length = hop_length
-        self.n_fft = n_fft
+        self.hop_length = int(hop_length)
+        self.n_fft = int(n_fft)
 
     def transform(self, modality):
         transformed_modality = TransformedModality(
diff --git a/src/main/python/systemds/scuro/representations/tfidf.py b/src/main/python/systemds/scuro/representations/tfidf.py
index 3b8f069df83..c82961949fe 100644
--- a/src/main/python/systemds/scuro/representations/tfidf.py
+++ b/src/main/python/systemds/scuro/representations/tfidf.py
@@ -34,7 +34,7 @@ class TfIdf(UnimodalRepresentation):
     def __init__(self, min_df=2, output_file=None):
         parameters = {"min_df": [min_df]}
         super().__init__("TF-IDF", ModalityType.EMBEDDING, parameters)
-        self.min_df = min_df
+        self.min_df = int(min_df)
         self.output_file = output_file
 
     def transform(self, modality):
diff --git a/src/main/python/systemds/scuro/representations/window_aggregation.py b/src/main/python/systemds/scuro/representations/window_aggregation.py
index b3ad9e1b934..04d93142093 100644
--- a/src/main/python/systemds/scuro/representations/window_aggregation.py
+++ b/src/main/python/systemds/scuro/representations/window_aggregation.py
@@ -49,10 +49,10 @@ def aggregation_function(self, value):
 
 @register_context_operator()
 class WindowAggregation(Window):
-    def __init__(self, window_size=10, aggregation_function="mean", pad=False):
+    def __init__(self, aggregation_function="mean", window_size=10, pad=False):
         super().__init__("WindowAggregation", aggregation_function)
         self.parameters["window_size"] = [window_size]
-        self.window_size = window_size
+        self.window_size = int(window_size)
         self.pad = pad
 
     def execute(self, modality):
@@ -141,10 +141,10 @@ def window_aggregate_nested_level(self, instance, new_length):
 
 @register_context_operator()
 class StaticWindow(Window):
-    def __init__(self, num_windows=100, aggregation_function="mean"):
+    def __init__(self, aggregation_function="mean", num_windows=100):
         super().__init__("StaticWindow", aggregation_function)
         self.parameters["num_windows"] = [num_windows]
-        self.num_windows = num_windows
+        self.num_windows = int(num_windows)
 
     def execute(self, modality):
         windowed_data = []
@@ -172,10 +172,10 @@ def execute(self, modality):
 
 @register_context_operator()
 class DynamicWindow(Window):
-    def __init__(self, num_windows=100, aggregation_function="mean"):
+    def __init__(self, aggregation_function="mean", num_windows=100):
         super().__init__("DynamicWindow", aggregation_function)
         self.parameters["num_windows"] = [num_windows]
-        self.num_windows = num_windows
+        self.num_windows = int(num_windows)
 
     def execute(self, modality):
         windowed_data = []
diff --git a/src/main/python/systemds/scuro/representations/word2vec.py b/src/main/python/systemds/scuro/representations/word2vec.py
index 06e082fb695..38c83ee8d34 100644
--- a/src/main/python/systemds/scuro/representations/word2vec.py
+++ b/src/main/python/systemds/scuro/representations/word2vec.py
@@ -41,16 +41,14 @@ def get_embedding(sentence, model):
 
 @register_representation(ModalityType.TEXT)
 class W2V(UnimodalRepresentation):
-    def __init__(self, vector_size=150, min_count=2, window=5, output_file=None):
+    def __init__(self, vector_size=150, min_count=2, output_file=None):
         parameters = {
-            "vector_size": [vector_size],
-            "min_count": [min_count],
-            "window": [window],
-        }  # TODO
+            "vector_size": [100, 150, 200, 300],
+            "min_count": [1, 2, 3, 5, 7, 10],
+        }
         super().__init__("Word2Vec", ModalityType.EMBEDDING, parameters)
-        self.vector_size = vector_size
-        self.min_count = min_count
-        self.window = window
+        self.vector_size = int(vector_size)
+        self.min_count = int(min_count)
         self.output_file = output_file
 
     def transform(self, modality):
@@ -59,7 +57,6 @@ def transform(self, modality):
         model = Word2Vec(
             sentences=t,
             vector_size=self.vector_size,
-            window=self.window,
             min_count=self.min_count,
         )
         embeddings = []

From 4096c511374ef937bc5a527757e196274e441bdc Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Wed, 17 Sep 2025 20:50:41 +0200
Subject: [PATCH 05/22] add additional dag utilitities

---
 src/main/python/systemds/scuro/__init__.py    |   4 +-
 .../systemds/scuro/drsearch/unimodal_dag.py   | 146 ++++++++++++++++++
 .../scuro/drsearch/unimodal_optimizer.py      | 145 ++++++++---------
 .../scuro/drsearch/unimodal_visualizer.py     |  55 +++++++
 .../scuro/modality/joined_transformed.py      |   2 +-
 .../systemds/scuro/modality/transformed.py    |   4 +-
 .../aggregated_representation.py              |   7 +-
 .../systemds/scuro/representations/fusion.py  |   2 +-
 .../scuro/representations/word2vec.py         |  10 +-
 9 files changed, 291 insertions(+), 84 deletions(-)
 create mode 100644 src/main/python/systemds/scuro/drsearch/unimodal_dag.py
 create mode 100644 src/main/python/systemds/scuro/drsearch/unimodal_visualizer.py

diff --git a/src/main/python/systemds/scuro/__init__.py b/src/main/python/systemds/scuro/__init__.py
index 8e83c865a2a..da9477f0739 100644
--- a/src/main/python/systemds/scuro/__init__.py
+++ b/src/main/python/systemds/scuro/__init__.py
@@ -67,7 +67,7 @@
 from systemds.scuro.modality.joined import JoinedModality
 from systemds.scuro.modality.joined_transformed import JoinedTransformedModality
 from systemds.scuro.modality.modality import Modality
-from systemds.scuro.modality.modality_identifier import ModalityIdentifier
+from systemds.scuro.utils.identifier import Identifier
 from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.unimodal_modality import UnimodalModality
@@ -131,7 +131,7 @@
     "JoinedModality",
     "JoinedTransformedModality",
     "Modality",
-    "ModalityIdentifier",
+    "Identifier",
     "TransformedModality",
     "ModalityType",
     "UnimodalModality",
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_dag.py b/src/main/python/systemds/scuro/drsearch/unimodal_dag.py
new file mode 100644
index 00000000000..538f2088e10
--- /dev/null
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_dag.py
@@ -0,0 +1,146 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+from dataclasses import dataclass, field
+from typing import List, Dict, Any
+import copy
+from collections import deque
+
+
+@dataclass
+class UnimodalNode:
+    node_id: str
+    operation: Any
+    inputs: List[str]
+    modality_id: str = None
+    parameters: Dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class UnimodalDAG:
+
+    def __init__(self, nodes: List[UnimodalNode], root_node_id):
+        self.root_node_id = root_node_id
+        self.nodes = self.filter_connected_nodes_bfs(nodes)
+
+    def filter_connected_nodes_bfs(self, nodes):
+        node_map = {node.node_id: node for node in nodes}
+
+        if self.root_node_id not in node_map:
+            return []
+
+        visited = set()
+        stack = [self.root_node_id]
+
+        while stack:
+            current_id = stack.pop()
+            if current_id not in visited:
+                visited.add(current_id)
+
+                current_node = node_map[current_id]
+                for input_id in current_node.inputs:
+                    if input_id in node_map and input_id not in visited:
+                        stack.append(input_id)
+
+        return [node for node in nodes if node.node_id in visited]
+
+    def get_leaf_nodes(self) -> List[str]:
+        leaf_nodes = []
+        for node in self.nodes:
+            if not node.inputs:
+                leaf_nodes.append(node.node_id)
+        return leaf_nodes
+
+    def get_node_by_id(self, node_id: str) -> UnimodalNode:
+        for node in self.nodes:
+            if node.node_id == node_id:
+                return node
+        return None
+
+    def get_children(self, node_id: str) -> List[str]:
+        children = []
+        for node in self.nodes:
+            if node_id in node.inputs:
+                children.append(node.node_id)
+        return children
+
+    def validate(self) -> bool:
+        node_ids = {node.node_id for node in self.nodes}
+
+        if self.root_node_id not in node_ids:
+            return False
+
+        for node in self.nodes:
+            for input_id in node.inputs:
+                if input_id not in node_ids:
+                    return False
+
+        visited = set()
+
+        def has_cycle(node_id: str, path: set) -> bool:
+            if node_id in path:
+                return True
+            if node_id in visited:
+                return False
+            path.add(node_id)
+            visited.add(node_id)
+            node = self.get_node_by_id(node_id)
+            for input_id in node.inputs:
+                if has_cycle(input_id, path.copy()):
+                    return True
+            return False
+
+        return not has_cycle(self.root_node_id, set())
+
+
+class UnimodalDAGBuilder:
+
+    def __init__(self):
+        self.nodes = []
+        self.node_counter = 0
+
+    def create_leaf_node(self, operation: Any, modality_id: str) -> str:
+        node_id = f"leaf_{self.node_counter}"
+        self.node_counter += 1
+        node = UnimodalNode(
+            node_id=node_id, operation=operation, inputs=[], modality_id=modality_id
+        )
+        self.nodes.append(node)
+        return node_id
+
+    def create_operation_node(
+        self, operation: Any, inputs: List[str], parameters: Dict[str, Any] = None
+    ) -> str:
+        node_id = f"op_{self.node_counter}"
+        self.node_counter += 1
+        node = UnimodalNode(
+            node_id=node_id,
+            operation=operation,
+            inputs=inputs,
+            parameters=parameters or {},
+        )
+        self.nodes.append(node)
+        return node_id
+
+    def build(self, root_node_id: str) -> UnimodalDAG:
+        dag = UnimodalDAG(nodes=copy.deepcopy(self.nodes), root_node_id=root_node_id)
+        if not dag.validate():
+            raise ValueError("Invalid DAG construction")
+        return dag
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
index 0d7ec8e8350..daba2c5f18c 100644
--- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
@@ -35,7 +35,9 @@
 from systemds.scuro.representations.concatenation import Concatenation
 from systemds.scuro.representations.hadamard import Hadamard
 from systemds.scuro.representations.sum import Sum
-from systemds.scuro.representations.aggregated_representation import AggregatedRepresentation
+from systemds.scuro.representations.aggregated_representation import (
+    AggregatedRepresentation,
+)
 from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.modality.transformed import TransformedModality
@@ -51,8 +53,10 @@ def __init__(self, modalities, tasks, debug=True):
         self.modalities = modalities
         self.tasks = tasks
         self.run = None
-        
-        self.builders = {modality.modality_id: UnimodalDAGBuilder() for modality in modalities}
+
+        self.builders = {
+            modality.modality_id: UnimodalDAGBuilder() for modality in modalities
+        }
 
         if debug:
             wandb.login()
@@ -60,7 +64,7 @@ def __init__(self, modalities, tasks, debug=True):
                 "representation_type": "unimodal",  # or "unimodal"
             }
             self.run = wandb.init(project="multimodal-search", config=config)
-            
+
         self.debug = debug
 
         self.operator_registry = Registry()
@@ -106,7 +110,7 @@ def optimize(self):
         """Optimize representations for each modality"""
         for modality in self.modalities:
             local_result = self._process_modality(modality, False)
-        
+
         if self.debug:
             wandb.finish()
 
@@ -117,12 +121,14 @@ def _process_modality(self, modality, parallel):
         else:
             local_results = self.operator_performance
 
-        modality_specific_operators = self.operator_registry.get_representations(modality.modality_type)
-        
+        modality_specific_operators = self.operator_registry.get_representations(
+            modality.modality_type
+        )
+
         for operator in modality_specific_operators:
             # Build DAG for this operator
             dags = self._build_modality_dag(modality, operator())
-            
+
             for dag in dags:
                 # Execute DAG and get all intermediate representations
                 representations = self._execute_dag(dag, modality)
@@ -132,13 +138,17 @@ def _process_modality(self, modality, parallel):
                     continue
                 reps = self._get_representation_chain(node, dag)
                 combination = next((op for op in reps if isinstance(op, Fusion)), None)
-                self._evaluate_local(representations[node_id], local_results, dag, combination)
+                self._evaluate_local(
+                    representations[node_id], local_results, dag, combination
+                )
                 if self.debug:
                     visualize_dag(dag)
 
         return local_results
 
-    def _execute_dag(self, dag: UnimodalDAG, modality: Modality) -> Dict[str, TransformedModality]:
+    def _execute_dag(
+        self, dag: UnimodalDAG, modality: Modality
+    ) -> Dict[str, TransformedModality]:
         cache = {}
 
         def execute_node(node_id: str) -> TransformedModality:
@@ -146,16 +156,19 @@ def execute_node(node_id: str) -> TransformedModality:
                 return cache[node_id]
 
             node = dag.get_node_by_id(node_id)
-            
+
             if not node.inputs:  # Leaf node
                 cache[node_id] = modality
                 return modality
-                
+
             input_mods = [execute_node(input_id) for input_id in node.inputs]
-            
+
             if len(input_mods) == 1:
                 if isinstance(node.operation(), UnimodalRepresentation):
-                    if isinstance(input_mods[0], TransformedModality) and input_mods[0].transformation[0].__class__ == node.operation:
+                    if (
+                        isinstance(input_mods[0], TransformedModality)
+                        and input_mods[0].transformation[0].__class__ == node.operation
+                    ):
                         result = input_mods[0]
                     else:
                         result = input_mods[0].apply_representation(node.operation())
@@ -165,25 +178,26 @@ def execute_node(node_id: str) -> TransformedModality:
                     result = node.operation().transform(input_mods[0])
             else:
                 result = input_mods[0].combine(input_mods[1:], node.operation())
-                
+
             cache[node_id] = result
             return result
 
-
         execute_node(dag.root_node_id)
-        
+
         return cache
 
-    def _get_representation_chain(self, node: 'UnimodalNode', dag: UnimodalDAG) -> List[Any]:
+    def _get_representation_chain(
+        self, node: "UnimodalNode", dag: UnimodalDAG
+    ) -> List[Any]:
         representations = []
         if node.operation:
             representations.append(node.operation)
-            
+
         for input_id in node.inputs:
             input_node = dag.get_node_by_id(input_id)
             if input_node.operation:
                 representations.extend(self._get_representation_chain(input_node, dag))
-                
+
         return representations
 
     def _merge_results(self, local_results):
@@ -198,14 +212,14 @@ def _merge_results(self, local_results):
                 for key, value in local_results.cache[modality][task_name].items():
                     self.operator_performance.cache[modality][task_name][key] = value
 
-    def _evaluate_local(
-        self, modality, local_results, dag, combination=None
-    ):
+    def _evaluate_local(self, modality, local_results, dag, combination=None):
         if self._tasks_require_same_dims:
             if self.expected_dimensions == 1 and get_shape(modality.metadata) > 1:
                 builder = self.builders[modality.modality_id]
                 agg_operator = AggregatedRepresentation()
-                rep_node_id = builder.create_operation_node(agg_operator.__class__, [dag.root_node_id], agg_operator.parameters)
+                rep_node_id = builder.create_operation_node(
+                    agg_operator.__class__, [dag.root_node_id], agg_operator.parameters
+                )
                 dag = builder.build(rep_node_id)
                 representations = self._execute_dag(dag, modality)
                 node_id = list(representations.keys())[-1]
@@ -215,12 +229,7 @@ def _evaluate_local(
                     end = time.time()
 
                     local_results.add_result(
-                        scores,
-                        modality,
-                        task.model.name,
-                        end - start,
-                        combination,
-                        dag
+                        scores, modality, task.model.name, end - start, combination, dag
                     )
             else:
                 modality.pad()
@@ -229,20 +238,16 @@ def _evaluate_local(
                     scores = task.run(modality.data)
                     end = time.time()
                     local_results.add_result(
-                        scores,
-                        modality,
-                        task.model.name,
-                        end - start,
-                        combination,
-                        dag
+                        scores, modality, task.model.name, end - start, combination, dag
                     )
         else:
             for task in self.tasks:
                 if task.expected_dim == 1 and get_shape(modality.metadata) > 1:
                     builder = self.builders[modality.modality_id]
                     agg_operator = AggregatedRepresentation(Aggregation())
-                    rep_node_id = builder.create_operation_node(operator.__class__, [dag.root_node_id],
-                                                                agg_operator.parameters)
+                    rep_node_id = builder.create_operation_node(
+                        operator.__class__, [dag.root_node_id], agg_operator.parameters
+                    )
                     dag = builder.build(rep_node_id)
                     representations = self._execute_dag(dag, modality)
                     node_id = list(representations.keys())[-1]
@@ -251,56 +256,53 @@ def _evaluate_local(
                     scores = task.run(representations[node_id].data)
                     end = time.time()
                     local_results.add_result(
-                        scores,
-                        modality,
-                        task.model.name,
-                        end - start,
-                        combination,
-                        dag
+                        scores, modality, task.model.name, end - start, combination, dag
                     )
                 else:
                     start = time.time()
                     scores = task.run(modality.data)
                     end = time.time()
                     local_results.add_result(
-                        scores,
-                        modality,
-                        task.model.name,
-                        end - start,
-                        combination,
-                        dag
+                        scores, modality, task.model.name, end - start, combination, dag
                     )
 
     def _build_modality_dag(self, modality: Modality, operator: Any) -> UnimodalDAG:
         dags = []
         builder = self.builders[modality.modality_id]
         leaf_id = builder.create_leaf_node(None, modality.modality_id)
-        
-        rep_node_id = builder.create_operation_node(operator.__class__, [leaf_id], operator.parameters)
+
+        rep_node_id = builder.create_operation_node(
+            operator.__class__, [leaf_id], operator.parameters
+        )
         current_node_id = rep_node_id
         dags.append(builder.build(current_node_id))
-        
+
         if not operator.self_contained:
-            not_self_contained_reps = self.operator_registry.get_not_self_contained_representations(modality.modality_type)
-            not_self_contained_reps = [rep for rep in not_self_contained_reps if rep != operator.__class__]
-            
+            not_self_contained_reps = (
+                self.operator_registry.get_not_self_contained_representations(
+                    modality.modality_type
+                )
+            )
+            not_self_contained_reps = [
+                rep for rep in not_self_contained_reps if rep != operator.__class__
+            ]
+
             for combination in [Concatenation(), Hadamard(), Sum()]:
                 for other_rep in not_self_contained_reps:
                     # Create node for other representation
                     other_rep_id = builder.create_operation_node(other_rep, [leaf_id])
-                    
+
                     # Create combination nodes
                     combine_id = builder.create_operation_node(
                         combination.__class__,
                         [current_node_id, other_rep_id],
-                        {"combination_type": combination.__class__.__name__}
+                        {"combination_type": combination.__class__.__name__},
                     )
                     dags.append(builder.build(combine_id))
                     current_node_id = combine_id
-                
-                    
+
         context_operators = self.operator_registry.get_context_operators()
-       
+
         for context_op in context_operators:
             context_node_id = builder.create_operation_node(
                 context_op,
@@ -308,9 +310,10 @@ def _build_modality_dag(self, modality: Modality, operator: Any) -> UnimodalDAG:
                 context_op().parameters,
             )
             dags.append(builder.build(context_node_id))
-            
+
         return dags
 
+
 class UnimodalResults:
     def __init__(self, modalities, tasks, debug=False, run=None):
         self.modality_ids = [modality.modality_id for modality in modalities]
@@ -327,18 +330,16 @@ def __init__(self, modalities, tasks, debug=False, run=None):
                 self.cache[modality][task_name] = {}
                 self.results[modality][task_name] = []
 
-    def add_result(
-        self, scores, modality, task_name, task_time, combination, dag
-    ):
+    def add_result(self, scores, modality, task_name, task_time, combination, dag):
         entry = ResultEntry(
             train_score=scores[0],
             val_score=scores[1],
             representation_time=modality.transform_time,
             task_time=task_time,
             combination=combination.name if combination else "",
-            dag=dag
+            dag=dag,
         )
-        
+
         if self.debug:
             metrics = asdict(entry)
             table = wandb.Table(columns=["representations"])
@@ -346,15 +347,19 @@ def add_result(
                 table.add_data(m)
             metrics["representations"] = table
             metrics.pop("params")
-            
+
             metrics["used_modalities"] = modality.modality_id
             metrics["task"] = self.task_names.index(task_name)
             # Log metric for the multimodal combination
             self.run.log(metrics)
-            
+
         self.results[modality.modality_id][task_name].append(entry)
         self.cache[modality.modality_id][task_name][
-            (tuple([rep.operation for rep in dag.nodes]), scores[1], modality.transform_time)
+            (
+                tuple([rep.operation for rep in dag.nodes]),
+                scores[1],
+                modality.transform_time,
+            )
         ] = modality
 
         if self.debug:
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_visualizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_visualizer.py
new file mode 100644
index 00000000000..6aac24a3f1e
--- /dev/null
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_visualizer.py
@@ -0,0 +1,55 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+from typing import Dict, Any
+from systemds.scuro.drsearch.unimodal_dag import UnimodalDAG
+
+
+def visualize_dag(dag: UnimodalDAG) -> Dict[str, Any]:
+    nodes = []
+    edges = []
+
+    for i, node in enumerate(dag.nodes):
+        # Create node entry
+        node_type = "operation" if node.operation else "modality"
+        label = node.operation if node.operation else f"Modality: {node.modality_id}"
+
+        nodes.append(
+            {
+                "id": node.node_id,
+                "label": label,
+                "type": node_type,
+                "parameters": node.parameters,
+            }
+        )
+
+        print(nodes[i])
+
+        # Create edges
+        for input_id in node.inputs:
+            edges.append({"from": input_id, "to": node.node_id})
+
+    for edge in edges:
+        print(edge)
+
+    print(f"Root Node ID: {dag.root_node_id}")
+
+    return {"nodes": nodes, "edges": edges, "root": dag.root_node_id}
diff --git a/src/main/python/systemds/scuro/modality/joined_transformed.py b/src/main/python/systemds/scuro/modality/joined_transformed.py
index 3e0d8fb9dfb..078ce86f7af 100644
--- a/src/main/python/systemds/scuro/modality/joined_transformed.py
+++ b/src/main/python/systemds/scuro/modality/joined_transformed.py
@@ -72,7 +72,7 @@ def combine(self, fusion_method):
         return self
 
     def window_aggregation(self, window_size, aggregation):
-        w = WindowAggregation(window_size, aggregation)
+        w = WindowAggregation(aggregation, window_size)
         self.left_modality.data = w.execute(self.left_modality)
         self.right_modality.data = w.execute(self.right_modality)
         return self
diff --git a/src/main/python/systemds/scuro/modality/transformed.py b/src/main/python/systemds/scuro/modality/transformed.py
index 9481937e2ca..9f8d17c0f79 100644
--- a/src/main/python/systemds/scuro/modality/transformed.py
+++ b/src/main/python/systemds/scuro/modality/transformed.py
@@ -99,8 +99,8 @@ def join(self, right, join_condition):
 
         return joined_modality
 
-    def window_aggregation(self, windowSize, aggregation):
-        w = WindowAggregation(windowSize, aggregation)
+    def window_aggregation(self, window_size, aggregation):
+        w = WindowAggregation(aggregation, window_size)
         transformed_modality = TransformedModality(
             self, w, self_contained=self.self_contained
         )
diff --git a/src/main/python/systemds/scuro/representations/aggregated_representation.py b/src/main/python/systemds/scuro/representations/aggregated_representation.py
index 9119070a027..92115767e07 100644
--- a/src/main/python/systemds/scuro/representations/aggregated_representation.py
+++ b/src/main/python/systemds/scuro/representations/aggregated_representation.py
@@ -20,12 +20,13 @@
 # -------------------------------------------------------------
 from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.representations.representation import Representation
+from systemds.scuro.representations.aggregate import Aggregation
 
 
 class AggregatedRepresentation(Representation):
-    def __init__(self, aggregation):
-        super().__init__("AggregatedRepresentation", aggregation.parameters)
-        self.aggregation = aggregation
+    def __init__(self, aggregation="mean"):
+        super().__init__("AggregatedRepresentation", None)
+        self.aggregation = Aggregation(aggregation)
         self.self_contained = True
 
     def transform(self, modality):
diff --git a/src/main/python/systemds/scuro/representations/fusion.py b/src/main/python/systemds/scuro/representations/fusion.py
index ea614ac0955..7a942c28b88 100644
--- a/src/main/python/systemds/scuro/representations/fusion.py
+++ b/src/main/python/systemds/scuro/representations/fusion.py
@@ -52,7 +52,7 @@ def transform(self, modalities: List[Modality]):
         for modality in modalities:
             agg_modality = None
             if get_shape(modality.metadata) > 1:
-                agg_operator = AggregatedRepresentation(Aggregation())
+                agg_operator = AggregatedRepresentation()
                 agg_modality = agg_operator.transform(modality)
             mods.append(agg_modality if agg_modality else modality)
 
diff --git a/src/main/python/systemds/scuro/representations/word2vec.py b/src/main/python/systemds/scuro/representations/word2vec.py
index 38c83ee8d34..837811935cd 100644
--- a/src/main/python/systemds/scuro/representations/word2vec.py
+++ b/src/main/python/systemds/scuro/representations/word2vec.py
@@ -41,14 +41,14 @@ def get_embedding(sentence, model):
 
 @register_representation(ModalityType.TEXT)
 class W2V(UnimodalRepresentation):
-    def __init__(self, vector_size=150, min_count=2, output_file=None):
+    def __init__(self, vector_size=150, min_count=1, output_file=None):
         parameters = {
-            "vector_size": [100, 150, 200, 300],
-            "min_count": [1, 2, 3, 5, 7, 10],
+            "vector_size": [vector_size],
+            "min_count": [min_count],
         }
         super().__init__("Word2Vec", ModalityType.EMBEDDING, parameters)
-        self.vector_size = int(vector_size)
-        self.min_count = int(min_count)
+        self.vector_size = vector_size
+        self.min_count = min_count
         self.output_file = output_file
 
     def transform(self, modality):

From 2a15c82fda6efe1f9f0677c8e320a7c4d84dcf68 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Wed, 17 Sep 2025 21:04:21 +0200
Subject: [PATCH 06/22] fix error in optimizer

---
 .../scuro/drsearch/hyperparameter_tuner.py    | 276 +++++++++++-------
 .../scuro/drsearch/unimodal_optimizer.py      |   3 +-
 2 files changed, 177 insertions(+), 102 deletions(-)

diff --git a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
index 779facd0e9d..7760210c4fb 100644
--- a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
+++ b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
@@ -36,16 +36,19 @@
 from systemds.scuro.representations.window_aggregation import Window
 from systemds.scuro.representations.fusion import Fusion
 
-# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 logger = logging.getLogger(__name__)
 
 from systemds.scuro.drsearch.optimization_data import OptimizationResult
 from systemds.scuro.representations.context import Context
 
+
 @dataclass
 class HyperparamResult:
-    """Store hyperparameter tuning results"""
+
     representation_name: str
     best_params: Dict[str, Any]
     best_score: float
@@ -53,10 +56,20 @@ class HyperparamResult:
     tuning_time: float
     modality_id: int
 
+
 class HyperparameterTuner:
-    
-    def __init__(self, modalities, tasks, optimization_results, k: int = 2, n_jobs: int = -1, scoring_metric: str = 'accuracy',
-                 maximize_metric: bool = True, save_results: bool = True):
+
+    def __init__(
+        self,
+        modalities,
+        tasks,
+        optimization_results,
+        k: int = 2,
+        n_jobs: int = -1,
+        scoring_metric: str = "accuracy",
+        maximize_metric: bool = True,
+        save_results: bool = True,
+    ):
         self.tasks = tasks
         self.optimization_results = optimization_results
         self.n_jobs = n_jobs
@@ -69,13 +82,12 @@ def __init__(self, modalities, tasks, optimization_results, k: int = 2, n_jobs:
         self.k_best_cache = None
         self.k_best_modalities = None
         self.extract_k_best_modalities_per_task()
-    
-    
+
     def get_modality_by_id(self, modality_id: int) -> Modality:
         for mod in self.modalities:
             if mod.modality_id == modality_id:
                 return mod
-        
+
     def extract_k_best_modalities_per_task(self):
         self.k_best_modalities = {}
         self.k_best_cache = {}
@@ -84,21 +96,25 @@ def extract_k_best_modalities_per_task(self):
             self.k_best_cache[task.model.name] = []
             for modality in self.modalities:
                 k_best_results, cached_data = (
-                    self.optimization_results.get_k_best_results(
-                        modality, self.k, task
-                    )
+                    self.optimization_results.get_k_best_results(modality, self.k, task)
                 )
 
                 self.k_best_modalities[task.model.name].extend(k_best_results)
                 self.k_best_cache[task.model.name].extend(cached_data)
-    
-    def evaluate_single_config(self, reps: List[Representation],
-                               params: Dict[str, Any], modality_ids: List[int], task: Task, param_idx: List[int]) -> Tuple[Dict[str, Any], float]:
+
+    def evaluate_single_config(
+        self,
+        reps: List[Representation],
+        params: Dict[str, Any],
+        modality_ids: List[int],
+        task: Task,
+        param_idx: List[int],
+    ) -> Tuple[Dict[str, Any], float]:
         """
         Evaluate a single hyperparameter configuration
         """
         # try:
-        rep_name = ''
+        rep_name = ""
         modality_counter = 0
         modality = None
         modality_is_initialized = False
@@ -109,30 +125,51 @@ def evaluate_single_config(self, reps: List[Representation],
             rep_name += rep().name
             len_params = len(rep().parameters) if rep().parameters is not None else 0
             if isinstance(rep(), Window):
-                modality = modality.context(rep(*np.array(list(params.values()))[param_idx[start:start+len_params]]))
+                modality = modality.context(
+                    rep(
+                        *np.array(list(params.values()))[
+                            param_idx[start : start + len_params]
+                        ]
+                    )
+                )
             elif isinstance(rep(), Fusion):
-                modality = modality.combine(rep(*np.array(list(params.values()))[param_idx[start:start+len_params]]))
+                modality = modality.combine(
+                    rep(
+                        *np.array(list(params.values()))[
+                            param_idx[start : start + len_params]
+                        ]
+                    )
+                )
                 modality_is_initialized = False
             else:
                 if not modality_is_initialized:
                     modality = self.get_modality_by_id(modality_ids[modality_counter])
                     modality_is_initialized = True
                     modality_counter += 1
-                modality = modality.apply_representation(rep(*np.array(list(params.values()))[param_idx[start:start+len_params]]))
+                modality = modality.apply_representation(
+                    rep(
+                        *np.array(list(params.values()))[
+                            param_idx[start : start + len_params]
+                        ]
+                    )
+                )
             start += len_params
-    
+
         score = task.run(modality.data)[1]
         logger.debug(f"{rep_name} with params {params}: score = {score}")
         return params, score
         # except Exception as e:
         #         logger.error(f"Error evaluating {rep_name} with params {params}: {e}")
         #         return params, float('-inf') if self.maximize_metric else float('inf')
-    
-    
-    
-    def tune_representation(self, reps: List,
-                            hyperparams: List[Dict[str, List]], modality_id: List[int], task: Task,
-                            max_evals: Optional[int] = None) -> HyperparamResult:
+
+    def tune_representation(
+        self,
+        reps: List,
+        hyperparams: List[Dict[str, List]],
+        modality_id: List[int],
+        task: Task,
+        max_evals: Optional[int] = None,
+    ) -> HyperparamResult:
         """
         Tune hyperparameters for a single representation
 
@@ -144,9 +181,9 @@ def tune_representation(self, reps: List,
             max_evals: Maximum number of evaluations (None for full grid search)
         """
         start_time = time.time()
-        rep_name = ''.join([rep().name for rep in reps])
+        rep_name = "".join([rep().name for rep in reps])
         logger.info(f"Starting hyperparameter tuning for")
-        
+
         # Generate parameter grid
         hp = merge_multiple_dicts_with_increments(list(hyperparams))
         param_grid = list(ParameterGrid(hp))
@@ -156,45 +193,59 @@ def tune_representation(self, reps: List,
                 if h == p:
                     idx_params.append(i)
                     break
-        
-        
+
         # Limit evaluations if specified
         if max_evals and len(param_grid) > max_evals:
             # Random sampling if too many combinations
             np.random.shuffle(param_grid)
             param_grid = param_grid[:max_evals]
-        
+
         logger.info(f"Evaluating {len(param_grid)} parameter combinations for")
-        
+
         # Parallel evaluation
         all_results = []
         if self.n_jobs <= 1:
             # Sequential execution
             for params in param_grid:
-                result = self.evaluate_single_config(reps, params, modality_id, task, idx_params)
+                result = self.evaluate_single_config(
+                    reps, params, modality_id, task, idx_params
+                )
                 all_results.append(result)
         else:
             # Parallel execution
-            with concurrent.futures.ProcessPoolExecutor(max_workers=self.n_jobs) as executor:
-                futures = [executor.submit(self.evaluate_single_config, reps, params, modality_id, task, idx_params)
-                           for params in param_grid]
-                
+            with concurrent.futures.ProcessPoolExecutor(
+                max_workers=self.n_jobs
+            ) as executor:
+                futures = [
+                    executor.submit(
+                        self.evaluate_single_config,
+                        reps,
+                        params,
+                        modality_id,
+                        task,
+                        idx_params,
+                    )
+                    for params in param_grid
+                ]
+
                 for future in concurrent.futures.as_completed(futures):
                     try:
                         result = future.result()
                         all_results.append(result)
                     except Exception as e:
                         logger.error(f"Error in parallel execution: {e}")
-        
+
         # Find best parameters
         if self.maximize_metric:
             best_params, best_score = max(all_results, key=lambda x: x[1])
         else:
             best_params, best_score = min(all_results, key=lambda x: x[1])
-        
+
         tuning_time = time.time() - start_time
-        logger.info(f"Best params for {rep_name}: {best_params}, score: {best_score:.4f}, time: {tuning_time:.2f}s")
-        
+        logger.info(
+            f"Best params for {rep_name}: {best_params}, score: {best_score:.4f}, time: {tuning_time:.2f}s"
+        )
+
         return HyperparamResult(
             representation_name=rep_name,
             best_params=best_params,
@@ -203,7 +254,7 @@ def tune_representation(self, reps: List,
             tuning_time=tuning_time,
             modality_id=modality_id,
         )
-    
+
     def tune_unimodal_representations(self, max_eval_per_rep: Optional[int] = None):
         results = {}
         for task in self.tasks:
@@ -217,21 +268,31 @@ def tune_unimodal_representations(self, max_eval_per_rep: Optional[int] = None):
                     hyperparams.append(params)
                     reps.append(rep)
                 result = self.tune_representation(
-                    reps, hyperparams, representation.modality_id, task, max_eval_per_rep
+                    reps,
+                    hyperparams,
+                    [representation.modality_id],
+                    task,
+                    max_eval_per_rep,
                 )
                 results[task.model.name].append(result)
-            
+
         self.results = results
-        
+
         if self.save_results:
             self.save_tuning_results()
-        
+
         return results
-    
-    
-    def tune_multimodal_representations(self, optimization_results, task: Task, k: int = 1, optimize_unimodal: bool = True, max_eval_per_rep: Optional[int] = None):
+
+    def tune_multimodal_representations(
+        self,
+        optimization_results,
+        task: Task,
+        k: int = 1,
+        optimize_unimodal: bool = True,
+        max_eval_per_rep: Optional[int] = None,
+    ):
         best_optimization_results = optimization_results[:k]
-        
+
         for result in best_optimization_results:
             fusion_node_ids = []
             used_modalities = result.architecture.encoder_choices
@@ -239,53 +300,60 @@ def tune_multimodal_representations(self, optimization_results, task: Task, k: i
             modality_ids = []
             hyperparams = []
             reps = []
-            
+
             for i, fusion_node in enumerate(result.architecture.fusion_nodes):
                 if len(fusion_node.parameters) > 0:
                     fusion_node_ids.append(i)
-                
+
             if len(fusion_node_ids) == 0 and not optimize_unimodal:
-                logger.warning("No fusion nodes with hyperparameters and unimodal optimization disabled. Skipping.")
+                logger.warning(
+                    "No fusion nodes with hyperparameters and unimodal optimization disabled. Skipping."
+                )
                 continue
-            
+
             for i, modality in enumerate(used_modalities):
                 mod_id = modality.modality_id
                 instance_id = modality.modality_instance_id
-                cached_representation = self.get_cached_representation(int(mod_id), int(instance_id), task)
+                cached_representation = self.get_cached_representation(
+                    int(mod_id), int(instance_id), task
+                )
                 cached_representations.append(cached_representation)
-                
+
                 if optimize_unimodal:
                     modality_ids.append(int(mod_id))
-                    
+
                     for transformation in cached_representation.transformation:
                         params = transformation.parameters
                         rep = transformation.__class__
                         hyperparams.append(params)
                         reps.append(rep)
-                        
+
                 if len(used_modalities) > i + 1:
-                    reps.append(Registry().get_fusion_operator_by_name(result.architecture.fusion_nodes[i].operation))
+                    reps.append(
+                        Registry().get_fusion_operator_by_name(
+                            result.architecture.fusion_nodes[i].operation
+                        )
+                    )
                     hyperparams.append(result.architecture.fusion_nodes[i].parameters)
-                        
-            self.tune_representation(reps, hyperparams, modality_ids, task, max_eval_per_rep)
-            
-                
-                
-                
-            
-            
-                
+
+            self.tune_representation(
+                reps, hyperparams, modality_ids, task, max_eval_per_rep
+            )
+
     def get_cached_representation(self, modality_id: int, instance_id: int, task: Task):
         counter = -1
         for cached_representation in self.k_best_cache[task.model.name]:
             if cached_representation.modality_id == modality_id:
-                counter +=1
+                counter += 1
                 if counter == instance_id:
                     return cached_representation
-    
-    def tune_multiple_representations(self, representations: Dict[str, Dict],
-                                      task_data: Any, max_evals_per_rep: Optional[int] = None) -> Dict[
-        str, HyperparamResult]:
+
+    def tune_multiple_representations(
+        self,
+        representations: Dict[str, Dict],
+        task_data: Any,
+        max_evals_per_rep: Optional[int] = None,
+    ) -> Dict[str, HyperparamResult]:
         """
         Tune hyperparameters for multiple representations
 
@@ -301,64 +369,64 @@ def tune_multiple_representations(self, representations: Dict[str, Dict],
             max_evals_per_rep: Maximum evaluations per representation
         """
         results = {}
-        
+
         for rep_name, rep_config in representations.items():
-            rep_func = rep_config['function']
-            hyperparams = rep_config['hyperparams']
-            
+            rep_func = rep_config["function"]
+            hyperparams = rep_config["hyperparams"]
+
             result = self.tune_representation(
                 rep_name, rep_func, hyperparams, task_data, max_evals_per_rep
             )
             results[rep_name] = result
-        
+
         self.results = results
-        
+
         if self.save_results:
             self.save_tuning_results()
-        
+
         return results
-    
-    def get_best_representations(self, k: int = None) -> List[Tuple[str, HyperparamResult]]:
+
+    def get_best_representations(
+        self, k: int = None
+    ) -> List[Tuple[str, HyperparamResult]]:
         """
         Get the k best representations based on their best scores
         """
         if not self.results:
             logger.warning("No tuning results available")
             return []
-        
+
         sorted_results = sorted(
             self.results.items(),
             key=lambda x: x[1].best_score,
-            reverse=self.maximize_metric
+            reverse=self.maximize_metric,
         )
-        
+
         if k is None:
             return sorted_results
-        
+
         return sorted_results[:k]
-    
+
     def save_tuning_results(self, filepath: str = None):
         """Save tuning results to JSON file"""
         if not filepath:
             filepath = f"hyperparameter_results_{int(time.time())}.json"
-        
+
         # Convert results to JSON-serializable format
         json_results = {}
         for task in self.results.keys():
             for result in self.results[task]:
                 json_results[result.representation_name] = {
-                    'best_params': result.best_params,
-                    'best_score': result.best_score,
-                    'tuning_time': result.tuning_time,
-                    'num_evaluations': len(result.all_results)
+                    "best_params": result.best_params,
+                    "best_score": result.best_score,
+                    "tuning_time": result.tuning_time,
+                    "num_evaluations": len(result.all_results),
                 }
-            
-        
-        with open(filepath, 'w') as f:
+
+        with open(filepath, "w") as f:
             json.dump(json_results, f, indent=2)
-        
+
         logger.info(f"Results saved to {filepath}")
-        
 
     def tune_operator_chain(self, modality, operator_chain):
         best_result = None
@@ -434,9 +502,15 @@ def _generate_search_space(self, param_grids):
 
 
 def merge_multiple_dicts_with_increments(dicts):
-    result = dicts[0].copy() if dicts else {}
-    
+    if dicts is None:
+        return {}
+
+    result = dicts[0].copy() if dicts[0] is not None else {}
+
     for dict_to_merge in dicts[1:]:
+        if dict_to_merge is None:
+            continue
+
         for key, value in dict_to_merge.items():
             if key in result:
                 counter = 1
@@ -447,5 +521,5 @@ def merge_multiple_dicts_with_increments(dicts):
                 result[new_key] = value
             else:
                 result[key] = value
-    
-    return result
\ No newline at end of file
+
+    return result
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
index daba2c5f18c..8f44f87527a 100644
--- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
@@ -288,9 +288,10 @@ def _build_modality_dag(self, modality: Modality, operator: Any) -> UnimodalDAG:
             ]
 
             for combination in [Concatenation(), Hadamard(), Sum()]:
+                current_node_id = rep_node_id
                 for other_rep in not_self_contained_reps:
                     # Create node for other representation
-                    other_rep_id = builder.create_operation_node(other_rep, [leaf_id])
+                    other_rep_id = builder.create_operation_node(other_rep, [leaf_id],  other_rep().parameters)
 
                     # Create combination nodes
                     combine_id = builder.create_operation_node(

From aaf4e0f62aae0d25aa359a6656ca22ff61b50ff5 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Wed, 17 Sep 2025 22:20:22 +0200
Subject: [PATCH 07/22] adapt hp tuning to dag representation

---
 .../scuro/drsearch/hyperparameter_tuner.py    | 146 ++++++++++----
 .../scuro/drsearch/operator_registry.py       |   2 +-
 .../systemds/scuro/drsearch/unimodal_dag.py   |  52 ++++-
 .../scuro/drsearch/unimodal_optimizer.py      |  52 +----
 .../aggregated_representation.py              |   5 +-
 src/main/python/tests/scuro/test_hp_tuner.py  | 180 ++++++++++++++++++
 .../tests/scuro/test_unimodal_optimizer.py    |   6 +-
 .../tests/scuro/test_window_operations.py     |   4 +-
 8 files changed, 357 insertions(+), 90 deletions(-)
 create mode 100644 src/main/python/tests/scuro/test_hp_tuner.py

diff --git a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
index 7760210c4fb..e3cfad07319 100644
--- a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
+++ b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
@@ -28,10 +28,14 @@
 from dataclasses import dataclass
 from pathlib import Path
 import time
+import copy
 
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.drsearch.task import Task
+from systemds.scuro.representations.aggregated_representation import (
+    AggregatedRepresentation,
+)
 from systemds.scuro.representations.representation import Representation
 from systemds.scuro.representations.window_aggregation import Window
 from systemds.scuro.representations.fusion import Fusion
@@ -68,7 +72,7 @@ def __init__(
         n_jobs: int = -1,
         scoring_metric: str = "accuracy",
         maximize_metric: bool = True,
-        save_results: bool = True,
+        save_results: bool = False,
     ):
         self.tasks = tasks
         self.optimization_results = optimization_results
@@ -80,7 +84,7 @@ def __init__(
         self.k = k
         self.modalities = modalities
         self.k_best_cache = None
-        self.k_best_modalities = None
+        self.k_best_representations = None
         self.extract_k_best_modalities_per_task()
 
     def get_modality_by_id(self, modality_id: int) -> Modality:
@@ -89,17 +93,17 @@ def get_modality_by_id(self, modality_id: int) -> Modality:
                 return mod
 
     def extract_k_best_modalities_per_task(self):
-        self.k_best_modalities = {}
+        self.k_best_representations = {}
         self.k_best_cache = {}
         for task in self.tasks:
-            self.k_best_modalities[task.model.name] = []
+            self.k_best_representations[task.model.name] = []
             self.k_best_cache[task.model.name] = []
             for modality in self.modalities:
                 k_best_results, cached_data = (
                     self.optimization_results.get_k_best_results(modality, self.k, task)
                 )
 
-                self.k_best_modalities[task.model.name].extend(k_best_results)
+                self.k_best_representations[task.model.name].extend(k_best_results)
                 self.k_best_cache[task.model.name].extend(cached_data)
 
     def evaluate_single_config(
@@ -259,18 +263,10 @@ def tune_unimodal_representations(self, max_eval_per_rep: Optional[int] = None):
         results = {}
         for task in self.tasks:
             results[task.model.name] = []
-            for representation in self.k_best_cache[task.model.name]:
-                hyperparams = []
-                reps = []
-                for transformation in representation.transformation:
-                    params = transformation.parameters
-                    rep = transformation.__class__
-                    hyperparams.append(params)
-                    reps.append(rep)
-                result = self.tune_representation(
-                    reps,
-                    hyperparams,
-                    [representation.modality_id],
+            for representation in self.k_best_representations[task.model.name]:
+                result = self.tune_dag_representation(
+                    representation.dag,
+                    representation.dag.root_node_id,
                     task,
                     max_eval_per_rep,
                 )
@@ -283,6 +279,101 @@ def tune_unimodal_representations(self, max_eval_per_rep: Optional[int] = None):
 
         return results
 
+    def tune_dag_representation(self, dag, root_node_id, task, max_evals=None):
+        """
+        Tune hyperparameters for a DAG-based representation
+        """
+        hyperparams = {}
+        reps = []
+        modality_ids = []
+        node_order = []
+
+        # Extract parameters and operations from DAG in topological order
+        visited = set()
+
+        def visit_node(node_id):
+            if node_id in visited:
+                return
+            node = dag.get_node_by_id(node_id)
+            for input_id in node.inputs:
+                visit_node(input_id)
+            visited.add(node_id)
+            if node.operation is not None:
+                if node.parameters:
+                    hyperparams.update(node.parameters)
+                reps.append(node.operation)
+                node_order.append(node_id)
+            if node.modality_id is not None:
+                modality_ids.append(node.modality_id)
+
+        visit_node(root_node_id)
+
+        if not hyperparams:
+            return None
+
+        # Tune the hyperparameters
+        start_time = time.time()
+        rep_name = "_".join([rep.__name__ for rep in reps])
+
+        # Generate parameter grid
+        param_grid = list(ParameterGrid(hyperparams))
+        if max_evals and len(param_grid) > max_evals:
+            np.random.shuffle(param_grid)
+            param_grid = param_grid[:max_evals]
+
+        # Evaluate parameter combinations
+        all_results = []
+        for params in param_grid:
+            result = self.evaluate_dag_config(
+                dag, params, node_order, modality_ids, task
+            )
+            all_results.append(result)
+
+        # Find best parameters
+        if self.maximize_metric:
+            best_params, best_score = max(all_results, key=lambda x: x[1])
+        else:
+            best_params, best_score = min(all_results, key=lambda x: x[1])
+
+        tuning_time = time.time() - start_time
+
+        return HyperparamResult(
+            representation_name=rep_name,
+            best_params=best_params,
+            best_score=best_score,
+            all_results=all_results,
+            tuning_time=tuning_time,
+            modality_id=modality_ids[0] if modality_ids else None,
+        )
+
+    def evaluate_dag_config(self, dag, params, node_order, modality_ids, task):
+        """
+        Evaluate a single parameter configuration for a DAG-based representation
+        """
+        try:
+            # Create a copy of the DAG to modify
+            dag_copy = copy.deepcopy(dag)
+
+            # Update parameters in the DAG
+            for node_id in node_order:
+                node = dag_copy.get_node_by_id(node_id)
+                if node.operation is not None and node.parameters:
+                    node_params = {
+                        k: v for k, v in params.items() if k in node.parameters
+                    }
+                    node.parameters = node_params
+
+            modality = self.get_modality_by_id(modality_ids[0])
+            modified_modality = dag_copy.execute(modality)
+            score = task.run(
+                modified_modality[list(modified_modality.keys())[-1]].data
+            )[1]
+
+            return params, score
+        except Exception as e:
+            logger.error(f"Error evaluating DAG with params {params}: {e}")
+            return params, float("-inf") if self.maximize_metric else float("inf")
+
     def tune_multimodal_representations(
         self,
         optimization_results,
@@ -386,27 +477,6 @@ def tune_multiple_representations(
 
         return results
 
-    def get_best_representations(
-        self, k: int = None
-    ) -> List[Tuple[str, HyperparamResult]]:
-        """
-        Get the k best representations based on their best scores
-        """
-        if not self.results:
-            logger.warning("No tuning results available")
-            return []
-
-        sorted_results = sorted(
-            self.results.items(),
-            key=lambda x: x[1].best_score,
-            reverse=self.maximize_metric,
-        )
-
-        if k is None:
-            return sorted_results
-
-        return sorted_results[:k]
-
     def save_tuning_results(self, filepath: str = None):
         """Save tuning results to JSON file"""
         if not filepath:
diff --git a/src/main/python/systemds/scuro/drsearch/operator_registry.py b/src/main/python/systemds/scuro/drsearch/operator_registry.py
index 4339028f448..9bc90720f8e 100644
--- a/src/main/python/systemds/scuro/drsearch/operator_registry.py
+++ b/src/main/python/systemds/scuro/drsearch/operator_registry.py
@@ -76,7 +76,7 @@ def get_context_operators(self):
 
     def get_fusion_operators(self):
         return self._fusion_operators
-    
+
     def get_fusion_operator_by_name(self, fusion_name):
         for fusion in self._fusion_operators:
             if fusion.__name__ == fusion_name:
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_dag.py b/src/main/python/systemds/scuro/drsearch/unimodal_dag.py
index 538f2088e10..b19d120c1d4 100644
--- a/src/main/python/systemds/scuro/drsearch/unimodal_dag.py
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_dag.py
@@ -22,6 +22,16 @@
 from typing import List, Dict, Any
 import copy
 from collections import deque
+from systemds.scuro.modality.modality import Modality
+from systemds.scuro.modality.transformed import TransformedModality
+from systemds.scuro.representations.representation import (
+    Representation as UnimodalRepresentation,
+)
+from systemds.scuro.representations.aggregated_representation import (
+    AggregatedRepresentation,
+)
+from systemds.scuro.representations.context import Context
+from systemds.scuro.representations.window_aggregation import WindowAggregation
 
 
 @dataclass
@@ -38,9 +48,9 @@ class UnimodalDAG:
 
     def __init__(self, nodes: List[UnimodalNode], root_node_id):
         self.root_node_id = root_node_id
-        self.nodes = self.filter_connected_nodes_bfs(nodes)
+        self.nodes = self.filter_connected_nodes(nodes)
 
-    def filter_connected_nodes_bfs(self, nodes):
+    def filter_connected_nodes(self, nodes):
         node_map = {node.node_id: node for node in nodes}
 
         if self.root_node_id not in node_map:
@@ -109,6 +119,44 @@ def has_cycle(node_id: str, path: set) -> bool:
 
         return not has_cycle(self.root_node_id, set())
 
+    def execute(self, modality: Modality) -> Dict[str, TransformedModality]:
+        cache = {}
+
+        def execute_node(node_id: str) -> TransformedModality:
+            if node_id in cache:
+                return cache[node_id]
+
+            node = self.get_node_by_id(node_id)
+
+            if not node.inputs:  # Leaf node
+                cache[node_id] = modality
+                return modality
+
+            input_mods = [execute_node(input_id) for input_id in node.inputs]
+
+            if len(input_mods) == 1:
+                if isinstance(node.operation(), Context):
+                    result = input_mods[0].context(node.operation())
+                elif isinstance(node.operation(), UnimodalRepresentation):
+                    if (
+                        isinstance(input_mods[0], TransformedModality)
+                        and input_mods[0].transformation[0].__class__ == node.operation
+                    ):
+                        result = input_mods[0]
+                    else:
+                        result = input_mods[0].apply_representation(node.operation())
+                elif isinstance(node.operation(), AggregatedRepresentation):
+                    result = node.operation().transform(input_mods[0])
+            else:
+                result = input_mods[0].combine(input_mods[1:], node.operation())
+
+            cache[node_id] = result
+            return result
+
+        execute_node(self.root_node_id)
+
+        return cache
+
 
 class UnimodalDAGBuilder:
 
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
index 8f44f87527a..ed4fce7fedd 100644
--- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
@@ -131,7 +131,7 @@ def _process_modality(self, modality, parallel):
 
             for dag in dags:
                 # Execute DAG and get all intermediate representations
-                representations = self._execute_dag(dag, modality)
+                representations = dag.execute(modality)
                 node_id = list(representations.keys())[-1]
                 node = dag.get_node_by_id(node_id)
                 if node.operation is None:
@@ -146,46 +146,6 @@ def _process_modality(self, modality, parallel):
 
         return local_results
 
-    def _execute_dag(
-        self, dag: UnimodalDAG, modality: Modality
-    ) -> Dict[str, TransformedModality]:
-        cache = {}
-
-        def execute_node(node_id: str) -> TransformedModality:
-            if node_id in cache:
-                return cache[node_id]
-
-            node = dag.get_node_by_id(node_id)
-
-            if not node.inputs:  # Leaf node
-                cache[node_id] = modality
-                return modality
-
-            input_mods = [execute_node(input_id) for input_id in node.inputs]
-
-            if len(input_mods) == 1:
-                if isinstance(node.operation(), UnimodalRepresentation):
-                    if (
-                        isinstance(input_mods[0], TransformedModality)
-                        and input_mods[0].transformation[0].__class__ == node.operation
-                    ):
-                        result = input_mods[0]
-                    else:
-                        result = input_mods[0].apply_representation(node.operation())
-                elif isinstance(node.operation(), Context):
-                    result = input_mods[0].context(node.operation())
-                elif isinstance(node.operation(), AggregatedRepresentation):
-                    result = node.operation().transform(input_mods[0])
-            else:
-                result = input_mods[0].combine(input_mods[1:], node.operation())
-
-            cache[node_id] = result
-            return result
-
-        execute_node(dag.root_node_id)
-
-        return cache
-
     def _get_representation_chain(
         self, node: "UnimodalNode", dag: UnimodalDAG
     ) -> List[Any]:
@@ -221,7 +181,7 @@ def _evaluate_local(self, modality, local_results, dag, combination=None):
                     agg_operator.__class__, [dag.root_node_id], agg_operator.parameters
                 )
                 dag = builder.build(rep_node_id)
-                representations = self._execute_dag(dag, modality)
+                representations = dag.execute(modality)
                 node_id = list(representations.keys())[-1]
                 for task in self.tasks:
                     start = time.time()
@@ -249,7 +209,7 @@ def _evaluate_local(self, modality, local_results, dag, combination=None):
                         operator.__class__, [dag.root_node_id], agg_operator.parameters
                     )
                     dag = builder.build(rep_node_id)
-                    representations = self._execute_dag(dag, modality)
+                    representations = dag.execute(modality)
                     node_id = list(representations.keys())[-1]
 
                     start = time.time()
@@ -291,13 +251,15 @@ def _build_modality_dag(self, modality: Modality, operator: Any) -> UnimodalDAG:
                 current_node_id = rep_node_id
                 for other_rep in not_self_contained_reps:
                     # Create node for other representation
-                    other_rep_id = builder.create_operation_node(other_rep, [leaf_id],  other_rep().parameters)
+                    other_rep_id = builder.create_operation_node(
+                        other_rep, [leaf_id], other_rep().parameters
+                    )
 
                     # Create combination nodes
                     combine_id = builder.create_operation_node(
                         combination.__class__,
                         [current_node_id, other_rep_id],
-                        {"combination_type": combination.__class__.__name__},
+                        combination.parameters,
                     )
                     dags.append(builder.build(combine_id))
                     current_node_id = combine_id
diff --git a/src/main/python/systemds/scuro/representations/aggregated_representation.py b/src/main/python/systemds/scuro/representations/aggregated_representation.py
index 92115767e07..1e98d2f92ae 100644
--- a/src/main/python/systemds/scuro/representations/aggregated_representation.py
+++ b/src/main/python/systemds/scuro/representations/aggregated_representation.py
@@ -25,7 +25,10 @@
 
 class AggregatedRepresentation(Representation):
     def __init__(self, aggregation="mean"):
-        super().__init__("AggregatedRepresentation", None)
+        parameters = {
+            "aggregation": list(Aggregation().get_aggregation_functions()),
+        }
+        super().__init__("AggregatedRepresentation", parameters)
         self.aggregation = Aggregation(aggregation)
         self.self_contained = True
 
diff --git a/src/main/python/tests/scuro/test_hp_tuner.py b/src/main/python/tests/scuro/test_hp_tuner.py
new file mode 100644
index 00000000000..4ec684d9c93
--- /dev/null
+++ b/src/main/python/tests/scuro/test_hp_tuner.py
@@ -0,0 +1,180 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+
+import unittest
+
+import numpy as np
+from sklearn import svm
+from sklearn.metrics import classification_report
+from sklearn.model_selection import train_test_split
+
+from systemds.scuro.drsearch.operator_registry import Registry
+from systemds.scuro.models.model import Model
+from systemds.scuro.drsearch.task import Task
+from systemds.scuro.drsearch.unimodal_optimizer import UnimodalOptimizer
+
+from systemds.scuro.representations.spectrogram import Spectrogram
+from systemds.scuro.representations.covarep_audio_features import (
+    ZeroCrossing,
+    Spectral,
+    Pitch,
+)
+from systemds.scuro.representations.word2vec import W2V
+from systemds.scuro.representations.bow import BoW
+from systemds.scuro.modality.unimodal_modality import UnimodalModality
+from systemds.scuro.representations.resnet import ResNet
+from tests.scuro.data_generator import ModalityRandomDataGenerator, TestDataLoader
+
+from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.drsearch.hyperparameter_tuner import HyperparameterTuner
+
+
+class TestSVM(Model):
+    def __init__(self):
+        super().__init__("TestSVM")
+
+    def fit(self, X, y, X_test, y_test):
+        if X.ndim > 2:
+            X = X.reshape(X.shape[0], -1)
+        self.clf = svm.SVC(C=1, gamma="scale", kernel="rbf", verbose=False)
+        self.clf = self.clf.fit(X, np.array(y))
+        y_pred = self.clf.predict(X)
+
+        return classification_report(
+            y, y_pred, output_dict=True, digits=3, zero_division=1
+        )["accuracy"]
+
+    def test(self, test_X: np.ndarray, test_y: np.ndarray):
+        if test_X.ndim > 2:
+            test_X = test_X.reshape(test_X.shape[0], -1)
+        y_pred = self.clf.predict(np.array(test_X))  # noqa
+
+        return classification_report(
+            np.array(test_y), y_pred, output_dict=True, digits=3, zero_division=1
+        )["accuracy"]
+
+
+from unittest.mock import patch
+
+
+class TestHPTuner(unittest.TestCase):
+    data_generator = None
+    num_instances = 0
+
+    @classmethod
+    def setUpClass(cls):
+        cls.num_instances = 10
+        cls.mods = [ModalityType.VIDEO, ModalityType.AUDIO, ModalityType.TEXT]
+        cls.labels = ModalityRandomDataGenerator().create_balanced_labels(
+            num_instances=cls.num_instances
+        )
+        cls.indices = np.array(range(cls.num_instances))
+
+        split = train_test_split(
+            cls.indices,
+            cls.labels,
+            test_size=0.2,
+            random_state=42,
+        )
+        cls.train_indizes, cls.val_indizes = [int(i) for i in split[0]], [
+            int(i) for i in split[1]
+        ]
+
+        cls.tasks = [
+            Task(
+                "UnimodalRepresentationTask1",
+                TestSVM(),
+                cls.labels,
+                cls.train_indizes,
+                cls.val_indizes,
+            )
+        ]
+
+    def test_hp_tuner_for_audio_modality(self):
+        audio_data, audio_md = ModalityRandomDataGenerator().create_audio_data(
+            self.num_instances, 3000
+        )
+        audio = UnimodalModality(
+            TestDataLoader(
+                self.indices, None, ModalityType.AUDIO, audio_data, np.float32, audio_md
+            )
+        )
+
+        self.run_hp_for_modality(audio)
+
+    def test_hp_tuner_for_text_modality(self):
+        text_data, text_md = ModalityRandomDataGenerator().create_text_data(
+            self.num_instances
+        )
+        text = UnimodalModality(
+            TestDataLoader(
+                self.indices, None, ModalityType.TEXT, text_data, str, text_md
+            )
+        )
+        self.run_hp_for_modality(text)
+
+    def run_hp_for_modality(self, modality):
+        with patch.object(
+            Registry,
+            "_representations",
+            {
+                ModalityType.TEXT: [W2V, BoW],
+                ModalityType.AUDIO: [Spectrogram, ZeroCrossing, Spectral, Pitch],
+                ModalityType.TIMESERIES: [ResNet],
+                ModalityType.VIDEO: [ResNet],
+                ModalityType.EMBEDDING: [],
+            },
+        ):
+            registry = Registry()
+
+            unimodal_optimizer = UnimodalOptimizer([modality], self.tasks, False)
+            unimodal_optimizer.optimize()
+
+            hp = HyperparameterTuner(
+                [modality], self.tasks, unimodal_optimizer.operator_performance
+            )
+            hp.tune_unimodal_representations()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/src/main/python/tests/scuro/test_unimodal_optimizer.py b/src/main/python/tests/scuro/test_unimodal_optimizer.py
index a4952d29f94..d8fd98e74a2 100644
--- a/src/main/python/tests/scuro/test_unimodal_optimizer.py
+++ b/src/main/python/tests/scuro/test_unimodal_optimizer.py
@@ -39,6 +39,7 @@
     Pitch,
 )
 from systemds.scuro.representations.word2vec import W2V
+from systemds.scuro.representations.bow import BoW
 from systemds.scuro.modality.unimodal_modality import UnimodalModality
 from systemds.scuro.representations.resnet import ResNet
 from tests.scuro.data_generator import ModalityRandomDataGenerator, TestDataLoader
@@ -178,7 +179,7 @@ def optimize_unimodal_representation_for_modality(self, modality):
             Registry,
             "_representations",
             {
-                ModalityType.TEXT: [W2V],
+                ModalityType.TEXT: [W2V, BoW],
                 ModalityType.AUDIO: [Spectrogram, ZeroCrossing, Spectral, Pitch],
                 ModalityType.TIMESERIES: [ResNet],
                 ModalityType.VIDEO: [ResNet],
@@ -201,6 +202,9 @@ def optimize_unimodal_representation_for_modality(self, modality):
             assert len(result) == 1
             assert len(cached) == 1
 
+    # Todo: Add a test with all representations at once
+    # Todo: Add test with only one model
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/src/main/python/tests/scuro/test_window_operations.py b/src/main/python/tests/scuro/test_window_operations.py
index 9aab25a8148..d98e9ff4f3b 100644
--- a/src/main/python/tests/scuro/test_window_operations.py
+++ b/src/main/python/tests/scuro/test_window_operations.py
@@ -53,7 +53,7 @@ def test_static_window(self):
                 md,
             )
         )
-        aggregated_window = modality.context(StaticWindow(num_windows))
+        aggregated_window = modality.context(StaticWindow(num_windows=num_windows))
 
         for i in range(0, self.num_instances):
             assert len(aggregated_window.data[i]) == num_windows
@@ -71,7 +71,7 @@ def test_dynamic_window(self):
                 md,
             )
         )
-        aggregated_window = modality.context(DynamicWindow(num_windows))
+        aggregated_window = modality.context(DynamicWindow(num_windows=num_windows))
 
         for i in range(0, self.num_instances):
             assert len(aggregated_window.data[i]) == num_windows

From 0e5764b9799f4f307dec882b7c5b6f7e074f498e Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Thu, 18 Sep 2025 10:09:03 +0200
Subject: [PATCH 08/22] add dag structure to multimodal optimizer

---
 .../scuro/drsearch/multimodal_optimizer.py    | 1006 +++++++++++------
 1 file changed, 675 insertions(+), 331 deletions(-)

diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
index ac4365ed5c6..62927ad174c 100644
--- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
@@ -1,309 +1,531 @@
-# -------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# -------------------------------------------------------------
 import itertools
+import time
+from dataclasses import dataclass, field
+from typing import List, Dict, Any, Optional, Tuple, Union, Generator
+from enum import Enum
+import random
+import copy
+import numpy as np
+from concurrent.futures import ProcessPoolExecutor, as_completed
+import math
+import heapq
+from collections import defaultdict
 
 from systemds.scuro.representations.aggregated_representation import (
     AggregatedRepresentation,
 )
-
 from systemds.scuro.representations.aggregate import Aggregation
-
 from systemds.scuro.drsearch.operator_registry import Registry
-
 from systemds.scuro.utils.schema_helpers import get_shape
-import dataclasses
+from systemds.scuro.modality.transformed import TransformedModality
+from systemds.scuro.modality.type import ModalityType
 
 
-class MultimodalOptimizer:
-    def __init__(
-        self, modalities, unimodal_optimization_results, tasks, k=2, debug=True
-    ):
-        self.k_best_cache = None
-        self.k_best_modalities = None
-        self.modalities = modalities
-        self.unimodal_optimization_results = unimodal_optimization_results
-        self.tasks = tasks
-        self.k = k
-        self.extract_k_best_modalities_per_task()
-        self.debug = debug
+class SearchStrategy(Enum):
+    RANDOM = "random"
+    EXHAUSTIVE = "exhaustive"
 
-        self.operator_registry = Registry()
-        self.optimization_results = MultimodalResults(
-            modalities, tasks, debug, self.k_best_modalities
+
+@dataclass
+class FusionNode:
+    node_id: str
+    inputs: List[str]
+    operation: str
+    parameters: Dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class EncoderChoice:
+    modality_id: str
+    modality_instance_id: str
+    encoder_names: str
+    encoder_params: Dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class FusionArchitecture:
+    encoder_choices: List[str]
+    fusion_nodes: List[FusionNode]
+    root_node_id: str
+    used_modalities: List[str] = field(default_factory=list)
+
+    def get_leaf_nodes(self) -> List[str]:
+        return [
+            f"leaf_{choice.modality_id}_{choice.modality_instance_id}"
+            for choice in self.encoder_choices
+        ]
+
+    def validate(self) -> bool:
+        node_ids = {node.node_id for node in self.fusion_nodes}
+        leaf_ids = set(self.get_leaf_nodes())
+        all_ids = node_ids | leaf_ids
+
+        if self.root_node_id not in all_ids:
+            return False
+
+        for node in self.fusion_nodes:
+            for input_id in node.inputs:
+                if input_id not in all_ids:
+                    return False
+        return True
+
+    def __eq__(self, other):
+        if not isinstance(other, FusionArchitecture):
+            return False
+        return (
+            self.encoder_choices == other.encoder_choices
+            and self.fusion_nodes == other.fusion_nodes
+            and self.root_node_id == other.root_node_id
         )
-        self.cache = {}
 
-    def optimize(self):
-        for task in self.tasks:
-            self.optimize_intermodal_representations(task)
+    def __hash__(self):
+        encoder_tuple = tuple(
+            (c.modality_id, c.modality_instance_id, c.encoder_name)
+            for c in self.encoder_choices
+        )
+        fusion_tuple = tuple(
+            (f.node_id, tuple(f.inputs), f.operation) for f in self.fusion_nodes
+        )
+        return hash((encoder_tuple, fusion_tuple, self.root_node_id))
 
-    def optimize_intramodal_representations(self, task):
-        for modality in self.modalities:
-            representations = self.k_best_modalities[task.model.name][
-                modality.modality_id
-            ]
-            applied_representations = self.extract_representations(
-                representations, modality, task.model.name
-            )
 
-            for i in range(1, len(applied_representations)):
-                for fusion_method in self.operator_registry.get_fusion_operators():
-                    if fusion_method().needs_alignment and not applied_representations[
-                        i - 1
-                    ].is_aligned(applied_representations[i]):
-                        continue
-                    combined = applied_representations[i - 1].combine(
-                        applied_representations[i], fusion_method()
-                    )
-                    self.evaluate(
-                        task,
-                        combined,
-                        [i - 1, i],
-                        fusion_method,
-                        [
-                            applied_representations[i - 1].modality_id,
-                            applied_representations[i].modality_id,
-                        ],
-                    )
-                    if not fusion_method().commutative:
-                        combined_comm = applied_representations[i].combine(
-                            applied_representations[i - 1], fusion_method()
-                        )
-                        self.evaluate(
-                            task,
-                            combined_comm,
-                            [i, i - 1],
-                            fusion_method,
-                            [
-                                applied_representations[i - 1].modality_id,
-                                applied_representations[i].modality_id,
-                            ],
-                        )
+class DagBuilder:
+    def __init__(self, operator_registry: Registry):
+        self.operator_registry = operator_registry
 
-    # TODO: check if order matters for reused reps - only compute once - check in cache
-    # TODO: parallelize - whenever an item of len 0 comes along give it to a new thread - merge results
-    # TODO: change the algorithm so that one representation is used until there is no more representations to add - saves a lot of memory
-    def optimize_intermodal_representations(self, task):
-        modality_combos = []
-        n = len(self.k_best_cache[task.model.name])
-        reuse_cache = {}
-
-        def generate_extensions(current_combo, remaining_indices):
-            # Add current combination if it has at least 2 elements
-            if len(current_combo) >= 2:
-                combo_tuple = tuple(i for i in current_combo)
-                modality_combos.append(combo_tuple)
-
-            for i in remaining_indices:
-                new_combo = current_combo + [i]
-                new_remaining = [j for j in remaining_indices if j > i]
-                generate_extensions(new_combo, new_remaining)
-
-        for start_idx in range(n):
-            remaining = list(range(start_idx + 1, n))
-            generate_extensions([start_idx], remaining)
-        fusion_methods = self.operator_registry.get_fusion_operators()
-        fused_representations = []
-        reuse_fused_representations = False
-        for i, modality_combo in enumerate(modality_combos):
-            # clear reuse cache
-            reuse_cache = self.prune_cache(modality_combos[i:], reuse_cache)
-
-            if i != 0:
-                reuse_fused_representations = self.is_prefix_match(
-                    modality_combos[i - 1], modality_combo
-                )
-            if reuse_fused_representations:
-                mods = [
-                    self.k_best_cache[task.model.name][mod_idx]
-                    for mod_idx in modality_combo[len(modality_combos[i - 1]) :]
-                ]
-                fused_representations = reuse_cache[modality_combos[i - 1]]
+    def build_dag(
+        self, architecture: FusionArchitecture, unimodal_representations: Dict[str, Any]
+    ) -> Any:
+
+        node_outputs = {}
+
+        for choice in architecture.encoder_choices:
+            leaf_id = f"leaf_{choice.modality_id}_{choice.modality_instance_id}"
+            representation_key = f"{choice.modality_id}_{choice.modality_instance_id}"
+
+            if representation_key in unimodal_representations:
+                node_outputs[leaf_id] = unimodal_representations[representation_key]
             else:
-                prefix_idx = self.compute_equal_prefix_index(
-                    modality_combos[i - 1], modality_combo
+                raise ValueError(
+                    f"Missing unimodal representation: {representation_key}"
                 )
-                if prefix_idx > 1:
-                    fused_representations = reuse_cache[
-                        modality_combos[i - 1][:prefix_idx]
-                    ]
-                    reuse_fused_representations = True
-                    mods = [
-                        self.k_best_cache[task.model.name][mod_idx]
-                        for mod_idx in modality_combo[prefix_idx:]
+
+        executed_nodes = set(node_outputs.keys())
+        max_iterations = len(architecture.fusion_nodes) * 2
+        iteration = 0
+
+        while len(executed_nodes) < len(architecture.fusion_nodes) + len(
+            architecture.encoder_choices
+        ):
+            if iteration > max_iterations:
+                raise ValueError("Circular dependency detected in fusion architecture")
+
+            progress_made = False
+
+            for node in architecture.fusion_nodes:
+                if node.node_id in executed_nodes:
+                    continue
+
+                if all(input_id in executed_nodes for input_id in node.inputs):
+                    input_representations = [
+                        node_outputs[input_id] for input_id in node.inputs
                     ]
-            if self.debug:
-                print(
-                    f"New modality combo: {modality_combo} - Reuse: {reuse_fused_representations} - # fused reps: {len(fused_representations)}"
-                )
 
-            all_mods = [
-                self.k_best_cache[task.model.name][mod_idx]
-                for mod_idx in modality_combo
-            ]
-            temp_fused_reps = []
-            for j, fusion_method in enumerate(fusion_methods):
-                # Evaluate all mods
-                fused_rep = all_mods[0].combine(all_mods[1:], fusion_method())
-                temp_fused_reps.append(fused_rep)
-                self.evaluate(
-                    task,
-                    fused_rep,
-                    [
-                        self.k_best_modalities[task.model.name][k].representations
-                        for k in modality_combo
-                    ],
-                    fusion_method,
-                    modality_combo,
-                )
-                if reuse_fused_representations:
-                    for fused_representation in fused_representations:
-                        fused_rep = fused_representation.combine(mods, fusion_method())
-                        temp_fused_reps.append(fused_rep)
-                        self.evaluate(
-                            task,
-                            fused_rep,
-                            [
-                                self.k_best_modalities[task.model.name][
-                                    k
-                                ].representations
-                                for k in modality_combo
-                            ],
-                            fusion_method,
-                            modality_combo,
+                    fusion_ops = self.operator_registry.get_fusion_operators()
+                    fusion_op = None
+
+                    for op_class in fusion_ops:
+                        if op_class.__name__ == node.operation:
+                            fusion_op = op_class()
+                            break
+
+                    if fusion_op is None:
+                        raise ValueError(f"Unknown fusion operation: {node.operation}")
+
+                    if len(input_representations) == 1:
+                        fused = input_representations[0]
+                    else:
+                        fused = input_representations[0].combine(
+                            input_representations[1:], fusion_op
                         )
 
-            if (
-                len(modality_combo) < len(self.k_best_cache[task.model.name])
-                and i + 1 < len(modality_combos)
-                and self.is_prefix_match(modality_combos[i], modality_combos[i + 1])
+                    node_outputs[node.node_id] = fused
+                    executed_nodes.add(node.node_id)
+                    progress_made = True
+
+            if not progress_made:
+                break
+
+            iteration += 1
+
+        return (
+            node_outputs[architecture.root_node_id]
+            if architecture.root_node_id in node_outputs.keys()
+            else None
+        )
+
+
+class SubsetModalityGenerator:
+    def __init__(
+        self,
+        modality_encoder_choices: List[str],
+        fusion_primitives: List[str],
+        min_modalities: int = 1,
+        max_modalities: int = None,
+        max_depth: int = 4,
+    ):
+        self.modality_encoder_choices = modality_encoder_choices
+        self.fusion_primitives = fusion_primitives
+        self.min_modalities = max(1, min_modalities)
+        self.max_modalities = max_modalities or len(self.modality_encoder_choices)
+        self.max_depth = max_depth
+
+    def generate_modality_subsets(self) -> Generator[List[str], None, None]:
+        for r in range(
+            self.min_modalities,
+            min(self.max_modalities + 1, len(self.modality_encoder_choices) + 1),
+        ):
+            for modality_subset in itertools.permutations(
+                self.modality_encoder_choices, r
             ):
-                reuse_cache[modality_combo] = temp_fused_reps
-            reuse_fused_representations = False
+                yield list(modality_subset)
+
+    def generate_encoder_combinations_for_subset(
+        self, modality_subset: List[str]
+    ) -> Generator[List[EncoderChoice], None, None]:
+        modality_encoder_combos = []
+
+        for modality_id in modality_subset:
+            encoder_options = self.modality_encoder_choices[modality_id]
+            modality_combos = []
+
+            if len(encoder_options) > 1:
+                for r in range(1, len(encoder_options) + 1):
+                    for encoder_subset in itertools.combinations(encoder_options, r):
+                        encoder_choices = []
+                        for i, encoder_name in enumerate(encoder_subset):
+                            encoder_choices.append(
+                                EncoderChoice(
+                                    modality_id=modality_id,
+                                    modality_instance_id=str(i),
+                                    encoder_names=encoder_name,
+                                )
+                            )
+                        modality_combos.append(encoder_choices)
+            else:
+                for encoder_name in encoder_options:
+                    encoder_choices = [
+                        EncoderChoice(
+                            modality_id=modality_id,
+                            modality_instance_id="0",
+                            encoder_names=encoder_name,
+                        )
+                    ]
+                    modality_combos.append(encoder_choices)
 
-    def prune_cache(self, sequences, cache):
-        seqs_as_tuples = [tuple(seq) for seq in sequences]
+            modality_encoder_combos.append(modality_combos)
 
-        def still_used(key):
-            return any(self.is_prefix_match(key, seq) for seq in seqs_as_tuples)
+        for combo in itertools.product(*modality_encoder_combos):
+            all_encoder_choices = []
+            for modality_choices in combo:
+                all_encoder_choices.extend(modality_choices)
+            yield all_encoder_choices
 
-        cache = {key: value for key, value in cache.items() if still_used(key)}
-        return cache
 
-    def is_prefix_match(self, seq1, seq2):
-        if len(seq1) > len(seq2):
-            return False
+class ExhaustiveFusionArchitectureGenerator(SubsetModalityGenerator):
 
-        # Check if seq1 matches the beginning of seq2
-        return seq2[: len(seq1)] == seq1
+    def generate_all_architectures(self) -> Generator[FusionArchitecture, None, None]:
+        architecture_count = 0
 
-    def compute_equal_prefix_index(self, seq1, seq2):
-        max_len = min(len(seq1), len(seq2))
-        i = 0
-        while i < max_len and seq1[i] == seq2[i]:
-            i += 1
+        for modality_subset in self.generate_modality_subsets():
 
-        return i
+            leaf_nodes = [
+                f"leaf_{choice.modality_id}_{choice.modality_instance_id}"
+                for choice in modality_subset
+            ]
 
-    def extract_representations(self, representations, modality, task_name):
-        applied_representations = []
-        for i in range(0, len(representations)):
-            cache_key = (
-                tuple(representations[i].representations),
-                representations[i].task_time,
-                representations[i].representation_time,
-            )
-            if (
-                cache_key
-                in self.unimodal_optimization_results.cache[modality.modality_id][
-                    task_name
+            for fusion_nodes, root_node_id in self._generate_all_dags(leaf_nodes):
+                architecture = FusionArchitecture(
+                    encoder_choices=modality_subset,
+                    fusion_nodes=fusion_nodes,
+                    root_node_id=root_node_id,
+                    used_modalities=modality_subset,
+                )
+
+                if architecture.validate():
+                    architecture_count += 1
+                    yield architecture
+
+                    if architecture_count > 50000:
+                        print(
+                            f"Exhaustive search hit limit of {architecture_count} architectures"
+                        )
+                        return
+
+    def _generate_all_dags(
+        self, leaf_nodes: List[str]
+    ) -> Generator[Tuple[List[FusionNode], str], None, None]:
+        if len(leaf_nodes) == 1:
+            yield [], leaf_nodes[0]
+            return
+
+        for fusion_nodes, root in self._generate_inter_modal_fusions(leaf_nodes):
+            yield fusion_nodes, root
+
+    def _generate_intra_modal_fusions(
+        self, leaf_nodes: List[str]
+    ) -> Generator[Dict, None, None]:
+        modality_groups = defaultdict(list)
+        for leaf in leaf_nodes:
+            parts = leaf.split("_")
+            modality_id = parts[1]
+            modality_groups[modality_id].append(leaf)
+
+        modality_fusion_options = []
+
+        for modality_id, nodes in modality_groups.items():
+            options = []
+
+            if len(nodes) == 1:
+                options.append({"remaining_nodes": nodes, "fusion_nodes": []})
+            else:
+                options.append({"remaining_nodes": nodes, "fusion_nodes": []})
+
+                node_counter = 0
+                for fusion_op in self.fusion_primitives:
+                    fused_node_id = f"intra_{modality_id}_{node_counter}"
+                    fusion_node = FusionNode(fused_node_id, nodes, fusion_op)
+                    options.append(
+                        {
+                            "remaining_nodes": [fused_node_id],
+                            "fusion_nodes": [fusion_node],
+                        }
+                    )
+                    node_counter += 1
+
+            modality_fusion_options.append(options)
+
+        for combo in itertools.product(*modality_fusion_options):
+            all_remaining_nodes = []
+            all_fusion_nodes = []
+            for option in combo:
+                all_remaining_nodes.extend(option["remaining_nodes"])
+                all_fusion_nodes.extend(option["fusion_nodes"])
+
+            yield {
+                "remaining_nodes": all_remaining_nodes,
+                "fusion_nodes": all_fusion_nodes,
+            }
+
+    def _generate_inter_modal_fusions(
+        self, nodes: List[str]
+    ) -> Generator[Tuple[List[FusionNode], str], None, None]:
+        if len(nodes) == 1:
+            yield [], nodes[0]
+            return
+
+        if len(nodes) == 2:
+            for fusion_op in self.fusion_primitives:
+                fusion_node = FusionNode("fusion_0", nodes, fusion_op)
+                yield [fusion_node], "fusion_0"
+            return
+
+        for combination_sequence in self._generate_combination_sequences(nodes):
+            for fusion_assignment in self._assign_fusion_operations(
+                combination_sequence
+            ):
+                yield fusion_assignment
+
+    def _generate_combination_sequences(
+        self, nodes: List[str]
+    ) -> Generator[List[Tuple], None, None]:
+        if len(nodes) <= 2:
+            if len(nodes) == 2:
+                yield [(nodes[0], nodes[1], "result_0")]
+            return
+
+        for i in range(len(nodes)):
+            for j in range(i + 1, len(nodes)):
+                first_pair = (nodes[i], nodes[j], "intermediate_0")
+                remaining = [n for k, n in enumerate(nodes) if k != i and k != j] + [
+                    "intermediate_0"
                 ]
+
+                for rest_sequence in self._generate_combination_sequences(remaining):
+                    yield [first_pair] + rest_sequence
+
+    def _assign_fusion_operations(
+        self, combination_sequence: List[Tuple]
+    ) -> Generator[Tuple[List[FusionNode], str], None, None]:
+        if not combination_sequence:
+            return
+
+        num_operations = len(combination_sequence)
+
+        for fusion_ops in itertools.product(
+            self.fusion_primitives, repeat=num_operations
+        ):
+            fusion_nodes = []
+
+            for i, ((input1, input2, output), fusion_op) in enumerate(
+                zip(combination_sequence, fusion_ops)
             ):
-                applied_representations.append(
-                    self.unimodal_optimization_results.cache[modality.modality_id][
-                        task_name
-                    ][cache_key]
+                fusion_node = FusionNode(output, [input1, input2], fusion_op)
+                fusion_nodes.append(fusion_node)
+
+            root_id = combination_sequence[-1][2]
+            yield fusion_nodes, root_id
+
+
+class RandomFusionArchitectureGenerator(SubsetModalityGenerator):
+
+    def generate_random_architecture(self, max_depth: int = 4) -> FusionArchitecture:
+        num_modalities = random.randint(
+            self.min_modalities,
+            min(self.max_modalities, len(self.modality_encoder_choices)),
+        )
+        modality_subset = random.sample(self.modality_encoder_choices, num_modalities)
+
+        encoder_choices = []
+
+        for modality_id in modality_subset:
+            encoder_options = self.modality_encoder_choices[modality_id]
+
+            if self.allow_intra_modal and len(encoder_options) > 1:
+                num_encoders = random.randint(
+                    1, min(self.max_intra_modal_per_modality, len(encoder_options))
                 )
-            else:
-                applied_representation = modality
-                for j, rep in enumerate(representations[i].representations):
-                    representation, is_context = (
-                        self.operator_registry.get_representation_by_name(
-                            rep, modality.modality_type
+                chosen_encoders = random.sample(encoder_options, num_encoders)
+
+                for i, encoder_name in enumerate(chosen_encoders):
+                    encoder_choices.append(
+                        EncoderChoice(
+                            modality_id=modality_id,
+                            modality_instance_id=str(i),
+                            encoder_name=encoder_name,
                         )
                     )
-                    if representation is None:
-                        if rep == AggregatedRepresentation.__name__:
-                            representation = AggregatedRepresentation(Aggregation())
-                    else:
-                        representation = representation()
-                    representation.set_parameters(representations[i].params[j])
-                    if is_context:
-                        applied_representation = applied_representation.context(
-                            representation
-                        )
-                    else:
-                        applied_representation = (
-                            applied_representation.apply_representation(representation)
-                        )
-                self.k_best_cache[task_name].append(applied_representation)
-                applied_representations.append(applied_representation)
-        return applied_representations
-
-    def evaluate(self, task, modality, representations, fusion, modality_combo):
-        if task.expected_dim == 1 and get_shape(modality.metadata) > 1:
-            for aggregation in Aggregation().get_aggregation_functions():
-                agg_operator = AggregatedRepresentation(Aggregation(aggregation, False))
-                agg_modality = agg_operator.transform(modality)
-
-                scores = task.run(agg_modality.data)
-                reps = representations.copy()
-                reps.append(agg_operator)
-
-                self.optimization_results.add_result(
-                    scores,
-                    reps,
-                    modality.transformation,
-                    modality_combo,
-                    task.model.name,
+            else:
+                chosen_encoder = random.choice(encoder_options)
+                encoder_choices.append(
+                    EncoderChoice(
+                        modality_id=modality_id,
+                        modality_instance_id="0",
+                        encoder_names=chosen_encoder,
+                    )
                 )
-        else:
-            scores = task.run(modality.data)
-            self.optimization_results.add_result(
-                scores,
-                representations,
-                modality.transformation,
-                modality_combo,
-                task.model.name,
+
+        fusion_nodes = []
+        available_nodes = [
+            f"leaf_{choice.modality_id}_{choice.modality_instance_id}"
+            for choice in encoder_choices
+        ]
+        node_counter = 0
+
+        if self.allow_intra_modal:
+            modality_groups = {}
+            for choice in encoder_choices:
+                if choice.modality_id not in modality_groups:
+                    modality_groups[choice.modality_id] = []
+                modality_groups[choice.modality_id].append(
+                    f"leaf_{choice.modality_id}_{choice.modality_instance_id}"
+                )
+
+            for modality_id, nodes in modality_groups.items():
+                if len(nodes) > 1:
+                    fusion_op = random.choice(self.fusion_primitives)
+                    intra_modal_node_id = f"intra_{modality_id}_{node_counter}"
+
+                    fusion_node = FusionNode(intra_modal_node_id, nodes, fusion_op)
+                    fusion_nodes.append(fusion_node)
+
+                    for node in nodes:
+                        available_nodes.remove(node)
+                    available_nodes.append(intra_modal_node_id)
+                    node_counter += 1
+
+        while len(available_nodes) > 1 and node_counter < max_depth:
+            num_inputs = min(
+                random.randint(2, min(4, len(available_nodes))), len(available_nodes)
             )
+            selected_inputs = random.sample(available_nodes, num_inputs)
+
+            fusion_op = random.choice(self.fusion_primitives)
+
+            new_node_id = f"fusion_{node_counter}"
+            fusion_node = FusionNode(new_node_id, selected_inputs, fusion_op)
+            fusion_nodes.append(fusion_node)
+
+            for node in selected_inputs:
+                available_nodes.remove(node)
+            available_nodes.append(new_node_id)
+            node_counter += 1
+
+        root_node_id = (
+            available_nodes[0] if available_nodes else f"fusion_{node_counter-1}"
+        )
+
+        return FusionArchitecture(
+            encoder_choices=encoder_choices,
+            fusion_nodes=fusion_nodes,
+            root_node_id=root_node_id,
+            used_modalities=modality_subset,
+        )
 
-    def add_to_cache(self, result_idx, combined_modality):
-        self.cache[result_idx] = combined_modality
 
-    def extract_k_best_modalities_per_task(self):
-        self.k_best_modalities = {}
-        self.k_best_cache = {}
+class MultimodalOptimizer:
+    def __init__(
+        self,
+        modalities: List[Any],
+        unimodal_optimization_results: Any,
+        tasks: List[Any],
+        k: int = 2,
+        debug: bool = True,
+        min_modalities: int = 1,
+        max_modalities: int = None,
+    ):
+
+        self.modalities = modalities
+        self.unimodal_optimization_results = unimodal_optimization_results
+        self.tasks = tasks
+        self.k = k
+        self.debug = debug
+
+        self.min_modalities = min_modalities
+        self.max_modalities = max_modalities or len(modalities)
+
+        self.operator_registry = Registry()
+        self.fusion_primitives = [
+            op.__name__ for op in self.operator_registry.get_fusion_operators()
+        ]
+
+        self.k_best_representations = self._extract_k_best_representations()
+        self.modality_encoder_choices = self._create_encoder_choices()
+
+        self.architecture_generator = RandomFusionArchitectureGenerator(
+            self.modality_encoder_choices,
+            self.fusion_primitives,
+            min_modalities=min_modalities,
+            max_modalities=self.max_modalities,
+        )
+
+        self.exhaustive_generator = ExhaustiveFusionArchitectureGenerator(
+            self.modality_encoder_choices,
+            self.fusion_primitives,
+            min_modalities=min_modalities,
+            max_modalities=self.max_modalities,
+            max_depth=3,
+        )
+
+        self.dag_builder = DagBuilder(self.operator_registry)
+        self.optimization_results = []
+
+    def _extract_k_best_representations(self) -> Dict[str, Dict[str, List[Any]]]:
+        k_best = {}
+
         for task in self.tasks:
-            self.k_best_modalities[task.model.name] = []
-            self.k_best_cache[task.model.name] = []
+            k_best[task.model.name] = {}
+
             for modality in self.modalities:
                 k_best_results, cached_data = (
                     self.unimodal_optimization_results.get_k_best_results(
@@ -311,90 +533,212 @@ def extract_k_best_modalities_per_task(self):
                     )
                 )
 
-                self.k_best_modalities[task.model.name].extend(k_best_results)
-                self.k_best_cache[task.model.name].extend(cached_data)
+                k_best[task.model.name][modality.modality_id] = {
+                    "results": k_best_results,
+                    "representations": cached_data,
+                }
 
+        return k_best
 
-class MultimodalResults:
-    def __init__(self, modalities, tasks, debug, k_best_modalities):
-        self.modality_ids = [modality.modality_id for modality in modalities]
-        self.task_names = [task.model.name for task in tasks]
-        self.results = {}
-        self.debug = debug
-        self.k_best_modalities = k_best_modalities
+    def _create_encoder_choices(self) -> List[EncoderChoice]:
+        choices = []
 
-        for task in tasks:
-            self.results[task.model.name] = {}
+        first_task = self.tasks[0]
+        task_name = first_task.model.name
 
-    def add_result(
-        self, scores, best_representation_idx, fusion_methods, modality_combo, task_name
-    ):
+        for modality in self.modalities:
+            modality_id = modality.modality_id
+            k_best_data = self.k_best_representations[task_name][modality_id]
+            for i, result in enumerate(k_best_data["results"]):
+                r = []
+                for n in result.dag.nodes:
+                    if n.operation is not None:
+                        r.append(n.operation.__name__)
+                choices.append(
+                    EncoderChoice(
+                        modality_id=modality_id,
+                        modality_instance_id=str(i),
+                        encoder_names="".join(r),
+                    )
+                )
+
+        return choices
+
+    def _evaluate_architecture(
+        self, architecture: FusionArchitecture, task: Any
+    ) -> "OptimizationResult":
+
+        start_time = time.time()
+        task_name = task.model.name
+
+        unimodal_representations = {}
+
+        for choice in architecture.encoder_choices:
+            modality_id = choice.modality_id
+            encoder_names = choice.encoder_names
+
+            task_data = self.k_best_representations[task_name][modality_id]
+
+            selected_repr = None
+            for i, result in enumerate(task_data["results"]):
+                if hasattr(result, "representations"):
+                    if encoder_names == f"{result.representations}":
+                        selected_repr = task_data["representations"][i]
+                        break
+
+            representation_key = f"{modality_id}_{choice.modality_instance_id}"
+            unimodal_representations[representation_key] = selected_repr
+
+        fused_representation = self.dag_builder.build_dag(
+            architecture, unimodal_representations
+        )
+
+        if fused_representation is None:
+            return None
+
+        final_representation = fused_representation
+        if task.expected_dim == 1 and get_shape(fused_representation.metadata) > 1:
+            agg_operator = AggregatedRepresentation(Aggregation())
+            final_representation = agg_operator.transform(fused_representation)
 
-        entry = MultimodalResultEntry(
-            representations=best_representation_idx,
+        eval_start = time.time()
+        scores = task.run(final_representation.data)
+        eval_time = time.time() - eval_start
+
+        total_time = time.time() - start_time
+
+        return OptimizationResult(
+            architecture=architecture,
             train_score=scores[0],
             val_score=scores[1],
-            fusion_methods=[
-                fusion_method.__class__.__name__ for fusion_method in fusion_methods
-            ],
-            modality_combo=modality_combo,
-            task=task_name,
+            runtime=total_time,
+            task_name=task_name,
+            evaluation_time=eval_time,
         )
 
-        modality_id_strings = "_".join(list(map(str, modality_combo)))
-        if not modality_id_strings in self.results[task_name]:
-            self.results[task_name][modality_id_strings] = []
+    def _optimize_task_exhaustive(
+        self, task: Any, max_architectures: int = None
+    ) -> List["OptimizationResult"]:
 
-        self.results[task_name][modality_id_strings].append(entry)
+        task_results = []
+        evaluated_count = 0
 
         if self.debug:
-            print(f"{modality_id_strings}_{task_name}: {entry}")
-
-    def print_results(self):
-        for task_name in self.task_names:
-            for modality in self.results[task_name].keys():
-                for entry in self.results[task_name][modality]:
-                    reps = []
-                    for i, mod_idx in enumerate(entry.modality_combo):
-                        reps.append(self.k_best_modalities[task_name][mod_idx])
-
-                    print(
-                        f"{modality}_{task_name}: "
-                        f"Validation score: {entry.val_score} - Training score: {entry.train_score}"
-                    )
-                    for i, rep in enumerate(reps):
-                        print(
-                            f"    Representation: {entry.modality_combo[i]} - {rep.representations}"
-                        )
+            print(f"  Starting exhaustive search for task: {task.model.name}")
+            if max_architectures:
+                print(f"  Limiting to first {max_architectures} architectures")
 
-                    print(f"    Fusion: {entry.fusion_methods[0]} ")
+        for architecture in self.exhaustive_generator.generate_all_architectures():
+            if max_architectures and evaluated_count >= max_architectures:
+                break
 
-    def store_results(self, file_name=None):
-        for task_name in self.task_names:
-            for modality in self.results[task_name].keys():
-                for entry in self.results[task_name][modality]:
-                    reps = []
-                    for i, mod_idx in enumerate(entry.modality_combo):
-                        reps.append(self.k_best_modalities[task_name][mod_idx])
-                    entry.representations = reps
+            if self.debug and evaluated_count % 50 == 0:
+                print(f"  Evaluated {evaluated_count} architectures...")
 
-        import pickle
+            try:
+                result = self._evaluate_architecture(architecture, task)
+                if result is not None:
+                    task_results.append(result)
+            except Exception as e:
+                if self.debug:
+                    print(f"  Error evaluating architecture {evaluated_count}: {e}")
+                continue
 
-        if file_name is None:
-            import time
+            evaluated_count += 1
 
-            timestr = time.strftime("%Y%m%d-%H%M%S")
-            file_name = "multimodal_optimizer" + timestr + ".pkl"
+        if self.debug:
+            print(
+                f"  Exhaustive search completed: {evaluated_count} architectures evaluated"
+            )
+            modality_subset_counts = {}
+            for result in task_results:
+                num_modalities = len(result.architecture.used_modalities)
+                modality_subset_counts[num_modalities] = (
+                    modality_subset_counts.get(num_modalities, 0) + 1
+                )
+            print(f"  Modality subset distribution: {modality_subset_counts}")
 
-        with open(file_name, "wb") as f:
-            pickle.dump(self.results, f)
+        return task_results
 
+    def optimize(
+        self,
+        search_strategy: SearchStrategy = SearchStrategy.RANDOM,
+        search_budget: int = 50,
+        **search_params,
+    ) -> List["OptimizationResult"]:
+        all_results = []
 
-@dataclasses.dataclass
-class MultimodalResultEntry:
-    val_score: float
-    modality_combo: list
-    representations: list
-    fusion_methods: list
+        for task in self.tasks:
+            if self.debug:
+                print(f"Optimizing fusion architectures for task: {task.model.name}")
+                print(
+                    f"Exploring modality subsets: {self.min_modalities} to {self.max_modalities} modalities"
+                )
+
+            task_results = self._optimize_task(
+                task, search_strategy, search_budget, **search_params
+            )
+            all_results.extend(task_results)
+
+        self.optimization_results = all_results
+
+        if self.debug:
+            print(
+                f"\nOptimization completed: {len(all_results)} total architectures evaluated"
+            )
+            modality_usage = {}
+            for result in all_results:
+                for modality in result.architecture.used_modalities:
+                    modality_usage[modality.modality_id] = (
+                        modality_usage.get(modality.modality_id, 0) + 1
+                    )
+            print(f"Modality usage frequency: {modality_usage}")
+
+        return all_results
+
+    def _optimize_task(
+        self,
+        task: Any,
+        search_strategy: SearchStrategy,
+        search_budget: int,
+        **search_params,
+    ) -> List["OptimizationResult"]:
+
+        if search_strategy == SearchStrategy.EXHAUSTIVE:
+            max_architectures = search_params.get("max_architectures", search_budget)
+            return self._optimize_task_exhaustive(task, max_architectures)
+
+        elif search_strategy == SearchStrategy.RANDOM:
+            task_results = []
+            candidates = [
+                self.architecture_generator.generate_random_architecture()
+                for _ in range(search_budget)
+            ]
+
+            for i, architecture in enumerate(candidates):
+                if self.debug and i % 10 == 0:
+                    print(f"  Evaluating architecture {i+1}/{len(candidates)}")
+
+                try:
+                    result = self._evaluate_architecture(architecture, task)
+                    if result is not None:
+                        task_results.append(result)
+                except Exception as e:
+                    if self.debug:
+                        print(f"  Error evaluating architecture {i}: {e}")
+                    continue
+
+            return task_results
+
+        else:
+            raise ValueError(f"Unknown search strategy: {search_strategy}")
+
+
+@dataclass
+class OptimizationResult:
+    architecture: FusionArchitecture
     train_score: float
-    task: str
+    val_score: float
+    runtime: float
+    task_name: str
+    evaluation_time: float = 0.0

From 68c1587f25c518adf825fb95a471f79ffb876422 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Thu, 18 Sep 2025 11:15:45 +0200
Subject: [PATCH 09/22] refactor

---
 .../scuro/drsearch/multimodal_optimizer.py    | 872 ++++++------------
 1 file changed, 261 insertions(+), 611 deletions(-)

diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
index 62927ad174c..e52e65ca6e4 100644
--- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
@@ -1,16 +1,11 @@
 import itertools
 import time
 from dataclasses import dataclass, field
-from typing import List, Dict, Any, Optional, Tuple, Union, Generator
-from enum import Enum
-import random
+from typing import List, Dict, Any, Generator
 import copy
-import numpy as np
-from concurrent.futures import ProcessPoolExecutor, as_completed
-import math
-import heapq
-from collections import defaultdict
+import traceback
 
+from systemds.scuro import Task
 from systemds.scuro.representations.aggregated_representation import (
     AggregatedRepresentation,
 )
@@ -18,459 +13,155 @@
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.utils.schema_helpers import get_shape
 from systemds.scuro.modality.transformed import TransformedModality
-from systemds.scuro.modality.type import ModalityType
-
-
-class SearchStrategy(Enum):
-    RANDOM = "random"
-    EXHAUSTIVE = "exhaustive"
 
 
 @dataclass
-class FusionNode:
+class MultimodalNode:
     node_id: str
     inputs: List[str]
-    operation: str
+    operation: Any
+    modality_id: str = None
+    representation_index: int = None
     parameters: Dict[str, Any] = field(default_factory=dict)
 
 
 @dataclass
-class EncoderChoice:
-    modality_id: str
-    modality_instance_id: str
-    encoder_names: str
-    encoder_params: Dict[str, Any] = field(default_factory=dict)
+class MultimodalDAG:
+    def __init__(self, nodes: List[MultimodalNode], root_node_id: str):
+        self.root_node_id = root_node_id
+        self.nodes = self.filter_connected_nodes(nodes)
 
+    def filter_connected_nodes(self, nodes):
+        node_map = {node.node_id: node for node in nodes}
 
-@dataclass
-class FusionArchitecture:
-    encoder_choices: List[str]
-    fusion_nodes: List[FusionNode]
-    root_node_id: str
-    used_modalities: List[str] = field(default_factory=list)
-
-    def get_leaf_nodes(self) -> List[str]:
-        return [
-            f"leaf_{choice.modality_id}_{choice.modality_instance_id}"
-            for choice in self.encoder_choices
-        ]
+        if self.root_node_id not in node_map:
+            return []
+
+        visited = set()
+        stack = [self.root_node_id]
+
+        while stack:
+            current_id = stack.pop()
+            if current_id not in visited:
+                visited.add(current_id)
+                current_node = node_map[current_id]
+                for input_id in current_node.inputs:
+                    if input_id in node_map and input_id not in visited:
+                        stack.append(input_id)
+
+        return [node for node in nodes if node.node_id in visited]
+
+    def get_node_by_id(self, node_id: str) -> MultimodalNode:
+        for node in self.nodes:
+            if node.node_id == node_id:
+                return node
+        return None
 
     def validate(self) -> bool:
-        node_ids = {node.node_id for node in self.fusion_nodes}
-        leaf_ids = set(self.get_leaf_nodes())
-        all_ids = node_ids | leaf_ids
+        node_ids = {node.node_id for node in self.nodes}
 
-        if self.root_node_id not in all_ids:
+        if self.root_node_id not in node_ids:
             return False
 
-        for node in self.fusion_nodes:
+        for node in self.nodes:
             for input_id in node.inputs:
-                if input_id not in all_ids:
+                if input_id not in node_ids:
                     return False
-        return True
 
-    def __eq__(self, other):
-        if not isinstance(other, FusionArchitecture):
+        visited = set()
+
+        def has_cycle(node_id: str, path: set) -> bool:
+            if node_id in path:
+                return True
+            if node_id in visited:
+                return False
+            path.add(node_id)
+            visited.add(node_id)
+            node = self.get_node_by_id(node_id)
+            for input_id in node.inputs:
+                if has_cycle(input_id, path.copy()):
+                    return True
             return False
-        return (
-            self.encoder_choices == other.encoder_choices
-            and self.fusion_nodes == other.fusion_nodes
-            and self.root_node_id == other.root_node_id
-        )
 
-    def __hash__(self):
-        encoder_tuple = tuple(
-            (c.modality_id, c.modality_instance_id, c.encoder_name)
-            for c in self.encoder_choices
-        )
-        fusion_tuple = tuple(
-            (f.node_id, tuple(f.inputs), f.operation) for f in self.fusion_nodes
-        )
-        return hash((encoder_tuple, fusion_tuple, self.root_node_id))
+        return not has_cycle(self.root_node_id, set())
 
+    def execute(
+        self, k_best_representations: Dict[str, Dict[str, List[Any]]], task: Task
+    ) -> TransformedModality:
+        cache = {}
 
-class DagBuilder:
-    def __init__(self, operator_registry: Registry):
-        self.operator_registry = operator_registry
+        def execute_node(node_id: str) -> TransformedModality:
+            if node_id in cache:
+                return cache[node_id]
 
-    def build_dag(
-        self, architecture: FusionArchitecture, unimodal_representations: Dict[str, Any]
-    ) -> Any:
+            node = self.get_node_by_id(node_id)
 
-        node_outputs = {}
+            if not node.inputs:
+                representation = k_best_representations[task.model.name][node.modality_id][
+                    "representations"
+                ][node.representation_index]
+                cache[node_id] = representation
+                return representation
 
-        for choice in architecture.encoder_choices:
-            leaf_id = f"leaf_{choice.modality_id}_{choice.modality_instance_id}"
-            representation_key = f"{choice.modality_id}_{choice.modality_instance_id}"
+            input_representations = [execute_node(input_id) for input_id in node.inputs]
 
-            if representation_key in unimodal_representations:
-                node_outputs[leaf_id] = unimodal_representations[representation_key]
+            if len(input_representations) == 1:
+                result = input_representations[0]
             else:
-                raise ValueError(
-                    f"Missing unimodal representation: {representation_key}"
-                )
-
-        executed_nodes = set(node_outputs.keys())
-        max_iterations = len(architecture.fusion_nodes) * 2
-        iteration = 0
-
-        while len(executed_nodes) < len(architecture.fusion_nodes) + len(
-            architecture.encoder_choices
-        ):
-            if iteration > max_iterations:
-                raise ValueError("Circular dependency detected in fusion architecture")
-
-            progress_made = False
-
-            for node in architecture.fusion_nodes:
-                if node.node_id in executed_nodes:
-                    continue
-
-                if all(input_id in executed_nodes for input_id in node.inputs):
-                    input_representations = [
-                        node_outputs[input_id] for input_id in node.inputs
-                    ]
-
-                    fusion_ops = self.operator_registry.get_fusion_operators()
-                    fusion_op = None
-
-                    for op_class in fusion_ops:
-                        if op_class.__name__ == node.operation:
-                            fusion_op = op_class()
-                            break
-
-                    if fusion_op is None:
-                        raise ValueError(f"Unknown fusion operation: {node.operation}")
-
-                    if len(input_representations) == 1:
-                        fused = input_representations[0]
-                    else:
-                        fused = input_representations[0].combine(
-                            input_representations[1:], fusion_op
-                        )
-
-                    node_outputs[node.node_id] = fused
-                    executed_nodes.add(node.node_id)
-                    progress_made = True
-
-            if not progress_made:
-                break
-
-            iteration += 1
-
-        return (
-            node_outputs[architecture.root_node_id]
-            if architecture.root_node_id in node_outputs.keys()
-            else None
-        )
-
-
-class SubsetModalityGenerator:
-    def __init__(
-        self,
-        modality_encoder_choices: List[str],
-        fusion_primitives: List[str],
-        min_modalities: int = 1,
-        max_modalities: int = None,
-        max_depth: int = 4,
-    ):
-        self.modality_encoder_choices = modality_encoder_choices
-        self.fusion_primitives = fusion_primitives
-        self.min_modalities = max(1, min_modalities)
-        self.max_modalities = max_modalities or len(self.modality_encoder_choices)
-        self.max_depth = max_depth
-
-    def generate_modality_subsets(self) -> Generator[List[str], None, None]:
-        for r in range(
-            self.min_modalities,
-            min(self.max_modalities + 1, len(self.modality_encoder_choices) + 1),
-        ):
-            for modality_subset in itertools.permutations(
-                self.modality_encoder_choices, r
-            ):
-                yield list(modality_subset)
-
-    def generate_encoder_combinations_for_subset(
-        self, modality_subset: List[str]
-    ) -> Generator[List[EncoderChoice], None, None]:
-        modality_encoder_combos = []
+                op = node.operation
+                try:
+                    op_instance = op() if callable(op) else op
+                except Exception:
+                    op_instance = op
 
-        for modality_id in modality_subset:
-            encoder_options = self.modality_encoder_choices[modality_id]
-            modality_combos = []
-
-            if len(encoder_options) > 1:
-                for r in range(1, len(encoder_options) + 1):
-                    for encoder_subset in itertools.combinations(encoder_options, r):
-                        encoder_choices = []
-                        for i, encoder_name in enumerate(encoder_subset):
-                            encoder_choices.append(
-                                EncoderChoice(
-                                    modality_id=modality_id,
-                                    modality_instance_id=str(i),
-                                    encoder_names=encoder_name,
-                                )
-                            )
-                        modality_combos.append(encoder_choices)
-            else:
-                for encoder_name in encoder_options:
-                    encoder_choices = [
-                        EncoderChoice(
-                            modality_id=modality_id,
-                            modality_instance_id="0",
-                            encoder_names=encoder_name,
-                        )
-                    ]
-                    modality_combos.append(encoder_choices)
-
-            modality_encoder_combos.append(modality_combos)
-
-        for combo in itertools.product(*modality_encoder_combos):
-            all_encoder_choices = []
-            for modality_choices in combo:
-                all_encoder_choices.extend(modality_choices)
-            yield all_encoder_choices
-
-
-class ExhaustiveFusionArchitectureGenerator(SubsetModalityGenerator):
-
-    def generate_all_architectures(self) -> Generator[FusionArchitecture, None, None]:
-        architecture_count = 0
-
-        for modality_subset in self.generate_modality_subsets():
-
-            leaf_nodes = [
-                f"leaf_{choice.modality_id}_{choice.modality_instance_id}"
-                for choice in modality_subset
-            ]
-
-            for fusion_nodes, root_node_id in self._generate_all_dags(leaf_nodes):
-                architecture = FusionArchitecture(
-                    encoder_choices=modality_subset,
-                    fusion_nodes=fusion_nodes,
-                    root_node_id=root_node_id,
-                    used_modalities=modality_subset,
+                result = input_representations[0].combine(
+                    input_representations[1:], op_instance
                 )
 
-                if architecture.validate():
-                    architecture_count += 1
-                    yield architecture
-
-                    if architecture_count > 50000:
-                        print(
-                            f"Exhaustive search hit limit of {architecture_count} architectures"
-                        )
-                        return
-
-    def _generate_all_dags(
-        self, leaf_nodes: List[str]
-    ) -> Generator[Tuple[List[FusionNode], str], None, None]:
-        if len(leaf_nodes) == 1:
-            yield [], leaf_nodes[0]
-            return
-
-        for fusion_nodes, root in self._generate_inter_modal_fusions(leaf_nodes):
-            yield fusion_nodes, root
-
-    def _generate_intra_modal_fusions(
-        self, leaf_nodes: List[str]
-    ) -> Generator[Dict, None, None]:
-        modality_groups = defaultdict(list)
-        for leaf in leaf_nodes:
-            parts = leaf.split("_")
-            modality_id = parts[1]
-            modality_groups[modality_id].append(leaf)
-
-        modality_fusion_options = []
-
-        for modality_id, nodes in modality_groups.items():
-            options = []
-
-            if len(nodes) == 1:
-                options.append({"remaining_nodes": nodes, "fusion_nodes": []})
-            else:
-                options.append({"remaining_nodes": nodes, "fusion_nodes": []})
-
-                node_counter = 0
-                for fusion_op in self.fusion_primitives:
-                    fused_node_id = f"intra_{modality_id}_{node_counter}"
-                    fusion_node = FusionNode(fused_node_id, nodes, fusion_op)
-                    options.append(
-                        {
-                            "remaining_nodes": [fused_node_id],
-                            "fusion_nodes": [fusion_node],
-                        }
-                    )
-                    node_counter += 1
-
-            modality_fusion_options.append(options)
+            cache[node_id] = result
+            return result
 
-        for combo in itertools.product(*modality_fusion_options):
-            all_remaining_nodes = []
-            all_fusion_nodes = []
-            for option in combo:
-                all_remaining_nodes.extend(option["remaining_nodes"])
-                all_fusion_nodes.extend(option["fusion_nodes"])
+        return execute_node(self.root_node_id)
 
-            yield {
-                "remaining_nodes": all_remaining_nodes,
-                "fusion_nodes": all_fusion_nodes,
-            }
-
-    def _generate_inter_modal_fusions(
-        self, nodes: List[str]
-    ) -> Generator[Tuple[List[FusionNode], str], None, None]:
-        if len(nodes) == 1:
-            yield [], nodes[0]
-            return
-
-        if len(nodes) == 2:
-            for fusion_op in self.fusion_primitives:
-                fusion_node = FusionNode("fusion_0", nodes, fusion_op)
-                yield [fusion_node], "fusion_0"
-            return
-
-        for combination_sequence in self._generate_combination_sequences(nodes):
-            for fusion_assignment in self._assign_fusion_operations(
-                combination_sequence
-            ):
-                yield fusion_assignment
-
-    def _generate_combination_sequences(
-        self, nodes: List[str]
-    ) -> Generator[List[Tuple], None, None]:
-        if len(nodes) <= 2:
-            if len(nodes) == 2:
-                yield [(nodes[0], nodes[1], "result_0")]
-            return
-
-        for i in range(len(nodes)):
-            for j in range(i + 1, len(nodes)):
-                first_pair = (nodes[i], nodes[j], "intermediate_0")
-                remaining = [n for k, n in enumerate(nodes) if k != i and k != j] + [
-                    "intermediate_0"
-                ]
-
-                for rest_sequence in self._generate_combination_sequences(remaining):
-                    yield [first_pair] + rest_sequence
-
-    def _assign_fusion_operations(
-        self, combination_sequence: List[Tuple]
-    ) -> Generator[Tuple[List[FusionNode], str], None, None]:
-        if not combination_sequence:
-            return
-
-        num_operations = len(combination_sequence)
-
-        for fusion_ops in itertools.product(
-            self.fusion_primitives, repeat=num_operations
-        ):
-            fusion_nodes = []
 
-            for i, ((input1, input2, output), fusion_op) in enumerate(
-                zip(combination_sequence, fusion_ops)
-            ):
-                fusion_node = FusionNode(output, [input1, input2], fusion_op)
-                fusion_nodes.append(fusion_node)
+class MultimodalDAGBuilder:
+    def __init__(self):
+        self.nodes = []
+        self.node_counter = 0
 
-            root_id = combination_sequence[-1][2]
-            yield fusion_nodes, root_id
-
-
-class RandomFusionArchitectureGenerator(SubsetModalityGenerator):
-
-    def generate_random_architecture(self, max_depth: int = 4) -> FusionArchitecture:
-        num_modalities = random.randint(
-            self.min_modalities,
-            min(self.max_modalities, len(self.modality_encoder_choices)),
+    def create_leaf_node(self, modality_id: str, representation_index: int) -> str:
+        node_id = f"leaf_{modality_id}_{representation_index}"
+        node = MultimodalNode(
+            node_id=node_id,
+            inputs=[],
+            operation=None,
+            modality_id=modality_id,
+            representation_index=representation_index,
         )
-        modality_subset = random.sample(self.modality_encoder_choices, num_modalities)
-
-        encoder_choices = []
-
-        for modality_id in modality_subset:
-            encoder_options = self.modality_encoder_choices[modality_id]
-
-            if self.allow_intra_modal and len(encoder_options) > 1:
-                num_encoders = random.randint(
-                    1, min(self.max_intra_modal_per_modality, len(encoder_options))
-                )
-                chosen_encoders = random.sample(encoder_options, num_encoders)
-
-                for i, encoder_name in enumerate(chosen_encoders):
-                    encoder_choices.append(
-                        EncoderChoice(
-                            modality_id=modality_id,
-                            modality_instance_id=str(i),
-                            encoder_name=encoder_name,
-                        )
-                    )
-            else:
-                chosen_encoder = random.choice(encoder_options)
-                encoder_choices.append(
-                    EncoderChoice(
-                        modality_id=modality_id,
-                        modality_instance_id="0",
-                        encoder_names=chosen_encoder,
-                    )
-                )
-
-        fusion_nodes = []
-        available_nodes = [
-            f"leaf_{choice.modality_id}_{choice.modality_instance_id}"
-            for choice in encoder_choices
-        ]
-        node_counter = 0
-
-        if self.allow_intra_modal:
-            modality_groups = {}
-            for choice in encoder_choices:
-                if choice.modality_id not in modality_groups:
-                    modality_groups[choice.modality_id] = []
-                modality_groups[choice.modality_id].append(
-                    f"leaf_{choice.modality_id}_{choice.modality_instance_id}"
-                )
-
-            for modality_id, nodes in modality_groups.items():
-                if len(nodes) > 1:
-                    fusion_op = random.choice(self.fusion_primitives)
-                    intra_modal_node_id = f"intra_{modality_id}_{node_counter}"
-
-                    fusion_node = FusionNode(intra_modal_node_id, nodes, fusion_op)
-                    fusion_nodes.append(fusion_node)
-
-                    for node in nodes:
-                        available_nodes.remove(node)
-                    available_nodes.append(intra_modal_node_id)
-                    node_counter += 1
-
-        while len(available_nodes) > 1 and node_counter < max_depth:
-            num_inputs = min(
-                random.randint(2, min(4, len(available_nodes))), len(available_nodes)
-            )
-            selected_inputs = random.sample(available_nodes, num_inputs)
-
-            fusion_op = random.choice(self.fusion_primitives)
-
-            new_node_id = f"fusion_{node_counter}"
-            fusion_node = FusionNode(new_node_id, selected_inputs, fusion_op)
-            fusion_nodes.append(fusion_node)
-
-            for node in selected_inputs:
-                available_nodes.remove(node)
-            available_nodes.append(new_node_id)
-            node_counter += 1
-
-        root_node_id = (
-            available_nodes[0] if available_nodes else f"fusion_{node_counter-1}"
+        self.nodes.append(node)
+        return node_id
+
+    def create_fusion_node(self, inputs: List[str], fusion_operation: Any) -> str:
+        node_id = f"fusion_{self.node_counter}"
+        self.node_counter += 1
+        node = MultimodalNode(
+            node_id=node_id,
+            inputs=inputs,
+            operation=fusion_operation,
+            parameters=(
+                fusion_operation.parameters
+                if hasattr(fusion_operation, "parameters")
+                else {}
+            ),
         )
+        self.nodes.append(node)
+        return node_id
 
-        return FusionArchitecture(
-            encoder_choices=encoder_choices,
-            fusion_nodes=fusion_nodes,
-            root_node_id=root_node_id,
-            used_modalities=modality_subset,
-        )
+    def build(self, root_node_id: str) -> MultimodalDAG:
+        dag = MultimodalDAG(nodes=copy.deepcopy(self.nodes), root_node_id=root_node_id)
+        if not dag.validate():
+            raise ValueError("Invalid DAG construction")
+        return dag
 
 
 class MultimodalOptimizer:
@@ -481,43 +172,21 @@ def __init__(
         tasks: List[Any],
         k: int = 2,
         debug: bool = True,
-        min_modalities: int = 1,
+        min_modalities: int = 2,
         max_modalities: int = None,
     ):
-
         self.modalities = modalities
         self.unimodal_optimization_results = unimodal_optimization_results
         self.tasks = tasks
         self.k = k
         self.debug = debug
-
-        self.min_modalities = min_modalities
+        self.min_modalities = max(2, min_modalities)
         self.max_modalities = max_modalities or len(modalities)
 
         self.operator_registry = Registry()
-        self.fusion_primitives = [
-            op.__name__ for op in self.operator_registry.get_fusion_operators()
-        ]
+        self.fusion_operators = self.operator_registry.get_fusion_operators()
 
         self.k_best_representations = self._extract_k_best_representations()
-        self.modality_encoder_choices = self._create_encoder_choices()
-
-        self.architecture_generator = RandomFusionArchitectureGenerator(
-            self.modality_encoder_choices,
-            self.fusion_primitives,
-            min_modalities=min_modalities,
-            max_modalities=self.max_modalities,
-        )
-
-        self.exhaustive_generator = ExhaustiveFusionArchitectureGenerator(
-            self.modality_encoder_choices,
-            self.fusion_primitives,
-            min_modalities=min_modalities,
-            max_modalities=self.max_modalities,
-            max_depth=3,
-        )
-
-        self.dag_builder = DagBuilder(self.operator_registry)
         self.optimization_results = []
 
     def _extract_k_best_representations(self) -> Dict[str, Dict[str, List[Any]]]:
@@ -540,203 +209,184 @@ def _extract_k_best_representations(self) -> Dict[str, Dict[str, List[Any]]]:
 
         return k_best
 
-    def _create_encoder_choices(self) -> List[EncoderChoice]:
-        choices = []
-
-        first_task = self.tasks[0]
-        task_name = first_task.model.name
-
-        for modality in self.modalities:
-            modality_id = modality.modality_id
-            k_best_data = self.k_best_representations[task_name][modality_id]
-            for i, result in enumerate(k_best_data["results"]):
-                r = []
-                for n in result.dag.nodes:
-                    if n.operation is not None:
-                        r.append(n.operation.__name__)
-                choices.append(
-                    EncoderChoice(
-                        modality_id=modality_id,
-                        modality_instance_id=str(i),
-                        encoder_names="".join(r),
-                    )
-                )
+    def _generate_modality_combinations(self) -> Generator[List[str], None, None]:
+        modality_ids = [mod.modality_id for mod in self.modalities]
 
-        return choices
-
-    def _evaluate_architecture(
-        self, architecture: FusionArchitecture, task: Any
-    ) -> "OptimizationResult":
-
-        start_time = time.time()
-        task_name = task.model.name
-
-        unimodal_representations = {}
-
-        for choice in architecture.encoder_choices:
-            modality_id = choice.modality_id
-            encoder_names = choice.encoder_names
+        for r in range(
+            self.min_modalities, min(self.max_modalities + 1, len(modality_ids) + 1)
+        ):
+            for modality_subset in itertools.combinations(modality_ids, r):
+                yield list(modality_subset)
 
-            task_data = self.k_best_representations[task_name][modality_id]
+    def _generate_representation_combinations(
+        self, modality_subset: List[str], task_name: str
+    ) -> Generator[Dict[str, int], None, None]:
+        representation_options = []
 
-            selected_repr = None
-            for i, result in enumerate(task_data["results"]):
-                if hasattr(result, "representations"):
-                    if encoder_names == f"{result.representations}":
-                        selected_repr = task_data["representations"][i]
-                        break
+        for modality_id in modality_subset:
+            num_representations = len(
+                self.k_best_representations[task_name][modality_id]["representations"]
+            )
+            representation_options.append(list(range(num_representations)))
 
-            representation_key = f"{modality_id}_{choice.modality_instance_id}"
-            unimodal_representations[representation_key] = selected_repr
+        for combo in itertools.product(*representation_options):
+            yield {
+                modality_id: repr_idx
+                for modality_id, repr_idx in zip(modality_subset, combo)
+            }
 
-        fused_representation = self.dag_builder.build_dag(
-            architecture, unimodal_representations
-        )
+    def _generate_fusion_dags(
+        self, modality_subset: List[str], representation_combo: Dict[str, int]
+    ) -> Generator[MultimodalDAG, None, None]:
+        leaf_infos = [(m, representation_combo[m]) for m in modality_subset]
+
+        def gen_trees(indices: List[int]):
+            if len(indices) == 1:
+                yield indices[0]
+                return
+            for split in range(1, len(indices)):
+                for left_idxs in itertools.combinations(indices, split):
+                    left = list(left_idxs)
+                    right = [i for i in indices if i not in left]
+                    for l_tree in gen_trees(left):
+                        for r_tree in gen_trees(right):
+                            yield (l_tree, r_tree)
+
+        def build_variants(subtree, base_builder: MultimodalDAGBuilder, leaf_id_map):
+            variants = []
+
+            if isinstance(subtree, int):
+                variants.append((base_builder, leaf_id_map[subtree]))
+                return variants
+
+            left_sub, right_sub = subtree
+
+            left_variants = build_variants(left_sub, copy.deepcopy(base_builder), leaf_id_map)
+
+            for left_builder, left_root in left_variants:
+                right_variants = build_variants(right_sub, copy.deepcopy(left_builder), leaf_id_map)
+
+                for right_builder, right_root in right_variants:
+                    for fusion_op_class in self.fusion_operators:
+                        new_builder = copy.deepcopy(right_builder)
+                        fusion_op = fusion_op_class()
+                        fusion_id = new_builder.create_fusion_node([left_root, right_root], fusion_op)
+                        variants.append((new_builder, fusion_id))
+
+            return variants
+
+        n = len(leaf_infos)
+
+        for permuted_leaf_infos in itertools.permutations(leaf_infos, n):
+            base_builder = MultimodalDAGBuilder()
+            leaf_id_map = {}
+            for idx, (modality_id, repr_idx) in enumerate(permuted_leaf_infos):
+                nodeid = base_builder.create_leaf_node(modality_id, repr_idx)
+                leaf_id_map[idx] = nodeid
+
+            indices = list(range(n))
+
+            for tree in gen_trees(indices):
+                variants = build_variants(tree, base_builder, leaf_id_map)
+                for builder_variant, root_id in variants:
+                    try:
+                        yield builder_variant.build(root_id)
+                    except ValueError:
+                        if self.debug:
+                            print(f"Skipping invalid DAG for root {root_id}")
+                        continue
+
+    def _evaluate_dag(self, dag: MultimodalDAG, task: Task) -> "OptimizationResult":
+        start_time = time.time()
+        
+        try:
+            fused_representation = dag.execute(self.k_best_representations, task)
+
+            if fused_representation is None:
+                return None
+
+            final_representation = fused_representation
+            if task.expected_dim == 1 and get_shape(fused_representation.metadata) > 1:
+                agg_operator = AggregatedRepresentation(Aggregation())
+                final_representation = agg_operator.transform(fused_representation)
+
+            eval_start = time.time()
+            scores = task.run(final_representation.data)
+            eval_time = time.time() - eval_start
+
+            total_time = time.time() - start_time
+
+            return OptimizationResult(
+                dag=dag,
+                train_score=scores[0],
+                val_score=scores[1],
+                runtime=total_time,
+                task_name=task.model.name,
+                evaluation_time=eval_time,
+            )
 
-        if fused_representation is None:
+        except Exception as e:
+            print(f"Error evaluating DAG: {e}")
+            traceback.print_exc()
             return None
 
-        final_representation = fused_representation
-        if task.expected_dim == 1 and get_shape(fused_representation.metadata) > 1:
-            agg_operator = AggregatedRepresentation(Aggregation())
-            final_representation = agg_operator.transform(fused_representation)
+    def optimize(self, max_combinations: int = None) -> List["OptimizationResult"]:
+        all_results = []
 
-        eval_start = time.time()
-        scores = task.run(final_representation.data)
-        eval_time = time.time() - eval_start
+        for task in self.tasks:
+            if self.debug:
+                print(f"Optimizing multimodal fusion for task: {task.model.name}")
 
-        total_time = time.time() - start_time
+            task_results = []
+            evaluated_count = 0
 
-        return OptimizationResult(
-            architecture=architecture,
-            train_score=scores[0],
-            val_score=scores[1],
-            runtime=total_time,
-            task_name=task_name,
-            evaluation_time=eval_time,
-        )
+            for modality_subset in self._generate_modality_combinations():
+                if self.debug:
+                    print(f"  Evaluating modality subset: {modality_subset}")
 
-    def _optimize_task_exhaustive(
-        self, task: Any, max_architectures: int = None
-    ) -> List["OptimizationResult"]:
+                for repr_combo in self._generate_representation_combinations(
+                    modality_subset, task.model.name
+                ):
 
-        task_results = []
-        evaluated_count = 0
+                    for dag in self._generate_fusion_dags(modality_subset, repr_combo):
+                        if max_combinations and evaluated_count >= max_combinations:
+                            break
 
-        if self.debug:
-            print(f"  Starting exhaustive search for task: {task.model.name}")
-            if max_architectures:
-                print(f"  Limiting to first {max_architectures} architectures")
-
-        for architecture in self.exhaustive_generator.generate_all_architectures():
-            if max_architectures and evaluated_count >= max_architectures:
-                break
-
-            if self.debug and evaluated_count % 50 == 0:
-                print(f"  Evaluated {evaluated_count} architectures...")
-
-            try:
-                result = self._evaluate_architecture(architecture, task)
-                if result is not None:
-                    task_results.append(result)
-            except Exception as e:
-                if self.debug:
-                    print(f"  Error evaluating architecture {evaluated_count}: {e}")
-                continue
+                        result = self._evaluate_dag(dag, task)
+                        if result is not None:
+                            task_results.append(result)
 
-            evaluated_count += 1
+                        evaluated_count += 1
 
-        if self.debug:
-            print(
-                f"  Exhaustive search completed: {evaluated_count} architectures evaluated"
-            )
-            modality_subset_counts = {}
-            for result in task_results:
-                num_modalities = len(result.architecture.used_modalities)
-                modality_subset_counts[num_modalities] = (
-                    modality_subset_counts.get(num_modalities, 0) + 1
-                )
-            print(f"  Modality subset distribution: {modality_subset_counts}")
+                        if self.debug and evaluated_count % 100 == 0:
+                            print(f"    Evaluated {evaluated_count} combinations...")
 
-        return task_results
+                    if max_combinations and evaluated_count >= max_combinations:
+                        break
 
-    def optimize(
-        self,
-        search_strategy: SearchStrategy = SearchStrategy.RANDOM,
-        search_budget: int = 50,
-        **search_params,
-    ) -> List["OptimizationResult"]:
-        all_results = []
+                if max_combinations and evaluated_count >= max_combinations:
+                    break
+
+            all_results.extend(task_results)
 
-        for task in self.tasks:
             if self.debug:
-                print(f"Optimizing fusion architectures for task: {task.model.name}")
                 print(
-                    f"Exploring modality subsets: {self.min_modalities} to {self.max_modalities} modalities"
+                    f"  Task completed: {len(task_results)} valid combinations evaluated"
                 )
 
-            task_results = self._optimize_task(
-                task, search_strategy, search_budget, **search_params
-            )
-            all_results.extend(task_results)
-
         self.optimization_results = all_results
 
         if self.debug:
             print(
-                f"\nOptimization completed: {len(all_results)} total architectures evaluated"
+                f"\nOptimization completed: {len(all_results)} total combinations evaluated"
             )
-            modality_usage = {}
-            for result in all_results:
-                for modality in result.architecture.used_modalities:
-                    modality_usage[modality.modality_id] = (
-                        modality_usage.get(modality.modality_id, 0) + 1
-                    )
-            print(f"Modality usage frequency: {modality_usage}")
 
         return all_results
 
-    def _optimize_task(
-        self,
-        task: Any,
-        search_strategy: SearchStrategy,
-        search_budget: int,
-        **search_params,
-    ) -> List["OptimizationResult"]:
-
-        if search_strategy == SearchStrategy.EXHAUSTIVE:
-            max_architectures = search_params.get("max_architectures", search_budget)
-            return self._optimize_task_exhaustive(task, max_architectures)
-
-        elif search_strategy == SearchStrategy.RANDOM:
-            task_results = []
-            candidates = [
-                self.architecture_generator.generate_random_architecture()
-                for _ in range(search_budget)
-            ]
-
-            for i, architecture in enumerate(candidates):
-                if self.debug and i % 10 == 0:
-                    print(f"  Evaluating architecture {i+1}/{len(candidates)}")
-
-                try:
-                    result = self._evaluate_architecture(architecture, task)
-                    if result is not None:
-                        task_results.append(result)
-                except Exception as e:
-                    if self.debug:
-                        print(f"  Error evaluating architecture {i}: {e}")
-                    continue
-
-            return task_results
-
-        else:
-            raise ValueError(f"Unknown search strategy: {search_strategy}")
-
 
 @dataclass
 class OptimizationResult:
-    architecture: FusionArchitecture
+    dag: MultimodalDAG
     train_score: float
     val_score: float
     runtime: float

From da99a737d2694b6de35d0d9f7eb3d5431ee4ea94 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Thu, 18 Sep 2025 12:41:33 +0200
Subject: [PATCH 10/22] remove duplicated code

---
 .../scuro/drsearch/multimodal_optimizer.py    | 172 +++++-------------
 ...{unimodal_dag.py => representation_dag.py} |  82 +++------
 .../scuro/drsearch/unimodal_optimizer.py      |  79 ++++++--
 .../scuro/drsearch/unimodal_visualizer.py     |   4 +-
 4 files changed, 134 insertions(+), 203 deletions(-)
 rename src/main/python/systemds/scuro/drsearch/{unimodal_dag.py => representation_dag.py} (73%)

diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
index e52e65ca6e4..503118447f6 100644
--- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
@@ -4,15 +4,15 @@
 from typing import List, Dict, Any, Generator
 import copy
 import traceback
-
+from itertools import chain
 from systemds.scuro import Task
+from systemds.scuro.drsearch.representation_dag import RepresentationDag
 from systemds.scuro.representations.aggregated_representation import (
     AggregatedRepresentation,
 )
 from systemds.scuro.representations.aggregate import Aggregation
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.utils.schema_helpers import get_shape
-from systemds.scuro.modality.transformed import TransformedModality
 
 
 @dataclass
@@ -25,105 +25,6 @@ class MultimodalNode:
     parameters: Dict[str, Any] = field(default_factory=dict)
 
 
-@dataclass
-class MultimodalDAG:
-    def __init__(self, nodes: List[MultimodalNode], root_node_id: str):
-        self.root_node_id = root_node_id
-        self.nodes = self.filter_connected_nodes(nodes)
-
-    def filter_connected_nodes(self, nodes):
-        node_map = {node.node_id: node for node in nodes}
-
-        if self.root_node_id not in node_map:
-            return []
-
-        visited = set()
-        stack = [self.root_node_id]
-
-        while stack:
-            current_id = stack.pop()
-            if current_id not in visited:
-                visited.add(current_id)
-                current_node = node_map[current_id]
-                for input_id in current_node.inputs:
-                    if input_id in node_map and input_id not in visited:
-                        stack.append(input_id)
-
-        return [node for node in nodes if node.node_id in visited]
-
-    def get_node_by_id(self, node_id: str) -> MultimodalNode:
-        for node in self.nodes:
-            if node.node_id == node_id:
-                return node
-        return None
-
-    def validate(self) -> bool:
-        node_ids = {node.node_id for node in self.nodes}
-
-        if self.root_node_id not in node_ids:
-            return False
-
-        for node in self.nodes:
-            for input_id in node.inputs:
-                if input_id not in node_ids:
-                    return False
-
-        visited = set()
-
-        def has_cycle(node_id: str, path: set) -> bool:
-            if node_id in path:
-                return True
-            if node_id in visited:
-                return False
-            path.add(node_id)
-            visited.add(node_id)
-            node = self.get_node_by_id(node_id)
-            for input_id in node.inputs:
-                if has_cycle(input_id, path.copy()):
-                    return True
-            return False
-
-        return not has_cycle(self.root_node_id, set())
-
-    def execute(
-        self, k_best_representations: Dict[str, Dict[str, List[Any]]], task: Task
-    ) -> TransformedModality:
-        cache = {}
-
-        def execute_node(node_id: str) -> TransformedModality:
-            if node_id in cache:
-                return cache[node_id]
-
-            node = self.get_node_by_id(node_id)
-
-            if not node.inputs:
-                representation = k_best_representations[task.model.name][node.modality_id][
-                    "representations"
-                ][node.representation_index]
-                cache[node_id] = representation
-                return representation
-
-            input_representations = [execute_node(input_id) for input_id in node.inputs]
-
-            if len(input_representations) == 1:
-                result = input_representations[0]
-            else:
-                op = node.operation
-                try:
-                    op_instance = op() if callable(op) else op
-                except Exception:
-                    op_instance = op
-
-                result = input_representations[0].combine(
-                    input_representations[1:], op_instance
-                )
-
-            cache[node_id] = result
-            return result
-
-        return execute_node(self.root_node_id)
-
-
 class MultimodalDAGBuilder:
     def __init__(self):
         self.nodes = []
@@ -147,18 +48,16 @@ def create_fusion_node(self, inputs: List[str], fusion_operation: Any) -> str:
         node = MultimodalNode(
             node_id=node_id,
             inputs=inputs,
-            operation=fusion_operation,
-            parameters=(
-                fusion_operation.parameters
-                if hasattr(fusion_operation, "parameters")
-                else {}
-            ),
+            operation=fusion_operation.__class__,
+            parameters=fusion_operation.parameters,
         )
         self.nodes.append(node)
         return node_id
 
-    def build(self, root_node_id: str) -> MultimodalDAG:
-        dag = MultimodalDAG(nodes=copy.deepcopy(self.nodes), root_node_id=root_node_id)
+    def build(self, root_node_id: str) -> RepresentationDag:
+        dag = RepresentationDag(
+            nodes=copy.deepcopy(self.nodes), root_node_id=root_node_id
+        )
         if not dag.validate():
             raise ValueError("Invalid DAG construction")
         return dag
@@ -176,7 +75,6 @@ def __init__(
         max_modalities: int = None,
     ):
         self.modalities = modalities
-        self.unimodal_optimization_results = unimodal_optimization_results
         self.tasks = tasks
         self.k = k
         self.debug = debug
@@ -186,10 +84,14 @@ def __init__(
         self.operator_registry = Registry()
         self.fusion_operators = self.operator_registry.get_fusion_operators()
 
-        self.k_best_representations = self._extract_k_best_representations()
+        self.k_best_representations = self._extract_k_best_representations(
+            unimodal_optimization_results
+        )
         self.optimization_results = []
 
-    def _extract_k_best_representations(self) -> Dict[str, Dict[str, List[Any]]]:
+    def _extract_k_best_representations(
+        self, unimodal_optimization_results: Any
+    ) -> Dict[str, Dict[str, List[Any]]]:
         k_best = {}
 
         for task in self.tasks:
@@ -197,15 +99,12 @@ def _extract_k_best_representations(self) -> Dict[str, Dict[str, List[Any]]]:
 
             for modality in self.modalities:
                 k_best_results, cached_data = (
-                    self.unimodal_optimization_results.get_k_best_results(
+                    unimodal_optimization_results.get_k_best_results(
                         modality, self.k, task
                     )
                 )
 
-                k_best[task.model.name][modality.modality_id] = {
-                    "results": k_best_results,
-                    "representations": cached_data,
-                }
+                k_best[task.model.name][modality.modality_id] = cached_data
 
         return k_best
 
@@ -225,7 +124,7 @@ def _generate_representation_combinations(
 
         for modality_id in modality_subset:
             num_representations = len(
-                self.k_best_representations[task_name][modality_id]["representations"]
+                self.k_best_representations[task_name][modality_id]
             )
             representation_options.append(list(range(num_representations)))
 
@@ -237,7 +136,7 @@ def _generate_representation_combinations(
 
     def _generate_fusion_dags(
         self, modality_subset: List[str], representation_combo: Dict[str, int]
-    ) -> Generator[MultimodalDAG, None, None]:
+    ) -> Generator[RepresentationDag, None, None]:
         leaf_infos = [(m, representation_combo[m]) for m in modality_subset]
 
         def gen_trees(indices: List[int]):
@@ -261,16 +160,22 @@ def build_variants(subtree, base_builder: MultimodalDAGBuilder, leaf_id_map):
 
             left_sub, right_sub = subtree
 
-            left_variants = build_variants(left_sub, copy.deepcopy(base_builder), leaf_id_map)
+            left_variants = build_variants(
+                left_sub, copy.deepcopy(base_builder), leaf_id_map
+            )
 
             for left_builder, left_root in left_variants:
-                right_variants = build_variants(right_sub, copy.deepcopy(left_builder), leaf_id_map)
+                right_variants = build_variants(
+                    right_sub, copy.deepcopy(left_builder), leaf_id_map
+                )
 
                 for right_builder, right_root in right_variants:
                     for fusion_op_class in self.fusion_operators:
                         new_builder = copy.deepcopy(right_builder)
                         fusion_op = fusion_op_class()
-                        fusion_id = new_builder.create_fusion_node([left_root, right_root], fusion_op)
+                        fusion_id = new_builder.create_fusion_node(
+                            [left_root, right_root], fusion_op
+                        )
                         variants.append((new_builder, fusion_id))
 
             return variants
@@ -296,19 +201,28 @@ def build_variants(subtree, base_builder: MultimodalDAGBuilder, leaf_id_map):
                             print(f"Skipping invalid DAG for root {root_id}")
                         continue
 
-    def _evaluate_dag(self, dag: MultimodalDAG, task: Task) -> "OptimizationResult":
+    def _evaluate_dag(self, dag: RepresentationDag, task: Task) -> "OptimizationResult":
         start_time = time.time()
-        
+
         try:
-            fused_representation = dag.execute(self.k_best_representations, task)
+
+            fused_representation = dag.execute(
+                list(
+                    chain.from_iterable(
+                        self.k_best_representations[task.model.name].values()
+                    )
+                )
+            )
 
             if fused_representation is None:
                 return None
 
-            final_representation = fused_representation
-            if task.expected_dim == 1 and get_shape(fused_representation.metadata) > 1:
+            final_representation = fused_representation[
+                list(fused_representation.keys())[-1]
+            ]
+            if task.expected_dim == 1 and get_shape(final_representation.metadata) > 1:
                 agg_operator = AggregatedRepresentation(Aggregation())
-                final_representation = agg_operator.transform(fused_representation)
+                final_representation = agg_operator.transform(final_representation)
 
             eval_start = time.time()
             scores = task.run(final_representation.data)
@@ -386,7 +300,7 @@ def optimize(self, max_combinations: int = None) -> List["OptimizationResult"]:
 
 @dataclass
 class OptimizationResult:
-    dag: MultimodalDAG
+    dag: RepresentationDag
     train_score: float
     val_score: float
     runtime: float
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_dag.py b/src/main/python/systemds/scuro/drsearch/representation_dag.py
similarity index 73%
rename from src/main/python/systemds/scuro/drsearch/unimodal_dag.py
rename to src/main/python/systemds/scuro/drsearch/representation_dag.py
index b19d120c1d4..2b9fcba519e 100644
--- a/src/main/python/systemds/scuro/drsearch/unimodal_dag.py
+++ b/src/main/python/systemds/scuro/drsearch/representation_dag.py
@@ -18,10 +18,8 @@
 # under the License.
 #
 # -------------------------------------------------------------
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import List, Dict, Any
-import copy
-from collections import deque
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.representations.representation import (
@@ -31,22 +29,12 @@
     AggregatedRepresentation,
 )
 from systemds.scuro.representations.context import Context
-from systemds.scuro.representations.window_aggregation import WindowAggregation
 
 
 @dataclass
-class UnimodalNode:
-    node_id: str
-    operation: Any
-    inputs: List[str]
-    modality_id: str = None
-    parameters: Dict[str, Any] = field(default_factory=dict)
+class RepresentationDag:
 
-
-@dataclass
-class UnimodalDAG:
-
-    def __init__(self, nodes: List[UnimodalNode], root_node_id):
+    def __init__(self, nodes: List[Any], root_node_id):
         self.root_node_id = root_node_id
         self.nodes = self.filter_connected_nodes(nodes)
 
@@ -78,7 +66,7 @@ def get_leaf_nodes(self) -> List[str]:
                 leaf_nodes.append(node.node_id)
         return leaf_nodes
 
-    def get_node_by_id(self, node_id: str) -> UnimodalNode:
+    def get_node_by_id(self, node_id: str):
         for node in self.nodes:
             if node.node_id == node_id:
                 return node
@@ -119,7 +107,7 @@ def has_cycle(node_id: str, path: set) -> bool:
 
         return not has_cycle(self.root_node_id, set())
 
-    def execute(self, modality: Modality) -> Dict[str, TransformedModality]:
+    def execute(self, modalities: List[Modality]) -> Dict[str, TransformedModality]:
         cache = {}
 
         def execute_node(node_id: str) -> TransformedModality:
@@ -128,7 +116,13 @@ def execute_node(node_id: str) -> TransformedModality:
 
             node = self.get_node_by_id(node_id)
 
-            if not node.inputs:  # Leaf node
+            if not node.inputs:
+                if hasattr(node, "representation_index"):
+                    modality = get_modality_by_id_and_instance_id(
+                        modalities, node.modality_id, node.representation_index
+                    )
+                else:
+                    modality = get_modality_by_id(modalities, node.modality_id)
                 cache[node_id] = modality
                 return modality
 
@@ -158,37 +152,21 @@ def execute_node(node_id: str) -> TransformedModality:
         return cache
 
 
-class UnimodalDAGBuilder:
-
-    def __init__(self):
-        self.nodes = []
-        self.node_counter = 0
-
-    def create_leaf_node(self, operation: Any, modality_id: str) -> str:
-        node_id = f"leaf_{self.node_counter}"
-        self.node_counter += 1
-        node = UnimodalNode(
-            node_id=node_id, operation=operation, inputs=[], modality_id=modality_id
-        )
-        self.nodes.append(node)
-        return node_id
-
-    def create_operation_node(
-        self, operation: Any, inputs: List[str], parameters: Dict[str, Any] = None
-    ) -> str:
-        node_id = f"op_{self.node_counter}"
-        self.node_counter += 1
-        node = UnimodalNode(
-            node_id=node_id,
-            operation=operation,
-            inputs=inputs,
-            parameters=parameters or {},
-        )
-        self.nodes.append(node)
-        return node_id
-
-    def build(self, root_node_id: str) -> UnimodalDAG:
-        dag = UnimodalDAG(nodes=copy.deepcopy(self.nodes), root_node_id=root_node_id)
-        if not dag.validate():
-            raise ValueError("Invalid DAG construction")
-        return dag
+def get_modality_by_id(modalities: List[Modality], modality_id: int) -> Modality:
+    for modality in modalities:
+        if modality.modality_id == modality_id:
+            return modality
+    return None
+
+
+def get_modality_by_id_and_instance_id(
+    modalities: List[Modality], modality_id: int, instance_id: int
+):
+    counter = 0
+    for modality in modalities:
+        if modality.modality_id == modality_id:
+            if counter == instance_id:
+                return modality
+            else:
+                counter += 1
+    return None
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
index ed4fce7fedd..20064d88a51 100644
--- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
@@ -28,9 +28,6 @@
 
 import numpy as np
 import wandb
-from systemds.scuro.representations.unimodal import UnimodalRepresentation
-from systemds.scuro.representations.window_aggregation import Window
-from systemds.scuro.representations.context import Context
 from systemds.scuro.representations.fusion import Fusion
 from systemds.scuro.representations.concatenation import Concatenation
 from systemds.scuro.representations.hadamard import Hadamard
@@ -38,13 +35,11 @@
 from systemds.scuro.representations.aggregated_representation import (
     AggregatedRepresentation,
 )
-from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.modality import Modality
-from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.representations.aggregate import Aggregation
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.utils.schema_helpers import get_shape
-from systemds.scuro.drsearch.unimodal_dag import UnimodalDAGBuilder, UnimodalDAG
+from systemds.scuro.drsearch.representation_dag import RepresentationDag
 from systemds.scuro.drsearch.unimodal_visualizer import visualize_dag
 
 
@@ -126,12 +121,10 @@ def _process_modality(self, modality, parallel):
         )
 
         for operator in modality_specific_operators:
-            # Build DAG for this operator
             dags = self._build_modality_dag(modality, operator())
 
             for dag in dags:
-                # Execute DAG and get all intermediate representations
-                representations = dag.execute(modality)
+                representations = dag.execute([modality])
                 node_id = list(representations.keys())[-1]
                 node = dag.get_node_by_id(node_id)
                 if node.operation is None:
@@ -147,7 +140,7 @@ def _process_modality(self, modality, parallel):
         return local_results
 
     def _get_representation_chain(
-        self, node: "UnimodalNode", dag: UnimodalDAG
+        self, node: "UnimodalNode", dag: RepresentationDag
     ) -> List[Any]:
         representations = []
         if node.operation:
@@ -181,7 +174,7 @@ def _evaluate_local(self, modality, local_results, dag, combination=None):
                     agg_operator.__class__, [dag.root_node_id], agg_operator.parameters
                 )
                 dag = builder.build(rep_node_id)
-                representations = dag.execute(modality)
+                representations = dag.execute([modality])
                 node_id = list(representations.keys())[-1]
                 for task in self.tasks:
                     start = time.time()
@@ -206,10 +199,12 @@ def _evaluate_local(self, modality, local_results, dag, combination=None):
                     builder = self.builders[modality.modality_id]
                     agg_operator = AggregatedRepresentation(Aggregation())
                     rep_node_id = builder.create_operation_node(
-                        operator.__class__, [dag.root_node_id], agg_operator.parameters
+                        agg_operator.__class__,
+                        [dag.root_node_id],
+                        agg_operator.parameters,
                     )
                     dag = builder.build(rep_node_id)
-                    representations = dag.execute(modality)
+                    representations = dag.execute([modality])
                     node_id = list(representations.keys())[-1]
 
                     start = time.time()
@@ -226,7 +221,9 @@ def _evaluate_local(self, modality, local_results, dag, combination=None):
                         scores, modality, task.model.name, end - start, combination, dag
                     )
 
-    def _build_modality_dag(self, modality: Modality, operator: Any) -> UnimodalDAG:
+    def _build_modality_dag(
+        self, modality: Modality, operator: Any
+    ) -> RepresentationDag:
         dags = []
         builder = self.builders[modality.modality_id]
         leaf_id = builder.create_leaf_node(None, modality.modality_id)
@@ -305,11 +302,6 @@ def add_result(self, scores, modality, task_name, task_time, combination, dag):
 
         if self.debug:
             metrics = asdict(entry)
-            table = wandb.Table(columns=["representations"])
-            for m in representation_names:
-                table.add_data(m)
-            metrics["representations"] = table
-            metrics.pop("params")
 
             metrics["used_modalities"] = modality.modality_id
             metrics["task"] = self.task_names.index(task_name)
@@ -364,4 +356,51 @@ class ResultEntry:
     representation_time: float
     task_time: float
     combination: str
-    dag: UnimodalDAG
+    dag: RepresentationDag
+
+
+@dataclass
+class UnimodalNode:
+    node_id: str
+    operation: Any
+    inputs: List[str]
+    modality_id: str = None
+    parameters: Dict[str, Any] = field(default_factory=dict)
+
+
+class UnimodalDAGBuilder:
+
+    def __init__(self):
+        self.nodes = []
+        self.node_counter = 0
+
+    def create_leaf_node(self, operation: Any, modality_id: str) -> str:
+        node_id = f"leaf_{self.node_counter}"
+        self.node_counter += 1
+        node = UnimodalNode(
+            node_id=node_id, operation=operation, inputs=[], modality_id=modality_id
+        )
+        self.nodes.append(node)
+        return node_id
+
+    def create_operation_node(
+        self, operation: Any, inputs: List[str], parameters: Dict[str, Any] = None
+    ) -> str:
+        node_id = f"op_{self.node_counter}"
+        self.node_counter += 1
+        node = UnimodalNode(
+            node_id=node_id,
+            operation=operation,
+            inputs=inputs,
+            parameters=parameters or {},
+        )
+        self.nodes.append(node)
+        return node_id
+
+    def build(self, root_node_id: str) -> RepresentationDag:
+        dag = RepresentationDag(
+            nodes=copy.deepcopy(self.nodes), root_node_id=root_node_id
+        )
+        if not dag.validate():
+            raise ValueError("Invalid DAG construction")
+        return dag
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_visualizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_visualizer.py
index 6aac24a3f1e..b0a1bbe285c 100644
--- a/src/main/python/systemds/scuro/drsearch/unimodal_visualizer.py
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_visualizer.py
@@ -20,10 +20,10 @@
 # -------------------------------------------------------------
 
 from typing import Dict, Any
-from systemds.scuro.drsearch.unimodal_dag import UnimodalDAG
+from systemds.scuro.drsearch.representation_dag import RepresentationDag
 
 
-def visualize_dag(dag: UnimodalDAG) -> Dict[str, Any]:
+def visualize_dag(dag: RepresentationDag) -> Dict[str, Any]:
     nodes = []
     edges = []
 

From 38959053c7ba8403c87874a832240c9a7f08cd3c Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Thu, 18 Sep 2025 13:04:30 +0200
Subject: [PATCH 11/22] remove duplicated code

---
 .../scuro/drsearch/multimodal_optimizer.py    | 67 +++-----------
 .../scuro/drsearch/representation_dag.py      | 76 ++++++++++++----
 .../scuro/drsearch/unimodal_optimizer.py      | 88 +++----------------
 3 files changed, 85 insertions(+), 146 deletions(-)

diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
index 503118447f6..0e24a43d490 100644
--- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
@@ -1,12 +1,15 @@
 import itertools
 import time
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import List, Dict, Any, Generator
 import copy
 import traceback
 from itertools import chain
 from systemds.scuro import Task
-from systemds.scuro.drsearch.representation_dag import RepresentationDag
+from systemds.scuro.drsearch.representation_dag import (
+    RepresentationDag,
+    RepresentationDAGBuilder,
+)
 from systemds.scuro.representations.aggregated_representation import (
     AggregatedRepresentation,
 )
@@ -15,54 +18,6 @@
 from systemds.scuro.utils.schema_helpers import get_shape
 
 
-@dataclass
-class MultimodalNode:
-    node_id: str
-    inputs: List[str]
-    operation: Any
-    modality_id: str = None
-    representation_index: int = None
-    parameters: Dict[str, Any] = field(default_factory=dict)
-
-
-class MultimodalDAGBuilder:
-    def __init__(self):
-        self.nodes = []
-        self.node_counter = 0
-
-    def create_leaf_node(self, modality_id: str, representation_index: int) -> str:
-        node_id = f"leaf_{modality_id}_{representation_index}"
-        node = MultimodalNode(
-            node_id=node_id,
-            inputs=[],
-            operation=None,
-            modality_id=modality_id,
-            representation_index=representation_index,
-        )
-        self.nodes.append(node)
-        return node_id
-
-    def create_fusion_node(self, inputs: List[str], fusion_operation: Any) -> str:
-        node_id = f"fusion_{self.node_counter}"
-        self.node_counter += 1
-        node = MultimodalNode(
-            node_id=node_id,
-            inputs=inputs,
-            operation=fusion_operation.__class__,
-            parameters=fusion_operation.parameters,
-        )
-        self.nodes.append(node)
-        return node_id
-
-    def build(self, root_node_id: str) -> RepresentationDag:
-        dag = RepresentationDag(
-            nodes=copy.deepcopy(self.nodes), root_node_id=root_node_id
-        )
-        if not dag.validate():
-            raise ValueError("Invalid DAG construction")
-        return dag
-
-
 class MultimodalOptimizer:
     def __init__(
         self,
@@ -151,7 +106,9 @@ def gen_trees(indices: List[int]):
                         for r_tree in gen_trees(right):
                             yield (l_tree, r_tree)
 
-        def build_variants(subtree, base_builder: MultimodalDAGBuilder, leaf_id_map):
+        def build_variants(
+            subtree, base_builder: RepresentationDAGBuilder, leaf_id_map
+        ):
             variants = []
 
             if isinstance(subtree, int):
@@ -173,8 +130,10 @@ def build_variants(subtree, base_builder: MultimodalDAGBuilder, leaf_id_map):
                     for fusion_op_class in self.fusion_operators:
                         new_builder = copy.deepcopy(right_builder)
                         fusion_op = fusion_op_class()
-                        fusion_id = new_builder.create_fusion_node(
-                            [left_root, right_root], fusion_op
+                        fusion_id = new_builder.create_operation_node(
+                            fusion_op.__class__,
+                            [left_root, right_root],
+                            fusion_op.parameters,
                         )
                         variants.append((new_builder, fusion_id))
 
@@ -183,7 +142,7 @@ def build_variants(subtree, base_builder: MultimodalDAGBuilder, leaf_id_map):
         n = len(leaf_infos)
 
         for permuted_leaf_infos in itertools.permutations(leaf_infos, n):
-            base_builder = MultimodalDAGBuilder()
+            base_builder = RepresentationDAGBuilder()
             leaf_id_map = {}
             for idx, (modality_id, repr_idx) in enumerate(permuted_leaf_infos):
                 nodeid = base_builder.create_leaf_node(modality_id, repr_idx)
diff --git a/src/main/python/systemds/scuro/drsearch/representation_dag.py b/src/main/python/systemds/scuro/drsearch/representation_dag.py
index 2b9fcba519e..619e4236a2a 100644
--- a/src/main/python/systemds/scuro/drsearch/representation_dag.py
+++ b/src/main/python/systemds/scuro/drsearch/representation_dag.py
@@ -18,7 +18,8 @@
 # under the License.
 #
 # -------------------------------------------------------------
-from dataclasses import dataclass
+import copy
+from dataclasses import dataclass, field
 from typing import List, Dict, Any
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.modality.transformed import TransformedModality
@@ -31,6 +32,16 @@
 from systemds.scuro.representations.context import Context
 
 
+@dataclass
+class RepresentationNode:
+    node_id: str
+    operation: Any
+    inputs: List[str]
+    modality_id: str = None
+    representation_index: int = None
+    parameters: Dict[str, Any] = field(default_factory=dict)
+
+
 @dataclass
 class RepresentationDag:
 
@@ -117,12 +128,9 @@ def execute_node(node_id: str) -> TransformedModality:
             node = self.get_node_by_id(node_id)
 
             if not node.inputs:
-                if hasattr(node, "representation_index"):
-                    modality = get_modality_by_id_and_instance_id(
-                        modalities, node.modality_id, node.representation_index
-                    )
-                else:
-                    modality = get_modality_by_id(modalities, node.modality_id)
+                modality = get_modality_by_id_and_instance_id(
+                    modalities, node.modality_id, node.representation_index
+                )
                 cache[node_id] = modality
                 return modality
 
@@ -152,21 +160,59 @@ def execute_node(node_id: str) -> TransformedModality:
         return cache
 
 
-def get_modality_by_id(modalities: List[Modality], modality_id: int) -> Modality:
-    for modality in modalities:
-        if modality.modality_id == modality_id:
-            return modality
-    return None
-
-
 def get_modality_by_id_and_instance_id(
     modalities: List[Modality], modality_id: int, instance_id: int
 ):
     counter = 0
     for modality in modalities:
         if modality.modality_id == modality_id:
-            if counter == instance_id:
+            if counter == instance_id or instance_id == -1:
                 return modality
             else:
                 counter += 1
     return None
+
+
+class RepresentationDAGBuilder:
+    def __init__(self):
+        self.nodes = []
+        self.node_counter = 0
+
+    def create_leaf_node(
+        self, modality_id: str, representation_index: int = -1, operation=None
+    ) -> str:
+        if representation_index != -1:
+            node_id = f"leaf_{modality_id}_{representation_index}"
+        else:
+            node_id = f"leaf_{self.node_counter}"
+        node = RepresentationNode(
+            node_id=node_id,
+            inputs=[],
+            operation=operation,
+            modality_id=modality_id,
+            representation_index=representation_index,
+        )
+        self.nodes.append(node)
+        return node_id
+
+    def create_operation_node(
+        self, operation: Any, inputs: List[str], parameters: Dict[str, Any] = None
+    ) -> str:
+        node_id = f"op_{self.node_counter}"
+        self.node_counter += 1
+        node = RepresentationNode(
+            node_id=node_id,
+            inputs=inputs,
+            operation=operation,
+            parameters=parameters or {},
+        )
+        self.nodes.append(node)
+        return node_id
+
+    def build(self, root_node_id: str) -> RepresentationDag:
+        dag = RepresentationDag(
+            nodes=copy.deepcopy(self.nodes), root_node_id=root_node_id
+        )
+        if not dag.validate():
+            raise ValueError("Invalid DAG construction")
+        return dag
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
index 20064d88a51..c1ade12bcd6 100644
--- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
@@ -20,14 +20,11 @@
 # -------------------------------------------------------------
 import pickle
 import time
-import copy
 from concurrent.futures import ProcessPoolExecutor, as_completed
-from dataclasses import dataclass, field, asdict
+from dataclasses import dataclass, asdict
 import multiprocessing as mp
-from typing import Union, Dict, List, Any
+from typing import List, Any
 
-import numpy as np
-import wandb
 from systemds.scuro.representations.fusion import Fusion
 from systemds.scuro.representations.concatenation import Concatenation
 from systemds.scuro.representations.hadamard import Hadamard
@@ -39,7 +36,11 @@
 from systemds.scuro.representations.aggregate import Aggregation
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.utils.schema_helpers import get_shape
-from systemds.scuro.drsearch.representation_dag import RepresentationDag
+from systemds.scuro.drsearch.representation_dag import (
+    RepresentationDag,
+    RepresentationNode,
+    RepresentationDAGBuilder,
+)
 from systemds.scuro.drsearch.unimodal_visualizer import visualize_dag
 
 
@@ -50,16 +51,9 @@ def __init__(self, modalities, tasks, debug=True):
         self.run = None
 
         self.builders = {
-            modality.modality_id: UnimodalDAGBuilder() for modality in modalities
+            modality.modality_id: RepresentationDAGBuilder() for modality in modalities
         }
 
-        if debug:
-            wandb.login()
-            config = {
-                "representation_type": "unimodal",  # or "unimodal"
-            }
-            self.run = wandb.init(project="multimodal-search", config=config)
-
         self.debug = debug
 
         self.operator_registry = Registry()
@@ -106,11 +100,7 @@ def optimize(self):
         for modality in self.modalities:
             local_result = self._process_modality(modality, False)
 
-        if self.debug:
-            wandb.finish()
-
     def _process_modality(self, modality, parallel):
-        """Process a single modality using the DAG-based approach"""
         if parallel:
             local_results = UnimodalResults([modality], self.tasks, debug=False)
         else:
@@ -140,7 +130,7 @@ def _process_modality(self, modality, parallel):
         return local_results
 
     def _get_representation_chain(
-        self, node: "UnimodalNode", dag: RepresentationDag
+        self, node: "RepresentationNode", dag: RepresentationDag
     ) -> List[Any]:
         representations = []
         if node.operation:
@@ -226,7 +216,7 @@ def _build_modality_dag(
     ) -> RepresentationDag:
         dags = []
         builder = self.builders[modality.modality_id]
-        leaf_id = builder.create_leaf_node(None, modality.modality_id)
+        leaf_id = builder.create_leaf_node(modality.modality_id)
 
         rep_node_id = builder.create_operation_node(
             operator.__class__, [leaf_id], operator.parameters
@@ -275,13 +265,12 @@ def _build_modality_dag(
 
 
 class UnimodalResults:
-    def __init__(self, modalities, tasks, debug=False, run=None):
+    def __init__(self, modalities, tasks, debug=False):
         self.modality_ids = [modality.modality_id for modality in modalities]
         self.task_names = [task.model.name for task in tasks]
         self.results = {}
         self.debug = debug
         self.cache = {}
-        self.run = run
 
         for modality in self.modality_ids:
             self.results[modality] = {}
@@ -300,14 +289,6 @@ def add_result(self, scores, modality, task_name, task_time, combination, dag):
             dag=dag,
         )
 
-        if self.debug:
-            metrics = asdict(entry)
-
-            metrics["used_modalities"] = modality.modality_id
-            metrics["task"] = self.task_names.index(task_name)
-            # Log metric for the multimodal combination
-            self.run.log(metrics)
-
         self.results[modality.modality_id][task_name].append(entry)
         self.cache[modality.modality_id][task_name][
             (
@@ -357,50 +338,3 @@ class ResultEntry:
     task_time: float
     combination: str
     dag: RepresentationDag
-
-
-@dataclass
-class UnimodalNode:
-    node_id: str
-    operation: Any
-    inputs: List[str]
-    modality_id: str = None
-    parameters: Dict[str, Any] = field(default_factory=dict)
-
-
-class UnimodalDAGBuilder:
-
-    def __init__(self):
-        self.nodes = []
-        self.node_counter = 0
-
-    def create_leaf_node(self, operation: Any, modality_id: str) -> str:
-        node_id = f"leaf_{self.node_counter}"
-        self.node_counter += 1
-        node = UnimodalNode(
-            node_id=node_id, operation=operation, inputs=[], modality_id=modality_id
-        )
-        self.nodes.append(node)
-        return node_id
-
-    def create_operation_node(
-        self, operation: Any, inputs: List[str], parameters: Dict[str, Any] = None
-    ) -> str:
-        node_id = f"op_{self.node_counter}"
-        self.node_counter += 1
-        node = UnimodalNode(
-            node_id=node_id,
-            operation=operation,
-            inputs=inputs,
-            parameters=parameters or {},
-        )
-        self.nodes.append(node)
-        return node_id
-
-    def build(self, root_node_id: str) -> RepresentationDag:
-        dag = RepresentationDag(
-            nodes=copy.deepcopy(self.nodes), root_node_id=root_node_id
-        )
-        if not dag.validate():
-            raise ValueError("Invalid DAG construction")
-        return dag

From 648460874eaa45078fe2e7cb658ba77c02aec876 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Mon, 22 Sep 2025 10:38:43 +0200
Subject: [PATCH 12/22] hp tuning for multimodal optimizer

---
 .../scuro/drsearch/fusion_optimizer.py        | 295 ------------
 .../scuro/drsearch/hyperparameter_tuner.py    | 425 +++---------------
 .../scuro/drsearch/multimodal_optimizer.py    |  22 +
 ...er.py => representation_dag_visualizer.py} |   0
 .../scuro/drsearch/unimodal_optimizer.py      | 101 +++--
 .../unimodal_representation_optimizer.py      | 271 -----------
 src/main/python/tests/scuro/test_hp_tuner.py  |  93 ++--
 .../tests/scuro/test_multimodal_fusion.py     |  50 +--
 .../tests/scuro/test_unimodal_optimizer.py    |  11 -
 9 files changed, 203 insertions(+), 1065 deletions(-)
 delete mode 100644 src/main/python/systemds/scuro/drsearch/fusion_optimizer.py
 rename src/main/python/systemds/scuro/drsearch/{unimodal_visualizer.py => representation_dag_visualizer.py} (100%)
 delete mode 100644 src/main/python/systemds/scuro/drsearch/unimodal_representation_optimizer.py

diff --git a/src/main/python/systemds/scuro/drsearch/fusion_optimizer.py b/src/main/python/systemds/scuro/drsearch/fusion_optimizer.py
deleted file mode 100644
index 7247720f555..00000000000
--- a/src/main/python/systemds/scuro/drsearch/fusion_optimizer.py
+++ /dev/null
@@ -1,295 +0,0 @@
-# -------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# -------------------------------------------------------------
-import time
-import copy
-import pickle
-from systemds.scuro.drsearch.operator_registry import Registry
-from systemds.scuro.drsearch.optimization_data import (
-    OptimizationResult,
-    OptimizationStatistics,
-)
-from systemds.scuro.drsearch.representation_cache import RepresentationCache
-from systemds.scuro.drsearch.task import Task
-from systemds.scuro.representations.aggregate import Aggregation
-from systemds.scuro.representations.context import Context
-
-
-def extract_names(operator_chain):
-    result = []
-    for op in operator_chain:
-        result.append(op.name)
-
-    return result
-
-
-class FusionOptimizer:
-    def __init__(
-        self,
-        modalities,
-        task: Task,
-        unimodal_representations_candidates,
-        representation_cache: RepresentationCache,
-        num_best_candidates=4,
-        max_chain_depth=5,
-        debug=False,
-    ):
-        self.modalities = modalities
-        self.task = task
-        self.unimodal_representations_candidates = unimodal_representations_candidates
-        self.num_best_candidates = num_best_candidates
-        self.k_best_candidates, self.candidates_per_modality = self.get_k_best_results(
-            num_best_candidates
-        )
-        self.operator_registry = Registry()
-        self.max_chain_depth = max_chain_depth
-        self.debug = debug
-        self.evaluated_candidates = set()
-        self.cache = representation_cache
-        self.optimization_statistics = OptimizationStatistics(self.k_best_candidates)
-        self.optimization_results = []
-
-    def optimize(self):
-        """
-        This method finds different ways in how to combine modalities and evaluates the fused representations against
-        the given task. It can fuse different representations from the same modality as well as fuse representations
-        form different modalities.
-        """
-
-        # TODO: add an aligned representation for all modalities with a temporal dimension
-        # TODO: keep a map of operator chains so that we don't evaluate them multiple times in different orders (if it does not make a difference)
-
-        r = []
-
-        for candidate in self.k_best_candidates:
-            modality = self.candidates_per_modality[str(candidate)]
-            cached_representation, representation_ops, used_op_names = (
-                self.cache.load_from_cache(modality, candidate.operator_chain)
-            )
-            if cached_representation is not None:
-                modality = cached_representation
-            store = False
-            for representation in representation_ops:
-                if isinstance(representation, Context):
-                    modality = modality.context(representation)
-                elif representation.name == "RowWiseConcatenation":
-                    modality = modality.flatten(True)
-                else:
-                    modality = modality.apply_representation(representation)
-                store = True
-            if store:
-                self.cache.save_to_cache(modality, used_op_names, representation_ops)
-
-            remaining_candidates = [c for c in self.k_best_candidates if c != candidate]
-            r.append(
-                self._optimize_candidate(modality, candidate, remaining_candidates, 1)
-            )
-
-        if self.debug:
-            with open(
-                f"fusion_statistics_{self.task.model.name}_{self.num_best_candidates}_{self.max_chain_depth}.pkl",
-                "wb",
-            ) as fp:
-                pickle.dump(
-                    self.optimization_statistics,
-                    fp,
-                    protocol=pickle.HIGHEST_PROTOCOL,
-                )
-
-            opt_results = copy.deepcopy(self.optimization_results)
-            for i, opt_res in enumerate(self.optimization_results):
-                op_name = []
-                for op in opt_res.operator_chain:
-                    if isinstance(op, list):
-                        for o in op:
-                            if isinstance(o, list):
-                                for j in o:
-                                    op_name.append(j.name)
-                            elif isinstance(o, str):
-                                op_name.append(o)
-                            else:
-                                op_name.append(o.name)
-                    elif isinstance(op, str):
-                        op_name.append(op)
-                    else:
-                        op_name.append(op.name)
-                opt_results[i].operator_chain = op_name
-            with open(
-                f"fusion_results_{self.task.model.name}_{self.num_best_candidates}_{self.max_chain_depth}.pkl",
-                "wb",
-            ) as fp:
-                pickle.dump(opt_results, fp, protocol=pickle.HIGHEST_PROTOCOL)
-
-            self.optimization_statistics.print_statistics()
-
-    def get_k_best_results(self, k: int):
-        """
-        Get the k best results per modality
-        :param k: number of best results
-        """
-        best_results = []
-        candidate_for_modality = {}
-        for modality in self.modalities:
-            k_results = sorted(
-                self.unimodal_representations_candidates[modality.modality_id][
-                    self.task.model.name
-                ],
-                key=lambda x: x.test_accuracy,
-                reverse=True,
-            )[:k]
-            for k_result in k_results:
-                candidate_for_modality[str(k_result)] = modality
-            best_results.extend(k_results)
-
-        return best_results, candidate_for_modality
-
-    def _optimize_candidate(
-        self, modality, candidate, remaining_candidates, chain_depth
-    ):
-        """
-        Optimize a single candidate by fusing it with others recursively.
-
-        :param candidate: The current candidate representation.
-        :param chain_depth: The current depth of fusion chains.
-        """
-        if chain_depth > self.max_chain_depth:
-            return
-
-        for other_candidate in remaining_candidates:
-            other_modality = self.candidates_per_modality[str(other_candidate)]
-            cached_representation, representation_ops, used_op_names = (
-                self.cache.load_from_cache(
-                    other_modality, other_candidate.operator_chain
-                )
-            )
-            if cached_representation is not None:
-                other_modality = cached_representation
-            store = False
-            for representation in representation_ops:
-                if representation.name == "Aggregation":
-                    params = other_candidate.parameters[representation.name]
-                    representation = Aggregation(
-                        aggregation_function=params["aggregation"]
-                    )
-                if isinstance(representation, Context):
-                    other_modality = other_modality.context(representation)
-                elif isinstance(representation, Aggregation):
-                    other_modality = representation.execute(other_modality)
-                elif representation.name == "RowWiseConcatenation":
-                    other_modality = other_modality.flatten(True)
-                else:
-                    other_modality = other_modality.apply_representation(representation)
-                store = True
-            if store:
-                self.cache.save_to_cache(
-                    other_modality, used_op_names, representation_ops
-                )
-
-            fusion_results = self.operator_registry.get_fusion_operators()
-            fusion_representation = None
-            for fusion_operator in fusion_results:
-                fusion_operator = fusion_operator()
-                chain_key = self.create_identifier(
-                    candidate, fusion_operator, other_candidate
-                )
-                # print(fusion_operator.name)
-                representation_start = time.time()
-                if (
-                    isinstance(fusion_operator, Context)
-                    and fusion_representation is not None
-                ):
-                    fusion_representation.context(fusion_operator)
-                elif isinstance(fusion_operator, Context):
-                    continue
-                else:
-                    fused_representation = modality.combine(
-                        other_modality, fusion_operator
-                    )
-
-                representation_end = time.time()
-                if chain_key not in self.evaluated_candidates:
-                    # Evaluate the fused representation
-
-                    score = self.task.run(fused_representation.data)
-                    fusion_params = {fusion_operator.name: fusion_operator.parameters}
-                    result = OptimizationResult(
-                        operator_chain=[
-                            candidate.operator_chain,
-                            fusion_operator.name,
-                            other_candidate.operator_chain,
-                        ],
-                        parameters=[
-                            candidate.parameters,
-                            fusion_params,
-                            other_candidate.parameters,
-                        ],
-                        train_accuracy=score[0],
-                        test_accuracy=score[1],
-                        # train_min_it_acc=score[2],
-                        # test_min_it_acc=score[3],
-                        training_runtime=self.task.training_time,
-                        inference_runtime=self.task.inference_time,
-                        representation_time=representation_end - representation_start,
-                        output_shape=(1, 1),  # TODO
-                    )
-
-                    # Store the result
-                    self.optimization_results.append(result)
-                    self.optimization_statistics.add_entry(
-                        [
-                            candidate.operator_chain,
-                            [fusion_operator.name],
-                            other_candidate.operator_chain,
-                        ],
-                        score[1],
-                    )
-
-                    # Mark this chain as evaluated
-                    self.evaluated_candidates.add(chain_key)
-
-                    if self.debug:
-                        print(
-                            f"Evaluated chain: {candidate.operator_chain} + {fusion_operator.name} + {other_candidate.operator_chain} -> {score[1]}"
-                        )
-
-                    # Recursively optimize further with this fused representation
-                    self._optimize_candidate(
-                        fused_representation,
-                        result,
-                        [c for c in remaining_candidates if c != other_candidate],
-                        chain_depth + 1,
-                    )
-
-    def create_identifier(self, candidate, fusion, other_candidate):
-        identifier = "".join(flatten_and_join(candidate.operator_chain))
-        identifier += fusion.name
-        identifier += "".join(flatten_and_join(other_candidate.operator_chain))
-
-        return identifier
-
-
-def flatten_and_join(data):
-    flat_list = []
-    for item in data:
-        if isinstance(item, list):
-            flat_list.extend(flatten_and_join(item))
-        else:
-            flat_list.append(item.name if not isinstance(item, str) else item)
-    return flat_list
diff --git a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
index e3cfad07319..17cde10be39 100644
--- a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
+++ b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
@@ -18,36 +18,17 @@
 # under the License.
 #
 # -------------------------------------------------------------
-import itertools
-import concurrent.futures
-from typing import Dict, List, Callable, Tuple, Any, Optional
+from typing import Dict, List, Tuple, Any, Optional
 import numpy as np
 from sklearn.model_selection import ParameterGrid
 import json
 import logging
 from dataclasses import dataclass
-from pathlib import Path
 import time
 import copy
 
-from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.drsearch.task import Task
-from systemds.scuro.representations.aggregated_representation import (
-    AggregatedRepresentation,
-)
-from systemds.scuro.representations.representation import Representation
-from systemds.scuro.representations.window_aggregation import Window
-from systemds.scuro.representations.fusion import Fusion
-
-
-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
-)
-logger = logging.getLogger(__name__)
-
-from systemds.scuro.drsearch.optimization_data import OptimizationResult
-from systemds.scuro.representations.context import Context
 
 
 @dataclass
@@ -73,6 +54,7 @@ def __init__(
         scoring_metric: str = "accuracy",
         maximize_metric: bool = True,
         save_results: bool = False,
+        debug: bool = True,
     ):
         self.tasks = tasks
         self.optimization_results = optimization_results
@@ -83,9 +65,16 @@ def __init__(
         self.results = {}
         self.k = k
         self.modalities = modalities
+        self.representations = None
         self.k_best_cache = None
         self.k_best_representations = None
         self.extract_k_best_modalities_per_task()
+        self.debug = debug
+        self.logger = logging.getLogger(__name__)
+        if debug:
+            logging.basicConfig(
+                level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+            )
 
     def get_modality_by_id(self, modality_id: int) -> Modality:
         for mod in self.modalities:
@@ -95,169 +84,19 @@ def get_modality_by_id(self, modality_id: int) -> Modality:
     def extract_k_best_modalities_per_task(self):
         self.k_best_representations = {}
         self.k_best_cache = {}
+        representations = {}
         for task in self.tasks:
             self.k_best_representations[task.model.name] = []
             self.k_best_cache[task.model.name] = []
+            representations[task.model.name] = {}
             for modality in self.modalities:
                 k_best_results, cached_data = (
                     self.optimization_results.get_k_best_results(modality, self.k, task)
                 )
-
+                representations[task.model.name][modality.modality_id] = k_best_results
                 self.k_best_representations[task.model.name].extend(k_best_results)
                 self.k_best_cache[task.model.name].extend(cached_data)
-
-    def evaluate_single_config(
-        self,
-        reps: List[Representation],
-        params: Dict[str, Any],
-        modality_ids: List[int],
-        task: Task,
-        param_idx: List[int],
-    ) -> Tuple[Dict[str, Any], float]:
-        """
-        Evaluate a single hyperparameter configuration
-        """
-        # try:
-        rep_name = ""
-        modality_counter = 0
-        modality = None
-        modality_is_initialized = False
-        start = 0
-        # if isinstance(rep, Fusion):
-        #     modality = left.combine(right, fusion_method)
-        for i, rep in enumerate(reps):
-            rep_name += rep().name
-            len_params = len(rep().parameters) if rep().parameters is not None else 0
-            if isinstance(rep(), Window):
-                modality = modality.context(
-                    rep(
-                        *np.array(list(params.values()))[
-                            param_idx[start : start + len_params]
-                        ]
-                    )
-                )
-            elif isinstance(rep(), Fusion):
-                modality = modality.combine(
-                    rep(
-                        *np.array(list(params.values()))[
-                            param_idx[start : start + len_params]
-                        ]
-                    )
-                )
-                modality_is_initialized = False
-            else:
-                if not modality_is_initialized:
-                    modality = self.get_modality_by_id(modality_ids[modality_counter])
-                    modality_is_initialized = True
-                    modality_counter += 1
-                modality = modality.apply_representation(
-                    rep(
-                        *np.array(list(params.values()))[
-                            param_idx[start : start + len_params]
-                        ]
-                    )
-                )
-            start += len_params
-
-        score = task.run(modality.data)[1]
-        logger.debug(f"{rep_name} with params {params}: score = {score}")
-        return params, score
-        # except Exception as e:
-        #         logger.error(f"Error evaluating {rep_name} with params {params}: {e}")
-        #         return params, float('-inf') if self.maximize_metric else float('inf')
-
-    def tune_representation(
-        self,
-        reps: List,
-        hyperparams: List[Dict[str, List]],
-        modality_id: List[int],
-        task: Task,
-        max_evals: Optional[int] = None,
-    ) -> HyperparamResult:
-        """
-        Tune hyperparameters for a single representation
-
-        Args:
-            rep_name: Name of the representation
-            rep_func: Function that takes (task_data, **hyperparams) and returns score
-            hyperparams: Dictionary with parameter names as keys and lists of values as values
-            task_data: Data to pass to the representation function
-            max_evals: Maximum number of evaluations (None for full grid search)
-        """
-        start_time = time.time()
-        rep_name = "".join([rep().name for rep in reps])
-        logger.info(f"Starting hyperparameter tuning for")
-
-        # Generate parameter grid
-        hp = merge_multiple_dicts_with_increments(list(hyperparams))
-        param_grid = list(ParameterGrid(hp))
-        idx_params = []
-        for h in hp.keys():
-            for i, p in enumerate(param_grid[0].keys()):
-                if h == p:
-                    idx_params.append(i)
-                    break
-
-        # Limit evaluations if specified
-        if max_evals and len(param_grid) > max_evals:
-            # Random sampling if too many combinations
-            np.random.shuffle(param_grid)
-            param_grid = param_grid[:max_evals]
-
-        logger.info(f"Evaluating {len(param_grid)} parameter combinations for")
-
-        # Parallel evaluation
-        all_results = []
-        if self.n_jobs <= 1:
-            # Sequential execution
-            for params in param_grid:
-                result = self.evaluate_single_config(
-                    reps, params, modality_id, task, idx_params
-                )
-                all_results.append(result)
-        else:
-            # Parallel execution
-            with concurrent.futures.ProcessPoolExecutor(
-                max_workers=self.n_jobs
-            ) as executor:
-                futures = [
-                    executor.submit(
-                        self.evaluate_single_config,
-                        reps,
-                        params,
-                        modality_id,
-                        task,
-                        idx_params,
-                    )
-                    for params in param_grid
-                ]
-
-                for future in concurrent.futures.as_completed(futures):
-                    try:
-                        result = future.result()
-                        all_results.append(result)
-                    except Exception as e:
-                        logger.error(f"Error in parallel execution: {e}")
-
-        # Find best parameters
-        if self.maximize_metric:
-            best_params, best_score = max(all_results, key=lambda x: x[1])
-        else:
-            best_params, best_score = min(all_results, key=lambda x: x[1])
-
-        tuning_time = time.time() - start_time
-        logger.info(
-            f"Best params for {rep_name}: {best_params}, score: {best_score:.4f}, time: {tuning_time:.2f}s"
-        )
-
-        return HyperparamResult(
-            representation_name=rep_name,
-            best_params=best_params,
-            best_score=best_score,
-            all_results=all_results,
-            tuning_time=tuning_time,
-            modality_id=modality_id,
-        )
+        self.representations = representations
 
     def tune_unimodal_representations(self, max_eval_per_rep: Optional[int] = None):
         results = {}
@@ -280,15 +119,11 @@ def tune_unimodal_representations(self, max_eval_per_rep: Optional[int] = None):
         return results
 
     def tune_dag_representation(self, dag, root_node_id, task, max_evals=None):
-        """
-        Tune hyperparameters for a DAG-based representation
-        """
         hyperparams = {}
         reps = []
         modality_ids = []
         node_order = []
 
-        # Extract parameters and operations from DAG in topological order
         visited = set()
 
         def visit_node(node_id):
@@ -311,17 +146,14 @@ def visit_node(node_id):
         if not hyperparams:
             return None
 
-        # Tune the hyperparameters
         start_time = time.time()
         rep_name = "_".join([rep.__name__ for rep in reps])
 
-        # Generate parameter grid
         param_grid = list(ParameterGrid(hyperparams))
         if max_evals and len(param_grid) > max_evals:
             np.random.shuffle(param_grid)
             param_grid = param_grid[:max_evals]
 
-        # Evaluate parameter combinations
         all_results = []
         for params in param_grid:
             result = self.evaluate_dag_config(
@@ -329,7 +161,6 @@ def visit_node(node_id):
             )
             all_results.append(result)
 
-        # Find best parameters
         if self.maximize_metric:
             best_params, best_score = max(all_results, key=lambda x: x[1])
         else:
@@ -347,14 +178,9 @@ def visit_node(node_id):
         )
 
     def evaluate_dag_config(self, dag, params, node_order, modality_ids, task):
-        """
-        Evaluate a single parameter configuration for a DAG-based representation
-        """
         try:
-            # Create a copy of the DAG to modify
             dag_copy = copy.deepcopy(dag)
 
-            # Update parameters in the DAG
             for node_id in node_order:
                 node = dag_copy.get_node_by_id(node_id)
                 if node.operation is not None and node.parameters:
@@ -364,14 +190,14 @@ def evaluate_dag_config(self, dag, params, node_order, modality_ids, task):
                     node.parameters = node_params
 
             modality = self.get_modality_by_id(modality_ids[0])
-            modified_modality = dag_copy.execute(modality)
+            modified_modality = dag_copy.execute([modality])
             score = task.run(
                 modified_modality[list(modified_modality.keys())[-1]].data
             )[1]
 
             return params, score
         except Exception as e:
-            logger.error(f"Error evaluating DAG with params {params}: {e}")
+            self.logger.error(f"Error evaluating DAG with params {params}: {e}")
             return params, float("-inf") if self.maximize_metric else float("inf")
 
     def tune_multimodal_representations(
@@ -383,93 +209,45 @@ def tune_multimodal_representations(
         max_eval_per_rep: Optional[int] = None,
     ):
         best_optimization_results = optimization_results[:k]
-
-        for result in best_optimization_results:
-            fusion_node_ids = []
-            used_modalities = result.architecture.encoder_choices
-            cached_representations = []
-            modality_ids = []
-            hyperparams = []
-            reps = []
-
-            for i, fusion_node in enumerate(result.architecture.fusion_nodes):
-                if len(fusion_node.parameters) > 0:
-                    fusion_node_ids.append(i)
-
-            if len(fusion_node_ids) == 0 and not optimize_unimodal:
-                logger.warning(
-                    "No fusion nodes with hyperparameters and unimodal optimization disabled. Skipping."
+        results = []
+        for representation in best_optimization_results:
+            if optimize_unimodal:
+                dag = copy.deepcopy(representation.dag)
+                index = 0
+                for i, node in enumerate(representation.dag.nodes):
+                    if not node.inputs:
+                        leaf_node_id = node.node_id
+                        leaf_nodes = self.representations[task.model.name][
+                            node.modality_id
+                        ][node.representation_index].dag.nodes
+                        for leaf_idx, node in enumerate(dag.nodes):
+                            if node.node_id == leaf_node_id:
+                                dag.nodes[leaf_idx : leaf_idx + 1] = leaf_nodes
+                                index = leaf_idx + len(leaf_nodes) - 1
+                                break
+
+                        for node in dag.nodes:
+                            try:
+                                idx = node.inputs.index(leaf_node_id)
+                                node.inputs[idx] = dag.nodes[index].node_id
+                                break
+                            except ValueError:
+                                continue
+                result = self.tune_dag_representation(
+                    dag,
+                    dag.root_node_id,
+                    task,
+                    max_eval_per_rep,
                 )
-                continue
 
-            for i, modality in enumerate(used_modalities):
-                mod_id = modality.modality_id
-                instance_id = modality.modality_instance_id
-                cached_representation = self.get_cached_representation(
-                    int(mod_id), int(instance_id), task
+            else:
+                result = self.tune_dag_representation(
+                    representation.dag,
+                    representation.dag.root_node_id,
+                    task,
+                    max_eval_per_rep,
                 )
-                cached_representations.append(cached_representation)
-
-                if optimize_unimodal:
-                    modality_ids.append(int(mod_id))
-
-                    for transformation in cached_representation.transformation:
-                        params = transformation.parameters
-                        rep = transformation.__class__
-                        hyperparams.append(params)
-                        reps.append(rep)
-
-                if len(used_modalities) > i + 1:
-                    reps.append(
-                        Registry().get_fusion_operator_by_name(
-                            result.architecture.fusion_nodes[i].operation
-                        )
-                    )
-                    hyperparams.append(result.architecture.fusion_nodes[i].parameters)
-
-            self.tune_representation(
-                reps, hyperparams, modality_ids, task, max_eval_per_rep
-            )
-
-    def get_cached_representation(self, modality_id: int, instance_id: int, task: Task):
-        counter = -1
-        for cached_representation in self.k_best_cache[task.model.name]:
-            if cached_representation.modality_id == modality_id:
-                counter += 1
-                if counter == instance_id:
-                    return cached_representation
-
-    def tune_multiple_representations(
-        self,
-        representations: Dict[str, Dict],
-        task_data: Any,
-        max_evals_per_rep: Optional[int] = None,
-    ) -> Dict[str, HyperparamResult]:
-        """
-        Tune hyperparameters for multiple representations
-
-        Args:
-            representations: Dict with structure:
-                {
-                    'rep_name': {
-                        'function': callable,
-                        'hyperparams': dict of param_name -> [values]
-                    }
-                }
-            task_data: Data to pass to representation functions
-            max_evals_per_rep: Maximum evaluations per representation
-        """
-        results = {}
-
-        for rep_name, rep_config in representations.items():
-            rep_func = rep_config["function"]
-            hyperparams = rep_config["hyperparams"]
-
-            result = self.tune_representation(
-                rep_name, rep_func, hyperparams, task_data, max_evals_per_rep
-            )
-            results[rep_name] = result
-
+            results.append(result)
         self.results = results
 
         if self.save_results:
@@ -478,11 +256,9 @@ def tune_multiple_representations(
         return results
 
     def save_tuning_results(self, filepath: str = None):
-        """Save tuning results to JSON file"""
         if not filepath:
             filepath = f"hyperparameter_results_{int(time.time())}.json"
 
-        # Convert results to JSON-serializable format
         json_results = {}
         for task in self.results.keys():
             for result in self.results[task]:
@@ -496,100 +272,5 @@ def save_tuning_results(self, filepath: str = None):
         with open(filepath, "w") as f:
             json.dump(json_results, f, indent=2)
 
-        logger.info(f"Results saved to {filepath}")
-
-    def tune_operator_chain(self, modality, operator_chain):
-        best_result = None
-        best_score = -np.inf
-
-        param_grids = {}
-
-        for operator in operator_chain:
-            param_grids[operator.name] = operator.parameters
-
-        param_combinations = self._generate_search_space(param_grids)
-
-        for params in param_combinations:
-            modified_modality = modality
-            current_chain = []
-
-            representation_start = time.time()
-            try:
-                for operator in operator_chain:
-
-                    if operator.name in params:
-                        operator.set_parameters(params[operator.name])
-
-                    if isinstance(operator, Context):
-                        modified_modality = modified_modality.context(operator)
-                    else:
-                        modified_modality = modified_modality.apply_representation(
-                            operator
-                        )
-
-                    current_chain.append(operator)
-
-                representation_end = time.time()
-
-                score = self.task.run(modified_modality.data)
-
-                if score[1] > best_score:
-                    best_score = score[1]
-                    best_params = params
-                    best_result = OptimizationResult(
-                        operator_chain=current_chain,
-                        parameters=params,
-                        train_accuracy=score[0],
-                        test_accuracy=score[1],
-                        training_runtime=self.task.training_time,
-                        inference_runtime=self.task.inference_time,
-                        representation_time=representation_end - representation_start,
-                        output_shape=(1, 1),
-                    )
-
-            except Exception as e:
-                print(f"Failed parameter combination {params}: {str(e)}")
-                continue
-
-        return best_result
-
-    def _generate_search_space(self, param_grids):
-        combinations = {}
-        for operator_name, params in param_grids.items():
-            operator_combinations = [
-                dict(zip(params.keys(), v)) for v in itertools.product(*params.values())
-            ]
-            combinations[operator_name] = operator_combinations
-
-        keys = list(combinations.keys())
-        values = [combinations[key] for key in keys]
-
-        parameter_grid = [
-            dict(zip(keys, combo)) for combo in itertools.product(*values)
-        ]
-
-        return parameter_grid
-
-
-def merge_multiple_dicts_with_increments(dicts):
-    if dicts is None:
-        return {}
-
-    result = dicts[0].copy() if dicts[0] is not None else {}
-
-    for dict_to_merge in dicts[1:]:
-        if dict_to_merge is None:
-            continue
-
-        for key, value in dict_to_merge.items():
-            if key in result:
-                counter = 1
-                new_key = f"{key}{counter}"
-                while new_key in result:
-                    counter += 1
-                    new_key = f"{key}{counter}"
-                result[new_key] = value
-            else:
-                result[key] = value
-
-    return result
+        if self.debug:
+            self.logger.info(f"Results saved to {filepath}")
diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
index 0e24a43d490..f0de7687cbc 100644
--- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
@@ -1,3 +1,25 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+
 import itertools
 import time
 from dataclasses import dataclass
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_visualizer.py b/src/main/python/systemds/scuro/drsearch/representation_dag_visualizer.py
similarity index 100%
rename from src/main/python/systemds/scuro/drsearch/unimodal_visualizer.py
rename to src/main/python/systemds/scuro/drsearch/representation_dag_visualizer.py
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
index c1ade12bcd6..9d5299eeac9 100644
--- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
@@ -21,9 +21,10 @@
 import pickle
 import time
 from concurrent.futures import ProcessPoolExecutor, as_completed
-from dataclasses import dataclass, asdict
+from dataclasses import dataclass
 import multiprocessing as mp
 from typing import List, Any
+from functools import lru_cache
 
 from systemds.scuro.representations.fusion import Fusion
 from systemds.scuro.representations.concatenation import Concatenation
@@ -41,7 +42,7 @@
     RepresentationNode,
     RepresentationDAGBuilder,
 )
-from systemds.scuro.drsearch.unimodal_visualizer import visualize_dag
+from systemds.scuro.drsearch.representation_dag_visualizer import visualize_dag
 
 
 class UnimodalOptimizer:
@@ -67,6 +68,22 @@ def __init__(self, modalities, tasks, debug=True):
             if tasks[i - 1].expected_dim != tasks[i].expected_dim:
                 self._tasks_require_same_dims = False
 
+        self._combination_operators = [Concatenation(), Hadamard(), Sum()]
+
+    @lru_cache(maxsize=128)
+    def _get_modality_operators(self, modality_type):
+        return self.operator_registry.get_representations(modality_type)
+
+    @lru_cache(maxsize=128)
+    def _get_not_self_contained_reps(self, modality_type):
+        return self.operator_registry.get_not_self_contained_representations(
+            modality_type
+        )
+
+    @lru_cache(maxsize=32)
+    def _get_context_operators(self):
+        return self.operator_registry.get_context_operators()
+
     def store_results(self, file_name=None):
         if file_name is None:
             import time
@@ -89,11 +106,8 @@ def optimize_parallel(self, n_workers=None):
 
             for future in as_completed(future_to_modality):
                 modality = future_to_modality[future]
-                # try:
                 results = future.result()
                 self._merge_results(results)
-                # except Exception as exc:
-                #     print(f'Modality {modality.modality_id} generated an exception: {exc}')
 
     def optimize(self):
         """Optimize representations for each modality"""
@@ -106,7 +120,7 @@ def _process_modality(self, modality, parallel):
         else:
             local_results = self.operator_performance
 
-        modality_specific_operators = self.operator_registry.get_representations(
+        modality_specific_operators = self._get_modality_operators(
             modality.modality_type
         )
 
@@ -119,6 +133,7 @@ def _process_modality(self, modality, parallel):
                 node = dag.get_node_by_id(node_id)
                 if node.operation is None:
                     continue
+
                 reps = self._get_representation_chain(node, dag)
                 combination = next((op for op in reps if isinstance(op, Fusion)), None)
                 self._evaluate_local(
@@ -167,9 +182,9 @@ def _evaluate_local(self, modality, local_results, dag, combination=None):
                 representations = dag.execute([modality])
                 node_id = list(representations.keys())[-1]
                 for task in self.tasks:
-                    start = time.time()
+                    start = time.perf_counter()
                     scores = task.run(representations[node_id].data)
-                    end = time.time()
+                    end = time.perf_counter()
 
                     local_results.add_result(
                         scores, modality, task.model.name, end - start, combination, dag
@@ -177,9 +192,9 @@ def _evaluate_local(self, modality, local_results, dag, combination=None):
             else:
                 modality.pad()
                 for task in self.tasks:
-                    start = time.time()
+                    start = time.perf_counter()
                     scores = task.run(modality.data)
-                    end = time.time()
+                    end = time.perf_counter()
                     local_results.add_result(
                         scores, modality, task.model.name, end - start, combination, dag
                     )
@@ -197,23 +212,23 @@ def _evaluate_local(self, modality, local_results, dag, combination=None):
                     representations = dag.execute([modality])
                     node_id = list(representations.keys())[-1]
 
-                    start = time.time()
+                    start = time.perf_counter()
                     scores = task.run(representations[node_id].data)
-                    end = time.time()
+                    end = time.perf_counter()
                     local_results.add_result(
                         scores, modality, task.model.name, end - start, combination, dag
                     )
                 else:
-                    start = time.time()
+                    start = time.perf_counter()
                     scores = task.run(modality.data)
-                    end = time.time()
+                    end = time.perf_counter()
                     local_results.add_result(
                         scores, modality, task.model.name, end - start, combination, dag
                     )
 
     def _build_modality_dag(
         self, modality: Modality, operator: Any
-    ) -> RepresentationDag:
+    ) -> List[RepresentationDag]:
         dags = []
         builder = self.builders[modality.modality_id]
         leaf_id = builder.create_leaf_node(modality.modality_id)
@@ -225,24 +240,20 @@ def _build_modality_dag(
         dags.append(builder.build(current_node_id))
 
         if not operator.self_contained:
-            not_self_contained_reps = (
-                self.operator_registry.get_not_self_contained_representations(
-                    modality.modality_type
-                )
+            not_self_contained_reps = self._get_not_self_contained_reps(
+                modality.modality_type
             )
             not_self_contained_reps = [
                 rep for rep in not_self_contained_reps if rep != operator.__class__
             ]
 
-            for combination in [Concatenation(), Hadamard(), Sum()]:
+            for combination in self._combination_operators:
                 current_node_id = rep_node_id
                 for other_rep in not_self_contained_reps:
-                    # Create node for other representation
                     other_rep_id = builder.create_operation_node(
                         other_rep, [leaf_id], other_rep().parameters
                     )
 
-                    # Create combination nodes
                     combine_id = builder.create_operation_node(
                         combination.__class__,
                         [current_node_id, other_rep_id],
@@ -251,7 +262,7 @@ def _build_modality_dag(
                     dags.append(builder.build(combine_id))
                     current_node_id = combine_id
 
-        context_operators = self.operator_registry.get_context_operators()
+        context_operators = self._get_context_operators()
 
         for context_op in context_operators:
             context_node_id = builder.create_operation_node(
@@ -265,7 +276,7 @@ def _build_modality_dag(
 
 
 class UnimodalResults:
-    def __init__(self, modalities, tasks, debug=False):
+    def __init__(self, modalities, tasks, debug=False, run=None):
         self.modality_ids = [modality.modality_id for modality in modalities]
         self.task_names = [task.model.name for task in tasks]
         self.results = {}
@@ -273,11 +284,8 @@ def __init__(self, modalities, tasks, debug=False):
         self.cache = {}
 
         for modality in self.modality_ids:
-            self.results[modality] = {}
-            self.cache[modality] = {}
-            for task_name in self.task_names:
-                self.cache[modality][task_name] = {}
-                self.results[modality][task_name] = []
+            self.results[modality] = {task_name: [] for task_name in self.task_names}
+            self.cache[modality] = {task_name: {} for task_name in self.task_names}
 
     def add_result(self, scores, modality, task_name, task_time, combination, dag):
         entry = ResultEntry(
@@ -290,13 +298,13 @@ def add_result(self, scores, modality, task_name, task_time, combination, dag):
         )
 
         self.results[modality.modality_id][task_name].append(entry)
-        self.cache[modality.modality_id][task_name][
-            (
-                tuple([rep.operation for rep in dag.nodes]),
-                scores[1],
-                modality.transform_time,
-            )
-        ] = modality
+
+        cache_key = (
+            id(dag),
+            scores[1],
+            modality.transform_time,
+        )
+        self.cache[modality.modality_id][task_name][cache_key] = modality
 
         if self.debug:
             print(f"{modality.modality_id}_{task_name}: {entry}")
@@ -313,21 +321,22 @@ def get_k_best_results(self, modality, k, task):
         :param modality: modality to get the best results for
         :param k: number of best results
         """
-        items = self.results[modality.modality_id][task.model.name]
-        sorted_indices = sorted(
-            range(len(items)), key=lambda x: items[x].val_score, reverse=True
-        )[:k]
+        task_results = self.results[modality.modality_id][task.model.name]
+
+        results = sorted(task_results, key=lambda x: x.val_score, reverse=True)[:k]
 
-        results = sorted(
-            self.results[modality.modality_id][task.model.name],
-            key=lambda x: x.val_score,
+        sorted_indices = sorted(
+            range(len(task_results)),
+            key=lambda x: task_results[x].val_score,
             reverse=True,
         )[:k]
 
-        items = list(self.cache[modality.modality_id][task.model.name].items())
-        reordered_cache = [items[i][1] for i in sorted_indices]
+        cache_items = list(self.cache[modality.modality_id][task.model.name].items())
+        reordered_cache = [
+            cache_items[i][1] for i in sorted_indices if i < len(cache_items)
+        ]
 
-        return results, list(reordered_cache)
+        return results, reordered_cache
 
 
 @dataclass(frozen=True)
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_representation_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_representation_optimizer.py
deleted file mode 100644
index e59ddbe9beb..00000000000
--- a/src/main/python/systemds/scuro/drsearch/unimodal_representation_optimizer.py
+++ /dev/null
@@ -1,271 +0,0 @@
-# -------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# -------------------------------------------------------------
-import copy
-import os
-import pickle
-import time
-from typing import List
-
-from systemds.scuro.drsearch.operator_registry import Registry
-from systemds.scuro.drsearch.optimization_data import OptimizationResult
-from systemds.scuro.drsearch.representation_cache import RepresentationCache
-from systemds.scuro.drsearch.task import Task
-from systemds.scuro.modality.modality import Modality
-from systemds.scuro.representations.aggregate import Aggregation
-from systemds.scuro.representations.context import Context
-
-
-class UnimodalRepresentationOptimizer:
-    def __init__(
-        self,
-        modalities: List[Modality],
-        tasks: List[Task],
-        max_chain_depth=5,
-        debug=False,
-        folder_name=None,
-    ):
-        self.optimization_results = {}
-        self.modalities = modalities
-        self.tasks = tasks
-        self.operator_registry = Registry()
-        self.initialize_optimization_results()
-        self.max_chain_depth = max_chain_depth
-        self.debug = debug
-        self.cache = RepresentationCache(self.debug)
-        if self.debug:
-            self.folder_name = folder_name
-            os.makedirs(self.folder_name, exist_ok=True)
-
-    def initialize_optimization_results(self):
-        for modality in self.modalities:
-            self.optimization_results[modality.modality_id] = {}
-            for task in self.tasks:
-                self.optimization_results[modality.modality_id][task.model.name] = []
-
-    def optimize(self):
-        """
-        This method finds different unimodal representations for all given modalities
-        """
-
-        for modality in self.modalities:
-            self._optimize_modality(modality)
-
-            copy_results = copy.deepcopy(
-                self.optimization_results[modality.modality_id]
-            )
-            for model in copy_results:
-                for i, model_task in enumerate(copy_results[model]):
-                    ops = []
-                    for op in model_task.operator_chain:
-                        if not isinstance(op, str):
-                            ops.append(op.name)
-                    if len(ops) > 0:
-                        copy_results[model][i].operator_chain = ops
-                if self.debug:
-                    with open(
-                        f"{self.folder_name}/results_{model}_{modality.modality_type.name}.p",
-                        "wb",
-                    ) as fp:
-                        pickle.dump(
-                            copy_results[model], fp, protocol=pickle.HIGHEST_PROTOCOL
-                        )
-
-    def get_k_best_results(self, modality: Modality, k: int, task: Task):
-        """
-        Get the k best results for the given modality
-        :param modality: modality to get the best results for
-        :param k: number of best results
-        """
-        results = sorted(
-            self.optimization_results[modality.modality_id][task.model.name],
-            key=lambda x: x.test_accuracy,
-            reverse=True,
-        )[:k]
-
-        return results
-
-    def _optimize_modality(self, modality: Modality):
-        """
-        Optimize a single modality by leveraging modality specific heuristics and incorporating context and
-        stores the resulting operation chains as optimization results.
-        :param modality: modality to optimize
-        """
-
-        representations = self._get_compatible_operators(modality.modality_type, [])
-
-        for rep in representations:
-            self._build_operator_chain(modality, [rep()], 1)
-
-    def _get_compatible_operators(self, modality_type, used_operators):
-        next_operators = []
-        for operator in self.operator_registry.get_representations(modality_type):
-            if operator.__name__ not in used_operators:
-                next_operators.append(operator)
-
-        for context_operator in self.operator_registry.get_context_operators():
-            if (
-                len(used_operators) == 0
-                or context_operator.__name__ not in used_operators[-1]
-            ):
-                next_operators.append(context_operator)
-
-        return next_operators
-
-    def _build_operator_chain(self, modality, current_operator_chain, depth):
-
-        if depth > self.max_chain_depth:
-            return
-
-        self._apply_operator_chain(modality, current_operator_chain)
-
-        current_modality_type = modality.modality_type
-
-        for operator in current_operator_chain:
-            if hasattr(operator, "output_modality_type"):
-                current_modality_type = operator.output_modality_type
-
-        next_representations = self._get_compatible_operators(
-            current_modality_type, [type(op).__name__ for op in current_operator_chain]
-        )
-
-        for next_rep in next_representations:
-            rep_instance = next_rep()
-            new_chain = current_operator_chain + [rep_instance]
-            self._build_operator_chain(modality, new_chain, depth + 1)
-
-    def _evaluate_with_flattened_data(
-        self, modality, operator_chain, op_params, representation_time, task
-    ):
-        from systemds.scuro.representations.aggregated_representation import (
-            AggregatedRepresentation,
-        )
-
-        results = []
-        for aggregation in Aggregation().get_aggregation_functions():
-            start = time.time()
-            agg_operator = AggregatedRepresentation(Aggregation(aggregation, True))
-            agg_modality = agg_operator.transform(modality)
-            end = time.time()
-
-            agg_opperator_chain = operator_chain + [agg_operator]
-            agg_params = dict(op_params)
-            agg_params.update({agg_operator.name: agg_operator.parameters})
-
-            score = task.run(agg_modality.data)
-            result = OptimizationResult(
-                operator_chain=agg_opperator_chain,
-                parameters=agg_params,
-                train_accuracy=score[0],
-                test_accuracy=score[1],
-                # train_min_it_acc=score[2],
-                # test_min_it_acc=score[3],
-                training_runtime=task.training_time,
-                inference_runtime=task.inference_time,
-                representation_time=representation_time + end - start,
-                output_shape=(1, 1),  # TODO
-            )
-            results.append(result)
-
-            if self.debug:
-                op_name = ""
-                for operator in agg_opperator_chain:
-                    op_name += str(operator.__class__.__name__)
-                print(f"{task.name} {task.model.name} {op_name}: {score[1]}")
-
-        return results
-
-    def _evaluate_operator_chain(
-        self, modality, operator_chain, op_params, representation_time
-    ):
-        for task in self.tasks:
-            if isinstance(modality.data[0], str):
-                continue
-
-            if (
-                task.expected_dim == 1
-                and not isinstance(modality.data[0], list)
-                and modality.data[0].ndim > 1
-            ):
-                r = self._evaluate_with_flattened_data(
-                    modality, operator_chain, op_params, representation_time, task
-                )
-                self.optimization_results[modality.modality_id][task.model.name].extend(
-                    r
-                )
-            else:
-                score = task.run(modality.data)
-                result = OptimizationResult(
-                    operator_chain=operator_chain,
-                    parameters=op_params,
-                    train_accuracy=score[0],
-                    test_accuracy=score[1],
-                    # train_min_it_acc=score[2],
-                    # test_min_it_acc=score[3],
-                    training_runtime=task.training_time,
-                    inference_runtime=task.inference_time,
-                    representation_time=representation_time,
-                    output_shape=(1, 1),
-                )  # TODO
-                self.optimization_results[modality.modality_id][task.model.name].append(
-                    result
-                )
-                if self.debug:
-                    op_name = ""
-                    for operator in operator_chain:
-                        op_name += str(operator.__class__.__name__)
-                    print(f"{task.name} {task.model.name} - {op_name}: {score[1]}")
-
-    def _apply_operator_chain(self, current_modality, operator_chain):
-        op_params = {}
-        modified_modality = current_modality
-
-        representation_start = time.time()
-        try:
-            cached_representation, representation_ops, used_op_names = (
-                self.cache.load_from_cache(
-                    modified_modality, copy.deepcopy(operator_chain)
-                )
-            )
-            if cached_representation is not None:
-                modified_modality = cached_representation
-            store = False
-            for operator in representation_ops:
-                if isinstance(operator, Context):
-                    modified_modality = modified_modality.context(operator)
-                else:
-                    modified_modality = modified_modality.apply_representation(operator)
-                store = True
-                op_params[operator.name] = operator.get_current_parameters()
-            if store:
-                self.cache.save_to_cache(
-                    modified_modality, used_op_names, representation_ops
-                )
-            representation_end = time.time()
-
-            self._evaluate_operator_chain(
-                modified_modality,
-                operator_chain,
-                op_params,
-                representation_end - representation_start,
-            )
-        except Exception as e:
-            print(f"Failed to evaluate chain {operator_chain}: {str(e)}")
-            return
diff --git a/src/main/python/tests/scuro/test_hp_tuner.py b/src/main/python/tests/scuro/test_hp_tuner.py
index 4ec684d9c93..f939f5c6b42 100644
--- a/src/main/python/tests/scuro/test_hp_tuner.py
+++ b/src/main/python/tests/scuro/test_hp_tuner.py
@@ -19,28 +19,6 @@
 #
 # -------------------------------------------------------------
 
-# -------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# -------------------------------------------------------------
-
-
 import unittest
 
 import numpy as np
@@ -48,6 +26,9 @@
 from sklearn.metrics import classification_report
 from sklearn.model_selection import train_test_split
 
+from systemds.scuro.drsearch.multimodal_optimizer import MultimodalOptimizer
+from systemds.scuro.representations.average import Average
+from systemds.scuro.representations.concatenation import Concatenation
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.models.model import Model
 from systemds.scuro.drsearch.task import Task
@@ -140,7 +121,33 @@ def test_hp_tuner_for_audio_modality(self):
             )
         )
 
-        self.run_hp_for_modality(audio)
+        self.run_hp_for_modality([audio])
+
+    def test_multimodal_hp_tuning(self):
+        audio_data, audio_md = ModalityRandomDataGenerator().create_audio_data(
+            self.num_instances, 3000
+        )
+        audio = UnimodalModality(
+            TestDataLoader(
+                self.indices, None, ModalityType.AUDIO, audio_data, np.float32, audio_md
+            )
+        )
+
+        text_data, text_md = ModalityRandomDataGenerator().create_text_data(
+            self.num_instances
+        )
+        text = UnimodalModality(
+            TestDataLoader(
+                self.indices, None, ModalityType.TEXT, text_data, str, text_md
+            )
+        )
+
+        self.run_hp_for_modality(
+            [audio, text], multimodal=True, tune_unimodal_representations=True
+        )
+        self.run_hp_for_modality(
+            [audio, text], multimodal=True, tune_unimodal_representations=False
+        )
 
     def test_hp_tuner_for_text_modality(self):
         text_data, text_md = ModalityRandomDataGenerator().create_text_data(
@@ -151,9 +158,11 @@ def test_hp_tuner_for_text_modality(self):
                 self.indices, None, ModalityType.TEXT, text_data, str, text_md
             )
         )
-        self.run_hp_for_modality(text)
+        self.run_hp_for_modality([text])
 
-    def run_hp_for_modality(self, modality):
+    def run_hp_for_modality(
+        self, modalities, multimodal=False, tune_unimodal_representations=False
+    ):
         with patch.object(
             Registry,
             "_representations",
@@ -166,14 +175,40 @@ def run_hp_for_modality(self, modality):
             },
         ):
             registry = Registry()
-
-            unimodal_optimizer = UnimodalOptimizer([modality], self.tasks, False)
+            registry._fusion_operators = [Average, Concatenation]
+            unimodal_optimizer = UnimodalOptimizer(modalities, self.tasks, False)
             unimodal_optimizer.optimize()
 
             hp = HyperparameterTuner(
-                [modality], self.tasks, unimodal_optimizer.operator_performance
+                modalities, self.tasks, unimodal_optimizer.operator_performance
             )
-            hp.tune_unimodal_representations()
+
+            if multimodal:
+                m_o = MultimodalOptimizer(
+                    modalities,
+                    unimodal_optimizer.operator_performance,
+                    self.tasks,
+                    debug=False,
+                    min_modalities=2,
+                    max_modalities=3,
+                )
+                fusion_results = m_o.optimize()
+
+                best_results = sorted(
+                    fusion_results, key=lambda x: x.val_score, reverse=True
+                )
+
+                hp.tune_multimodal_representations(
+                    best_results,
+                    self.tasks[0],
+                    k=2,
+                    optimize_unimodal=tune_unimodal_representations,
+                )
+                assert len(hp.results) == 2
+            else:
+                hp.tune_unimodal_representations()
+                assert len(hp.results) == len(self.tasks)
+                assert len(hp.results[self.tasks[0].model.name]) == 2
 
 
 if __name__ == "__main__":
diff --git a/src/main/python/tests/scuro/test_multimodal_fusion.py b/src/main/python/tests/scuro/test_multimodal_fusion.py
index ae3ddedffb1..aacf1a26eb8 100644
--- a/src/main/python/tests/scuro/test_multimodal_fusion.py
+++ b/src/main/python/tests/scuro/test_multimodal_fusion.py
@@ -19,10 +19,7 @@
 #
 # -------------------------------------------------------------
 
-
-import shutil
 import unittest
-from multiprocessing import freeze_support
 
 import numpy as np
 from sklearn import svm
@@ -33,29 +30,20 @@
 from systemds.scuro.drsearch.unimodal_optimizer import UnimodalOptimizer
 from systemds.scuro.representations.concatenation import Concatenation
 from systemds.scuro.representations.average import Average
-from systemds.scuro.drsearch.fusion_optimizer import FusionOptimizer
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.models.model import Model
 from systemds.scuro.drsearch.task import Task
-from systemds.scuro.drsearch.unimodal_representation_optimizer import (
-    UnimodalRepresentationOptimizer,
-)
 
 from systemds.scuro.representations.spectrogram import Spectrogram
 from systemds.scuro.representations.word2vec import W2V
 from systemds.scuro.modality.unimodal_modality import UnimodalModality
 from systemds.scuro.representations.resnet import ResNet
 from tests.scuro.data_generator import (
-    setup_data,
     TestDataLoader,
     ModalityRandomDataGenerator,
 )
 
-from systemds.scuro.dataloader.audio_loader import AudioLoader
-from systemds.scuro.dataloader.video_loader import VideoLoader
-from systemds.scuro.dataloader.text_loader import TextLoader
 from systemds.scuro.modality.type import ModalityType
-
 from unittest.mock import patch
 
 
@@ -144,7 +132,7 @@ def test_multimodal_fusion(self):
         )
 
         audio_data, audio_md = ModalityRandomDataGenerator().create_audio_data(
-            self.num_instances, 100
+            self.num_instances, 1000
         )
         text_data, text_md = ModalityRandomDataGenerator().create_text_data(
             self.num_instances
@@ -186,42 +174,22 @@ def test_multimodal_fusion(self):
             )
             unimodal_optimizer.optimize()
             unimodal_optimizer.operator_performance.get_k_best_results(audio, 2, task)
-
-            multimodal_optimizer = MultimodalOptimizer(
+            m_o = MultimodalOptimizer(
                 [audio, text, video],
                 unimodal_optimizer.operator_performance,
                 [task],
                 debug=False,
+                min_modalities=2,
+                max_modalities=3,
             )
+            fusion_results = m_o.optimize()
 
-            multimodal_optimizer.optimize()
+            best_results = sorted(
+                fusion_results, key=lambda x: x.val_score, reverse=True
+            )[:2]
 
-            assert (
-                len(multimodal_optimizer.optimization_results.results["TestSVM"].keys())
-                == 57
-            )
-            assert (
-                len(
-                    multimodal_optimizer.optimization_results.results["TestSVM"][
-                        "0_1_2_3_4_5"
-                    ]
-                )
-                == 62
-            )
-            assert (
-                len(
-                    multimodal_optimizer.optimization_results.results["TestSVM"][
-                        "3_4_5"
-                    ]
-                )
-                == 6
-            )
-            assert (
-                len(multimodal_optimizer.optimization_results.results["TestSVM"]["0_1"])
-                == 2
-            )
+            assert best_results[0].val_score >= best_results[1].val_score
 
 
 if __name__ == "__main__":
-    freeze_support()
     unittest.main()
diff --git a/src/main/python/tests/scuro/test_unimodal_optimizer.py b/src/main/python/tests/scuro/test_unimodal_optimizer.py
index d8fd98e74a2..0680b3edf54 100644
--- a/src/main/python/tests/scuro/test_unimodal_optimizer.py
+++ b/src/main/python/tests/scuro/test_unimodal_optimizer.py
@@ -163,17 +163,6 @@ def test_unimodal_optimizer_for_text_modality(self):
         )
         self.optimize_unimodal_representation_for_modality(text)
 
-    def test_unimodal_optimizer_for_video_modality(self):
-        video_data, video_md = ModalityRandomDataGenerator().create_visual_modality(
-            self.num_instances, 60
-        )
-        video = UnimodalModality(
-            TestDataLoader(
-                self.indices, None, ModalityType.VIDEO, video_data, np.float32, video_md
-            )
-        )
-        self.optimize_unimodal_representation_for_modality(video)
-
     def optimize_unimodal_representation_for_modality(self, modality):
         with patch.object(
             Registry,

From 4abdd465f083b3dabc6effb82ef88e0e114e048b Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Mon, 22 Sep 2025 12:35:32 +0200
Subject: [PATCH 13/22] adapt init

---
 src/main/python/systemds/scuro/__init__.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/main/python/systemds/scuro/__init__.py b/src/main/python/systemds/scuro/__init__.py
index da9477f0739..da9e12e3b74 100644
--- a/src/main/python/systemds/scuro/__init__.py
+++ b/src/main/python/systemds/scuro/__init__.py
@@ -73,13 +73,9 @@
 from systemds.scuro.modality.unimodal_modality import UnimodalModality
 from systemds.scuro.drsearch.dr_search import DRSearch
 from systemds.scuro.drsearch.task import Task
-from systemds.scuro.drsearch.fusion_optimizer import FusionOptimizer
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.drsearch.optimization_data import OptimizationData
 from systemds.scuro.drsearch.representation_cache import RepresentationCache
-from systemds.scuro.drsearch.unimodal_representation_optimizer import (
-    UnimodalRepresentationOptimizer,
-)
 from systemds.scuro.representations.covarep_audio_features import (
     RMSE,
     Spectral,
@@ -137,11 +133,9 @@
     "UnimodalModality",
     "DRSearch",
     "Task",
-    "FusionOptimizer",
     "Registry",
     "OptimizationData",
     "RepresentationCache",
-    "UnimodalRepresentationOptimizer",
     "UnimodalOptimizer",
     "MultimodalOptimizer",
     "ZeroCrossing",

From 4656f13dda7892ca8d3e0839fe01bce207e3d91c Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Tue, 23 Sep 2025 12:01:44 +0200
Subject: [PATCH 14/22] add learnable reps to optimizers

---
 .../scuro/drsearch/hyperparameter_tuner.py    | 210 ++++++++++-
 .../scuro/drsearch/multimodal_optimizer.py    |  83 ++++-
 .../systemds/scuro/representations/fusion.py  |  57 ++-
 .../systemds/scuro/representations/lstm.py    | 208 +++++++++--
 .../multimodal_attention_fusion.py            | 334 +++++++++++-------
 5 files changed, 691 insertions(+), 201 deletions(-)

diff --git a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
index 17cde10be39..e2a03b82c5c 100644
--- a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
+++ b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
@@ -233,21 +233,27 @@ def tune_multimodal_representations(
                                 break
                             except ValueError:
                                 continue
-                result = self.tune_dag_representation(
-                    dag,
-                    dag.root_node_id,
-                    task,
-                    max_eval_per_rep,
-                )
 
+                if self._dag_has_trainable_fusion(dag):
+                    result = self.tune_trainable_fusion_dag(dag, task, max_eval_per_rep)
+                else:
+                    result = self.tune_dag_representation(
+                        dag, dag.root_node_id, task, max_eval_per_rep
+                    )
             else:
-                result = self.tune_dag_representation(
-                    representation.dag,
-                    representation.dag.root_node_id,
-                    task,
-                    max_eval_per_rep,
-                )
+                if self._dag_has_trainable_fusion(representation.dag):
+                    result = self.tune_trainable_fusion_dag(
+                        representation.dag, task, max_eval_per_rep
+                    )
+                else:
+                    result = self.tune_dag_representation(
+                        representation.dag,
+                        representation.dag.root_node_id,
+                        task,
+                        max_eval_per_rep,
+                    )
             results.append(result)
+
         self.results = results
 
         if self.save_results:
@@ -255,6 +261,186 @@ def tune_multimodal_representations(
 
         return results
 
+    def _dag_has_trainable_fusion(self, dag) -> bool:
+        for node in dag.nodes:
+            if node.operation and hasattr(node.operation(), "needs_training"):
+                if node.operation().needs_training:
+                    return True
+        return False
+
+    def tune_trainable_fusion_dag(self, dag, task, max_evals=None):
+        hyperparams = {}
+        reps = []
+        modality_ids = []
+        node_order = []
+        fusion_nodes = []
+
+        visited = set()
+
+        def visit_node(node_id):
+            if node_id in visited:
+                return
+            node = dag.get_node_by_id(node_id)
+            for input_id in node.inputs:
+                visit_node(input_id)
+            visited.add(node_id)
+            if node.operation is not None:
+                if node.parameters:
+                    hyperparams.update(node.parameters)
+                reps.append(node.operation)
+                node_order.append(node_id)
+
+                if hasattr(node.operation(), "needs_training"):
+                    if node.operation().needs_training:
+                        fusion_nodes.append(node_id)
+
+            if node.modality_id is not None:
+                modality_ids.append(node.modality_id)
+
+        visit_node(dag.root_node_id)
+
+        if not hyperparams:
+            return None
+
+        start_time = time.time()
+        rep_name = "_".join([rep.__name__ for rep in reps])
+
+        param_grid = list(ParameterGrid(hyperparams))
+        if max_evals and len(param_grid) > max_evals:
+            np.random.shuffle(param_grid)
+            param_grid = param_grid[:max_evals]
+
+        all_results = []
+        for params in param_grid:
+            result = self.evaluate_trainable_fusion_config(
+                dag, params, node_order, modality_ids, fusion_nodes, task
+            )
+            all_results.append(result)
+
+        if self.maximize_metric:
+            best_params, best_score = max(all_results, key=lambda x: x[1])
+        else:
+            best_params, best_score = min(all_results, key=lambda x: x[1])
+
+        tuning_time = time.time() - start_time
+
+        return HyperparamResult(
+            representation_name=rep_name,
+            best_params=best_params,
+            best_score=best_score,
+            all_results=all_results,
+            tuning_time=tuning_time,
+            modality_id=modality_ids[0] if modality_ids else None,
+        )
+
+    def evaluate_trainable_fusion_config(
+        self, dag, params, node_order, modality_ids, fusion_nodes, task
+    ):
+        try:
+            dag_copy = copy.deepcopy(dag)
+
+            for node_id in node_order:
+                node = dag_copy.get_node_by_id(node_id)
+                if node.operation is not None and node.parameters:
+                    node_params = {
+                        k: v for k, v in params.items() if k in node.parameters
+                    }
+                    operation_class = node.operation
+                    new_operation = operation_class(**node_params)
+                    node.operation = lambda: new_operation
+
+            required_modalities = []
+            for modality_id in set(modality_ids):
+                modality = self.get_modality_by_id(modality_id)
+                if modality:
+                    required_modalities.append(modality)
+
+            if not required_modalities:
+                raise ValueError("No valid modalities found for DAG evaluation")
+
+            if fusion_nodes:
+                final_representation = self._execute_trainable_fusion_dag(
+                    dag_copy, required_modalities, task
+                )
+            else:
+                modified_modalities = dag_copy.execute(required_modalities)
+                final_representation = modified_modalities[
+                    list(modified_modalities.keys())[-1]
+                ]
+
+            score = task.run(final_representation.data)[1]
+            return params, score
+
+        except Exception as e:
+            self.logger.error(
+                f"Error evaluating trainable fusion DAG with params {params}: {e}"
+            )
+            import traceback
+
+            traceback.print_exc()
+            return params, float("-inf") if self.maximize_metric else float("inf")
+
+    def _execute_trainable_fusion_dag(self, dag, modalities, task):
+        cache = {}
+
+        def execute_node_with_training(node_id: str):
+            if node_id in cache:
+                return cache[node_id]
+
+            node = dag.get_node_by_id(node_id)
+
+            if not node.inputs:
+                modality = None
+                for mod in modalities:
+                    if mod.modality_id == node.modality_id:
+                        modality = mod
+                        break
+                if modality is None:
+                    raise ValueError(f"Modality {node.modality_id} not found")
+                cache[node_id] = modality
+                return modality
+
+            input_mods = [
+                execute_node_with_training(input_id) for input_id in node.inputs
+            ]
+
+            if len(input_mods) > 1:
+                fusion_op = node.operation()
+
+                if hasattr(fusion_op, "needs_training") and fusion_op.needs_training:
+
+                    fusion_op.transform_with_training(
+                        input_mods, task.train_indices, task.labels
+                    )
+
+                    result_data = fusion_op.transform_data(input_mods, task.val_indices)
+
+                    from systemds.scuro.modality.transformed import TransformedModality
+
+                    result = TransformedModality(
+                        modality_type="fused",
+                        data=result_data,
+                        metadata={"shape": result_data.shape},
+                        transformation=[fusion_op],
+                    )
+                else:
+                    result = input_mods[0].combine(input_mods[1:], fusion_op)
+            else:
+                if hasattr(node.operation(), "__class__"):
+                    op_instance = node.operation()
+                    if hasattr(input_mods[0], "apply_representation"):
+                        result = input_mods[0].apply_representation(op_instance)
+                    else:
+                        result = op_instance.transform(input_mods[0])
+                else:
+                    result = input_mods[0]
+
+            cache[node_id] = result
+            return result
+
+        final_result = execute_node_with_training(dag.root_node_id)
+        return final_result
+
     def save_tuning_results(self, filepath: str = None):
         if not filepath:
             filepath = f"hyperparameter_results_{int(time.time())}.json"
diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
index f0de7687cbc..727ca28b3d9 100644
--- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
@@ -27,7 +27,8 @@
 import copy
 import traceback
 from itertools import chain
-from systemds.scuro import Task
+from systemds.scuro.drsearch.task import Task
+from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.drsearch.representation_dag import (
     RepresentationDag,
     RepresentationDAGBuilder,
@@ -35,6 +36,7 @@
 from systemds.scuro.representations.aggregated_representation import (
     AggregatedRepresentation,
 )
+from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.representations.aggregate import Aggregation
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.utils.schema_helpers import get_shape
@@ -186,14 +188,18 @@ def _evaluate_dag(self, dag: RepresentationDag, task: Task) -> "OptimizationResu
         start_time = time.time()
 
         try:
-
-            fused_representation = dag.execute(
-                list(
-                    chain.from_iterable(
-                        self.k_best_representations[task.model.name].values()
+            has_trainable_fusion = self._dag_has_trainable_fusion(dag)
+
+            if has_trainable_fusion:
+                fused_representation = self._execute_dag_with_training(dag, task)
+            else:
+                fused_representation = dag.execute(
+                    list(
+                        chain.from_iterable(
+                            self.k_best_representations[task.model.name].values()
+                        )
                     )
                 )
-            )
 
             if fused_representation is None:
                 return None
@@ -225,6 +231,69 @@ def _evaluate_dag(self, dag: RepresentationDag, task: Task) -> "OptimizationResu
             traceback.print_exc()
             return None
 
+    def _dag_has_trainable_fusion(self, dag: RepresentationDag) -> bool:
+        for node in dag.nodes:
+            if node.operation and hasattr(node.operation(), "needs_training"):
+                if node.operation().needs_training:
+                    return True
+        return False
+
+    def _execute_dag_with_training(self, dag: RepresentationDag, task: Task):
+        all_modalities = list(
+            chain.from_iterable(self.k_best_representations[task.model.name].values())
+        )
+
+        cache = {}
+
+        def execute_node_with_training(node_id: str):
+            if node_id in cache:
+                return cache[node_id]
+
+            node = dag.get_node_by_id(node_id)
+
+            if not node.inputs:
+                modality = self._get_modality_by_id_and_instance_id(
+                    all_modalities, node.modality_id, node.representation_index
+                )
+                cache[node_id] = modality
+                return modality
+
+            input_mods = [
+                execute_node_with_training(input_id) for input_id in node.inputs
+            ]
+
+            fusion_op = node.operation()
+
+            if hasattr(fusion_op, "needs_training") and fusion_op.needs_training:
+                fusion_op.transform_with_training(
+                    input_mods, task.train_indices, task.labels
+                )
+
+                result_data = fusion_op.transform_data(input_mods)
+                result = TransformedModality(
+                    input_mods[0], fusion_op, ModalityType.EMBEDDING
+                )
+                result.data = result_data
+
+            else:
+                result = input_mods[0].combine(input_mods[1:], fusion_op)
+
+            cache[node_id] = result
+            return result
+
+        execute_node_with_training(dag.root_node_id)
+        return cache
+
+    def _get_modality_by_id_and_instance_id(self, modalities, modality_id, instance_id):
+        counter = 0
+        for modality in modalities:
+            if modality.modality_id == modality_id:
+                if counter == instance_id or instance_id == -1:
+                    return modality
+                else:
+                    counter += 1
+        return None
+
     def optimize(self, max_combinations: int = None) -> List["OptimizationResult"]:
         all_results = []
 
diff --git a/src/main/python/systemds/scuro/representations/fusion.py b/src/main/python/systemds/scuro/representations/fusion.py
index 7a942c28b88..e7ecfa5acc1 100644
--- a/src/main/python/systemds/scuro/representations/fusion.py
+++ b/src/main/python/systemds/scuro/representations/fusion.py
@@ -18,10 +18,14 @@
 # under the License.
 #
 # -------------------------------------------------------------
+import copy
 from typing import List
 
 import numpy as np
-from systemds.scuro import AggregatedRepresentation, Aggregation
+from systemds.scuro.representations.aggregated_representation import (
+    AggregatedRepresentation,
+)
+from systemds.scuro.modality.transformed import TransformedModality
 
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.representations.representation import Representation
@@ -66,23 +70,40 @@ def transform(self, modalities: List[Modality]):
     def transform_with_training(
         self, modalities: List[Modality], train_indices, labels
     ):
-        # if self.needs_instance_alignment:
-        #     max_len = self.get_max_embedding_size(modalities)
-        #     for modality in modalities:
-        #         modality.pad(max_len=max_len)
-
-        self.execute(
-            [np.array(modality.data)[train_indices] for modality in modalities],
-            labels[train_indices],
-        )
-
-    def transform_data(self, modalities: List[Modality], val_indices):
-        return self.apply_representation(
-            [np.array(modality.data)[val_indices] for modality in modalities]
-        )
-
-    def execute(self, modalities: List[Modality]):
-        raise f"Not implemented for Fusion: {self.name}"
+
+        train_modalities = []
+        for modality in modalities:
+            train_data = [d for i, d in enumerate(modality.data) if i in train_indices]
+            train_modality = TransformedModality(modality, self)
+            train_modality.data = copy.deepcopy(train_data)
+            train_modalities.append(train_modality)
+
+        self.execute(train_modalities, labels[train_indices])
+
+    def transform_data(self, modalities: List[Modality], indices=None):
+        val_modalities = []
+        for modality in modalities:
+            val_data = (
+                [d for i, d in enumerate(modality.data) if i in indices]
+                if indices
+                else modality.data
+            )
+            val_modality = type(modality)(modality, self)
+            val_modality.data = copy.deepcopy(val_data)
+            val_modalities.append(val_modality)
+
+        return self.apply_representation(val_modalities)
+
+    def execute(self, modalities: List[Modality], labels: np.ndarray = None):
+        raise NotImplementedError(f"Not implemented for Fusion: {self.name}")
+
+    def apply_representation(self, modalities: List[Modality]):
+        if self.needs_training:
+            raise NotImplementedError(
+                f"apply_representation not implemented for trainable fusion: {self.name}"
+            )
+        else:
+            return self.execute(modalities)
 
     def get_max_embedding_size(self, modalities: List[Modality]):
         """
diff --git a/src/main/python/systemds/scuro/representations/lstm.py b/src/main/python/systemds/scuro/representations/lstm.py
index af5fd56e9fa..c9e8bae1cdf 100644
--- a/src/main/python/systemds/scuro/representations/lstm.py
+++ b/src/main/python/systemds/scuro/representations/lstm.py
@@ -22,10 +22,10 @@
 import random
 
 import torch
-
 from torch import nn
-from typing import List
-
+from torch.utils.data import DataLoader, TensorDataset
+from typing import List, Dict, Any
+from systemds.scuro.utils.static_variables import get_device
 import numpy as np
 
 from systemds.scuro.modality.modality import Modality
@@ -34,21 +34,46 @@
 from systemds.scuro.drsearch.operator_registry import register_fusion_operator
 
 
-# TODO: concatenate before embedding
-# Make this a hyperparameter
 @register_fusion_operator()
 class LSTM(Fusion):
-    def __init__(self, width=128, depth=1, dropout_rate=0.1):
-        """
-        Combines modalities using an LSTM
-        """
-        super().__init__("LSTM")
-        self.depth = int(depth)
+    def __init__(
+        self,
+        width=128,
+        depth=1,
+        dropout_rate=0.1,
+        learning_rate=0.001,
+        epochs=50,
+        batch_size=32,
+    ):
+        parameters = {
+            "width": [128, 256, 512],
+            "depth": [1, 2, 3],
+            "dropout_rate": [0.1, 0.2, 0.3, 0.4, 0.5],
+            "learning_rate": [0.001, 0.0001, 0.01, 0.1],
+            "epochs": [50, 100, 200],
+            "batch_size": [8, 16, 32, 64, 128],
+        }
+
+        super().__init__("LSTM", parameters)
+
         self.width = int(width)
+        self.depth = int(depth)
         self.dropout_rate = float(dropout_rate)
-        self.unimodal_embeddings = {}
-        seed = 42
+        self.learning_rate = float(learning_rate)
+        self.epochs = int(epochs)
+        self.batch_size = int(batch_size)
 
+        self.needs_training = True
+        self.needs_alignment = True
+        self.model = None
+        self.input_dim = None
+        self.num_classes = None
+        self.is_trained = False
+        self.model_state = None
+
+        self._set_random_seeds()
+
+    def _set_random_seeds(self, seed=42):
         os.environ["PYTHONHASHSEED"] = str(seed)
         random.seed(seed)
         np.random.seed(seed)
@@ -57,40 +82,145 @@ def __init__(self, width=128, depth=1, dropout_rate=0.1):
         torch.backends.cudnn.deterministic = True
         torch.backends.cudnn.benchmark = False
 
-    def transform(self, modalities: List[Modality]):
-        self.unimodal_embeddings = {}
-        size = len(modalities[0].data)
-
-        result = np.zeros((size, 0))
+    def _prepare_data(self, modalities: List[Modality]) -> np.ndarray:
+        processed_modalities = []
 
         for modality in modalities:
-            if modality.modality_type in list(self.unimodal_embeddings.keys()):
-                out = self.unimodal_embeddings.get(modality.modality_type)
+            data = np.array(modality.data)
+
+            if data.ndim == 1:
+                data = data.reshape(-1, 1, 1)
+            elif data.ndim == 2:
+                data = data.reshape(data.shape[0], 1, data.shape[1])
+            elif data.ndim == 3:
+                pass
             else:
-                out = self.run_lstm(modality.data)
-                self.unimodal_embeddings[modality.modality_type] = out
+                raise ValueError(
+                    f"Unsupported data shape: {data.shape}. Expected 1D, 2D, or 3D arrays."
+                )
+
+            processed_modalities.append(data)
+
+        max_seq_len = max(mod.shape[1] for mod in processed_modalities)
+
+        aligned_modalities = []
+        for data in processed_modalities:
+            if data.shape[1] < max_seq_len:
+                pad_width = ((0, 0), (0, max_seq_len - data.shape[1]), (0, 0))
+                data = np.pad(data, pad_width, mode="constant", constant_values=0)
+            aligned_modalities.append(data)
+
+        concatenated_data = np.concatenate(aligned_modalities, axis=2)
+
+        return concatenated_data.astype(np.float32)
+
+    def _build_model(self, input_dim: int, num_classes: int) -> nn.Module:
+
+        class LSTMClassifier(nn.Module):
+            def __init__(
+                self, input_dim, hidden_dim, num_layers, num_classes, dropout_rate
+            ):
+                super(LSTMClassifier, self).__init__()
+                self.hidden_dim = hidden_dim
+                self.num_layers = num_layers
+
+                self.lstm = nn.LSTM(
+                    input_dim,
+                    hidden_dim,
+                    num_layers,
+                    batch_first=True,
+                    bidirectional=True,
+                    dropout=dropout_rate if num_layers > 1 else 0,
+                )
+
+                self.dropout = nn.Dropout(dropout_rate)
+                self.classifier = nn.Linear(hidden_dim * 2, num_classes)
+
+            def forward(self, x):
+                lstm_out, _ = self.lstm(x)
+                last_output = lstm_out[:, -1, :]
+                dropped = self.dropout(last_output)
+                output = self.classifier(dropped)
+
+                return last_output, output
+
+        return LSTMClassifier(
+            input_dim, self.width, self.depth, num_classes, self.dropout_rate
+        )
+
+    def execute(self, modalities: List[Modality], labels: np.ndarray = None):
+        if labels is None:
+            raise ValueError("LSTM fusion requires labels for training")
+
+        X = self._prepare_data(modalities)
+        y = np.array(labels)
+
+        self.input_dim = X.shape[2]
+        self.num_classes = len(np.unique(y))
+
+        self.model = self._build_model(self.input_dim, self.num_classes)
+        device = get_device()
+        self.model.to(device)
+
+        criterion = nn.CrossEntropyLoss()
+        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
+
+        X_tensor = torch.FloatTensor(X).to(device)
+        y_tensor = torch.LongTensor(y).to(device)
+
+        dataset = TensorDataset(X_tensor, y_tensor)
+        dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
+
+        self.model.train()
+        for epoch in range(self.epochs):
+            total_loss = 0
+            for batch_X, batch_y in dataloader:
+                optimizer.zero_grad()
+
+                features, predictions = self.model(batch_X)
+                loss = criterion(predictions, batch_y)
+
+                loss.backward()
+                optimizer.step()
+
+                total_loss += loss.item()
+
+        self.is_trained = True
+
+        self.model_state = {
+            "state_dict": self.model.state_dict(),
+            "input_dim": self.input_dim,
+            "num_classes": self.num_classes,
+            "width": self.width,
+            "depth": self.depth,
+            "dropout_rate": self.dropout_rate,
+        }
+
+    def apply_representation(self, modalities: List[Modality]) -> np.ndarray:
+        if not self.is_trained or self.model is None:
+            raise ValueError("Model must be trained before applying representation")
 
-            result = np.concatenate([result, out], axis=-1)
+        X = self._prepare_data(modalities)
 
-        return result
+        device = get_device()
+        self.model.to(device)
 
-    def run_lstm(self, data):
-        if isinstance(data, list):
-            data = np.array(data)
+        X_tensor = torch.FloatTensor(X).to(device)
 
-        d = data.astype(np.float32)
-        dim = d.shape[-1]
-        d = torch.from_numpy(d)
-        dropout_layer = torch.nn.Dropout(self.dropout_rate)
+        self.model.eval()
+        with torch.no_grad():
+            features, _ = self.model(X_tensor)
 
-        for x in range(0, self.depth):
-            lstm_x = nn.LSTM(dim, self.width, batch_first=True, bidirectional=True)
-            dim = 2 * self.width
-            d = lstm_x(d)[0]
+        return features.cpu().numpy()
 
-        out = dropout_layer(d)
+    def get_model_state(self) -> Dict[str, Any]:
+        return self.model_state
 
-        if d.ndim > 2:
-            out = torch.flatten(out, 1)
+    def set_model_state(self, state: Dict[str, Any]):
+        self.model_state = state
+        self.input_dim = state["input_dim"]
+        self.num_classes = state["num_classes"]
 
-        return out.detach().numpy()
+        self.model = self._build_model(self.input_dim, self.num_classes)
+        self.model.load_state_dict(state["state_dict"])
+        self.is_trained = True
diff --git a/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py b/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py
index 7928b9988bd..092ebebb441 100644
--- a/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py
+++ b/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py
@@ -19,10 +19,12 @@
 #
 # -------------------------------------------------------------
 
+import os
+import random
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Any
 import numpy as np
 from systemds.scuro.drsearch.operator_registry import register_fusion_operator
 from systemds.scuro.modality.modality import Modality
@@ -37,140 +39,237 @@ def __init__(
         hidden_dim=256,
         num_heads=8,
         dropout=0.1,
-        fusion_strategy="attention",
         batch_size=32,
         num_epochs=50,
+        learning_rate=0.001,
     ):
-        self.encoder = None
         params = {
-            "hidden_dim": [128, 256, 512],
-            "num_heads": [1, 4, 8],
-            "dropout": [0.1, 0.2, 0.3],
-            "fusion_strategy": ["mean", "max", "attention", "cls"],
-            "batch_size": [32, 64, 128],
-            "num_epochs": [50, 70, 100, 150],
+            "hidden_dim": [32, 128, 256, 384, 512, 768],
+            "num_heads": [2, 4, 8, 12],
+            "dropout": [0.0, 0.1, 0.2, 0.3, 0.4],
+            "batch_size": [8, 16, 32, 64, 128],
+            "num_epochs": [50, 100, 150, 200],
+            "learning_rate": [1e-5, 1e-4, 1e-3, 1e-2],
         }
         super().__init__("AttentionFusion", params)
-        self.hidden_dim = hidden_dim
-        self.num_heads = num_heads
-        self.dropout = dropout
-        self.fusion_strategy = fusion_strategy
-        self.batch_size = batch_size
-        self.needs_training = True
-        self.needs_instance_alignment = True
-        self.num_epochs = num_epochs
 
-    def execute(
-        self,
-        data: List[np.ndarray],
-        labels: np.ndarray,
-    ):
-        input_dimension = {}
+        self.hidden_dim = int(hidden_dim)
+        self.num_heads = int(num_heads)
+        self.dropout = float(dropout)
+        self.batch_size = int(batch_size)
+        self.num_epochs = int(num_epochs)
+        self.learning_rate = float(learning_rate)
+
+        self.needs_training = True
+        self.needs_alignment = True
+        self.encoder = None
+        self.classification_head = None
+        self.input_dimensions = None
+        self.max_sequence_length = None
+        self.num_classes = None
+        self.is_trained = False
+        self.model_state = None
+
+        self._set_random_seeds()
+
+    def _set_random_seeds(self, seed=42):
+        os.environ["PYTHONHASHSEED"] = str(seed)
+        random.seed(seed)
+        np.random.seed(seed)
+        torch.manual_seed(seed)
+        torch.cuda.manual_seed(seed)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+
+    def _prepare_data(self, modalities: List[Modality]) -> Dict[str, torch.Tensor]:
         inputs = {}
+        input_dimensions = {}
         max_sequence_length = 0
-        masks = {}
-        for i, modality in enumerate(data):
-            modality_name = "modality_" + str(i)
-            shape = modality.shape
-            max_sequence_length = max(max_sequence_length, shape[1])
-            input_dimension[modality_name] = shape[2] if len(shape) > 2 else shape[1]
-            inputs[modality_name] = torch.from_numpy(np.stack(modality)).to(
-                get_device()
-            )
 
-            # attention_masks_list = [
-            #     entry["attention_masks"]
-            #     for entry in modality.metadata.values()
-            #     if "attention_masks" in entry
-            # ]
-            attention_masks_list = None
-            if attention_masks_list:
-                masks[modality_name] = (
-                    torch.tensor(np.array(attention_masks_list)).bool().to(get_device())
-                )
+        for i, modality in enumerate(modalities):
+            modality_name = f"modality_{i}"
+            data = np.array(modality.data)
+
+            if data.ndim == 1:
+                data = data.reshape(-1, 1, 1)
+            elif data.ndim == 2:
+                data = data.reshape(data.shape[0], 1, data.shape[1])
+            elif data.ndim == 3:
+                pass
             else:
-                masks[modality_name] = None
+                raise ValueError(
+                    f"Unsupported data shape: {data.shape}. Expected 1D, 2D, or 3D arrays."
+                )
+
+            input_dimensions[modality_name] = data.shape[2]  # Feature dimension
+            max_sequence_length = max(max_sequence_length, data.shape[1])
+
+            inputs[modality_name] = torch.from_numpy(data.astype(np.float32))
+
+        for modality_name, tensor in inputs.items():
+            if tensor.shape[1] < max_sequence_length:
+                pad_width = (0, 0, 0, max_sequence_length - tensor.shape[1], 0, 0)
+                inputs[modality_name] = F.pad(
+                    tensor, pad_width, mode="constant", value=0
+                )
+
+        return inputs, input_dimensions, max_sequence_length
+
+    def execute(self, modalities: List[Modality], labels: np.ndarray = None):
+        if labels is None:
+            raise ValueError("Attention fusion requires labels for training")
+
+        inputs, input_dimensions, max_sequence_length = self._prepare_data(modalities)
+        y = np.array(labels)
+
+        self.input_dimensions = input_dimensions
+        self.max_sequence_length = max_sequence_length
+        self.num_classes = len(np.unique(y))
 
         self.encoder = MultiModalAttentionFusion(
-            input_dimension,
+            self.input_dimensions,
             self.hidden_dim,
             self.num_heads,
             self.dropout,
-            max_sequence_length,
-            self.fusion_strategy,
+            self.max_sequence_length,
         )
 
-        head = FusedClassificationHead(
-            fused_dim=self.hidden_dim, num_classes=len(np.unique(labels))
+        self.classification_head = FusedClassificationHead(
+            fused_dim=self.hidden_dim, num_classes=self.num_classes
         )
+
+        device = get_device()
+        self.encoder.to(device)
+        self.classification_head.to(device)
+
         criterion = nn.CrossEntropyLoss()
         optimizer = torch.optim.Adam(
-            list(self.encoder.parameters()) + list(head.parameters()), lr=0.001
+            list(self.encoder.parameters())
+            + list(self.classification_head.parameters()),
+            lr=self.learning_rate,
         )
-        labels = torch.from_numpy(labels).to(get_device())
+
+        for modality_name in inputs:
+            inputs[modality_name] = inputs[modality_name].to(device)
+        labels_tensor = torch.from_numpy(y).long().to(device)
+
+        dataset_inputs = []
+        for i in range(len(y)):
+            sample_inputs = {name: tensor[i] for name, tensor in inputs.items()}
+            dataset_inputs.append((sample_inputs, labels_tensor[i]))
+
+        self.encoder.train()
+        self.classification_head.train()
 
         for epoch in range(self.num_epochs):
             total_loss = 0
-            total_accuracy = 0
-            for batch_idx in range(0, len(data), self.batch_size):
-                batched_input = {}
-                for modality, modality_data in inputs.items():
-                    batched_input[modality] = modality_data[
-                        batch_idx : batch_idx + self.batch_size
-                    ]
-                loss, predictions = self.train_encoder_step(
-                    head,
-                    inputs,
-                    labels[batch_idx : batch_idx + self.batch_size],
-                    criterion,
-                    optimizer,
-                )
-                total_loss += loss
-                total_accuracy += predictions
+            total_correct = 0
+            total_samples = 0
+
+            for batch_start in range(0, len(dataset_inputs), self.batch_size):
+                batch_end = min(batch_start + self.batch_size, len(dataset_inputs))
+                batch_data = dataset_inputs[batch_start:batch_end]
+
+                batch_inputs = {}
+                batch_labels = []
+
+                for sample_inputs, label in batch_data:
+                    batch_labels.append(label)
+                    for modality_name, tensor in sample_inputs.items():
+                        if modality_name not in batch_inputs:
+                            batch_inputs[modality_name] = []
+                        batch_inputs[modality_name].append(tensor)
+
+                for modality_name in batch_inputs:
+                    batch_inputs[modality_name] = torch.stack(
+                        batch_inputs[modality_name]
+                    )
+
+                batch_labels = torch.stack(batch_labels)
+
+                optimizer.zero_grad()
+
+                encoder_output = self.encoder(batch_inputs)
+                logits = self.classification_head(encoder_output["fused"])
+                loss = criterion(logits, batch_labels)
+
+                loss.backward()
+                optimizer.step()
+
+                total_loss += loss.item()
+                _, predicted = torch.max(logits.data, 1)
+                total_correct += (predicted == batch_labels).sum().item()
+                total_samples += batch_labels.size(0)
+
+        self.is_trained = True
+
+        self.model_state = {
+            "encoder_state_dict": self.encoder.state_dict(),
+            "classification_head_state_dict": self.classification_head.state_dict(),
+            "input_dimensions": self.input_dimensions,
+            "max_sequence_length": self.max_sequence_length,
+            "num_classes": self.num_classes,
+            "hidden_dim": self.hidden_dim,
+            "num_heads": self.num_heads,
+            "dropout": self.dropout,
+        }
 
-            if epoch % 50 == 0 or epoch == self.num_epochs - 1:
-                print(
-                    f"Epoch {epoch}, Loss: {total_loss:.4f}, accuracy: {total_accuracy/len(data):.4f}"
-                )
+    def apply_representation(self, modalities: List[Modality]) -> np.ndarray:
+        if not self.is_trained or self.encoder is None:
+            raise ValueError("Model must be trained before applying representation")
+
+        inputs, _, _ = self._prepare_data(modalities)
+
+        device = get_device()
+        self.encoder.to(device)
+
+        for modality_name in inputs:
+            inputs[modality_name] = inputs[modality_name].to(device)
 
-    # Training step (encoder + classification head)
-    def train_encoder_step(self, head, inputs, labels, criterion, optimizer):
-        self.encoder.train()
-        head.train()
-        optimizer.zero_grad()
-        output = self.encoder(inputs)
-        logits = head(output["fused"])
-        loss = criterion(logits, labels)
-        loss.backward()
-        optimizer.step()
-        _, predicted = torch.max(logits.data, 1)
-        return loss.item(), (predicted == labels).sum().item()
-
-    def apply_representation(self, modalities):
-        inputs = {}
-        for i, modality in enumerate(modalities):
-            modality_name = "modality_" + str(i)
-            inputs[modality_name] = torch.from_numpy(np.stack(modality)).to(
-                get_device()
-            )
         self.encoder.eval()
         with torch.no_grad():
-            output = self.encoder(inputs)
-        return output["fused"].cpu().numpy()
+            encoder_output = self.encoder(inputs)
+
+        return encoder_output["fused"].cpu().numpy()
+
+    def get_model_state(self) -> Dict[str, Any]:
+        return self.model_state
+
+    def set_model_state(self, state: Dict[str, Any]):
+        self.model_state = state
+        self.input_dimensions = state["input_dimensions"]
+        self.max_sequence_length = state["max_sequence_length"]
+        self.num_classes = state["num_classes"]
+
+        self.encoder = MultiModalAttentionFusion(
+            self.input_dimensions,
+            state["hidden_dim"],
+            state["num_heads"],
+            state["dropout"],
+            self.max_sequence_length,
+        )
+        self.encoder.load_state_dict(state["encoder_state_dict"])
+
+        self.classification_head = FusedClassificationHead(
+            fused_dim=state["hidden_dim"], num_classes=self.num_classes
+        )
+        self.classification_head.load_state_dict(
+            state["classification_head_state_dict"]
+        )
+
+        self.is_trained = True
 
 
 class FusedClassificationHead(nn.Module):
-    """
-    Simple classification head for supervision during training.
-    """
 
     def __init__(self, fused_dim, num_classes=2):
         super(FusedClassificationHead, self).__init__()
         self.head = nn.Sequential(
             nn.Linear(fused_dim, fused_dim // 2),
             nn.ReLU(),
+            nn.Dropout(0.1),
             nn.Linear(fused_dim // 2, num_classes),
-        ).to(get_device())
+        )
 
     def forward(self, fused):
         return self.head(fused)
@@ -194,50 +293,42 @@ def __init__(
         self.pooling_strategy = pooling_strategy
         self.max_seq_len = max_seq_len
 
-        # Project each modality to the same hidden dimension
         self.modality_projections = nn.ModuleDict(
             {
-                modality: nn.Linear(dim, hidden_dim).to(get_device())
+                modality: nn.Linear(dim, hidden_dim)
                 for modality, dim in modality_dims.items()
             }
         )
 
-        # Positional encoding for sequence modalities
         self.positional_encoding = nn.Parameter(
             torch.randn(max_seq_len, hidden_dim) * 0.1
-        ).to(get_device())
+        )
 
-        # Cross-modal attention
         self.cross_attention = nn.MultiheadAttention(
             embed_dim=hidden_dim, num_heads=num_heads, dropout=dropout, batch_first=True
-        ).to(get_device())
+        )
 
-        # Self-attention within each modality
         self.self_attention = nn.MultiheadAttention(
             embed_dim=hidden_dim, num_heads=num_heads, dropout=dropout, batch_first=True
-        ).to(get_device())
+        )
 
-        # Attention-based pooling for sequences
         if pooling_strategy == "attention":
             self.pooling_attention = nn.Sequential(
                 nn.Linear(hidden_dim, hidden_dim // 2),
                 nn.Tanh(),
                 nn.Linear(hidden_dim // 2, 1),
-            ).to(get_device())
+            )
 
-        # Modality-level attention for final fusion
         self.modality_attention = nn.Sequential(
             nn.Linear(hidden_dim, hidden_dim // 2),
             nn.ReLU(),
             nn.Linear(hidden_dim // 2, 1),
-        ).to(get_device())
+        )
 
-        # Layer normalization
-        self.layer_norm = nn.LayerNorm(hidden_dim).to(get_device())
-        self.dropout = nn.Dropout(dropout).to(get_device())
+        self.layer_norm = nn.LayerNorm(hidden_dim)
+        self.dropout_layer = nn.Dropout(dropout)
 
-        # Final projection
-        self.final_projection = nn.Linear(hidden_dim, hidden_dim).to(get_device())
+        self.final_projection = nn.Linear(hidden_dim, hidden_dim)
 
     def _handle_input_format(self, modality_tensor: torch.Tensor) -> torch.Tensor:
         if len(modality_tensor.shape) == 2:
@@ -269,29 +360,22 @@ def _pool_sequence(
 
         elif self.pooling_strategy == "max":
             if mask is not None:
-                # Set masked positions to large negative value before max pooling
                 masked_seq = sequence.masked_fill(~mask.unsqueeze(-1), float("-inf"))
                 return masked_seq.max(dim=1)[0]
             else:
                 return sequence.max(dim=1)[0]
 
         elif self.pooling_strategy == "cls":
-            # Use the first token (assuming it's a CLS token)
             return sequence[:, 0, :]
 
         elif self.pooling_strategy == "attention":
-            # Attention-based pooling
-            attention_scores = self.pooling_attention(sequence).squeeze(
-                -1
-            )  # (batch, seq)
+            attention_scores = self.pooling_attention(sequence).squeeze(-1)
 
             if mask is not None:
                 attention_scores = attention_scores.masked_fill(~mask, float("-inf"))
 
-            attention_weights = F.softmax(attention_scores, dim=1)  # (batch, seq)
-            return (sequence * attention_weights.unsqueeze(-1)).sum(
-                dim=1
-            )  # (batch, hidden)
+            attention_weights = F.softmax(attention_scores, dim=1)
+            return (sequence * attention_weights.unsqueeze(-1)).sum(dim=1)
 
         else:
             raise ValueError(f"Unknown pooling strategy: {self.pooling_strategy}")
@@ -323,7 +407,7 @@ def forward(
                     key_padding_mask=~mask if mask is not None else None,
                 )
 
-                projected = self.layer_norm(projected + self.dropout(attended))
+                projected = self.layer_norm(projected + self.dropout_layer(attended))
 
                 pooled = self._pool_sequence(projected, mask)
             else:
@@ -339,7 +423,7 @@ def forward(
             )
 
             cross_attended = self.layer_norm(
-                modality_stack + self.dropout(cross_attended)
+                modality_stack + self.dropout_layer(cross_attended)
             )
 
             updated_embeddings = {

From 672c517c98eee840b3716c45ea735ceadee202c9 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Wed, 24 Sep 2025 10:23:43 +0200
Subject: [PATCH 15/22] cleanup

---
 src/main/python/systemds/scuro/__init__.py    |   4 -
 .../systemds/scuro/drsearch/dr_search.py      | 167 -----------------
 .../scuro/drsearch/hyperparameter_tuner.py    | 103 ++++++-----
 .../scuro/drsearch/multimodal_optimizer.py    |  30 +--
 .../scuro/drsearch/representation_cache.py    | 128 -------------
 .../scuro/drsearch/unimodal_optimizer.py      |   9 +
 .../systemds/scuro/modality/modality.py       |  12 +-
 .../scuro/modality/modality_identifier.py     |  34 ----
 .../systemds/scuro/modality/transformed.py    |   1 +
 .../multimodal_attention_fusion.py            |   2 +-
 src/main/python/tests/scuro/test_dr_search.py | 174 ------------------
 src/main/python/tests/scuro/test_hp_tuner.py  |  51 +++--
 .../tests/scuro/test_multimodal_fusion.py     |  20 +-
 13 files changed, 145 insertions(+), 590 deletions(-)
 delete mode 100644 src/main/python/systemds/scuro/drsearch/dr_search.py
 delete mode 100644 src/main/python/systemds/scuro/drsearch/representation_cache.py
 delete mode 100644 src/main/python/systemds/scuro/modality/modality_identifier.py
 delete mode 100644 src/main/python/tests/scuro/test_dr_search.py

diff --git a/src/main/python/systemds/scuro/__init__.py b/src/main/python/systemds/scuro/__init__.py
index da9e12e3b74..b567b300247 100644
--- a/src/main/python/systemds/scuro/__init__.py
+++ b/src/main/python/systemds/scuro/__init__.py
@@ -71,11 +71,9 @@
 from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.unimodal_modality import UnimodalModality
-from systemds.scuro.drsearch.dr_search import DRSearch
 from systemds.scuro.drsearch.task import Task
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.drsearch.optimization_data import OptimizationData
-from systemds.scuro.drsearch.representation_cache import RepresentationCache
 from systemds.scuro.representations.covarep_audio_features import (
     RMSE,
     Spectral,
@@ -131,11 +129,9 @@
     "TransformedModality",
     "ModalityType",
     "UnimodalModality",
-    "DRSearch",
     "Task",
     "Registry",
     "OptimizationData",
-    "RepresentationCache",
     "UnimodalOptimizer",
     "MultimodalOptimizer",
     "ZeroCrossing",
diff --git a/src/main/python/systemds/scuro/drsearch/dr_search.py b/src/main/python/systemds/scuro/drsearch/dr_search.py
deleted file mode 100644
index 601001c7428..00000000000
--- a/src/main/python/systemds/scuro/drsearch/dr_search.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# -------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# -------------------------------------------------------------
-import itertools
-import random
-from typing import List
-
-from systemds.scuro.drsearch.task import Task
-from systemds.scuro.modality.modality import Modality
-from systemds.scuro.representations.representation import Representation
-
-import warnings
-
-warnings.filterwarnings("ignore")
-
-
-def get_modalities_by_name(modalities, name):
-    for modality in modalities:
-        if modality.name == name:
-            return modality
-
-    raise "Modality " + name + "not in modalities"
-
-
-class DRSearch:
-    def __init__(
-        self,
-        modalities: List[Modality],
-        task: Task,
-        representations: List[Representation],
-    ):
-        """
-        The DRSearch primitive finds the best uni- or multimodal data representation for the given modalities for
-        a specific task
-        :param modalities: List of uni-modal modalities
-        :param task: custom task
-        :param representations: List of representations to be evaluated
-        """
-        self.modalities = modalities
-        self.task = task
-        self.representations = representations
-        self.scores = {}
-        self.best_modalities = None
-        self.best_representation = None
-        self.best_score = -1
-
-    def set_best_params(
-        self,
-        representation: Representation,
-        scores: List[float],
-        modality_names: List[str],
-    ):
-        """
-        Updates the best parameters for given modalities, representation, and score
-        :param representation: The representation used to retrieve the current score
-        :param scores: achieved train/test scores for the set of modalities and representation
-        :param modality_names: List of modality names used in this setting
-        :return:
-        """
-
-        # check if modality name is already in dictionary
-        if "_".join(modality_names) not in list(self.scores.keys()):
-            # if not add it to dictionary
-            self.scores["_".join(modality_names)] = {}
-
-        # set score for representation
-        self.scores["_".join(modality_names)][representation] = scores
-
-        # compare current score with best score
-        if scores[1] > self.best_score:
-            self.best_score = scores[1]
-            self.best_representation = representation
-            self.best_modalities = modality_names
-
-    def reset_best_params(self):
-        self.best_score = -1
-        self.best_modalities = None
-        self.best_representation = None
-        self.scores = {}
-
-    def fit_random(self, seed=-1):
-        """
-        This method randomly selects a modality or combination of modalities and representation
-        """
-        if seed != -1:
-            random.seed(seed)
-
-        modalities = []
-        for M in range(1, len(self.modalities) + 1):
-            for combination in itertools.combinations(self.modalities, M):
-                modalities.append(combination)
-
-        modality_combination = random.choice(modalities)
-        representation = random.choice(self.representations)
-
-        modality = modality_combination[0].combine(
-            list(modality_combination[1:]), representation
-        )
-
-        scores = self.task.run(modality.data)
-        self.set_best_params(representation, scores, modality.get_modality_names())
-
-        return self.best_representation, self.best_score, self.best_modalities
-
-    def fit_enumerate_all(self):
-        """
-        This method finds the best representation out of a given List of uni-modal modalities and
-        representations
-        :return: The best parameters found in the search procedure
-        """
-
-        for M in range(1, len(self.modalities) + 1):
-            for combination in itertools.combinations(self.modalities, M):
-                for representation in self.representations:
-                    modality = combination[0]
-                    if len(combination) > 1:
-                        modality = combination[0].combine(
-                            list(combination[1:]), representation
-                        )
-
-                    scores = self.task.run(modality.data)
-                    self.set_best_params(
-                        representation,
-                        scores,
-                        modality.get_modality_names(),
-                    )
-
-        return self.best_representation, self.best_score, self.best_modalities
-
-    def transform(self, modalities: List[Modality]):
-        """
-        The transform method takes a list of uni-modal modalities and creates an aligned representation
-        by using the best parameters found during the fitting step
-        :param modalities: List of uni-modal modalities
-        :return: aligned data
-        """
-
-        if self.best_score == -1:
-            raise "Please fit representations first!"
-
-        used_modalities = []
-
-        for modality_name in self.best_modalities:
-            used_modalities.append(get_modalities_by_name(modalities, modality_name))
-
-        modality = used_modalities[0].combine(
-            used_modalities[1:], self.best_representation
-        )
-
-        return modality.data
diff --git a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
index e2a03b82c5c..e50e7d1ee22 100644
--- a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
+++ b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
@@ -203,56 +203,69 @@ def evaluate_dag_config(self, dag, params, node_order, modality_ids, task):
     def tune_multimodal_representations(
         self,
         optimization_results,
-        task: Task,
         k: int = 1,
         optimize_unimodal: bool = True,
         max_eval_per_rep: Optional[int] = None,
     ):
-        best_optimization_results = optimization_results[:k]
-        results = []
-        for representation in best_optimization_results:
-            if optimize_unimodal:
-                dag = copy.deepcopy(representation.dag)
-                index = 0
-                for i, node in enumerate(representation.dag.nodes):
-                    if not node.inputs:
-                        leaf_node_id = node.node_id
-                        leaf_nodes = self.representations[task.model.name][
-                            node.modality_id
-                        ][node.representation_index].dag.nodes
-                        for leaf_idx, node in enumerate(dag.nodes):
-                            if node.node_id == leaf_node_id:
-                                dag.nodes[leaf_idx : leaf_idx + 1] = leaf_nodes
-                                index = leaf_idx + len(leaf_nodes) - 1
-                                break
-
-                        for node in dag.nodes:
-                            try:
-                                idx = node.inputs.index(leaf_node_id)
-                                node.inputs[idx] = dag.nodes[index].node_id
-                                break
-                            except ValueError:
-                                continue
-
-                if self._dag_has_trainable_fusion(dag):
-                    result = self.tune_trainable_fusion_dag(dag, task, max_eval_per_rep)
-                else:
-                    result = self.tune_dag_representation(
-                        dag, dag.root_node_id, task, max_eval_per_rep
-                    )
-            else:
-                if self._dag_has_trainable_fusion(representation.dag):
-                    result = self.tune_trainable_fusion_dag(
-                        representation.dag, task, max_eval_per_rep
-                    )
+
+        best_results = {}
+        for task in self.tasks:
+            best_results[task.model.name] = sorted(
+                optimization_results[task.model.name],
+                key=lambda x: x.val_score,
+                reverse=True,
+            )[:k]
+
+        results = {}
+        for task in self.tasks:
+            results[task.model.name] = []
+            best_optimization_results = best_results[task.model.name]
+
+            for representation in best_optimization_results:
+                if optimize_unimodal:
+                    dag = copy.deepcopy(representation.dag)
+                    index = 0
+                    for i, node in enumerate(representation.dag.nodes):
+                        if not node.inputs:
+                            leaf_node_id = node.node_id
+                            leaf_nodes = self.representations[task.model.name][
+                                node.modality_id
+                            ][node.representation_index].dag.nodes
+                            for leaf_idx, node in enumerate(dag.nodes):
+                                if node.node_id == leaf_node_id:
+                                    dag.nodes[leaf_idx : leaf_idx + 1] = leaf_nodes
+                                    index = leaf_idx + len(leaf_nodes) - 1
+                                    break
+
+                            for node in dag.nodes:
+                                try:
+                                    idx = node.inputs.index(leaf_node_id)
+                                    node.inputs[idx] = dag.nodes[index].node_id
+                                    break
+                                except ValueError:
+                                    continue
+
+                    if self._dag_has_trainable_fusion(dag):
+                        result = self.tune_trainable_fusion_dag(
+                            dag, task, max_eval_per_rep
+                        )
+                    else:
+                        result = self.tune_dag_representation(
+                            dag, dag.root_node_id, task, max_eval_per_rep
+                        )
                 else:
-                    result = self.tune_dag_representation(
-                        representation.dag,
-                        representation.dag.root_node_id,
-                        task,
-                        max_eval_per_rep,
-                    )
-            results.append(result)
+                    if self._dag_has_trainable_fusion(representation.dag):
+                        result = self.tune_trainable_fusion_dag(
+                            representation.dag, task, max_eval_per_rep
+                        )
+                    else:
+                        result = self.tune_dag_representation(
+                            representation.dag,
+                            representation.dag.root_node_id,
+                            task,
+                            max_eval_per_rep,
+                        )
+                results[task.model.name].append(result)
 
         self.results = results
 
diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
index 727ca28b3d9..0d3a896e8a4 100644
--- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
@@ -21,6 +21,7 @@
 
 
 import itertools
+import pickle
 import time
 from dataclasses import dataclass
 from typing import List, Dict, Any, Generator
@@ -294,14 +295,15 @@ def _get_modality_by_id_and_instance_id(self, modalities, modality_id, instance_
                     counter += 1
         return None
 
-    def optimize(self, max_combinations: int = None) -> List["OptimizationResult"]:
-        all_results = []
+    def optimize(
+        self, max_combinations: int = None
+    ) -> Dict[str, List["OptimizationResult"]]:
+        all_results = {}
 
         for task in self.tasks:
             if self.debug:
                 print(f"Optimizing multimodal fusion for task: {task.model.name}")
-
-            task_results = []
+            all_results[task.model.name] = []
             evaluated_count = 0
 
             for modality_subset in self._generate_modality_combinations():
@@ -318,7 +320,7 @@ def optimize(self, max_combinations: int = None) -> List["OptimizationResult"]:
 
                         result = self._evaluate_dag(dag, task)
                         if result is not None:
-                            task_results.append(result)
+                            all_results[task.model.name].append(result)
 
                         evaluated_count += 1
 
@@ -331,22 +333,28 @@ def optimize(self, max_combinations: int = None) -> List["OptimizationResult"]:
                 if max_combinations and evaluated_count >= max_combinations:
                     break
 
-            all_results.extend(task_results)
-
             if self.debug:
                 print(
-                    f"  Task completed: {len(task_results)} valid combinations evaluated"
+                    f"  Task completed: {len(all_results[task.model.name])} valid combinations evaluated"
                 )
 
         self.optimization_results = all_results
 
         if self.debug:
-            print(
-                f"\nOptimization completed: {len(all_results)} total combinations evaluated"
-            )
+            print(f"\nOptimization completed")
 
         return all_results
 
+    def store_results(self, file_name=None):
+        if file_name is None:
+            import time
+
+            timestr = time.strftime("%Y%m%d-%H%M%S")
+            file_name = "multimodal_optimizer" + timestr + ".pkl"
+
+        with open(file_name, "wb") as f:
+            pickle.dump(self.optimization_results, f)
+
 
 @dataclass
 class OptimizationResult:
diff --git a/src/main/python/systemds/scuro/drsearch/representation_cache.py b/src/main/python/systemds/scuro/drsearch/representation_cache.py
deleted file mode 100644
index 4df478272df..00000000000
--- a/src/main/python/systemds/scuro/drsearch/representation_cache.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# -------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# -------------------------------------------------------------
-import copy
-import os
-import pickle
-import tempfile
-
-from systemds.scuro.modality.transformed import TransformedModality
-
-
-class RepresentationCache:
-    """ """
-
-    _instance = None
-    _cache_dir = None
-    debug = False
-
-    def __new__(cls, debug=False):
-        if not cls._instance:
-            cls.debug = debug
-            cls._instance = super().__new__(cls)
-            cls._cache_dir = tempfile.TemporaryDirectory()
-            # cls._cache_dir = "representation_cache"
-        return cls._instance
-
-    def _generate_cache_filename(self, modality_id, operators):
-        """
-        Generate a unique filename for an operator based on its name.
-
-        :param operator_name: The name of the operator.
-        :return: A full path to the cache file.
-        """
-        op_names = []
-        filename = modality_id
-        for operator in operators:
-            if isinstance(operator, str):
-                op_names.append(operator)
-                filename += operator
-            else:
-                op_names.append(operator.name)
-                filename += operator.name
-
-        return os.path.join(self._cache_dir.name, filename), op_names  # _cache_dir.name
-
-    def save_to_cache(self, modality, used_op_names, operators):
-        """
-        Save data to a cache file.
-
-        :param operator_name: The name of the operator.
-        :param data: The data to save.
-        """
-        filename, op_names = self._generate_cache_filename(
-            str(modality.modality_id) + used_op_names, operators
-        )
-        if not os.path.exists(filename):
-            with open(f"{filename}.pkl", "wb") as f:
-                pickle.dump(modality.data, f)
-
-            with open(f"{filename}.meta", "wb") as f:
-                pickle.dump(modality.metadata, f)
-
-            if self.debug:
-                str_names = ", ".join(op_names)
-                print(
-                    f"Saved data for operator {str(modality.modality_id)}{used_op_names}{str_names} to cache: {filename}"
-                )
-
-    def load_from_cache(self, modality, operators):
-        """
-        Load data from a cache file if it exists.
-
-        :param operator_name: The name of the operator.
-        :return: The cached data or None if not found.
-        """
-        ops = copy.deepcopy(operators)
-        filename, op_names = self._generate_cache_filename(
-            str(modality.modality_id), ops
-        )
-        dropped_ops = []
-        while not os.path.exists(f"{filename}.pkl"):
-            op_names.pop()
-            dropped_ops.append(ops.pop())
-            if len(ops) < 1:
-                break
-            filename, op_names = self._generate_cache_filename(
-                str(modality.modality_id), ops
-            )
-
-        dropped_ops.reverse()
-        op_names = "".join(op_names)
-
-        if os.path.exists(f"{filename}.pkl"):
-            with open(f"{filename}.meta", "rb") as f:
-                metadata = pickle.load(f)
-
-            transformed_modality = TransformedModality(
-                modality,
-                op_names,
-            )
-            data = None
-            with open(f"{filename}.pkl", "rb") as f:
-                if self.debug:
-                    print(
-                        f"Loaded cached data for operator '{str(modality.modality_id) + op_names}' from {filename}"
-                    )
-                data = pickle.load(f)
-            transformed_modality.data = data
-            return transformed_modality, dropped_ops, op_names
-
-        return None, dropped_ops, op_names
diff --git a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
index 9d5299eeac9..10b127f5b60 100644
--- a/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/unimodal_optimizer.py
@@ -26,6 +26,7 @@
 from typing import List, Any
 from functools import lru_cache
 
+from systemds.scuro import ModalityType
 from systemds.scuro.representations.fusion import Fusion
 from systemds.scuro.representations.concatenation import Concatenation
 from systemds.scuro.representations.hadamard import Hadamard
@@ -265,6 +266,14 @@ def _build_modality_dag(
         context_operators = self._get_context_operators()
 
         for context_op in context_operators:
+            if modality.modality_type != ModalityType.TEXT:
+                context_node_id = builder.create_operation_node(
+                    context_op,
+                    [leaf_id],
+                    context_op().parameters,
+                )
+                dags.append(builder.build(context_node_id))
+
             context_node_id = builder.create_operation_node(
                 context_op,
                 [current_node_id],
diff --git a/src/main/python/systemds/scuro/modality/modality.py b/src/main/python/systemds/scuro/modality/modality.py
index f1b00fefcfe..98dd631e12c 100644
--- a/src/main/python/systemds/scuro/modality/modality.py
+++ b/src/main/python/systemds/scuro/modality/modality.py
@@ -132,6 +132,8 @@ def pad(self, value=0, max_len=None):
         try:
             if max_len is None:
                 result = np.array(self.data)
+            elif isinstance(self.data, np.ndarray) and self.data.shape[1] == max_len:
+                result = self.data
             else:
                 raise "Needs padding to max_len"
         except:
@@ -144,8 +146,16 @@ def pad(self, value=0, max_len=None):
             for i, seq in enumerate(self.data):
                 data = seq[:maxlen]
                 result[i, : len(data)] = data
-                # TODO: add padding to metadata as attention_masks
 
+                if self.has_metadata():
+                    attention_mask = np.zeros(result.shape[1], dtype=np.int8)
+                    attention_mask[: len(seq[:maxlen])] = 1
+                    md_key = list(self.metadata.keys())[i]
+                    if "attention_mask" in self.metadata[md_key]:
+                        self.metadata[md_key]["attention_mask"] = attention_mask
+                    else:
+                        self.metadata[md_key].update({"attention_mask": attention_mask})
+        # TODO: this might need to be a new modality (otherwise we loose the original data)
         self.data = result
 
     def get_data_layout(self):
diff --git a/src/main/python/systemds/scuro/modality/modality_identifier.py b/src/main/python/systemds/scuro/modality/modality_identifier.py
deleted file mode 100644
index 5eeee7dc131..00000000000
--- a/src/main/python/systemds/scuro/modality/modality_identifier.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# -------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# -------------------------------------------------------------
-class ModalityIdentifier:
-    """ """
-
-    _instance = None
-    id = -1
-
-    def __new__(cls):
-        if not cls._instance:
-            cls._instance = super().__new__(cls)
-        return cls._instance
-
-    def new_id(self):  # TODO: make threadsafe when parallelizing
-        self.id += 1
-        return self.id
diff --git a/src/main/python/systemds/scuro/modality/transformed.py b/src/main/python/systemds/scuro/modality/transformed.py
index 9f8d17c0f79..4e68c222791 100644
--- a/src/main/python/systemds/scuro/modality/transformed.py
+++ b/src/main/python/systemds/scuro/modality/transformed.py
@@ -63,6 +63,7 @@ def __init__(
     def add_transformation(self, transformation, modality):
         if (
             transformation.__class__.__bases__[0].__name__ == "Fusion"
+            and type(modality).__name__ == "TransformedModality"
             and modality.transformation[0].__class__.__bases__[0].__name__ != "Fusion"
         ):
             self.transformation = []
diff --git a/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py b/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py
index 092ebebb441..d17451932e1 100644
--- a/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py
+++ b/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py
@@ -283,7 +283,7 @@ def __init__(
         num_heads: int,
         dropout: float,
         max_seq_len: int,
-        pooling_strategy: str,
+        pooling_strategy: str = "mean",
     ):
         super().__init__()
 
diff --git a/src/main/python/tests/scuro/test_dr_search.py b/src/main/python/tests/scuro/test_dr_search.py
deleted file mode 100644
index 3e0e702e6f3..00000000000
--- a/src/main/python/tests/scuro/test_dr_search.py
+++ /dev/null
@@ -1,174 +0,0 @@
-# -------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# -------------------------------------------------------------
-import os
-import shutil
-import unittest
-
-import numpy as np
-from sklearn import svm
-from sklearn.metrics import classification_report
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import MinMaxScaler
-
-from systemds.scuro.modality.type import ModalityType
-from systemds.scuro.drsearch.dr_search import DRSearch
-from systemds.scuro.drsearch.task import Task
-from systemds.scuro.models.model import Model
-from systemds.scuro.representations.average import Average
-from systemds.scuro.representations.bert import Bert
-from systemds.scuro.representations.concatenation import Concatenation
-from systemds.scuro.representations.lstm import LSTM
-from systemds.scuro.representations.max import RowMax
-from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
-from systemds.scuro.representations.hadamard import Hadamard
-from systemds.scuro.representations.resnet import ResNet
-from systemds.scuro.representations.sum import Sum
-from tests.scuro.data_generator import ModalityRandomDataGenerator
-
-
-import warnings
-
-warnings.filterwarnings("always")
-
-
-class TestSVM(Model):
-    def __init__(self):
-        super().__init__("Test")
-
-    def fit(self, X, y, X_test, y_test):
-        self.clf = svm.SVC(C=1, gamma="scale", kernel="rbf", verbose=False)
-        self.clf = self.clf.fit(X, np.array(y))
-        y_pred = self.clf.predict(X)
-
-        return classification_report(
-            y, y_pred, output_dict=True, digits=3, zero_division=1
-        )["accuracy"]
-
-    def test(self, test_X: np.ndarray, test_y: np.ndarray):
-        y_pred = self.clf.predict(np.array(test_X))  # noqa
-
-        return classification_report(
-            np.array(test_y), y_pred, output_dict=True, digits=3, zero_division=1
-        )["accuracy"]
-
-
-def scale_data(data, train_indizes):
-    data = np.array(data).reshape(len(data), -1)
-    scaler = MinMaxScaler(feature_range=(0, 1))
-    scaler.fit(data[train_indizes])
-    return scaler.transform(data)
-
-
-class TestDataLoaders(unittest.TestCase):
-    train_indizes = None
-    val_indizes = None
-    test_file_path = None
-    mods = None
-    text = None
-    audio = None
-    video = None
-    data_generator = None
-    num_instances = 0
-    representations = None
-
-    @classmethod
-    def setUpClass(cls):
-        cls.num_instances = 20
-        cls.data_generator = ModalityRandomDataGenerator()
-
-        cls.labels = ModalityRandomDataGenerator().create_balanced_labels(
-            num_instances=cls.num_instances
-        )
-        # TODO: adapt the representation so they return non aggregated values. Apply windowing operation instead
-
-        cls.video = cls.data_generator.create1DModality(
-            cls.num_instances, 100, ModalityType.VIDEO
-        )
-        cls.text = cls.data_generator.create1DModality(
-            cls.num_instances, 100, ModalityType.TEXT
-        )
-        cls.audio = cls.data_generator.create1DModality(
-            cls.num_instances, 100, ModalityType.AUDIO
-        )
-
-        cls.mods = [cls.video, cls.audio, cls.text]
-
-        split = train_test_split(
-            np.array(range(cls.num_instances)),
-            cls.labels,
-            test_size=0.2,
-            random_state=42,
-        )
-        cls.train_indizes, cls.val_indizes = [int(i) for i in split[0]], [
-            int(i) for i in split[1]
-        ]
-
-        for m in cls.mods:
-            m.data = scale_data(m.data, cls.train_indizes)
-
-        cls.representations = [
-            Concatenation(),
-            Average(),
-            RowMax(),
-            Hadamard(),
-            Sum(),
-            LSTM(width=256, depth=3),
-        ]
-
-    def test_enumerate_all(self):
-        task = Task(
-            "TestTask",
-            TestSVM(),
-            self.labels,
-            self.train_indizes,
-            self.val_indizes,
-        )
-        dr_search = DRSearch(self.mods, task, self.representations)
-        best_representation, best_score, best_modalities = dr_search.fit_enumerate_all()
-
-        for r in dr_search.scores.values():
-            for scores in r.values():
-                assert scores[1] <= best_score
-
-    def test_enumerate_all_vs_random(self):
-        task = Task(
-            "TestTask",
-            TestSVM(),
-            self.labels,
-            self.train_indizes,
-            self.val_indizes,
-        )
-        dr_search = DRSearch(self.mods, task, self.representations)
-        best_representation_enum, best_score_enum, best_modalities_enum = (
-            dr_search.fit_enumerate_all()
-        )
-
-        dr_search.reset_best_params()
-
-        best_representation_rand, best_score_rand, best_modalities_rand = (
-            dr_search.fit_random(seed=42)
-        )
-
-        assert best_score_rand <= best_score_enum
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/src/main/python/tests/scuro/test_hp_tuner.py b/src/main/python/tests/scuro/test_hp_tuner.py
index f939f5c6b42..48b916ab908 100644
--- a/src/main/python/tests/scuro/test_hp_tuner.py
+++ b/src/main/python/tests/scuro/test_hp_tuner.py
@@ -29,6 +29,7 @@
 from systemds.scuro.drsearch.multimodal_optimizer import MultimodalOptimizer
 from systemds.scuro.representations.average import Average
 from systemds.scuro.representations.concatenation import Concatenation
+from systemds.scuro.representations.lstm import LSTM
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.models.model import Model
 from systemds.scuro.drsearch.task import Task
@@ -75,6 +76,31 @@ def test(self, test_X: np.ndarray, test_y: np.ndarray):
         )["accuracy"]
 
 
+class TestSVM2(Model):
+    def __init__(self):
+        super().__init__("TestSVM2")
+
+    def fit(self, X, y, X_test, y_test):
+        if X.ndim > 2:
+            X = X.reshape(X.shape[0], -1)
+        self.clf = svm.SVC(C=1, gamma="scale", kernel="rbf", verbose=False)
+        self.clf = self.clf.fit(X, np.array(y))
+        y_pred = self.clf.predict(X)
+
+        return classification_report(
+            y, y_pred, output_dict=True, digits=3, zero_division=1
+        )["accuracy"]
+
+    def test(self, test_X: np.ndarray, test_y: np.ndarray):
+        if test_X.ndim > 2:
+            test_X = test_X.reshape(test_X.shape[0], -1)
+        y_pred = self.clf.predict(np.array(test_X))  # noqa
+
+        return classification_report(
+            np.array(test_y), y_pred, output_dict=True, digits=3, zero_division=1
+        )["accuracy"]
+
+
 from unittest.mock import patch
 
 
@@ -108,7 +134,14 @@ def setUpClass(cls):
                 cls.labels,
                 cls.train_indizes,
                 cls.val_indizes,
-            )
+            ),
+            Task(
+                "UnimodalRepresentationTask2",
+                TestSVM2(),
+                cls.labels,
+                cls.train_indizes,
+                cls.val_indizes,
+            ),
         ]
 
     def test_hp_tuner_for_audio_modality(self):
@@ -175,7 +208,7 @@ def run_hp_for_modality(
             },
         ):
             registry = Registry()
-            registry._fusion_operators = [Average, Concatenation]
+            registry._fusion_operators = [Average, Concatenation, LSTM]
             unimodal_optimizer = UnimodalOptimizer(modalities, self.tasks, False)
             unimodal_optimizer.optimize()
 
@@ -194,21 +227,17 @@ def run_hp_for_modality(
                 )
                 fusion_results = m_o.optimize()
 
-                best_results = sorted(
-                    fusion_results, key=lambda x: x.val_score, reverse=True
-                )
-
                 hp.tune_multimodal_representations(
-                    best_results,
-                    self.tasks[0],
+                    fusion_results,
                     k=2,
                     optimize_unimodal=tune_unimodal_representations,
                 )
-                assert len(hp.results) == 2
+
             else:
                 hp.tune_unimodal_representations()
-                assert len(hp.results) == len(self.tasks)
-                assert len(hp.results[self.tasks[0].model.name]) == 2
+
+            assert len(hp.results) == len(self.tasks)
+            assert len(hp.results[self.tasks[0].model.name]) == 2
 
 
 if __name__ == "__main__":
diff --git a/src/main/python/tests/scuro/test_multimodal_fusion.py b/src/main/python/tests/scuro/test_multimodal_fusion.py
index aacf1a26eb8..0925e47cf25 100644
--- a/src/main/python/tests/scuro/test_multimodal_fusion.py
+++ b/src/main/python/tests/scuro/test_multimodal_fusion.py
@@ -29,6 +29,7 @@
 from systemds.scuro.drsearch.multimodal_optimizer import MultimodalOptimizer
 from systemds.scuro.drsearch.unimodal_optimizer import UnimodalOptimizer
 from systemds.scuro.representations.concatenation import Concatenation
+from systemds.scuro.representations.lstm import LSTM
 from systemds.scuro.representations.average import Average
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.models.model import Model
@@ -137,19 +138,12 @@ def test_multimodal_fusion(self):
         text_data, text_md = ModalityRandomDataGenerator().create_text_data(
             self.num_instances
         )
-        video_data, video_md = ModalityRandomDataGenerator().create_visual_modality(
-            self.num_instances, 60
-        )
+
         audio = UnimodalModality(
             TestDataLoader(
                 self.indices, None, ModalityType.AUDIO, audio_data, np.float32, audio_md
             )
         )
-        video = UnimodalModality(
-            TestDataLoader(
-                self.indices, None, ModalityType.VIDEO, video_data, np.float32, video_md
-            )
-        )
         text = UnimodalModality(
             TestDataLoader(
                 self.indices, None, ModalityType.TEXT, text_data, str, text_md
@@ -168,14 +162,12 @@ def test_multimodal_fusion(self):
             },
         ):
             registry = Registry()
-            registry._fusion_operators = [Average, Concatenation]
-            unimodal_optimizer = UnimodalOptimizer(
-                [audio, text, video], [task], debug=False
-            )
+            registry._fusion_operators = [Average, Concatenation, LSTM]
+            unimodal_optimizer = UnimodalOptimizer([audio, text], [task], debug=False)
             unimodal_optimizer.optimize()
             unimodal_optimizer.operator_performance.get_k_best_results(audio, 2, task)
             m_o = MultimodalOptimizer(
-                [audio, text, video],
+                [audio, text],
                 unimodal_optimizer.operator_performance,
                 [task],
                 debug=False,
@@ -185,7 +177,7 @@ def test_multimodal_fusion(self):
             fusion_results = m_o.optimize()
 
             best_results = sorted(
-                fusion_results, key=lambda x: x.val_score, reverse=True
+                fusion_results[task.model.name], key=lambda x: x.val_score, reverse=True
             )[:2]
 
             assert best_results[0].val_score >= best_results[1].val_score

From f9eca6b9a1fc14ec65700b821fd448c6ba737697 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Thu, 9 Oct 2025 11:21:04 +0200
Subject: [PATCH 16/22] remove imports

---
 .../tests/scuro/test_unimodal_representations.py     | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/main/python/tests/scuro/test_unimodal_representations.py b/src/main/python/tests/scuro/test_unimodal_representations.py
index 52bca501ace..dbd5b8899ac 100644
--- a/src/main/python/tests/scuro/test_unimodal_representations.py
+++ b/src/main/python/tests/scuro/test_unimodal_representations.py
@@ -19,8 +19,6 @@
 #
 # -------------------------------------------------------------
 
-import os
-import shutil
 import unittest
 import copy
 import numpy as np
@@ -37,20 +35,14 @@
 from systemds.scuro.representations.word2vec import W2V
 from systemds.scuro.representations.tfidf import TfIdf
 from systemds.scuro.modality.unimodal_modality import UnimodalModality
-from systemds.scuro.representations.bert import Bert
 from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
 from systemds.scuro.representations.mfcc import MFCC
 from systemds.scuro.representations.resnet import ResNet
 from systemds.scuro.representations.swin_video_transformer import SwinVideoTransformer
-from tests.scuro.data_generator import setup_data
 from tests.scuro.data_generator import (
-    setup_data,
     TestDataLoader,
     ModalityRandomDataGenerator,
 )
-from systemds.scuro.dataloader.audio_loader import AudioLoader
-from systemds.scuro.dataloader.video_loader import VideoLoader
-from systemds.scuro.dataloader.text_loader import TextLoader
 from systemds.scuro.modality.type import ModalityType
 
 
@@ -78,7 +70,7 @@ def test_audio_representations(self):
             ZeroCrossing(),
             RMSE(),
             Pitch(),
-        ]  # TODO: add FFT, TFN, 1DCNN
+        ]
         audio_data, audio_md = ModalityRandomDataGenerator().create_audio_data(
             self.num_instances, 1000
         )
@@ -120,7 +112,7 @@ def test_video_representations(self):
             assert r.data[0].ndim == 2
 
     def test_text_representations(self):
-        test_representations = [BoW(2, 2), W2V(5, 2, 2), TfIdf(2), Bert()]
+        test_representations = [BoW(2, 2), W2V(5, 2, 2), TfIdf(2)]
         text_data, text_md = ModalityRandomDataGenerator().create_text_data(
             self.num_instances
         )

From b653beeb96e5534d8aa03da3f961c833c700f8c8 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Wed, 15 Oct 2025 20:27:17 +0200
Subject: [PATCH 17/22] fix trainable representations in optimizer and hp tuner

---
 .../systemds/scuro/dataloader/base_loader.py  |   4 +-
 .../scuro/drsearch/hyperparameter_tuner.py    | 242 +++---------------
 .../scuro/drsearch/multimodal_optimizer.py    |  72 +-----
 .../scuro/drsearch/representation_dag.py      |  40 ++-
 .../systemds/scuro/modality/transformed.py    |  19 +-
 .../python/systemds/scuro/modality/type.py    |  11 +
 .../scuro/modality/unimodal_modality.py       |   5 +
 .../scuro/representations/aggregate.py        |  30 ++-
 .../systemds/scuro/representations/fusion.py  |  27 +-
 .../systemds/scuro/representations/lstm.py    |  18 +-
 .../systemds/scuro/representations/resnet.py  |   8 +-
 .../scuro/representations/spectrogram.py      |   4 +-
 .../representations/window_aggregation.py     |   1 +
 .../python/systemds/scuro/utils/identifier.py |  27 ++
 14 files changed, 187 insertions(+), 321 deletions(-)

diff --git a/src/main/python/systemds/scuro/dataloader/base_loader.py b/src/main/python/systemds/scuro/dataloader/base_loader.py
index f21f212e7a0..33b418efb30 100644
--- a/src/main/python/systemds/scuro/dataloader/base_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/base_loader.py
@@ -127,8 +127,8 @@ def _load(self, indices: List[str]):
         if isinstance(file_names, str):
             self.extract(file_names, indices)
         else:
-            for file_name in file_names:
-                self.extract(file_name)
+            for i, file_name in enumerate(file_names):
+                self.extract(file_name, indices[i])
 
         return self.data, self.metadata
 
diff --git a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
index e50e7d1ee22..8902bb7d011 100644
--- a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
+++ b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
@@ -54,7 +54,7 @@ def __init__(
         scoring_metric: str = "accuracy",
         maximize_metric: bool = True,
         save_results: bool = False,
-        debug: bool = True,
+        debug: bool = False,
     ):
         self.tasks = tasks
         self.optimization_results = optimization_results
@@ -76,10 +76,22 @@ def __init__(
                 level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
             )
 
-    def get_modality_by_id(self, modality_id: int) -> Modality:
+    def get_modalities_by_id(self, modality_ids: List[int]) -> Modality:
+        modalities = []
         for mod in self.modalities:
-            if mod.modality_id == modality_id:
-                return mod
+            if mod.modality_id in modality_ids:
+                modalities.append(mod)
+        return modalities
+
+    def get_modality_by_id_and_instance_id(self, modality_id, instance_id):
+        counter = 0
+        for modality in self.modalities:
+            if modality.modality_id == modality_id:
+                if counter == instance_id or instance_id == -1:
+                    return modality
+                else:
+                    counter += 1
+        return None
 
     def extract_k_best_modalities_per_task(self):
         self.k_best_representations = {}
@@ -189,8 +201,8 @@ def evaluate_dag_config(self, dag, params, node_order, modality_ids, task):
                     }
                     node.parameters = node_params
 
-            modality = self.get_modality_by_id(modality_ids[0])
-            modified_modality = dag_copy.execute([modality])
+            modalities = self.get_modalities_by_id(modality_ids)
+            modified_modality = dag_copy.execute(modalities, task)
             score = task.run(
                 modified_modality[list(modified_modality.keys())[-1]].data
             )[1]
@@ -207,19 +219,15 @@ def tune_multimodal_representations(
         optimize_unimodal: bool = True,
         max_eval_per_rep: Optional[int] = None,
     ):
-
-        best_results = {}
+        results = {}
         for task in self.tasks:
-            best_results[task.model.name] = sorted(
+            best_results = sorted(
                 optimization_results[task.model.name],
                 key=lambda x: x.val_score,
                 reverse=True,
             )[:k]
-
-        results = {}
-        for task in self.tasks:
             results[task.model.name] = []
-            best_optimization_results = best_results[task.model.name]
+            best_optimization_results = best_results
 
             for representation in best_optimization_results:
                 if optimize_unimodal:
@@ -245,26 +253,16 @@ def tune_multimodal_representations(
                                 except ValueError:
                                     continue
 
-                    if self._dag_has_trainable_fusion(dag):
-                        result = self.tune_trainable_fusion_dag(
-                            dag, task, max_eval_per_rep
-                        )
-                    else:
-                        result = self.tune_dag_representation(
-                            dag, dag.root_node_id, task, max_eval_per_rep
-                        )
+                    result = self.tune_dag_representation(
+                        dag, dag.root_node_id, task, max_eval_per_rep
+                    )
                 else:
-                    if self._dag_has_trainable_fusion(representation.dag):
-                        result = self.tune_trainable_fusion_dag(
-                            representation.dag, task, max_eval_per_rep
-                        )
-                    else:
-                        result = self.tune_dag_representation(
-                            representation.dag,
-                            representation.dag.root_node_id,
-                            task,
-                            max_eval_per_rep,
-                        )
+                    result = self.tune_dag_representation(
+                        representation.dag,
+                        representation.dag.root_node_id,
+                        task,
+                        max_eval_per_rep,
+                    )
                 results[task.model.name].append(result)
 
         self.results = results
@@ -274,186 +272,6 @@ def tune_multimodal_representations(
 
         return results
 
-    def _dag_has_trainable_fusion(self, dag) -> bool:
-        for node in dag.nodes:
-            if node.operation and hasattr(node.operation(), "needs_training"):
-                if node.operation().needs_training:
-                    return True
-        return False
-
-    def tune_trainable_fusion_dag(self, dag, task, max_evals=None):
-        hyperparams = {}
-        reps = []
-        modality_ids = []
-        node_order = []
-        fusion_nodes = []
-
-        visited = set()
-
-        def visit_node(node_id):
-            if node_id in visited:
-                return
-            node = dag.get_node_by_id(node_id)
-            for input_id in node.inputs:
-                visit_node(input_id)
-            visited.add(node_id)
-            if node.operation is not None:
-                if node.parameters:
-                    hyperparams.update(node.parameters)
-                reps.append(node.operation)
-                node_order.append(node_id)
-
-                if hasattr(node.operation(), "needs_training"):
-                    if node.operation().needs_training:
-                        fusion_nodes.append(node_id)
-
-            if node.modality_id is not None:
-                modality_ids.append(node.modality_id)
-
-        visit_node(dag.root_node_id)
-
-        if not hyperparams:
-            return None
-
-        start_time = time.time()
-        rep_name = "_".join([rep.__name__ for rep in reps])
-
-        param_grid = list(ParameterGrid(hyperparams))
-        if max_evals and len(param_grid) > max_evals:
-            np.random.shuffle(param_grid)
-            param_grid = param_grid[:max_evals]
-
-        all_results = []
-        for params in param_grid:
-            result = self.evaluate_trainable_fusion_config(
-                dag, params, node_order, modality_ids, fusion_nodes, task
-            )
-            all_results.append(result)
-
-        if self.maximize_metric:
-            best_params, best_score = max(all_results, key=lambda x: x[1])
-        else:
-            best_params, best_score = min(all_results, key=lambda x: x[1])
-
-        tuning_time = time.time() - start_time
-
-        return HyperparamResult(
-            representation_name=rep_name,
-            best_params=best_params,
-            best_score=best_score,
-            all_results=all_results,
-            tuning_time=tuning_time,
-            modality_id=modality_ids[0] if modality_ids else None,
-        )
-
-    def evaluate_trainable_fusion_config(
-        self, dag, params, node_order, modality_ids, fusion_nodes, task
-    ):
-        try:
-            dag_copy = copy.deepcopy(dag)
-
-            for node_id in node_order:
-                node = dag_copy.get_node_by_id(node_id)
-                if node.operation is not None and node.parameters:
-                    node_params = {
-                        k: v for k, v in params.items() if k in node.parameters
-                    }
-                    operation_class = node.operation
-                    new_operation = operation_class(**node_params)
-                    node.operation = lambda: new_operation
-
-            required_modalities = []
-            for modality_id in set(modality_ids):
-                modality = self.get_modality_by_id(modality_id)
-                if modality:
-                    required_modalities.append(modality)
-
-            if not required_modalities:
-                raise ValueError("No valid modalities found for DAG evaluation")
-
-            if fusion_nodes:
-                final_representation = self._execute_trainable_fusion_dag(
-                    dag_copy, required_modalities, task
-                )
-            else:
-                modified_modalities = dag_copy.execute(required_modalities)
-                final_representation = modified_modalities[
-                    list(modified_modalities.keys())[-1]
-                ]
-
-            score = task.run(final_representation.data)[1]
-            return params, score
-
-        except Exception as e:
-            self.logger.error(
-                f"Error evaluating trainable fusion DAG with params {params}: {e}"
-            )
-            import traceback
-
-            traceback.print_exc()
-            return params, float("-inf") if self.maximize_metric else float("inf")
-
-    def _execute_trainable_fusion_dag(self, dag, modalities, task):
-        cache = {}
-
-        def execute_node_with_training(node_id: str):
-            if node_id in cache:
-                return cache[node_id]
-
-            node = dag.get_node_by_id(node_id)
-
-            if not node.inputs:
-                modality = None
-                for mod in modalities:
-                    if mod.modality_id == node.modality_id:
-                        modality = mod
-                        break
-                if modality is None:
-                    raise ValueError(f"Modality {node.modality_id} not found")
-                cache[node_id] = modality
-                return modality
-
-            input_mods = [
-                execute_node_with_training(input_id) for input_id in node.inputs
-            ]
-
-            if len(input_mods) > 1:
-                fusion_op = node.operation()
-
-                if hasattr(fusion_op, "needs_training") and fusion_op.needs_training:
-
-                    fusion_op.transform_with_training(
-                        input_mods, task.train_indices, task.labels
-                    )
-
-                    result_data = fusion_op.transform_data(input_mods, task.val_indices)
-
-                    from systemds.scuro.modality.transformed import TransformedModality
-
-                    result = TransformedModality(
-                        modality_type="fused",
-                        data=result_data,
-                        metadata={"shape": result_data.shape},
-                        transformation=[fusion_op],
-                    )
-                else:
-                    result = input_mods[0].combine(input_mods[1:], fusion_op)
-            else:
-                if hasattr(node.operation(), "__class__"):
-                    op_instance = node.operation()
-                    if hasattr(input_mods[0], "apply_representation"):
-                        result = input_mods[0].apply_representation(op_instance)
-                    else:
-                        result = op_instance.transform(input_mods[0])
-                else:
-                    result = input_mods[0]
-
-            cache[node_id] = result
-            return result
-
-        final_result = execute_node_with_training(dag.root_node_id)
-        return final_result
-
     def save_tuning_results(self, filepath: str = None):
         if not filepath:
             filepath = f"hyperparameter_results_{int(time.time())}.json"
diff --git a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
index 0d3a896e8a4..91d569bc598 100644
--- a/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
+++ b/src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
@@ -37,7 +37,6 @@
 from systemds.scuro.representations.aggregated_representation import (
     AggregatedRepresentation,
 )
-from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.representations.aggregate import Aggregation
 from systemds.scuro.drsearch.operator_registry import Registry
 from systemds.scuro.utils.schema_helpers import get_shape
@@ -189,18 +188,14 @@ def _evaluate_dag(self, dag: RepresentationDag, task: Task) -> "OptimizationResu
         start_time = time.time()
 
         try:
-            has_trainable_fusion = self._dag_has_trainable_fusion(dag)
-
-            if has_trainable_fusion:
-                fused_representation = self._execute_dag_with_training(dag, task)
-            else:
-                fused_representation = dag.execute(
-                    list(
-                        chain.from_iterable(
-                            self.k_best_representations[task.model.name].values()
-                        )
+            fused_representation = dag.execute(
+                list(
+                    chain.from_iterable(
+                        self.k_best_representations[task.model.name].values()
                     )
-                )
+                ),
+                task,
+            )
 
             if fused_representation is None:
                 return None
@@ -232,59 +227,6 @@ def _evaluate_dag(self, dag: RepresentationDag, task: Task) -> "OptimizationResu
             traceback.print_exc()
             return None
 
-    def _dag_has_trainable_fusion(self, dag: RepresentationDag) -> bool:
-        for node in dag.nodes:
-            if node.operation and hasattr(node.operation(), "needs_training"):
-                if node.operation().needs_training:
-                    return True
-        return False
-
-    def _execute_dag_with_training(self, dag: RepresentationDag, task: Task):
-        all_modalities = list(
-            chain.from_iterable(self.k_best_representations[task.model.name].values())
-        )
-
-        cache = {}
-
-        def execute_node_with_training(node_id: str):
-            if node_id in cache:
-                return cache[node_id]
-
-            node = dag.get_node_by_id(node_id)
-
-            if not node.inputs:
-                modality = self._get_modality_by_id_and_instance_id(
-                    all_modalities, node.modality_id, node.representation_index
-                )
-                cache[node_id] = modality
-                return modality
-
-            input_mods = [
-                execute_node_with_training(input_id) for input_id in node.inputs
-            ]
-
-            fusion_op = node.operation()
-
-            if hasattr(fusion_op, "needs_training") and fusion_op.needs_training:
-                fusion_op.transform_with_training(
-                    input_mods, task.train_indices, task.labels
-                )
-
-                result_data = fusion_op.transform_data(input_mods)
-                result = TransformedModality(
-                    input_mods[0], fusion_op, ModalityType.EMBEDDING
-                )
-                result.data = result_data
-
-            else:
-                result = input_mods[0].combine(input_mods[1:], fusion_op)
-
-            cache[node_id] = result
-            return result
-
-        execute_node_with_training(dag.root_node_id)
-        return cache
-
     def _get_modality_by_id_and_instance_id(self, modalities, modality_id, instance_id):
         counter = 0
         for modality in modalities:
diff --git a/src/main/python/systemds/scuro/drsearch/representation_dag.py b/src/main/python/systemds/scuro/drsearch/representation_dag.py
index 619e4236a2a..1d5f512eb83 100644
--- a/src/main/python/systemds/scuro/drsearch/representation_dag.py
+++ b/src/main/python/systemds/scuro/drsearch/representation_dag.py
@@ -30,6 +30,7 @@
     AggregatedRepresentation,
 )
 from systemds.scuro.representations.context import Context
+from systemds.scuro.utils.identifier import get_op_id, get_node_id
 
 
 @dataclass
@@ -118,10 +119,12 @@ def has_cycle(node_id: str, path: set) -> bool:
 
         return not has_cycle(self.root_node_id, set())
 
-    def execute(self, modalities: List[Modality]) -> Dict[str, TransformedModality]:
+    def execute(
+        self, modalities: List[Modality], task=None
+    ) -> Dict[str, TransformedModality]:
         cache = {}
 
-        def execute_node(node_id: str) -> TransformedModality:
+        def execute_node(node_id: str, task) -> TransformedModality:
             if node_id in cache:
                 return cache[node_id]
 
@@ -134,28 +137,39 @@ def execute_node(node_id: str) -> TransformedModality:
                 cache[node_id] = modality
                 return modality
 
-            input_mods = [execute_node(input_id) for input_id in node.inputs]
+            input_mods = [execute_node(input_id, task) for input_id in node.inputs]
 
+            node_operation = node.operation()
             if len(input_mods) == 1:
-                if isinstance(node.operation(), Context):
-                    result = input_mods[0].context(node.operation())
-                elif isinstance(node.operation(), UnimodalRepresentation):
+                # It's a unimodal operation
+                if isinstance(node_operation, Context):
+                    result = input_mods[0].context(node_operation)
+                elif isinstance(node_operation, AggregatedRepresentation):
+                    result = node_operation.transform(input_mods[0])
+                elif isinstance(node_operation, UnimodalRepresentation):
                     if (
                         isinstance(input_mods[0], TransformedModality)
                         and input_mods[0].transformation[0].__class__ == node.operation
                     ):
+                        # Avoid duplicate transformations
                         result = input_mods[0]
                     else:
-                        result = input_mods[0].apply_representation(node.operation())
-                elif isinstance(node.operation(), AggregatedRepresentation):
-                    result = node.operation().transform(input_mods[0])
+                        # Compute the representation
+                        result = input_mods[0].apply_representation(node_operation)
             else:
-                result = input_mods[0].combine(input_mods[1:], node.operation())
+                # It's a fusion operation
+                fusion_op = node_operation
+                if hasattr(fusion_op, "needs_training") and fusion_op.needs_training:
+                    result = input_mods[0].combine_with_training(
+                        input_mods[1:], fusion_op, task
+                    )
+                else:
+                    result = input_mods[0].combine(input_mods[1:], fusion_op)
 
             cache[node_id] = result
             return result
 
-        execute_node(self.root_node_id)
+        execute_node(self.root_node_id, task)
 
         return cache
 
@@ -184,7 +198,7 @@ def create_leaf_node(
         if representation_index != -1:
             node_id = f"leaf_{modality_id}_{representation_index}"
         else:
-            node_id = f"leaf_{self.node_counter}"
+            node_id = f"leaf_{get_node_id()}"
         node = RepresentationNode(
             node_id=node_id,
             inputs=[],
@@ -198,7 +212,7 @@ def create_leaf_node(
     def create_operation_node(
         self, operation: Any, inputs: List[str], parameters: Dict[str, Any] = None
     ) -> str:
-        node_id = f"op_{self.node_counter}"
+        node_id = f"op_{get_op_id()}"
         self.node_counter += 1
         node = RepresentationNode(
             node_id=node_id,
diff --git a/src/main/python/systemds/scuro/modality/transformed.py b/src/main/python/systemds/scuro/modality/transformed.py
index 4e68c222791..7e8e54eff33 100644
--- a/src/main/python/systemds/scuro/modality/transformed.py
+++ b/src/main/python/systemds/scuro/modality/transformed.py
@@ -136,11 +136,26 @@ def combine(self, other: Union[Modality, List[Modality]], fusion_method):
         fused_modality = TransformedModality(
             self, fusion_method, ModalityType.EMBEDDING
         )
+        fused_modality.data = fusion_method.transform(self.create_modality_list(other))
+
+        return fused_modality
+
+    def combine_with_training(
+        self, other: Union[Modality, List[Modality]], fusion_method, task
+    ):
+        fused_modality = TransformedModality(
+            self, fusion_method, ModalityType.EMBEDDING
+        )
+        modalities = self.create_modality_list(other)
+        fused_modality.data = fusion_method.transform_with_training(modalities, task)
+
+        return fused_modality
+
+    def create_modality_list(self, other: Union[Modality, List[Modality]]):
         modalities = [self]
         if isinstance(other, list):
             modalities.extend(other)
         else:
             modalities.append(other)
-        fused_modality.data = fusion_method.transform(modalities)
 
-        return fused_modality
+        return modalities
diff --git a/src/main/python/systemds/scuro/modality/type.py b/src/main/python/systemds/scuro/modality/type.py
index b2331d0faed..2853e8135d6 100644
--- a/src/main/python/systemds/scuro/modality/type.py
+++ b/src/main/python/systemds/scuro/modality/type.py
@@ -230,6 +230,17 @@ def create_text_metadata(self, length, data):
         md["length"] = length
         return md
 
+    def create_ts_metadata(
+        self, signal_names, data, sampling_rate=None, is_single_instance=True
+    ):
+        md = deepcopy(self.get_schema())
+        md = ModalitySchemas.update_base_metadata(md, data, is_single_instance)
+        md["frequency"] = sampling_rate if sampling_rate is not None else 1
+        md["length"] = data.shape[0]
+        md["signal_names"] = signal_names
+        md["timestamp"] = create_timestamps(md["frequency"], md["length"])
+        return md
+
     def create_video_metadata(self, frequency, length, width, height, num_channels):
         md = deepcopy(self.get_schema())
         md["frequency"] = frequency
diff --git a/src/main/python/systemds/scuro/modality/unimodal_modality.py b/src/main/python/systemds/scuro/modality/unimodal_modality.py
index 48ae3520a79..373921e95c2 100644
--- a/src/main/python/systemds/scuro/modality/unimodal_modality.py
+++ b/src/main/python/systemds/scuro/modality/unimodal_modality.py
@@ -130,6 +130,11 @@ def apply_representation(self, representation):
                 self.extract_raw_data()
             new_modality = representation.transform(self)
 
+            for i, d in enumerate(new_modality.data):
+                output = np.array(d)
+                if np.isnan(output).any():
+                    new_modality.data[i] = np.where(np.isnan(output), 0, output)
+
             if not all(
                 "attention_masks" in entry for entry in new_modality.metadata.values()
             ):
diff --git a/src/main/python/systemds/scuro/representations/aggregate.py b/src/main/python/systemds/scuro/representations/aggregate.py
index 2c046dc4016..0a8438e684f 100644
--- a/src/main/python/systemds/scuro/representations/aggregate.py
+++ b/src/main/python/systemds/scuro/representations/aggregate.py
@@ -20,6 +20,7 @@
 # -------------------------------------------------------------
 import numpy as np
 
+from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.representations import utils
 
 
@@ -71,7 +72,13 @@ def execute(self, modality):
         for i, instance in enumerate(modality.data):
             data.append([])
             if isinstance(instance, np.ndarray):
-                aggregated_data = self._aggregation_func(instance)
+                if (
+                    modality.modality_type == ModalityType.IMAGE
+                    or modality.modality_type == ModalityType.VIDEO
+                ) and instance.ndim > 2:
+                    aggregated_data = instance.flatten()
+                else:
+                    aggregated_data = self._aggregation_func(instance)
             else:
                 aggregated_data = []
                 for entry in instance:
@@ -79,18 +86,17 @@ def execute(self, modality):
             max_len = max(max_len, len(aggregated_data))
             data[i] = aggregated_data
 
-        if self.pad_modality:
-            for i, instance in enumerate(data):
-                if isinstance(instance, np.ndarray):
-                    if len(instance) < max_len:
-                        padded_data = np.zeros(max_len, dtype=instance.dtype)
-                        padded_data[: len(instance)] = instance
-                        data[i] = padded_data
-                else:
-                    padded_data = []
-                    for entry in instance:
-                        padded_data.append(utils.pad_sequences(entry, max_len))
+        for i, instance in enumerate(data):
+            if isinstance(instance, np.ndarray):
+                if len(instance) < max_len:
+                    padded_data = np.zeros(max_len, dtype=instance.dtype)
+                    padded_data[: len(instance)] = instance
                     data[i] = padded_data
+            else:
+                padded_data = []
+                for entry in instance:
+                    padded_data.append(utils.pad_sequences(entry, max_len))
+                data[i] = padded_data
 
         return np.array(data)
 
diff --git a/src/main/python/systemds/scuro/representations/fusion.py b/src/main/python/systemds/scuro/representations/fusion.py
index e7ecfa5acc1..8cf67b1cb42 100644
--- a/src/main/python/systemds/scuro/representations/fusion.py
+++ b/src/main/python/systemds/scuro/representations/fusion.py
@@ -67,18 +67,28 @@ def transform(self, modalities: List[Modality]):
 
         return self.execute(mods)
 
-    def transform_with_training(
-        self, modalities: List[Modality], train_indices, labels
-    ):
-
+    def transform_with_training(self, modalities: List[Modality], task):
         train_modalities = []
         for modality in modalities:
-            train_data = [d for i, d in enumerate(modality.data) if i in train_indices]
+            train_data = [
+                d for i, d in enumerate(modality.data) if i in task.train_indices
+            ]
             train_modality = TransformedModality(modality, self)
             train_modality.data = copy.deepcopy(train_data)
             train_modalities.append(train_modality)
 
-        self.execute(train_modalities, labels[train_indices])
+        transformed_train = self.execute(
+            train_modalities, task.labels[task.train_indices]
+        )
+        transformed_val = self.transform_data(modalities, task.val_indices)
+
+        transformed_data = np.zeros(
+            (len(modalities[0].data), transformed_train.shape[1])
+        )
+        transformed_data[task.train_indices] = transformed_train
+        transformed_data[task.val_indices] = transformed_val
+
+        return transformed_data
 
     def transform_data(self, modalities: List[Modality], indices=None):
         val_modalities = []
@@ -111,6 +121,11 @@ def get_max_embedding_size(self, modalities: List[Modality]):
         :param modalities: List of modalities
         :return: maximum embedding size
         """
+        try:
+            modalities[0].data = np.array(modalities[0].data)
+        except:
+            pass
+
         if isinstance(modalities[0].data[0], list):
             max_size = modalities[0].data[0][0].shape[1]
         elif isinstance(modalities[0].data, np.ndarray):
diff --git a/src/main/python/systemds/scuro/representations/lstm.py b/src/main/python/systemds/scuro/representations/lstm.py
index c9e8bae1cdf..c8e96448815 100644
--- a/src/main/python/systemds/scuro/representations/lstm.py
+++ b/src/main/python/systemds/scuro/representations/lstm.py
@@ -86,7 +86,18 @@ def _prepare_data(self, modalities: List[Modality]) -> np.ndarray:
         processed_modalities = []
 
         for modality in modalities:
-            data = np.array(modality.data)
+            try:
+                data = np.array(modality.data)
+            except:
+                max_len = -1
+                for md in modality.metadata.values():
+                    if max_len < md["data_layout"]["shape"][0]:
+                        max_len = md["data_layout"]["shape"][0]
+                data = np.zeros((len(modality.data), max_len))
+                for i, d in enumerate(modality.data):
+                    data[i, : len(d)] = d
+
+                modality.data = data
 
             if data.ndim == 1:
                 data = data.reshape(-1, 1, 1)
@@ -196,6 +207,11 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
             "dropout_rate": self.dropout_rate,
         }
 
+        self.model.eval()
+        with torch.no_grad():
+            features, _ = self.model(X_tensor)
+        return features.cpu().numpy()
+
     def apply_representation(self, modalities: List[Modality]) -> np.ndarray:
         if not self.is_trained or self.model is None:
             raise ValueError("Model must be trained before applying representation")
diff --git a/src/main/python/systemds/scuro/representations/resnet.py b/src/main/python/systemds/scuro/representations/resnet.py
index 711d1f39a60..7bb94d8bfde 100644
--- a/src/main/python/systemds/scuro/representations/resnet.py
+++ b/src/main/python/systemds/scuro/representations/resnet.py
@@ -32,17 +32,13 @@
 from systemds.scuro.utils.static_variables import get_device
 
 
-@register_representation(
-    [ModalityType.IMAGE, ModalityType.VIDEO, ModalityType.TIMESERIES]
-)
+@register_representation([ModalityType.IMAGE, ModalityType.VIDEO])
 class ResNet(UnimodalRepresentation):
     def __init__(self, model_name="ResNet18", layer="avgpool", output_file=None):
         self.data_type = torch.bfloat16
         self.model_name = model_name
         parameters = self._get_parameters()
-        super().__init__(
-            "ResNet", ModalityType.TIMESERIES, parameters
-        )  # TODO: TIMESERIES only for videos - images would be handled as EMBEDDING
+        super().__init__("ResNet", ModalityType.EMBEDDING, parameters)
 
         self.output_file = output_file
         self.layer_name = layer
diff --git a/src/main/python/systemds/scuro/representations/spectrogram.py b/src/main/python/systemds/scuro/representations/spectrogram.py
index f71fe80bb74..662fb0d627a 100644
--- a/src/main/python/systemds/scuro/representations/spectrogram.py
+++ b/src/main/python/systemds/scuro/representations/spectrogram.py
@@ -44,8 +44,8 @@ def transform(self, modality):
 
         for i, sample in enumerate(modality.data):
             spectrogram = librosa.stft(
-                y=np.array(sample), hop_length=self.hop_length, n_fft=self.n_fft
-            ).astype(modality.data_type)
+                y=np.array(np.abs(sample)), hop_length=self.hop_length, n_fft=self.n_fft
+            )
             S_dB = librosa.amplitude_to_db(np.abs(spectrogram))
 
             result.append(S_dB.T)
diff --git a/src/main/python/systemds/scuro/representations/window_aggregation.py b/src/main/python/systemds/scuro/representations/window_aggregation.py
index 04d93142093..c16f6d747fc 100644
--- a/src/main/python/systemds/scuro/representations/window_aggregation.py
+++ b/src/main/python/systemds/scuro/representations/window_aggregation.py
@@ -198,6 +198,7 @@ def execute(self, modality):
                 )
                 output.append(val)
                 start = end
+
             windowed_data.append(output)
 
         return np.array(windowed_data)
diff --git a/src/main/python/systemds/scuro/utils/identifier.py b/src/main/python/systemds/scuro/utils/identifier.py
index ca352db211e..7b6802672cf 100644
--- a/src/main/python/systemds/scuro/utils/identifier.py
+++ b/src/main/python/systemds/scuro/utils/identifier.py
@@ -18,6 +18,9 @@
 # under the License.
 #
 # -------------------------------------------------------------
+import threading
+
+
 class Identifier:
     """ """
 
@@ -32,3 +35,27 @@ def __new__(cls):
     def new_id(self):  # TODO: make threadsafe when parallelizing
         self.id += 1
         return self.id
+
+
+class IdGenerator:
+    _instance = None
+    _lock = threading.Lock()
+
+    def __new__(cls):
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:
+                    cls._instance = super().__new__(cls)
+                    cls._instance._ctr = 0
+                    cls._instance._ctr_lock = threading.Lock()
+        return cls._instance
+
+    def next(self) -> int:
+        with self._instance._ctr_lock:
+            self._instance._ctr += 1
+            n = self._instance._ctr
+        return n
+
+
+get_op_id = IdGenerator().next
+get_node_id = IdGenerator().next

From 5c2153d6624d1ef04e325144f53f94973ed8b799 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Wed, 15 Oct 2025 22:07:25 +0200
Subject: [PATCH 18/22] make scuro python workflow faster

---
 .github/workflows/python.yml | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index ac2857cc226..115cfc2d9a2 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -155,18 +155,24 @@ jobs:
 
     - name: Run Scuro Python Tests 
       if: ${{ matrix.test_mode == 'scuro' }}
+      env:
+        TORCH_HOME: ${{ github.workspace }}/.torch
       run: |
+        ( while true; do echo "."; sleep 25; done ) &
+        KA=$!
+        pip install --upgrade pip wheel setuptools
+        # Use CUDA 12.1 wheels to avoid slow/source builds
+        pip install --extra-index-url https://download.pytorch.org/whl/cu121 \
+          torch==2.4.1 torchvision==0.19.1
         pip install \
-          torchvision \
           transformers \
           opencv-python \
-          torch \
           librosa \
           h5py \
           gensim \
           opt-einsum \
           nltk
-        cd src/main/python
-        python -m unittest discover -s tests/scuro -p 'test_*.py'
-
 
+        cd src/main/python
+        python -m unittest discover -s tests/scuro -p 'test_*.py' -v
+        kill $KA || true
\ No newline at end of file

From f883b20c374e94ebcd8a44a611eea8a07c50f399 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Thu, 16 Oct 2025 10:34:31 +0200
Subject: [PATCH 19/22] remove high resource test for now

---
 src/main/python/tests/scuro/test_hp_tuner.py  | 52 +++++++++----------
 .../tests/scuro/test_unimodal_optimizer.py    | 11 ++++
 2 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/src/main/python/tests/scuro/test_hp_tuner.py b/src/main/python/tests/scuro/test_hp_tuner.py
index 48b916ab908..73aab4493d3 100644
--- a/src/main/python/tests/scuro/test_hp_tuner.py
+++ b/src/main/python/tests/scuro/test_hp_tuner.py
@@ -156,31 +156,31 @@ def test_hp_tuner_for_audio_modality(self):
 
         self.run_hp_for_modality([audio])
 
-    def test_multimodal_hp_tuning(self):
-        audio_data, audio_md = ModalityRandomDataGenerator().create_audio_data(
-            self.num_instances, 3000
-        )
-        audio = UnimodalModality(
-            TestDataLoader(
-                self.indices, None, ModalityType.AUDIO, audio_data, np.float32, audio_md
-            )
-        )
-
-        text_data, text_md = ModalityRandomDataGenerator().create_text_data(
-            self.num_instances
-        )
-        text = UnimodalModality(
-            TestDataLoader(
-                self.indices, None, ModalityType.TEXT, text_data, str, text_md
-            )
-        )
-
-        self.run_hp_for_modality(
-            [audio, text], multimodal=True, tune_unimodal_representations=True
-        )
-        self.run_hp_for_modality(
-            [audio, text], multimodal=True, tune_unimodal_representations=False
-        )
+    # def test_multimodal_hp_tuning(self):
+    #     audio_data, audio_md = ModalityRandomDataGenerator().create_audio_data(
+    #         self.num_instances, 3000
+    #     )
+    #     audio = UnimodalModality(
+    #         TestDataLoader(
+    #             self.indices, None, ModalityType.AUDIO, audio_data, np.float32, audio_md
+    #         )
+    #     )
+    #
+    #     text_data, text_md = ModalityRandomDataGenerator().create_text_data(
+    #         self.num_instances
+    #     )
+    #     text = UnimodalModality(
+    #         TestDataLoader(
+    #             self.indices, None, ModalityType.TEXT, text_data, str, text_md
+    #         )
+    #     )
+    #
+    #     self.run_hp_for_modality(
+    #         [audio, text], multimodal=True, tune_unimodal_representations=True
+    #     )
+    #     self.run_hp_for_modality(
+    #         [audio, text], multimodal=True, tune_unimodal_representations=False
+    #     )
 
     def test_hp_tuner_for_text_modality(self):
         text_data, text_md = ModalityRandomDataGenerator().create_text_data(
@@ -229,7 +229,7 @@ def run_hp_for_modality(
 
                 hp.tune_multimodal_representations(
                     fusion_results,
-                    k=2,
+                    k=1,
                     optimize_unimodal=tune_unimodal_representations,
                 )
 
diff --git a/src/main/python/tests/scuro/test_unimodal_optimizer.py b/src/main/python/tests/scuro/test_unimodal_optimizer.py
index 0680b3edf54..e2f0378d584 100644
--- a/src/main/python/tests/scuro/test_unimodal_optimizer.py
+++ b/src/main/python/tests/scuro/test_unimodal_optimizer.py
@@ -163,6 +163,17 @@ def test_unimodal_optimizer_for_text_modality(self):
         )
         self.optimize_unimodal_representation_for_modality(text)
 
+    def test_unimodal_optimizer_for_video_modality(self):
+        video_data, video_md = ModalityRandomDataGenerator().create_visual_modality(
+            self.num_instances, 10, 10
+        )
+        video = UnimodalModality(
+            TestDataLoader(
+                self.indices, None, ModalityType.VIDEO, video_data, np.float32, video_md
+            )
+        )
+        self.optimize_unimodal_representation_for_modality(video)
+
     def optimize_unimodal_representation_for_modality(self, modality):
         with patch.object(
             Registry,

From cfe13ac0256d8b603986cc73cc08b3f092e1ae4a Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Thu, 16 Oct 2025 10:47:27 +0200
Subject: [PATCH 20/22] remove timeseries test (functionality added in next PR)

---
 src/main/python/tests/scuro/test_operator_registry.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/main/python/tests/scuro/test_operator_registry.py b/src/main/python/tests/scuro/test_operator_registry.py
index 0d83d83bda8..b5fa4b01b4d 100644
--- a/src/main/python/tests/scuro/test_operator_registry.py
+++ b/src/main/python/tests/scuro/test_operator_registry.py
@@ -74,9 +74,9 @@ def test_video_representations_in_registry(self):
             # SwinVideoTransformer,
         ]
 
-    def test_timeseries_representations_in_registry(self):
-        registry = Registry()
-        assert registry.get_representations(ModalityType.TIMESERIES) == [ResNet]
+    # def test_timeseries_representations_in_registry(self):
+    #     registry = Registry()
+    #     assert registry.get_representations(ModalityType.TIMESERIES) == [ResNet]
 
     def test_text_representations_in_registry(self):
         registry = Registry()

From 7346a2155bba2f8c9be3a1929a594623c9025d30 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Thu, 16 Oct 2025 11:03:45 +0200
Subject: [PATCH 21/22] move keep-alive termination to after pip installs

---
 .github/workflows/python.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 115cfc2d9a2..299c50718e0 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -172,7 +172,6 @@ jobs:
           gensim \
           opt-einsum \
           nltk
-
+        kill $KA 
         cd src/main/python
-        python -m unittest discover -s tests/scuro -p 'test_*.py' -v
-        kill $KA || true
\ No newline at end of file
+        python -m unittest discover -s tests/scuro -p 'test_*.py' -v
\ No newline at end of file

From 1bc05657635d3bce84cd7ac778e0505daa0312c2 Mon Sep 17 00:00:00 2001
From: Christina Dionysio <dionysio@tu-berlin.de>
Date: Thu, 16 Oct 2025 18:47:40 +0200
Subject: [PATCH 22/22] remove prints

---
 src/main/python/tests/scuro/test_data_loaders.py             | 1 -
 src/main/python/tests/scuro/test_unimodal_representations.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/main/python/tests/scuro/test_data_loaders.py b/src/main/python/tests/scuro/test_data_loaders.py
index 85da2919a04..fb07df71543 100644
--- a/src/main/python/tests/scuro/test_data_loaders.py
+++ b/src/main/python/tests/scuro/test_data_loaders.py
@@ -66,7 +66,6 @@ def setUpClass(cls):
 
     @classmethod
     def tearDownClass(cls):
-        print("Cleaning up test data")
         shutil.rmtree(cls.test_file_path)
 
     def test_load_audio_data_from_file(self):
diff --git a/src/main/python/tests/scuro/test_unimodal_representations.py b/src/main/python/tests/scuro/test_unimodal_representations.py
index dbd5b8899ac..8c8e9baa2d4 100644
--- a/src/main/python/tests/scuro/test_unimodal_representations.py
+++ b/src/main/python/tests/scuro/test_unimodal_representations.py
@@ -112,7 +112,7 @@ def test_video_representations(self):
             assert r.data[0].ndim == 2
 
     def test_text_representations(self):
-        test_representations = [BoW(2, 2), W2V(5, 2, 2), TfIdf(2)]
+        test_representations = [BoW(2, 2), TfIdf(), W2V()]
         text_data, text_md = ModalityRandomDataGenerator().create_text_data(
             self.num_instances
         )