diff --git a/src/main/python/tests/scuro/test_hp_tuner.py b/src/main/python/tests/scuro/test_hp_tuner.py index c418cefcae8..03ffc1c2dad 100644 --- a/src/main/python/tests/scuro/test_hp_tuner.py +++ b/src/main/python/tests/scuro/test_hp_tuner.py @@ -24,6 +24,7 @@ import numpy as np +from systemds.scuro import Mean from systemds.scuro.drsearch.multimodal_optimizer import MultimodalOptimizer from systemds.scuro.representations.average import Average from systemds.scuro.representations.color_histogram import ColorHistogram @@ -128,7 +129,7 @@ def run_hp_for_modality( { ModalityType.TEXT: [BoW, W2V], ModalityType.AUDIO: [Spectrogram, ZeroCrossing, Spectral, Pitch], - ModalityType.TIMESERIES: [ResNet], + ModalityType.TIMESERIES: [Mean], ModalityType.VIDEO: [ResNet], ModalityType.IMAGE: [ResNet, ColorHistogram], ModalityType.EMBEDDING: [], @@ -136,7 +137,9 @@ def run_hp_for_modality( ): registry = Registry() registry._fusion_operators = [LSTM] - unimodal_optimizer = UnimodalOptimizer(modalities, self.tasks, False) + unimodal_optimizer = UnimodalOptimizer( + modalities, self.tasks, False, k=2, max_num_workers=1 + ) unimodal_optimizer.optimize() hp = HyperparameterTuner( @@ -165,7 +168,7 @@ def run_hp_for_modality( ) else: - hp.tune_unimodal_representations(max_eval_per_rep=10) + hp.tune_unimodal_representations(max_eval_per_rep=2) assert len(hp.optimization_results.results) == len(self.tasks) if multimodal: diff --git a/src/main/python/tests/scuro/test_multimodal_join.py b/src/main/python/tests/scuro/test_multimodal_join.py index 14ce9376be1..4a53129db33 100644 --- a/src/main/python/tests/scuro/test_multimodal_join.py +++ b/src/main/python/tests/scuro/test_multimodal_join.py @@ -47,7 +47,7 @@ def setUpClass(cls): cls.num_instances = 4 cls.indices = np.array(range(cls.num_instances)) cls.audio_data, cls.audio_md = ModalityRandomDataGenerator().create_audio_data( - cls.num_instances, 32000 + cls.num_instances, 500 ) cls.video_data, cls.video_md = ( @@ -104,7 +104,7 @@ def _prepare_data(self, l_chunk_size=None, r_chunk_size=None): l_chunk_size, ModalityType.VIDEO, copy.deepcopy(self.video_data), - np.float32, + np.uint8, copy.deepcopy(self.video_md), ) ) @@ -118,9 +118,7 @@ def _join(self, left_modality, right_modality, window_size): left_modality.join( right_modality, JoinCondition("timestamp", "timestamp", "<") ) - .apply_representation( - ResNet(layer_name="layer1.0.conv2", model_name="ResNet18") - ) + .apply_representation(ResNet()) .window_aggregation(window_size, "mean") .combine("concat") ) diff --git a/src/main/python/tests/scuro/test_unimodal_optimizer.py b/src/main/python/tests/scuro/test_unimodal_optimizer.py index ad824b0335f..11c3aa29ea6 100644 --- a/src/main/python/tests/scuro/test_unimodal_optimizer.py +++ b/src/main/python/tests/scuro/test_unimodal_optimizer.py @@ -23,17 +23,17 @@ import unittest import numpy as np -from systemds.scuro.representations.clip import CLIPText, CLIPVisual from systemds.scuro.representations.color_histogram import ColorHistogram from systemds.scuro.drsearch.operator_registry import Registry from systemds.scuro.drsearch.unimodal_optimizer import UnimodalOptimizer -from systemds.scuro.representations.mfcc import MFCC +from systemds.scuro.representations.covarep_audio_features import ZeroCrossing + +from systemds.scuro.representations.resnet import ResNet from systemds.scuro.representations.mel_spectrogram import MelSpectrogram -from systemds.scuro.representations.word2vec import W2V +from systemds.scuro.representations.tfidf import TfIdf from systemds.scuro.representations.bow import BoW from systemds.scuro.representations.bert import Bert from systemds.scuro.modality.unimodal_modality import UnimodalModality -from systemds.scuro.representations.resnet import ResNet from tests.scuro.data_generator import ( ModalityRandomDataGenerator, TestDataLoader, @@ -53,6 +53,15 @@ from unittest.mock import patch +LIGHTWEIGHT_REGISTRY = { + ModalityType.TEXT: [BoW, TfIdf], + ModalityType.AUDIO: [MelSpectrogram, ZeroCrossing], + ModalityType.VIDEO: [ResNet], + ModalityType.IMAGE: [ColorHistogram], + ModalityType.TIMESERIES: [], + ModalityType.EMBEDDING: [], +} + class TestUnimodalRepresentationOptimizer(unittest.TestCase): data_generator = None @@ -198,24 +207,7 @@ def optimize_unimodal_representation_for_modality(self, modalities): with patch.object( Registry, "_representations", - { - ModalityType.TEXT: [ - W2V, - BoW, - Bert, - CLIPText, - ], - ModalityType.AUDIO: [ - MFCC, - MelSpectrogram, - ], - ModalityType.VIDEO: [ - ResNet, - CLIPVisual, - ], - ModalityType.IMAGE: [ColorHistogram, CLIPVisual], - ModalityType.EMBEDDING: [], - }, + LIGHTWEIGHT_REGISTRY, ): registry = Registry() diff --git a/src/main/python/tests/scuro/test_unimodal_representations.py b/src/main/python/tests/scuro/test_unimodal_representations.py index 2f474be7fd9..59bef40ef64 100644 --- a/src/main/python/tests/scuro/test_unimodal_representations.py +++ b/src/main/python/tests/scuro/test_unimodal_representations.py @@ -19,18 +19,10 @@ # # ------------------------------------------------------------- -import time import unittest import copy import numpy as np -from systemds.scuro.representations.bert import ( - Bert, - ALBERT, - ELECTRA, - RoBERTa, - DistillBERT, -) -from systemds.scuro.representations.clip import CLIPVisual, CLIPText + from systemds.scuro.representations.bow import BoW from systemds.scuro.representations.covarep_audio_features import ( Spectral, @@ -38,20 +30,13 @@ Pitch, ZeroCrossing, ) -from systemds.scuro.representations.glove import GloVe -from systemds.scuro.representations.wav2vec import Wav2Vec +from systemds.scuro.representations.color_histogram import ColorHistogram from systemds.scuro.representations.spectrogram import Spectrogram -from systemds.scuro.representations.window_aggregation import WindowAggregation -from systemds.scuro.representations.word2vec import W2V from systemds.scuro.representations.tfidf import TfIdf -from systemds.scuro.representations.x3d import X3D -from systemds.scuro.representations.x3d import I3D -from systemds.scuro.representations.color_histogram import ColorHistogram +from systemds.scuro.representations.resnet import ResNet from systemds.scuro.modality.unimodal_modality import UnimodalModality from systemds.scuro.representations.mel_spectrogram import MelSpectrogram from systemds.scuro.representations.mfcc import MFCC -from systemds.scuro.representations.resnet import ResNet -from systemds.scuro.representations.swin_video_transformer import SwinVideoTransformer from tests.scuro.data_generator import ( TestDataLoader, ModalityRandomDataGenerator, @@ -72,7 +57,6 @@ ZeroCrossingRate, BandpowerFFT, ) -from systemds.scuro.representations.vgg import VGG19 class TestUnimodalRepresentations(unittest.TestCase): @@ -103,12 +87,11 @@ def _create_audio_modality(self, signal_length=1000): return audio def test_audio_representation_transform_output_shapes(self): - audio = self._create_audio_modality() + audio = self._create_audio_modality(signal_length=200) audio_representations = [ (MFCC(), (2, 12)), (MelSpectrogram(), (2, 128)), (Spectrogram(), (2, 1025)), - (Wav2Vec(), (1, None)), (Spectral(), (2, 4)), (ZeroCrossing(), (2, None)), (RMSE(), (2, None)), @@ -138,14 +121,13 @@ def test_audio_representations(self): MFCC(), MelSpectrogram(), Spectrogram(), - Wav2Vec(), Spectral(), ZeroCrossing(), RMSE(), Pitch(), ] audio_data, audio_md = ModalityRandomDataGenerator().create_audio_data( - self.num_instances, 1000 + self.num_instances, 200 ) audio = UnimodalModality( @@ -181,7 +163,7 @@ def test_timeseries_representations(self): BandpowerFFT(), ] ts_data, ts_md = ModalityRandomDataGenerator().create_timeseries_data( - self.num_instances, 1000 + self.num_instances, 100 ) ts = UnimodalModality( @@ -201,10 +183,8 @@ def test_timeseries_representations(self): assert (ts.data[i] == original_data[i]).all() def test_image_representations(self): - image_representations = [ColorHistogram(), CLIPVisual(), ResNet()] - image_data, image_md = ModalityRandomDataGenerator().create_visual_modality( - self.num_instances, 1 + self.num_instances, 1, height=8, width=8 ) image = UnimodalModality( @@ -213,10 +193,9 @@ def test_image_representations(self): ) ) - for representation in image_representations: - r = image.apply_representation(representation) - assert r.data is not None - assert len(r.data) == self.num_instances + r = image.apply_representation(ColorHistogram()) + assert r.data is not None + assert len(r.data) == self.num_instances # def test_video_representations(self): # video_representations = [ @@ -241,47 +220,34 @@ def test_image_representations(self): # assert len(r.data) == self.num_instances def test_text_representations(self): - test_representations = [ - CLIPText(), - Bert(), - BoW(2, 2), - TfIdf(), - W2V(), - GloVe(), - ALBERT(), - ELECTRA(), - RoBERTa(), - DistillBERT(), - ] text_data, text_md = ModalityRandomDataGenerator().create_text_data( - self.num_instances, 100 + self.num_instances, 3 ) text = UnimodalModality( TestDataLoader( self.indices, None, ModalityType.TEXT, text_data, str, text_md ) ) - for representation in test_representations: + for representation in [BoW(2, 2), TfIdf()]: r = text.apply_representation(representation) assert r.data is not None assert len(r.data) == self.num_instances def test_chunked_video_representations(self): - video_representations = [ResNet()] video_data, video_md = ModalityRandomDataGenerator().create_visual_modality( - self.num_instances, 25 + self.num_instances, 30 ) video = UnimodalModality( TestDataLoader( self.indices, None, ModalityType.VIDEO, video_data, np.float32, video_md ) ) - for representation in video_representations: - r = video.apply_representation(representation) - assert r.data is not None - assert len(r.data) == self.num_instances - assert len(r.metadata) == self.num_instances + r = video.apply_representation(ResNet(model_name="ResNet18")) + assert r.data is not None + assert len(r.data) == self.num_instances + assert len(r.metadata) == self.num_instances +# TODO: add unit tests for the other representations if __name__ == "__main__": unittest.main() diff --git a/src/main/python/tests/scuro/test_window_operations.py b/src/main/python/tests/scuro/test_window_operations.py index 2eaf5985db1..a8c86374801 100644 --- a/src/main/python/tests/scuro/test_window_operations.py +++ b/src/main/python/tests/scuro/test_window_operations.py @@ -39,13 +39,13 @@ class TestWindowOperations(unittest.TestCase): @classmethod def setUpClass(cls): - cls.num_instances = 40 + cls.num_instances = 4 cls.data_generator = ModalityRandomDataGenerator() cls.aggregations = ["mean", "sum", "max", "min"] def test_static_window(self): num_windows = 5 - data, md = self.data_generator.create_visual_modality(self.num_instances, 50) + data, md = self.data_generator.create_visual_modality(self.num_instances, 10) modality = UnimodalModality( TestDataLoader( [i for i in range(0, self.num_instances)], @@ -63,7 +63,7 @@ def test_static_window(self): def test_dynamic_window(self): num_windows = 5 - data, md = self.data_generator.create_visual_modality(self.num_instances, 50) + data, md = self.data_generator.create_visual_modality(self.num_instances, 10) modality = UnimodalModality( TestDataLoader( [i for i in range(0, self.num_instances)], @@ -93,19 +93,21 @@ def test_window_operations_on_text_representations(self): self.run_window_aggregation_for_modality(ModalityType.TEXT, window_size) def run_window_aggregation_for_modality(self, modality_type, window_size): - r = self.data_generator.create1DModality(40, 5000, modality_type) + r = self.data_generator.create1DModality(self.num_instances, 200, modality_type) for aggregation in self.aggregations: windowed_modality = r.window_aggregation(window_size, aggregation) self.verify_window_operation(aggregation, r, windowed_modality, window_size) def test_window_aggregation_on_3d_modality(self): - data, _ = self.data_generator.create_3d_modality(40, (100, 28, 28)) + data, _ = self.data_generator.create_3d_modality( + self.num_instances, (100, 8, 8) + ) embedding_modality = TransformedModality( self.data_generator, "test_transformation" ) embedding_modality.data = data - embedding_modality.stats = RepresentationStats(40, (100, 28, 28)) + embedding_modality.stats = RepresentationStats(self.num_instances, (100, 8, 8)) num_windows = 10 for window_operator in [ @@ -115,17 +117,17 @@ def test_window_aggregation_on_3d_modality(self): ]: stats = window_operator.get_output_stats(embedding_modality.stats) assert stats.num_instances == self.num_instances - assert stats.output_shape == (num_windows, 28, 28) + assert stats.output_shape == (num_windows, 8, 8) windowed_modality = embedding_modality.context(window_operator) def test_window_aggregation_on_2d_modality(self): - data, _ = self.data_generator.create_2d_modality(40, (100, 28)) + data, _ = self.data_generator.create_2d_modality(self.num_instances, (100, 8)) embedding_modality = TransformedModality( self.data_generator, "test_transformation" ) embedding_modality.data = data - embedding_modality.stats = RepresentationStats(40, (100, 28)) + embedding_modality.stats = RepresentationStats(self.num_instances, (100, 8)) num_windows = 10 for window_operator in [ @@ -135,7 +137,7 @@ def test_window_aggregation_on_2d_modality(self): ]: stats = window_operator.get_output_stats(embedding_modality.stats) assert stats.num_instances == self.num_instances - assert stats.output_shape == (num_windows, 28) + assert stats.output_shape == (num_windows, 8) windowed_modality = embedding_modality.context(window_operator)