Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/main/python/tests/scuro/test_hp_tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

import numpy as np

from systemds.scuro import Mean
from systemds.scuro.drsearch.multimodal_optimizer import MultimodalOptimizer
from systemds.scuro.representations.average import Average
from systemds.scuro.representations.color_histogram import ColorHistogram
Expand Down Expand Up @@ -128,15 +129,17 @@ def run_hp_for_modality(
{
ModalityType.TEXT: [BoW, W2V],
ModalityType.AUDIO: [Spectrogram, ZeroCrossing, Spectral, Pitch],
ModalityType.TIMESERIES: [ResNet],
ModalityType.TIMESERIES: [Mean],
ModalityType.VIDEO: [ResNet],
ModalityType.IMAGE: [ResNet, ColorHistogram],
ModalityType.EMBEDDING: [],
},
):
registry = Registry()
registry._fusion_operators = [LSTM]
unimodal_optimizer = UnimodalOptimizer(modalities, self.tasks, False)
unimodal_optimizer = UnimodalOptimizer(
modalities, self.tasks, False, k=2, max_num_workers=1
)
unimodal_optimizer.optimize()

hp = HyperparameterTuner(
Expand Down Expand Up @@ -165,7 +168,7 @@ def run_hp_for_modality(
)

else:
hp.tune_unimodal_representations(max_eval_per_rep=10)
hp.tune_unimodal_representations(max_eval_per_rep=2)

assert len(hp.optimization_results.results) == len(self.tasks)
if multimodal:
Expand Down
8 changes: 3 additions & 5 deletions src/main/python/tests/scuro/test_multimodal_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def setUpClass(cls):
cls.num_instances = 4
cls.indices = np.array(range(cls.num_instances))
cls.audio_data, cls.audio_md = ModalityRandomDataGenerator().create_audio_data(
cls.num_instances, 32000
cls.num_instances, 500
)

cls.video_data, cls.video_md = (
Expand Down Expand Up @@ -104,7 +104,7 @@ def _prepare_data(self, l_chunk_size=None, r_chunk_size=None):
l_chunk_size,
ModalityType.VIDEO,
copy.deepcopy(self.video_data),
np.float32,
np.uint8,
copy.deepcopy(self.video_md),
)
)
Expand All @@ -118,9 +118,7 @@ def _join(self, left_modality, right_modality, window_size):
left_modality.join(
right_modality, JoinCondition("timestamp", "timestamp", "<")
)
.apply_representation(
ResNet(layer_name="layer1.0.conv2", model_name="ResNet18")
)
.apply_representation(ResNet())
.window_aggregation(window_size, "mean")
.combine("concat")
)
Expand Down
36 changes: 14 additions & 22 deletions src/main/python/tests/scuro/test_unimodal_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@
import unittest

import numpy as np
from systemds.scuro.representations.clip import CLIPText, CLIPVisual
from systemds.scuro.representations.color_histogram import ColorHistogram
from systemds.scuro.drsearch.operator_registry import Registry
from systemds.scuro.drsearch.unimodal_optimizer import UnimodalOptimizer
from systemds.scuro.representations.mfcc import MFCC
from systemds.scuro.representations.covarep_audio_features import ZeroCrossing

from systemds.scuro.representations.resnet import ResNet
from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
from systemds.scuro.representations.word2vec import W2V
from systemds.scuro.representations.tfidf import TfIdf
from systemds.scuro.representations.bow import BoW
from systemds.scuro.representations.bert import Bert
from systemds.scuro.modality.unimodal_modality import UnimodalModality
from systemds.scuro.representations.resnet import ResNet
from tests.scuro.data_generator import (
ModalityRandomDataGenerator,
TestDataLoader,
Expand All @@ -53,6 +53,15 @@

from unittest.mock import patch

LIGHTWEIGHT_REGISTRY = {
ModalityType.TEXT: [BoW, TfIdf],
ModalityType.AUDIO: [MelSpectrogram, ZeroCrossing],
ModalityType.VIDEO: [ResNet],
ModalityType.IMAGE: [ColorHistogram],
ModalityType.TIMESERIES: [],
ModalityType.EMBEDDING: [],
}


class TestUnimodalRepresentationOptimizer(unittest.TestCase):
data_generator = None
Expand Down Expand Up @@ -198,24 +207,7 @@ def optimize_unimodal_representation_for_modality(self, modalities):
with patch.object(
Registry,
"_representations",
{
ModalityType.TEXT: [
W2V,
BoW,
Bert,
CLIPText,
],
ModalityType.AUDIO: [
MFCC,
MelSpectrogram,
],
ModalityType.VIDEO: [
ResNet,
CLIPVisual,
],
ModalityType.IMAGE: [ColorHistogram, CLIPVisual],
ModalityType.EMBEDDING: [],
},
LIGHTWEIGHT_REGISTRY,
):
registry = Registry()

Expand Down
70 changes: 18 additions & 52 deletions src/main/python/tests/scuro/test_unimodal_representations.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,39 +19,24 @@
#
# -------------------------------------------------------------

import time
import unittest
import copy
import numpy as np
from systemds.scuro.representations.bert import (
Bert,
ALBERT,
ELECTRA,
RoBERTa,
DistillBERT,
)
from systemds.scuro.representations.clip import CLIPVisual, CLIPText

from systemds.scuro.representations.bow import BoW
from systemds.scuro.representations.covarep_audio_features import (
Spectral,
RMSE,
Pitch,
ZeroCrossing,
)
from systemds.scuro.representations.glove import GloVe
from systemds.scuro.representations.wav2vec import Wav2Vec
from systemds.scuro.representations.color_histogram import ColorHistogram
from systemds.scuro.representations.spectrogram import Spectrogram
from systemds.scuro.representations.window_aggregation import WindowAggregation
from systemds.scuro.representations.word2vec import W2V
from systemds.scuro.representations.tfidf import TfIdf
from systemds.scuro.representations.x3d import X3D
from systemds.scuro.representations.x3d import I3D
from systemds.scuro.representations.color_histogram import ColorHistogram
from systemds.scuro.representations.resnet import ResNet
from systemds.scuro.modality.unimodal_modality import UnimodalModality
from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
from systemds.scuro.representations.mfcc import MFCC
from systemds.scuro.representations.resnet import ResNet
from systemds.scuro.representations.swin_video_transformer import SwinVideoTransformer
from tests.scuro.data_generator import (
TestDataLoader,
ModalityRandomDataGenerator,
Expand All @@ -72,7 +57,6 @@
ZeroCrossingRate,
BandpowerFFT,
)
from systemds.scuro.representations.vgg import VGG19


class TestUnimodalRepresentations(unittest.TestCase):
Expand Down Expand Up @@ -103,12 +87,11 @@ def _create_audio_modality(self, signal_length=1000):
return audio

def test_audio_representation_transform_output_shapes(self):
audio = self._create_audio_modality()
audio = self._create_audio_modality(signal_length=200)
audio_representations = [
(MFCC(), (2, 12)),
(MelSpectrogram(), (2, 128)),
(Spectrogram(), (2, 1025)),
(Wav2Vec(), (1, None)),
(Spectral(), (2, 4)),
(ZeroCrossing(), (2, None)),
(RMSE(), (2, None)),
Expand Down Expand Up @@ -138,14 +121,13 @@ def test_audio_representations(self):
MFCC(),
MelSpectrogram(),
Spectrogram(),
Wav2Vec(),
Spectral(),
ZeroCrossing(),
RMSE(),
Pitch(),
]
audio_data, audio_md = ModalityRandomDataGenerator().create_audio_data(
self.num_instances, 1000
self.num_instances, 200
)

audio = UnimodalModality(
Expand Down Expand Up @@ -181,7 +163,7 @@ def test_timeseries_representations(self):
BandpowerFFT(),
]
ts_data, ts_md = ModalityRandomDataGenerator().create_timeseries_data(
self.num_instances, 1000
self.num_instances, 100
)

ts = UnimodalModality(
Expand All @@ -201,10 +183,8 @@ def test_timeseries_representations(self):
assert (ts.data[i] == original_data[i]).all()

def test_image_representations(self):
image_representations = [ColorHistogram(), CLIPVisual(), ResNet()]

image_data, image_md = ModalityRandomDataGenerator().create_visual_modality(
self.num_instances, 1
self.num_instances, 1, height=8, width=8
)

image = UnimodalModality(
Expand All @@ -213,10 +193,9 @@ def test_image_representations(self):
)
)

for representation in image_representations:
r = image.apply_representation(representation)
assert r.data is not None
assert len(r.data) == self.num_instances
r = image.apply_representation(ColorHistogram())
assert r.data is not None
assert len(r.data) == self.num_instances

# def test_video_representations(self):
# video_representations = [
Expand All @@ -241,47 +220,34 @@ def test_image_representations(self):
# assert len(r.data) == self.num_instances

def test_text_representations(self):
test_representations = [
CLIPText(),
Bert(),
BoW(2, 2),
TfIdf(),
W2V(),
GloVe(),
ALBERT(),
ELECTRA(),
RoBERTa(),
DistillBERT(),
]
text_data, text_md = ModalityRandomDataGenerator().create_text_data(
self.num_instances, 100
self.num_instances, 3
)
text = UnimodalModality(
TestDataLoader(
self.indices, None, ModalityType.TEXT, text_data, str, text_md
)
)
for representation in test_representations:
for representation in [BoW(2, 2), TfIdf()]:
r = text.apply_representation(representation)
assert r.data is not None
assert len(r.data) == self.num_instances

def test_chunked_video_representations(self):
video_representations = [ResNet()]
video_data, video_md = ModalityRandomDataGenerator().create_visual_modality(
self.num_instances, 25
self.num_instances, 30
)
video = UnimodalModality(
TestDataLoader(
self.indices, None, ModalityType.VIDEO, video_data, np.float32, video_md
)
)
for representation in video_representations:
r = video.apply_representation(representation)
assert r.data is not None
assert len(r.data) == self.num_instances
assert len(r.metadata) == self.num_instances
r = video.apply_representation(ResNet(model_name="ResNet18"))
assert r.data is not None
assert len(r.data) == self.num_instances
assert len(r.metadata) == self.num_instances


# TODO: add unit tests for the other representations
if __name__ == "__main__":
unittest.main()
22 changes: 12 additions & 10 deletions src/main/python/tests/scuro/test_window_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@
class TestWindowOperations(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.num_instances = 40
cls.num_instances = 4
cls.data_generator = ModalityRandomDataGenerator()
cls.aggregations = ["mean", "sum", "max", "min"]

def test_static_window(self):
num_windows = 5
data, md = self.data_generator.create_visual_modality(self.num_instances, 50)
data, md = self.data_generator.create_visual_modality(self.num_instances, 10)
modality = UnimodalModality(
TestDataLoader(
[i for i in range(0, self.num_instances)],
Expand All @@ -63,7 +63,7 @@ def test_static_window(self):

def test_dynamic_window(self):
num_windows = 5
data, md = self.data_generator.create_visual_modality(self.num_instances, 50)
data, md = self.data_generator.create_visual_modality(self.num_instances, 10)
modality = UnimodalModality(
TestDataLoader(
[i for i in range(0, self.num_instances)],
Expand Down Expand Up @@ -93,19 +93,21 @@ def test_window_operations_on_text_representations(self):
self.run_window_aggregation_for_modality(ModalityType.TEXT, window_size)

def run_window_aggregation_for_modality(self, modality_type, window_size):
r = self.data_generator.create1DModality(40, 5000, modality_type)
r = self.data_generator.create1DModality(self.num_instances, 200, modality_type)
for aggregation in self.aggregations:
windowed_modality = r.window_aggregation(window_size, aggregation)

self.verify_window_operation(aggregation, r, windowed_modality, window_size)

def test_window_aggregation_on_3d_modality(self):
data, _ = self.data_generator.create_3d_modality(40, (100, 28, 28))
data, _ = self.data_generator.create_3d_modality(
self.num_instances, (100, 8, 8)
)
embedding_modality = TransformedModality(
self.data_generator, "test_transformation"
)
embedding_modality.data = data
embedding_modality.stats = RepresentationStats(40, (100, 28, 28))
embedding_modality.stats = RepresentationStats(self.num_instances, (100, 8, 8))
num_windows = 10

for window_operator in [
Expand All @@ -115,17 +117,17 @@ def test_window_aggregation_on_3d_modality(self):
]:
stats = window_operator.get_output_stats(embedding_modality.stats)
assert stats.num_instances == self.num_instances
assert stats.output_shape == (num_windows, 28, 28)
assert stats.output_shape == (num_windows, 8, 8)

windowed_modality = embedding_modality.context(window_operator)

def test_window_aggregation_on_2d_modality(self):
data, _ = self.data_generator.create_2d_modality(40, (100, 28))
data, _ = self.data_generator.create_2d_modality(self.num_instances, (100, 8))
embedding_modality = TransformedModality(
self.data_generator, "test_transformation"
)
embedding_modality.data = data
embedding_modality.stats = RepresentationStats(40, (100, 28))
embedding_modality.stats = RepresentationStats(self.num_instances, (100, 8))
num_windows = 10

for window_operator in [
Expand All @@ -135,7 +137,7 @@ def test_window_aggregation_on_2d_modality(self):
]:
stats = window_operator.get_output_stats(embedding_modality.stats)
assert stats.num_instances == self.num_instances
assert stats.output_shape == (num_windows, 28)
assert stats.output_shape == (num_windows, 8)

windowed_modality = embedding_modality.context(window_operator)

Expand Down
Loading