From ac1faf4f00dea29cb7ec0f02abfdddbfce47e795 Mon Sep 17 00:00:00 2001 From: timonmerk Date: Sun, 29 Oct 2023 15:34:48 +0100 Subject: [PATCH 1/8] add torch API usage example --- docs/source/usage.rst | 93 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index a10414bd..1fc0b13f 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -1316,3 +1316,96 @@ Below is the documentation on the available arguments. --train-ratio 0.8 Ratio of train dataset. The remaining will be used for valid and test split. --valid-ratio 0.1 Ratio of validation set after the train data split. The remaining will be test split --share-model + +Model initialization using the Torch API +---------------------------------------- + +The scikit-learn API provides parametrization to many common use cases. +The Torch API however allows for more flexibility and customization, for e.g. +sampling, criterions, and data loaders. + +In this minimal example we show how to initialize a CEBRA model using the Torch API. +Here the :py:class:`cebra.data.single_session.DiscreteDataLoader` +gets initilized which also allows the `prior` to be directly parametrized. + +👉 For an example notebook using the Torch API check out the :doc:`demo_notebooks/Demo_Allen`. + + +.. testcode:: + + import numpy as np + import cebra.datasets + from cebra import plot_embedding + import torch + + if torch.cuda.is_available(): + device = "cuda" + else: + device = "cpu" + + neural_data = cebra.load_data(file="neural_data.npz", key="neural") + + discrete_label = cebra.load_data( + file="auxiliary_behavior_data.h5", key="auxiliary_variables", columns=["discrete"], + ) + + # 1. Define Cebra Dataset + InputData = cebra.data.TensorDataset( + torch.from_numpy(neural_data).type(torch.FloatTensor), + discrete=torch.from_numpy(np.array(discrete_label[:, 0])).type(torch.LongTensor), + ).to(device) + + # 2. 
Define Cebra Model + neural_model = cebra.models.init( + name="offset10-model", + num_neurons=InputData.input_dimension, + num_units=32, + num_output=2, + ).to(device) + + InputData.configure_for(neural_model) + + # 3. Define Loss Function Criterion and Optimizer + Crit = cebra.models.criterions.LearnableCosineInfoNCE( + temperature=0.001, + min_temperature=0.0001 + ).to(device) + + Opt = torch.optim.Adam( + list(neural_model.parameters()) + list(Crit.parameters()), + lr=0.001, + weight_decay=0, + ) + + # 4. Initialize Cebra Model + solver = cebra.solver.init( + name="single-session", + model=neural_model, + criterion=Crit, + optimizer=Opt, + tqdm_on=True, + ).to(device) + + # 5. Define Data Loader + loader = cebra.data.single_session.DiscreteDataLoader( + dataset=InputData, num_steps=10, batch_size=200, prior="uniform" + ) + + # 6. Fit Model + solver.fit(loader=loader) + + # 7. Transform Embedding + TrainBatches = np.lib.stride_tricks.sliding_window_view( + neural_data, neural_model.get_offset().__len__(), axis=0 + ) + + X_train_emb = solver.transform( + torch.from_numpy(TrainBatches[:]).type(torch.FloatTensor).to(device) + ).to(device) + + # 8. Plot Embedding + plot_embedding( + X_train_emb, + discrete_label[neural_model.get_offset().__len__() - 1 :, 0], + markersize=10, + ) From c1c423bc331e1e22a3e2518260780f44dfb0bd4f Mon Sep 17 00:00:00 2001 From: timonmerk Date: Sun, 29 Oct 2023 17:06:53 +0100 Subject: [PATCH 2/8] Update docs/source/usage.rst Co-authored-by: Steffen Schneider --- docs/source/usage.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 1fc0b13f..6139be52 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -1335,7 +1335,6 @@ gets initilized which also allows the `prior` to be directly parametrized. 
import numpy as np import cebra.datasets - from cebra import plot_embedding import torch if torch.cuda.is_available(): From 191da667ed1b69a46aa20ae75dbf0ee1c0e35cd3 Mon Sep 17 00:00:00 2001 From: timonmerk Date: Sun, 29 Oct 2023 17:07:05 +0100 Subject: [PATCH 3/8] Update docs/source/usage.rst Co-authored-by: Steffen Schneider --- docs/source/usage.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 6139be52..631c436b 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -1349,7 +1349,7 @@ gets initilized which also allows the `prior` to be directly parametrized. ) # 1. Define Cebra Dataset - InputData = cebra.data.TensorDataset( + input_data = cebra.data.TensorDataset( torch.from_numpy(neural_data).type(torch.FloatTensor), discrete=torch.from_numpy(np.array(discrete_label[:, 0])).type(torch.LongTensor), ).to(device) From 628311f39043f9a2fec25b135588089b1a71fc36 Mon Sep 17 00:00:00 2001 From: timonmerk Date: Sun, 29 Oct 2023 17:09:11 +0100 Subject: [PATCH 4/8] Update usage.rst - fix typo --- docs/source/usage.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 631c436b..376e5621 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -1326,7 +1326,7 @@ sampling, criterions, and data loaders. In this minimal example we show how to initialize a CEBRA model using the Torch API. Here the :py:class:`cebra.data.single_session.DiscreteDataLoader` -gets initilized which also allows the `prior` to be directly parametrized. +gets initialized which also allows the `prior` to be directly parametrized. 👉 For an example notebook using the Torch API check out the :doc:`demo_notebooks/Demo_Allen`. @@ -1403,7 +1403,7 @@ gets initilized which also allows the `prior` to be directly parametrized. ).to(device) # 8. 
Plot Embedding - plot_embedding( + cebra.plot_embedding( X_train_emb, discrete_label[neural_model.get_offset().__len__() - 1 :, 0], markersize=10, From 74b0b7dc12eb0c8c5e98afd66c292a1c37dc7edc Mon Sep 17 00:00:00 2001 From: timonmerk Date: Sun, 29 Oct 2023 17:19:39 +0100 Subject: [PATCH 5/8] Update usage.rst - Fix other variables --- docs/source/usage.rst | 52 +++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 376e5621..85c8a6ad 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -1336,75 +1336,75 @@ gets initialized which also allows the `prior` to be directly parametrized. import numpy as np import cebra.datasets import torch - + if torch.cuda.is_available(): device = "cuda" else: device = "cpu" - + neural_data = cebra.load_data(file="neural_data.npz", key="neural") - + discrete_label = cebra.load_data( file="auxiliary_behavior_data.h5", key="auxiliary_variables", columns=["discrete"], ) - + # 1. Define Cebra Dataset input_data = cebra.data.TensorDataset( torch.from_numpy(neural_data).type(torch.FloatTensor), discrete=torch.from_numpy(np.array(discrete_label[:, 0])).type(torch.LongTensor), ).to(device) - + # 2. Define Cebra Model neural_model = cebra.models.init( name="offset10-model", - num_neurons=InputData.input_dimension, + num_neurons=input_data.input_dimension, num_units=32, num_output=2, ).to(device) - - InputData.configure_for(neural_model) - + + input_data.configure_for(neural_model) + # 3. Define Loss Function Criterion and Optimizer - Crit = cebra.models.criterions.LearnableCosineInfoNCE( + crit = cebra.models.criterions.LearnableCosineInfoNCE( temperature=0.001, min_temperature=0.0001 ).to(device) - - Opt = torch.optim.Adam( - list(neural_model.parameters()) + list(Crit.parameters()), + + opt = torch.optim.Adam( + list(neural_model.parameters()) + list(crit.parameters()), lr=0.001, weight_decay=0, ) - + # 4. 
Initialize Cebra Model solver = cebra.solver.init( name="single-session", model=neural_model, - criterion=Crit, - optimizer=Opt, + criterion=crit, + optimizer=opt, tqdm_on=True, ).to(device) - + # 5. Define Data Loader loader = cebra.data.single_session.DiscreteDataLoader( - dataset=InputData, num_steps=10, batch_size=200, prior="uniform" + dataset=input_data, num_steps=10, batch_size=200, prior="uniform" ) - + # 6. Fit Model solver.fit(loader=loader) - + # 7. Transform Embedding - TrainBatches = np.lib.stride_tricks.sliding_window_view( + train_batches = np.lib.stride_tricks.sliding_window_view( neural_data, neural_model.get_offset().__len__(), axis=0 ) - - X_train_emb = solver.transform( - torch.from_numpy(TrainBatches[:]).type(torch.FloatTensor).to(device) + + x_train_emb = solver.transform( + torch.from_numpy(train_batches[:]).type(torch.FloatTensor).to(device) ).to(device) - + # 8. Plot Embedding cebra.plot_embedding( - X_train_emb, + x_train_emb, discrete_label[neural_model.get_offset().__len__() - 1 :, 0], markersize=10, ) From a2da7fa7b1b29eeb4f4f84121290852090eee118 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sun, 29 Oct 2023 19:07:34 +0100 Subject: [PATCH 6/8] Minor edit --- docs/source/usage.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 56198511..4f9fea67 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -1318,8 +1318,8 @@ Below is the documentation on the available arguments. --valid-ratio 0.1 Ratio of validation set after the train data split. The remaining will be test split --share-model -Model initialization using the Torch API ----------------------------------------- +Model training using the Torch API +---------------------------------- The scikit-learn API provides parametrization to many common use cases. The Torch API however allows for more flexibility and customization, for e.g. 
From 481d08a58cab5d89f899082c75c95976213e7185 Mon Sep 17 00:00:00 2001 From: Mackenzie Mathis Date: Wed, 15 Nov 2023 13:38:31 +0100 Subject: [PATCH 7/8] Update usage.rst - minor typesetting --- docs/source/usage.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 4f9fea67..948b80f0 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -1349,13 +1349,13 @@ gets initialized which also allows the `prior` to be directly parametrized. file="auxiliary_behavior_data.h5", key="auxiliary_variables", columns=["discrete"], ) - # 1. Define Cebra Dataset + # 1. Define a CEBRA-ready dataset input_data = cebra.data.TensorDataset( torch.from_numpy(neural_data).type(torch.FloatTensor), discrete=torch.from_numpy(np.array(discrete_label[:, 0])).type(torch.LongTensor), ).to(device) - # 2. Define Cebra Model + # 2. Define a CEBRA model neural_model = cebra.models.init( name="offset10-model", num_neurons=input_data.input_dimension, @@ -1365,7 +1365,7 @@ gets initialized which also allows the `prior` to be directly parametrized. input_data.configure_for(neural_model) - # 3. Define Loss Function Criterion and Optimizer + # 3. Define the Loss Function Criterion and Optimizer crit = cebra.models.criterions.LearnableCosineInfoNCE( temperature=0.001, min_temperature=0.0001 @@ -1377,7 +1377,7 @@ gets initialized which also allows the `prior` to be directly parametrized. weight_decay=0, ) - # 4. Initialize Cebra Model + # 4. 
Initialize the CEBRA model solver = cebra.solver.init( name="single-session", model=neural_model, From 3a13a9ea9cf6dc2c1012975b00b16084438b0151 Mon Sep 17 00:00:00 2001 From: Mackenzie Mathis Date: Thu, 23 Nov 2023 18:02:19 +0100 Subject: [PATCH 8/8] Update usage.rst --- docs/source/usage.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 948b80f0..c802d2fe 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -1367,8 +1367,7 @@ gets initialized which also allows the `prior` to be directly parametrized. # 3. Define the Loss Function Criterion and Optimizer crit = cebra.models.criterions.LearnableCosineInfoNCE( - temperature=0.001, - min_temperature=0.0001 + temperature=1, ).to(device) opt = torch.optim.Adam(