DELTA-TJ-submission
diff --git a/‎README.md‎
Lines changed: 83 additions & 1 deletion b/‎README.md‎
Lines changed: 83 additions & 1 deletion
diff --git a/‎configs/BCIC_IV_2a_config.json‎
Lines changed: 16 additions & 0 deletions b/‎configs/BCIC_IV_2a_config.json‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎configs/FACED_config.json‎
Lines changed: 16 additions & 0 deletions b/‎configs/FACED_config.json‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎configs/Stress_config.json‎
Lines changed: 16 additions & 0 deletions b/‎configs/Stress_config.json‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎configs/TUAB_config.json‎
Lines changed: 16 additions & 0 deletions b/‎configs/TUAB_config.json‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎configs/pretrain_config.json‎
Lines changed: 34 additions & 0 deletions b/‎configs/pretrain_config.json‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎demo_data/README.md‎
Lines changed: 15 additions & 0 deletions b/‎demo_data/README.md‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎demo_data/downstream_eeg_data.npz‎
10.3 KB b/‎demo_data/downstream_eeg_data.npz‎
10.3 KB
diff --git a/‎demo_data/eeg_data.npy‎
391 KB b/‎demo_data/eeg_data.npy‎
391 KB
diff --git a/‎photos/Main_fig1.png‎
2.46 MB b/‎photos/Main_fig1.png‎
2.46 MB
@@ -1 +1,83 @@
-# EEGFoundation
+# EEGFoundation: Tokenized Spatiotemporal Foundation Model for EEG Signals
+
+## Overview
+
+EEGFoundation is a novel foundation model that treats neural dynamics as a discrete semantic language, overcoming the limitations of vision-based EEG analysis paradigms. By implementing amplitude-aware tokenization and channel-independent pretraining on a 27,000+ hour EEG corpus, the model learns universal neural oscillation patterns that generalize across diverse EEG analysis tasks.
+
+
+
+![Main_fig1](./photos/Main_fig1.png)
+
+**Fig.1 The EEGFoundation framework for spatiotemporal sequence modeling**
+
+## Model Architecture
+
+EEGFoundation follows a three-stage hierarchical approach:
+
+1. **Amplitude-Aware Tokenization**: Continuous EEG signals are normalized and quantized into discrete symbolic tokens that preserve micro-voltage fluctuations while filtering high-frequency noise.
+
+2. **Temporal Pretraining**: Using a RoFormer encoder with Rotary Position Embeddings, the model learns universal temporal dynamics from channel-independent EEG streams.
+
+3. **Spatiotemporal Fusion**: Cross-channel attention dynamically aggregates local representations into a coherent global context for robust downstream task performance.
+
+## Quick Start
+
+### Environment Setup
+
+```bash
+# Clone the repository
+git clone https://github.com/yourusername/EEGFoundation_github.git
+cd EEGFoundation_github
+
+# Install dependencies (Python 3.10+ required)
+# Quote each requirement so the shell does not treat ">" as output redirection
+pip install "torch>=2.0.0" "transformers>=4.30.0" "numpy>=1.24.0" "scipy>=1.10.0"
+pip install "mne>=1.4.0" "einops>=0.6.0" "matplotlib>=3.7.0"
+```
+
+### Basic Usage
+
+```python
+from src.models.downstream_EEGFoundation import load_downstream_model
+import torch
+import numpy as np
+
+# Load pre-trained model for motor imagery classification
+model = load_downstream_model(
+    model_path="models/BCIC-2a_model.pth",
+    config_path="configs/BCIC_IV_2a_config.json"
+)
+
+# Prepare input data (example)
+batch_size = 2
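+num_channels = 22   # must match "num_channel" in configs/BCIC_IV_2a_config.json
+seq_length = 800    # must match "seq_len" in configs/BCIC_IV_2a_config.json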
+
+eeg_signal = torch.randn(batch_size, num_channels, seq_length).float()
+embedding = torch.randn(batch_size, 512).float()
+
+# Forward pass
+with torch.no_grad():
+    outputs = model(input_ids=eeg_signal, embedding_data=embedding)
+    predictions = torch.softmax(outputs['logits'], dim=-1)
+
+print(f"Predictions shape: {predictions.shape}")
+```
+
+### Demo Data
+
+```python
+import numpy as np
+
+# Load example data
+demo_data = np.load("demo_data/eeg_data.npy")
+print(f"Demo data shape: {demo_data.shape}")
+
+# The demo_data directory contains:
+# - eeg_data.npy: Sample EEG recordings
+# - downstream_eeg_data.npz: Processed data for downstream tasks
+# - make_data.py: Script to generate synthetic EEG data
+```
+
+## License
+
+This project is licensed under the Apache License 2.0. See the LICENSE file for details.
@@ -0,0 +1,16 @@
+{
+  "seq_len": 800,
+  "patch_size": 100,
+  "stride": 25,
+  "d_model": 512,
+  "num_classes": 4,
+  "num_channel": 22,
+  "rms_norm": false,
+  "embedding_dim": 512,
+  "projection_embedding_dim": 512,
+  "classification_dropout": 0.5,
+  "classification_hidden_dim": 512,
+  "learning_rate": 1e-4,
+  "weight_decay": 0.1,
+  "model_type": "eeg-downstream-classifier"
+}
@@ -0,0 +1,16 @@
+{
+  "seq_len": 2000,
+  "patch_size": 200,
+  "stride": 100,
+  "d_model": 512,
+  "num_classes": 9,
+  "num_channel": 32,
+  "rms_norm": false,
+  "embedding_dim": 512,
+  "projection_embedding_dim": 512,
+  "classification_dropout": 0.5,
+  "classification_hidden_dim": 512,
+  "learning_rate": 1e-4,
+  "weight_decay": 0.1,
+  "model_type": "eeg-downstream-classifier"
+}
@@ -0,0 +1,16 @@
+{
+  "seq_len": 1000,
+  "patch_size": 150,
+  "stride": 100,
+  "d_model": 512,
+  "num_classes": 2,
+  "num_channel": 20,
+  "rms_norm": false,
+  "embedding_dim": 512,
+  "projection_embedding_dim": 512,
+  "classification_dropout": 0.5,
+  "classification_hidden_dim": 512,
+  "learning_rate": 1e-4,
+  "weight_decay": 0.1,
+  "model_type": "eeg-downstream-classifier"
+}
@@ -0,0 +1,16 @@
+{
+  "seq_len": 4000,
+  "patch_size": 150,
+  "stride": 100,
+  "d_model": 512,
+  "num_classes": 2,
+  "num_channel": 20,
+  "rms_norm": false,
+  "embedding_dim": 512,
+  "projection_embedding_dim": 512,
+  "classification_dropout": 0.5,
+  "classification_hidden_dim": 512,
+  "learning_rate": 1e-4,
+  "weight_decay": 0.1,
+  "model_type": "eeg-downstream-classifier"
+}
@@ -0,0 +1,34 @@
+{
+    "activation": "silu",
+    "attention_probs_dropout_prob": 0.1,
+    "classifier_dropout": null,
+    "cls_token": "[CLS]",
+    "cls_token_id": 2003,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.1,
+    "hidden_size": 768,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "layer_norm_eps": 1e-12,
+    "mask_token": "[MASK]",
+    "mask_token_id": 2001,
+    "max_position_embeddings": 2000,
+    "model_type": "bert",
+    "num_attention_heads": 12,
+    "num_hidden_layers": 12,
+    "pad_token": "[PAD]",
+    "pad_token_id": 2002,
+    "position_embedding_type": "rotary",
+    "rope_theta": 10000,
+    "rotary_dim": 64,
+    "sep_token": "[SEP]",
+    "sep_token_id": 2004,
+    "transformers_version": "4.48.0",
+    "type_vocab_size": 2,
+    "unk_token": "[UNK]",
+    "unk_token_id": 2005,
+    "use_cache": true,
+    "use_flash_attention": true,
+    "vocab_size": 2006
+  }
+  
@@ -0,0 +1,15 @@
+# EEG Dataset Demo
+
+## Overview
+
+This dataset demo contains synthetic EEG (electroencephalogram) data designed for testing and development purposes. It includes two main data files with different formats commonly used in EEG signal processing and machine learning applications.
+
+## Data Structure
+
+### 1. eeg_data.npy
+
+- Shape: (50, 2000)
+- Description: Contains 50 samples of time-series data, each with 2000 time points
+
+
+### 2. downstream_eeg_data.npz
+
+- signal: Time-series data with shape (1, 2000)
+- embedding: Embedding vector with shape (1, 512)
+
+