@@ -1483,3 +1483,141 @@ def forward(x):
14831483 x = torch .sin (x )
14841484 x = torch .cos (x )
14851485 return x
1486+
1487+
1488+ # =============================================================================
1489+ # Conditional Diffusion UNet
1490+ # =============================================================================
1491+ # Adapted from TeaPearce/Conditional_Diffusion_MNIST:
1492+ # https://github.com/TeaPearce/Conditional_Diffusion_MNIST
1493+
1494+
class _ResidualConvBlock(nn.Module):
    """Two (3x3 conv -> BatchNorm -> GELU) stages with an optional residual sum.

    With ``is_res=True`` the output is ``(skip + h2) / 1.414`` where the skip
    is the raw input when the channel counts match and the first stage's
    output otherwise; 1.414 (~sqrt(2)) keeps the variance of the sum roughly
    level. With ``is_res=False`` the two stages are applied back to back.
    """

    def __init__(self, in_channels, out_channels, is_res=False):
        super().__init__()
        self.same_channels = in_channels == out_channels
        self.is_res = is_res

        def conv_bn_gelu(c_in, c_out):
            # One conv stage: 3x3, stride 1, padding 1 (spatial size preserved).
            return nn.Sequential(
                nn.Conv2d(c_in, c_out, 3, 1, 1),
                nn.BatchNorm2d(c_out),
                nn.GELU(),
            )

        self.conv1 = conv_bn_gelu(in_channels, out_channels)
        self.conv2 = conv_bn_gelu(out_channels, out_channels)

    def forward(self, x):
        h1 = self.conv1(x)
        h2 = self.conv2(h1)
        if not self.is_res:
            return h2
        # Residual path: fall back to h1 as the skip when the input cannot
        # be added directly (channel mismatch).
        skip = x if self.same_channels else h1
        return (skip + h2) / 1.414
1524+
1525+
class _UnetDown(nn.Module):
    """Encoder stage: one residual conv block, then a 2x2 max-pool.

    Changes channels from ``in_channels`` to ``out_channels`` and halves the
    spatial resolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = [
            _ResidualConvBlock(in_channels, out_channels),
            nn.MaxPool2d(2),
        ]
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        return self.model(x)
1533+
1534+
class _UnetUp(nn.Module):
    """Decoder stage: concat the skip connection, upsample 2x, refine.

    ``forward`` concatenates ``x`` and ``skip`` along the channel axis, so
    ``in_channels`` must equal the sum of their channel counts; a stride-2
    transpose conv then doubles the spatial size before two residual conv
    blocks refine the result.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = [
            nn.ConvTranspose2d(in_channels, out_channels, 2, 2),
            _ResidualConvBlock(out_channels, out_channels),
            _ResidualConvBlock(out_channels, out_channels),
        ]
        self.model = nn.Sequential(*stages)

    def forward(self, x, skip):
        merged = torch.cat((x, skip), 1)
        return self.model(merged)
1548+
1549+
class _EmbedFC(nn.Module):
    """Two-layer MLP embedding: ``input_dim -> emb_dim -> emb_dim`` with GELU.

    The input is first flattened to ``(-1, input_dim)``, so any leading
    batch-like dimensions are collapsed before the linear layers.
    """

    def __init__(self, input_dim, emb_dim):
        super().__init__()
        self.input_dim = input_dim
        layers = [
            nn.Linear(input_dim, emb_dim),
            nn.GELU(),
            nn.Linear(emb_dim, emb_dim),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        flat = x.view(-1, self.input_dim)
        return self.model(flat)
1563+
1564+
class ContextUnet(nn.Module):
    """Conditional UNet for diffusion models.

    Encoder: an initial residual conv block, then two _UnetDown stages that
    each halve the spatial size; ``to_vec`` average-pools a 7x7 map to a
    1x1 bottleneck vector. Decoder: ``up0`` re-expands the vector to 7x7,
    then two _UnetUp stages upsample while consuming encoder skips; each
    decoder stage is modulated FiLM-style as ``cemb * h + temb`` with class
    and timestep embeddings.

    NOTE(review): the fixed 7x7 kernels in ``to_vec`` and ``up0`` require the
    feature map to be exactly 7x7 after two downsamples, i.e. 28x28 inputs
    (MNIST-sized) — confirm against callers before reusing elsewhere.
    """

    def __init__(self, in_channels, n_feat=256, n_classes=10):
        super().__init__()
        self.in_channels = in_channels
        self.n_feat = n_feat  # base channel width; deeper stages use 2*n_feat
        self.n_classes = n_classes

        self.init_conv = _ResidualConvBlock(in_channels, n_feat, is_res=True)

        self.down1 = _UnetDown(n_feat, n_feat)
        self.down2 = _UnetDown(n_feat, 2 * n_feat)

        # Collapse the (assumed) 7x7 feature map to one vector per sample.
        self.to_vec = nn.Sequential(nn.AvgPool2d(7), nn.GELU())

        # Scalar timestep -> per-stage feature vector.
        self.timeembed1 = _EmbedFC(1, 2 * n_feat)
        self.timeembed2 = _EmbedFC(1, 1 * n_feat)
        # One-hot class context -> per-stage feature vector.
        self.contextembed1 = _EmbedFC(n_classes, 2 * n_feat)
        self.contextembed2 = _EmbedFC(n_classes, 1 * n_feat)

        # Re-expand the 1x1 bottleneck back to a 7x7 map in one stride-7 step.
        self.up0 = nn.Sequential(
            nn.ConvTranspose2d(2 * n_feat, 2 * n_feat, 7, 7),
            nn.GroupNorm(8, 2 * n_feat),
            nn.ReLU(),
        )

        # Input channels double at each up stage: skip tensors are
        # concatenated inside _UnetUp.forward.
        self.up1 = _UnetUp(4 * n_feat, n_feat)
        self.up2 = _UnetUp(2 * n_feat, n_feat)
        self.out = nn.Sequential(
            nn.Conv2d(2 * n_feat, n_feat, 3, 1, 1),
            nn.GroupNorm(8, n_feat),
            nn.ReLU(),
            nn.Conv2d(n_feat, self.in_channels, 3, 1, 1),
        )

    def forward(self, x, c, t, context_mask):
        """Run the conditional UNet.

        Args:
            x: input image batch, NCHW (spatial size must reach 7x7 after two
                downsamples — presumably 28x28; verify against caller).
            c: integer class labels, shape (batch,), values in [0, n_classes).
            t: diffusion timestep values fed to the 1-d time embeddings.
            context_mask: per-sample 0/1 tensor; samples where the mask is 1
                have their class conditioning zeroed out (see remap below).

        Returns:
            Tensor with ``in_channels`` channels and the spatial size of ``x``.
        """
        x = self.init_conv(x)
        down1 = self.down1(x)
        down2 = self.down2(down1)
        hiddenvec = self.to_vec(down2)

        # One-hot encode the labels so they can be masked arithmetically.
        c = nn.functional.one_hot(c, num_classes=self.n_classes).type(torch.float)

        # Expand the mask to (batch, n_classes), then remap 1 -> 0 and
        # 0 -> -1: masked samples lose their label entirely, kept samples end
        # up with a *negated* one-hot. This mirrors the upstream TeaPearce
        # implementation — quirk preserved deliberately.
        context_mask = context_mask[:, None]
        context_mask = context_mask.repeat(1, self.n_classes)
        context_mask = -1 * (1 - context_mask)
        c = c * context_mask

        # Per-stage conditioning vectors, shaped to broadcast over H and W.
        cemb1 = self.contextembed1(c).view(-1, self.n_feat * 2, 1, 1)
        temb1 = self.timeembed1(t).view(-1, self.n_feat * 2, 1, 1)
        cemb2 = self.contextembed2(c).view(-1, self.n_feat, 1, 1)
        temb2 = self.timeembed2(t).view(-1, self.n_feat, 1, 1)

        up1 = self.up0(hiddenvec)
        # FiLM-style modulation: scale by class context, shift by timestep.
        up2 = self.up1(cemb1 * up1 + temb1, down2)
        up3 = self.up2(cemb2 * up2 + temb2, down1)
        # Outermost skip: final convs see the last decoder features
        # concatenated with the initial conv output.
        out = self.out(torch.cat((up3, x), 1))
        return out
0 commit comments