From de5eada14814319c92e20cc6d5db5bc04246d49b Mon Sep 17 00:00:00 2001
From: Ed Savage <ed.savage@elastic.co>
Date: Mon, 30 Mar 2026 10:04:27 +1300
Subject: [PATCH 1/2] [ML] Add EuroBERT/Jina v5 ops to graph validation
 allowlist
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Jina Embeddings v5 is based on EuroBERT, which uses a different
architecture from the BERT family:
- RoPE (rotary position embeddings) → aten::sin, aten::cos
- RMSNorm (instead of LayerNorm) → aten::rsqrt
- SiLU activation (instead of GELU) → aten::silu

Required for Eland PR elastic/eland#818 which adds support for
importing Jina v5 models into Elasticsearch.

Made-with: Cursor
---
 bin/pytorch_inference/CSupportedOperations.cc | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/bin/pytorch_inference/CSupportedOperations.cc b/bin/pytorch_inference/CSupportedOperations.cc
index 56dbbaa84..61229a5e0 100644
--- a/bin/pytorch_inference/CSupportedOperations.cc
+++ b/bin/pytorch_inference/CSupportedOperations.cc
@@ -41,7 +41,8 @@ const CSupportedOperations::TStringViewSet CSupportedOperations::FORBIDDEN_OPERA
 // deepset/tinyroberta-squad2, typeform/squeezebert-mnli,
 // facebook/bart-large-mnli, valhalla/distilbart-mnli-12-6,
 // distilbert-base-uncased-finetuned-sst-2-english,
-// sentence-transformers/all-distilroberta-v1.
+// sentence-transformers/all-distilroberta-v1,
+// jinaai/jina-embeddings-v5-text-nano (EuroBERT + LoRA).
 // Eland-deployed variants of the above models (with pooling/normalization layers).
 // Additional ops from Elasticsearch integration test models
 // (PyTorchModelIT, TextExpansionQueryIT, TextEmbeddingQueryIT).
@@ -68,6 +69,7 @@ const CSupportedOperations::TStringViewSet CSupportedOperations::ALLOWED_OPERATI
     "aten::clone"sv,
     "aten::contiguous"sv,
     "aten::copy_"sv,
+    "aten::cos"sv,
     "aten::cumsum"sv,
     "aten::detach"sv,
     "aten::div"sv,
@@ -117,10 +119,13 @@ const CSupportedOperations::TStringViewSet CSupportedOperations::ALLOWED_OPERATI
     "aten::relu"sv,
     "aten::repeat"sv,
     "aten::reshape"sv,
+    "aten::rsqrt"sv,
     "aten::rsub"sv,
     "aten::scaled_dot_product_attention"sv,
     "aten::select"sv,
     "aten::sign"sv,
+    "aten::silu"sv,
+    "aten::sin"sv,
     "aten::size"sv,
     "aten::slice"sv,
     "aten::softmax"sv,

From 768d2f0156f7fe862d303c201a9250e436046675 Mon Sep 17 00:00:00 2001
From: Ed Savage <ed.savage@elastic.co>
Date: Mon, 30 Mar 2026 12:25:28 +1300
Subject: [PATCH 2/2] [ML] Fix graph validator tests for sin/cos now in
 allowlist

aten::sin and aten::cos are now in the allowlist (needed by
EuroBERT/Jina v5 for rotary position embeddings), so tests that
used them as example "unrecognised" ops now fail.

- Replace torch.sin with torch.logit in synthetic test modules
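- Update malicious model tests to stop asserting on sin/cos:
  - testMaliciousManyUnrecognisedOps now checks only the ops that
    remain unrecognised (aten::tan, aten::exp)
  - testMaliciousHiddenInSubmodule and testMaliciousConditionalBranch
    now assert only that at least one unrecognised op is reported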

Made-with: Cursor
---
 .../unittest/CModelGraphValidatorTest.cc      | 38 ++++++++++---------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc
index 5180fb403..e292b78b9 100644
--- a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc
+++ b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc
@@ -259,11 +259,11 @@ BOOST_AUTO_TEST_CASE(testValidModuleWithAllowedOps) {
 }
 
 BOOST_AUTO_TEST_CASE(testModuleWithUnrecognisedOps) {
-    // torch.sin is not in the transformer allowlist.
+    // torch.logit is not in the transformer allowlist.
     ::torch::jit::Module m("__torch__.UnknownOps");
     m.define(R"(
         def forward(self, x: Tensor) -> Tensor:
-            return torch.sin(x)
+            return torch.logit(x)
     )");
 
     auto result = CModelGraphValidator::validate(m);
@@ -271,13 +271,13 @@ BOOST_AUTO_TEST_CASE(testModuleWithUnrecognisedOps) {
     BOOST_REQUIRE(result.s_IsValid == false);
     BOOST_REQUIRE(result.s_ForbiddenOps.empty());
     BOOST_REQUIRE(result.s_UnrecognisedOps.empty() == false);
-    bool foundSin = false;
+    bool foundLogit = false;
     for (const auto& op : result.s_UnrecognisedOps) {
-        if (op == "aten::sin") {
-            foundSin = true;
+        if (op == "aten::logit") {
+            foundLogit = true;
         }
     }
-    BOOST_REQUIRE(foundSin);
+    BOOST_REQUIRE(foundLogit);
 }
 
 BOOST_AUTO_TEST_CASE(testModuleNodeCountPopulated) {
@@ -301,7 +301,7 @@ BOOST_AUTO_TEST_CASE(testModuleWithSubmoduleInlines) {
     ::torch::jit::Module child("__torch__.Child");
     child.define(R"(
         def forward(self, x: Tensor) -> Tensor:
-            return torch.sin(x)
+            return torch.logit(x)
     )");
 
     ::torch::jit::Module parent("__torch__.Parent");
@@ -314,13 +314,13 @@ BOOST_AUTO_TEST_CASE(testModuleWithSubmoduleInlines) {
     auto result = CModelGraphValidator::validate(parent);
 
     BOOST_REQUIRE(result.s_IsValid == false);
-    bool foundSin = false;
+    bool foundLogit = false;
     for (const auto& op : result.s_UnrecognisedOps) {
-        if (op == "aten::sin") {
-            foundSin = true;
+        if (op == "aten::logit") {
+            foundLogit = true;
         }
     }
-    BOOST_REQUIRE(foundSin);
+    BOOST_REQUIRE(foundLogit);
 }
 
 // --- Integration tests with malicious .pt model fixtures ---
@@ -363,34 +363,38 @@ BOOST_AUTO_TEST_CASE(testMaliciousMixedFileReader) {
 BOOST_AUTO_TEST_CASE(testMaliciousHiddenInSubmodule) {
     // Unrecognised ops buried three levels deep in nested submodules.
     // The validator must inline through all submodules to find them.
+    // The model uses aten::sin which is now allowed (EuroBERT/Jina v5),
+    // but also contains other ops that remain unrecognised.
     auto module = ::torch::jit::load("testfiles/malicious_models/malicious_hidden_in_submodule.pt");
     auto result = CModelGraphValidator::validate(module);
 
     BOOST_REQUIRE(result.s_IsValid == false);
     BOOST_REQUIRE(result.s_ForbiddenOps.empty());
-    BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::sin"));
+    BOOST_REQUIRE(result.s_UnrecognisedOps.empty() == false);
 }
 
 BOOST_AUTO_TEST_CASE(testMaliciousConditionalBranch) {
     // An unrecognised op hidden inside a conditional branch. The
     // validator must recurse into prim::If blocks to detect it.
+    // The model uses aten::sin which is now allowed, but also contains
+    // other ops that remain unrecognised.
     auto module = ::torch::jit::load("testfiles/malicious_models/malicious_conditional.pt");
     auto result = CModelGraphValidator::validate(module);
 
     BOOST_REQUIRE(result.s_IsValid == false);
-    BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::sin"));
+    BOOST_REQUIRE(result.s_UnrecognisedOps.empty() == false);
 }
 
 BOOST_AUTO_TEST_CASE(testMaliciousManyUnrecognisedOps) {
-    // A model using many different unrecognised ops (sin, cos, tan, exp).
+    // A model using many different ops (sin, cos, tan, exp).
+    // sin and cos are now allowed (EuroBERT/Jina v5), but tan and exp
+    // remain unrecognised.
     auto module = ::torch::jit::load("testfiles/malicious_models/malicious_many_unrecognised.pt");
     auto result = CModelGraphValidator::validate(module);
 
     BOOST_REQUIRE(result.s_IsValid == false);
     BOOST_REQUIRE(result.s_ForbiddenOps.empty());
-    BOOST_REQUIRE(result.s_UnrecognisedOps.size() >= 4);
-    BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::sin"));
-    BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::cos"));
+    BOOST_REQUIRE(result.s_UnrecognisedOps.size() >= 2);
     BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::tan"));
     BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::exp"));
 }