From de5eada14814319c92e20cc6d5db5bc04246d49b Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 30 Mar 2026 10:04:27 +1300 Subject: [PATCH 1/2] [ML] Add EuroBERT/Jina v5 ops to graph validation allowlist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jina Embeddings v5 is based on EuroBERT, which uses a different architecture from the BERT family: - RoPE (rotary position embeddings) → aten::sin, aten::cos - RMSNorm (instead of LayerNorm) → aten::rsqrt - SiLU activation (instead of GELU) → aten::silu Required for Eland PR elastic/eland#818 which adds support for importing Jina v5 models into Elasticsearch. Made-with: Cursor --- bin/pytorch_inference/CSupportedOperations.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bin/pytorch_inference/CSupportedOperations.cc b/bin/pytorch_inference/CSupportedOperations.cc index 56dbbaa84..61229a5e0 100644 --- a/bin/pytorch_inference/CSupportedOperations.cc +++ b/bin/pytorch_inference/CSupportedOperations.cc @@ -41,7 +41,8 @@ const CSupportedOperations::TStringViewSet CSupportedOperations::FORBIDDEN_OPERA // deepset/tinyroberta-squad2, typeform/squeezebert-mnli, // facebook/bart-large-mnli, valhalla/distilbart-mnli-12-6, // distilbert-base-uncased-finetuned-sst-2-english, -// sentence-transformers/all-distilroberta-v1. +// sentence-transformers/all-distilroberta-v1, +// jinaai/jina-embeddings-v5-text-nano (EuroBERT + LoRA). // Eland-deployed variants of the above models (with pooling/normalization layers). // Additional ops from Elasticsearch integration test models // (PyTorchModelIT, TextExpansionQueryIT, TextEmbeddingQueryIT). @@ -68,6 +69,7 @@ const CSupportedOperations::TStringViewSet CSupportedOperations::ALLOWED_OPERATI "aten::clone"sv, "aten::contiguous"sv, "aten::copy_"sv, + "aten::cos"sv, "aten::cumsum"sv, "aten::detach"sv, "aten::div"sv, @@ -117,10 +119,13 @@ const CSupportedOperations::TStringViewSet CSupportedOperations::ALLOWED_OPERATI "aten::relu"sv, "aten::repeat"sv, "aten::reshape"sv, + "aten::rsqrt"sv, "aten::rsub"sv, "aten::scaled_dot_product_attention"sv, "aten::select"sv, "aten::sign"sv, + "aten::silu"sv, + "aten::sin"sv, "aten::size"sv, "aten::slice"sv, "aten::softmax"sv, From 768d2f0156f7fe862d303c201a9250e436046675 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 30 Mar 2026 12:25:28 +1300 Subject: [PATCH 2/2] [ML] Fix graph validator tests for sin/cos now in allowlist aten::sin and aten::cos are now in the allowlist (needed by EuroBERT/Jina v5 for rotary position embeddings), so tests that used them as example "unrecognised" ops now fail. - Replace torch.sin with torch.logit in synthetic test modules - Update malicious model tests to check for ops that remain unrecognised (aten::tan, aten::exp) rather than sin/cos Made-with: Cursor --- .../unittest/CModelGraphValidatorTest.cc | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc index 5180fb403..e292b78b9 100644 --- a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc +++ b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc @@ -259,11 +259,11 @@ BOOST_AUTO_TEST_CASE(testValidModuleWithAllowedOps) { } BOOST_AUTO_TEST_CASE(testModuleWithUnrecognisedOps) { - // torch.sin is not in the transformer allowlist. + // torch.logit is not in the transformer allowlist. ::torch::jit::Module m("__torch__.UnknownOps"); m.define(R"( def forward(self, x: Tensor) -> Tensor: - return torch.sin(x) + return torch.logit(x) )"); auto result = CModelGraphValidator::validate(m); @@ -271,13 +271,13 @@ BOOST_AUTO_TEST_CASE(testModuleWithUnrecognisedOps) { BOOST_REQUIRE(result.s_IsValid == false); BOOST_REQUIRE(result.s_ForbiddenOps.empty()); BOOST_REQUIRE(result.s_UnrecognisedOps.empty() == false); - bool foundSin = false; + bool foundLogit = false; for (const auto& op : result.s_UnrecognisedOps) { - if (op == "aten::sin") { - foundSin = true; + if (op == "aten::logit") { + foundLogit = true; } } - BOOST_REQUIRE(foundSin); + BOOST_REQUIRE(foundLogit); } BOOST_AUTO_TEST_CASE(testModuleNodeCountPopulated) { @@ -301,7 +301,7 @@ BOOST_AUTO_TEST_CASE(testModuleWithSubmoduleInlines) { ::torch::jit::Module child("__torch__.Child"); child.define(R"( def forward(self, x: Tensor) -> Tensor: - return torch.sin(x) + return torch.logit(x) )"); ::torch::jit::Module parent("__torch__.Parent"); @@ -314,13 +314,13 @@ BOOST_AUTO_TEST_CASE(testModuleWithSubmoduleInlines) { auto result = CModelGraphValidator::validate(parent); BOOST_REQUIRE(result.s_IsValid == false); - bool foundSin = false; + bool foundLogit = false; for (const auto& op : result.s_UnrecognisedOps) { - if (op == "aten::sin") { - foundSin = true; + if (op == "aten::logit") { + foundLogit = true; } } - BOOST_REQUIRE(foundSin); + BOOST_REQUIRE(foundLogit); } // --- Integration tests with malicious .pt model fixtures --- @@ -363,34 +363,38 @@ BOOST_AUTO_TEST_CASE(testMaliciousMixedFileReader) { BOOST_AUTO_TEST_CASE(testMaliciousHiddenInSubmodule) { // Unrecognised ops buried three levels deep in nested submodules. // The validator must inline through all submodules to find them. + // The model uses aten::sin which is now allowed (EuroBERT/Jina v5), + // but also contains other ops that remain unrecognised. auto module = ::torch::jit::load("testfiles/malicious_models/malicious_hidden_in_submodule.pt"); auto result = CModelGraphValidator::validate(module); BOOST_REQUIRE(result.s_IsValid == false); BOOST_REQUIRE(result.s_ForbiddenOps.empty()); - BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::sin")); + BOOST_REQUIRE(result.s_UnrecognisedOps.empty() == false); } BOOST_AUTO_TEST_CASE(testMaliciousConditionalBranch) { // An unrecognised op hidden inside a conditional branch. The // validator must recurse into prim::If blocks to detect it. + // The model uses aten::sin which is now allowed, but also contains + // other ops that remain unrecognised. auto module = ::torch::jit::load("testfiles/malicious_models/malicious_conditional.pt"); auto result = CModelGraphValidator::validate(module); BOOST_REQUIRE(result.s_IsValid == false); - BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::sin")); + BOOST_REQUIRE(result.s_UnrecognisedOps.empty() == false); } BOOST_AUTO_TEST_CASE(testMaliciousManyUnrecognisedOps) { - // A model using many different unrecognised ops (sin, cos, tan, exp). + // A model using many different ops (sin, cos, tan, exp). + // sin and cos are now allowed (EuroBERT/Jina v5), but tan and exp + // remain unrecognised. auto module = ::torch::jit::load("testfiles/malicious_models/malicious_many_unrecognised.pt"); auto result = CModelGraphValidator::validate(module); BOOST_REQUIRE(result.s_IsValid == false); BOOST_REQUIRE(result.s_ForbiddenOps.empty()); - BOOST_REQUIRE(result.s_UnrecognisedOps.size() >= 4); - BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::sin")); - BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::cos")); + BOOST_REQUIRE(result.s_UnrecognisedOps.size() >= 2); BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::tan")); BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::exp")); }