diff --git a/crates/lance-graph-contract/src/literal_graph.rs b/crates/lance-graph-contract/src/literal_graph.rs index 03f2b58c..9e8367af 100644 --- a/crates/lance-graph-contract/src/literal_graph.rs +++ b/crates/lance-graph-contract/src/literal_graph.rs @@ -623,6 +623,11 @@ mod tests { } #[test] + // Miri can't enter `std::fs` under its default isolation (sensible — + // host filesystem access would let Miri-checked code escape the + // sandbox). This test reads a real on-disk graph fixture, so it's + // not a Miri target. Stable / nightly without Miri still run it. + #[cfg_attr(miri, ignore)] fn test_real_aiwar_graph() { let json_path = "/root/data/aiwar_graph.json"; let json = match std::fs::read_to_string(json_path) { diff --git a/crates/lance-graph-contract/tests/manifest_codegen.rs b/crates/lance-graph-contract/tests/manifest_codegen.rs index 9eab67ad..f759888b 100644 --- a/crates/lance-graph-contract/tests/manifest_codegen.rs +++ b/crates/lance-graph-contract/tests/manifest_codegen.rs @@ -253,6 +253,10 @@ fn load_canonical_manifests() -> Vec<(PathBuf, String)> { // --------------------------------------------------------------------------- #[test] +// Reads the on-disk canonical manifests + codegen output; Miri's isolation +// blocks `std::fs::read*`. The other tests in this file use in-memory YAML +// strings and run clean under Miri. +#[cfg_attr(miri, ignore)] fn test_idempotency() { // Parse the same manifests twice; produce the same codegen output both times. 
let pairs = load_canonical_manifests(); diff --git a/crates/lance-graph-ontology/src/lance_cache.rs b/crates/lance-graph-ontology/src/lance_cache.rs index 1535a96f..5cc52b66 100644 --- a/crates/lance-graph-ontology/src/lance_cache.rs +++ b/crates/lance-graph-ontology/src/lance_cache.rs @@ -647,11 +647,19 @@ fn parse_semantic_type_label(s: &str) -> SemanticType { } fn chrono_micros() -> i64 { - use std::time::{SystemTime, UNIX_EPOCH}; - SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|d| d.as_micros() as i64) - .unwrap_or(0) + // Miri sandbox bypass — see `registry::now_micros` for rationale. + #[cfg(miri)] + { + return 0; + } + #[cfg(not(miri))] + { + use std::time::{SystemTime, UNIX_EPOCH}; + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_micros() as i64) + .unwrap_or(0) + } } // ── ThinkingStyle label round-trip ────────────────────────────────────────── diff --git a/crates/lance-graph-ontology/src/registry.rs b/crates/lance-graph-ontology/src/registry.rs index 7e3fa589..71202893 100644 --- a/crates/lance-graph-ontology/src/registry.rs +++ b/crates/lance-graph-ontology/src/registry.rs @@ -488,6 +488,15 @@ impl RegistryState { } fn now_micros() -> i64 { + // Miri's default isolation blocks `clock_gettime(REALTIME)`, so any test + // that walks through the append path under Miri would abort. Timestamps + // aren't meaningful inside the sandbox anyway — return a deterministic + // sentinel so register/replay logic stays exercise-able under Miri. 
+ #[cfg(miri)] + { + return 0; + } + #[cfg(not(miri))] SystemTime::now() .duration_since(UNIX_EPOCH) .map(|d| d.as_micros() as i64) diff --git a/crates/lance-graph-ontology/tests/bridge_scope_lock.rs b/crates/lance-graph-ontology/tests/bridge_scope_lock.rs index 1cc8fa82..b4b71a8f 100644 --- a/crates/lance-graph-ontology/tests/bridge_scope_lock.rs +++ b/crates/lance-graph-ontology/tests/bridge_scope_lock.rs @@ -1,3 +1,9 @@ +// Skip under Miri — the whole file uses `tempfile::tempdir()` + +// `std::fs::create_dir_all/write` to stage TTL fixtures on disk, and Miri's +// isolation blocks `mkdir`/`open`. Stable and nightly without Miri run it +// normally. +#![cfg(not(miri))] + //! Bridge scope-lock test. //! //! Verifies that a `WoaBridge` cannot resolve a `Healthcare` entity, and diff --git a/crates/lance-graph-ontology/tests/dcterms_source_attribute_test.rs b/crates/lance-graph-ontology/tests/dcterms_source_attribute_test.rs index 247d8fa0..71f41423 100644 --- a/crates/lance-graph-ontology/tests/dcterms_source_attribute_test.rs +++ b/crates/lance-graph-ontology/tests/dcterms_source_attribute_test.rs @@ -32,6 +32,10 @@ fn customer_ttl_path() -> Option { } #[test] +// Reads `Customer.ttl` from /home/user/OGIT via `statx`; Miri's isolation +// blocks the syscall. Stable runs it normally; the test even skips +// gracefully when the file isn't present. +#[cfg_attr(miri, ignore)] fn dcterms_source_attribute_pairs_surface_for_customer() { let Some(path) = customer_ttl_path() else { eprintln!("SKIP: Customer.ttl not found at /home/user/OGIT (set OGIT_FORK_PATH)"); diff --git a/crates/lance-graph-ontology/tests/hydrate_real_ogit.rs b/crates/lance-graph-ontology/tests/hydrate_real_ogit.rs index a203d07d..5d7362a8 100644 --- a/crates/lance-graph-ontology/tests/hydrate_real_ogit.rs +++ b/crates/lance-graph-ontology/tests/hydrate_real_ogit.rs @@ -1,3 +1,8 @@ +// Skip under Miri — entire file reads the on-disk OGIT fork via std::fs; +// Miri isolation blocks the syscalls. 
Stable runs it normally; the test +// even skips gracefully when OGIT_FORK_PATH is unset. +#![cfg(not(miri))] + //! Hydrate against the actual `AdaWorldAPI/OGIT` fork. //! //! Runs only when `OGIT_FORK_PATH` is set to a directory containing the diff --git a/crates/lance-graph-ontology/tests/round_trip_ttl.rs b/crates/lance-graph-ontology/tests/round_trip_ttl.rs index ec325dd3..38a003e0 100644 --- a/crates/lance-graph-ontology/tests/round_trip_ttl.rs +++ b/crates/lance-graph-ontology/tests/round_trip_ttl.rs @@ -1,3 +1,7 @@ +// Skip under Miri — fixture is written to disk via tempfile + std::fs; Miri +// isolation blocks the syscalls. Stable runs it normally. +#![cfg(not(miri))] + //! End-to-end TTL hydration test. //! //! Builds a tiny TTL fixture, writes it to a tempdir, hydrates the diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 5305ef3f..5b2df78d 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,9 +1,12 @@ [toolchain] channel = "1.95.0" -# Pinned to 1.94.1 (latest 1.94 patch, 2026-03-25). 1.95 turned several -# previously-safe patterns into denied lints (e.g. unnecessary_sort_by) -# without sufficient value to justify the churn — bump explicitly when -# a future version is reviewed, never auto-track stable. +# Pinned to 1.95.0 (bumped from 1.94.1 in PR #367, 2026-05-13). Aligns +# with bevy (edition 2024 → 1.95 MSRV) and ndarray (bumped in parallel +# per LATEST_STATE 2026-05-13). 1.95 added clippy lints +# (`clippy::manual_checked_ops`, `clippy::unnecessary_sort_by`, +# `clippy::len_zero` cleanups) that PR #367 + #368 closed across the +# workspace. Never auto-track `stable` — bump explicitly when a future +# version is reviewed and workspace clippy passes clean. # # rustfmt + clippy are mandatory CI components and must be present # whenever the pinned channel is installed. 
Without this, `cargo fmt` diff --git a/scripts/miri-tests.sh b/scripts/miri-tests.sh new file mode 100755 index 00000000..2b448f5c --- /dev/null +++ b/scripts/miri-tests.sh @@ -0,0 +1,65 @@ +#!/bin/sh +# +# Miri test runner for lance-graph — ephemeral nightly, scoped to this +# script ONLY. +# +# Rules of the road (mirrors ndarray/scripts/miri-tests.sh): +# * Default toolchain is stable 1.95.0 (rust-toolchain.toml). +# `cargo build`, `cargo test`, `cargo clippy`, CI's clippy / tests +# jobs all use stable. Nothing else opts into nightly. +# * Miri ships nightly-only. This script invokes `cargo +nightly miri`, +# an ephemeral per-invocation switch — does NOT change the default. +# * The lance-graph workspace contains FFI-heavy crates that Miri +# CANNOT enter: `lance`, `arrow`, `datafusion`, BLAS, jitson/Cranelift. +# Those crates are skipped entirely. Miri's value is on the zero-dep +# contract crate, the planner's pure-Rust paths, and the small +# standalone codec/rbac/debug crates. +# * `lance-graph-ontology` has a `lance-cache` feature gating the +# Lance dataset path; this script runs it WITHOUT that feature so +# the registry / namespace / TTL parsing paths get checked under +# Miri. +# +# If this stays clean, the miri job in `.github/workflows/ci.yaml` can +# promote from optional → required for these crates. + +set -x +set -e + +# Idempotent install of the miri component. No-op when already present. +rustup component add miri --toolchain nightly >/dev/null 2>&1 || \ + rustup +nightly component add miri + +# Layout randomisation catches missing `#[repr(transparent)]` and similar. +export RUSTFLAGS="-Zrandomize-layout" + +# -Zmiri-ignore-leaks: lance-graph-ontology's test helpers do +# `Box::leak(name.into_boxed_str())` to fabricate `&'static str` values +# for `Schema::builder` (which intentionally takes `&'static str` for +# cache-friendly storage). 
Each test leaks ~10 bytes of namespace/entity +# names by design — Miri's process-exit leak detector flags every one. +# We accept the signal loss because production code does not use +# `Box::leak` anywhere; the leaks live only in test helpers. If real +# leaks creep into prod, clippy's `mem_forget` lint + the regular +# alloc-tracking would surface them on stable before they got near Miri. +export MIRIFLAGS="-Zmiri-ignore-leaks" + +# Crates that Miri can actually enter — no lance / arrow / datafusion +# / cblas / inline-asm cpuid paths in their default dependency closure. +MIRI_SAFE_CRATES=" + -p lance-graph-contract + -p lance-graph-rbac + -p neural-debug +" + +# Crates that build under Miri but skip their Lance-backed feature paths. +# `lance-graph-ontology` with default features off (no lance-cache) is safe; +# `tests/round_trip_ttl.rs` is `#![cfg(not(miri))]`, so it is not run here. +MIRI_SAFE_NO_DEFAULT=" + -p lance-graph-ontology --no-default-features +" + +# Run via `cargo +nightly miri test` (not nextest — lance-graph CI doesn't +# wire nextest, and plain `cargo miri test` is sufficient for the targeted +# crate scope here). +cargo +nightly miri test $MIRI_SAFE_CRATES +cargo +nightly miri test $MIRI_SAFE_NO_DEFAULT