diff --git a/.claude/board/EPIPHANIES.md b/.claude/board/EPIPHANIES.md index f71eb975..dd072108 100644 --- a/.claude/board/EPIPHANIES.md +++ b/.claude/board/EPIPHANIES.md @@ -65,6 +65,47 @@ stay as historical references. ## Entries (reverse chronological) +## 2026-04-20 — CORRECTION-OF 2026-04-20 "CAM-PQ at 6 B/row solves the argmax blind spot" + +**Status:** FINDING + +The PR #218 bench measured ICC 0.9998 on **128 rows** trained and +measured on the same 128 rows. This is a trivially-correct fit: +128 rows ≤ 256 centroids per subspace → every row gets its own +centroid → perfect reconstruction → perfect ICC. It does NOT +generalize to production-size tensors. + +Full-size validation on Qwen3-TTS-0.6B (234 CamPq tensors, 478 +total, production-size rows 1024–3072 per tensor): + +| Metric | Value | +|---|---| +| Mean ICC across 234 argmax tensors | **0.195** | +| Max ICC | 0.957 | +| Tensors meeting D5 gate (ICC ≥ 0.99) | **0 of 234** | +| Tensors with ICC ≥ 0.5 | 8 of 234 | +| Typical relative L2 reconstruction error | 0.70–0.90 | + +Diagnostic probe on gate_proj [3072, 1024] (`cam_pq_row_count_probe`): + +| n_train | icc_train | icc_all_rows | +|---|---|---| +| 128 | **1.000** | −0.304 | +| 256 | **1.000** | −0.130 | +| 512 | 0.531 | 0.015 | +| 3072 | −0.079 | −0.079 | + +**Root cause:** 6×256 PQ is centroid-starved for tensors with >256 +rows. The "128× compression at ICC 0.9999" claim was extrapolated +from a trivial 128-row in-training fit. + +**Infrastructure is sound** — `cam_pq_calibrate` CLI, `route_tensor` +classifier, serialization, ICC harness all work correctly. The +negative result is the codec's capacity vs tensor sizes. + +Cross-ref: `crates/bgz-tensor/examples/cam_pq_row_count_probe.rs`, +`crates/bgz-tensor/src/bin/cam_pq_calibrate.rs`. 
+ ## 2026-04-19 — Mandatory epiphanies log (this file) **Status:** FINDING @@ -750,7 +791,7 @@ quantized codec = 4 bits × n_cols = ~2 KB/row for q_proj (4096 cols), Correction needed in codec-findings-2026-04-20.md decision tree. ## 2026-04-20 — THE ANSWER: CAM-PQ at 6 B/row solves the argmax blind spot -**Status:** FINDING (measured, definitive) +**Status:** SUPERSEDED by 2026-04-20 CORRECTION (128-row trivial fit) Wired `ndarray::hpc::cam_pq::CamCodebook` as `CamPqRaw` + `CamPqPhase` candidates in codec_rnd_bench.rs. Same bench, same populations, diff --git a/crates/bgz-tensor/Cargo.lock b/crates/bgz-tensor/Cargo.lock index 8655e30a..e1bcb318 100644 --- a/crates/bgz-tensor/Cargo.lock +++ b/crates/bgz-tensor/Cargo.lock @@ -97,6 +97,7 @@ name = "bgz-tensor" version = "0.1.0" dependencies = [ "holograph", + "lance-graph-contract", "ndarray", "serde", "serde_json", @@ -449,6 +450,10 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lance-graph-contract" +version = "0.1.0" + [[package]] name = "libc" version = "0.2.183" diff --git a/crates/bgz-tensor/Cargo.toml b/crates/bgz-tensor/Cargo.toml index e7da9f19..9199c113 100644 --- a/crates/bgz-tensor/Cargo.toml +++ b/crates/bgz-tensor/Cargo.toml @@ -24,6 +24,7 @@ manifold clustering, then replaces matmul with precomputed distance table lookup [dependencies] ndarray = { path = "../../../ndarray", default-features = false, features = ["std"] } holograph = { path = "../holograph", default-features = false } +lance-graph-contract = { path = "../lance-graph-contract", optional = true } serde = { version = "1", features = ["derive"], optional = true } serde_json = { version = "1", optional = true } sha2 = { version = "0.10", optional = true } @@ -42,6 +43,12 @@ qwen35-full = ["qwen35-9b", "qwen35-27b-v1", "qwen35-27b-v2"] # 430 MB — all # Hydrate binary deps (serde + sha2). Only needed for the CLI tool. hydrate = ["dep:serde", "dep:serde_json", "dep:sha2"] +# CAM-PQ calibration binary deps. 
Reads safetensors/GGUF, classifies tensors +# via route_tensor, trains a CamCodebook per argmax-regime tensor via +# cam_pq::train_geometric, serializes codebooks + fingerprints. sha2 for +# per-artifact manifest checksums. +calibrate = ["dep:serde", "dep:serde_json", "dep:sha2", "dep:lance-graph-contract"] + # Lab / R&D modules — analysis tools and experimental codec candidates # that must NOT leak into production builds. Gate for the MFDFA fractal # descriptor (probe showed per-row fractal structure is flat on Qwen3 @@ -54,8 +61,17 @@ name = "hydrate" path = "src/hydrate.rs" required-features = ["hydrate"] +[[bin]] +name = "cam_pq_calibrate" +path = "src/bin/cam_pq_calibrate.rs" +required-features = ["calibrate"] + [[example]] name = "fractal_probe" required-features = ["lab"] +[[example]] +name = "cam_pq_row_count_probe" +required-features = ["calibrate"] + [dev-dependencies] diff --git a/crates/bgz-tensor/examples/cam_pq_row_count_probe.rs b/crates/bgz-tensor/examples/cam_pq_row_count_probe.rs new file mode 100644 index 00000000..ef80aa00 --- /dev/null +++ b/crates/bgz-tensor/examples/cam_pq_row_count_probe.rs @@ -0,0 +1,210 @@ +//! CAM-PQ diagnostic: ICC vs calibration row count. +//! +//! Runs `train_geometric` on an isolated tensor at increasing row counts +//! ({128, 256, 512, 1024, n_rows}) and reports the ICC_3_1 score between +//! pairwise cosines of the original vs decoded rows, measured on the +//! training population itself. +//! +//! **Purpose:** demonstrate whether the small-row-count ICC values from +//! `codec_rnd_bench.rs` (which measured 128 rows and saw ICC ≈ 0.9998) +//! extrapolate to production-size tensors. +//! +//! Run: +//! ```sh +//! cargo run --release --features calibrate --example cam_pq_row_count_probe \ +//! --manifest-path crates/bgz-tensor/Cargo.toml \ +//! -- +//! 
``` + +use ndarray::hpc::cam_pq::{self, CamCodebook, NUM_SUBSPACES}; +use ndarray::hpc::gguf::read_tensor_f32; +use ndarray::hpc::safetensors::read_safetensors_header; +use std::fs::File; +use std::io::BufReader; + +fn main() { + let args: Vec = std::env::args().collect(); + if args.len() < 3 { + eprintln!( + "Usage: cam_pq_row_count_probe " + ); + std::process::exit(1); + } + let path = &args[1]; + let pattern = &args[2]; + + let file = File::open(path).expect("open safetensors"); + let mut reader = BufReader::new(file); + let gguf = read_safetensors_header(&mut reader).expect("read header"); + + let tensor = gguf + .tensors + .iter() + .find(|t| t.name.contains(pattern)) + .unwrap_or_else(|| { + eprintln!("No tensor name contains {pattern:?}"); + std::process::exit(1); + }); + + println!("tensor: {} dims: {:?}", tensor.name, tensor.dimensions); + + let flat = read_tensor_f32(&mut reader, &gguf, tensor).expect("read tensor"); + + let (row_dim, n_rows) = if tensor.dimensions.len() == 2 { + (tensor.dimensions[1] as usize, tensor.dimensions[0] as usize) + } else { + eprintln!("Expected 2D tensor; got {:?}", tensor.dimensions); + std::process::exit(1); + }; + let adjusted_dim = (row_dim / NUM_SUBSPACES) * NUM_SUBSPACES; + let rows: Vec> = flat + .chunks_exact(row_dim) + .map(|c| c[..adjusted_dim].to_vec()) + .collect(); + assert_eq!(rows.len(), n_rows); + + let test_counts = [128, 256, 512, 1024, n_rows]; + + println!(); + println!( + "{:>8} | {:>10} | {:>12} | {:>10} | {:>12}", + "n_train", "icc_train", "icc_all_rows", "rel_err", "time_s" + ); + println!("{}", "-".repeat(62)); + + let mut seen = std::collections::BTreeSet::new(); + for &n_train in &test_counts { + if n_train > n_rows || !seen.insert(n_train) { + continue; + } + let start = std::time::Instant::now(); + let training = &rows[..n_train]; + let cb = cam_pq::train_geometric(training, adjusted_dim, 20); + let elapsed = start.elapsed(); + + let icc_train = measure_icc(training, &cb, 512); + let icc_all = 
measure_icc(&rows, &cb, 512); + let rel_err = relative_l2_error(&cb, &rows[..rows.len().min(512)]); + + println!( + "{:>8} | {:>10.4} | {:>12.4} | {:>10.4} | {:>12.2}", + n_train, + icc_train, + icc_all, + rel_err, + elapsed.as_secs_f32(), + ); + } + + println!(); + println!("Hypothesis: `icc_train` stays high (codebook fits training data);"); + println!("`icc_all_rows` collapses as n_train increases relative to codebook"); + println!("capacity (6 subspaces × 256 centroids = 256^6 possible fingerprints,"); + println!("but only 256 per-subspace partitions — ~n_train/256 rows land per"); + println!("centroid at saturation)."); +} + +fn measure_icc(rows: &[Vec], cb: &CamCodebook, samples: usize) -> f32 { + let n = rows.len(); + if n < 3 { + return f32::NAN; + } + let samples = samples.min(n * (n - 1) / 2); + let mut rng = SimpleRng::new(0x9E3779B97F4A7C15); + let mut truth: Vec = Vec::with_capacity(samples); + let mut pred: Vec = Vec::with_capacity(samples); + let mut count = 0; + while count < samples { + let i = (rng.next() as usize) % n; + let j = (rng.next() as usize) % n; + if i == j { + continue; + } + let t = cosine(&rows[i], &rows[j]); + let di = cb.decode(&cb.encode(&rows[i])); + let dj = cb.decode(&cb.encode(&rows[j])); + let p = cosine(&di, &dj); + truth.push(t); + pred.push(p); + count += 1; + } + icc_3_1(&truth, &pred) +} + +fn relative_l2_error(cb: &CamCodebook, rows: &[Vec]) -> f32 { + let mut sum_err = 0.0f64; + let mut sum_norm = 0.0f64; + for row in rows { + let decoded = cb.decode(&cb.encode(row)); + for (a, b) in row.iter().zip(decoded.iter()) { + sum_err += ((a - b) as f64).powi(2); + } + for &a in row { + sum_norm += (a as f64).powi(2); + } + } + if sum_norm > 0.0 { + (sum_err / sum_norm).sqrt() as f32 + } else { + 0.0 + } +} + +fn cosine(a: &[f32], b: &[f32]) -> f32 { + let n = a.len().min(b.len()); + let mut dot = 0.0f64; + let mut na = 0.0f64; + let mut nb = 0.0f64; + for i in 0..n { + let x = a[i] as f64; + let y = b[i] as f64; + dot += x * 
/// Cosine similarity of `a` and `b` over their common prefix, accumulated in
/// f64. Returns 0.0 when either vector has zero norm.
fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let mut dot = 0.0f64;
    let mut na = 0.0f64;
    let mut nb = 0.0f64;
    // `zip` stops at the shorter input, matching the original min-length loop.
    for (&x, &y) in a.iter().zip(b) {
        let (x, y) = (x as f64, y as f64);
        dot += x * y;
        na += x * x;
        nb += y * y;
    }
    let d = (na * nb).sqrt();
    if d > 0.0 {
        (dot / d) as f32
    } else {
        0.0
    }
}

/// Intraclass correlation between two paired measurement series.
///
/// Computes `(MS_R − MS_W) / (MS_R + MS_W)` where `MS_R` is the between-pair
/// mean square and `MS_W` the within-pair mean square.
///
/// Returns NaN when fewer than two pairs are supplied or when the two series
/// differ in length (there is no valid pairing); previously a shorter `pred`
/// panicked and a longer one was silently truncated. The result is also NaN
/// when both mean squares are zero (all values identical).
///
/// NOTE(review): the within-pair term here is the one-way ANOVA mean square
/// (it includes any constant offset between the two series), which is the
/// ICC(1,1) form in Shrout–Fleiss terminology rather than ICC(3,1). Confirm
/// against `codec_rnd_bench.rs` before renaming or changing the formula.
fn icc_3_1(truth: &[f32], pred: &[f32]) -> f32 {
    let n = truth.len();
    if n < 2 || pred.len() != n {
        return f32::NAN;
    }
    let mut grand = 0.0f64;
    for (&t, &p) in truth.iter().zip(pred) {
        grand += (t + p) as f64;
    }
    grand /= (2 * n) as f64;

    let mut ms_r = 0.0f64;
    let mut ms_w = 0.0f64;
    for (&t, &p) in truth.iter().zip(pred) {
        let row_mean = ((t + p) as f64) / 2.0;
        ms_r += 2.0 * (row_mean - grand).powi(2);
        ms_w += (t as f64 - row_mean).powi(2) + (p as f64 - row_mean).powi(2);
    }
    ms_r /= (n - 1) as f64;
    ms_w /= n as f64;
    ((ms_r - ms_w) / (ms_r + ms_w)) as f32
}

/// SplitMix64 generator: a deterministic stream of `u64` values from a seed.
struct SimpleRng {
    state: u64,
}
impl SimpleRng {
    /// Start the stream at `seed`.
    fn new(seed: u64) -> Self {
        Self { state: seed }
    }
    /// Advance the state by the golden-ratio increment and return the mixed
    /// output word.
    fn next(&mut self) -> u64 {
        self.state = self.state.wrapping_add(0x9E3779B97F4A7C15);
        let mut z = self.state;
        z = (z ^ (z >> 30)).wrapping_mul(0xBF58476D1CE4E5B9);
        z = (z ^ (z >> 27)).wrapping_mul(0x94D049BB133111EB);
        z ^ (z >> 31)
    }
}
manifest.json list of tensors with route, dims, paths, ICC, err +//! ``` +//! +//! # Binary formats +//! +//! Codebook (`*.cbk`): +//! ```text +//! magic [u8; 4] b"CMPQ" +//! version u32 LE 1 +//! subspaces u32 LE 6 +//! centroids u32 LE 256 +//! subspace_dim u32 LE original_dim / 6 +//! total_dim u32 LE original row dim (subspaces × subspace_dim) +//! then 6 × (centroids × subspace_dim) f32 LE centroids +//! ``` +//! +//! Fingerprints (`*.fp`): +//! ```text +//! magic [u8; 4] b"CMFP" +//! version u32 LE 1 +//! n_rows u64 LE +//! row_dim u32 LE original row dim +//! then 6 × n_rows bytes of packed fingerprints (row-major). +//! ``` +//! +//! # Usage +//! +//! ```bash +//! cargo run --release --features calibrate --bin cam_pq_calibrate \ +//! --manifest-path crates/bgz-tensor/Cargo.toml \ +//! -- [--max-rows N] [--icc-samples K] +//! ``` + +use ndarray::hpc::cam_pq::{self, CamCodebook, CamFingerprint, NUM_CENTROIDS, NUM_SUBSPACES}; +use ndarray::hpc::gguf::read_tensor_f32; +use ndarray::hpc::safetensors::read_safetensors_header; +use sha2::{Digest, Sha256}; +use std::fs::{self, File}; +use std::io::{BufReader, BufWriter, Read, Write}; +use std::path::{Path, PathBuf}; +use std::time::Instant; + +use lance_graph_contract::cam::{route_tensor, CodecRoute}; + +const KMEANS_ITERATIONS: usize = 20; +const DEFAULT_ICC_SAMPLES: usize = 512; + +fn main() { + let args: Vec = std::env::args().collect(); + if args.len() < 3 { + eprintln!( + "Usage: cam_pq_calibrate \ + [--max-rows N] [--icc-samples K]" + ); + std::process::exit(1); + } + + let model_path = PathBuf::from(&args[1]); + let out_dir = PathBuf::from(&args[2]); + let mut max_rows: Option = None; + let mut icc_samples: usize = DEFAULT_ICC_SAMPLES; + + let mut i = 3; + while i < args.len() { + match args[i].as_str() { + "--max-rows" => { + i += 1; + max_rows = Some(args[i].parse().expect("--max-rows expects integer")); + } + "--icc-samples" => { + i += 1; + icc_samples = args[i].parse().expect("--icc-samples expects 
integer"); + } + other => { + eprintln!("Unknown flag: {other}"); + std::process::exit(1); + } + } + i += 1; + } + + eprintln!("cam_pq_calibrate"); + eprintln!(" model: {}", model_path.display()); + eprintln!(" out_dir: {}", out_dir.display()); + eprintln!(" max_rows: {}", max_rows.map_or("all".to_string(), |n| n.to_string())); + eprintln!(" icc_samples: {icc_samples}"); + + fs::create_dir_all(out_dir.join("codebooks")).expect("mkdir codebooks"); + fs::create_dir_all(out_dir.join("fingerprints")).expect("mkdir fingerprints"); + fs::create_dir_all(out_dir.join("passthrough")).expect("mkdir passthrough"); + + let file = File::open(&model_path).expect("open model"); + let mut reader = BufReader::new(file); + + // Dispatch on extension: .gguf vs .safetensors + let is_safetensors = model_path + .extension() + .and_then(|s| s.to_str()) + .map(|e| e.eq_ignore_ascii_case("safetensors")) + .unwrap_or(false); + + let gguf = if is_safetensors { + read_safetensors_header(&mut reader).expect("read safetensors header") + } else { + ndarray::hpc::gguf::read_gguf_header(&mut reader).expect("read gguf header") + }; + + eprintln!(" tensors: {}", gguf.tensors.len()); + + let mut manifest_entries: Vec = Vec::new(); + let t_start = Instant::now(); + + for (idx, tensor) in gguf.tensors.iter().enumerate() { + let dims_u64: Vec = tensor.dimensions.clone(); + let route = route_tensor(&tensor.name, &dims_u64); + + let sanitized = sanitize_name(&tensor.name); + + eprint!( + "[{:>4}/{}] {:>12?} {:<60} dims={:?}", + idx + 1, + gguf.tensors.len(), + route, + truncate(&tensor.name, 60), + dims_u64 + ); + + match route { + CodecRoute::CamPq => { + let t0 = Instant::now(); + let (row_dim, n_rows) = match row_layout(&dims_u64) { + Some(v) => v, + None => { + eprintln!(" [skip: not a 2D matrix]"); + continue; + } + }; + let row_dim_u = row_dim as usize; + let n_rows_u = n_rows as usize; + + // Read the full tensor as f32. 
+ let flat = match read_tensor_f32(&mut reader, &gguf, tensor) { + Ok(v) => v, + Err(e) => { + eprintln!(" [read error: {e}]"); + continue; + } + }; + + // Chunk into rows. Limit row count for calibration if requested. + let rows_full: Vec> = flat + .chunks_exact(row_dim_u) + .map(|c| c.to_vec()) + .collect(); + assert_eq!(rows_full.len(), n_rows_u); + + let calibration_rows: &[Vec] = match max_rows { + Some(n) if n < n_rows_u => &rows_full[..n], + _ => &rows_full, + }; + + // CAM-PQ requires total_dim divisible by 6. If row_dim isn't, + // train on the largest multiple of 6 ≤ row_dim. + let adjusted_dim = (row_dim_u / NUM_SUBSPACES) * NUM_SUBSPACES; + if adjusted_dim == 0 { + eprintln!(" [skip: row_dim {row_dim_u} < 6]"); + continue; + } + + let codebook = + cam_pq::train_geometric(calibration_rows, adjusted_dim, KMEANS_ITERATIONS); + + // Encode every row (including any beyond max_rows). + let fingerprints: Vec = + rows_full.iter().map(|r| codebook.encode(r)).collect(); + + // Reconstruction error on a sample of the full population. + // Slice each row to adjusted_dim — CAM-PQ only encodes the + // first `adjusted_dim` floats; ndarray's + // `mean_reconstruction_error` would panic on row_dim mismatch + // when adjusted_dim < row_dim (non-6-multiple case). + let sample_n = rows_full.len().min(1024); + let recon_sample: Vec> = rows_full[..sample_n] + .iter() + .map(|r| r[..adjusted_dim].to_vec()) + .collect(); + let mean_err = codebook.mean_reconstruction_error(&recon_sample); + let rel_err = relative_l2_error(&codebook, &recon_sample); + + // Write codebook. + let cbk_path = out_dir.join("codebooks").join(format!("{sanitized}.cbk")); + write_codebook(&cbk_path, &codebook).expect("write codebook"); + let cbk_sha = sha256_file(&cbk_path).expect("sha256 codebook"); + + // Write fingerprints. 
+ let fp_path = out_dir.join("fingerprints").join(format!("{sanitized}.fp")); + write_fingerprints(&fp_path, row_dim as u32, &fingerprints) + .expect("write fingerprints"); + let fp_sha = sha256_file(&fp_path).expect("sha256 fingerprints"); + + // ICC_3_1 on pairwise cosines between ground-truth rows and + // their decoded counterparts. D5 gate fires on this number. + let icc = measure_icc(&rows_full, &codebook, icc_samples); + + let elapsed = t0.elapsed(); + eprintln!( + " codebook={} KB fp={} KB err={:.4} rel_err={:.4} icc={:.4} time={:.1}s", + fs::metadata(&cbk_path).map(|m| m.len() / 1024).unwrap_or(0), + fs::metadata(&fp_path).map(|m| m.len() / 1024).unwrap_or(0), + mean_err, + rel_err, + icc, + elapsed.as_secs_f32(), + ); + + manifest_entries.push(ManifestEntry { + name: tensor.name.clone(), + dtype: format!("{:?}", tensor.dtype), + dims: dims_u64.clone(), + route: "CamPq".into(), + codebook_file: Some(format!("codebooks/{sanitized}.cbk")), + codebook_sha256: Some(cbk_sha), + fingerprints_file: Some(format!("fingerprints/{sanitized}.fp")), + fingerprints_sha256: Some(fp_sha), + passthrough_file: None, + passthrough_sha256: None, + n_rows: Some(n_rows), + row_dim: Some(row_dim as u32), + mean_reconstruction_error: Some(mean_err), + relative_l2_error: Some(rel_err), + icc_3_1: Some(icc), + }); + } + CodecRoute::Passthrough => { + let flat = match read_tensor_f32(&mut reader, &gguf, tensor) { + Ok(v) => v, + Err(e) => { + eprintln!(" [read error: {e}]"); + continue; + } + }; + let pt_path = out_dir.join("passthrough").join(format!("{sanitized}.f32")); + write_f32_le(&pt_path, &flat).expect("write passthrough"); + let pt_sha = sha256_file(&pt_path).expect("sha256 passthrough"); + eprintln!( + " [passthrough {:.1} MB]", + fs::metadata(&pt_path).map(|m| m.len() as f64 / 1e6).unwrap_or(0.0), + ); + manifest_entries.push(ManifestEntry { + name: tensor.name.clone(), + dtype: format!("{:?}", tensor.dtype), + dims: dims_u64.clone(), + route: "Passthrough".into(), + 
codebook_file: None, + codebook_sha256: None, + fingerprints_file: None, + fingerprints_sha256: None, + passthrough_file: Some(format!("passthrough/{sanitized}.f32")), + passthrough_sha256: Some(pt_sha), + n_rows: None, + row_dim: None, + mean_reconstruction_error: None, + relative_l2_error: None, + icc_3_1: None, + }); + } + CodecRoute::Skip => { + eprintln!(" [skip]"); + manifest_entries.push(ManifestEntry { + name: tensor.name.clone(), + dtype: format!("{:?}", tensor.dtype), + dims: dims_u64.clone(), + route: "Skip".into(), + codebook_file: None, + codebook_sha256: None, + fingerprints_file: None, + fingerprints_sha256: None, + passthrough_file: None, + passthrough_sha256: None, + n_rows: None, + row_dim: None, + mean_reconstruction_error: None, + relative_l2_error: None, + icc_3_1: None, + }); + } + } + } + + // Write manifest. + let manifest = Manifest { + model: model_path.display().to_string(), + kmeans_iterations: KMEANS_ITERATIONS, + num_subspaces: NUM_SUBSPACES as u32, + num_centroids: NUM_CENTROIDS as u32, + max_rows_calibration: max_rows, + icc_samples, + entries: manifest_entries, + }; + let manifest_path = out_dir.join("manifest.json"); + let file = File::create(&manifest_path).expect("create manifest"); + serde_json::to_writer_pretty(BufWriter::new(file), &manifest).expect("write manifest"); + + let total = t_start.elapsed(); + eprintln!("done in {:.1}s ({:.1} min)", total.as_secs_f32(), total.as_secs_f32() / 60.0); + eprintln!("manifest: {}", manifest_path.display()); + + // Summary. 
+ let campq = manifest.entries.iter().filter(|e| e.route == "CamPq").count(); + let pt = manifest.entries.iter().filter(|e| e.route == "Passthrough").count(); + let skip = manifest.entries.iter().filter(|e| e.route == "Skip").count(); + eprintln!(" CamPq tensors: {campq}"); + eprintln!(" Passthrough tensors: {pt}"); + eprintln!(" Skip tensors: {skip}"); + + let min_icc = manifest + .entries + .iter() + .filter_map(|e| e.icc_3_1) + .fold(f32::INFINITY, f32::min); + let max_err = manifest + .entries + .iter() + .filter_map(|e| e.relative_l2_error) + .fold(0.0f32, f32::max); + if campq > 0 { + eprintln!(" min ICC_3_1 across CamPq tensors: {min_icc:.4}"); + eprintln!(" max relative L2 error: {max_err:.4}"); + if min_icc < 0.99 { + eprintln!( + "WARN: at least one tensor has ICC < 0.99 — D7 fallback threshold applies." + ); + } + } +} + +// ─── serialization ────────────────────────────────────────────────────────── + +fn write_codebook(path: &Path, cb: &CamCodebook) -> std::io::Result<()> { + let file = File::create(path)?; + let mut w = BufWriter::new(file); + w.write_all(b"CMPQ")?; + w.write_all(&1u32.to_le_bytes())?; // version + w.write_all(&(NUM_SUBSPACES as u32).to_le_bytes())?; + w.write_all(&(NUM_CENTROIDS as u32).to_le_bytes())?; + w.write_all(&(cb.subspace_dim as u32).to_le_bytes())?; + w.write_all(&(cb.total_dim as u32).to_le_bytes())?; + for s in 0..NUM_SUBSPACES { + let cb_s = &cb.codebooks[s]; + // Pad to NUM_CENTROIDS if the subspace had fewer unique centroids + // (kmeans may return fewer than NUM_CENTROIDS when n < k). Remaining + // centroids are zero-filled — encoder will never select them because + // squared_l2 against a zero vector dominates for any non-trivial row. 
+ for c in 0..NUM_CENTROIDS { + let centroid = cb_s + .centroids + .get(c) + .map(|v| v.as_slice()) + .unwrap_or(&[]); + for d in 0..cb.subspace_dim { + let val = centroid.get(d).copied().unwrap_or(0.0); + w.write_all(&val.to_le_bytes())?; + } + } + } + w.flush()?; + Ok(()) +} + +fn write_fingerprints( + path: &Path, + row_dim: u32, + fps: &[CamFingerprint], +) -> std::io::Result<()> { + let file = File::create(path)?; + let mut w = BufWriter::new(file); + w.write_all(b"CMFP")?; + w.write_all(&1u32.to_le_bytes())?; // version + w.write_all(&(fps.len() as u64).to_le_bytes())?; + w.write_all(&row_dim.to_le_bytes())?; + for fp in fps { + w.write_all(fp)?; + } + w.flush()?; + Ok(()) +} + +fn write_f32_le(path: &Path, data: &[f32]) -> std::io::Result<()> { + let file = File::create(path)?; + let mut w = BufWriter::new(file); + let bytes: Vec = data.iter().flat_map(|f| f.to_le_bytes()).collect(); + w.write_all(&bytes)?; + w.flush()?; + Ok(()) +} + +fn sha256_file(path: &Path) -> std::io::Result { + let mut f = File::open(path)?; + let mut hasher = Sha256::new(); + let mut buf = vec![0u8; 1 << 16]; + loop { + let n = f.read(&mut buf)?; + if n == 0 { + break; + } + hasher.update(&buf[..n]); + } + Ok(format!("{:x}", hasher.finalize())) +} + +// ─── measurement ──────────────────────────────────────────────────────────── + +/// ICC_3_1 computed on pairwise cosines between ground-truth rows and +/// decoded rows. Matches the protocol used by `codec_rnd_bench.rs`. 
+fn measure_icc(rows: &[Vec], cb: &CamCodebook, samples: usize) -> f32 { + let n_rows = rows.len(); + if n_rows < 3 { + return f32::NAN; + } + let samples = samples.min(n_rows * (n_rows - 1) / 2); + let mut rng = SimpleRng::new(0x9E3779B97F4A7C15); + let mut pairs: Vec<(usize, usize)> = Vec::with_capacity(samples); + while pairs.len() < samples { + let i = (rng.next() as usize) % n_rows; + let j = (rng.next() as usize) % n_rows; + if i != j { + pairs.push((i.min(j), i.max(j))); + } + } + let adjusted_dim = (rows[0].len() / NUM_SUBSPACES) * NUM_SUBSPACES; + let mut truth: Vec = Vec::with_capacity(samples); + let mut pred: Vec = Vec::with_capacity(samples); + for (i, j) in pairs { + let t = cosine(&rows[i][..adjusted_dim], &rows[j][..adjusted_dim]); + let di = cb.decode(&cb.encode(&rows[i])); + let dj = cb.decode(&cb.encode(&rows[j])); + let p = cosine(&di, &dj); + truth.push(t); + pred.push(p); + } + icc_3_1(&truth, &pred) +} + +fn relative_l2_error(cb: &CamCodebook, rows: &[Vec]) -> f32 { + if rows.is_empty() { + return f32::NAN; + } + let adjusted_dim = (rows[0].len() / NUM_SUBSPACES) * NUM_SUBSPACES; + let mut sum_err = 0.0f64; + let mut sum_norm = 0.0f64; + for row in rows { + let decoded = cb.decode(&cb.encode(row)); + let slice = &row[..adjusted_dim.min(row.len())]; + for (a, b) in slice.iter().zip(decoded.iter()) { + let d = (a - b) as f64; + sum_err += d * d; + } + for &a in slice { + sum_norm += (a as f64) * (a as f64); + } + } + if sum_norm > 0.0 { + (sum_err / sum_norm).sqrt() as f32 + } else { + 0.0 + } +} + +fn cosine(a: &[f32], b: &[f32]) -> f32 { + let n = a.len().min(b.len()); + let mut dot = 0.0f64; + let mut na = 0.0f64; + let mut nb = 0.0f64; + for i in 0..n { + let x = a[i] as f64; + let y = b[i] as f64; + dot += x * y; + na += x * x; + nb += y * y; + } + let denom = (na * nb).sqrt(); + if denom > 0.0 { + (dot / denom) as f32 + } else { + 0.0 + } +} + +fn icc_3_1(truth: &[f32], pred: &[f32]) -> f32 { + let n = truth.len(); + if n < 2 { + return 
/// Intraclass correlation between two paired measurement series:
/// `(MS_R − MS_W) / (MS_R + MS_W)` with `MS_R` the between-pair and `MS_W`
/// the within-pair mean square.
///
/// Returns NaN for fewer than two pairs or when the series differ in length
/// (previously a shorter `pred` panicked and a longer one was silently
/// truncated). Also NaN when both mean squares are zero.
///
/// NOTE(review): the within-pair term is the one-way ANOVA mean square, i.e.
/// the ICC(1,1) form in Shrout–Fleiss terminology rather than ICC(3,1) —
/// confirm against `codec_rnd_bench.rs` before renaming.
fn icc_3_1(truth: &[f32], pred: &[f32]) -> f32 {
    let n = truth.len();
    if n < 2 || pred.len() != n {
        return f32::NAN;
    }
    let mut grand = 0.0f64;
    for (&t, &p) in truth.iter().zip(pred) {
        grand += (t + p) as f64;
    }
    grand /= (2 * n) as f64;

    let mut ms_r = 0.0f64;
    let mut ms_w = 0.0f64;
    for (&t, &p) in truth.iter().zip(pred) {
        let row_mean = ((t + p) as f64) / 2.0;
        ms_r += 2.0 * (row_mean - grand).powi(2);
        ms_w += (t as f64 - row_mean).powi(2) + (p as f64 - row_mean).powi(2);
    }
    ms_r /= (n - 1) as f64;
    ms_w /= n as f64;
    ((ms_r - ms_w) / (ms_r + ms_w)) as f32
}

// ─── helpers ────────────────────────────────────────────────────────────────

/// `(row_dim, n_rows)` for a 2-D shape given as `[n_rows, row_dim]`;
/// `None` for any other rank.
fn row_layout(dims: &[u64]) -> Option<(u64, u64)> {
    match dims {
        [n_rows, row_dim] => Some((*row_dim, *n_rows)),
        _ => None,
    }
}

/// Map a tensor name to a file-name-safe string: ASCII alphanumerics, `.`,
/// `_` and `-` are kept, every other character becomes `_`.
fn sanitize_name(name: &str) -> String {
    name.chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-') {
                c
            } else {
                '_'
            }
        })
        .collect()
}

/// Keep at most the last `n` bytes of `s` (the tail of a tensor name is its
/// most specific part).
///
/// The cut is moved forward to the next character boundary, so a multi-byte
/// UTF-8 character straddling the cut is dropped instead of causing a panic;
/// the result may therefore be shorter than `n` bytes.
fn truncate(s: &str, n: usize) -> &str {
    if s.len() <= n {
        return s;
    }
    let mut start = s.len() - n;
    // `s.len()` is always a boundary, so this terminates.
    while !s.is_char_boundary(start) {
        start += 1;
    }
    &s[start..]
}
+struct SimpleRng { + state: u64, +} + +impl SimpleRng { + fn new(seed: u64) -> Self { + Self { state: seed } + } + fn next(&mut self) -> u64 { + self.state = self.state.wrapping_add(0x9E3779B97F4A7C15); + let mut z = self.state; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58476D1CE4E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D049BB133111EB); + z ^ (z >> 31) + } +} + +// ─── manifest ─────────────────────────────────────────────────────────────── + +#[derive(serde::Serialize, serde::Deserialize)] +struct Manifest { + model: String, + kmeans_iterations: usize, + num_subspaces: u32, + num_centroids: u32, + max_rows_calibration: Option, + icc_samples: usize, + entries: Vec, +} + +#[derive(serde::Serialize, serde::Deserialize)] +struct ManifestEntry { + name: String, + dtype: String, + dims: Vec, + route: String, + codebook_file: Option, + codebook_sha256: Option, + fingerprints_file: Option, + fingerprints_sha256: Option, + passthrough_file: Option, + passthrough_sha256: Option, + n_rows: Option, + row_dim: Option, + mean_reconstruction_error: Option, + relative_l2_error: Option, + icc_3_1: Option, +} diff --git a/crates/lance-graph-contract/src/cam.rs b/crates/lance-graph-contract/src/cam.rs index 60d9367b..5c9316e4 100644 --- a/crates/lance-graph-contract/src/cam.rs +++ b/crates/lance-graph-contract/src/cam.rs @@ -12,6 +12,133 @@ pub const NUM_SUBSPACES: usize = 6; /// Number of centroids per subspace. pub const NUM_CENTROIDS: usize = 256; +/// Minimum element count for a tensor to be worth encoding via CAM-PQ. +/// Below this, codebook storage overhead dominates. +pub const CAM_PQ_MIN_ELEMENTS: u64 = 4096; + +/// Routing decision for a single tensor in a model checkpoint. +/// +/// Enforces invariant I1 (two regimes): index-regime tensors (embeddings, +/// lm_head) MUST stay Passthrough to preserve identity lookup; argmax-regime +/// tensors (attention Q/K/V/O, MLP gate/up/down) route through CAM-PQ. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CodecRoute { + /// Encode via CAM-PQ: 6-byte fingerprint + per-tensor codebook. + /// Target: attention projections and MLP feed-forward layers. + CamPq, + /// Store as f32 (no compression). Required for index-regime tensors: + /// embedding table, lm_head output projection, any tensor where row + /// identity must round-trip exactly. + Passthrough, + /// Skip codec entirely — leave as f32 alongside other small tensors. + /// Target: norms, biases, anything too small to benefit from codec. + Skip, +} + +/// Route a single tensor by name + dimensions. +/// +/// Matching rules (applied in order; first match wins): +/// 1. `token_embd`, `embed_tokens`, `lm_head`, `wte`, `wpe` → `Passthrough`. +/// Identity lookup must be exact — no codec can survive Invariant I1. +/// 2. `norm`, `ln_`, `layer_norm` → `Skip`. Small; codec overhead wastes space. +/// 3. Attention `q/k/v/o_proj`, `attn_q/k/v/output`, `self_attn` → `CamPq`. +/// 4. MLP `gate_proj`, `up_proj`, `down_proj`, `ffn_gate/up/down`, `fc1/fc2`, +/// `w1/w2/w3`, generic `mlp`/`ffn` → `CamPq`. +/// 5. 4D tensors (Conv2D kernels) → `Skip` — not our target. +/// 6. Small tensors (< [`CAM_PQ_MIN_ELEMENTS`]) → `Skip`. +/// 7. Ambiguous 2D matrix ≥ `CAM_PQ_MIN_ELEMENTS` → `CamPq` (argmax default). +/// 8. Everything else → `Skip`. +/// +/// # Example +/// +/// ``` +/// use lance_graph_contract::cam::{route_tensor, CodecRoute}; +/// +/// assert_eq!(route_tensor("model.layers.0.self_attn.q_proj.weight", &[4096, 4096]), CodecRoute::CamPq); +/// assert_eq!(route_tensor("model.embed_tokens.weight", &[151936, 1024]), CodecRoute::Passthrough); +/// assert_eq!(route_tensor("lm_head.weight", &[151936, 1024]), CodecRoute::Passthrough); +/// assert_eq!(route_tensor("model.layers.0.post_attention_layernorm.weight", &[4096]), CodecRoute::Skip); +/// ``` +pub fn route_tensor(name: &str, dims: &[u64]) -> CodecRoute { + // Rule 1: index-regime tensors — must be exact. 
Check before size/shape + // rules so lm_head (which is 2D and large) isn't misrouted as CamPq. + // Use `wte.` / `wpe.` as anchors to avoid matching unrelated 3-letter runs. + let n_lower = name.to_ascii_lowercase(); + let is_wte_wpe = n_lower == "wte" + || n_lower == "wpe" + || n_lower.starts_with("wte.") + || n_lower.starts_with("wpe.") + || n_lower.ends_with(".wte") + || n_lower.ends_with(".wpe") + || n_lower.contains(".wte.") + || n_lower.contains(".wpe."); + if n_lower.contains("token_embd") + || n_lower.contains("embed_tokens") + || n_lower.contains("embedding") + || n_lower.ends_with(".embed.weight") + || n_lower.contains(".embed.") + || n_lower.contains("lm_head") + || is_wte_wpe + { + return CodecRoute::Passthrough; + } + + // Rule 2: norms are small and not worth encoding. + if n_lower.contains("norm") || n_lower.contains("ln_") || n_lower.contains("layer_norm") { + return CodecRoute::Skip; + } + + // Rule 5 (applied early): skip conv kernels. + if dims.len() == 4 { + return CodecRoute::Skip; + } + + // Rule 6: skip anything too small to benefit. + let total: u64 = dims.iter().product(); + if total < CAM_PQ_MIN_ELEMENTS { + return CodecRoute::Skip; + } + + // Rule 3: attention projections. + if n_lower.contains("q_proj") + || n_lower.contains("k_proj") + || n_lower.contains("v_proj") + || n_lower.contains("o_proj") + || n_lower.contains("attn_q") + || n_lower.contains("attn_k") + || n_lower.contains("attn_v") + || n_lower.contains("attn_output") + || n_lower.contains("self_attn") + { + return CodecRoute::CamPq; + } + + // Rule 4: MLP / feed-forward. 
+ if n_lower.contains("gate_proj") + || n_lower.contains("up_proj") + || n_lower.contains("down_proj") + || n_lower.contains("ffn_gate") + || n_lower.contains("ffn_up") + || n_lower.contains("ffn_down") + || n_lower.contains("mlp") + || n_lower.contains("ffn") + || n_lower.contains("fc1") + || n_lower.contains("fc2") + || n_lower.contains("w1") + || n_lower.contains("w2") + || n_lower.contains("w3") + { + return CodecRoute::CamPq; + } + + // Rule 7: ambiguous 2D matrix that's large enough → CamPq by default. + if dims.len() == 2 && total >= CAM_PQ_MIN_ELEMENTS { + return CodecRoute::CamPq; + } + + CodecRoute::Skip +} + /// Named CAM bytes (stroke positions). #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum CamByte { @@ -77,3 +204,126 @@ pub trait IvfContract: Send + Sync { /// Find top-P closest partitions for a query. fn probe(&self, query: &[f32], num_probes: usize) -> Vec<(u32, f32)>; } + +#[cfg(test)] +mod route_tests { + use super::*; + + #[test] + fn attention_projections_route_campq() { + assert_eq!( + route_tensor("model.layers.0.self_attn.q_proj.weight", &[4096, 4096]), + CodecRoute::CamPq, + ); + assert_eq!( + route_tensor("model.layers.12.self_attn.k_proj.weight", &[4096, 1024]), + CodecRoute::CamPq, + ); + assert_eq!( + route_tensor("model.layers.5.self_attn.v_proj.weight", &[4096, 1024]), + CodecRoute::CamPq, + ); + assert_eq!( + route_tensor("model.layers.0.self_attn.o_proj.weight", &[4096, 4096]), + CodecRoute::CamPq, + ); + } + + #[test] + fn mlp_projections_route_campq() { + assert_eq!( + route_tensor("model.layers.0.mlp.gate_proj.weight", &[4096, 11008]), + CodecRoute::CamPq, + ); + assert_eq!( + route_tensor("model.layers.0.mlp.up_proj.weight", &[4096, 11008]), + CodecRoute::CamPq, + ); + assert_eq!( + route_tensor("model.layers.0.mlp.down_proj.weight", &[11008, 4096]), + CodecRoute::CamPq, + ); + } + + #[test] + fn embeddings_stay_passthrough() { + assert_eq!( + route_tensor("model.embed_tokens.weight", &[151936, 1024]), + 
CodecRoute::Passthrough, + ); + assert_eq!( + route_tensor("lm_head.weight", &[151936, 1024]), + CodecRoute::Passthrough, + ); + // GGUF naming + assert_eq!( + route_tensor("token_embd.weight", &[151936, 1024]), + CodecRoute::Passthrough, + ); + // GPT-2 naming + assert_eq!( + route_tensor("wte.weight", &[50257, 768]), + CodecRoute::Passthrough, + ); + // Generic embedding tables (e.g. Qwen3-TTS codec_embedding) + assert_eq!( + route_tensor("talker.code_predictor.model.codec_embedding.0.weight", &[2048, 1024]), + CodecRoute::Passthrough, + ); + assert_eq!( + route_tensor("speaker.embedding.weight", &[1000, 256]), + CodecRoute::Passthrough, + ); + } + + #[test] + fn norms_skipped() { + assert_eq!( + route_tensor("model.layers.0.input_layernorm.weight", &[4096]), + CodecRoute::Skip, + ); + assert_eq!( + route_tensor("model.norm.weight", &[4096]), + CodecRoute::Skip, + ); + assert_eq!( + route_tensor("ln_1.weight", &[768]), + CodecRoute::Skip, + ); + } + + #[test] + fn small_tensors_skipped() { + // Under 4096 elements — biases, small projections. + assert_eq!( + route_tensor("model.layers.0.self_attn.q_proj.bias", &[256]), + CodecRoute::Skip, + ); + } + + #[test] + fn conv2d_skipped() { + // 4D tensor — conv kernel, not our target. + assert_eq!( + route_tensor("vision.patch_embed.proj.weight", &[768, 3, 16, 16]), + CodecRoute::Skip, + ); + } + + #[test] + fn lm_head_not_misrouted_as_campq() { + // lm_head is 2D, large, would match the ambiguous-2D fallback. + // Must be caught by rule 1 first. + let r = route_tensor("lm_head.weight", &[151936, 4096]); + assert_eq!(r, CodecRoute::Passthrough, "lm_head must NOT route to CamPq"); + } + + #[test] + fn ambiguous_large_2d_routes_campq() { + // Generic 2D weight matrix, no clear role name → argmax default. + assert_eq!( + route_tensor("encoder.linear_fallback.weight", &[1024, 1024]), + CodecRoute::CamPq, + ); + } +}