From 0f785239cf726af5877d02bec655500aa1ba1fcc Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 13:41:12 -0500 Subject: [PATCH 01/30] feat(core): ed25519 to x25519 key conversion for envelope crypto --- Cargo.lock | 125 ++++++++++++++++++++++++++++ crates/gitlawb-core/Cargo.toml | 3 + crates/gitlawb-core/src/encrypt.rs | 47 +++++++++++ crates/gitlawb-core/src/identity.rs | 6 ++ crates/gitlawb-core/src/lib.rs | 1 + 5 files changed, 182 insertions(+) create mode 100644 crates/gitlawb-core/src/encrypt.rs diff --git a/Cargo.lock b/Cargo.lock index 3cde378..53f8f4f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,16 @@ version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + [[package]] name = "ahash" version = "0.8.12" @@ -1975,6 +1985,30 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "chacha20poly1305" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35" +dependencies = [ + "aead", + "chacha20", + "cipher", + "poly1305", + "zeroize", +] + [[package]] name = "chrono" version = "0.4.44" @@ -2001,6 +2035,17 @@ dependencies = [ "unsigned-varint 0.8.0", 
] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", + "zeroize", +] + [[package]] name = "clap" version = "4.5.60" @@ -2307,9 +2352,41 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", + "rand_core 0.6.4", "typenum", ] +[[package]] +name = "crypto_box" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16182b4f39a82ec8a6851155cc4c0cda3065bb1db33651726a29e1951de0f009" +dependencies = [ + "aead", + "chacha20", + "crypto_secretbox", + "curve25519-dalek", + "salsa20", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto_secretbox" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d6cf87adf719ddf43a805e92c6870a531aedda35ff640442cbaf8674e141e1" +dependencies = [ + "aead", + "chacha20", + "cipher", + "generic-array", + "poly1305", + "salsa20", + "subtle", + "zeroize", +] + [[package]] name = "curve25519-dalek" version = "4.1.3" @@ -3255,8 +3332,11 @@ version = "0.3.9" dependencies = [ "anyhow", "base64", + "chacha20poly1305", "chrono", "cid", + "crypto_box", + "curve25519-dalek", "ed25519-dalek", "hex", "multibase", @@ -3919,6 +3999,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array", +] + [[package]] name = "ipconfig" version = "0.3.4" @@ -4811,6 +4900,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = 
"opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "openssl-probe" version = "0.2.1" @@ -5060,6 +5155,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "poly1305" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" +dependencies = [ + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -5787,6 +5893,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6518fc26bced4d53678a22d6e423e9d8716377def84545fe328236e3af070e7f" +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +dependencies = [ + "cipher", +] + [[package]] name = "schannel" version = "0.1.29" @@ -7035,6 +7150,16 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + [[package]] name = "unsigned-varint" version = "0.7.2" diff --git a/crates/gitlawb-core/Cargo.toml b/crates/gitlawb-core/Cargo.toml index 486a5aa..f479e1a 100644 --- a/crates/gitlawb-core/Cargo.toml +++ b/crates/gitlawb-core/Cargo.toml @@ -23,6 +23,9 @@ chrono = { workspace = true } uuid = { workspace = true } zeroize = { version = "1", features = ["derive"] } pkcs8 = { version = "0.10", features = ["pem", "std"] } +curve25519-dalek = "4" +crypto_box = { version 
= "0.9", features = ["std"] } +chacha20poly1305 = "0.10" [dev-dependencies] tokio = { workspace = true } diff --git a/crates/gitlawb-core/src/encrypt.rs b/crates/gitlawb-core/src/encrypt.rs new file mode 100644 index 0000000..c75add4 --- /dev/null +++ b/crates/gitlawb-core/src/encrypt.rs @@ -0,0 +1,47 @@ +//! Envelope encryption for withheld blobs (Option B). A random content key +//! encrypts the blob (XChaCha20-Poly1305); the content key is wrapped to each +//! recipient via an X25519 box keyed from their Ed25519 `did:key`. The node +//! seals with public keys only; readers open with their own private key. + +use anyhow::{Context, Result}; +use ed25519_dalek::VerifyingKey; + +/// X25519 public key (Montgomery u) for an Ed25519 verifying key. +fn x25519_public(vk: &VerifyingKey) -> Result<[u8; 32]> { + use curve25519_dalek::edwards::CompressedEdwardsY; + let edwards = CompressedEdwardsY::from_slice(vk.as_bytes()) + .ok() + .and_then(|c| c.decompress()) + .context("verifying key is not a valid edwards point")?; + Ok(edwards.to_montgomery().to_bytes()) +} + +/// X25519 secret scalar for an Ed25519 seed (SHA-512 of seed, lower 32, clamped). +fn x25519_secret_from_seed(seed: &[u8; 32]) -> [u8; 32] { + use sha2::{Digest, Sha512}; + let h = Sha512::digest(seed); + let mut s = [0u8; 32]; + s.copy_from_slice(&h[..32]); + s[0] &= 248; + s[31] &= 127; + s[31] |= 64; + s +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::identity::Keypair; + + #[test] + fn ed25519_to_x25519_keypair_agrees() { + // The X25519 public derived from the Ed25519 public must equal the + // X25519 public of the X25519 secret derived from the same seed. 
+ let kp = Keypair::generate(); + let seed = kp.seed_bytes(); + let xpub_from_public = x25519_public(&kp.verifying_key()).unwrap(); + let xsec = x25519_secret_from_seed(&seed); + let xpub_from_secret = crypto_box::SecretKey::from(xsec).public_key().to_bytes(); + assert_eq!(xpub_from_public, xpub_from_secret); + } +} diff --git a/crates/gitlawb-core/src/identity.rs b/crates/gitlawb-core/src/identity.rs index 96d50b9..9d3fea1 100644 --- a/crates/gitlawb-core/src/identity.rs +++ b/crates/gitlawb-core/src/identity.rs @@ -52,6 +52,12 @@ impl Keypair { URL_SAFE_NO_PAD.encode(sig.to_bytes()) } + /// The raw 32-byte Ed25519 seed. Used to derive the X25519 secret for + /// envelope decryption (see `crate::encrypt`). + pub fn seed_bytes(&self) -> [u8; 32] { + self.signing_key.to_bytes() + } + /// Export the signing key as raw 32-byte seed (wrapped in Zeroizing). pub fn to_seed(&self) -> Zeroizing<[u8; 32]> { Zeroizing::new(self.signing_key.to_bytes()) diff --git a/crates/gitlawb-core/src/lib.rs b/crates/gitlawb-core/src/lib.rs index a608be1..a9e91f6 100644 --- a/crates/gitlawb-core/src/lib.rs +++ b/crates/gitlawb-core/src/lib.rs @@ -1,6 +1,7 @@ pub mod cert; pub mod cid; pub mod did; +pub mod encrypt; pub mod error; pub mod http_sig; pub mod identity; From 317e535d28f26bd2b7beb835b760984114041f1f Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 13:44:11 -0500 Subject: [PATCH 02/30] feat(core): seal/open per-blob encryption envelopes --- crates/gitlawb-core/Cargo.toml | 2 +- crates/gitlawb-core/src/encrypt.rs | 147 +++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 1 deletion(-) diff --git a/crates/gitlawb-core/Cargo.toml b/crates/gitlawb-core/Cargo.toml index f479e1a..4468d0c 100644 --- a/crates/gitlawb-core/Cargo.toml +++ b/crates/gitlawb-core/Cargo.toml @@ -24,7 +24,7 @@ uuid = { workspace = true } zeroize = { version = "1", features = ["derive"] } pkcs8 = { version = "0.10", features = 
["pem", "std"] } curve25519-dalek = "4" -crypto_box = { version = "0.9", features = ["std"] } +crypto_box = { version = "0.9", features = ["std", "chacha20"] } chacha20poly1305 = "0.10" [dev-dependencies] diff --git a/crates/gitlawb-core/src/encrypt.rs b/crates/gitlawb-core/src/encrypt.rs index c75add4..aad6913 100644 --- a/crates/gitlawb-core/src/encrypt.rs +++ b/crates/gitlawb-core/src/encrypt.rs @@ -3,6 +3,7 @@ //! recipient via an X25519 box keyed from their Ed25519 `did:key`. The node //! seals with public keys only; readers open with their own private key. +use crate::identity::Keypair; use anyhow::{Context, Result}; use ed25519_dalek::VerifyingKey; @@ -28,6 +29,129 @@ fn x25519_secret_from_seed(seed: &[u8; 32]) -> [u8; 32] { s } +use base64::{engine::general_purpose::STANDARD as B64, Engine}; +use chacha20poly1305::{ + aead::{Aead, KeyInit}, + XChaCha20Poly1305, XNonce, +}; +use crypto_box::{ + aead::{AeadCore, OsRng}, + ChaChaBox, PublicKey as XPublic, SecretKey as XSecret, +}; +use rand::RngCore; +use serde::{Deserialize, Serialize}; + +const MAGIC: &[u8] = b"GLENC"; +const VERSION: u8 = 1; + +#[derive(Serialize, Deserialize)] +struct Recipient { + kid: String, // base64 recipient ed25519 pubkey (32B) + eph: String, // base64 ephemeral x25519 pubkey (32B) + nonce: String, // base64 box nonce (24B) + wrap: String, // base64 wrapped content key +} + +#[derive(Serialize, Deserialize)] +struct Header { + alg: String, + nonce: String, // base64 body nonce (24B) + recipients: Vec, +} + +/// Encrypt `plaintext` so any of `recipients` (Ed25519 keys) can decrypt. 
+pub fn seal_blob(plaintext: &[u8], recipients: &[VerifyingKey]) -> Result> { + if recipients.is_empty() { + return Err(anyhow::anyhow!("seal_blob: no recipients")); + } + let mut content_key = [0u8; 32]; + OsRng.fill_bytes(&mut content_key); + let body_cipher = XChaCha20Poly1305::new_from_slice(&content_key) + .map_err(|e| anyhow::anyhow!("content key: {e}"))?; + let mut body_nonce = [0u8; 24]; + OsRng.fill_bytes(&mut body_nonce); + let body = body_cipher + .encrypt(XNonce::from_slice(&body_nonce), plaintext) + .map_err(|e| anyhow::anyhow!("body encrypt: {e}"))?; + + let mut wrapped = Vec::with_capacity(recipients.len()); + for vk in recipients { + let recip_x = XPublic::from(x25519_public(vk)?); + let eph = XSecret::generate(&mut OsRng); + let abox = ChaChaBox::new(&recip_x, &eph); + let nonce = ChaChaBox::generate_nonce(&mut OsRng); + let ct = abox + .encrypt(&nonce, &content_key[..]) + .map_err(|e| anyhow::anyhow!("wrap: {e}"))?; + wrapped.push(Recipient { + kid: B64.encode(vk.as_bytes()), + eph: B64.encode(eph.public_key().as_bytes()), + nonce: B64.encode(nonce), + wrap: B64.encode(ct), + }); + } + + let header = Header { + alg: "xchacha20poly1305".into(), + nonce: B64.encode(body_nonce), + recipients: wrapped, + }; + let header_json = serde_json::to_vec(&header).context("encode header")?; + + let mut out = Vec::new(); + out.extend_from_slice(MAGIC); + out.push(VERSION); + out.extend_from_slice(&(header_json.len() as u32).to_le_bytes()); + out.extend_from_slice(&header_json); + out.extend_from_slice(&body); + Ok(out) +} + +/// Decrypt an envelope with `keypair`. Errors if not a recipient or on auth fail. 
+pub fn open_blob(envelope: &[u8], keypair: &Keypair) -> Result<Vec<u8>> {
+    // Layout: MAGIC | version (1 byte) | header length (u32, little-endian) |
+    // JSON header | encrypted body. Everything past the fixed prefix is
+    // untrusted input, so every length is validated rather than indexed blindly.
+    let prefix_len = MAGIC.len() + 1 + 4;
+    if envelope.len() < prefix_len || &envelope[..MAGIC.len()] != MAGIC {
+        return Err(anyhow::anyhow!("bad envelope magic"));
+    }
+    let mut p = MAGIC.len();
+    if envelope[p] != VERSION {
+        return Err(anyhow::anyhow!("unsupported envelope version"));
+    }
+    p += 1;
+    let hlen = u32::from_le_bytes(
+        envelope[p..p + 4]
+            .try_into()
+            .expect("prefix length checked above"),
+    ) as usize;
+    p += 4;
+    // `hlen` is attacker-chosen; where usize is 32 bits `p + hlen` can overflow.
+    let header_end = p.checked_add(hlen).context("truncated header")?;
+    let header: Header =
+        serde_json::from_slice(envelope.get(p..header_end).context("truncated header")?)
+            .context("decode header")?;
+    // `get` succeeded above, so `header_end <= envelope.len()` and this cannot panic.
+    let body = &envelope[header_end..];
+
+    let my_kid = B64.encode(keypair.verifying_key().as_bytes());
+    let my_x = XSecret::from(x25519_secret_from_seed(&keypair.seed_bytes()));
+
+    let entry = header
+        .recipients
+        .iter()
+        .find(|r| r.kid == my_kid)
+        .context("not a recipient of this envelope")?;
+    let eph = XPublic::from(<[u8; 32]>::try_from(B64.decode(&entry.eph)?.as_slice())?);
+    // Both nonces are decoded from the header. `GenericArray::from_slice` and
+    // `XNonce::from_slice` panic when the slice is not exactly 24 bytes, so pin
+    // the length first and report a malformed envelope as an error instead.
+    let nonce = <[u8; 24]>::try_from(B64.decode(&entry.nonce)?.as_slice())
+        .context("box nonce must be 24 bytes")?;
+    let wrap = B64.decode(&entry.wrap)?;
+    let abox = ChaChaBox::new(&eph, &my_x);
+    let content_key = abox
+        .decrypt(
+            crypto_box::aead::generic_array::GenericArray::from_slice(&nonce),
+            wrap.as_slice(),
+        )
+        .map_err(|_| anyhow::anyhow!("content-key unwrap failed"))?;
+
+    // A wrong-length content key is rejected here by `new_from_slice`.
+    let body_cipher = XChaCha20Poly1305::new_from_slice(&content_key)
+        .map_err(|e| anyhow::anyhow!("content key: {e}"))?;
+    let body_nonce = <[u8; 24]>::try_from(B64.decode(&header.nonce)?.as_slice())
+        .context("body nonce must be 24 bytes")?;
+    body_cipher
+        .decrypt(XNonce::from_slice(&body_nonce), body)
+        .map_err(|_| anyhow::anyhow!("body decrypt failed"))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -44,4 +168,27 @@ mod tests {
         let xpub_from_secret = crypto_box::SecretKey::from(xsec).public_key().to_bytes();
         assert_eq!(xpub_from_public, xpub_from_secret);
     }
+
+    #[test]
+    fn seal_open_round_trip_for_recipients() {
+        let owner = Keypair::generate();
+        let reader_a = Keypair::generate();
+        let reader_b = Keypair::generate();
+        let msg = b"private blob contents";
+
+        let env =
seal_blob(msg, &[owner.verifying_key(), reader_a.verifying_key()]).unwrap(); + + assert_eq!(open_blob(&env, &owner).unwrap(), msg); + assert_eq!(open_blob(&env, &reader_a).unwrap(), msg); + assert!(open_blob(&env, &reader_b).is_err(), "non-recipient must fail"); + } + + #[test] + fn tampered_envelope_fails() { + let owner = Keypair::generate(); + let mut env = seal_blob(b"hi", &[owner.verifying_key()]).unwrap(); + let last = env.len() - 1; + env[last] ^= 0x01; + assert!(open_blob(&env, &owner).is_err()); + } } From ca2e9ed4a09a4e663293614a1b45c916ba952e5d Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 13:49:41 -0500 Subject: [PATCH 03/30] feat(node): resolve per-blob encryption recipients (least-privilege) --- .../gitlawb-node/src/git/visibility_pack.rs | 55 ++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs index c9c6d6b..f480d40 100644 --- a/crates/gitlawb-node/src/git/visibility_pack.rs +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -7,7 +7,7 @@ use crate::db::VisibilityRule; use crate::git::store; use crate::visibility::{visibility_check, Decision}; use anyhow::{Context, Result}; -use std::collections::HashSet; +use std::collections::{BTreeSet, HashMap, HashSet}; use std::path::Path; /// List every (blob_oid, "/repo/relative/path") pair reachable from any branch @@ -87,6 +87,43 @@ pub fn replicable_objects(all: Vec, withheld: &HashSet) -> Vec Result>> { + let withheld = withheld_blob_oids(repo_path, rules, is_public, owner_did, None)?; + if withheld.is_empty() { + return Ok(HashMap::new()); + } + let mut candidates: BTreeSet = BTreeSet::new(); + for r in rules { + for d in &r.reader_dids { + candidates.insert(d.clone()); + } + } + let mut out: HashMap> = HashMap::new(); + for (oid, path) in blob_paths(repo_path)? 
{ + if !withheld.contains(&oid) { + continue; + } + let entry = out.entry(oid).or_default(); + entry.insert(owner_did.to_string()); + for did in &candidates { + if visibility_check(rules, is_public, owner_did, Some(did), &path) == Decision::Allow { + entry.insert(did.clone()); + } + } + } + Ok(out) +} + #[cfg(test)] mod tests { use super::*; @@ -230,4 +267,20 @@ mod tests { let got = replicable_objects(all.clone(), &withheld); assert_eq!(got, all); } + + #[test] + fn recipients_are_owner_plus_allowed_readers_only() { + let (_td, repo, secret_oid, public_oid) = fixture(); + let reader = "did:key:zReader"; + let rules = vec![rule("/secret/**", &[reader])]; + let map = withheld_blob_recipients(&repo, &rules, true, OWNER).unwrap(); + + let recips = map.get(&secret_oid).expect("secret blob has recipients"); + assert!(recips.contains(OWNER)); + assert!(recips.contains(reader)); + assert!( + !map.contains_key(&public_oid), + "public blob is not encrypted" + ); + } } From 99558c97f93e60a63e7b4037bfcdb83015f83fb8 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 13:55:46 -0500 Subject: [PATCH 04/30] feat(node): encrypted_blobs table and recipient-scoped queries --- crates/gitlawb-node/src/db/mod.rs | 94 +++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index b00c861..088382d 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -720,6 +720,21 @@ const MIGRATIONS: &[Migration] = &[ "CREATE INDEX IF NOT EXISTS idx_visibility_rules_repo ON visibility_rules(repo_id)", ], }, + Migration { + version: 4, + name: "encrypted_blobs", + stmts: &[ + r#"CREATE TABLE IF NOT EXISTS encrypted_blobs ( + repo_id TEXT NOT NULL, + oid TEXT NOT NULL, + cid TEXT NOT NULL, + recipients TEXT NOT NULL, + created_at TEXT NOT NULL, + PRIMARY KEY (repo_id, oid) + )"#, + "CREATE INDEX IF NOT EXISTS 
idx_encrypted_blobs_repo ON encrypted_blobs(repo_id)", + ], + }, ]; // ── Repos ───────────────────────────────────────────────────────────────────── @@ -1628,6 +1643,85 @@ impl Db { Ok(()) } + pub async fn record_encrypted_blob( + &self, + repo_id: &str, + oid: &str, + cid: &str, + recipients: &[String], + ) -> Result<()> { + let recipients_json = serde_json::to_string(recipients)?; + sqlx::query( + "INSERT INTO encrypted_blobs (repo_id, oid, cid, recipients, created_at) + VALUES ($1, $2, $3, $4, $5) + ON CONFLICT (repo_id, oid) DO UPDATE SET cid = EXCLUDED.cid, recipients = EXCLUDED.recipients", + ) + .bind(repo_id) + .bind(oid) + .bind(cid) + .bind(recipients_json) + .bind(Utc::now().to_rfc3339()) + .execute(&self.pool) + .await?; + Ok(()) + } + + /// (oid, cid) for every encrypted blob in the repo that `caller` may decrypt. + pub async fn list_encrypted_blobs_for( + &self, + repo_id: &str, + caller: &str, + ) -> Result> { + let rows = sqlx::query("SELECT oid, cid, recipients FROM encrypted_blobs WHERE repo_id = $1") + .bind(repo_id) + .fetch_all(&self.pool) + .await?; + let mut out = Vec::new(); + for row in rows { + let oid: String = row.get("oid"); + let cid: String = row.get("cid"); + let recipients: String = row.get("recipients"); + let recipients: Vec = serde_json::from_str(&recipients).unwrap_or_default(); + if recipients.iter().any(|d| d == caller) { + out.push((oid, cid)); + } + } + Ok(out) + } + + /// The CID of one encrypted blob, only if `caller` is a recipient. 
+ pub async fn encrypted_blob_cid( + &self, + repo_id: &str, + oid: &str, + caller: &str, + ) -> Result> { + let row = sqlx::query("SELECT cid, recipients FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2") + .bind(repo_id) + .bind(oid) + .fetch_optional(&self.pool) + .await?; + let Some(row) = row else { return Ok(None) }; + let recipients: String = row.get("recipients"); + let recipients: Vec = serde_json::from_str(&recipients).unwrap_or_default(); + if recipients.iter().any(|d| d == caller) { + Ok(Some(row.get("cid"))) + } else { + Ok(None) + } + } + + /// Whether an encrypted blob row exists (recipient-agnostic), to avoid + /// re-pinning on subsequent pushes. + pub async fn has_encrypted_blob(&self, repo_id: &str, oid: &str) -> Result { + let row = sqlx::query("SELECT 1 AS x FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2") + .bind(repo_id) + .bind(oid) + .fetch_optional(&self.pool) + .await?; + Ok(row.is_some()) + } + pub async fn list_pinned_cids(&self) -> Result> { let rows = sqlx::query( "SELECT sha256_hex, cid, pinned_at, pinata_cid FROM pinned_cids ORDER BY pinned_at DESC", From 9cb1a7b6dd108f9fe988a363e157b0ac34aface5 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 14:00:02 -0500 Subject: [PATCH 05/30] feat(node): encrypt-then-pin withheld blobs at push (IPFS) At the push chokepoint, after pinning withheld objects, resolve each withheld blob's recipient DIDs and seal it to their Ed25519 keys with seal_blob, pinning the ciphertext to IPFS and recording it in encrypted_blobs. Best-effort per blob: failures are logged and skipped, never pinned in plaintext. Pinata replication is unchanged; B1 encrypts to IPFS only. Adds ed25519-dalek as a direct dependency of gitlawb-node (it was only declared in the workspace Cargo.toml). 
--- Cargo.lock | 1 + crates/gitlawb-node/Cargo.toml | 1 + crates/gitlawb-node/src/api/repos.rs | 27 +++++++++ crates/gitlawb-node/src/encrypted_pin.rs | 59 +++++++++++++++++++ .../gitlawb-node/src/git/visibility_pack.rs | 12 +++- crates/gitlawb-node/src/main.rs | 1 + 6 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 crates/gitlawb-node/src/encrypted_pin.rs diff --git a/Cargo.lock b/Cargo.lock index 53f8f4f..9cc259d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3370,6 +3370,7 @@ dependencies = [ "cid", "clap", "dirs-next", + "ed25519-dalek", "futures", "gitlawb-core", "hex", diff --git a/crates/gitlawb-node/Cargo.toml b/crates/gitlawb-node/Cargo.toml index 5f10ec9..a210aa0 100644 --- a/crates/gitlawb-node/Cargo.toml +++ b/crates/gitlawb-node/Cargo.toml @@ -11,6 +11,7 @@ path = "src/main.rs" [dependencies] gitlawb-core = { path = "../gitlawb-core" } +ed25519-dalek = { workspace = true } tokio = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 2886926..0eb4a12 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -686,6 +686,10 @@ pub async fn git_receive_pack( let ipfs_api = state.config.ipfs_api.clone(); let repo_path_clone = disk_path.clone(); let db_clone = state.db.clone(); + let rules_for_enc = rules_opt.clone(); + let repo_id = record.id.clone(); + let owner_did = record.owner_did.clone(); + let is_public = record.is_public; tokio::spawn(async move { let pinned = crate::ipfs_pin::pin_new_objects( &ipfs_api, @@ -700,6 +704,29 @@ pub async fn git_receive_pack( tracing::info!(sha = %sha, %cid, "pinned"); } } + + // Option B1: encrypt-then-pin the withheld blobs so authorized + // readers can recover them when the origin cannot serve them. 
+ if let Some(rules) = rules_for_enc.filter(|r| !r.is_empty()) { + let p = repo_path_clone.clone(); + let owner = owner_did.clone(); + let recip = tokio::task::spawn_blocking(move || { + crate::git::visibility_pack::withheld_blob_recipients( + &p, &rules, is_public, &owner, + ) + }) + .await; + if let Ok(Ok(recipients)) = recip { + crate::encrypted_pin::encrypt_and_pin( + &ipfs_api, + &repo_path_clone, + &db_clone, + &repo_id, + &recipients, + ) + .await; + } + } }); } diff --git a/crates/gitlawb-node/src/encrypted_pin.rs b/crates/gitlawb-node/src/encrypted_pin.rs new file mode 100644 index 0000000..dc5c09f --- /dev/null +++ b/crates/gitlawb-node/src/encrypted_pin.rs @@ -0,0 +1,59 @@ +//! Encrypt-then-pin for withheld blobs (Option B1). Each withheld blob is sealed +//! to its recipient DIDs and the envelope pinned to IPFS, recorded in +//! `encrypted_blobs`. Best-effort per blob: a failure is logged and skipped, +//! never pinned in plaintext. + +use std::collections::{BTreeSet, HashMap}; +use std::path::Path; +use std::str::FromStr; + +use ed25519_dalek::VerifyingKey; +use gitlawb_core::did::Did; +use gitlawb_core::encrypt::seal_blob; + +use crate::db::Db; + +/// Resolve a DID string to its Ed25519 verifying key, or None if it carries no +/// inline key (e.g. did:web / did:gitlawb). +fn did_to_key(did: &str) -> Option { + Did::from_str(did).ok()?.to_verifying_key().ok() +} + +/// Encrypt and pin every withheld blob. `recipients` maps blob oid -> DID set. 
+pub async fn encrypt_and_pin(
+    ipfs_api: &str,
+    repo_path: &Path,
+    db: &Db,
+    repo_id: &str,
+    recipients: &HashMap<String, BTreeSet<String>>,
+) {
+    for (oid, dids) in recipients {
+        // Already sealed and recorded by an earlier push. A failed lookup counts
+        // as "not recorded", so the blob is sealed again rather than dropped.
+        if db.has_encrypted_blob(repo_id, oid).await.unwrap_or(false) {
+            continue;
+        }
+        // `seal_blob` only wraps the content key for the keys it is given, so a
+        // DID with no inline key can never open the envelope. Keep the DIDs and
+        // keys paired so that only DIDs that were actually sealed to are recorded.
+        let (sealed_dids, keys): (Vec<String>, Vec<VerifyingKey>) = dids
+            .iter()
+            .filter_map(|d| did_to_key(d).map(|k| (d.clone(), k)))
+            .unzip();
+        if keys.is_empty() {
+            tracing::warn!(oid = %oid, "no resolvable recipient keys; skipping encrypted pin");
+            continue;
+        }
+        let data = match crate::git::store::read_object(repo_path, oid) {
+            Ok(Some((_t, bytes))) => bytes,
+            _ => {
+                // Missing object or read error: log it, as the module contract promises.
+                tracing::warn!(oid = %oid, "could not read object; skipping encrypted pin");
+                continue;
+            }
+        };
+        let envelope = match seal_blob(&data, &keys) {
+            Ok(e) => e,
+            Err(e) => {
+                tracing::warn!(oid = %oid, err = %e, "seal_blob failed; skipping");
+                continue;
+            }
+        };
+        let cid = match crate::ipfs_pin::pin_git_object(ipfs_api, oid, &envelope).await {
+            Ok(c) if !c.is_empty() => c,
+            // An empty CID is skipped quietly, as before.
+            // NOTE(review): presumably this means pinning is disabled - confirm.
+            Ok(_) => continue,
+            Err(_) => {
+                tracing::warn!(oid = %oid, "pinning envelope failed; skipping");
+                continue;
+            }
+        };
+        if let Err(e) = db.record_encrypted_blob(repo_id, oid, &cid, &sealed_dids).await {
+            tracing::warn!(oid = %oid, err = %e, "record_encrypted_blob failed");
+        }
+    }
+}
diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs
index f480d40..e32e084 100644
--- a/crates/gitlawb-node/src/git/visibility_pack.rs
+++ b/crates/gitlawb-node/src/git/visibility_pack.rs
@@ -87,7 +87,6 @@ pub fn replicable_objects(all: Vec, withheld: &HashSet) -> Vec Date: Wed, 10 Jun 2026 14:05:04 -0500
Subject: [PATCH 06/30] feat(node): authenticated discovery and fetch for encrypted blobs
---
 crates/gitlawb-node/src/api/encrypted.rs | 53 ++++++++++++++++++++++++
 crates/gitlawb-node/src/api/mod.rs | 1 +
 crates/gitlawb-node/src/ipfs_pin.rs | 13 ++++++
 crates/gitlawb-node/src/server.rs | 8 ++++
 4 files changed, 75 insertions(+)
 create mode 100644 crates/gitlawb-node/src/api/encrypted.rs
diff --git a/crates/gitlawb-node/src/api/encrypted.rs b/crates/gitlawb-node/src/api/encrypted.rs
new file mode 100644
index 0000000..8b692ab
--- /dev/null
+++ 
b/crates/gitlawb-node/src/api/encrypted.rs @@ -0,0 +1,53 @@ +//! Authenticated discovery + fetch for encrypted withheld blobs (Option B1). + +use axum::extract::{Extension, Path, State}; +use axum::Json; + +use crate::auth::AuthenticatedDid; +use crate::error::{AppError, Result}; +use crate::state::AppState; + +/// GET /api/v1/repos/{owner}/{repo}/encrypted-blobs +/// Returns [{oid, cid}] for encrypted blobs the caller may decrypt. +pub async fn list_encrypted_blobs( + State(state): State, + auth: Option>, + Path((owner, repo)): Path<(String, String)>, +) -> Result> { + let caller = auth.as_ref().map(|e| e.0 .0.as_str()).unwrap_or(""); + let record = state + .db + .get_repo(&owner, &repo) + .await? + .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let rows = state.db.list_encrypted_blobs_for(&record.id, caller).await?; + let blobs: Vec<_> = rows + .into_iter() + .map(|(oid, cid)| serde_json::json!({ "oid": oid, "cid": cid })) + .collect(); + Ok(Json(serde_json::json!({ "blobs": blobs }))) +} + +/// GET /api/v1/repos/{owner}/{repo}/encrypted-blob/{oid} +/// Returns raw envelope bytes if the caller is a recipient. +pub async fn get_encrypted_blob( + State(state): State, + auth: Option>, + Path((owner, repo, oid)): Path<(String, String, String)>, +) -> Result> { + let caller = auth.as_ref().map(|e| e.0 .0.as_str()).unwrap_or(""); + let record = state + .db + .get_repo(&owner, &repo) + .await? + .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let cid = state + .db + .encrypted_blob_cid(&record.id, &oid, caller) + .await? 
+ .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}/{oid}")))?; + let bytes = crate::ipfs_pin::cat(&state.config.ipfs_api, &cid) + .await + .map_err(|e| AppError::Git(e.to_string()))?; + Ok(bytes) +} diff --git a/crates/gitlawb-node/src/api/mod.rs b/crates/gitlawb-node/src/api/mod.rs index 2595c48..7f01365 100644 --- a/crates/gitlawb-node/src/api/mod.rs +++ b/crates/gitlawb-node/src/api/mod.rs @@ -3,6 +3,7 @@ pub mod arweave; pub mod bounties; pub mod certs; pub mod changelog; +pub mod encrypted; pub mod events; pub mod ipfs; pub mod issues; diff --git a/crates/gitlawb-node/src/ipfs_pin.rs b/crates/gitlawb-node/src/ipfs_pin.rs index 96d6abd..9bdaade 100644 --- a/crates/gitlawb-node/src/ipfs_pin.rs +++ b/crates/gitlawb-node/src/ipfs_pin.rs @@ -72,6 +72,19 @@ pub async fn pin_git_object(ipfs_api: &str, sha256_hex: &str, data: &[u8]) -> Re Ok(cid) } +/// Fetch raw bytes for a CID from the local Kubo node (`/api/v0/cat`). +pub async fn cat(ipfs_api: &str, cid: &str) -> Result> { + if ipfs_api.is_empty() { + return Err(anyhow::anyhow!("IPFS not configured")); + } + let url = format!("{}/api/v0/cat?arg={}", ipfs_api.trim_end_matches('/'), cid); + let resp = reqwest::Client::new().post(&url).send().await?; + if !resp.status().is_success() { + return Err(anyhow::anyhow!("ipfs cat {cid}: {}", resp.status())); + } + Ok(resp.bytes().await?.to_vec()) +} + /// List all git objects in the given bare repo and pin any that are not yet /// recorded in `pinned_cids`. 
/// diff --git a/crates/gitlawb-node/src/server.rs b/crates/gitlawb-node/src/server.rs index 9baea20..9d643b9 100644 --- a/crates/gitlawb-node/src/server.rs +++ b/crates/gitlawb-node/src/server.rs @@ -356,6 +356,14 @@ pub fn build_router(state: AppState) -> Router { "/api/v1/repos/{owner}/{repo}/withheld-paths", axum::routing::get(visibility::withheld_paths), ) + .route( + "/api/v1/repos/{owner}/{repo}/encrypted-blobs", + axum::routing::get(crate::api::encrypted::list_encrypted_blobs), + ) + .route( + "/api/v1/repos/{owner}/{repo}/encrypted-blob/{oid}", + axum::routing::get(crate::api::encrypted::get_encrypted_blob), + ) .layer(DefaultBodyLimit::disable()) .layer(RequestBodyLimitLayer::new(pack_limit)) .layer(middleware::from_fn(auth::optional_signature)); From 64c6c715143cf7530b31a1851b6d58c55e922de6 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 14:08:52 -0500 Subject: [PATCH 07/30] feat(gl): transparent recovery of authorized encrypted blobs on clone --- crates/gl/src/clone.rs | 152 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/crates/gl/src/clone.rs b/crates/gl/src/clone.rs index b5fe39d..514c0cb 100644 --- a/crates/gl/src/clone.rs +++ b/crates/gl/src/clone.rs @@ -217,6 +217,105 @@ struct WithheldPathsResponse { reinclude: Vec, } +/// After the base clone, recover encrypted blobs the caller is authorized for +/// that are missing locally: fetch the envelope, decrypt with the caller's key, +/// install as a loose object. Returns the repo-relative paths recovered. +/// Best-effort; logs and continues on any per-blob failure. 
+async fn recover_encrypted_blobs( + node: &str, + owner: &str, + name: &str, + dest: &Path, + keypair: &gitlawb_core::identity::Keypair, +) -> Result> { + use gitlawb_core::encrypt::open_blob; + use std::collections::HashMap; + use std::io::Write; + + let dest_str = dest.to_str().context("dest path not utf-8")?; + let client = NodeClient::new(node, Some(keypair.clone())); + + let resp = match client + .get_signed(&format!("/api/v1/repos/{owner}/{name}/encrypted-blobs")) + .await + { + Ok(r) if r.status().is_success() => r, + _ => return Ok(vec![]), + }; + let body: serde_json::Value = resp.json().await.context("parsing encrypted-blobs")?; + let blobs = body + .get("blobs") + .and_then(|b| b.as_array()) + .cloned() + .unwrap_or_default(); + if blobs.is_empty() { + return Ok(vec![]); + } + + // Map oid -> repo-relative path from the cloned tree. + let ls = Command::new("git") + .args(["-C", dest_str, "ls-tree", "-r", "HEAD"]) + .output()?; + let mut oid_to_path: HashMap = HashMap::new(); + for line in String::from_utf8_lossy(&ls.stdout).lines() { + if let Some((meta, path)) = line.split_once('\t') { + if let Some(oid) = meta.split_whitespace().nth(2) { + oid_to_path.insert(oid.to_string(), path.to_string()); + } + } + } + + let mut recovered = Vec::new(); + for entry in blobs { + let Some(oid) = entry.get("oid").and_then(|o| o.as_str()) else { + continue; + }; + // Skip if already present locally. 
+ let present = Command::new("git") + .args(["-C", dest_str, "cat-file", "-e", oid]) + .status() + .map(|s| s.success()) + .unwrap_or(false); + if present { + continue; + } + let env_resp = match client + .get_signed(&format!("/api/v1/repos/{owner}/{name}/encrypted-blob/{oid}")) + .await + { + Ok(r) if r.status().is_success() => r, + _ => continue, + }; + let Ok(envelope) = env_resp.bytes().await else { + continue; + }; + let plaintext = match open_blob(&envelope, keypair) { + Ok(p) => p, + Err(e) => { + eprintln!("warning: could not decrypt {oid}: {e}"); + continue; + } + }; + // Install as a loose object; verify the OID matches. + let mut child = Command::new("git") + .args(["-C", dest_str, "hash-object", "-w", "-t", "blob", "--stdin"]) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .spawn()?; + child.stdin.take().unwrap().write_all(&plaintext)?; + let out = child.wait_with_output()?; + let written = String::from_utf8_lossy(&out.stdout).trim().to_string(); + if written == oid { + if let Some(p) = oid_to_path.get(oid) { + recovered.push(p.clone()); + } + } else { + eprintln!("warning: recovered blob {oid} hashed to {written}; discarding"); + } + } + Ok(recovered) +} + pub async fn run(args: CloneArgs) -> Result<()> { let (url, owner, name) = parse_repo(&args.repo)?; let dest_name = args.dir.unwrap_or_else(|| name.clone()); @@ -236,6 +335,30 @@ pub async fn run(args: CloneArgs) -> Result<()> { } setup_partial_clone(&dest, &url, &withheld, &reinclude, args.branch.as_deref())?; + + if let Ok(keypair) = load_keypair_from_dir(None) { + if let Ok(paths) = recover_encrypted_blobs(&args.node, &owner, &name, &dest, &keypair).await { + if !paths.is_empty() { + // Re-include recovered paths if this was a sparse clone, then + // materialize them in the working tree. 
+ let spec = dest.join(".git/info/sparse-checkout"); + if spec.exists() { + if let Ok(mut s) = std::fs::read_to_string(&spec) { + for p in &paths { + s.push_str(&format!("/{p}\n")); + } + let _ = std::fs::write(&spec, s); + } + } + let _ = git(&dest, &["checkout", "--", "."]); + println!( + "Recovered {} private file(s) you are authorized to read", + paths.len() + ); + } + } + } + println!("Done. Cloned into {dest_name}"); Ok(()) } @@ -447,4 +570,33 @@ mod tests { // An extra slash would otherwise smuggle a path segment into the name. assert!(parse_repo("owner/name/extra").is_err()); } + + #[test] + fn recovered_blob_installs_with_matching_oid() { + use gitlawb_core::encrypt::{open_blob, seal_blob}; + use gitlawb_core::identity::Keypair; + let (td, url) = bare_remote(&[("public/a.txt", b"pub\n"), ("secret/b.txt", b"SECRET\n")]); + let dest = td.path().join("dest"); + setup_partial_clone(&dest, &url, &["/secret/**".to_string()], &[], None).unwrap(); + let oid = { + let out = std::process::Command::new("git") + .args(["-C", dest.to_str().unwrap(), "rev-parse", "HEAD:secret/b.txt"]) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout).trim().to_string() + }; + let reader = Keypair::generate(); + let env = seal_blob(b"SECRET\n", &[reader.verifying_key()]).unwrap(); + let plaintext = open_blob(&env, &reader).unwrap(); + let mut child = std::process::Command::new("git") + .args(["-C", dest.to_str().unwrap(), "hash-object", "-w", "-t", "blob", "--stdin"]) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .spawn() + .unwrap(); + use std::io::Write; + child.stdin.take().unwrap().write_all(&plaintext).unwrap(); + let out = child.wait_with_output().unwrap(); + assert_eq!(String::from_utf8_lossy(&out.stdout).trim(), oid); + } } From 6bfade9066c94b2a2f9a0cdb2687c06fd51df693 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 14:11:01 -0500 Subject: [PATCH 08/30] style: 
cargo fmt --- crates/gitlawb-core/src/encrypt.rs | 5 +++- crates/gitlawb-node/src/api/encrypted.rs | 5 +++- crates/gitlawb-node/src/db/mod.rs | 21 +++++++++------- crates/gitlawb-node/src/encrypted_pin.rs | 5 +++- .../gitlawb-node/src/git/visibility_pack.rs | 4 +++- crates/gl/src/clone.rs | 24 +++++++++++++++---- 6 files changed, 47 insertions(+), 17 deletions(-) diff --git a/crates/gitlawb-core/src/encrypt.rs b/crates/gitlawb-core/src/encrypt.rs index aad6913..b626581 100644 --- a/crates/gitlawb-core/src/encrypt.rs +++ b/crates/gitlawb-core/src/encrypt.rs @@ -180,7 +180,10 @@ mod tests { assert_eq!(open_blob(&env, &owner).unwrap(), msg); assert_eq!(open_blob(&env, &reader_a).unwrap(), msg); - assert!(open_blob(&env, &reader_b).is_err(), "non-recipient must fail"); + assert!( + open_blob(&env, &reader_b).is_err(), + "non-recipient must fail" + ); } #[test] diff --git a/crates/gitlawb-node/src/api/encrypted.rs b/crates/gitlawb-node/src/api/encrypted.rs index 8b692ab..8374925 100644 --- a/crates/gitlawb-node/src/api/encrypted.rs +++ b/crates/gitlawb-node/src/api/encrypted.rs @@ -20,7 +20,10 @@ pub async fn list_encrypted_blobs( .get_repo(&owner, &repo) .await? 
.ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; - let rows = state.db.list_encrypted_blobs_for(&record.id, caller).await?; + let rows = state + .db + .list_encrypted_blobs_for(&record.id, caller) + .await?; let blobs: Vec<_> = rows .into_iter() .map(|(oid, cid)| serde_json::json!({ "oid": oid, "cid": cid })) diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 088382d..a7fc4a7 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -1672,10 +1672,11 @@ impl Db { repo_id: &str, caller: &str, ) -> Result> { - let rows = sqlx::query("SELECT oid, cid, recipients FROM encrypted_blobs WHERE repo_id = $1") - .bind(repo_id) - .fetch_all(&self.pool) - .await?; + let rows = + sqlx::query("SELECT oid, cid, recipients FROM encrypted_blobs WHERE repo_id = $1") + .bind(repo_id) + .fetch_all(&self.pool) + .await?; let mut out = Vec::new(); for row in rows { let oid: String = row.get("oid"); @@ -1696,11 +1697,13 @@ impl Db { oid: &str, caller: &str, ) -> Result> { - let row = sqlx::query("SELECT cid, recipients FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2") - .bind(repo_id) - .bind(oid) - .fetch_optional(&self.pool) - .await?; + let row = sqlx::query( + "SELECT cid, recipients FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2", + ) + .bind(repo_id) + .bind(oid) + .fetch_optional(&self.pool) + .await?; let Some(row) = row else { return Ok(None) }; let recipients: String = row.get("recipients"); let recipients: Vec = serde_json::from_str(&recipients).unwrap_or_default(); diff --git a/crates/gitlawb-node/src/encrypted_pin.rs b/crates/gitlawb-node/src/encrypted_pin.rs index dc5c09f..b1004a0 100644 --- a/crates/gitlawb-node/src/encrypted_pin.rs +++ b/crates/gitlawb-node/src/encrypted_pin.rs @@ -52,7 +52,10 @@ pub async fn encrypt_and_pin( _ => continue, }; let dids_vec: Vec = dids.iter().cloned().collect(); - if let Err(e) = db.record_encrypted_blob(repo_id, oid, &cid, &dids_vec).await 
{ + if let Err(e) = db + .record_encrypted_blob(repo_id, oid, &cid, &dids_vec) + .await + { tracing::warn!(oid = %oid, err = %e, "record_encrypted_blob failed"); } } diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs index e32e084..90ca772 100644 --- a/crates/gitlawb-node/src/git/visibility_pack.rs +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -288,7 +288,9 @@ mod tests { use gitlawb_core::encrypt::{open_blob, seal_blob}; use gitlawb_core::identity::Keypair; let (_td, repo, secret_oid, _public) = fixture(); - let (_t, bytes) = crate::git::store::read_object(&repo, &secret_oid).unwrap().unwrap(); + let (_t, bytes) = crate::git::store::read_object(&repo, &secret_oid) + .unwrap() + .unwrap(); let reader = Keypair::generate(); let env = seal_blob(&bytes, &[reader.verifying_key()]).unwrap(); assert_eq!(open_blob(&env, &reader).unwrap(), bytes); diff --git a/crates/gl/src/clone.rs b/crates/gl/src/clone.rs index 514c0cb..7023796 100644 --- a/crates/gl/src/clone.rs +++ b/crates/gl/src/clone.rs @@ -280,7 +280,9 @@ async fn recover_encrypted_blobs( continue; } let env_resp = match client - .get_signed(&format!("/api/v1/repos/{owner}/{name}/encrypted-blob/{oid}")) + .get_signed(&format!( + "/api/v1/repos/{owner}/{name}/encrypted-blob/{oid}" + )) .await { Ok(r) if r.status().is_success() => r, @@ -337,7 +339,8 @@ pub async fn run(args: CloneArgs) -> Result<()> { setup_partial_clone(&dest, &url, &withheld, &reinclude, args.branch.as_deref())?; if let Ok(keypair) = load_keypair_from_dir(None) { - if let Ok(paths) = recover_encrypted_blobs(&args.node, &owner, &name, &dest, &keypair).await { + if let Ok(paths) = recover_encrypted_blobs(&args.node, &owner, &name, &dest, &keypair).await + { if !paths.is_empty() { // Re-include recovered paths if this was a sparse clone, then // materialize them in the working tree. 
@@ -580,7 +583,12 @@ mod tests { setup_partial_clone(&dest, &url, &["/secret/**".to_string()], &[], None).unwrap(); let oid = { let out = std::process::Command::new("git") - .args(["-C", dest.to_str().unwrap(), "rev-parse", "HEAD:secret/b.txt"]) + .args([ + "-C", + dest.to_str().unwrap(), + "rev-parse", + "HEAD:secret/b.txt", + ]) .output() .unwrap(); String::from_utf8_lossy(&out.stdout).trim().to_string() @@ -589,7 +597,15 @@ mod tests { let env = seal_blob(b"SECRET\n", &[reader.verifying_key()]).unwrap(); let plaintext = open_blob(&env, &reader).unwrap(); let mut child = std::process::Command::new("git") - .args(["-C", dest.to_str().unwrap(), "hash-object", "-w", "-t", "blob", "--stdin"]) + .args([ + "-C", + dest.to_str().unwrap(), + "hash-object", + "-w", + "-t", + "blob", + "--stdin", + ]) .stdin(std::process::Stdio::piped()) .stdout(std::process::Stdio::piped()) .spawn() From 6abdc42b321751179fd2e92a2fbd515125472696 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 14:18:23 -0500 Subject: [PATCH 09/30] fix(node): re-seal encrypted blob when recipient set changes Skip re-pinning only when an existing envelope already covers exactly the current recipients. A reader added to a rule after the first pin now gets a re-seal on the next push instead of being permanently locked out. Reader removal stays non-retroactive (the old envelope is already public). --- crates/gitlawb-node/src/db/mod.rs | 26 ++++++++++++++++-------- crates/gitlawb-node/src/encrypted_pin.rs | 11 ++++++++-- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index a7fc4a7..f1cdba3 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -1714,15 +1714,23 @@ impl Db { } } - /// Whether an encrypted blob row exists (recipient-agnostic), to avoid - /// re-pinning on subsequent pushes. 
- pub async fn has_encrypted_blob(&self, repo_id: &str, oid: &str) -> Result { - let row = sqlx::query("SELECT 1 AS x FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2") - .bind(repo_id) - .bind(oid) - .fetch_optional(&self.pool) - .await?; - Ok(row.is_some()) + /// The recipient DID list stored for an encrypted blob, or None if there is + /// no row. Used to decide whether a re-seal is needed (recipients changed). + pub async fn encrypted_blob_recipients( + &self, + repo_id: &str, + oid: &str, + ) -> Result>> { + let row = + sqlx::query("SELECT recipients FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2") + .bind(repo_id) + .bind(oid) + .fetch_optional(&self.pool) + .await?; + Ok(row.map(|r| { + let recipients: String = r.get("recipients"); + serde_json::from_str::>(&recipients).unwrap_or_default() + })) } pub async fn list_pinned_cids(&self) -> Result> { diff --git a/crates/gitlawb-node/src/encrypted_pin.rs b/crates/gitlawb-node/src/encrypted_pin.rs index b1004a0..6ca1382 100644 --- a/crates/gitlawb-node/src/encrypted_pin.rs +++ b/crates/gitlawb-node/src/encrypted_pin.rs @@ -28,8 +28,15 @@ pub async fn encrypt_and_pin( recipients: &HashMap>, ) { for (oid, dids) in recipients { - if db.has_encrypted_blob(repo_id, oid).await.unwrap_or(false) { - continue; + // Skip only if an existing envelope already covers exactly these + // recipients. If the recipient set changed (e.g. a reader was added to + // the rule), re-seal so the new reader can recover the blob. Reader + // removal is not retroactive: the old envelope is already public. 
+ if let Ok(Some(stored)) = db.encrypted_blob_recipients(repo_id, oid).await { + let stored: BTreeSet = stored.into_iter().collect(); + if &stored == dids { + continue; + } } let keys: Vec = dids.iter().filter_map(|d| did_to_key(d)).collect(); if keys.is_empty() { From 00a4a26e97dfdb97c3556a3d1332f82ce6b8aed4 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 18:52:51 -0500 Subject: [PATCH 10/30] feat(node): unscoped encrypted-blob listing for replication --- crates/gitlawb-node/src/db/mod.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index f1cdba3..a3f5cae 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -1690,6 +1690,30 @@ impl Db { Ok(out) } + /// (oid, cid, recipients) for every encrypted blob in the repo, unscoped by + /// caller. This is the replication view used by peer mirrors (Option B2), + /// distinct from the recipient-scoped `list_encrypted_blobs_for`. It returns + /// only ciphertext metadata; no plaintext or key material is involved. + pub async fn list_all_encrypted_blobs( + &self, + repo_id: &str, + ) -> Result)>> { + let rows = + sqlx::query("SELECT oid, cid, recipients FROM encrypted_blobs WHERE repo_id = $1") + .bind(repo_id) + .fetch_all(&self.pool) + .await?; + let mut out = Vec::new(); + for row in rows { + let oid: String = row.get("oid"); + let cid: String = row.get("cid"); + let recipients: String = row.get("recipients"); + let recipients: Vec = serde_json::from_str(&recipients).unwrap_or_default(); + out.push((oid, cid, recipients)); + } + Ok(out) + } + /// The CID of one encrypted blob, only if `caller` is a recipient. 
pub async fn encrypted_blob_cid( &self, From d459d1ad86194a172c58121c5c98c772871e82d4 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 18:52:51 -0500 Subject: [PATCH 11/30] feat(node): encrypted-blobs/replicate endpoint for peer mirrors --- crates/gitlawb-node/src/api/encrypted.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/crates/gitlawb-node/src/api/encrypted.rs b/crates/gitlawb-node/src/api/encrypted.rs index 8374925..6e19bd5 100644 --- a/crates/gitlawb-node/src/api/encrypted.rs +++ b/crates/gitlawb-node/src/api/encrypted.rs @@ -54,3 +54,27 @@ pub async fn get_encrypted_blob( .map_err(|e| AppError::Git(e.to_string()))?; Ok(bytes) } + +/// GET /api/v1/repos/{owner}/{repo}/encrypted-blobs/replicate +/// Returns [{oid, cid, recipients}] for every encrypted blob in the repo, for +/// peer-mirror replication (Option B2). Not recipient-scoped: recipient DIDs are +/// already public via the IPFS-pinned envelopes, so this exposes only ciphertext +/// metadata (content-addressed OIDs/CIDs and recipient DIDs), never plaintext. +pub async fn replicate_encrypted_blobs( + State(state): State, + Path((owner, repo)): Path<(String, String)>, +) -> Result> { + let record = state + .db + .get_repo(&owner, &repo) + .await? 
+ .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let rows = state.db.list_all_encrypted_blobs(&record.id).await?; + let blobs: Vec<_> = rows + .into_iter() + .map(|(oid, cid, recipients)| { + serde_json::json!({ "oid": oid, "cid": cid, "recipients": recipients }) + }) + .collect(); + Ok(Json(serde_json::json!({ "blobs": blobs }))) +} From e894130cd3f44bd8c00ac5070a9bd5a9885dc19b Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 18:52:51 -0500 Subject: [PATCH 12/30] feat(node): route encrypted-blobs/replicate under git_read_routes --- crates/gitlawb-node/src/server.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/gitlawb-node/src/server.rs b/crates/gitlawb-node/src/server.rs index 9d643b9..31ce4b4 100644 --- a/crates/gitlawb-node/src/server.rs +++ b/crates/gitlawb-node/src/server.rs @@ -364,6 +364,10 @@ pub fn build_router(state: AppState) -> Router { "/api/v1/repos/{owner}/{repo}/encrypted-blob/{oid}", axum::routing::get(crate::api::encrypted::get_encrypted_blob), ) + .route( + "/api/v1/repos/{owner}/{repo}/encrypted-blobs/replicate", + axum::routing::get(crate::api::encrypted::replicate_encrypted_blobs), + ) .layer(DefaultBodyLimit::disable()) .layer(RequestBodyLimitLayer::new(pack_limit)) .layer(middleware::from_fn(auth::optional_signature)); From d673b741a6bd954ce84f1860080d8bfb605b3542 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 19:13:10 -0500 Subject: [PATCH 13/30] feat(node): peer mirrors replicate encrypted withheld blobs (Option B2) --- crates/gitlawb-node/src/sync.rs | 196 ++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) diff --git a/crates/gitlawb-node/src/sync.rs b/crates/gitlawb-node/src/sync.rs index f1ffecc..0434e0a 100644 --- a/crates/gitlawb-node/src/sync.rs +++ b/crates/gitlawb-node/src/sync.rs @@ -12,6 +12,7 @@ //! 5. 
On success, register ourselves as a replica with the origin node so //! its `replica_count` reflects reality (best-effort, idempotent). +use std::collections::HashMap; use std::path::Path; use std::sync::Arc; @@ -47,6 +48,52 @@ fn classify_mirror(withheld: Option>) -> MirrorMode { } } +/// One encrypted blob as advertised by an origin's `encrypted-blobs/replicate` +/// endpoint (Option B2). Ciphertext metadata only. +#[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize)] +struct ReplicaBlob { + oid: String, + cid: String, + #[serde(default)] + recipients: Vec, +} + +/// The shape of the `encrypted-blobs/replicate` JSON response. +#[derive(Debug, serde::Deserialize)] +struct ReplicateResponse { + #[serde(default)] + blobs: Vec, +} + +/// Decide which of the origin's encrypted blobs this mirror must (re)replicate. +/// +/// `have` maps each already-stored blob's oid to its stored recipient DIDs. A +/// remote blob is returned when the mirror has no row for that oid, or when the +/// stored recipient set differs from the remote one (the origin re-sealed after a +/// reader-set change; same semantics as B1). Recipient order is ignored. +fn blobs_needing_replication( + remote: &[ReplicaBlob], + have: &HashMap>, +) -> Vec { + remote + .iter() + .filter(|b| match have.get(&b.oid) { + None => true, + Some(stored) => !same_recipients(stored, &b.recipients), + }) + .cloned() + .collect() +} + +/// Order-insensitive equality of two recipient DID lists. +fn same_recipients(a: &[String], b: &[String]) -> bool { + let mut a: Vec<&String> = a.iter().collect(); + let mut b: Vec<&String> = b.iter().collect(); + a.sort(); + b.sort(); + a == b +} + /// Start the background sync worker. Returns immediately; the worker runs /// as a detached tokio task that exits cleanly when `shutdown_rx` flips /// to `true`. 
@@ -167,6 +214,20 @@ async fn process_batch( machine_id, ) .await; + // Option B2: carry the encrypted withheld-blob envelopes too, so an + // authorized reader can recover private content from this mirror if + // the origin dies. `item.repo` is the slug "{owner_short}/{name}", + // which is the id upsert_mirror_repo wrote (the local repo_id). + replicate_encrypted_blobs( + client, + &origin_url, + owner_short, + repo_name, + db, + &item.repo, + &config.ipfs_api, + ) + .await; let _ = db.mark_sync_done(&item.id).await; crate::metrics::record_sync_processed("done"); @@ -277,6 +338,86 @@ async fn register_replica_with_origin( } } +/// Replicate the origin's encrypted withheld blobs onto this mirror (Option B2). +/// +/// After the git objects are mirrored, fetch the origin's replication listing, +/// then for each blob the mirror does not already hold (or whose recipients +/// changed) pull the ciphertext envelope over IPFS, pin it locally, and record +/// the `encrypted_blobs` row keyed by this mirror's local `repo_id`. +/// +/// Best-effort and idempotent: any per-blob failure is logged and skipped, to be +/// retried on the next sync. Confidentiality is never at risk; the mirror only +/// ever handles ciphertext and never decrypts. Cleanly a no-op when IPFS is +/// unconfigured, the origin reports no encrypted blobs, or the replicate endpoint +/// is absent (older peer) or unreachable. 
+async fn replicate_encrypted_blobs( + client: &reqwest::Client, + origin_url: &str, + owner: &str, + repo: &str, + db: &Db, + repo_id: &str, + ipfs_api: &str, +) { + if ipfs_api.is_empty() { + return; + } + + let url = format!("{origin_url}/api/v1/repos/{owner}/{repo}/encrypted-blobs/replicate"); + let resp = match client.get(&url).send().await { + Ok(r) if r.status().is_success() => r, + _ => return, + }; + let parsed: ReplicateResponse = match resp.json().await { + Ok(p) => p, + Err(e) => { + warn!(repo = %repo, err = %e, "failed to parse encrypted-blobs/replicate response"); + return; + } + }; + if parsed.blobs.is_empty() { + return; + } + + let have: HashMap> = match db.list_all_encrypted_blobs(repo_id).await { + Ok(rows) => rows + .into_iter() + .map(|(oid, _cid, recipients)| (oid, recipients)) + .collect(), + Err(e) => { + warn!(repo = %repo, err = %e, "failed to list local encrypted blobs for replication"); + return; + } + }; + + for blob in blobs_needing_replication(&parsed.blobs, &have) { + let envelope = match crate::ipfs_pin::cat(ipfs_api, &blob.cid).await { + Ok(bytes) => bytes, + Err(e) => { + warn!(oid = %blob.oid, cid = %blob.cid, err = %e, "failed to fetch encrypted envelope over IPFS; will retry next sync"); + continue; + } + }; + match crate::ipfs_pin::pin_git_object(ipfs_api, &blob.oid, &envelope).await { + Ok(cid) if !cid.is_empty() => { + if cid != blob.cid { + warn!(oid = %blob.oid, expected = %blob.cid, got = %cid, "replicated envelope CID mismatch; skipping record"); + continue; + } + if let Err(e) = db + .record_encrypted_blob(repo_id, &blob.oid, &cid, &blob.recipients) + .await + { + warn!(oid = %blob.oid, err = %e, "failed to record replicated encrypted blob"); + } + } + _ => { + warn!(oid = %blob.oid, "failed to pin replicated encrypted envelope; will retry next sync"); + } + } + } +} + /// Run a git subprocess, returning an error with stderr on non-zero exit. 
async fn git_run(args: &[&str]) -> anyhow::Result<()> { let out = tokio::process::Command::new("git") @@ -423,6 +564,61 @@ mod tests { assert!(matches!(mode, MirrorMode::Plain)); } + fn rb(oid: &str, cid: &str, recipients: &[&str]) -> ReplicaBlob { + ReplicaBlob { + oid: oid.to_string(), + cid: cid.to_string(), + recipients: recipients.iter().map(|s| s.to_string()).collect(), + } + } + + #[test] + fn replicate_stores_new_blob() { + let remote = vec![rb("oid1", "cidA", &["did:key:zA"])]; + let have = HashMap::new(); + assert_eq!(blobs_needing_replication(&remote, &have), remote); + } + + #[test] + fn replicate_skips_already_present_same_recipients() { + let remote = vec![rb("oid1", "cidA", &["did:key:zA", "did:key:zB"])]; + let mut have = HashMap::new(); + // stored in a different order: must still count as present + have.insert( + "oid1".to_string(), + vec!["did:key:zB".to_string(), "did:key:zA".to_string()], + ); + assert!(blobs_needing_replication(&remote, &have).is_empty()); + } + + #[test] + fn replicate_restores_on_recipient_change() { + let remote = vec![rb("oid1", "cidB", &["did:key:zA", "did:key:zC"])]; + let mut have = HashMap::new(); + have.insert("oid1".to_string(), vec!["did:key:zA".to_string()]); + assert_eq!(blobs_needing_replication(&remote, &have), remote); + } + + #[test] + fn replicate_empty_remote_is_noop() { + assert!(blobs_needing_replication(&[], &HashMap::new()).is_empty()); + } + + #[test] + fn replicate_response_parses() { + let json = r#"{"blobs":[{"oid":"o1","cid":"c1","recipients":["did:key:zA"]}]}"#; + let parsed: ReplicateResponse = serde_json::from_str(json).unwrap(); + assert_eq!(parsed.blobs.len(), 1); + assert_eq!(parsed.blobs[0].oid, "o1"); + assert_eq!(parsed.blobs[0].recipients, vec!["did:key:zA".to_string()]); + } + + #[test] + fn replicate_response_empty_blobs_parses() { + let parsed: ReplicateResponse = serde_json::from_str(r#"{"blobs":[]}"#).unwrap(); + assert!(parsed.blobs.is_empty()); + } + fn g(args: &[&str], dir: &Path) 
{ assert!(Command::new("git") .args(args) From 2a1cce317efaf0f9679adba47c221bf31f321bb1 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:51:09 -0500 Subject: [PATCH 14/30] feat(node): anchor_encrypted_manifest for Option B3 Arweave manifests --- crates/gitlawb-node/src/arweave.rs | 152 +++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/crates/gitlawb-node/src/arweave.rs b/crates/gitlawb-node/src/arweave.rs index a88f31f..5027bc8 100644 --- a/crates/gitlawb-node/src/arweave.rs +++ b/crates/gitlawb-node/src/arweave.rs @@ -103,6 +103,99 @@ pub async fn anchor_ref_update( Ok(tx_id) } +/// A per-push manifest of the blobs encrypted this push (Option B3). The +/// `blobs` slice is `(oid, cid, recipients)` tuples. Anchored directly to +/// Arweave as its JSON body so the discovery index survives total node loss. +pub struct EncryptedManifest<'a> { + pub repo: &'a str, + pub owner_did: &'a str, + pub node_did: &'a str, + pub timestamp: &'a str, + pub blobs: &'a [(String, String, Vec)], +} + +/// Anchor a per-push encrypted-blob manifest to Arweave via Irys. The manifest +/// JSON body is the payload (not a CID pointer to IPFS), so the index is +/// permanent and self-contained. Recipient DIDs are already public via the +/// pinned envelopes, so the manifest carries no new secret. +/// +/// Returns the Irys/Arweave transaction ID, or `Ok("")` when `irys_url` is empty +/// (anchoring disabled) or there are no blobs to anchor. 
+pub async fn anchor_encrypted_manifest( + client: &reqwest::Client, + irys_url: &str, + manifest: &EncryptedManifest<'_>, +) -> Result { + if irys_url.is_empty() || manifest.blobs.is_empty() { + return Ok(String::new()); + } + + let blobs_json: Vec = manifest + .blobs + .iter() + .map(|(oid, cid, recipients)| json!({ "oid": oid, "cid": cid, "recipients": recipients })) + .collect(); + + let payload = json!({ + "schema": "gitlawb/encrypted-manifest/v1", + "repo": manifest.repo, + "owner_did": manifest.owner_did, + "node_did": manifest.node_did, + "timestamp": manifest.timestamp, + "blobs": blobs_json, + }); + + let body = serde_json::to_vec(&payload)?; + let url = format!("{}/upload", irys_url.trim_end_matches('/')); + + let resp = client + .post(&url) + .header("Content-Type", "application/json") + .header("x-irys-tags", build_manifest_tags_header(manifest)) + .body(body) + .send() + .await + .map_err(|e| anyhow::anyhow!("Irys upload failed: {e}"))?; + + if !resp.status().is_success() { + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + return Err(anyhow::anyhow!("Irys returned {status}: {body}")); + } + + let json: serde_json::Value = resp + .json() + .await + .map_err(|e| anyhow::anyhow!("failed to parse Irys response: {e}"))?; + + let tx_id = json["id"] + .as_str() + .ok_or_else(|| anyhow::anyhow!("no 'id' in Irys response: {json}"))? + .to_string(); + + tracing::info!( + repo = %manifest.repo, + tx_id = %tx_id, + blobs = manifest.blobs.len(), + "anchored encrypted manifest to Arweave" + ); + + Ok(tx_id) +} + +/// Build the Irys tag header for an encrypted-blob manifest. `Repo` and `Schema` +/// are the tags the `gl` recovery query filters on. 
+fn build_manifest_tags_header(manifest: &EncryptedManifest<'_>) -> String { + [ + "App-Name:gitlawb".to_string(), + "Schema:gitlawb/encrypted-manifest/v1".to_string(), + format!("Repo:{}", sanitize_tag(manifest.repo)), + format!("Owner-DID:{}", sanitize_tag(manifest.owner_did)), + format!("Node-DID:{}", sanitize_tag(manifest.node_did)), + ] + .join(",") +} + /// Arweave permanent URL for a given Irys transaction ID. pub fn arweave_url(tx_id: &str) -> String { format!("https://arweave.net/{tx_id}") @@ -193,6 +286,65 @@ mod tests { ); } + #[tokio::test] + async fn test_manifest_anchor_noop_when_url_empty() { + let client = reqwest::Client::new(); + let blobs = vec![("oid1".to_string(), "cid1".to_string(), vec!["did:key:zA".to_string()])]; + let m = EncryptedManifest { + repo: "alice/r", + owner_did: "did:key:zO", + node_did: "did:key:zN", + timestamp: "2026-06-11T00:00:00Z", + blobs: &blobs, + }; + assert_eq!(anchor_encrypted_manifest(&client, "", &m).await.unwrap(), ""); + } + + #[tokio::test] + async fn test_manifest_anchor_noop_when_no_blobs() { + let client = reqwest::Client::new(); + let blobs: Vec<(String, String, Vec)> = vec![]; + let m = EncryptedManifest { + repo: "alice/r", + owner_did: "did:key:zO", + node_did: "did:key:zN", + timestamp: "2026-06-11T00:00:00Z", + blobs: &blobs, + }; + // Non-empty URL, but no blobs: still a no-op. 
+ assert_eq!( + anchor_encrypted_manifest(&client, "https://example.invalid", &m) + .await + .unwrap(), + "" + ); + } + + #[tokio::test] + async fn test_manifest_anchor_success() { + let mut server = mockito::Server::new_async().await; + let _mock = server + .mock("POST", "/upload") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(r#"{"id":"MANIFESTTX123","timestamp":1710000000000,"version":"1.0.0"}"#) + .create_async() + .await; + + let client = reqwest::Client::new(); + let blobs = vec![("oid1".to_string(), "cid1".to_string(), vec!["did:key:zA".to_string()])]; + let m = EncryptedManifest { + repo: "alice/r", + owner_did: "did:key:zO", + node_did: "did:key:zN", + timestamp: "2026-06-11T00:00:00Z", + blobs: &blobs, + }; + let r = anchor_encrypted_manifest(&client, &server.url(), &m).await; + assert_eq!(r.unwrap(), "MANIFESTTX123"); + _mock.assert_async().await; + } + #[test] fn test_sanitize_tag() { assert_eq!(sanitize_tag("alice/myrepo"), "alice/myrepo"); From 716eb31034802495fb2afe4dff604019f0b8e7b8 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:52:14 -0500 Subject: [PATCH 15/30] feat(node): encrypt_and_pin returns the per-push sealed delta --- crates/gitlawb-node/src/encrypted_pin.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/gitlawb-node/src/encrypted_pin.rs b/crates/gitlawb-node/src/encrypted_pin.rs index 6ca1382..50797b5 100644 --- a/crates/gitlawb-node/src/encrypted_pin.rs +++ b/crates/gitlawb-node/src/encrypted_pin.rs @@ -20,13 +20,16 @@ fn did_to_key(did: &str) -> Option { } /// Encrypt and pin every withheld blob. `recipients` maps blob oid -> DID set. +/// Returns `(oid, cid, recipients)` for each blob actually sealed and recorded +/// this call (the per-push delta), used by Option B3 to anchor a manifest. 
pub async fn encrypt_and_pin( ipfs_api: &str, repo_path: &Path, db: &Db, repo_id: &str, recipients: &HashMap>, -) { +) -> Vec<(String, String, Vec)> { + let mut sealed = Vec::new(); for (oid, dids) in recipients { // Skip only if an existing envelope already covers exactly these // recipients. If the recipient set changed (e.g. a reader was added to @@ -64,6 +67,9 @@ pub async fn encrypt_and_pin( .await { tracing::warn!(oid = %oid, err = %e, "record_encrypted_blob failed"); + continue; } + sealed.push((oid.clone(), cid.clone(), dids_vec)); } + sealed } From aea893b7d65b169a5a4f5bb94c0fdcc46a4a1c7d Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:54:14 -0500 Subject: [PATCH 16/30] feat(node): anchor encrypted-blob manifest on push (Option B3) --- crates/gitlawb-node/src/api/repos.rs | 42 +++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 0eb4a12..732c72f 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -690,6 +690,10 @@ pub async fn git_receive_pack( let repo_id = record.id.clone(); let owner_did = record.owner_did.clone(); let is_public = record.is_public; + let irys_url = state.config.irys_url.clone(); + let http_client = std::sync::Arc::clone(&state.http_client); + let node_did_str = state.node_did.to_string(); + let repo_name = record.name.clone(); tokio::spawn(async move { let pinned = crate::ipfs_pin::pin_new_objects( &ipfs_api, @@ -717,7 +721,7 @@ pub async fn git_receive_pack( }) .await; if let Ok(Ok(recipients)) = recip { - crate::encrypted_pin::encrypt_and_pin( + let delta = crate::encrypted_pin::encrypt_and_pin( &ipfs_api, &repo_path_clone, &db_clone, @@ -725,6 +729,42 @@ pub async fn git_receive_pack( &recipients, ) .await; + + // Option B3: anchor a per-push manifest of the blobs sealed + // this push to Arweave, so the 
oid->cid index survives total + // node loss. Best-effort; never fails the push. + if !delta.is_empty() && !irys_url.is_empty() { + let owner_short = + owner_did.split(':').next_back().unwrap_or(&owner_did); + let repo_slug = format!("{owner_short}/{repo_name}"); + let ts = chrono::Utc::now().to_rfc3339(); + let manifest = crate::arweave::EncryptedManifest { + repo: &repo_slug, + owner_did: &owner_did, + node_did: &node_did_str, + timestamp: &ts, + blobs: &delta, + }; + match crate::arweave::anchor_encrypted_manifest( + &http_client, + &irys_url, + &manifest, + ) + .await + { + Ok(tx) if !tx.is_empty() => tracing::info!( + repo = %repo_slug, + tx_id = %tx, + "anchored encrypted manifest to Arweave" + ), + Ok(_) => {} + Err(e) => tracing::warn!( + repo = %repo_slug, + err = %e, + "encrypted manifest anchor failed" + ), + } + } } } }); From 8cba97e3639398218956dec11fa049dfb9b26dbf Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:56:07 -0500 Subject: [PATCH 17/30] feat(gl): Arweave/IPFS gateway recovery for encrypted blobs (Option B3) --- crates/gl/src/clone.rs | 232 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) diff --git a/crates/gl/src/clone.rs b/crates/gl/src/clone.rs index 7023796..f19eb50 100644 --- a/crates/gl/src/clone.rs +++ b/crates/gl/src/clone.rs @@ -318,6 +318,177 @@ async fn recover_encrypted_blobs( Ok(recovered) } +/// One blob entry in an Arweave-anchored encrypted manifest. The manifest also +/// carries a `recipients` field per blob, but `gl` does not need it: authorization +/// is enforced by whether `open_blob` can decrypt with the caller's key. Unknown +/// JSON fields are ignored by serde, so `recipients` is simply not declared here. +#[derive(Deserialize)] +struct ManifestBlob { + oid: String, + cid: String, +} + +/// An Arweave-anchored per-push encrypted manifest (Option B3). 
+#[derive(Deserialize)] +struct Manifest { + #[serde(default)] + timestamp: String, + #[serde(default)] + blobs: Vec, +} + +/// Extract transaction ids from an Arweave GraphQL `transactions` response. +fn parse_tx_ids(v: &serde_json::Value) -> Vec { + v.get("data") + .and_then(|d| d.get("transactions")) + .and_then(|t| t.get("edges")) + .and_then(|e| e.as_array()) + .map(|edges| { + edges + .iter() + .filter_map(|edge| { + edge.get("node") + .and_then(|n| n.get("id")) + .and_then(|i| i.as_str()) + .map(String::from) + }) + .collect() + }) + .unwrap_or_default() +} + +/// Merge per-push manifests into a single `oid -> cid` map, latest-wins by the +/// manifest `timestamp` (RFC3339, compared lexicographically; a later push that +/// re-sealed a blob overrides the earlier entry). +fn merge_manifests(manifests: Vec) -> std::collections::HashMap { + let mut best: std::collections::HashMap = + std::collections::HashMap::new(); // oid -> (cid, timestamp) + for m in manifests { + for b in m.blobs { + match best.get(&b.oid) { + Some((_, ts)) if ts.as_str() >= m.timestamp.as_str() => {} + _ => { + best.insert(b.oid, (b.cid, m.timestamp.clone())); + } + } + } + } + best.into_iter().map(|(oid, (cid, _))| (oid, cid)).collect() +} + +/// Option B3 fallback recovery, with no dependency on a gitlawb node API. Query +/// the Arweave gateway for this repo's encrypted manifests, merge them, and for +/// each blob still missing locally that the caller can decrypt, pull the envelope +/// from a public IPFS gateway, decrypt, and install it as a loose object. Returns +/// the repo-relative paths recovered. Best-effort; silent when gateways are +/// unreachable, leaving the clone exactly as node-based recovery left it. 
+async fn recover_from_arweave( + arweave_gateway: &str, + ipfs_gateway: &str, + owner: &str, + name: &str, + dest: &Path, + keypair: &gitlawb_core::identity::Keypair, +) -> Result> { + use gitlawb_core::encrypt::open_blob; + use std::collections::HashMap; + use std::io::Write; + + let dest_str = dest.to_str().context("dest path not utf-8")?; + let owner_short = owner.split(':').next_back().unwrap_or(owner); + let slug = format!("{owner_short}/{name}"); + let ag = arweave_gateway.trim_end_matches('/'); + let ig = ipfs_gateway.trim_end_matches('/'); + let client = reqwest::Client::new(); + + // 1. Discover manifest transaction ids via Arweave GraphQL. + let query = r#"query($repo:String!){transactions(tags:[{name:"App-Name",values:["gitlawb"]},{name:"Schema",values:["gitlawb/encrypted-manifest/v1"]},{name:"Repo",values:[$repo]}],first:100){edges{node{id}}}}"#; + let gql_body = serde_json::json!({ "query": query, "variables": { "repo": slug } }); + let resp = match client.post(format!("{ag}/graphql")).json(&gql_body).send().await { + Ok(r) if r.status().is_success() => r, + _ => return Ok(vec![]), + }; + let gql: serde_json::Value = match resp.json().await { + Ok(v) => v, + Err(_) => return Ok(vec![]), + }; + let tx_ids = parse_tx_ids(&gql); + if tx_ids.is_empty() { + return Ok(vec![]); + } + + // 2. Fetch and parse each manifest body, then merge latest-wins per oid. + let mut manifests = Vec::new(); + for tx in tx_ids { + let m = match client.get(format!("{ag}/{tx}")).send().await { + Ok(r) if r.status().is_success() => r, + _ => continue, + }; + if let Ok(parsed) = m.json::().await { + manifests.push(parsed); + } + } + let oid_cid = merge_manifests(manifests); + if oid_cid.is_empty() { + return Ok(vec![]); + } + + // Map oid -> repo-relative path from the cloned tree. 
+ let ls = Command::new("git") + .args(["-C", dest_str, "ls-tree", "-r", "HEAD"]) + .output()?; + let mut oid_to_path: HashMap = HashMap::new(); + for line in String::from_utf8_lossy(&ls.stdout).lines() { + if let Some((meta, path)) = line.split_once('\t') { + if let Some(oid) = meta.split_whitespace().nth(2) { + oid_to_path.insert(oid.to_string(), path.to_string()); + } + } + } + + // 3. Recover each missing blob the caller can decrypt. + let mut recovered = Vec::new(); + for (oid, cid) in oid_cid { + let present = Command::new("git") + .args(["-C", dest_str, "cat-file", "-e", &oid]) + .status() + .map(|s| s.success()) + .unwrap_or(false); + if present { + continue; + } + let env_resp = match client.get(format!("{ig}/ipfs/{cid}")).send().await { + Ok(r) if r.status().is_success() => r, + _ => continue, + }; + let Ok(envelope) = env_resp.bytes().await else { + continue; + }; + // open_blob succeeds only if this caller is a recipient: this is the + // authorization gate (no node, no DID check needed). 
+ let plaintext = match open_blob(&envelope, keypair) { + Ok(p) => p, + Err(_) => continue, + }; + let mut child = Command::new("git") + .args(["-C", dest_str, "hash-object", "-w", "-t", "blob", "--stdin"]) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .spawn()?; + child.stdin.take().unwrap().write_all(&plaintext)?; + let out = child.wait_with_output()?; + let written = String::from_utf8_lossy(&out.stdout).trim().to_string(); + if written == oid { + if let Some(p) = oid_to_path.get(&oid) { + recovered.push(p.clone()); + } + } else { + eprintln!("warning: recovered blob {oid} hashed to {written}; discarding"); + } + } + Ok(recovered) +} + pub async fn run(args: CloneArgs) -> Result<()> { let (url, owner, name) = parse_repo(&args.repo)?; let dest_name = args.dir.unwrap_or_else(|| name.clone()); @@ -554,6 +725,67 @@ mod tests { .is_err()); } + #[test] + fn parse_tx_ids_extracts_node_ids() { + let v: serde_json::Value = serde_json::from_str( + r#"{"data":{"transactions":{"edges":[{"node":{"id":"TX1"}},{"node":{"id":"TX2"}}]}}}"#, + ) + .unwrap(); + assert_eq!(parse_tx_ids(&v), vec!["TX1".to_string(), "TX2".to_string()]); + } + + #[test] + fn parse_tx_ids_empty_on_no_edges() { + let v: serde_json::Value = + serde_json::from_str(r#"{"data":{"transactions":{"edges":[]}}}"#).unwrap(); + assert!(parse_tx_ids(&v).is_empty()); + } + + #[test] + fn manifest_parses_and_ignores_recipients() { + let m: Manifest = serde_json::from_str( + r#"{"timestamp":"2026-06-11T00:00:00Z","blobs":[{"oid":"o1","cid":"c1","recipients":["did:key:zA"]}]}"#, + ) + .unwrap(); + assert_eq!(m.timestamp, "2026-06-11T00:00:00Z"); + assert_eq!(m.blobs.len(), 1); + assert_eq!(m.blobs[0].oid, "o1"); + assert_eq!(m.blobs[0].cid, "c1"); + } + + #[test] + fn merge_manifests_latest_wins_per_oid() { + let older = Manifest { + timestamp: "2026-06-10T00:00:00Z".to_string(), + blobs: vec![ManifestBlob { oid: "o1".to_string(), cid: "cidOLD".to_string() }], + }; + let newer = 
Manifest { + timestamp: "2026-06-11T00:00:00Z".to_string(), + blobs: vec![ + ManifestBlob { oid: "o1".to_string(), cid: "cidNEW".to_string() }, + ManifestBlob { oid: "o2".to_string(), cid: "cid2".to_string() }, + ], + }; + let merged = merge_manifests(vec![older, newer]); + assert_eq!(merged.get("o1").map(String::as_str), Some("cidNEW")); + assert_eq!(merged.get("o2").map(String::as_str), Some("cid2")); + } + + #[test] + fn merge_manifests_is_order_independent() { + let older = Manifest { + timestamp: "2026-06-10T00:00:00Z".to_string(), + blobs: vec![ManifestBlob { oid: "o1".to_string(), cid: "cidOLD".to_string() }], + }; + let newer = Manifest { + timestamp: "2026-06-11T00:00:00Z".to_string(), + blobs: vec![ManifestBlob { oid: "o1".to_string(), cid: "cidNEW".to_string() }], + }; + // Newer first, older second: newer must still win. + let merged = merge_manifests(vec![newer, older]); + assert_eq!(merged.get("o1").map(String::as_str), Some("cidNEW")); + } + #[test] fn parse_repo_accepts_url_and_bare() { let (url, o, n) = parse_repo("gitlawb://did:key:zAbc/myrepo").unwrap(); From 3fb45ec4ddf6adb0c3d17bf3bb0b3170d19788e3 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:57:25 -0500 Subject: [PATCH 18/30] feat(gl): transparent Arweave fallback recovery on clone (Option B3) --- crates/gl/src/clone.rs | 57 +++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/crates/gl/src/clone.rs b/crates/gl/src/clone.rs index f19eb50..62e6198 100644 --- a/crates/gl/src/clone.rs +++ b/crates/gl/src/clone.rs @@ -29,6 +29,15 @@ pub struct CloneArgs { #[arg(long, default_value = "https://node.gitlawb.com", env = "GITLAWB_NODE")] pub node: String, + + /// Arweave gateway for B3 manifest discovery/fetch when a node cannot supply + /// the encrypted-blob mapping. 
+ #[arg(long, default_value = "https://arweave.net", env = "GITLAWB_ARWEAVE_GATEWAY")] + pub arweave_gateway: String, + + /// Public IPFS gateway for fetching encrypted envelopes during B3 recovery. + #[arg(long, default_value = "https://dweb.link", env = "GITLAWB_IPFS_GATEWAY")] + pub ipfs_gateway: String, } /// Run a git command inside `dir`, erroring with stderr on failure. @@ -510,26 +519,40 @@ pub async fn run(args: CloneArgs) -> Result<()> { setup_partial_clone(&dest, &url, &withheld, &reinclude, args.branch.as_deref())?; if let Ok(keypair) = load_keypair_from_dir(None) { - if let Ok(paths) = recover_encrypted_blobs(&args.node, &owner, &name, &dest, &keypair).await - { - if !paths.is_empty() { - // Re-include recovered paths if this was a sparse clone, then - // materialize them in the working tree. - let spec = dest.join(".git/info/sparse-checkout"); - if spec.exists() { - if let Ok(mut s) = std::fs::read_to_string(&spec) { - for p in &paths { - s.push_str(&format!("/{p}\n")); - } - let _ = std::fs::write(&spec, s); + // Node-based recovery first (B1/B2), then the B3 Arweave/IPFS gateway + // fallback for any authorized blobs the node could not supply. + let mut paths = recover_encrypted_blobs(&args.node, &owner, &name, &dest, &keypair) + .await + .unwrap_or_default(); + let from_arweave = recover_from_arweave( + &args.arweave_gateway, + &args.ipfs_gateway, + &owner, + &name, + &dest, + &keypair, + ) + .await + .unwrap_or_default(); + paths.extend(from_arweave); + + if !paths.is_empty() { + // Re-include recovered paths if this was a sparse clone, then + // materialize them in the working tree. 
+ let spec = dest.join(".git/info/sparse-checkout"); + if spec.exists() { + if let Ok(mut s) = std::fs::read_to_string(&spec) { + for p in &paths { + s.push_str(&format!("/{p}\n")); } + let _ = std::fs::write(&spec, s); } - let _ = git(&dest, &["checkout", "--", "."]); - println!( - "Recovered {} private file(s) you are authorized to read", - paths.len() - ); } + let _ = git(&dest, &["checkout", "--", "."]); + println!( + "Recovered {} private file(s) you are authorized to read", + paths.len() + ); } } From 02df8cb639a76ba9127637502569cb750bc49a2d Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:58:13 -0500 Subject: [PATCH 19/30] style: cargo fmt --- crates/gitlawb-node/src/api/repos.rs | 3 +- crates/gitlawb-node/src/arweave.rs | 17 +++++++++-- crates/gl/src/clone.rs | 44 +++++++++++++++++++++++----- 3 files changed, 51 insertions(+), 13 deletions(-) diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 732c72f..6fa028d 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -734,8 +734,7 @@ pub async fn git_receive_pack( // this push to Arweave, so the oid->cid index survives total // node loss. Best-effort; never fails the push. 
if !delta.is_empty() && !irys_url.is_empty() { - let owner_short = - owner_did.split(':').next_back().unwrap_or(&owner_did); + let owner_short = owner_did.split(':').next_back().unwrap_or(&owner_did); let repo_slug = format!("{owner_short}/{repo_name}"); let ts = chrono::Utc::now().to_rfc3339(); let manifest = crate::arweave::EncryptedManifest { diff --git a/crates/gitlawb-node/src/arweave.rs b/crates/gitlawb-node/src/arweave.rs index 5027bc8..c6cdd3d 100644 --- a/crates/gitlawb-node/src/arweave.rs +++ b/crates/gitlawb-node/src/arweave.rs @@ -289,7 +289,11 @@ mod tests { #[tokio::test] async fn test_manifest_anchor_noop_when_url_empty() { let client = reqwest::Client::new(); - let blobs = vec![("oid1".to_string(), "cid1".to_string(), vec!["did:key:zA".to_string()])]; + let blobs = vec![( + "oid1".to_string(), + "cid1".to_string(), + vec!["did:key:zA".to_string()], + )]; let m = EncryptedManifest { repo: "alice/r", owner_did: "did:key:zO", @@ -297,7 +301,10 @@ mod tests { timestamp: "2026-06-11T00:00:00Z", blobs: &blobs, }; - assert_eq!(anchor_encrypted_manifest(&client, "", &m).await.unwrap(), ""); + assert_eq!( + anchor_encrypted_manifest(&client, "", &m).await.unwrap(), + "" + ); } #[tokio::test] @@ -332,7 +339,11 @@ mod tests { .await; let client = reqwest::Client::new(); - let blobs = vec![("oid1".to_string(), "cid1".to_string(), vec!["did:key:zA".to_string()])]; + let blobs = vec![( + "oid1".to_string(), + "cid1".to_string(), + vec!["did:key:zA".to_string()], + )]; let m = EncryptedManifest { repo: "alice/r", owner_did: "did:key:zO", diff --git a/crates/gl/src/clone.rs b/crates/gl/src/clone.rs index 62e6198..40a0ee3 100644 --- a/crates/gl/src/clone.rs +++ b/crates/gl/src/clone.rs @@ -32,11 +32,19 @@ pub struct CloneArgs { /// Arweave gateway for B3 manifest discovery/fetch when a node cannot supply /// the encrypted-blob mapping. 
- #[arg(long, default_value = "https://arweave.net", env = "GITLAWB_ARWEAVE_GATEWAY")] + #[arg( + long, + default_value = "https://arweave.net", + env = "GITLAWB_ARWEAVE_GATEWAY" + )] pub arweave_gateway: String, /// Public IPFS gateway for fetching encrypted envelopes during B3 recovery. - #[arg(long, default_value = "https://dweb.link", env = "GITLAWB_IPFS_GATEWAY")] + #[arg( + long, + default_value = "https://dweb.link", + env = "GITLAWB_IPFS_GATEWAY" + )] pub ipfs_gateway: String, } @@ -413,7 +421,12 @@ async fn recover_from_arweave( // 1. Discover manifest transaction ids via Arweave GraphQL. let query = r#"query($repo:String!){transactions(tags:[{name:"App-Name",values:["gitlawb"]},{name:"Schema",values:["gitlawb/encrypted-manifest/v1"]},{name:"Repo",values:[$repo]}],first:100){edges{node{id}}}}"#; let gql_body = serde_json::json!({ "query": query, "variables": { "repo": slug } }); - let resp = match client.post(format!("{ag}/graphql")).json(&gql_body).send().await { + let resp = match client + .post(format!("{ag}/graphql")) + .json(&gql_body) + .send() + .await + { Ok(r) if r.status().is_success() => r, _ => return Ok(vec![]), }; @@ -780,13 +793,22 @@ mod tests { fn merge_manifests_latest_wins_per_oid() { let older = Manifest { timestamp: "2026-06-10T00:00:00Z".to_string(), - blobs: vec![ManifestBlob { oid: "o1".to_string(), cid: "cidOLD".to_string() }], + blobs: vec![ManifestBlob { + oid: "o1".to_string(), + cid: "cidOLD".to_string(), + }], }; let newer = Manifest { timestamp: "2026-06-11T00:00:00Z".to_string(), blobs: vec![ - ManifestBlob { oid: "o1".to_string(), cid: "cidNEW".to_string() }, - ManifestBlob { oid: "o2".to_string(), cid: "cid2".to_string() }, + ManifestBlob { + oid: "o1".to_string(), + cid: "cidNEW".to_string(), + }, + ManifestBlob { + oid: "o2".to_string(), + cid: "cid2".to_string(), + }, ], }; let merged = merge_manifests(vec![older, newer]); @@ -798,11 +820,17 @@ mod tests { fn merge_manifests_is_order_independent() { let older = Manifest 
{ timestamp: "2026-06-10T00:00:00Z".to_string(), - blobs: vec![ManifestBlob { oid: "o1".to_string(), cid: "cidOLD".to_string() }], + blobs: vec![ManifestBlob { + oid: "o1".to_string(), + cid: "cidOLD".to_string(), + }], }; let newer = Manifest { timestamp: "2026-06-11T00:00:00Z".to_string(), - blobs: vec![ManifestBlob { oid: "o1".to_string(), cid: "cidNEW".to_string() }], + blobs: vec![ManifestBlob { + oid: "o1".to_string(), + cid: "cidNEW".to_string(), + }], }; // Newer first, older second: newer must still win. let merged = merge_manifests(vec![newer, older]); From a0e56688b777ff08e7d936a59c8cfb07eaa31970 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Wed, 10 Jun 2026 21:04:59 -0500 Subject: [PATCH 20/30] fix(gl): bound Arweave recovery gateway requests with a 30s timeout --- crates/gl/src/clone.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/gl/src/clone.rs b/crates/gl/src/clone.rs index 40a0ee3..0631608 100644 --- a/crates/gl/src/clone.rs +++ b/crates/gl/src/clone.rs @@ -416,7 +416,13 @@ async fn recover_from_arweave( let slug = format!("{owner_short}/{name}"); let ag = arweave_gateway.trim_end_matches('/'); let ig = ipfs_gateway.trim_end_matches('/'); - let client = reqwest::Client::new(); + // Bound every gateway request: this runs on every clone, so a slow or hung + // public gateway must not stall it. Best-effort recovery, so a timeout just + // skips the affected blob. + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(30)) + .build() + .unwrap_or_else(|_| reqwest::Client::new()); // 1. Discover manifest transaction ids via Arweave GraphQL. 
let query = r#"query($repo:String!){transactions(tags:[{name:"App-Name",values:["gitlawb"]},{name:"Schema",values:["gitlawb/encrypted-manifest/v1"]},{name:"Repo",values:[$repo]}],first:100){edges{node{id}}}}"#; From 6a764d7253bdac1ba3e2c2b8a3dd7f32a1862360 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Thu, 11 Jun 2026 01:39:21 -0500 Subject: [PATCH 21/30] test(gl): read-path recovery tests for Arweave fallback; quiet promisor present-check Add two hermetic integration tests for recover_from_arweave that drive the full read path over mocked Arweave GraphQL + IPFS gateways: discover the manifest, fetch it, fetch the envelope, decrypt, and install the withheld blob. One covers an authorized recipient (blob installed), the other a non-recipient (nothing recovered). Both simulate origin death by removing the promisor remote and enable uploadpack.allowFilter so the blob is truly withheld over file://. Also harden the local presence check in recover_from_arweave with GIT_NO_LAZY_FETCH=1 and .output() so the expected 'missing object' case does not trigger a wasted promisor fetch or leak git stderr to the user. --- crates/gl/src/clone.rs | 196 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 194 insertions(+), 2 deletions(-) diff --git a/crates/gl/src/clone.rs b/crates/gl/src/clone.rs index 0631608..ddc1e2d 100644 --- a/crates/gl/src/clone.rs +++ b/crates/gl/src/clone.rs @@ -477,10 +477,15 @@ async fn recover_from_arweave( // 3. Recover each missing blob the caller can decrypt. let mut recovered = Vec::new(); for (oid, cid) in oid_cid { + // Local presence check. GIT_NO_LAZY_FETCH stops git from making a wasted + // promisor fetch attempt (we are recovering precisely because the promisor + // cannot supply the blob), and `.output()` captures git's "missing object" + // stderr so that expected case does not leak a confusing error to the user. 
let present = Command::new("git") .args(["-C", dest_str, "cat-file", "-e", &oid]) - .status() - .map(|s| s.success()) + .env("GIT_NO_LAZY_FETCH", "1") + .output() + .map(|o| o.status.success()) .unwrap_or(false); if present { continue; @@ -843,6 +848,193 @@ mod tests { assert_eq!(merged.get("o1").map(String::as_str), Some("cidNEW")); } + /// Read-path end-to-end over a mocked Arweave + IPFS gateway: discover the + /// manifest via GraphQL, fetch it, fetch the envelope, decrypt with the + /// caller's key, and install the previously-withheld blob. + #[tokio::test] + async fn recover_from_arweave_installs_authorized_blob() { + use gitlawb_core::encrypt::seal_blob; + use gitlawb_core::identity::Keypair; + + let (td, url) = bare_remote(&[("public/a.txt", b"pub\n"), ("secret/b.txt", b"SECRET\n")]); + let dest = td.path().join("dest"); + // Make the bare honor `--filter=blob:none` over file:// so the withheld + // blob is genuinely omitted from the local store, not just unchecked-out. + let bare = url.strip_prefix("file://").unwrap(); + assert!(Command::new("git") + .args(["-C", bare, "config", "uploadpack.allowFilter", "true"]) + .status() + .unwrap() + .success()); + setup_partial_clone(&dest, &url, &["/secret/**".to_string()], &[], None).unwrap(); + assert!( + !dest.join("secret/b.txt").exists(), + "secret starts withheld" + ); + + let oid = { + let out = Command::new("git") + .args([ + "-C", + dest.to_str().unwrap(), + "rev-parse", + "HEAD:secret/b.txt", + ]) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout).trim().to_string() + }; + + // Simulate origin death: drop the promisor remote so `cat-file -e` cannot + // lazily fetch the withheld blob. This is exactly the B3 premise (the node + // can no longer serve it), and forces recovery to go through Arweave/IPFS. 
+ std::fs::remove_dir_all(url.strip_prefix("file://").unwrap()).unwrap(); + + let reader = Keypair::generate(); + let envelope = seal_blob(b"SECRET\n", &[reader.verifying_key()]).unwrap(); + + let cid = "testcid123"; + let mut server = mockito::Server::new_async().await; + let _gql = server + .mock("POST", "/graphql") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(r#"{"data":{"transactions":{"edges":[{"node":{"id":"TX1"}}]}}}"#) + .create_async() + .await; + let manifest_body = serde_json::json!({ + "timestamp": "2026-06-11T00:00:00Z", + "blobs": [{ "oid": oid, "cid": cid, "recipients": [] }], + }) + .to_string(); + let _tx = server + .mock("GET", "/TX1") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(manifest_body) + .create_async() + .await; + let _blob = server + .mock("GET", format!("/ipfs/{cid}").as_str()) + .with_status(200) + .with_body(envelope) + .create_async() + .await; + + let paths = recover_from_arweave( + &server.url(), + &server.url(), + "alice", + "myrepo", + &dest, + &reader, + ) + .await + .unwrap(); + assert_eq!(paths, vec!["secret/b.txt".to_string()]); + + let present = Command::new("git") + .args(["-C", dest.to_str().unwrap(), "cat-file", "-e", &oid]) + .env("GIT_NO_LAZY_FETCH", "1") + .output() + .unwrap() + .status + .success(); + assert!( + present, + "authorized reader's blob must be installed locally" + ); + } + + /// A caller who is not a recipient cannot decrypt the envelope, so nothing is + /// recovered even though the manifest and envelope are reachable. 
+ #[tokio::test] + async fn recover_from_arweave_skips_unauthorized() { + use gitlawb_core::encrypt::seal_blob; + use gitlawb_core::identity::Keypair; + + let (td, url) = bare_remote(&[("public/a.txt", b"pub\n"), ("secret/b.txt", b"SECRET\n")]); + let dest = td.path().join("dest"); + let bare = url.strip_prefix("file://").unwrap(); + assert!(Command::new("git") + .args(["-C", bare, "config", "uploadpack.allowFilter", "true"]) + .status() + .unwrap() + .success()); + setup_partial_clone(&dest, &url, &["/secret/**".to_string()], &[], None).unwrap(); + + let oid = { + let out = Command::new("git") + .args([ + "-C", + dest.to_str().unwrap(), + "rev-parse", + "HEAD:secret/b.txt", + ]) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout).trim().to_string() + }; + + // Simulate origin death (see the authorized test) so the withheld blob + // cannot be lazily fetched from the promisor remote. + std::fs::remove_dir_all(url.strip_prefix("file://").unwrap()).unwrap(); + + // Sealed to a different reader; the caller below is not a recipient. 
+ let authorized = Keypair::generate(); + let envelope = seal_blob(b"SECRET\n", &[authorized.verifying_key()]).unwrap(); + let intruder = Keypair::generate(); + + let cid = "testcid123"; + let mut server = mockito::Server::new_async().await; + let _gql = server + .mock("POST", "/graphql") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(r#"{"data":{"transactions":{"edges":[{"node":{"id":"TX1"}}]}}}"#) + .create_async() + .await; + let manifest_body = serde_json::json!({ + "timestamp": "2026-06-11T00:00:00Z", + "blobs": [{ "oid": oid, "cid": cid, "recipients": [] }], + }) + .to_string(); + let _tx = server + .mock("GET", "/TX1") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(manifest_body) + .create_async() + .await; + let _blob = server + .mock("GET", format!("/ipfs/{cid}").as_str()) + .with_status(200) + .with_body(envelope) + .create_async() + .await; + + let paths = recover_from_arweave( + &server.url(), + &server.url(), + "alice", + "myrepo", + &dest, + &intruder, + ) + .await + .unwrap(); + assert!(paths.is_empty(), "non-recipient must recover nothing"); + + let present = Command::new("git") + .args(["-C", dest.to_str().unwrap(), "cat-file", "-e", &oid]) + .env("GIT_NO_LAZY_FETCH", "1") + .output() + .unwrap() + .status + .success(); + assert!(!present, "non-recipient must not install the blob"); + } + #[test] fn parse_repo_accepts_url_and_bare() { let (url, o, n) = parse_repo("gitlawb://did:key:zAbc/myrepo").unwrap(); From 668f4a3054394a4df443a97748f210edc04f36fc Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Thu, 11 Jun 2026 06:43:03 -0500 Subject: [PATCH 22/30] fix: surface corrupt recipients JSON and silent recovery I/O failures db: parse_recipients now propagates a descriptive error instead of defaulting corrupt recipients JSON to an empty list, which would have denied authorized readers and handed peers incomplete metadata. 
gl: clone recovery now warns when the sparse-checkout file cannot be read or written, or when the post-recovery checkout fails, instead of silently discarding those errors and claiming files were recovered. --- crates/gitlawb-node/src/db/mod.rs | 27 ++++++++++++++++++++------- crates/gl/src/clone.rs | 25 ++++++++++++++++++++----- 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index a3f5cae..4a1c107 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -1666,6 +1666,16 @@ impl Db { Ok(()) } + /// Deserialize the stored recipients JSON. Corruption is surfaced as an + /// error rather than silently treated as an empty recipient list, which + /// would deny access to every legitimate reader and hand peers incomplete + /// replication metadata. + fn parse_recipients(repo_id: &str, oid: &str, raw: &str) -> Result> { + serde_json::from_str(raw).with_context(|| { + format!("corrupt recipients JSON in encrypted_blobs (repo_id={repo_id}, oid={oid})") + }) + } + /// (oid, cid) for every encrypted blob in the repo that `caller` may decrypt. 
pub async fn list_encrypted_blobs_for( &self, @@ -1682,7 +1692,7 @@ impl Db { let oid: String = row.get("oid"); let cid: String = row.get("cid"); let recipients: String = row.get("recipients"); - let recipients: Vec = serde_json::from_str(&recipients).unwrap_or_default(); + let recipients = Self::parse_recipients(repo_id, &oid, &recipients)?; if recipients.iter().any(|d| d == caller) { out.push((oid, cid)); } @@ -1708,7 +1718,7 @@ impl Db { let oid: String = row.get("oid"); let cid: String = row.get("cid"); let recipients: String = row.get("recipients"); - let recipients: Vec = serde_json::from_str(&recipients).unwrap_or_default(); + let recipients = Self::parse_recipients(repo_id, &oid, &recipients)?; out.push((oid, cid, recipients)); } Ok(out) @@ -1730,7 +1740,7 @@ impl Db { .await?; let Some(row) = row else { return Ok(None) }; let recipients: String = row.get("recipients"); - let recipients: Vec = serde_json::from_str(&recipients).unwrap_or_default(); + let recipients = Self::parse_recipients(repo_id, oid, &recipients)?; if recipients.iter().any(|d| d == caller) { Ok(Some(row.get("cid"))) } else { @@ -1751,10 +1761,13 @@ impl Db { .bind(oid) .fetch_optional(&self.pool) .await?; - Ok(row.map(|r| { - let recipients: String = r.get("recipients"); - serde_json::from_str::>(&recipients).unwrap_or_default() - })) + match row { + None => Ok(None), + Some(r) => { + let recipients: String = r.get("recipients"); + Ok(Some(Self::parse_recipients(repo_id, oid, &recipients)?)) + } + } } pub async fn list_pinned_cids(&self) -> Result> { diff --git a/crates/gl/src/clone.rs b/crates/gl/src/clone.rs index ddc1e2d..93e998d 100644 --- a/crates/gl/src/clone.rs +++ b/crates/gl/src/clone.rs @@ -565,14 +565,29 @@ pub async fn run(args: CloneArgs) -> Result<()> { // materialize them in the working tree. 
let spec = dest.join(".git/info/sparse-checkout"); if spec.exists() { - if let Ok(mut s) = std::fs::read_to_string(&spec) { - for p in &paths { - s.push_str(&format!("/{p}\n")); + match std::fs::read_to_string(&spec) { + Ok(mut s) => { + for p in &paths { + s.push_str(&format!("/{p}\n")); + } + if let Err(e) = std::fs::write(&spec, &s) { + eprintln!( + "warning: failed to update sparse-checkout, recovered files may not appear: {e}" + ); + } + } + Err(e) => { + eprintln!( + "warning: failed to read sparse-checkout, recovered files may not appear: {e}" + ); } - let _ = std::fs::write(&spec, s); } } - let _ = git(&dest, &["checkout", "--", "."]); + if let Err(e) = git(&dest, &["checkout", "--", "."]) { + eprintln!( + "warning: checkout after recovery failed, recovered files may not appear: {e}" + ); + } println!( "Recovered {} private file(s) you are authorized to read", paths.len() From 649ac78711b330433df3c3b5c5a1d16fb9a002b6 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 12 Jun 2026 12:13:08 -0500 Subject: [PATCH 23/30] feat(core): blind recipient identities in withheld-blob envelopes Envelope v2 drops the cleartext recipient public key (kid) from each wrapped-key header entry, so a party holding the public IPFS/Arweave copy can no longer enumerate who is authorized to decrypt a withheld blob. The version is bumped to 2 and v1 envelopes are rejected. open_blob now selects the reader's entry by trial decryption (the AEAD tag authenticates exactly one entry) instead of matching on the public key. Recipient count is still visible; identity is not. Scope is envelope-only: the node DB recipients column and peer replication metadata are unchanged. 
Co-authored-by: CommandCodeBot --- crates/gitlawb-core/src/encrypt.rs | 81 +++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 17 deletions(-) diff --git a/crates/gitlawb-core/src/encrypt.rs b/crates/gitlawb-core/src/encrypt.rs index b626581..72004a3 100644 --- a/crates/gitlawb-core/src/encrypt.rs +++ b/crates/gitlawb-core/src/encrypt.rs @@ -42,11 +42,10 @@ use rand::RngCore; use serde::{Deserialize, Serialize}; const MAGIC: &[u8] = b"GLENC"; -const VERSION: u8 = 1; +const VERSION: u8 = 2; #[derive(Serialize, Deserialize)] struct Recipient { - kid: String, // base64 recipient ed25519 pubkey (32B) eph: String, // base64 ephemeral x25519 pubkey (32B) nonce: String, // base64 box nonce (24B) wrap: String, // base64 wrapped content key @@ -84,7 +83,6 @@ pub fn seal_blob(plaintext: &[u8], recipients: &[VerifyingKey]) -> Result Result> { .context("decode header")?; let body = &envelope[p + hlen..]; - let my_kid = B64.encode(keypair.verifying_key().as_bytes()); let my_x = XSecret::from(x25519_secret_from_seed(&keypair.seed_bytes())); - let entry = header - .recipients - .iter() - .find(|r| r.kid == my_kid) - .context("not a recipient of this envelope")?; - let eph = XPublic::from(<[u8; 32]>::try_from(B64.decode(&entry.eph)?.as_slice())?); - let nonce = B64.decode(&entry.nonce)?; - let wrap = B64.decode(&entry.wrap)?; - let abox = ChaChaBox::new(&eph, &my_x); - let content_key = abox - .decrypt( + // Identities are blinded: no entry says which recipient it belongs to, so + // try each one. The ChaChaBox AEAD tag authenticates, so exactly the + // reader's own entry unwraps; every other entry fails cleanly. 
+ let mut content_key: Option> = None; + for entry in &header.recipients { + let eph = match B64 + .decode(&entry.eph) + .ok() + .and_then(|b| <[u8; 32]>::try_from(b.as_slice()).ok()) + { + Some(b) => XPublic::from(b), + None => continue, + }; + let nonce = match B64.decode(&entry.nonce) { + Ok(n) => n, + Err(_) => continue, + }; + let wrap = match B64.decode(&entry.wrap) { + Ok(w) => w, + Err(_) => continue, + }; + let abox = ChaChaBox::new(&eph, &my_x); + if let Ok(ck) = abox.decrypt( crypto_box::aead::generic_array::GenericArray::from_slice(&nonce), wrap.as_slice(), - ) - .map_err(|_| anyhow::anyhow!("content-key unwrap failed"))?; + ) { + content_key = Some(ck); + break; + } + } + let content_key = content_key.context("not a recipient of this envelope")?; let body_cipher = XChaCha20Poly1305::new_from_slice(&content_key) .map_err(|e| anyhow::anyhow!("content key: {e}"))?; @@ -194,4 +207,38 @@ mod tests { env[last] ^= 0x01; assert!(open_blob(&env, &owner).is_err()); } + + #[test] + fn v2_header_contains_no_recipient_pubkey() { + // The blinded envelope header must not carry any recipient's public key. 
+ let reader = Keypair::generate(); + let env = seal_blob(b"private blob contents", &[reader.verifying_key()]).unwrap(); + + // Slice out the header bytes using the envelope framing: + // MAGIC | version(1B) | header_len(4B LE) | header_json | body + let mut p = MAGIC.len() + 1; // skip MAGIC + version byte + let hlen = u32::from_le_bytes(env[p..p + 4].try_into().unwrap()) as usize; + p += 4; + let header = &env[p..p + hlen]; + let header_str = String::from_utf8_lossy(header); + + let pubkey_b64 = B64.encode(reader.verifying_key().as_bytes()); + assert!( + !header_str.contains(&pubkey_b64), + "recipient public key must not appear in the blinded header" + ); + } + + #[test] + fn v1_envelope_is_rejected() { + let reader = Keypair::generate(); + let mut env = seal_blob(b"hi", &[reader.verifying_key()]).unwrap(); + // Flip the version byte (immediately after MAGIC) from 2 to 1. + env[MAGIC.len()] = 1; + let err = open_blob(&env, &reader).unwrap_err(); + assert!( + err.to_string().contains("unsupported envelope version"), + "expected version-rejection error, got: {err}" + ); + } } From 57b0434a53dc794bf8faad081bce93f2aa031ce8 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 12 Jun 2026 12:30:50 -0500 Subject: [PATCH 24/30] fix(core): reject malformed envelope nonces instead of panicking open_blob fed attacker-controlled envelopes to GenericArray::from_slice and XNonce::from_slice, which panic on a wrong-length input. Validate both the per-recipient box nonce and the body nonce to 24 bytes before use: skip a recipient entry whose nonce is malformed, and return an error for a malformed body nonce, so the public recovery path surfaces an error rather than panicking. 
--- crates/gitlawb-core/src/encrypt.rs | 55 +++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/crates/gitlawb-core/src/encrypt.rs b/crates/gitlawb-core/src/encrypt.rs index 72004a3..0336270 100644 --- a/crates/gitlawb-core/src/encrypt.rs +++ b/crates/gitlawb-core/src/encrypt.rs @@ -138,9 +138,15 @@ pub fn open_blob(envelope: &[u8], keypair: &Keypair) -> Result> { Some(b) => XPublic::from(b), None => continue, }; - let nonce = match B64.decode(&entry.nonce) { - Ok(n) => n, - Err(_) => continue, + // from_slice panics on a wrong length, and the envelope is attacker + // controlled, so validate the 24-byte box nonce before using it. + let nonce = match B64 + .decode(&entry.nonce) + .ok() + .and_then(|n| <[u8; 24]>::try_from(n.as_slice()).ok()) + { + Some(n) => n, + None => continue, }; let wrap = match B64.decode(&entry.wrap) { Ok(w) => w, @@ -159,7 +165,11 @@ pub fn open_blob(envelope: &[u8], keypair: &Keypair) -> Result> { let body_cipher = XChaCha20Poly1305::new_from_slice(&content_key) .map_err(|e| anyhow::anyhow!("content key: {e}"))?; - let body_nonce = B64.decode(&header.nonce)?; + let body_nonce = B64 + .decode(&header.nonce) + .ok() + .and_then(|n| <[u8; 24]>::try_from(n.as_slice()).ok()) + .context("invalid body nonce")?; body_cipher .decrypt(XNonce::from_slice(&body_nonce), body) .map_err(|_| anyhow::anyhow!("body decrypt failed")) @@ -241,4 +251,41 @@ mod tests { "expected version-rejection error, got: {err}" ); } + + #[test] + fn malformed_nonce_returns_err_not_panic() { + // from_slice panics on wrong-length input; a crafted envelope on the + // public recovery path must surface an error, never panic. + let reader = Keypair::generate(); + let env = seal_blob(b"private blob contents", &[reader.verifying_key()]).unwrap(); + + // Split the envelope framing into header JSON and body. 
+ let mut p = MAGIC.len() + 1; + let hlen = u32::from_le_bytes(env[p..p + 4].try_into().unwrap()) as usize; + p += 4; + let header_bytes = &env[p..p + hlen]; + let body = &env[p + hlen..]; + + let reframe = |header: &serde_json::Value| -> Vec { + let hj = serde_json::to_vec(header).unwrap(); + let mut out = Vec::new(); + out.extend_from_slice(MAGIC); + out.push(VERSION); + out.extend_from_slice(&(hj.len() as u32).to_le_bytes()); + out.extend_from_slice(&hj); + out.extend_from_slice(body); + out + }; + let bad_nonce = serde_json::Value::String(B64.encode([0u8; 12])); + + // Corrupted per-recipient nonce: entry is skipped, no match. + let mut header: serde_json::Value = serde_json::from_slice(header_bytes).unwrap(); + header["recipients"][0]["nonce"] = bad_nonce.clone(); + assert!(open_blob(&reframe(&header), &reader).is_err()); + + // Corrupted body nonce: unwrap succeeds, body nonce is rejected. + let mut header: serde_json::Value = serde_json::from_slice(header_bytes).unwrap(); + header["nonce"] = bad_nonce; + assert!(open_blob(&reframe(&header), &reader).is_err()); + } } From 706a48912ecacee847dd542513a1a6ecc5952f1c Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 12 Jun 2026 12:50:51 -0500 Subject: [PATCH 25/30] feat(node): blind recipient identities on the B2 replication surface The encrypted-blobs/replicate endpoint shipped every blob's full recipient DID list to mirrors, which persisted it, so any mirroring peer learned the reader set. The v2 envelope blinding already removed recipient public keys from the pinned envelopes, so the comment justifying this (DIDs are already public) was no longer true. /replicate now returns {oid, cid} only. Mirrors detect a re-seal by the CID changing (the OID is stable across re-seals) instead of comparing recipient sets, and store no recipient identities. Origin-side authz and the at-rest recipients column are unchanged; this blinds only the peer-facing surface. 
--- crates/gitlawb-node/src/api/encrypted.rs | 35 ++++++++++--- crates/gitlawb-node/src/sync.rs | 64 ++++++++++-------------- 2 files changed, 55 insertions(+), 44 deletions(-) diff --git a/crates/gitlawb-node/src/api/encrypted.rs b/crates/gitlawb-node/src/api/encrypted.rs index 6e19bd5..20827fb 100644 --- a/crates/gitlawb-node/src/api/encrypted.rs +++ b/crates/gitlawb-node/src/api/encrypted.rs @@ -56,10 +56,11 @@ pub async fn get_encrypted_blob( } /// GET /api/v1/repos/{owner}/{repo}/encrypted-blobs/replicate -/// Returns [{oid, cid, recipients}] for every encrypted blob in the repo, for -/// peer-mirror replication (Option B2). Not recipient-scoped: recipient DIDs are -/// already public via the IPFS-pinned envelopes, so this exposes only ciphertext -/// metadata (content-addressed OIDs/CIDs and recipient DIDs), never plaintext. +/// Returns [{oid, cid}] for every encrypted blob in the repo, for peer-mirror +/// replication (Option B2). Recipient identities are deliberately withheld: the +/// v2 envelopes no longer carry recipient public keys, so peers must not learn +/// the reader set either. A mirror detects a re-seal by the CID changing (the +/// OID is stable across re-seals). Ciphertext metadata only, never plaintext. pub async fn replicate_encrypted_blobs( State(state): State, Path((owner, repo)): Path<(String, String)>, @@ -72,9 +73,29 @@ pub async fn replicate_encrypted_blobs( let rows = state.db.list_all_encrypted_blobs(&record.id).await?; let blobs: Vec<_> = rows .into_iter() - .map(|(oid, cid, recipients)| { - serde_json::json!({ "oid": oid, "cid": cid, "recipients": recipients }) - }) + .map(|(oid, cid, _recipients)| replicate_blob_json(oid, cid)) .collect(); Ok(Json(serde_json::json!({ "blobs": blobs }))) } + +/// Serialize one blob for the replication wire. Recipient identities are +/// intentionally absent so a mirror never learns the reader set. 
+fn replicate_blob_json(oid: String, cid: String) -> serde_json::Value { + serde_json::json!({ "oid": oid, "cid": cid }) +} + +#[cfg(test)] +mod tests { + use super::replicate_blob_json; + + #[test] + fn replicate_blob_json_omits_recipients() { + let v = replicate_blob_json("oid1".into(), "cidA".into()); + assert_eq!(v["oid"], "oid1"); + assert_eq!(v["cid"], "cidA"); + assert!( + v.get("recipients").is_none(), + "replication wire must not carry recipient identities" + ); + } +} diff --git a/crates/gitlawb-node/src/sync.rs b/crates/gitlawb-node/src/sync.rs index 0434e0a..615ce22 100644 --- a/crates/gitlawb-node/src/sync.rs +++ b/crates/gitlawb-node/src/sync.rs @@ -49,13 +49,12 @@ fn classify_mirror(withheld: Option>) -> MirrorMode { } /// One encrypted blob as advertised by an origin's `encrypted-blobs/replicate` -/// endpoint (Option B2). Ciphertext metadata only. +/// endpoint (Option B2). Ciphertext metadata only; recipient identities are +/// withheld from peers, so a re-seal is detected by the CID changing. #[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize)] struct ReplicaBlob { oid: String, cid: String, - #[serde(default)] - recipients: Vec, } /// The shape of the `encrypted-blobs/replicate` JSON response. @@ -67,33 +66,26 @@ struct ReplicateResponse { /// Decide which of the origin's encrypted blobs this mirror must (re)replicate. /// -/// `have` maps each already-stored blob's oid to its stored recipient DIDs. A +/// `have` maps each already-stored blob's oid to the CID the mirror pinned. A /// remote blob is returned when the mirror has no row for that oid, or when the -/// stored recipient set differs from the remote one (the origin re-sealed after a -/// reader-set change; same semantics as B1). Recipient order is ignored. +/// stored CID differs from the advertised one. 
A re-seal regenerates the +/// envelope (new content key, nonce, and per-recipient wraps), so the CID +/// changes while the OID stays stable; comparing CIDs detects a re-seal without +/// the mirror ever holding recipient identities. fn blobs_needing_replication( remote: &[ReplicaBlob], - have: &HashMap>, + have: &HashMap, ) -> Vec { remote .iter() .filter(|b| match have.get(&b.oid) { None => true, - Some(stored) => !same_recipients(stored, &b.recipients), + Some(stored_cid) => stored_cid != &b.cid, }) .cloned() .collect() } -/// Order-insensitive equality of two recipient DID lists. -fn same_recipients(a: &[String], b: &[String]) -> bool { - let mut a: Vec<&String> = a.iter().collect(); - let mut b: Vec<&String> = b.iter().collect(); - a.sort(); - b.sort(); - a == b -} - /// Start the background sync worker. Returns immediately; the worker runs /// as a detached tokio task that exits cleanly when `shutdown_rx` flips /// to `true`. @@ -341,9 +333,10 @@ async fn register_replica_with_origin( /// Replicate the origin's encrypted withheld blobs onto this mirror (Option B2). /// /// After the git objects are mirrored, fetch the origin's replication listing, -/// then for each blob the mirror does not already hold (or whose recipients -/// changed) pull the ciphertext envelope over IPFS, pin it locally, and record -/// the `encrypted_blobs` row keyed by this mirror's local `repo_id`. +/// then for each blob the mirror does not already hold (or whose CID changed, +/// i.e. the origin re-sealed) pull the ciphertext envelope over IPFS, pin it +/// locally, and record the `encrypted_blobs` row keyed by this mirror's local +/// `repo_id`. The mirror stores no recipient identities. /// /// Best-effort and idempotent: any per-blob failure is logged and skipped, to be /// retried on the next sync. 
Confidentiality is never at risk; the mirror only @@ -379,10 +372,10 @@ async fn replicate_encrypted_blobs( return; } - let have: HashMap> = match db.list_all_encrypted_blobs(repo_id).await { + let have: HashMap = match db.list_all_encrypted_blobs(repo_id).await { Ok(rows) => rows .into_iter() - .map(|(oid, _cid, recipients)| (oid, recipients)) + .map(|(oid, cid, _recipients)| (oid, cid)) .collect(), Err(e) => { warn!(repo = %repo, err = %e, "failed to list local encrypted blobs for replication"); @@ -405,7 +398,7 @@ async fn replicate_encrypted_blobs( continue; } if let Err(e) = db - .record_encrypted_blob(repo_id, &blob.oid, &cid, &blob.recipients) + .record_encrypted_blob(repo_id, &blob.oid, &cid, &[]) .await { warn!(oid = %blob.oid, err = %e, "failed to record replicated encrypted blob"); @@ -564,38 +557,34 @@ mod tests { assert!(matches!(mode, MirrorMode::Plain)); } - fn rb(oid: &str, cid: &str, recipients: &[&str]) -> ReplicaBlob { + fn rb(oid: &str, cid: &str) -> ReplicaBlob { ReplicaBlob { oid: oid.to_string(), cid: cid.to_string(), - recipients: recipients.iter().map(|s| s.to_string()).collect(), } } #[test] fn replicate_stores_new_blob() { - let remote = vec![rb("oid1", "cidA", &["did:key:zA"])]; + let remote = vec![rb("oid1", "cidA")]; let have = HashMap::new(); assert_eq!(blobs_needing_replication(&remote, &have), remote); } #[test] - fn replicate_skips_already_present_same_recipients() { - let remote = vec![rb("oid1", "cidA", &["did:key:zA", "did:key:zB"])]; + fn replicate_skips_already_present_same_cid() { + let remote = vec![rb("oid1", "cidA")]; let mut have = HashMap::new(); - // stored in a different order: must still count as present - have.insert( - "oid1".to_string(), - vec!["did:key:zB".to_string(), "did:key:zA".to_string()], - ); + have.insert("oid1".to_string(), "cidA".to_string()); assert!(blobs_needing_replication(&remote, &have).is_empty()); } #[test] - fn replicate_restores_on_recipient_change() { - let remote = vec![rb("oid1", "cidB", 
&["did:key:zA", "did:key:zC"])]; + fn replicate_restores_on_cid_change() { + // The origin re-sealed: same oid, new envelope, new cid. + let remote = vec![rb("oid1", "cidB")]; let mut have = HashMap::new(); - have.insert("oid1".to_string(), vec!["did:key:zA".to_string()]); + have.insert("oid1".to_string(), "cidA".to_string()); assert_eq!(blobs_needing_replication(&remote, &have), remote); } @@ -606,11 +595,12 @@ mod tests { #[test] fn replicate_response_parses() { + // An older origin may still send a recipients field; it must be ignored. let json = r#"{"blobs":[{"oid":"o1","cid":"c1","recipients":["did:key:zA"]}]}"#; let parsed: ReplicateResponse = serde_json::from_str(json).unwrap(); assert_eq!(parsed.blobs.len(), 1); assert_eq!(parsed.blobs[0].oid, "o1"); - assert_eq!(parsed.blobs[0].recipients, vec!["did:key:zA".to_string()]); + assert_eq!(parsed.blobs[0].cid, "c1"); } #[test] From dbd2c0436aaad9a749dd711163b0e865fa26bd5b Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 12 Jun 2026 14:51:11 -0500 Subject: [PATCH 26/30] feat(node): stop anchoring recipient identities to Arweave The B3 encrypted-blob manifest anchored {oid, cid, recipients} to Arweave, a permanent public record of every blob's reader set. The v2 envelope blinding already removed recipient keys from the pinned envelopes, so the comment justifying this (recipient DIDs are already public) was no longer true. The manifest now anchors {oid, cid} only. The gl reader already ignores the recipients field (it recovers by trial decryption), so no reader changes. 
--- crates/gitlawb-node/src/arweave.rs | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/crates/gitlawb-node/src/arweave.rs b/crates/gitlawb-node/src/arweave.rs index c6cdd3d..cf13947 100644 --- a/crates/gitlawb-node/src/arweave.rs +++ b/crates/gitlawb-node/src/arweave.rs @@ -104,8 +104,9 @@ pub async fn anchor_ref_update( } /// A per-push manifest of the blobs encrypted this push (Option B3). The -/// `blobs` slice is `(oid, cid, recipients)` tuples. Anchored directly to -/// Arweave as its JSON body so the discovery index survives total node loss. +/// `blobs` slice is `(oid, cid, recipients)` tuples; only `oid` and `cid` are +/// anchored. Anchored directly to Arweave as its JSON body so the discovery +/// index survives total node loss. pub struct EncryptedManifest<'a> { pub repo: &'a str, pub owner_did: &'a str, @@ -116,8 +117,9 @@ pub struct EncryptedManifest<'a> { /// Anchor a per-push encrypted-blob manifest to Arweave via Irys. The manifest /// JSON body is the payload (not a CID pointer to IPFS), so the index is -/// permanent and self-contained. Recipient DIDs are already public via the -/// pinned envelopes, so the manifest carries no new secret. +/// permanent and self-contained. Recipient identities are deliberately omitted: +/// the anchor is permanent and public, and the v2 envelopes no longer expose +/// recipients, so the reader set must not be written to Arweave either. /// /// Returns the Irys/Arweave transaction ID, or `Ok("")` when `irys_url` is empty /// (anchoring disabled) or there are no blobs to anchor. 
@@ -133,7 +135,7 @@ pub async fn anchor_encrypted_manifest( let blobs_json: Vec = manifest .blobs .iter() - .map(|(oid, cid, recipients)| json!({ "oid": oid, "cid": cid, "recipients": recipients })) + .map(|(oid, cid, _recipients)| manifest_blob_json(oid, cid)) .collect(); let payload = json!({ @@ -183,6 +185,13 @@ pub async fn anchor_encrypted_manifest( Ok(tx_id) } +/// Serialize one blob for the Arweave manifest. Recipient identities are +/// intentionally absent so the permanent public anchor never records who can +/// read a blob. +fn manifest_blob_json(oid: &str, cid: &str) -> serde_json::Value { + json!({ "oid": oid, "cid": cid }) +} + /// Build the Irys tag header for an encrypted-blob manifest. `Repo` and `Schema` /// are the tags the `gl` recovery query filters on. fn build_manifest_tags_header(manifest: &EncryptedManifest<'_>) -> String { @@ -356,6 +365,17 @@ mod tests { _mock.assert_async().await; } + #[test] + fn manifest_blob_json_omits_recipients() { + let v = manifest_blob_json("oid1", "cidA"); + assert_eq!(v["oid"], "oid1"); + assert_eq!(v["cid"], "cidA"); + assert!( + v.get("recipients").is_none(), + "Arweave manifest must not anchor recipient identities" + ); + } + #[test] fn test_sanitize_tag() { assert_eq!(sanitize_tag("alice/myrepo"), "alice/myrepo"); From 6eb2c40343f23b10ee687b4723f2e5eb79b59e50 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 12 Jun 2026 15:14:53 -0500 Subject: [PATCH 27/30] feat(node): blind recipient identities at rest and gate B1 by repo readability The origin no longer stores recipient DIDs. Migration v5 replaces the encrypted_blobs.recipients column with an opaque, node-keyed recipients_tag used only to detect a recipient-set change for re-seal. 
B1 discovery and fetch are now gated by the same repo-readability check the git read path uses, not by per-recipient matching; decryption is gated by the envelope crypto, so a non-recipient who can read the repo sees a blob's {oid, cid} but cannot open it. encrypt_and_pin keys the tag from the node seed and returns {oid, cid}; the Arweave manifest tuple drops the now-unused recipient vec. A DB compromise no longer reveals the reader set; recovering it would require brute-forcing candidate DID sets against the keyed tag with the node key. --- crates/gitlawb-node/src/api/encrypted.rs | 29 +++-- crates/gitlawb-node/src/api/repos.rs | 2 + crates/gitlawb-node/src/arweave.rs | 24 ++--- crates/gitlawb-node/src/db/mod.rs | 128 ++++++++--------------- crates/gitlawb-node/src/encrypted_pin.rs | 83 ++++++++++++--- crates/gitlawb-node/src/sync.rs | 10 +- 6 files changed, 142 insertions(+), 134 deletions(-) diff --git a/crates/gitlawb-node/src/api/encrypted.rs b/crates/gitlawb-node/src/api/encrypted.rs index 20827fb..b7bda00 100644 --- a/crates/gitlawb-node/src/api/encrypted.rs +++ b/crates/gitlawb-node/src/api/encrypted.rs @@ -6,24 +6,30 @@ use axum::Json; use crate::auth::AuthenticatedDid; use crate::error::{AppError, Result}; use crate::state::AppState; +use crate::visibility::{visibility_check, Decision}; /// GET /api/v1/repos/{owner}/{repo}/encrypted-blobs -/// Returns [{oid, cid}] for encrypted blobs the caller may decrypt. +/// Returns [{oid, cid}] for every encrypted blob in the repo, to any caller who +/// can read the repo. Not recipient-scoped: recipient identities are not stored, +/// so access control here is repo readability and decryption is gated by the +/// envelope crypto (only a real recipient can open an envelope). 
pub async fn list_encrypted_blobs( State(state): State, auth: Option>, Path((owner, repo)): Path<(String, String)>, ) -> Result> { - let caller = auth.as_ref().map(|e| e.0 .0.as_str()).unwrap_or(""); let record = state .db .get_repo(&owner, &repo) .await? .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; - let rows = state - .db - .list_encrypted_blobs_for(&record.id, caller) - .await?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let rules = state.db.list_visibility_rules(&record.id).await?; + if visibility_check(&rules, record.is_public, &record.owner_did, caller, "/") == Decision::Deny + { + return Err(AppError::RepoNotFound(format!("{owner}/{repo}"))); + } + let rows = state.db.list_all_encrypted_blobs(&record.id).await?; let blobs: Vec<_> = rows .into_iter() .map(|(oid, cid)| serde_json::json!({ "oid": oid, "cid": cid })) @@ -38,15 +44,20 @@ pub async fn get_encrypted_blob( auth: Option>, Path((owner, repo, oid)): Path<(String, String, String)>, ) -> Result> { - let caller = auth.as_ref().map(|e| e.0 .0.as_str()).unwrap_or(""); let record = state .db .get_repo(&owner, &repo) .await? .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let rules = state.db.list_visibility_rules(&record.id).await?; + if visibility_check(&rules, record.is_public, &record.owner_did, caller, "/") == Decision::Deny + { + return Err(AppError::RepoNotFound(format!("{owner}/{repo}/{oid}"))); + } let cid = state .db - .encrypted_blob_cid(&record.id, &oid, caller) + .encrypted_blob_cid(&record.id, &oid) .await? 
.ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}/{oid}")))?; let bytes = crate::ipfs_pin::cat(&state.config.ipfs_api, &cid) @@ -73,7 +84,7 @@ pub async fn replicate_encrypted_blobs( let rows = state.db.list_all_encrypted_blobs(&record.id).await?; let blobs: Vec<_> = rows .into_iter() - .map(|(oid, cid, _recipients)| replicate_blob_json(oid, cid)) + .map(|(oid, cid)| replicate_blob_json(oid, cid)) .collect(); Ok(Json(serde_json::json!({ "blobs": blobs }))) } diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 6fa028d..3253cdc 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -693,6 +693,7 @@ pub async fn git_receive_pack( let irys_url = state.config.irys_url.clone(); let http_client = std::sync::Arc::clone(&state.http_client); let node_did_str = state.node_did.to_string(); + let node_seed = state.node_keypair.seed_bytes(); let repo_name = record.name.clone(); tokio::spawn(async move { let pinned = crate::ipfs_pin::pin_new_objects( @@ -726,6 +727,7 @@ pub async fn git_receive_pack( &repo_path_clone, &db_clone, &repo_id, + &node_seed, &recipients, ) .await; diff --git a/crates/gitlawb-node/src/arweave.rs b/crates/gitlawb-node/src/arweave.rs index cf13947..43f35a0 100644 --- a/crates/gitlawb-node/src/arweave.rs +++ b/crates/gitlawb-node/src/arweave.rs @@ -104,15 +104,15 @@ pub async fn anchor_ref_update( } /// A per-push manifest of the blobs encrypted this push (Option B3). The -/// `blobs` slice is `(oid, cid, recipients)` tuples; only `oid` and `cid` are -/// anchored. Anchored directly to Arweave as its JSON body so the discovery -/// index survives total node loss. +/// `blobs` slice is `(oid, cid)` tuples. Anchored directly to Arweave as its JSON +/// body so the discovery index survives total node loss. Recipient identities are +/// never part of the manifest. 
pub struct EncryptedManifest<'a> { pub repo: &'a str, pub owner_did: &'a str, pub node_did: &'a str, pub timestamp: &'a str, - pub blobs: &'a [(String, String, Vec)], + pub blobs: &'a [(String, String)], } /// Anchor a per-push encrypted-blob manifest to Arweave via Irys. The manifest @@ -135,7 +135,7 @@ pub async fn anchor_encrypted_manifest( let blobs_json: Vec = manifest .blobs .iter() - .map(|(oid, cid, _recipients)| manifest_blob_json(oid, cid)) + .map(|(oid, cid)| manifest_blob_json(oid, cid)) .collect(); let payload = json!({ @@ -298,11 +298,7 @@ mod tests { #[tokio::test] async fn test_manifest_anchor_noop_when_url_empty() { let client = reqwest::Client::new(); - let blobs = vec![( - "oid1".to_string(), - "cid1".to_string(), - vec!["did:key:zA".to_string()], - )]; + let blobs = vec![("oid1".to_string(), "cid1".to_string())]; let m = EncryptedManifest { repo: "alice/r", owner_did: "did:key:zO", @@ -319,7 +315,7 @@ mod tests { #[tokio::test] async fn test_manifest_anchor_noop_when_no_blobs() { let client = reqwest::Client::new(); - let blobs: Vec<(String, String, Vec)> = vec![]; + let blobs: Vec<(String, String)> = vec![]; let m = EncryptedManifest { repo: "alice/r", owner_did: "did:key:zO", @@ -348,11 +344,7 @@ mod tests { .await; let client = reqwest::Client::new(); - let blobs = vec![( - "oid1".to_string(), - "cid1".to_string(), - vec!["did:key:zA".to_string()], - )]; + let blobs = vec![("oid1".to_string(), "cid1".to_string())]; let m = EncryptedManifest { repo: "alice/r", owner_did: "did:key:zO", diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 4a1c107..81bd00d 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -735,6 +735,17 @@ const MIGRATIONS: &[Migration] = &[ "CREATE INDEX IF NOT EXISTS idx_encrypted_blobs_repo ON encrypted_blobs(repo_id)", ], }, + Migration { + version: 5, + name: "encrypted_blobs_blind_recipients", + stmts: &[ + // Replace the cleartext recipient DID 
list with an opaque, node-keyed + // tag used only to detect a recipient-set change. Existing rows get an + // empty tag and are re-sealed on the next push. + "ALTER TABLE encrypted_blobs DROP COLUMN IF EXISTS recipients", + "ALTER TABLE encrypted_blobs ADD COLUMN IF NOT EXISTS recipients_tag TEXT NOT NULL DEFAULT ''", + ], + }, ]; // ── Repos ───────────────────────────────────────────────────────────────────── @@ -1648,126 +1659,69 @@ impl Db { repo_id: &str, oid: &str, cid: &str, - recipients: &[String], + recipients_tag: &str, ) -> Result<()> { - let recipients_json = serde_json::to_string(recipients)?; sqlx::query( - "INSERT INTO encrypted_blobs (repo_id, oid, cid, recipients, created_at) + "INSERT INTO encrypted_blobs (repo_id, oid, cid, recipients_tag, created_at) VALUES ($1, $2, $3, $4, $5) - ON CONFLICT (repo_id, oid) DO UPDATE SET cid = EXCLUDED.cid, recipients = EXCLUDED.recipients", + ON CONFLICT (repo_id, oid) DO UPDATE SET cid = EXCLUDED.cid, recipients_tag = EXCLUDED.recipients_tag", ) .bind(repo_id) .bind(oid) .bind(cid) - .bind(recipients_json) + .bind(recipients_tag) .bind(Utc::now().to_rfc3339()) .execute(&self.pool) .await?; Ok(()) } - /// Deserialize the stored recipients JSON. Corruption is surfaced as an - /// error rather than silently treated as an empty recipient list, which - /// would deny access to every legitimate reader and hand peers incomplete - /// replication metadata. - fn parse_recipients(repo_id: &str, oid: &str, raw: &str) -> Result> { - serde_json::from_str(raw).with_context(|| { - format!("corrupt recipients JSON in encrypted_blobs (repo_id={repo_id}, oid={oid})") - }) - } - - /// (oid, cid) for every encrypted blob in the repo that `caller` may decrypt. 
- pub async fn list_encrypted_blobs_for( - &self, - repo_id: &str, - caller: &str, - ) -> Result> { - let rows = - sqlx::query("SELECT oid, cid, recipients FROM encrypted_blobs WHERE repo_id = $1") - .bind(repo_id) - .fetch_all(&self.pool) - .await?; + /// (oid, cid) for every encrypted blob in the repo, unscoped by caller. Used + /// by both the B2 replication view and B1 discovery. Recipient identities are + /// not stored, so authorization is the caller's repo readability, not a per + /// recipient check. Ciphertext metadata only. + pub async fn list_all_encrypted_blobs(&self, repo_id: &str) -> Result> { + let rows = sqlx::query("SELECT oid, cid FROM encrypted_blobs WHERE repo_id = $1") + .bind(repo_id) + .fetch_all(&self.pool) + .await?; let mut out = Vec::new(); for row in rows { let oid: String = row.get("oid"); let cid: String = row.get("cid"); - let recipients: String = row.get("recipients"); - let recipients = Self::parse_recipients(repo_id, &oid, &recipients)?; - if recipients.iter().any(|d| d == caller) { - out.push((oid, cid)); - } + out.push((oid, cid)); } Ok(out) } - /// (oid, cid, recipients) for every encrypted blob in the repo, unscoped by - /// caller. This is the replication view used by peer mirrors (Option B2), - /// distinct from the recipient-scoped `list_encrypted_blobs_for`. It returns - /// only ciphertext metadata; no plaintext or key material is involved. 
- pub async fn list_all_encrypted_blobs( - &self, - repo_id: &str, - ) -> Result)>> { - let rows = - sqlx::query("SELECT oid, cid, recipients FROM encrypted_blobs WHERE repo_id = $1") - .bind(repo_id) - .fetch_all(&self.pool) - .await?; - let mut out = Vec::new(); - for row in rows { - let oid: String = row.get("oid"); - let cid: String = row.get("cid"); - let recipients: String = row.get("recipients"); - let recipients = Self::parse_recipients(repo_id, &oid, &recipients)?; - out.push((oid, cid, recipients)); - } - Ok(out) + /// The CID of one encrypted blob, or None if there is no such row. Recipient + /// authorization is not enforced here: the handler checks repo readability and + /// the envelope crypto gates decryption. + pub async fn encrypted_blob_cid(&self, repo_id: &str, oid: &str) -> Result> { + let row = sqlx::query("SELECT cid FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2") + .bind(repo_id) + .bind(oid) + .fetch_optional(&self.pool) + .await?; + Ok(row.map(|r| r.get("cid"))) } - /// The CID of one encrypted blob, only if `caller` is a recipient. - pub async fn encrypted_blob_cid( + /// The opaque recipients tag stored for an encrypted blob, or None if there is + /// no row. Used only to decide whether a re-seal is needed (the recipient set + /// changed); the tag is a node-keyed fingerprint, not the DID list. 
+ pub async fn encrypted_blob_recipients_tag( &self, repo_id: &str, oid: &str, - caller: &str, ) -> Result> { let row = sqlx::query( - "SELECT cid, recipients FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2", + "SELECT recipients_tag FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2", ) .bind(repo_id) .bind(oid) .fetch_optional(&self.pool) .await?; - let Some(row) = row else { return Ok(None) }; - let recipients: String = row.get("recipients"); - let recipients = Self::parse_recipients(repo_id, oid, &recipients)?; - if recipients.iter().any(|d| d == caller) { - Ok(Some(row.get("cid"))) - } else { - Ok(None) - } - } - - /// The recipient DID list stored for an encrypted blob, or None if there is - /// no row. Used to decide whether a re-seal is needed (recipients changed). - pub async fn encrypted_blob_recipients( - &self, - repo_id: &str, - oid: &str, - ) -> Result>> { - let row = - sqlx::query("SELECT recipients FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2") - .bind(repo_id) - .bind(oid) - .fetch_optional(&self.pool) - .await?; - match row { - None => Ok(None), - Some(r) => { - let recipients: String = r.get("recipients"); - Ok(Some(Self::parse_recipients(repo_id, oid, &recipients)?)) - } - } + Ok(row.map(|r| r.get("recipients_tag"))) } pub async fn list_pinned_cids(&self) -> Result> { diff --git a/crates/gitlawb-node/src/encrypted_pin.rs b/crates/gitlawb-node/src/encrypted_pin.rs index 50797b5..9732b88 100644 --- a/crates/gitlawb-node/src/encrypted_pin.rs +++ b/crates/gitlawb-node/src/encrypted_pin.rs @@ -13,31 +13,53 @@ use gitlawb_core::encrypt::seal_blob; use crate::db::Db; +use hmac::{Hmac, Mac}; +use sha2::Sha256; + +type HmacSha256 = Hmac; + +/// Opaque, node-keyed fingerprint of a blob's recipient set. Stored in place of +/// the cleartext DID list so a DB compromise cannot reveal the reader set; used +/// only to detect a recipient-set change so an unchanged blob is not re-sealed. 
+/// Order-insensitive (the input `BTreeSet` is already sorted). +pub fn recipients_tag(node_seed: &[u8; 32], dids: &BTreeSet) -> String { + let mut mac = HmacSha256::new_from_slice(node_seed).expect("HMAC accepts any key length"); + mac.update(b"gitlawb/recipients-tag/v1"); + for did in dids { + mac.update(b"\n"); + mac.update(did.as_bytes()); + } + hex::encode(mac.finalize().into_bytes()) +} + /// Resolve a DID string to its Ed25519 verifying key, or None if it carries no /// inline key (e.g. did:web / did:gitlawb). fn did_to_key(did: &str) -> Option { Did::from_str(did).ok()?.to_verifying_key().ok() } -/// Encrypt and pin every withheld blob. `recipients` maps blob oid -> DID set. -/// Returns `(oid, cid, recipients)` for each blob actually sealed and recorded -/// this call (the per-push delta), used by Option B3 to anchor a manifest. +/// Encrypt and pin every withheld blob. `recipients` maps blob oid -> DID set; +/// `node_seed` keys the opaque recipients tag. Returns `(oid, cid)` for each blob +/// actually sealed and recorded this call (the per-push delta), used by Option B3 +/// to anchor a manifest. Recipient identities are never stored or returned. pub async fn encrypt_and_pin( ipfs_api: &str, repo_path: &Path, db: &Db, repo_id: &str, + node_seed: &[u8; 32], recipients: &HashMap>, -) -> Vec<(String, String, Vec)> { +) -> Vec<(String, String)> { let mut sealed = Vec::new(); for (oid, dids) in recipients { // Skip only if an existing envelope already covers exactly these // recipients. If the recipient set changed (e.g. a reader was added to // the rule), re-seal so the new reader can recover the blob. Reader - // removal is not retroactive: the old envelope is already public. - if let Ok(Some(stored)) = db.encrypted_blob_recipients(repo_id, oid).await { - let stored: BTreeSet = stored.into_iter().collect(); - if &stored == dids { + // removal is not retroactive: the old envelope is already public. 
The + // comparison is on the opaque node-keyed tag, never the DID list. + let tag = recipients_tag(node_seed, dids); + if let Ok(Some(stored_tag)) = db.encrypted_blob_recipients_tag(repo_id, oid).await { + if stored_tag == tag { continue; } } @@ -61,15 +83,48 @@ pub async fn encrypt_and_pin( Ok(c) if !c.is_empty() => c, _ => continue, }; - let dids_vec: Vec = dids.iter().cloned().collect(); - if let Err(e) = db - .record_encrypted_blob(repo_id, oid, &cid, &dids_vec) - .await - { + if let Err(e) = db.record_encrypted_blob(repo_id, oid, &cid, &tag).await { tracing::warn!(oid = %oid, err = %e, "record_encrypted_blob failed"); continue; } - sealed.push((oid.clone(), cid.clone(), dids_vec)); + sealed.push((oid.clone(), cid.clone())); } sealed } + +#[cfg(test)] +mod tests { + use super::recipients_tag; + use std::collections::BTreeSet; + + fn set(dids: &[&str]) -> BTreeSet { + dids.iter().map(|s| s.to_string()).collect() + } + + #[test] + fn tag_is_order_insensitive() { + let seed = [7u8; 32]; + let a = recipients_tag(&seed, &set(&["did:key:zA", "did:key:zB"])); + let b = recipients_tag(&seed, &set(&["did:key:zB", "did:key:zA"])); + assert_eq!(a, b); + } + + #[test] + fn tag_differs_for_different_sets() { + let seed = [7u8; 32]; + let a = recipients_tag(&seed, &set(&["did:key:zA"])); + let b = recipients_tag(&seed, &set(&["did:key:zA", "did:key:zB"])); + assert_ne!(a, b); + } + + #[test] + fn tag_is_keyed_by_node_seed() { + let dids = set(&["did:key:zA", "did:key:zB"]); + let a = recipients_tag(&[1u8; 32], &dids); + let b = recipients_tag(&[2u8; 32], &dids); + assert_ne!( + a, b, + "tag must depend on the node seed, not be a plain hash" + ); + } +} diff --git a/crates/gitlawb-node/src/sync.rs b/crates/gitlawb-node/src/sync.rs index 615ce22..58cfa4d 100644 --- a/crates/gitlawb-node/src/sync.rs +++ b/crates/gitlawb-node/src/sync.rs @@ -373,10 +373,7 @@ async fn replicate_encrypted_blobs( } let have: HashMap = match db.list_all_encrypted_blobs(repo_id).await { - Ok(rows) 
=> rows - .into_iter() - .map(|(oid, cid, _recipients)| (oid, cid)) - .collect(), + Ok(rows) => rows.into_iter().collect(), Err(e) => { warn!(repo = %repo, err = %e, "failed to list local encrypted blobs for replication"); return; @@ -397,10 +394,7 @@ async fn replicate_encrypted_blobs( warn!(oid = %blob.oid, expected = %blob.cid, got = %cid, "replicated envelope CID mismatch; skipping record"); continue; } - if let Err(e) = db - .record_encrypted_blob(repo_id, &blob.oid, &cid, &[]) - .await - { + if let Err(e) = db.record_encrypted_blob(repo_id, &blob.oid, &cid, "").await { warn!(oid = %blob.oid, err = %e, "failed to record replicated encrypted blob"); } } From acd9ce2ade0a176cfab2dec3493a17dbe1252786 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 12 Jun 2026 15:28:48 -0500 Subject: [PATCH 28/30] chore: remove redundant .gitignore entry (covered by local exclude) --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index a36d8f7..404c87b 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,3 @@ keys/ # Logs *.log .openclaude-profile.json - -# Local planning / scratch docs (never commit) -docs/superpowers/ From d0a059b09866a3b4044583d1625cad91f7185a2d Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 12 Jun 2026 15:43:54 -0500 Subject: [PATCH 29/30] fix(node): gate /replicate by repo readability; harden reseal on DB error Address review on the at-rest blinding change: - The encrypted-blobs/replicate listing returned {oid, cid} with no visibility check, so a non-readable repo's blob index was reachable by an unauthenticated caller who guessed {owner}/{repo}. Gate it by the same repo-readability check discovery and fetch use. For the intended case (a public repo with withheld subtrees) the public root keeps this open to peers; only fully non-readable repos are withheld, which is the desired behavior. 
- encrypt_and_pin treated a recipients_tag DB read error as a cache miss and resealed, causing avoidable IPFS writes during a partial outage; skip and retry on the next push instead. - Correct the get_encrypted_blob doc comment to describe repo-readability access. --- crates/gitlawb-node/src/api/encrypted.rs | 21 ++++++++++++++++----- crates/gitlawb-node/src/encrypted_pin.rs | 10 ++++++++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/crates/gitlawb-node/src/api/encrypted.rs b/crates/gitlawb-node/src/api/encrypted.rs index b7bda00..d9fa52a 100644 --- a/crates/gitlawb-node/src/api/encrypted.rs +++ b/crates/gitlawb-node/src/api/encrypted.rs @@ -38,7 +38,8 @@ pub async fn list_encrypted_blobs( } /// GET /api/v1/repos/{owner}/{repo}/encrypted-blob/{oid} -/// Returns raw envelope bytes if the caller is a recipient. +/// Returns raw envelope bytes to callers who can read the repo; the envelope +/// crypto still ensures only true recipients can decrypt. pub async fn get_encrypted_blob( State(state): State, auth: Option>, @@ -68,12 +69,16 @@ pub async fn get_encrypted_blob( /// GET /api/v1/repos/{owner}/{repo}/encrypted-blobs/replicate /// Returns [{oid, cid}] for every encrypted blob in the repo, for peer-mirror -/// replication (Option B2). Recipient identities are deliberately withheld: the -/// v2 envelopes no longer carry recipient public keys, so peers must not learn -/// the reader set either. A mirror detects a re-seal by the CID changing (the -/// OID is stable across re-seals). Ciphertext metadata only, never plaintext. +/// replication (Option B2). Gated by repo readability, like discovery, so a +/// non-readable repo does not expose its blob index; for the intended case (a +/// public repo with withheld subtrees) the public root keeps this open to peers. +/// Recipient identities are deliberately withheld: the v2 envelopes no longer +/// carry recipient public keys, so peers must not learn the reader set either. 
A +/// mirror detects a re-seal by the CID changing (the OID is stable across +/// re-seals). Ciphertext metadata only, never plaintext. pub async fn replicate_encrypted_blobs( State(state): State, + auth: Option>, Path((owner, repo)): Path<(String, String)>, ) -> Result> { let record = state @@ -81,6 +86,12 @@ pub async fn replicate_encrypted_blobs( .get_repo(&owner, &repo) .await? .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let rules = state.db.list_visibility_rules(&record.id).await?; + if visibility_check(&rules, record.is_public, &record.owner_did, caller, "/") == Decision::Deny + { + return Err(AppError::RepoNotFound(format!("{owner}/{repo}"))); + } let rows = state.db.list_all_encrypted_blobs(&record.id).await?; let blobs: Vec<_> = rows .into_iter() diff --git a/crates/gitlawb-node/src/encrypted_pin.rs b/crates/gitlawb-node/src/encrypted_pin.rs index 9732b88..25439ee 100644 --- a/crates/gitlawb-node/src/encrypted_pin.rs +++ b/crates/gitlawb-node/src/encrypted_pin.rs @@ -58,8 +58,14 @@ pub async fn encrypt_and_pin( // removal is not retroactive: the old envelope is already public. The // comparison is on the opaque node-keyed tag, never the DID list. let tag = recipients_tag(node_seed, dids); - if let Ok(Some(stored_tag)) = db.encrypted_blob_recipients_tag(repo_id, oid).await { - if stored_tag == tag { + match db.encrypted_blob_recipients_tag(repo_id, oid).await { + Ok(Some(stored_tag)) if stored_tag == tag => continue, + Ok(_) => {} + Err(e) => { + // A DB read failure is not a cache miss: re-sealing here would do + // an avoidable IPFS write during a partial outage. Skip and retry + // on the next push. 
+ tracing::warn!(oid = %oid, err = %e, "recipients_tag lookup failed; skipping reseal"); continue; } } From 67afb20a04c11ca7b61179acb23e34c72b89e97c Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 12 Jun 2026 18:27:03 -0500 Subject: [PATCH 30/30] fix(node): close under-withholding via full ref scope and reachable-only pin set blob_paths walked only refs/heads/* and refs/tags/* and skipped silently on a failed git ls-tree, so a blob reachable only through another namespace, or a ref that failed to traverse, could fall out of the withheld set and ship in cleartext. Walk every ref and fail closed on traversal error. The pin enumerators (ipfs_pin, pinata) used git cat-file --batch-all-objects, which includes unreachable/dangling objects that have no path and cannot be classified for withholding. Switch them to git rev-list --objects --all so the pin set matches the reachable graph blob_paths evaluates. --- .../gitlawb-node/src/git/visibility_pack.rs | 76 +++++++++++++++-- crates/gitlawb-node/src/ipfs_pin.rs | 84 ++++++++++++++++--- crates/gitlawb-node/src/pinata.rs | 73 ++++++++++++++-- 3 files changed, 206 insertions(+), 27 deletions(-) diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs index 90ca772..a670dfc 100644 --- a/crates/gitlawb-node/src/git/visibility_pack.rs +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -10,25 +10,32 @@ use anyhow::{Context, Result}; use std::collections::{BTreeSet, HashMap, HashSet}; use std::path::Path; -/// List every (blob_oid, "/repo/relative/path") pair reachable from any branch -/// ref in `repo_path`. Uses `git ls-tree -r` per ref so each path a blob lives -/// at is represented (the same blob content can appear at several paths). Paths -/// are returned with a leading "/" to match the glob form used by visibility -/// rules ("/secret/**"). 
+/// List every (blob_oid, "/repo/relative/path") pair reachable from any ref in +/// `repo_path`. Walks every ref, not just `refs/heads/*` and `refs/tags/*`, so +/// the withheld set covers the same object graph the pack and pin paths expose; +/// a blob reachable only through another namespace (e.g. `refs/notes/*`) must not +/// escape withholding. Uses `git ls-tree -r` per ref so each path a blob lives +/// at is represented (the same blob content can appear at several paths). This is +/// why it is not `git rev-list --objects`, which reports only one path per object. +/// Paths carry a leading "/" to match the glob form used by visibility rules +/// ("/secret/**"). +/// +/// Fails closed: if a ref cannot be traversed, returns an error so the caller +/// aborts the serve/pin rather than producing a partial (under-withheld) set. fn blob_paths(repo_path: &Path) -> Result> { let refs = store::list_refs(repo_path).context("list_refs failed")?; let mut out = Vec::new(); for (refname, _oid) in refs { - if !refname.starts_with("refs/heads/") && !refname.starts_with("refs/tags/") { - continue; - } let listing = std::process::Command::new("git") .args(["ls-tree", "-r", &refname]) .current_dir(repo_path) .output() .context("git ls-tree -r failed")?; if !listing.status.success() { - continue; + anyhow::bail!( + "git ls-tree -r {refname} failed: {}", + String::from_utf8_lossy(&listing.stderr) + ); } for line in String::from_utf8_lossy(&listing.stdout).lines() { // " blob \t" @@ -295,4 +302,55 @@ mod tests { let env = seal_blob(&bytes, &[reader.verifying_key()]).unwrap(); assert_eq!(open_blob(&env, &reader).unwrap(), bytes); } + + #[test] + fn withholds_blob_reachable_only_via_nonstandard_ref() { + let (_td, bare, secret_oid, _public) = fixture(); + // Move the sole ref out of refs/heads/* into a custom namespace so the + // secret blob is reachable only through a ref the old heads/tags filter + // skipped. It must still be withheld. 
+ let head_ref = { + let out = Command::new("git") + .args(["symbolic-ref", "HEAD"]) + .current_dir(&bare) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout).trim().to_string() + }; + let run = |args: &[&str]| { + assert!( + Command::new("git") + .args(args) + .current_dir(&bare) + .status() + .unwrap() + .success(), + "git {args:?} failed" + ); + }; + run(&["update-ref", "refs/custom/snap", "HEAD"]); + run(&["update-ref", "-d", &head_ref]); + + let rules = [rule("/secret/**", &[])]; + let withheld = withheld_blob_oids(&bare, &rules, true, OWNER, None).unwrap(); + assert!( + withheld.contains(&secret_oid), + "blob reachable only via refs/custom/* must still be withheld" + ); + } + + #[test] + fn fails_closed_when_a_ref_cannot_be_traversed() { + let (_td, bare, secret, _public) = fixture(); + // Point a ref at a blob (a valid object that is not tree-ish). `ls-tree -r` + // fails on it; that must propagate as Err rather than silently dropping the + // ref and under-withholding. + std::fs::write(bare.join("refs/heads/blobref"), format!("{secret}\n")).unwrap(); + let rules = [rule("/secret/**", &[])]; + let result = withheld_blob_oids(&bare, &rules, true, OWNER, None); + assert!( + result.is_err(), + "a ref that cannot be traversed must fail closed (Err)" + ); + } } diff --git a/crates/gitlawb-node/src/ipfs_pin.rs b/crates/gitlawb-node/src/ipfs_pin.rs index 9bdaade..89b500b 100644 --- a/crates/gitlawb-node/src/ipfs_pin.rs +++ b/crates/gitlawb-node/src/ipfs_pin.rs @@ -151,30 +151,94 @@ pub async fn pin_new_objects( pinned } -/// Run `git cat-file --batch-all-objects --batch-check='%(objectname)'` -/// to get all object SHA-256 hashes in the repository. +/// Names of every object reachable from any ref, via `git rev-list --objects --all`. 
+/// Reachable-only on purpose (not `cat-file --batch-all-objects`): an unreachable +/// or dangling object has no ref and no path, so visibility rules cannot classify +/// it for withholding, so it must not be pinned in cleartext. This keeps the pin set +/// aligned with what `blob_paths` can evaluate. fn list_all_objects(repo_path: &std::path::Path) -> Result> { let output = std::process::Command::new("git") - .args([ - "cat-file", - "--batch-all-objects", - "--batch-check=%(objectname)", - ]) + .args(["rev-list", "--objects", "--all"]) .current_dir(repo_path) .output() - .map_err(|e| anyhow::anyhow!("failed to run git cat-file: {e}"))?; + .map_err(|e| anyhow::anyhow!("failed to run git rev-list: {e}"))?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); - return Err(anyhow::anyhow!("git cat-file failed: {stderr}")); + return Err(anyhow::anyhow!("git rev-list failed: {stderr}")); } + // `rev-list --objects` lines are "" or " "; keep the oid. let stdout = String::from_utf8_lossy(&output.stdout); let hashes = stdout .lines() - .map(|l| l.trim().to_string()) + .filter_map(|l| l.split_whitespace().next().map(str::to_string)) .filter(|l| !l.is_empty()) .collect(); Ok(hashes) } + +#[cfg(test)] +mod tests { + use super::list_all_objects; + use std::process::Command; + use tempfile::TempDir; + + #[test] + fn list_all_objects_excludes_unreachable_blobs() { + let td = TempDir::new().unwrap(); + let work = td.path(); + let run = |args: &[&str]| { + assert!( + Command::new("git") + .args(args) + .current_dir(work) + .status() + .unwrap() + .success(), + "git {args:?} failed" + ); + }; + run(&["init", "-q"]); + run(&["config", "user.email", "t@t"]); + run(&["config", "user.name", "t"]); + std::fs::write(work.join("a.txt"), b"reachable\n").unwrap(); + run(&["add", "."]); + run(&["commit", "-qm", "init"]); + + let reachable = String::from_utf8_lossy( + &Command::new("git") + .args(["rev-parse", "HEAD:a.txt"]) + .current_dir(work) + .output() + 
.unwrap() + .stdout, + ) + .trim() + .to_string(); + + // Write a loose blob that no ref reaches (dangling). + std::fs::write(work.join("dangling.txt"), b"DANGLING SECRET\n").unwrap(); + let dangling = String::from_utf8_lossy( + &Command::new("git") + .args(["hash-object", "-w", "dangling.txt"]) + .current_dir(work) + .output() + .unwrap() + .stdout, + ) + .trim() + .to_string(); + + let objs = list_all_objects(work).unwrap(); + assert!( + objs.contains(&reachable), + "the committed (reachable) blob must be listed" + ); + assert!( + !objs.contains(&dangling), + "an unreachable/dangling blob must NOT be listed" + ); + } +} diff --git a/crates/gitlawb-node/src/pinata.rs b/crates/gitlawb-node/src/pinata.rs index 90bddad..1021d77 100644 --- a/crates/gitlawb-node/src/pinata.rs +++ b/crates/gitlawb-node/src/pinata.rs @@ -134,25 +134,26 @@ pub async fn pin_new_objects( pinned } +/// Names of every object reachable from any ref, via `git rev-list --objects --all`. +/// Reachable-only on purpose (not `cat-file --batch-all-objects`): an unreachable +/// or dangling object has no path, cannot be classified for withholding, and must +/// not be pinned in cleartext. fn list_all_objects(repo_path: &std::path::Path) -> Result> { let out = std::process::Command::new("git") - .args([ - "cat-file", - "--batch-all-objects", - "--batch-check=%(objectname)", - ]) + .args(["rev-list", "--objects", "--all"]) .current_dir(repo_path) .output() - .map_err(|e| anyhow::anyhow!("failed to run git cat-file: {e}"))?; + .map_err(|e| anyhow::anyhow!("failed to run git rev-list: {e}"))?; if !out.status.success() { let stderr = String::from_utf8_lossy(&out.stderr); - return Err(anyhow::anyhow!("git cat-file failed: {stderr}")); + return Err(anyhow::anyhow!("git rev-list failed: {stderr}")); } + // `rev-list --objects` lines are "" or " "; keep the oid. 
Ok(String::from_utf8_lossy(&out.stdout) .lines() - .map(|l| l.trim().to_string()) + .filter_map(|l| l.split_whitespace().next().map(str::to_string)) .filter(|l| !l.is_empty()) .collect()) } @@ -163,6 +164,62 @@ fn list_all_objects(repo_path: &std::path::Path) -> Result> { mod tests { use super::*; + #[test] + fn list_all_objects_excludes_unreachable_blobs() { + use std::process::Command; + use tempfile::TempDir; + + let td = TempDir::new().unwrap(); + let work = td.path(); + let run = |args: &[&str]| { + assert!( + Command::new("git") + .args(args) + .current_dir(work) + .status() + .unwrap() + .success(), + "git {args:?} failed" + ); + }; + run(&["init", "-q"]); + run(&["config", "user.email", "t@t"]); + run(&["config", "user.name", "t"]); + std::fs::write(work.join("a.txt"), b"reachable\n").unwrap(); + run(&["add", "."]); + run(&["commit", "-qm", "init"]); + + let reachable = String::from_utf8_lossy( + &Command::new("git") + .args(["rev-parse", "HEAD:a.txt"]) + .current_dir(work) + .output() + .unwrap() + .stdout, + ) + .trim() + .to_string(); + + std::fs::write(work.join("dangling.txt"), b"DANGLING SECRET\n").unwrap(); + let dangling = String::from_utf8_lossy( + &Command::new("git") + .args(["hash-object", "-w", "dangling.txt"]) + .current_dir(work) + .output() + .unwrap() + .stdout, + ) + .trim() + .to_string(); + + let objs = list_all_objects(work).unwrap(); + assert!(objs.contains(&reachable), "reachable blob must be listed"); + assert!( + !objs.contains(&dangling), + "unreachable/dangling blob must NOT be listed" + ); + } + #[tokio::test] async fn test_pin_skipped_when_jwt_empty() { let client = reqwest::Client::new();