diff --git a/.gitignore b/.gitignore index a36d8f7..404c87b 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,3 @@ keys/ # Logs *.log .openclaude-profile.json - -# Local planning / scratch docs (never commit) -docs/superpowers/ diff --git a/crates/gitlawb-node/src/api/encrypted.rs b/crates/gitlawb-node/src/api/encrypted.rs index 20827fb..d9fa52a 100644 --- a/crates/gitlawb-node/src/api/encrypted.rs +++ b/crates/gitlawb-node/src/api/encrypted.rs @@ -6,24 +6,30 @@ use axum::Json; use crate::auth::AuthenticatedDid; use crate::error::{AppError, Result}; use crate::state::AppState; +use crate::visibility::{visibility_check, Decision}; /// GET /api/v1/repos/{owner}/{repo}/encrypted-blobs -/// Returns [{oid, cid}] for encrypted blobs the caller may decrypt. +/// Returns [{oid, cid}] for every encrypted blob in the repo, to any caller who +/// can read the repo. Not recipient-scoped: recipient identities are not stored, +/// so access control here is repo readability and decryption is gated by the +/// envelope crypto (only a real recipient can open an envelope). pub async fn list_encrypted_blobs( State(state): State, auth: Option>, Path((owner, repo)): Path<(String, String)>, ) -> Result> { - let caller = auth.as_ref().map(|e| e.0 .0.as_str()).unwrap_or(""); let record = state .db .get_repo(&owner, &repo) .await? .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; - let rows = state - .db - .list_encrypted_blobs_for(&record.id, caller) - .await?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let rules = state.db.list_visibility_rules(&record.id).await?; + if visibility_check(&rules, record.is_public, &record.owner_did, caller, "/") == Decision::Deny + { + return Err(AppError::RepoNotFound(format!("{owner}/{repo}"))); + } + let rows = state.db.list_all_encrypted_blobs(&record.id).await?; let blobs: Vec<_> = rows .into_iter() .map(|(oid, cid)| serde_json::json!({ "oid": oid, "cid": cid })) @@ -32,21 +38,27 @@ pub async fn list_encrypted_blobs( } /// GET /api/v1/repos/{owner}/{repo}/encrypted-blob/{oid} -/// Returns raw envelope bytes if the caller is a recipient. +/// Returns raw envelope bytes to callers who can read the repo; the envelope +/// crypto still ensures only true recipients can decrypt. pub async fn get_encrypted_blob( State(state): State, auth: Option>, Path((owner, repo, oid)): Path<(String, String, String)>, ) -> Result> { - let caller = auth.as_ref().map(|e| e.0 .0.as_str()).unwrap_or(""); let record = state .db .get_repo(&owner, &repo) .await? .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let rules = state.db.list_visibility_rules(&record.id).await?; + if visibility_check(&rules, record.is_public, &record.owner_did, caller, "/") == Decision::Deny + { + return Err(AppError::RepoNotFound(format!("{owner}/{repo}/{oid}"))); + } let cid = state .db - .encrypted_blob_cid(&record.id, &oid, caller) + .encrypted_blob_cid(&record.id, &oid) .await? .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}/{oid}")))?; let bytes = crate::ipfs_pin::cat(&state.config.ipfs_api, &cid) @@ -57,12 +69,16 @@ pub async fn get_encrypted_blob( /// GET /api/v1/repos/{owner}/{repo}/encrypted-blobs/replicate /// Returns [{oid, cid}] for every encrypted blob in the repo, for peer-mirror -/// replication (Option B2). Recipient identities are deliberately withheld: the -/// v2 envelopes no longer carry recipient public keys, so peers must not learn -/// the reader set either. A mirror detects a re-seal by the CID changing (the -/// OID is stable across re-seals). Ciphertext metadata only, never plaintext. +/// replication (Option B2). Gated by repo readability, like discovery, so a +/// non-readable repo does not expose its blob index; for the intended case (a +/// public repo with withheld subtrees) the public root keeps this open to peers. +/// Recipient identities are deliberately withheld: the v2 envelopes no longer +/// carry recipient public keys, so peers must not learn the reader set either. A +/// mirror detects a re-seal by the CID changing (the OID is stable across +/// re-seals). Ciphertext metadata only, never plaintext. pub async fn replicate_encrypted_blobs( State(state): State, + auth: Option>, Path((owner, repo)): Path<(String, String)>, ) -> Result> { let record = state @@ -70,10 +86,16 @@ pub async fn replicate_encrypted_blobs( .get_repo(&owner, &repo) .await? .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let rules = state.db.list_visibility_rules(&record.id).await?; + if visibility_check(&rules, record.is_public, &record.owner_did, caller, "/") == Decision::Deny + { + return Err(AppError::RepoNotFound(format!("{owner}/{repo}"))); + } let rows = state.db.list_all_encrypted_blobs(&record.id).await?; let blobs: Vec<_> = rows .into_iter() - .map(|(oid, cid, _recipients)| replicate_blob_json(oid, cid)) + .map(|(oid, cid)| replicate_blob_json(oid, cid)) .collect(); Ok(Json(serde_json::json!({ "blobs": blobs }))) } diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 9a06f7f..3fae91e 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -689,6 +689,7 @@ pub async fn git_receive_pack( let irys_url = state.config.irys_url.clone(); let http_client = std::sync::Arc::clone(&state.http_client); let node_did_str = state.node_did.to_string(); + let node_seed = state.node_keypair.seed_bytes(); let repo_name = record.name.clone(); tokio::spawn(async move { let pinned = crate::ipfs_pin::pin_new_objects( @@ -722,6 +723,7 @@ pub async fn git_receive_pack( &repo_path_clone, &db_clone, &repo_id, + &node_seed, &recipients, ) .await; diff --git a/crates/gitlawb-node/src/arweave.rs b/crates/gitlawb-node/src/arweave.rs index cf13947..43f35a0 100644 --- a/crates/gitlawb-node/src/arweave.rs +++ b/crates/gitlawb-node/src/arweave.rs @@ -104,15 +104,15 @@ pub async fn anchor_ref_update( } /// A per-push manifest of the blobs encrypted this push (Option B3). The -/// `blobs` slice is `(oid, cid, recipients)` tuples; only `oid` and `cid` are -/// anchored. Anchored directly to Arweave as its JSON body so the discovery -/// index survives total node loss. +/// `blobs` slice is `(oid, cid)` tuples. Anchored directly to Arweave as its JSON +/// body so the discovery index survives total node loss. Recipient identities are +/// never part of the manifest. pub struct EncryptedManifest<'a> { pub repo: &'a str, pub owner_did: &'a str, pub node_did: &'a str, pub timestamp: &'a str, - pub blobs: &'a [(String, String, Vec)], + pub blobs: &'a [(String, String)], } /// Anchor a per-push encrypted-blob manifest to Arweave via Irys. The manifest @@ -135,7 +135,7 @@ pub async fn anchor_encrypted_manifest( let blobs_json: Vec = manifest .blobs .iter() - .map(|(oid, cid, _recipients)| manifest_blob_json(oid, cid)) + .map(|(oid, cid)| manifest_blob_json(oid, cid)) .collect(); let payload = json!({ @@ -298,11 +298,7 @@ mod tests { #[tokio::test] async fn test_manifest_anchor_noop_when_url_empty() { let client = reqwest::Client::new(); - let blobs = vec![( - "oid1".to_string(), - "cid1".to_string(), - vec!["did:key:zA".to_string()], - )]; + let blobs = vec![("oid1".to_string(), "cid1".to_string())]; let m = EncryptedManifest { repo: "alice/r", owner_did: "did:key:zO", @@ -319,7 +315,7 @@ mod tests { #[tokio::test] async fn test_manifest_anchor_noop_when_no_blobs() { let client = reqwest::Client::new(); - let blobs: Vec<(String, String, Vec)> = vec![]; + let blobs: Vec<(String, String)> = vec![]; let m = EncryptedManifest { repo: "alice/r", owner_did: "did:key:zO", @@ -348,11 +344,7 @@ mod tests { .await; let client = reqwest::Client::new(); - let blobs = vec![( - "oid1".to_string(), - "cid1".to_string(), - vec!["did:key:zA".to_string()], - )]; + let blobs = vec![("oid1".to_string(), "cid1".to_string())]; let m = EncryptedManifest { repo: "alice/r", owner_did: "did:key:zO", diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 4a1c107..81bd00d 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -735,6 +735,17 @@ const MIGRATIONS: &[Migration] = &[ "CREATE INDEX IF NOT EXISTS idx_encrypted_blobs_repo ON encrypted_blobs(repo_id)", ], }, + Migration { + version: 5, + name: "encrypted_blobs_blind_recipients", + stmts: &[ + // Replace the cleartext recipient DID list with an opaque, node-keyed + // tag used only to detect a recipient-set change. Existing rows get an + // empty tag and are re-sealed on the next push. + "ALTER TABLE encrypted_blobs DROP COLUMN IF EXISTS recipients", + "ALTER TABLE encrypted_blobs ADD COLUMN IF NOT EXISTS recipients_tag TEXT NOT NULL DEFAULT ''", + ], + }, ]; // ── Repos ───────────────────────────────────────────────────────────────────── @@ -1648,126 +1659,69 @@ impl Db { repo_id: &str, oid: &str, cid: &str, - recipients: &[String], + recipients_tag: &str, ) -> Result<()> { - let recipients_json = serde_json::to_string(recipients)?; sqlx::query( - "INSERT INTO encrypted_blobs (repo_id, oid, cid, recipients, created_at) + "INSERT INTO encrypted_blobs (repo_id, oid, cid, recipients_tag, created_at) VALUES ($1, $2, $3, $4, $5) - ON CONFLICT (repo_id, oid) DO UPDATE SET cid = EXCLUDED.cid, recipients = EXCLUDED.recipients", + ON CONFLICT (repo_id, oid) DO UPDATE SET cid = EXCLUDED.cid, recipients_tag = EXCLUDED.recipients_tag", ) .bind(repo_id) .bind(oid) .bind(cid) - .bind(recipients_json) + .bind(recipients_tag) .bind(Utc::now().to_rfc3339()) .execute(&self.pool) .await?; Ok(()) } - /// Deserialize the stored recipients JSON. Corruption is surfaced as an - /// error rather than silently treated as an empty recipient list, which - /// would deny access to every legitimate reader and hand peers incomplete - /// replication metadata. - fn parse_recipients(repo_id: &str, oid: &str, raw: &str) -> Result> { - serde_json::from_str(raw).with_context(|| { - format!("corrupt recipients JSON in encrypted_blobs (repo_id={repo_id}, oid={oid})") - }) - } - - /// (oid, cid) for every encrypted blob in the repo that `caller` may decrypt. - pub async fn list_encrypted_blobs_for( - &self, - repo_id: &str, - caller: &str, - ) -> Result> { - let rows = - sqlx::query("SELECT oid, cid, recipients FROM encrypted_blobs WHERE repo_id = $1") - .bind(repo_id) - .fetch_all(&self.pool) - .await?; + /// (oid, cid) for every encrypted blob in the repo, unscoped by caller. Used + /// by both the B2 replication view and B1 discovery. Recipient identities are + /// not stored, so authorization is the caller's repo readability, not a per + /// recipient check. Ciphertext metadata only. + pub async fn list_all_encrypted_blobs(&self, repo_id: &str) -> Result> { + let rows = sqlx::query("SELECT oid, cid FROM encrypted_blobs WHERE repo_id = $1") + .bind(repo_id) + .fetch_all(&self.pool) + .await?; let mut out = Vec::new(); for row in rows { let oid: String = row.get("oid"); let cid: String = row.get("cid"); - let recipients: String = row.get("recipients"); - let recipients = Self::parse_recipients(repo_id, &oid, &recipients)?; - if recipients.iter().any(|d| d == caller) { - out.push((oid, cid)); - } + out.push((oid, cid)); } Ok(out) } - /// (oid, cid, recipients) for every encrypted blob in the repo, unscoped by - /// caller. This is the replication view used by peer mirrors (Option B2), - /// distinct from the recipient-scoped `list_encrypted_blobs_for`. It returns - /// only ciphertext metadata; no plaintext or key material is involved. - pub async fn list_all_encrypted_blobs( - &self, - repo_id: &str, - ) -> Result)>> { - let rows = - sqlx::query("SELECT oid, cid, recipients FROM encrypted_blobs WHERE repo_id = $1") - .bind(repo_id) - .fetch_all(&self.pool) - .await?; - let mut out = Vec::new(); - for row in rows { - let oid: String = row.get("oid"); - let cid: String = row.get("cid"); - let recipients: String = row.get("recipients"); - let recipients = Self::parse_recipients(repo_id, &oid, &recipients)?; - out.push((oid, cid, recipients)); - } - Ok(out) + /// The CID of one encrypted blob, or None if there is no such row. Recipient + /// authorization is not enforced here: the handler checks repo readability and + /// the envelope crypto gates decryption. + pub async fn encrypted_blob_cid(&self, repo_id: &str, oid: &str) -> Result> { + let row = sqlx::query("SELECT cid FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2") + .bind(repo_id) + .bind(oid) + .fetch_optional(&self.pool) + .await?; + Ok(row.map(|r| r.get("cid"))) } - /// The CID of one encrypted blob, only if `caller` is a recipient. - pub async fn encrypted_blob_cid( + /// The opaque recipients tag stored for an encrypted blob, or None if there is + /// no row. Used only to decide whether a re-seal is needed (the recipient set + /// changed); the tag is a node-keyed fingerprint, not the DID list. + pub async fn encrypted_blob_recipients_tag( &self, repo_id: &str, oid: &str, - caller: &str, ) -> Result> { let row = sqlx::query( - "SELECT cid, recipients FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2", + "SELECT recipients_tag FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2", ) .bind(repo_id) .bind(oid) .fetch_optional(&self.pool) .await?; - let Some(row) = row else { return Ok(None) }; - let recipients: String = row.get("recipients"); - let recipients = Self::parse_recipients(repo_id, oid, &recipients)?; - if recipients.iter().any(|d| d == caller) { - Ok(Some(row.get("cid"))) - } else { - Ok(None) - } - } - - /// The recipient DID list stored for an encrypted blob, or None if there is - /// no row. Used to decide whether a re-seal is needed (recipients changed). - pub async fn encrypted_blob_recipients( - &self, - repo_id: &str, - oid: &str, - ) -> Result>> { - let row = - sqlx::query("SELECT recipients FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2") - .bind(repo_id) - .bind(oid) - .fetch_optional(&self.pool) - .await?; - match row { - None => Ok(None), - Some(r) => { - let recipients: String = r.get("recipients"); - Ok(Some(Self::parse_recipients(repo_id, oid, &recipients)?)) - } - } + Ok(row.map(|r| r.get("recipients_tag"))) } pub async fn list_pinned_cids(&self) -> Result> { diff --git a/crates/gitlawb-node/src/encrypted_pin.rs b/crates/gitlawb-node/src/encrypted_pin.rs index 50797b5..25439ee 100644 --- a/crates/gitlawb-node/src/encrypted_pin.rs +++ b/crates/gitlawb-node/src/encrypted_pin.rs @@ -13,31 +13,59 @@ use gitlawb_core::encrypt::seal_blob; use crate::db::Db; +use hmac::{Hmac, Mac}; +use sha2::Sha256; + +type HmacSha256 = Hmac; + +/// Opaque, node-keyed fingerprint of a blob's recipient set. Stored in place of +/// the cleartext DID list so a DB compromise cannot reveal the reader set; used +/// only to detect a recipient-set change so an unchanged blob is not re-sealed. +/// Order-insensitive (the input `BTreeSet` is already sorted). +pub fn recipients_tag(node_seed: &[u8; 32], dids: &BTreeSet) -> String { + let mut mac = HmacSha256::new_from_slice(node_seed).expect("HMAC accepts any key length"); + mac.update(b"gitlawb/recipients-tag/v1"); + for did in dids { + mac.update(b"\n"); + mac.update(did.as_bytes()); + } + hex::encode(mac.finalize().into_bytes()) +} + /// Resolve a DID string to its Ed25519 verifying key, or None if it carries no /// inline key (e.g. did:web / did:gitlawb). fn did_to_key(did: &str) -> Option { Did::from_str(did).ok()?.to_verifying_key().ok() } -/// Encrypt and pin every withheld blob. `recipients` maps blob oid -> DID set. -/// Returns `(oid, cid, recipients)` for each blob actually sealed and recorded -/// this call (the per-push delta), used by Option B3 to anchor a manifest. +/// Encrypt and pin every withheld blob. `recipients` maps blob oid -> DID set; +/// `node_seed` keys the opaque recipients tag. Returns `(oid, cid)` for each blob +/// actually sealed and recorded this call (the per-push delta), used by Option B3 +/// to anchor a manifest. Recipient identities are never stored or returned. pub async fn encrypt_and_pin( ipfs_api: &str, repo_path: &Path, db: &Db, repo_id: &str, + node_seed: &[u8; 32], recipients: &HashMap>, -) -> Vec<(String, String, Vec)> { +) -> Vec<(String, String)> { let mut sealed = Vec::new(); for (oid, dids) in recipients { // Skip only if an existing envelope already covers exactly these // recipients. If the recipient set changed (e.g. a reader was added to // the rule), re-seal so the new reader can recover the blob. Reader - // removal is not retroactive: the old envelope is already public. - if let Ok(Some(stored)) = db.encrypted_blob_recipients(repo_id, oid).await { - let stored: BTreeSet = stored.into_iter().collect(); - if &stored == dids { + // removal is not retroactive: the old envelope is already public. The + // comparison is on the opaque node-keyed tag, never the DID list. + let tag = recipients_tag(node_seed, dids); + match db.encrypted_blob_recipients_tag(repo_id, oid).await { + Ok(Some(stored_tag)) if stored_tag == tag => continue, + Ok(_) => {} + Err(e) => { + // A DB read failure is not a cache miss: re-sealing here would do + // an avoidable IPFS write during a partial outage. Skip and retry + // on the next push. + tracing::warn!(oid = %oid, err = %e, "recipients_tag lookup failed; skipping reseal"); continue; } } @@ -61,15 +89,48 @@ pub async fn encrypt_and_pin( Ok(c) if !c.is_empty() => c, _ => continue, }; - let dids_vec: Vec = dids.iter().cloned().collect(); - if let Err(e) = db - .record_encrypted_blob(repo_id, oid, &cid, &dids_vec) - .await - { + if let Err(e) = db.record_encrypted_blob(repo_id, oid, &cid, &tag).await { tracing::warn!(oid = %oid, err = %e, "record_encrypted_blob failed"); continue; } - sealed.push((oid.clone(), cid.clone(), dids_vec)); + sealed.push((oid.clone(), cid.clone())); } sealed } + +#[cfg(test)] +mod tests { + use super::recipients_tag; + use std::collections::BTreeSet; + + fn set(dids: &[&str]) -> BTreeSet { + dids.iter().map(|s| s.to_string()).collect() + } + + #[test] + fn tag_is_order_insensitive() { + let seed = [7u8; 32]; + let a = recipients_tag(&seed, &set(&["did:key:zA", "did:key:zB"])); + let b = recipients_tag(&seed, &set(&["did:key:zB", "did:key:zA"])); + assert_eq!(a, b); + } + + #[test] + fn tag_differs_for_different_sets() { + let seed = [7u8; 32]; + let a = recipients_tag(&seed, &set(&["did:key:zA"])); + let b = recipients_tag(&seed, &set(&["did:key:zA", "did:key:zB"])); + assert_ne!(a, b); + } + + #[test] + fn tag_is_keyed_by_node_seed() { + let dids = set(&["did:key:zA", "did:key:zB"]); + let a = recipients_tag(&[1u8; 32], &dids); + let b = recipients_tag(&[2u8; 32], &dids); + assert_ne!( + a, b, + "tag must depend on the node seed, not be a plain hash" + ); + } +} diff --git a/crates/gitlawb-node/src/sync.rs b/crates/gitlawb-node/src/sync.rs index 615ce22..58cfa4d 100644 --- a/crates/gitlawb-node/src/sync.rs +++ b/crates/gitlawb-node/src/sync.rs @@ -373,10 +373,7 @@ async fn replicate_encrypted_blobs( } let have: HashMap = match db.list_all_encrypted_blobs(repo_id).await { - Ok(rows) => rows - .into_iter() - .map(|(oid, cid, _recipients)| (oid, cid)) - .collect(), + Ok(rows) => rows.into_iter().collect(), Err(e) => { warn!(repo = %repo, err = %e, "failed to list local encrypted blobs for replication"); return; @@ -397,10 +394,7 @@ async fn replicate_encrypted_blobs( warn!(oid = %blob.oid, expected = %blob.cid, got = %cid, "replicated envelope CID mismatch; skipping record"); continue; } - if let Err(e) = db - .record_encrypted_blob(repo_id, &blob.oid, &cid, &[]) - .await - { + if let Err(e) = db.record_encrypted_blob(repo_id, &blob.oid, &cid, "").await { warn!(oid = %blob.oid, err = %e, "failed to record replicated encrypted blob"); } }