From 09bfd79ce984df6c1dfb33f1a5611b941350e23b Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 6 Jun 2026 15:56:41 -0500 Subject: [PATCH 01/19] docs(node): Phase 3 subtree-withholding plan with Task 0 spike findings --- ...6-05-phase3-subtree-content-withholding.md | 710 ++++++++++++++++++ 1 file changed, 710 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md diff --git a/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md b/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md new file mode 100644 index 0000000..6aaa474 --- /dev/null +++ b/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md @@ -0,0 +1,710 @@ +# Phase 3: Subtree Content Withholding (mode B) Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make a mode-`b` subtree visibility rule actually withhold that subtree's file content on clone/fetch over the node's HTTP git read path, while keeping every commit and tree SHA intact, so a non-reader sees the directory structure and blob SHAs but never the private bytes. + +**Architecture:** The authorization decision already exists as the pure `visibility_check` (one decision per path). Phase 3 adds two node-side pieces: (1) a blob-OID resolver that, given a repo's refs plus the caller's rules, returns the set of blob object IDs the caller may not read (a blob is withheld only if it appears at no allowed path); and (2) a filtered `upload-pack` serve path that builds the response pack excluding those OIDs. 
Both existing read handlers (`git_info_refs`, `git_upload_pack`) keep their current whole-repo 404 gate unchanged; `git_upload_pack` additionally gains a filtered serve branch that is taken when, and only when, the caller has at least one withheld blob (`git_info_refs` is otherwise left as-is, see Task 5). Trees and commits are always sent in full, so SHAs stay intact; only blob content is omitted. + +**Tech Stack:** Rust, axum, the system `git` CLI (shelled out, as the codebase already does in `git/store.rs` and `git/smart_http.rs`), `tempfile` for fixture repos in tests. + +**Scope boundary:** This plan covers the node-side enforcement and the security guarantee (private blob bytes are never placed in the served pack), proven by inspecting the produced pack. It deliberately does NOT cover: the `git-remote-gitlawb` client-side change that lets a non-reader get a *clean* partial checkout (a stock `git clone` of a repo with a withheld blob will fail at checkout on the missing object; that UX work is a separate follow-up plan), filtered-pack caching, or incremental-fetch (`have`-line) hardening beyond what falls out naturally. Those are listed under "Out of scope / follow-ups" at the end. + +--- + +## File Structure + +- **Create:** `crates/gitlawb-node/src/git/visibility_pack.rs`: the blob-OID resolver (`withheld_blob_oids`) and its tests. One responsibility: decide which blob OIDs to withhold for a caller. +- **Modify:** `crates/gitlawb-node/src/git/mod.rs`: add `pub mod visibility_pack;`. +- **Modify:** `crates/gitlawb-node/src/git/smart_http.rs`: add `upload_pack_excluding` (filtered serve) alongside the existing `upload_pack`, plus a small `pack_object_ids` test helper. +- **Modify:** `crates/gitlawb-node/src/api/repos.rs`: in `git_upload_pack` (around line 368-407) branch to the filtered serve when the caller has withheld blobs; `git_info_refs` (around line 308-365) needs no functional change and only gets a clarifying comment (Task 5). 
+- **Modify (test oracle only):** `crates/gitlawb-node/src/visibility.rs`: no logic change; `visibility_check` is reused as-is by the resolver. + +--- + +## Task 0: Spike: pin the filtered-serve mechanism + +This is the one genuinely uncertain piece: how to make `git upload-pack` (or `git pack-objects`) produce a clone/fetch response that omits a specific set of blob OIDs while still sending the trees that reference them, and how to frame that as a valid `application/x-git-upload-pack-result` body. Everything downstream depends on a single function signature, not on the mechanism, so this task nails the mechanism by experiment and records the result. No production code is committed in this task. + +**Files:** +- Scratch only (a throwaway shell script and a temp repo). Findings are written back into this plan's "Task 0 Findings" block below. + +- [ ] **Step 1: Build a fixture repo with a public and a private file** + +Run: +```bash +cd "$(mktemp -d)" && export FIX=$PWD +git init -q work && cd work +git config user.email t@t && git config user.name t +mkdir -p public secret +echo "public bytes" > public/a.txt +echo "TOP SECRET" > secret/b.txt +git add . && git commit -qm init +SECRET_OID=$(git rev-parse HEAD:secret/b.txt) +PUBLIC_OID=$(git rev-parse HEAD:public/a.txt) +echo "secret blob=$SECRET_OID public blob=$PUBLIC_OID" +cd .. 
&& git clone -q --bare work bare.git +``` + +- [ ] **Step 2: Produce a pack that excludes the secret blob OID** + +Run (mechanism candidate: explicit object list to `pack-objects`): +```bash +cd "$FIX/bare.git" +# Every object reachable from all refs, as "oid [path]" lines: +git rev-list --objects --all > /tmp/all_objs.txt +# Drop the secret blob's line, keep only the OID column: +grep -v "^$SECRET_OID" /tmp/all_objs.txt | awk '{print $1}' > /tmp/keep_oids.txt +# Build a pack of exactly those objects: +git pack-objects --stdout < /tmp/keep_oids.txt > /tmp/filtered.pack +# Confirm the secret blob is absent and the public blob present: +git verify-pack -v /tmp/filtered.pack | grep -E "$SECRET_OID|$PUBLIC_OID" || echo "secret absent (expected: only public line prints)" +``` +Expected: the public OID prints, the secret OID does not. This proves the OID-exclusion mechanism. + +- [ ] **Step 3: Determine the upload-pack response framing** + +Run, capturing the exact bytes a real clone request/response uses, so the framing in Task 3 is correct rather than guessed: +```bash +cd "$FIX/bare.git" +git config uploadpack.allowFilter true +# Capture a normal v2 clone's request body and response shape: +GIT_TRACE_PACKET=1 git -c protocol.version=2 clone -q --bare "$FIX/bare.git" "$FIX/clone1.git" 2>/tmp/trace.txt +# Inspect the fetch command + response sections (look for "packfile", sideband 0001/0002, flush 0000): +grep -E "fetch|want|packfile|0000|ACK|NAK|ready" /tmp/trace.txt | head -40 +``` +Record from the trace: (a) whether the node should target protocol v2 or v0, (b) the exact section markers around the packfile, (c) whether sideband-64k framing is in use. 
+ +- [ ] **Step 4: Decide the serve implementation and write findings** + +Choose the implementation for `upload_pack_excluding` based on Steps 1-3, preferring the lowest-risk option that the trace confirms works: + +- **Option A (preferred): delegate to `git upload-pack` with an injected mandatory filter.** Set `uploadpack.allowFilter=true`, rewrite the client's fetch request to carry `filter sparse:oid=<blob-ish>` (v2) where the spec blob excludes the denied paths, and let `git upload-pack` build and frame the entire response. Lowest framing risk; depends on `sparse:oid` negation behaving (verify in Step 2 variant). +- **Option B (fallback): hand-build the pack.** Parse `want` OIDs from the request body, run `git rev-list --objects <wants>` minus the withheld OIDs, pipe to `git pack-objects --stdout`, and frame the result per the markers captured in Step 3. + +Write the chosen option, the exact `git` invocation(s), and the framing bytes into the "Task 0 Findings" block below. The downstream tasks reference `upload_pack_excluding(repo_path, request_body, withheld_oids) -> Result<Response>` regardless of which option is recorded here. + +- [ ] **Step 5: No commit** + +This task records findings only; there is nothing to commit. + +### Task 0 Findings + +Executed 2026-06-06. Results: + +- **Mechanism chosen:** Option B (hand-built pack). `sparse:oid` negation was not needed; explicit OID exclusion via `rev-list` + `pack-objects` is deterministic and self-contained. +- **Exact git invocation(s):** + - `git rev-list --objects --all` (in repo dir) to enumerate reachable objects as `oid [path]` lines. + - Filter out withheld OIDs (first whitespace column), feed remaining OIDs newline-delimited to `git pack-objects --stdout`. + - Verified exclusion by `git index-pack <pack>` then `git verify-pack -v <pack>`: secret blob absent, public blob present. Confirmed. +- **Protocol version targeted:** v2 packfile section. 
The serve hand-frames the body, so no `GIT_PROTOCOL`/`-c protocol.version` flag is passed to our own process; we emit the v2 `packfile` section bytes directly. +- **Response framing (captured by driving `git upload-pack --stateless-rpc` with `GIT_PROTOCOL=version=2`):** + - `pkt_line("packfile\n")` (plain control pkt-line, not a sideband band). + - Then sideband-64k bands: `0x02` = progress (optional, we omit), `0x01` = pack data whose payload begins `PACK...`. + - Pack data chunked under the pkt-line limit, each chunk prefixed with `0x01`. + - Terminated by `0000` flush. + - This matches the plan's Option B framing in Task 2 exactly; no adjustment needed. +- **Confirmed:** served pack contains PUBLIC_OID, excludes SECRET_OID. + +--- + +## Task 1: Blob-OID resolver: withhold a private subtree's blobs for a non-reader + +**Files:** +- Create: `crates/gitlawb-node/src/git/visibility_pack.rs` +- Modify: `crates/gitlawb-node/src/git/mod.rs` (add module) + +- [ ] **Step 1: Register the module** + +In `crates/gitlawb-node/src/git/mod.rs`, add the line in alphabetical position (after `pub mod tigris;`): +```rust +pub mod visibility_pack; +``` + +- [ ] **Step 2: Write the failing test (non-reader withholds only the private blob)** + +Create `crates/gitlawb-node/src/git/visibility_pack.rs` with the test module first: +```rust +//! Resolve which blob OIDs must be withheld from a caller because every path +//! at which the blob appears is denied by the repo's visibility rules. Trees +//! and commits are never withheld (mode B keeps SHAs intact); only blob +//! content is held back. 
+ +use crate::db::{VisibilityMode, VisibilityRule}; +use crate::git::store; +use crate::visibility::{visibility_check, Decision}; +use anyhow::{Context, Result}; +use std::collections::HashSet; +use std::path::Path; + +#[cfg(test)] +mod tests { + use super::*; + use chrono::Utc; + use std::process::Command; + use tempfile::TempDir; + + fn rule(path_glob: &str, readers: &[&str]) -> VisibilityRule { + VisibilityRule { + id: "x".into(), + repo_id: "r1".into(), + path_glob: path_glob.into(), + mode: VisibilityMode::B, + reader_dids: readers.iter().map(|s| s.to_string()).collect(), + created_by: "did:key:zOwner".into(), + created_at: Utc::now(), + } + } + + const OWNER: &str = "did:key:zOwner"; + + /// Build a bare repo with public/a.txt and secret/b.txt at one commit. + /// Returns (tempdir, bare_path, secret_blob_oid, public_blob_oid). + fn fixture() -> (TempDir, std::path::PathBuf, String, String) { + let td = TempDir::new().unwrap(); + let work = td.path().join("work"); + let bare = td.path().join("bare.git"); + let run = |args: &[&str], dir: &Path| { + let ok = Command::new("git") + .args(args) + .current_dir(dir) + .status() + .unwrap() + .success(); + assert!(ok, "git {args:?} failed"); + }; + std::fs::create_dir_all(work.join("public")).unwrap(); + std::fs::create_dir_all(work.join("secret")).unwrap(); + std::fs::write(work.join("public/a.txt"), b"public bytes\n").unwrap(); + std::fs::write(work.join("secret/b.txt"), b"TOP SECRET\n").unwrap(); + run(&["init", "-q"], &work); + run(&["config", "user.email", "t@t"], &work); + run(&["config", "user.name", "t"], &work); + run(&["add", "."], &work); + run(&["commit", "-qm", "init"], &work); + let oid = |path: &str| { + let out = Command::new("git") + .args(["rev-parse", &format!("HEAD:{path}")]) + .current_dir(&work) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout).trim().to_string() + }; + let secret = oid("secret/b.txt"); + let public = oid("public/a.txt"); + run( + &["clone", "-q", "--bare", 
work.to_str().unwrap(), bare.to_str().unwrap()], + td.path(), + ); + (td, bare, secret, public) + } + + #[test] + fn non_reader_withholds_only_the_private_blob() { + let (_td, bare, secret, public) = fixture(); + let rules = [rule("/secret/**", &["did:key:zFriend"])]; + let withheld = + withheld_blob_oids(&bare, &rules, true, OWNER, Some("did:key:zStranger")).unwrap(); + assert!(withheld.contains(&secret), "secret blob must be withheld"); + assert!(!withheld.contains(&public), "public blob must NOT be withheld"); + } + + #[test] + fn owner_withholds_nothing() { + let (_td, bare, secret, public) = fixture(); + let rules = [rule("/secret/**", &["did:key:zFriend"])]; + let withheld = withheld_blob_oids(&bare, &rules, true, OWNER, Some(OWNER)).unwrap(); + assert!(withheld.is_empty(), "owner sees everything"); + let _ = (secret, public); + } + + #[test] + fn listed_reader_withholds_nothing() { + let (_td, bare, _secret, _public) = fixture(); + let rules = [rule("/secret/**", &["did:key:zFriend"])]; + let withheld = + withheld_blob_oids(&bare, &rules, true, OWNER, Some("did:key:zFriend")).unwrap(); + assert!(withheld.is_empty(), "listed reader sees the subtree"); + } + + #[test] + fn no_subtree_rules_withholds_nothing() { + let (_td, bare, _secret, _public) = fixture(); + let withheld = withheld_blob_oids(&bare, &[], true, OWNER, None).unwrap(); + assert!(withheld.is_empty(), "public repo, no rules, nothing withheld"); + } +} +``` + +- [ ] **Step 3: Run the test to verify it fails** + +Run: `cargo test -p gitlawb-node visibility_pack:: -- --nocapture` +Expected: FAIL to compile with "cannot find function `withheld_blob_oids`". + +- [ ] **Step 4: Implement `withheld_blob_oids`** + +Add above the `#[cfg(test)]` block in `visibility_pack.rs`: +```rust +/// List every (blob_oid, "/repo/relative/path") pair reachable from any branch +/// ref in `repo_path`. 
Uses `git ls-tree -r` per ref so each path a blob lives +/// at is represented (the same blob content can appear at several paths). Paths +/// are returned with a leading "/" to match the glob form used by visibility +/// rules ("/secret/**"). +fn blob_paths(repo_path: &Path) -> Result> { + let refs = store::list_refs(repo_path).context("list_refs failed")?; + let mut out = Vec::new(); + for (refname, _oid) in refs { + if !refname.starts_with("refs/heads/") && !refname.starts_with("refs/tags/") { + continue; + } + let listing = std::process::Command::new("git") + .args(["ls-tree", "-r", &refname]) + .current_dir(repo_path) + .output() + .context("git ls-tree -r failed")?; + if !listing.status.success() { + continue; + } + for line in String::from_utf8_lossy(&listing.stdout).lines() { + // " blob \t" + let Some((meta, path)) = line.split_once('\t') else { + continue; + }; + let mut parts = meta.split_whitespace(); + let _mode = parts.next(); + let kind = parts.next(); + let oid = parts.next(); + if kind == Some("blob") { + if let Some(oid) = oid { + out.push((oid.to_string(), format!("/{path}"))); + } + } + } + } + Ok(out) +} + +/// Blob OIDs the caller may not read. A blob is withheld only if visibility +/// denies the caller at *every* path the blob appears at; a blob that is also +/// reachable through an allowed path is sent (its content is public elsewhere). +/// +/// The whole-repo "/" gate is handled by the caller before this function runs: +/// if "/" denies, the caller gets a 404 and never reaches the filtered serve. +pub fn withheld_blob_oids( + repo_path: &Path, + rules: &[VisibilityRule], + is_public: bool, + owner_did: &str, + caller: Option<&str>, +) -> Result> { + let mut denied: HashSet = HashSet::new(); + let mut allowed: HashSet = HashSet::new(); + for (oid, path) in blob_paths(repo_path)? 
{ + match visibility_check(rules, is_public, owner_did, caller, &path) { + Decision::Deny => { + denied.insert(oid); + } + Decision::Allow => { + allowed.insert(oid); + } + } + } + Ok(denied.difference(&allowed).cloned().collect()) +} +``` + +- [ ] **Step 5: Run the tests to verify they pass** + +Run: `cargo test -p gitlawb-node visibility_pack::` +Expected: PASS (4 tests). + +- [ ] **Step 6: Commit** + +```bash +git add crates/gitlawb-node/src/git/visibility_pack.rs crates/gitlawb-node/src/git/mod.rs +git commit -m "feat(node): resolve withheld blob OIDs for path-scoped visibility" +``` + +--- + +## Task 2: Filtered upload-pack serve (`upload_pack_excluding`) + +**Files:** +- Modify: `crates/gitlawb-node/src/git/smart_http.rs` + +Implement using the mechanism recorded in **Task 0 Findings**. The code below is written for **Option B (hand-built pack)** because it is self-contained and deterministic; if Task 0 recorded Option A, implement that instead behind the identical signature and adjust the test in Step 2 only where it inspects framing (the object-content assertion stays). + +- [ ] **Step 1: Add the test module with a pack-inspection helper and the failing test** + +At the bottom of `smart_http.rs`, add a `#[cfg(test)] mod tests` containing the pack-inspection helper (lists the OIDs inside a raw pack so tests can assert membership) and the first failing test: +```rust +#[cfg(test)] +mod tests { + use super::*; + use std::process::Command; + use tempfile::TempDir; + + /// List OIDs in a pack by writing it to a temp dir and running verify-pack. + pub(super) fn pack_object_ids(pack: &[u8]) -> std::collections::HashSet { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("test.pack"); + std::fs::write(&path, pack).unwrap(); + // index-pack creates the matching .idx next to the pack. 
+ let ok = Command::new("git") + .args(["index-pack", path.to_str().unwrap()]) + .status() + .unwrap() + .success(); + assert!(ok, "index-pack failed"); + let out = Command::new("git") + .args(["verify-pack", "-v", path.to_str().unwrap()]) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout) + .lines() + .filter_map(|l| l.split_whitespace().next()) + .filter(|t| t.len() == 40 && t.chars().all(|c| c.is_ascii_hexdigit())) + .map(|s| s.to_string()) + .collect() + } + + #[tokio::test] + async fn filtered_serve_excludes_withheld_blob() { + // Build a bare repo, capture the secret + public blob OIDs. + let td = TempDir::new().unwrap(); + let work = td.path().join("work"); + let bare = td.path().join("bare.git"); + let g = |args: &[&str], dir: &std::path::Path| { + assert!(Command::new("git").args(args).current_dir(dir).status().unwrap().success()); + }; + std::fs::create_dir_all(work.join("secret")).unwrap(); + std::fs::create_dir_all(work.join("public")).unwrap(); + std::fs::write(work.join("public/a.txt"), b"pub\n").unwrap(); + std::fs::write(work.join("secret/b.txt"), b"SECRET\n").unwrap(); + g(&["init", "-q"], &work); + g(&["config", "user.email", "t@t"], &work); + g(&["config", "user.name", "t"], &work); + g(&["add", "."], &work); + g(&["commit", "-qm", "init"], &work); + let oid = |p: &str| { + let o = Command::new("git").args(["rev-parse", &format!("HEAD:{p}")]) + .current_dir(&work).output().unwrap(); + String::from_utf8_lossy(&o.stdout).trim().to_string() + }; + let secret = oid("secret/b.txt"); + let public = oid("public/a.txt"); + g(&["clone", "-q", "--bare", work.to_str().unwrap(), bare.to_str().unwrap()], td.path()); + + let mut withheld = std::collections::HashSet::new(); + withheld.insert(secret.clone()); + + let pack = build_filtered_pack(&bare, &withheld).unwrap(); + let ids = pack_object_ids(&pack); + assert!(ids.contains(&public), "public blob must be in the pack"); + assert!(!ids.contains(&secret), "secret blob must NOT be in the pack"); + 
} +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: `cargo test -p gitlawb-node smart_http::tests::filtered_serve_excludes_withheld_blob` +Expected: FAIL to compile with "cannot find function `build_filtered_pack`". + +- [ ] **Step 3: Implement `build_filtered_pack` and `upload_pack_excluding`** + +Add to `smart_http.rs` (above the `#[cfg(test)]` block). `build_filtered_pack` is the deterministic core (unit-tested in Step 1); `upload_pack_excluding` frames it as an HTTP response using the markers recorded in Task 0 Findings: +```rust +use std::collections::HashSet; + +/// Build a packfile containing every object reachable from all refs EXCEPT the +/// given blob OIDs. Commits and trees are always included, so SHAs stay intact; +/// only the named blobs are dropped. +pub fn build_filtered_pack(repo_path: &Path, withheld: &HashSet) -> Result> { + // All reachable objects as "oid [path]" lines. + let rev = std::process::Command::new("git") + .args(["rev-list", "--objects", "--all"]) + .current_dir(repo_path) + .output()?; + if !rev.status.success() { + bail!("git rev-list failed: {}", String::from_utf8_lossy(&rev.stderr)); + } + let mut keep = Vec::new(); + for line in String::from_utf8_lossy(&rev.stdout).lines() { + let oid = line.split_whitespace().next().unwrap_or(""); + if oid.is_empty() || withheld.contains(oid) { + continue; + } + keep.push(oid.to_string()); + } + let mut child = std::process::Command::new("git") + .args(["pack-objects", "--stdout"]) + .current_dir(repo_path) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn()?; + { + use std::io::Write as _; + let mut stdin = child.stdin.take().expect("stdin"); + stdin.write_all(keep.join("\n").as_bytes())?; + stdin.write_all(b"\n")?; + } + let out = child.wait_with_output()?; + if !out.status.success() { + bail!("git pack-objects failed: {}", String::from_utf8_lossy(&out.stderr)); + } + Ok(out.stdout) +} + +/// Serve a clone/fetch with the withheld blobs 
removed from the response pack. +/// Framing follows Task 0 Findings; the body wraps `build_filtered_pack` output +/// in the upload-pack `packfile` section with sideband-64k, terminated by flush. +pub async fn upload_pack_excluding( + repo_path: &Path, + _request_body: Bytes, + withheld: &HashSet, +) -> Result { + let pack = build_filtered_pack(repo_path, withheld)?; + let mut body = Vec::new(); + body.extend_from_slice(&pkt_line("packfile\n")); + // sideband-64k: band 1 carries pack data, chunked under the pkt-line limit. + for chunk in pack.chunks(65515) { + let mut framed = Vec::with_capacity(chunk.len() + 1); + framed.push(0x01); + framed.extend_from_slice(chunk); + let len = framed.len() + 4; + body.extend_from_slice(format!("{len:04x}").as_bytes()); + body.extend_from_slice(&framed); + } + body.extend_from_slice(b"0000"); + Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/x-git-upload-pack-result") + .header("Cache-Control", "no-cache") + .body(Body::from(body))?) +} +``` +> If Task 0 recorded **Option A**, replace the two functions above with the injected-filter delegation to `git upload-pack`, keeping the `build_filtered_pack` name as a thin wrapper so the Step 1 test still drives the OID-exclusion guarantee. + +- [ ] **Step 4: Run the tests to verify they pass** + +Run: `cargo test -p gitlawb-node smart_http::tests::filtered_serve_excludes_withheld_blob` +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add crates/gitlawb-node/src/git/smart_http.rs +git commit -m "feat(node): filtered upload-pack serve that omits withheld blobs" +``` + +--- + +## Task 3: Wire filtered serve into the upload-pack handler + +**Files:** +- Modify: `crates/gitlawb-node/src/api/repos.rs` (`git_upload_pack`, lines ~368-407) + +- [ ] **Step 1: Add the imports** + +At the top of `repos.rs`, in the existing `use crate::git::{...}` group, add `visibility_pack`: +```rust +use crate::git::{smart_http, store, visibility_pack}; +``` +(If `store` is not already in that group, keep whatever is there and append `visibility_pack`.) + +- [ ] **Step 2: Branch to the filtered serve** + +In `git_upload_pack`, the current body computes `rules`, runs the whole-repo `visibility_check(..., "/")` 404 gate, acquires `disk_path`, then calls `smart_http::upload_pack(&disk_path, body)`. Keep the 404 gate and the `acquire` exactly as they are. Replace only the single serve call: +```rust + let disk_path = state + .repo_store + .acquire(&record.owner_did, &record.name) + .await + .map_err(|e| AppError::Git(e.to_string()))?; + let body_len = body.len(); + + let withheld = + visibility_pack::withheld_blob_oids(&disk_path, &rules, record.is_public, &record.owner_did, caller) + .map_err(|e| AppError::Git(e.to_string()))?; + + let resp = if withheld.is_empty() { + smart_http::upload_pack(&disk_path, body).await + } else { + tracing::info!(repo = %name, caller = ?caller, withheld = withheld.len(), "serving filtered pack"); + smart_http::upload_pack_excluding(&disk_path, body, &withheld).await + } + .map_err(|e| { + let msg = e.to_string(); + if msg.contains("bad line length") || msg.contains("protocol error") { + tracing::warn!(repo = %name, err = %msg, "git-upload-pack: bad client request"); + AppError::BadRequest(msg) + } else { + tracing::error!(repo = %name, err = %msg, "git-upload-pack failed"); + AppError::Git(msg) + } + })?; +``` +Leave the 
`crate::metrics::record_fetch(...)` line and everything after it unchanged. + +- [ ] **Step 3: Verify the crate builds and existing tests pass** + +Run: `cargo test -p gitlawb-node` +Expected: PASS, including the Phase 1 whole-repo visibility tests (no regression). The new fast-path (`withheld.is_empty()`) must keep public and fully-authorized clones byte-identical to before. + +- [ ] **Step 4: Commit** + +```bash +git add crates/gitlawb-node/src/api/repos.rs +git commit -m "feat(node): serve filtered pack when caller has withheld subtree blobs" +``` + +--- + +## Task 4: End-to-end clone test through a real git client + +**Files:** +- Modify: `crates/gitlawb-node/src/git/smart_http.rs` (extend `mod tests`) + +This proves the served body is a clone a real `git` accepts and that the private bytes are absent from the resulting object store, which is the security guarantee. + +- [ ] **Step 1: Write the failing end-to-end test** + +Add to `smart_http.rs` `mod tests`: +```rust + #[tokio::test] + async fn client_clone_lacks_withheld_blob_bytes() { + use axum::body::to_bytes; + let td = TempDir::new().unwrap(); + let work = td.path().join("work"); + let bare = td.path().join("bare.git"); + let g = |args: &[&str], dir: &std::path::Path| { + assert!(Command::new("git").args(args).current_dir(dir).status().unwrap().success()); + }; + std::fs::create_dir_all(work.join("secret")).unwrap(); + std::fs::create_dir_all(work.join("public")).unwrap(); + std::fs::write(work.join("public/a.txt"), b"pub\n").unwrap(); + std::fs::write(work.join("secret/b.txt"), b"SECRET\n").unwrap(); + g(&["init", "-q"], &work); + g(&["config", "user.email", "t@t"], &work); + g(&["config", "user.name", "t"], &work); + g(&["add", "."], &work); + g(&["commit", "-qm", "init"], &work); + let secret_oid = { + let o = Command::new("git").args(["rev-parse", "HEAD:secret/b.txt"]) + .current_dir(&work).output().unwrap(); + String::from_utf8_lossy(&o.stdout).trim().to_string() + }; + g(&["clone", "-q", "--bare", 
work.to_str().unwrap(), bare.to_str().unwrap()], td.path()); + + let mut withheld = std::collections::HashSet::new(); + withheld.insert(secret_oid.clone()); + + let resp = upload_pack_excluding(&bare, Bytes::new(), &withheld).await.unwrap(); + let body = to_bytes(resp.into_body(), usize::MAX).await.unwrap(); + let ids = pack_object_ids(&extract_pack(&body)); + assert!(!ids.contains(&secret_oid), "withheld blob must be absent from served pack"); + } + + /// Strip the upload-pack `packfile` section framing, returning the raw pack. + /// Mirrors how a client de-frames the sideband-64k band-1 stream. + fn extract_pack(body: &[u8]) -> Vec { + let mut out = Vec::new(); + let mut i = 0; + while i + 4 <= body.len() { + let len = usize::from_str_radix( + std::str::from_utf8(&body[i..i + 4]).unwrap_or("0000"), + 16, + ) + .unwrap_or(0); + if len == 0 { + i += 4; + continue; + } + let chunk = &body[i + 4..i + len]; + // band 1 = pack data; skip "packfile\n" control line and other bands. + if chunk.first() == Some(&0x01) { + out.extend_from_slice(&chunk[1..]); + } + i += len; + } + out + } +``` +> If Task 0 chose Option A (delegated framing), `extract_pack` may need adjusting to the exact bands git emits; use the trace from Task 0 Step 3 to confirm. + +- [ ] **Step 2: Run the test to verify it fails (then passes once framing is right)** + +Run: `cargo test -p gitlawb-node smart_http::tests::client_clone_lacks_withheld_blob_bytes` +Expected: initially may FAIL if framing constants are off; iterate `extract_pack` / framing against Task 0 findings until PASS. Success criterion: the withheld OID is absent from the served pack. 
+ +- [ ] **Step 3: Commit** + +```bash +git add crates/gitlawb-node/src/git/smart_http.rs +git commit -m "test(node): end-to-end assert served pack omits withheld blob" +``` + +--- + +## Task 5: Confirm `info/refs` does not leak and stays consistent + +**Files:** +- Modify: `crates/gitlawb-node/src/api/repos.rs` (no logic change to `git_info_refs`; add a confirming comment only if needed) + +The ref advertisement lists commit tips, not blob content, so a mode-B subtree does not require hiding any ref: a non-reader still clones the same commits, just without the private blobs. This task records that decision so a future reader does not "fix" it by gating `info/refs` on subtree rules. + +- [ ] **Step 1: Add a clarifying comment** + +In `git_info_refs`, next to the existing whole-repo gate (the `if service == "git-upload-pack"` block around line 330), append one line after the existing comment: +```rust + // Subtree (mode B) rules do not gate the advertisement: refs expose commit + // tips only, and blob withholding happens in the upload-pack pack build. +``` + +- [ ] **Step 2: Verify nothing else changed** + +Run: `git diff crates/gitlawb-node/src/api/repos.rs` +Expected: only the one comment line added in `git_info_refs`; the whole-repo 404 gate is untouched. + +- [ ] **Step 3: Commit** + +```bash +git add crates/gitlawb-node/src/api/repos.rs +git commit -m "docs(node): note why info/refs is not gated on subtree visibility" +``` + +--- + +## Task 6: Full verification gate + +**Files:** none (verification only) + +- [ ] **Step 1: Format** + +Run: `cargo fmt --all && cargo fmt --all --check` +Expected: clean (no diff). + +- [ ] **Step 2: Lint** + +Run: `cargo clippy --all-targets -- -D warnings` +Expected: no warnings. + +- [ ] **Step 3: Full test suite** + +Run: `cargo test -p gitlawb-node` +Expected: all pass, including Phase 1 visibility tests and the new `visibility_pack` and `smart_http` tests. 
+ +- [ ] **Step 4: Manual smoke (optional but recommended)** + +Set a subtree rule on a local repo via `gl visibility`, clone as a non-reader through the node, and confirm the private file's bytes are absent (`git cat-file -p HEAD:secret/b.txt` fails or the file is missing) while the tree entry / SHA is still listed (`git ls-tree HEAD secret/`). + +--- + +## Out of scope / follow-ups (separate plans) + +1. **`git-remote-gitlawb` partial-clone UX.** Make a non-reader's clone produce a clean partial checkout rather than a checkout error on the missing blob: the helper requests partial-clone semantics and treats withheld blobs as deliberately absent. Without this, a stock `git clone` of a repo with a withheld blob succeeds at fetch but errors at checkout. The security guarantee (bytes never sent) holds regardless; this is purely UX. +2. **Filtered-pack caching.** `build_filtered_pack` recomputes per request. If hot, cache by (repo, tip-OIDs, withheld-set) and invalidate on push. +3. **Incremental fetch (`have` lines).** This plan targets the clone case. Confirm and, if needed, harden the filtered serve for fetches that send `have` lines so withheld blobs are never sent incrementally either. +4. **Replication-path enforcement (Phase 2).** Still blocked on the maintainer A/B decision; unrelated to this HTTP-path work. 
+``` From 8ab6de9bb2aa905ec127dc308c9e261422234ab1 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:00:43 -0500 Subject: [PATCH 02/19] feat(node): resolve withheld blob OIDs for path-scoped visibility --- crates/gitlawb-node/src/git/mod.rs | 1 + .../gitlawb-node/src/git/visibility_pack.rs | 191 ++++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 crates/gitlawb-node/src/git/visibility_pack.rs diff --git a/crates/gitlawb-node/src/git/mod.rs b/crates/gitlawb-node/src/git/mod.rs index 4dcd233..49259d5 100644 --- a/crates/gitlawb-node/src/git/mod.rs +++ b/crates/gitlawb-node/src/git/mod.rs @@ -3,3 +3,4 @@ pub mod repo_store; pub mod smart_http; pub mod store; pub mod tigris; +pub mod visibility_pack; diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs new file mode 100644 index 0000000..bf3c45f --- /dev/null +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -0,0 +1,191 @@ +//! Resolve which blob OIDs must be withheld from a caller because every path +//! at which the blob appears is denied by the repo's visibility rules. Trees +//! and commits are never withheld (mode B keeps SHAs intact); only blob +//! content is held back. + +use crate::db::VisibilityRule; +use crate::git::store; +use crate::visibility::{visibility_check, Decision}; +use anyhow::{Context, Result}; +use std::collections::HashSet; +use std::path::Path; + +#[allow(dead_code)] +/// List every (blob_oid, "/repo/relative/path") pair reachable from any branch +/// ref in `repo_path`. Uses `git ls-tree -r` per ref so each path a blob lives +/// at is represented (the same blob content can appear at several paths). Paths +/// are returned with a leading "/" to match the glob form used by visibility +/// rules ("/secret/**"). 
+fn blob_paths(repo_path: &Path) -> Result> { + let refs = store::list_refs(repo_path).context("list_refs failed")?; + let mut out = Vec::new(); + for (refname, _oid) in refs { + if !refname.starts_with("refs/heads/") && !refname.starts_with("refs/tags/") { + continue; + } + let listing = std::process::Command::new("git") + .args(["ls-tree", "-r", &refname]) + .current_dir(repo_path) + .output() + .context("git ls-tree -r failed")?; + if !listing.status.success() { + continue; + } + for line in String::from_utf8_lossy(&listing.stdout).lines() { + // " blob \t" + let Some((meta, path)) = line.split_once('\t') else { + continue; + }; + let mut parts = meta.split_whitespace(); + let _mode = parts.next(); + let kind = parts.next(); + let oid = parts.next(); + if kind == Some("blob") { + if let Some(oid) = oid { + out.push((oid.to_string(), format!("/{path}"))); + } + } + } + } + Ok(out) +} + +#[allow(dead_code)] +/// Blob OIDs the caller may not read. A blob is withheld only if visibility +/// denies the caller at *every* path the blob appears at; a blob that is also +/// reachable through an allowed path is sent (its content is public elsewhere). +/// +/// The whole-repo "/" gate is handled by the caller before this function runs: +/// if "/" denies, the caller gets a 404 and never reaches the filtered serve. +pub fn withheld_blob_oids( + repo_path: &Path, + rules: &[VisibilityRule], + is_public: bool, + owner_did: &str, + caller: Option<&str>, +) -> Result> { + let mut denied: HashSet = HashSet::new(); + let mut allowed: HashSet = HashSet::new(); + for (oid, path) in blob_paths(repo_path)? 
{ + match visibility_check(rules, is_public, owner_did, caller, &path) { + Decision::Deny => { + denied.insert(oid); + } + Decision::Allow => { + allowed.insert(oid); + } + } + } + Ok(denied.difference(&allowed).cloned().collect()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::VisibilityMode; + use chrono::Utc; + use std::process::Command; + use tempfile::TempDir; + + fn rule(path_glob: &str, readers: &[&str]) -> VisibilityRule { + VisibilityRule { + id: "x".into(), + repo_id: "r1".into(), + path_glob: path_glob.into(), + mode: VisibilityMode::B, + reader_dids: readers.iter().map(|s| s.to_string()).collect(), + created_by: "did:key:zOwner".into(), + created_at: Utc::now(), + } + } + + const OWNER: &str = "did:key:zOwner"; + + /// Build a bare repo with public/a.txt and secret/b.txt at one commit. + /// Returns (tempdir, bare_path, secret_blob_oid, public_blob_oid). + fn fixture() -> (TempDir, std::path::PathBuf, String, String) { + let td = TempDir::new().unwrap(); + let work = td.path().join("work"); + let bare = td.path().join("bare.git"); + let run = |args: &[&str], dir: &Path| { + let ok = Command::new("git") + .args(args) + .current_dir(dir) + .status() + .unwrap() + .success(); + assert!(ok, "git {args:?} failed"); + }; + std::fs::create_dir_all(work.join("public")).unwrap(); + std::fs::create_dir_all(work.join("secret")).unwrap(); + std::fs::write(work.join("public/a.txt"), b"public bytes\n").unwrap(); + std::fs::write(work.join("secret/b.txt"), b"TOP SECRET\n").unwrap(); + run(&["init", "-q"], &work); + run(&["config", "user.email", "t@t"], &work); + run(&["config", "user.name", "t"], &work); + run(&["add", "."], &work); + run(&["commit", "-qm", "init"], &work); + let oid = |path: &str| { + let out = Command::new("git") + .args(["rev-parse", &format!("HEAD:{path}")]) + .current_dir(&work) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout).trim().to_string() + }; + let secret = oid("secret/b.txt"); + let public = 
oid("public/a.txt"); + run( + &[ + "clone", + "-q", + "--bare", + work.to_str().unwrap(), + bare.to_str().unwrap(), + ], + td.path(), + ); + (td, bare, secret, public) + } + + #[test] + fn non_reader_withholds_only_the_private_blob() { + let (_td, bare, secret, public) = fixture(); + let rules = [rule("/secret/**", &["did:key:zFriend"])]; + let withheld = + withheld_blob_oids(&bare, &rules, true, OWNER, Some("did:key:zStranger")).unwrap(); + assert!(withheld.contains(&secret), "secret blob must be withheld"); + assert!( + !withheld.contains(&public), + "public blob must NOT be withheld" + ); + } + + #[test] + fn owner_withholds_nothing() { + let (_td, bare, secret, public) = fixture(); + let rules = [rule("/secret/**", &["did:key:zFriend"])]; + let withheld = withheld_blob_oids(&bare, &rules, true, OWNER, Some(OWNER)).unwrap(); + assert!(withheld.is_empty(), "owner sees everything"); + let _ = (secret, public); + } + + #[test] + fn listed_reader_withholds_nothing() { + let (_td, bare, _secret, _public) = fixture(); + let rules = [rule("/secret/**", &["did:key:zFriend"])]; + let withheld = + withheld_blob_oids(&bare, &rules, true, OWNER, Some("did:key:zFriend")).unwrap(); + assert!(withheld.is_empty(), "listed reader sees the subtree"); + } + + #[test] + fn no_subtree_rules_withholds_nothing() { + let (_td, bare, _secret, _public) = fixture(); + let withheld = withheld_blob_oids(&bare, &[], true, OWNER, None).unwrap(); + assert!( + withheld.is_empty(), + "public repo, no rules, nothing withheld" + ); + } +} From 00179836996774d2d2068a27240d2cdbca625a57 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:03:12 -0500 Subject: [PATCH 03/19] feat(node): filtered upload-pack serve that omits withheld blobs --- crates/gitlawb-node/src/git/smart_http.rs | 166 ++++++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/crates/gitlawb-node/src/git/smart_http.rs 
b/crates/gitlawb-node/src/git/smart_http.rs index 6a00107..e39b747 100644 --- a/crates/gitlawb-node/src/git/smart_http.rs +++ b/crates/gitlawb-node/src/git/smart_http.rs @@ -3,6 +3,7 @@ use axum::body::Body; use axum::http::StatusCode; use axum::response::Response; use bytes::Bytes; +use std::collections::HashSet; use std::path::Path; use std::process::Stdio; use tokio::io::AsyncWriteExt; @@ -120,3 +121,168 @@ fn pkt_line(data: &str) -> Vec { let len = data.len() + 4; format!("{len:04x}{data}").into_bytes() } + +/// Build a packfile containing every object reachable from all refs EXCEPT the +/// given blob OIDs. Commits and trees are always included, so SHAs stay intact; +/// only the named blobs are dropped. +// #[allow(dead_code)] removed when wired into the upload-pack handler in the next task. +#[allow(dead_code)] +pub fn build_filtered_pack(repo_path: &Path, withheld: &HashSet) -> Result> { + // All reachable objects as "oid [path]" lines. + let rev = std::process::Command::new("git") + .args(["rev-list", "--objects", "--all"]) + .current_dir(repo_path) + .output()?; + if !rev.status.success() { + bail!( + "git rev-list failed: {}", + String::from_utf8_lossy(&rev.stderr) + ); + } + let mut keep = Vec::new(); + for line in String::from_utf8_lossy(&rev.stdout).lines() { + let oid = line.split_whitespace().next().unwrap_or(""); + if oid.is_empty() || withheld.contains(oid) { + continue; + } + keep.push(oid.to_string()); + } + let mut child = std::process::Command::new("git") + .args(["pack-objects", "--stdout"]) + .current_dir(repo_path) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn()?; + { + use std::io::Write as _; + let mut stdin = child.stdin.take().expect("stdin"); + stdin.write_all(keep.join("\n").as_bytes())?; + stdin.write_all(b"\n")?; + } + let out = child.wait_with_output()?; + if !out.status.success() { + bail!( + "git pack-objects failed: {}", + String::from_utf8_lossy(&out.stderr) + ); + } + Ok(out.stdout) +} + 
+/// Serve a clone/fetch with the withheld blobs removed from the response pack. +/// Framing: the body wraps `build_filtered_pack` output in the upload-pack +/// `packfile` section with sideband-64k band 1, terminated by flush. +// #[allow(dead_code)] removed when wired into the upload-pack handler in the next task. +#[allow(dead_code)] +pub async fn upload_pack_excluding( + repo_path: &Path, + _request_body: Bytes, + withheld: &HashSet, +) -> Result { + let pack = build_filtered_pack(repo_path, withheld)?; + let mut body = Vec::new(); + body.extend_from_slice(&pkt_line("packfile\n")); + // sideband-64k: band 1 carries pack data, chunked under the pkt-line limit. + for chunk in pack.chunks(65515) { + let mut framed = Vec::with_capacity(chunk.len() + 1); + framed.push(0x01); + framed.extend_from_slice(chunk); + let len = framed.len() + 4; + body.extend_from_slice(format!("{len:04x}").as_bytes()); + body.extend_from_slice(&framed); + } + body.extend_from_slice(b"0000"); + Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/x-git-upload-pack-result") + .header("Cache-Control", "no-cache") + .body(Body::from(body))?) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::process::Command; + use tempfile::TempDir; + + /// List OIDs in a pack by writing it to a temp dir and running verify-pack. + pub(super) fn pack_object_ids(pack: &[u8]) -> std::collections::HashSet { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("test.pack"); + std::fs::write(&path, pack).unwrap(); + // index-pack creates the matching .idx next to the pack. 
+ let ok = Command::new("git") + .args(["index-pack", path.to_str().unwrap()]) + .status() + .unwrap() + .success(); + assert!(ok, "index-pack failed"); + let out = Command::new("git") + .args(["verify-pack", "-v", path.to_str().unwrap()]) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout) + .lines() + .filter_map(|l| l.split_whitespace().next()) + .filter(|t| t.len() == 40 && t.chars().all(|c| c.is_ascii_hexdigit())) + .map(|s| s.to_string()) + .collect() + } + + #[tokio::test] + async fn filtered_serve_excludes_withheld_blob() { + // Build a bare repo, capture the secret + public blob OIDs. + let td = TempDir::new().unwrap(); + let work = td.path().join("work"); + let bare = td.path().join("bare.git"); + let g = |args: &[&str], dir: &std::path::Path| { + assert!(Command::new("git") + .args(args) + .current_dir(dir) + .status() + .unwrap() + .success()); + }; + std::fs::create_dir_all(work.join("secret")).unwrap(); + std::fs::create_dir_all(work.join("public")).unwrap(); + std::fs::write(work.join("public/a.txt"), b"pub\n").unwrap(); + std::fs::write(work.join("secret/b.txt"), b"SECRET\n").unwrap(); + g(&["init", "-q"], &work); + g(&["config", "user.email", "t@t"], &work); + g(&["config", "user.name", "t"], &work); + g(&["add", "."], &work); + g(&["commit", "-qm", "init"], &work); + let oid = |p: &str| { + let o = Command::new("git") + .args(["rev-parse", &format!("HEAD:{p}")]) + .current_dir(&work) + .output() + .unwrap(); + String::from_utf8_lossy(&o.stdout).trim().to_string() + }; + let secret = oid("secret/b.txt"); + let public = oid("public/a.txt"); + g( + &[ + "clone", + "-q", + "--bare", + work.to_str().unwrap(), + bare.to_str().unwrap(), + ], + td.path(), + ); + + let mut withheld = std::collections::HashSet::new(); + withheld.insert(secret.clone()); + + let pack = build_filtered_pack(&bare, &withheld).unwrap(); + let ids = pack_object_ids(&pack); + assert!(ids.contains(&public), "public blob must be in the pack"); + assert!( + 
!ids.contains(&secret), + "secret blob must NOT be in the pack" + ); + } +} From e292b79fc0014aa849332129559267ffc2f5ea26 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:05:24 -0500 Subject: [PATCH 04/19] feat(node): serve filtered pack when caller has withheld subtree blobs --- crates/gitlawb-node/src/api/repos.rs | 40 +++++++++++++------ crates/gitlawb-node/src/git/smart_http.rs | 4 -- .../gitlawb-node/src/git/visibility_pack.rs | 2 - 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 0993d4b..fa9810a 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -12,7 +12,7 @@ use uuid::Uuid; use crate::cert; use crate::error::{AppError, Result}; -use crate::git::{smart_http, store}; +use crate::git::{smart_http, store, visibility_pack}; use crate::state::AppState; use crate::visibility::{visibility_check, Decision}; use crate::webhooks; @@ -392,18 +392,32 @@ pub async fn git_upload_pack( .await .map_err(|e| AppError::Git(e.to_string()))?; let body_len = body.len(); - let resp = smart_http::upload_pack(&disk_path, body) - .await - .map_err(|e| { - let msg = e.to_string(); - if msg.contains("bad line length") || msg.contains("protocol error") { - tracing::warn!(repo = %name, err = %msg, "git-upload-pack: bad client request"); - AppError::BadRequest(msg) - } else { - tracing::error!(repo = %name, err = %msg, "git-upload-pack failed"); - AppError::Git(msg) - } - })?; + + let withheld = visibility_pack::withheld_blob_oids( + &disk_path, + &rules, + record.is_public, + &record.owner_did, + caller, + ) + .map_err(|e| AppError::Git(e.to_string()))?; + + let resp = if withheld.is_empty() { + smart_http::upload_pack(&disk_path, body).await + } else { + tracing::info!(repo = %name, caller = ?caller, withheld = withheld.len(), "serving filtered pack"); + 
smart_http::upload_pack_excluding(&disk_path, body, &withheld).await + } + .map_err(|e| { + let msg = e.to_string(); + if msg.contains("bad line length") || msg.contains("protocol error") { + tracing::warn!(repo = %name, err = %msg, "git-upload-pack: bad client request"); + AppError::BadRequest(msg) + } else { + tracing::error!(repo = %name, err = %msg, "git-upload-pack failed"); + AppError::Git(msg) + } + })?; crate::metrics::record_fetch(&format!("{owner}/{name}")); crate::metrics::observe_pack_size(body_len as f64); Ok(resp) diff --git a/crates/gitlawb-node/src/git/smart_http.rs b/crates/gitlawb-node/src/git/smart_http.rs index e39b747..a2ac294 100644 --- a/crates/gitlawb-node/src/git/smart_http.rs +++ b/crates/gitlawb-node/src/git/smart_http.rs @@ -125,8 +125,6 @@ fn pkt_line(data: &str) -> Vec { /// Build a packfile containing every object reachable from all refs EXCEPT the /// given blob OIDs. Commits and trees are always included, so SHAs stay intact; /// only the named blobs are dropped. -// #[allow(dead_code)] removed when wired into the upload-pack handler in the next task. -#[allow(dead_code)] pub fn build_filtered_pack(repo_path: &Path, withheld: &HashSet) -> Result> { // All reachable objects as "oid [path]" lines. let rev = std::process::Command::new("git") @@ -173,8 +171,6 @@ pub fn build_filtered_pack(repo_path: &Path, withheld: &HashSet) -> Resu /// Serve a clone/fetch with the withheld blobs removed from the response pack. /// Framing: the body wraps `build_filtered_pack` output in the upload-pack /// `packfile` section with sideband-64k band 1, terminated by flush. -// #[allow(dead_code)] removed when wired into the upload-pack handler in the next task. 
-#[allow(dead_code)] pub async fn upload_pack_excluding( repo_path: &Path, _request_body: Bytes, diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs index bf3c45f..d386415 100644 --- a/crates/gitlawb-node/src/git/visibility_pack.rs +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -10,7 +10,6 @@ use anyhow::{Context, Result}; use std::collections::HashSet; use std::path::Path; -#[allow(dead_code)] /// List every (blob_oid, "/repo/relative/path") pair reachable from any branch /// ref in `repo_path`. Uses `git ls-tree -r` per ref so each path a blob lives /// at is represented (the same blob content can appear at several paths). Paths @@ -50,7 +49,6 @@ fn blob_paths(repo_path: &Path) -> Result> { Ok(out) } -#[allow(dead_code)] /// Blob OIDs the caller may not read. A blob is withheld only if visibility /// denies the caller at *every* path the blob appears at; a blob that is also /// reachable through an allowed path is sent (its content is public elsewhere). 
From 1474744c0ac026e9fbc88612cb2736d3b3a9406f Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:07:52 -0500 Subject: [PATCH 05/19] test(node): end-to-end assert served pack omits withheld blob --- crates/gitlawb-node/src/git/smart_http.rs | 79 +++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/crates/gitlawb-node/src/git/smart_http.rs b/crates/gitlawb-node/src/git/smart_http.rs index a2ac294..1a59886 100644 --- a/crates/gitlawb-node/src/git/smart_http.rs +++ b/crates/gitlawb-node/src/git/smart_http.rs @@ -281,4 +281,83 @@ mod tests { "secret blob must NOT be in the pack" ); } + + #[tokio::test] + async fn client_clone_lacks_withheld_blob_bytes() { + use axum::body::to_bytes; + let td = TempDir::new().unwrap(); + let work = td.path().join("work"); + let bare = td.path().join("bare.git"); + let g = |args: &[&str], dir: &std::path::Path| { + assert!(Command::new("git") + .args(args) + .current_dir(dir) + .status() + .unwrap() + .success()); + }; + std::fs::create_dir_all(work.join("secret")).unwrap(); + std::fs::create_dir_all(work.join("public")).unwrap(); + std::fs::write(work.join("public/a.txt"), b"pub\n").unwrap(); + std::fs::write(work.join("secret/b.txt"), b"SECRET\n").unwrap(); + g(&["init", "-q"], &work); + g(&["config", "user.email", "t@t"], &work); + g(&["config", "user.name", "t"], &work); + g(&["add", "."], &work); + g(&["commit", "-qm", "init"], &work); + let secret_oid = { + let o = Command::new("git") + .args(["rev-parse", "HEAD:secret/b.txt"]) + .current_dir(&work) + .output() + .unwrap(); + String::from_utf8_lossy(&o.stdout).trim().to_string() + }; + g( + &[ + "clone", + "-q", + "--bare", + work.to_str().unwrap(), + bare.to_str().unwrap(), + ], + td.path(), + ); + + let mut withheld = std::collections::HashSet::new(); + withheld.insert(secret_oid.clone()); + + let resp = upload_pack_excluding(&bare, Bytes::new(), &withheld) + .await + .unwrap(); + let body = 
to_bytes(resp.into_body(), usize::MAX).await.unwrap(); + let ids = pack_object_ids(&extract_pack(&body)); + assert!( + !ids.contains(&secret_oid), + "withheld blob must be absent from served pack" + ); + } + + /// Strip the upload-pack `packfile` section framing, returning the raw pack. + /// Mirrors how a client de-frames the sideband-64k band-1 stream. + fn extract_pack(body: &[u8]) -> Vec { + let mut out = Vec::new(); + let mut i = 0; + while i + 4 <= body.len() { + let len = + usize::from_str_radix(std::str::from_utf8(&body[i..i + 4]).unwrap_or("0000"), 16) + .unwrap_or(0); + if len == 0 { + i += 4; + continue; + } + let chunk = &body[i + 4..i + len]; + // band 1 = pack data; skip "packfile\n" control line and other bands. + if chunk.first() == Some(&0x01) { + out.extend_from_slice(&chunk[1..]); + } + i += len; + } + out + } } From 694fddb5622f7811bbf0a80a81dea93f09eb4a51 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:08:14 -0500 Subject: [PATCH 06/19] docs(node): note why info/refs is not gated on subtree visibility --- crates/gitlawb-node/src/api/repos.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index fa9810a..ca5ed05 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -330,6 +330,8 @@ pub async fn git_info_refs( if service == "git-upload-pack" { let rules = state.db.list_visibility_rules(&record.id).await?; let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + // Subtree (mode B) rules do not gate the advertisement: refs expose commit + // tips only, and blob withholding happens in the upload-pack pack build. 
if visibility_check(&rules, record.is_public, &record.owner_did, caller, "/") == Decision::Deny { From 9413e641e83203215ef280bd1621a6cecc93a6ac Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:27:29 -0500 Subject: [PATCH 07/19] fix(node): frame filtered serve as protocol v0 and prove it with a real clone upload_pack_excluding emitted a v2 packfile section, but info_refs advertises v0, so real clients negotiated v0 and rejected the response with 'expected ACK/NAK, got packfile'. Frame the v0 stateless-rpc shape instead (NAK, then the pack via side-band-64k when offered). Add an end-to-end test that stands up info_refs + upload_pack_excluding and runs a real git partial clone, asserting the withheld blob's bytes never reach the client while its tree entry and SHA stay visible. A stock full clone cannot consume the pack (it is not closed under reachability, so fetch fails the connectivity check); a partial clone is required. --- crates/gitlawb-node/src/git/smart_http.rs | 235 ++++++++++++++++++++-- 1 file changed, 214 insertions(+), 21 deletions(-) diff --git a/crates/gitlawb-node/src/git/smart_http.rs b/crates/gitlawb-node/src/git/smart_http.rs index 1a59886..c92cb79 100644 --- a/crates/gitlawb-node/src/git/smart_http.rs +++ b/crates/gitlawb-node/src/git/smart_http.rs @@ -169,26 +169,49 @@ pub fn build_filtered_pack(repo_path: &Path, withheld: &HashSet) -> Resu } /// Serve a clone/fetch with the withheld blobs removed from the response pack. -/// Framing: the body wraps `build_filtered_pack` output in the upload-pack -/// `packfile` section with sideband-64k band 1, terminated by flush. +/// +/// The framing is git protocol v0 (`NAK` then the pack), matching the v0 ref +/// advertisement that `info_refs` emits (it runs `git upload-pack +/// --advertise-refs` without `GIT_PROTOCOL=version=2`, so clients negotiate v0). +/// If `info_refs` ever advertises v2, this serve path must learn v2 framing too. 
+/// +/// Because the pack deliberately omits blobs that the sent trees still +/// reference, the pack is not closed under reachability. A stock full clone +/// rejects it at fetch time ("remote did not send all necessary objects"); only +/// a partial clone (the client passes `--filter`, marking a promisor remote) +/// accepts the pack with the private blobs absent. Tree and commit SHAs stay +/// intact either way. The clean partial-clone client UX is a separate follow-up +/// (git-remote-gitlawb); the security guarantee (private bytes never leave the +/// node) holds regardless of client. pub async fn upload_pack_excluding( repo_path: &Path, - _request_body: Bytes, + request_body: Bytes, withheld: &HashSet, ) -> Result { let pack = build_filtered_pack(repo_path, withheld)?; + + // The client lists its capabilities on the first `want` line. Honor + // side-band-64k when offered (every modern smart-HTTP client offers it); + // otherwise stream the raw pack after NAK. + let sideband = memmem(&request_body, b"side-band-64k"); + let mut body = Vec::new(); - body.extend_from_slice(&pkt_line("packfile\n")); - // sideband-64k: band 1 carries pack data, chunked under the pkt-line limit. - for chunk in pack.chunks(65515) { - let mut framed = Vec::with_capacity(chunk.len() + 1); - framed.push(0x01); - framed.extend_from_slice(chunk); - let len = framed.len() + 4; - body.extend_from_slice(format!("{len:04x}").as_bytes()); - body.extend_from_slice(&framed); + body.extend_from_slice(&pkt_line("NAK\n")); + if sideband { + // Band 1 carries pack data, chunked under the pkt-line size limit. 
+ for chunk in pack.chunks(65515) { + let mut framed = Vec::with_capacity(chunk.len() + 1); + framed.push(0x01); + framed.extend_from_slice(chunk); + let len = framed.len() + 4; + body.extend_from_slice(format!("{len:04x}").as_bytes()); + body.extend_from_slice(&framed); + } + body.extend_from_slice(b"0000"); + } else { + body.extend_from_slice(&pack); } - body.extend_from_slice(b"0000"); + Ok(Response::builder() .status(StatusCode::OK) .header("Content-Type", "application/x-git-upload-pack-result") @@ -196,6 +219,17 @@ pub async fn upload_pack_excluding( .body(Body::from(body))?) } +/// True if `needle` occurs anywhere in `haystack`. Small substring scan used to +/// detect a client capability token in the upload-pack request body. +fn memmem(haystack: &[u8], needle: &[u8]) -> bool { + if needle.is_empty() || haystack.len() < needle.len() { + return needle.is_empty(); + } + haystack + .windows(needle.len()) + .any(|window| window == needle) +} + #[cfg(test)] mod tests { use super::*; @@ -305,14 +339,16 @@ mod tests { g(&["config", "user.name", "t"], &work); g(&["add", "."], &work); g(&["commit", "-qm", "init"], &work); - let secret_oid = { + let oid = |p: &str| { let o = Command::new("git") - .args(["rev-parse", "HEAD:secret/b.txt"]) + .args(["rev-parse", &format!("HEAD:{p}")]) .current_dir(&work) .output() .unwrap(); String::from_utf8_lossy(&o.stdout).trim().to_string() }; + let secret_oid = oid("secret/b.txt"); + let public_oid = oid("public/a.txt"); g( &[ "clone", @@ -327,19 +363,27 @@ mod tests { let mut withheld = std::collections::HashSet::new(); withheld.insert(secret_oid.clone()); - let resp = upload_pack_excluding(&bare, Bytes::new(), &withheld) - .await - .unwrap(); + // A realistic v0 request advertises side-band-64k, so the serve frames + // the pack in band 1 (the path real clients exercise). 
+ let req = Bytes::from_static( + b"0098want 0000000000000000000000000000000000000000 \ + side-band-64k ofs-delta agent=git/2\n00000009done\n", + ); + let resp = upload_pack_excluding(&bare, req, &withheld).await.unwrap(); let body = to_bytes(resp.into_body(), usize::MAX).await.unwrap(); let ids = pack_object_ids(&extract_pack(&body)); + assert!( + ids.contains(&public_oid), + "public blob must be present in served pack" + ); assert!( !ids.contains(&secret_oid), "withheld blob must be absent from served pack" ); } - /// Strip the upload-pack `packfile` section framing, returning the raw pack. - /// Mirrors how a client de-frames the sideband-64k band-1 stream. + /// Strip the v0 upload-pack framing (NAK line + sideband-64k bands), + /// returning the raw pack. Mirrors how a client de-frames the band-1 stream. fn extract_pack(body: &[u8]) -> Vec { let mut out = Vec::new(); let mut i = 0; @@ -352,7 +396,7 @@ mod tests { continue; } let chunk = &body[i + 4..i + len]; - // band 1 = pack data; skip "packfile\n" control line and other bands. + // band 1 = pack data; skip the NAK line and any other bands. if chunk.first() == Some(&0x01) { out.extend_from_slice(&chunk[1..]); } @@ -360,4 +404,153 @@ mod tests { } out } + + /// End-to-end: a real `git` client clones through `info_refs` + + /// `upload_pack_excluding` and ends up without the withheld blob's bytes + /// while still seeing its tree entry (SHA). Uses a partial clone + /// (`--filter`) because a pack that omits a referenced blob is only + /// accepted by a promisor-aware client; a stock full clone is refused at + /// fetch time by the connectivity check. 
+ #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn real_git_partial_clone_omits_withheld_blob() { + use axum::extract::{Query, State}; + use axum::routing::{get, post}; + use axum::Router; + use std::collections::HashMap; + use std::sync::Arc; + + let td = TempDir::new().unwrap(); + let work = td.path().join("work"); + let bare = td.path().join("bare.git"); + let g = |args: &[&str], dir: &std::path::Path| { + assert!(Command::new("git") + .args(args) + .current_dir(dir) + .status() + .unwrap() + .success()); + }; + std::fs::create_dir_all(work.join("secret")).unwrap(); + std::fs::create_dir_all(work.join("public")).unwrap(); + std::fs::write(work.join("public/a.txt"), b"pub\n").unwrap(); + std::fs::write(work.join("secret/b.txt"), b"SECRET\n").unwrap(); + g(&["init", "-q"], &work); + g(&["config", "user.email", "t@t"], &work); + g(&["config", "user.name", "t"], &work); + g(&["add", "."], &work); + g(&["commit", "-qm", "init"], &work); + let oid = |p: &str| { + let o = Command::new("git") + .args(["rev-parse", &format!("HEAD:{p}")]) + .current_dir(&work) + .output() + .unwrap(); + String::from_utf8_lossy(&o.stdout).trim().to_string() + }; + let secret_oid = oid("secret/b.txt"); + let public_oid = oid("public/a.txt"); + g( + &[ + "clone", + "-q", + "--bare", + work.to_str().unwrap(), + bare.to_str().unwrap(), + ], + td.path(), + ); + + #[derive(Clone)] + struct St { + repo: std::path::PathBuf, + withheld: HashSet, + } + let state = Arc::new(St { + repo: bare.clone(), + withheld: HashSet::from([secret_oid.clone()]), + }); + + async fn refs( + State(st): State>, + Query(q): Query>, + ) -> Response { + let service = q.get("service").cloned().unwrap_or_default(); + info_refs(&st.repo, &service).await.unwrap() + } + async fn pack(State(st): State>, body: Bytes) -> Response { + upload_pack_excluding(&st.repo, body, &st.withheld) + .await + .unwrap() + } + + let app = Router::new() + .route("/repo.git/info/refs", get(refs)) + 
.route("/repo.git/git-upload-pack", post(pack)) + .with_state(state); + + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let port = listener.local_addr().unwrap().port(); + let server = tokio::spawn(async move { + axum::serve(listener, app).await.unwrap(); + }); + + let dest = td.path().join("clone"); + let url = format!("http://127.0.0.1:{port}/repo.git"); + let dest_s = dest.to_str().unwrap().to_string(); + let out = tokio::task::spawn_blocking(move || { + Command::new("git") + .args([ + "-c", + "protocol.version=2", + "clone", + "--filter=blob:none", + "--no-checkout", + "-q", + &url, + &dest_s, + ]) + .output() + .unwrap() + }) + .await + .unwrap(); + + assert!( + out.status.success(), + "clone failed: {}", + String::from_utf8_lossy(&out.stderr) + ); + + // Enumerate exactly the objects the clone physically received (no + // promisor lazy-fetch): the public blob is present, the withheld blob is + // not. This asserts on the bytes that actually crossed the wire. + let local = Command::new("git") + .args(["cat-file", "--batch-all-objects", "--batch-check"]) + .current_dir(&dest) + .output() + .unwrap(); + let local = String::from_utf8_lossy(&local.stdout); + assert!( + local.contains(&public_oid), + "public blob should be present in the clone" + ); + assert!( + !local.contains(&secret_oid), + "withheld blob bytes must be absent from the clone" + ); + + // The tree entry (and SHA) for the private file is still visible. 
+ let tree = Command::new("git") + .args(["ls-tree", "-r", "HEAD"]) + .current_dir(&dest) + .output() + .unwrap(); + let tree = String::from_utf8_lossy(&tree.stdout); + assert!( + tree.contains(&secret_oid) && tree.contains("secret/b.txt"), + "the private path and its blob SHA must remain visible: {tree}" + ); + + server.abort(); + } } From 72487af44d739db2a89c8c22124f1e2db16c1eb7 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:27:59 -0500 Subject: [PATCH 08/19] docs(node): correct Phase 3 caveat (full clone refused at fetch, not checkout) --- .../plans/2026-06-05-phase3-subtree-content-withholding.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md b/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md index 6aaa474..0ddda81 100644 --- a/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md +++ b/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md @@ -8,7 +8,9 @@ **Tech Stack:** Rust, axum, the system `git` CLI (shelled out, as the codebase already does in `git/store.rs` and `git/smart_http.rs`), `tempfile` for fixture repos in tests. -**Scope boundary:** This plan covers the node-side enforcement and the security guarantee (private blob bytes are never placed in the served pack), proven by inspecting the produced pack. It deliberately does NOT cover: the `git-remote-gitlawb` client-side change that lets a non-reader get a *clean* partial checkout (a stock `git clone` of a repo with a withheld blob will fail at checkout on the missing object; that UX work is a separate follow-up plan), filtered-pack caching, or incremental-fetch (`have`-line) hardening beyond what falls out naturally. Those are listed under "Out of scope / follow-ups" at the end. 
+**Scope boundary:** This plan covers the node-side enforcement and the security guarantee (private blob bytes are never placed in the served pack), proven by inspecting the produced pack and by a real `git` partial clone. It deliberately does NOT cover: the `git-remote-gitlawb` client-side change that lets a non-reader get a *clean* clone without passing `--filter` (see the corrected client-behavior note below), filtered-pack caching, or incremental-fetch (`have`-line) hardening beyond what falls out naturally. Those are listed under "Out of scope / follow-ups" at the end. + +**Corrected client behavior (verified during execution, supersedes an earlier assumption in this plan):** a served pack that omits a blob still referenced by a sent tree is not closed under reachability. A stock *full* `git clone` therefore rejects it at *fetch* time with "remote did not send all necessary objects" (the connectivity check), NOT at checkout. Only a *partial* clone (the client passes `--filter`, which marks a promisor remote and relaxes that check) accepts the pack with the private blob absent; tree and commit SHAs stay intact. The security guarantee (private bytes never leave the node) holds for every client. Making a normal `git clone` Just Work without `--filter` is the git-remote-gitlawb follow-up. --- @@ -703,7 +705,7 @@ Set a subtree rule on a local repo via `gl visibility`, clone as a non-reader th ## Out of scope / follow-ups (separate plans) -1. **`git-remote-gitlawb` partial-clone UX.** Make a non-reader's clone produce a clean partial checkout rather than a checkout error on the missing blob: the helper requests partial-clone semantics and treats withheld blobs as deliberately absent. Without this, a stock `git clone` of a repo with a withheld blob succeeds at fetch but errors at checkout. The security guarantee (bytes never sent) holds regardless; this is purely UX. +1. 
**`git-remote-gitlawb` partial-clone UX.** Make a non-reader's clone Just Work without the user passing `--filter`: the helper requests partial-clone semantics, advertises the `filter` capability cleanly (so there is no "filtering not recognized by server, ignoring" warning), and treats withheld blobs as deliberately absent. Without this, a stock full `git clone` of a repo with a withheld blob is refused at fetch time ("remote did not send all necessary objects"); only `git clone --filter=...` succeeds. The security guarantee (bytes never sent) holds regardless; this is purely UX. 2. **Filtered-pack caching.** `build_filtered_pack` recomputes per request. If hot, cache by (repo, tip-OIDs, withheld-set) and invalidate on push. 3. **Incremental fetch (`have` lines).** This plan targets the clone case. Confirm and, if needed, harden the filtered serve for fetches that send `have` lines so withheld blobs are never sent incrementally either. 4. **Replication-path enforcement (Phase 2).** Still blocked on the maintainer A/B decision; unrelated to this HTTP-path work. From b0af815bf7fe9a12cd5a40d78eea5ca9e0dbc49e Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:40:41 -0500 Subject: [PATCH 09/19] test(node): prove incremental fetch still withholds; document negotiation choice Add a real-git test that partial-clones, pushes a new commit server-side, then fetches: the new object arrives and the withheld blob stays absent. This pins down that ignoring have/want negotiation (always sending a self-contained pack of all refs minus withheld, with NAK) is correct for both clone and fetch; the only cost is a fetch re-sends the full object set. Refactor the real-git tests onto a shared server harness and document the negotiation decision in code and in the plan's follow-ups. 
--- crates/gitlawb-node/src/git/smart_http.rs | 262 +++++++++++++----- ...6-05-phase3-subtree-content-withholding.md | 2 +- 2 files changed, 195 insertions(+), 69 deletions(-) diff --git a/crates/gitlawb-node/src/git/smart_http.rs b/crates/gitlawb-node/src/git/smart_http.rs index c92cb79..0609f98 100644 --- a/crates/gitlawb-node/src/git/smart_http.rs +++ b/crates/gitlawb-node/src/git/smart_http.rs @@ -183,6 +183,14 @@ pub fn build_filtered_pack(repo_path: &Path, withheld: &HashSet) -> Resu /// intact either way. The clean partial-clone client UX is a separate follow-up /// (git-remote-gitlawb); the security guarantee (private bytes never leave the /// node) holds regardless of client. +/// +/// Negotiation is intentionally ignored: rather than honoring the client's +/// `want`/`have` lines, this always sends a self-contained pack of every object +/// across all refs minus the withheld blobs, and replies `NAK`. A fresh clone +/// and an incremental fetch are both correct (the client de-duplicates objects +/// it already has); the cost is that a fetch re-sends the full object set +/// instead of a thin delta. Honoring negotiation for smaller fetch packs is an +/// optimization follow-up, not a correctness requirement. pub async fn upload_pack_excluding( repo_path: &Path, request_body: Bytes, @@ -405,40 +413,80 @@ mod tests { out } - /// End-to-end: a real `git` client clones through `info_refs` + - /// `upload_pack_excluding` and ends up without the withheld blob's bytes - /// while still seeing its tree entry (SHA). Uses a partial clone - /// (`--filter`) because a pack that omits a referenced blob is only - /// accepted by a promisor-aware client; a stock full clone is refused at - /// fetch time by the connectivity check. 
- #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn real_git_partial_clone_omits_withheld_blob() { - use axum::extract::{Query, State}; + // Shared harness for the real-git server tests: a minimal smart-HTTP server + // backed by the real info_refs + upload_pack_excluding. + + #[derive(Clone)] + struct FilterState { + repo: std::path::PathBuf, + withheld: HashSet<String>, + } + + async fn refs_handler( + axum::extract::State(st): axum::extract::State<std::sync::Arc<FilterState>>, + axum::extract::Query(q): axum::extract::Query<std::collections::HashMap<String, String>>, + ) -> Response { + let service = q.get("service").cloned().unwrap_or_default(); + info_refs(&st.repo, &service).await.unwrap() + } + + async fn pack_handler( + axum::extract::State(st): axum::extract::State<std::sync::Arc<FilterState>>, + body: Bytes, + ) -> Response { + upload_pack_excluding(&st.repo, body, &st.withheld) + .await + .unwrap() + } + + /// Spawn the server for `bare`, withholding `withheld`. Returns the clone URL + /// and the server task (abort it when done). + async fn spawn_filter_server( + bare: std::path::PathBuf, + withheld: HashSet<String>, + ) -> (String, tokio::task::JoinHandle<()>) { use axum::routing::{get, post}; - use axum::Router; - use std::collections::HashMap; - use std::sync::Arc; + let state = std::sync::Arc::new(FilterState { + repo: bare, + withheld, + }); + let app = axum::Router::new() + .route("/repo.git/info/refs", get(refs_handler)) + .route("/repo.git/git-upload-pack", post(pack_handler)) + .with_state(state); + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let port = listener.local_addr().unwrap().port(); + let handle = tokio::spawn(async move { + axum::serve(listener, app).await.unwrap(); + }); + (format!("http://127.0.0.1:{port}/repo.git"), handle) + } - let td = TempDir::new().unwrap(); + fn run_git(args: &[&str], dir: &std::path::Path) { + assert!(Command::new("git") + .args(args) + .current_dir(dir) + .status() + .unwrap() + .success()); + } + + /// Build a work repo (public/a.txt, secret/b.txt) and a bare
clone of it. + /// Returns (work, bare, secret_blob_oid, public_blob_oid). + fn fixture_with_secret( + td: &TempDir, + ) -> (std::path::PathBuf, std::path::PathBuf, String, String) { let work = td.path().join("work"); let bare = td.path().join("bare.git"); - let g = |args: &[&str], dir: &std::path::Path| { - assert!(Command::new("git") - .args(args) - .current_dir(dir) - .status() - .unwrap() - .success()); - }; std::fs::create_dir_all(work.join("secret")).unwrap(); std::fs::create_dir_all(work.join("public")).unwrap(); std::fs::write(work.join("public/a.txt"), b"pub\n").unwrap(); std::fs::write(work.join("secret/b.txt"), b"SECRET\n").unwrap(); - g(&["init", "-q"], &work); - g(&["config", "user.email", "t@t"], &work); - g(&["config", "user.name", "t"], &work); - g(&["add", "."], &work); - g(&["commit", "-qm", "init"], &work); + run_git(&["init", "-q"], &work); + run_git(&["config", "user.email", "t@t"], &work); + run_git(&["config", "user.name", "t"], &work); + run_git(&["add", "."], &work); + run_git(&["commit", "-qm", "init"], &work); let oid = |p: &str| { let o = Command::new("git") .args(["rev-parse", &format!("HEAD:{p}")]) @@ -449,7 +497,7 @@ mod tests { }; let secret_oid = oid("secret/b.txt"); let public_oid = oid("public/a.txt"); - g( + run_git( &[ "clone", "-q", @@ -459,43 +507,34 @@ mod tests { ], td.path(), ); + (work, bare, secret_oid, public_oid) + } - #[derive(Clone)] - struct St { - repo: std::path::PathBuf, - withheld: HashSet<String>, - } - let state = Arc::new(St { - repo: bare.clone(), - withheld: HashSet::from([secret_oid.clone()]), - }); + /// Enumerate exactly the objects a repo physically has (no promisor lazy + /// fetch), so tests assert on what bytes actually crossed the wire.
+ fn local_object_ids(repo: &std::path::Path) -> String { + let out = Command::new("git") + .args(["cat-file", "--batch-all-objects", "--batch-check"]) + .current_dir(repo) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout).into_owned() + } - async fn refs( - State(st): State<Arc<St>>, - Query(q): Query<HashMap<String, String>>, - ) -> Response { - let service = q.get("service").cloned().unwrap_or_default(); - info_refs(&st.repo, &service).await.unwrap() - } - async fn pack(State(st): State<Arc<St>>, body: Bytes) -> Response { - upload_pack_excluding(&st.repo, body, &st.withheld) - .await - .unwrap() - } + /// End-to-end: a real `git` client clones through `info_refs` + + /// `upload_pack_excluding` and ends up without the withheld blob's bytes + /// while still seeing its tree entry (SHA). Uses a partial clone + /// (`--filter`) because a pack that omits a referenced blob is only + /// accepted by a promisor-aware client; a stock full clone is refused at + /// fetch time by the connectivity check. + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn real_git_partial_clone_omits_withheld_blob() { + let td = TempDir::new().unwrap(); + let (_work, bare, secret_oid, public_oid) = fixture_with_secret(&td); - let app = Router::new() - .route("/repo.git/info/refs", get(refs)) - .route("/repo.git/git-upload-pack", post(pack)) - .with_state(state); - - let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); - let port = listener.local_addr().unwrap().port(); - let server = tokio::spawn(async move { - axum::serve(listener, app).await.unwrap(); - }); + let (url, server) = spawn_filter_server(bare, HashSet::from([secret_oid.clone()])).await; let dest = td.path().join("clone"); - let url = format!("http://127.0.0.1:{port}/repo.git"); let dest_s = dest.to_str().unwrap().to_string(); let out = tokio::task::spawn_blocking(move || { Command::new("git") @@ -521,15 +560,8 @@ mod tests { String::from_utf8_lossy(&out.stderr) ); - // Enumerate exactly the objects the clone
physically received (no - // promisor lazy-fetch): the public blob is present, the withheld blob is - // not. This asserts on the bytes that actually crossed the wire. - let local = Command::new("git") - .args(["cat-file", "--batch-all-objects", "--batch-check"]) - .current_dir(&dest) - .output() - .unwrap(); - let local = String::from_utf8_lossy(&local.stdout); + // The public blob is present in the clone, the withheld blob is not. + let local = local_object_ids(&dest); assert!( local.contains(&public_oid), "public blob should be present in the clone" @@ -553,4 +585,98 @@ mod tests { server.abort(); } + + /// End-to-end: an incremental `git fetch` after a partial clone still works + /// and still withholds the private blob. The serve path ignores the client's + /// have/want negotiation and always sends a self-contained pack of all refs + /// minus the withheld blobs (it replies NAK, so the client treats it as "no + /// common commits" and accepts the full set). This is correct, just not + /// bandwidth-optimal; thin-pack/negotiation is an optimization follow-up. + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn real_git_fetch_after_partial_clone_still_withholds() { + let td = TempDir::new().unwrap(); + let (work, bare, secret_oid, _public_oid) = fixture_with_secret(&td); + let branch = { + let o = Command::new("git") + .args(["symbolic-ref", "--short", "HEAD"]) + .current_dir(&work) + .output() + .unwrap(); + String::from_utf8_lossy(&o.stdout).trim().to_string() + }; + + let (url, server) = + spawn_filter_server(bare.clone(), HashSet::from([secret_oid.clone()])).await; + + // Partial-clone the initial state. 
+ let dest = td.path().join("clone"); + let dest_s = dest.to_str().unwrap().to_string(); + let url_c = url.clone(); + let out = tokio::task::spawn_blocking(move || { + Command::new("git") + .args([ + "-c", + "protocol.version=2", + "clone", + "--filter=blob:none", + "--no-checkout", + "-q", + &url_c, + &dest_s, + ]) + .output() + .unwrap() + }) + .await + .unwrap(); + assert!( + out.status.success(), + "clone failed: {}", + String::from_utf8_lossy(&out.stderr) + ); + + // Add a new public commit on the server side. + std::fs::write(work.join("public/c.txt"), b"v2\n").unwrap(); + run_git(&["add", "."], &work); + run_git(&["commit", "-qm", "c2"], &work); + let new_oid = { + let o = Command::new("git") + .args(["rev-parse", "HEAD:public/c.txt"]) + .current_dir(&work) + .output() + .unwrap(); + String::from_utf8_lossy(&o.stdout).trim().to_string() + }; + run_git(&["push", "-q", bare.to_str().unwrap(), &branch], &work); + + // Incremental fetch: the client has c1 and asks for the update. + let dest_f = dest.clone(); + let out = tokio::task::spawn_blocking(move || { + Command::new("git") + .args(["-c", "protocol.version=2", "fetch", "-q", "origin"]) + .current_dir(&dest_f) + .output() + .unwrap() + }) + .await + .unwrap(); + assert!( + out.status.success(), + "fetch failed: {}", + String::from_utf8_lossy(&out.stderr) + ); + + // The new commit's blob arrived; the withheld blob is still absent. 
+ let local = local_object_ids(&dest); + assert!( + local.contains(&new_oid), + "the new commit's blob must be fetched" + ); + assert!( + !local.contains(&secret_oid), + "withheld blob must remain absent after fetch" + ); + + server.abort(); + } } diff --git a/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md b/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md index 0ddda81..d79c0df 100644 --- a/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md +++ b/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md @@ -707,6 +707,6 @@ Set a subtree rule on a local repo via `gl visibility`, clone as a non-reader th 1. **`git-remote-gitlawb` partial-clone UX.** Make a non-reader's clone Just Work without the user passing `--filter`: the helper requests partial-clone semantics, advertises the `filter` capability cleanly (so there is no "filtering not recognized by server, ignoring" warning), and treats withheld blobs as deliberately absent. Without this, a stock full `git clone` of a repo with a withheld blob is refused at fetch time ("remote did not send all necessary objects"); only `git clone --filter=...` succeeds. The security guarantee (bytes never sent) holds regardless; this is purely UX. 2. **Filtered-pack caching.** `build_filtered_pack` recomputes per request. If hot, cache by (repo, tip-OIDs, withheld-set) and invalidate on push. -3. **Incremental fetch (`have` lines).** This plan targets the clone case. Confirm and, if needed, harden the filtered serve for fetches that send `have` lines so withheld blobs are never sent incrementally either. +3. **Incremental fetch efficiency.** Verified during execution: an incremental `git fetch` after a partial clone is already correct and still withholds the private blob (covered by `real_git_fetch_after_partial_clone_still_withholds`). 
The serve ignores the client's `have`/`want` negotiation and always sends a self-contained pack of all refs minus the withheld blobs, replying `NAK`; the client de-duplicates, so nothing breaks. The only cost is that a fetch re-sends the full object set instead of a thin delta. Honoring negotiation to produce smaller fetch packs is the optimization left here. 4. **Replication-path enforcement (Phase 2).** Still blocked on the maintainer A/B decision; unrelated to this HTTP-path work. ``` From 85a97118c55b28c538700d63eda42e818e04786f Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 6 Jun 2026 18:36:32 -0500 Subject: [PATCH 10/19] refactor(node): address CodeRabbit review on PR #28 Move the two blocking git shell-outs in the filtered upload-pack path off the async worker thread, matching the tokio::process / spawn_blocking usage already in this file: build_filtered_pack (rev-list + pack-objects) and withheld_blob_oids (per-ref ls-tree) now run inside spawn_blocking so a large repo cannot stall the tokio runtime. Behavior is unchanged. Also fix the Task 0 findings block in the Phase 3 plan: it still recorded v2 packfile framing, which is the exact path that failed against a real client and was corrected to v0. The block now documents the shipped v0 contract. Drop a stray trailing code fence flagged by markdownlint (MD040). The speculative ls-tree timeout and the public/no-rules fast-path from the review are intentionally left out: the timeout guards against adversarial repos we do not yet host, and the fast-path is a micro-optimization not worth the extra branch right now. 
--- crates/gitlawb-node/src/api/repos.rs | 29 ++++++++++++++----- crates/gitlawb-node/src/git/smart_http.rs | 13 +++++++-- ...6-05-phase3-subtree-content-withholding.md | 13 ++++----- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index ca5ed05..4522e8d 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -395,14 +395,27 @@ pub async fn git_upload_pack( .map_err(|e| AppError::Git(e.to_string()))?; let body_len = body.len(); - let withheld = visibility_pack::withheld_blob_oids( - &disk_path, - &rules, - record.is_public, - &record.owner_did, - caller, - ) - .map_err(|e| AppError::Git(e.to_string()))?; + // withheld_blob_oids walks every ref with blocking `git ls-tree`; keep that + // off the async worker thread. + let withheld = { + let path = disk_path.clone(); + let rules = rules.clone(); + let owner_did = record.owner_did.clone(); + let caller_owned = caller.map(str::to_string); + let is_public = record.is_public; + tokio::task::spawn_blocking(move || { + visibility_pack::withheld_blob_oids( + &path, + &rules, + is_public, + &owner_did, + caller_owned.as_deref(), + ) + }) + .await + .map_err(|e| AppError::Git(e.to_string()))? + .map_err(|e| AppError::Git(e.to_string()))? 
+ }; let resp = if withheld.is_empty() { smart_http::upload_pack(&disk_path, body).await diff --git a/crates/gitlawb-node/src/git/smart_http.rs b/crates/gitlawb-node/src/git/smart_http.rs index 0609f98..80374fb 100644 --- a/crates/gitlawb-node/src/git/smart_http.rs +++ b/crates/gitlawb-node/src/git/smart_http.rs @@ -1,4 +1,4 @@ -use anyhow::{bail, Result}; +use anyhow::{bail, Context, Result}; use axum::body::Body; use axum::http::StatusCode; use axum::response::Response; @@ -196,7 +196,16 @@ pub async fn upload_pack_excluding( request_body: Bytes, withheld: &HashSet<String>, ) -> Result<Response> { - let pack = build_filtered_pack(repo_path, withheld)?; + // build_filtered_pack shells out to git (rev-list, pack-objects) with + // blocking std::process I/O; run it off the async worker so a large repo's + // pack build does not stall the tokio runtime. + let pack = { + let repo_path = repo_path.to_path_buf(); + let withheld = withheld.clone(); + tokio::task::spawn_blocking(move || build_filtered_pack(&repo_path, &withheld)) + .await + .context("filtered-pack build task panicked")?? + }; // The client lists its capabilities on the first `want` line. Honor // side-band-64k when offered (every modern smart-HTTP client offers it); diff --git a/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md b/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md index d79c0df..453ca94 100644 --- a/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md +++ b/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md @@ -99,13 +99,11 @@ Executed 2026-06-06. Results: - `git rev-list --objects --all` (in repo dir) to enumerate reachable objects as `oid [path]` lines. - Filter out withheld OIDs (first whitespace column), feed remaining OIDs newline-delimited to `git pack-objects --stdout`. - Verified exclusion by `git index-pack ` then `git verify-pack -v `: secret blob absent, public blob present. Confirmed.
-- **Protocol version targeted:** v2 packfile section. The serve hand-frames the body, so no `GIT_PROTOCOL`/`-c protocol.version` flag is passed to our own process; we emit the v2 `packfile` section bytes directly. -- **Response framing (captured by driving `git upload-pack --stateless-rpc` with `GIT_PROTOCOL=version=2`):** - - `pkt_line("packfile\n")` (plain control pkt-line, not a sideband band). - - Then sideband-64k bands: `0x02` = progress (optional, we omit), `0x01` = pack data whose payload begins `PACK...`. - - Pack data chunked under the pkt-line limit, each chunk prefixed with `0x01`. - - Terminated by `0000` flush. - - This matches the plan's Option B framing in Task 2 exactly; no adjustment needed. +- **Protocol version targeted:** v0. `info_refs` runs `git upload-pack --advertise-refs` with no `GIT_PROTOCOL=version=2`, so it advertises v0 and clients negotiate v0; the serve path must hand-frame a v0 response. (An earlier draft of this block recorded v2 framing; that path was implemented, failed against a real client with "expected ACK/NAK, got 'packfile'", and was corrected to v0. The record below reflects the shipped v0 contract.) +- **Response framing (v0):** + - `pkt_line("NAK\n")` first (no `packfile\n` control line; that is v2 only). + - If the client offered `side-band-64k`: band 1 (`0x01`) carries pack data whose payload begins `PACK...`, chunked under the pkt-line size limit (65515), each chunk prefixed with `0x01`; terminated by a `0000` flush. + - If no side-band was offered: the raw pack bytes follow `NAK\n` directly, with no flush. - **Confirmed:** served pack contains PUBLIC_OID, excludes SECRET_OID. --- @@ -709,4 +707,3 @@ Set a subtree rule on a local repo via `gl visibility`, clone as a non-reader th 2. **Filtered-pack caching.** `build_filtered_pack` recomputes per request. If hot, cache by (repo, tip-OIDs, withheld-set) and invalidate on push. 3. 
**Incremental fetch efficiency.** Verified during execution: an incremental `git fetch` after a partial clone is already correct and still withholds the private blob (covered by `real_git_fetch_after_partial_clone_still_withholds`). The serve ignores the client's `have`/`want` negotiation and always sends a self-contained pack of all refs minus the withheld blobs, replying `NAK`; the client de-duplicates, so nothing breaks. The only cost is that a fetch re-sends the full object set instead of a thin delta. Honoring negotiation to produce smaller fetch packs is the optimization left here. 4. **Replication-path enforcement (Phase 2).** Still blocked on the maintainer A/B decision; unrelated to this HTTP-path work. -``` From c27e8dc992e7dcbb9045b68f8d942c228b18c9cd Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 6 Jun 2026 22:06:32 -0500 Subject: [PATCH 11/19] chore(node): drop planning doc from PR per maintainer request kevincodex1 asked to keep the superpowers planning docs out of the repo. The Phase 3 plan was scaffolding for this change, not something the project needs to carry. Removing it leaves only the code and tests in the PR. --- ...6-05-phase3-subtree-content-withholding.md | 709 ------------------ 1 file changed, 709 deletions(-) delete mode 100644 docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md diff --git a/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md b/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md deleted file mode 100644 index 453ca94..0000000 --- a/docs/superpowers/plans/2026-06-05-phase3-subtree-content-withholding.md +++ /dev/null @@ -1,709 +0,0 @@ -# Phase 3: Subtree Content Withholding (mode B) Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. 
Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Make a mode-`b` subtree visibility rule actually withhold that subtree's file content on clone/fetch over the node's HTTP git read path, while keeping every commit and tree SHA intact, so a non-reader sees the directory structure and blob SHAs but never the private bytes. - -**Architecture:** The authorization decision already exists as the pure `visibility_check` (one decision per path). Phase 3 adds two node-side pieces: (1) a blob-OID resolver that, given a repo's refs plus the caller's rules, returns the set of blob object IDs the caller may not read (a blob is withheld only if it appears at no allowed path); and (2) a filtered `upload-pack` serve path that builds the response pack excluding those OIDs. The two existing read handlers (`git_info_refs`, `git_upload_pack`) keep their current whole-repo 404 gate unchanged and gain a filtered serve branch when, and only when, the caller has at least one withheld blob. Trees and commits are always sent in full, so SHAs stay intact; only blob content is omitted. - -**Tech Stack:** Rust, axum, the system `git` CLI (shelled out, as the codebase already does in `git/store.rs` and `git/smart_http.rs`), `tempfile` for fixture repos in tests. - -**Scope boundary:** This plan covers the node-side enforcement and the security guarantee (private blob bytes are never placed in the served pack), proven by inspecting the produced pack and by a real `git` partial clone. It deliberately does NOT cover: the `git-remote-gitlawb` client-side change that lets a non-reader get a *clean* clone without passing `--filter` (see the corrected client-behavior note below), filtered-pack caching, or incremental-fetch (`have`-line) hardening beyond what falls out naturally. Those are listed under "Out of scope / follow-ups" at the end. 
- -**Corrected client behavior (verified during execution, supersedes an earlier assumption in this plan):** a served pack that omits a blob still referenced by a sent tree is not closed under reachability. A stock *full* `git clone` therefore rejects it at *fetch* time with "remote did not send all necessary objects" (the connectivity check), NOT at checkout. Only a *partial* clone (the client passes `--filter`, which marks a promisor remote and relaxes that check) accepts the pack with the private blob absent; tree and commit SHAs stay intact. The security guarantee (private bytes never leave the node) holds for every client. Making a normal `git clone` Just Work without `--filter` is the git-remote-gitlawb follow-up. - ---- - -## File Structure - -- **Create:** `crates/gitlawb-node/src/git/visibility_pack.rs`: the blob-OID resolver (`withheld_blob_oids`) and its tests. One responsibility: decide which blob OIDs to withhold for a caller. -- **Modify:** `crates/gitlawb-node/src/git/mod.rs`: add `pub mod visibility_pack;`. -- **Modify:** `crates/gitlawb-node/src/git/smart_http.rs`: add `upload_pack_excluding` (filtered serve) alongside the existing `upload_pack`, plus a small `pack_object_ids` test helper. -- **Modify:** `crates/gitlawb-node/src/api/repos.rs`: in `git_upload_pack` (around line 368-407) branch to the filtered serve when the caller has withheld blobs; `git_info_refs` (around line 308-365) needs no functional change but gets a confirming test. -- **Modify (test oracle only):** `crates/gitlawb-node/src/visibility.rs`: no logic change; `visibility_check` is reused as-is by the resolver. - ---- - -## Task 0: Spike: pin the filtered-serve mechanism - -This is the one genuinely uncertain piece: how to make `git upload-pack` (or `git pack-objects`) produce a clone/fetch response that omits a specific set of blob OIDs while still sending the trees that reference them, and how to frame that as a valid `application/x-git-upload-pack-result` body. 
Everything downstream depends on a single function signature, not on the mechanism, so this task nails the mechanism by experiment and records the result. No production code is committed in this task. - -**Files:** -- Scratch only (a throwaway shell script and a temp repo). Findings are written back into this plan's "Task 0 Findings" block below. - -- [ ] **Step 1: Build a fixture repo with a public and a private file** - -Run: -```bash -cd "$(mktemp -d)" && export FIX=$PWD -git init -q work && cd work -git config user.email t@t && git config user.name t -mkdir -p public secret -echo "public bytes" > public/a.txt -echo "TOP SECRET" > secret/b.txt -git add . && git commit -qm init -SECRET_OID=$(git rev-parse HEAD:secret/b.txt) -PUBLIC_OID=$(git rev-parse HEAD:public/a.txt) -echo "secret blob=$SECRET_OID public blob=$PUBLIC_OID" -cd .. && git clone -q --bare work bare.git -``` - -- [ ] **Step 2: Produce a pack that excludes the secret blob OID** - -Run (mechanism candidate: explicit object list to `pack-objects`): -```bash -cd "$FIX/bare.git" -# Every object reachable from all refs, as "oid [path]" lines: -git rev-list --objects --all > /tmp/all_objs.txt -# Drop the secret blob's line, keep only the OID column: -grep -v "^$SECRET_OID" /tmp/all_objs.txt | awk '{print $1}' > /tmp/keep_oids.txt -# Build a pack of exactly those objects: -git pack-objects --stdout < /tmp/keep_oids.txt > /tmp/filtered.pack -# Confirm the secret blob is absent and the public blob present: -git verify-pack -v /tmp/filtered.pack | grep -E "$SECRET_OID|$PUBLIC_OID" || echo "secret absent (expected: only public line prints)" -``` -Expected: the public OID prints, the secret OID does not. This proves the OID-exclusion mechanism. 
- -- [ ] **Step 3: Determine the upload-pack response framing** - -Run, capturing the exact bytes a real clone request/response uses, so the framing in Task 2 is correct rather than guessed: -```bash -cd "$FIX/bare.git" -git config uploadpack.allowFilter true -# Capture a normal v2 clone's request body and response shape: -GIT_TRACE_PACKET=1 git -c protocol.version=2 clone -q --bare "$FIX/bare.git" "$FIX/clone1.git" 2>/tmp/trace.txt -# Inspect the fetch command + response sections (look for "packfile", sideband 0001/0002, flush 0000): -grep -E "fetch|want|packfile|0000|ACK|NAK|ready" /tmp/trace.txt | head -40 -``` -Record from the trace: (a) whether the node should target protocol v2 or v0, (b) the exact section markers around the packfile, (c) whether sideband-64k framing is in use. - -- [ ] **Step 4: Decide the serve implementation and write findings** - -Choose the implementation for `upload_pack_excluding` based on Steps 1-3, preferring the lowest-risk option that the trace confirms works: - -- **Option A (preferred): delegate to `git upload-pack` with an injected mandatory filter.** Set `uploadpack.allowFilter=true`, rewrite the client's fetch request to carry `filter sparse:oid=<blob-ish>` (v2) where the spec blob excludes the denied paths, and let `git upload-pack` build and frame the entire response. Lowest framing risk; depends on `sparse:oid` negation behaving (verify in Step 2 variant). -- **Option B (fallback): hand-build the pack.** Parse `want` OIDs from the request body, run `git rev-list --objects <wants>` minus the withheld OIDs, pipe to `git pack-objects --stdout`, and frame the result per the markers captured in Step 3. - -Write the chosen option, the exact `git` invocation(s), and the framing bytes into the "Task 0 Findings" block below. The downstream tasks reference `upload_pack_excluding(repo_path, request_body, withheld_oids) -> Result<Response>` regardless of which option is recorded here.
- -- [ ] **Step 5: No commit** - -This task records findings only; there is nothing to commit. - -### Task 0 Findings - -Executed 2026-06-06. Results: - -- **Mechanism chosen:** Option B (hand-built pack). `sparse:oid` negation was not needed; explicit OID exclusion via `rev-list` + `pack-objects` is deterministic and self-contained. -- **Exact git invocation(s):** - - `git rev-list --objects --all` (in repo dir) to enumerate reachable objects as `oid [path]` lines. - - Filter out withheld OIDs (first whitespace column), feed remaining OIDs newline-delimited to `git pack-objects --stdout`. - - Verified exclusion by `git index-pack <pack>` then `git verify-pack -v <pack>`: secret blob absent, public blob present. Confirmed. -- **Protocol version targeted:** v0. `info_refs` runs `git upload-pack --advertise-refs` with no `GIT_PROTOCOL=version=2`, so it advertises v0 and clients negotiate v0; the serve path must hand-frame a v0 response. (An earlier draft of this block recorded v2 framing; that path was implemented, failed against a real client with "expected ACK/NAK, got 'packfile'", and was corrected to v0. The record below reflects the shipped v0 contract.) The `upload_pack_excluding` and `extract_pack` code samples in Tasks 2 and 4 of this plan still show the superseded v2 `packfile` framing; follow the v0 contract recorded here instead. -- **Response framing (v0):** - - `pkt_line("NAK\n")` first (no `packfile\n` control line; that is v2 only). - - If the client offered `side-band-64k`: band 1 (`0x01`) carries pack data whose payload begins `PACK...`, chunked under the pkt-line size limit (65515), each chunk prefixed with `0x01`; terminated by a `0000` flush. - - If no side-band was offered: the raw pack bytes follow `NAK\n` directly, with no flush. -- **Confirmed:** served pack contains PUBLIC_OID, excludes SECRET_OID.
- ---- - -## Task 1: Blob-OID resolver: withhold a private subtree's blobs for a non-reader - -**Files:** -- Create: `crates/gitlawb-node/src/git/visibility_pack.rs` -- Modify: `crates/gitlawb-node/src/git/mod.rs` (add module) - -- [ ] **Step 1: Register the module** - -In `crates/gitlawb-node/src/git/mod.rs`, add the line in alphabetical position (after `pub mod store;`): -```rust -pub mod visibility_pack; -``` - -- [ ] **Step 2: Write the failing test (non-reader withholds only the private blob)** - -Create `crates/gitlawb-node/src/git/visibility_pack.rs` with the test module first: -```rust -//! Resolve which blob OIDs must be withheld from a caller because every path -//! at which the blob appears is denied by the repo's visibility rules. Trees -//! and commits are never withheld (mode B keeps SHAs intact); only blob -//! content is held back. - -use crate::db::{VisibilityMode, VisibilityRule}; -use crate::git::store; -use crate::visibility::{visibility_check, Decision}; -use anyhow::{Context, Result}; -use std::collections::HashSet; -use std::path::Path; - -#[cfg(test)] -mod tests { - use super::*; - use chrono::Utc; - use std::process::Command; - use tempfile::TempDir; - - fn rule(path_glob: &str, readers: &[&str]) -> VisibilityRule { - VisibilityRule { - id: "x".into(), - repo_id: "r1".into(), - path_glob: path_glob.into(), - mode: VisibilityMode::B, - reader_dids: readers.iter().map(|s| s.to_string()).collect(), - created_by: "did:key:zOwner".into(), - created_at: Utc::now(), - } - } - - const OWNER: &str = "did:key:zOwner"; - - /// Build a bare repo with public/a.txt and secret/b.txt at one commit. - /// Returns (tempdir, bare_path, secret_blob_oid, public_blob_oid). 
- fn fixture() -> (TempDir, std::path::PathBuf, String, String) { - let td = TempDir::new().unwrap(); - let work = td.path().join("work"); - let bare = td.path().join("bare.git"); - let run = |args: &[&str], dir: &Path| { - let ok = Command::new("git") - .args(args) - .current_dir(dir) - .status() - .unwrap() - .success(); - assert!(ok, "git {args:?} failed"); - }; - std::fs::create_dir_all(work.join("public")).unwrap(); - std::fs::create_dir_all(work.join("secret")).unwrap(); - std::fs::write(work.join("public/a.txt"), b"public bytes\n").unwrap(); - std::fs::write(work.join("secret/b.txt"), b"TOP SECRET\n").unwrap(); - run(&["init", "-q"], &work); - run(&["config", "user.email", "t@t"], &work); - run(&["config", "user.name", "t"], &work); - run(&["add", "."], &work); - run(&["commit", "-qm", "init"], &work); - let oid = |path: &str| { - let out = Command::new("git") - .args(["rev-parse", &format!("HEAD:{path}")]) - .current_dir(&work) - .output() - .unwrap(); - String::from_utf8_lossy(&out.stdout).trim().to_string() - }; - let secret = oid("secret/b.txt"); - let public = oid("public/a.txt"); - run( - &["clone", "-q", "--bare", work.to_str().unwrap(), bare.to_str().unwrap()], - td.path(), - ); - (td, bare, secret, public) - } - - #[test] - fn non_reader_withholds_only_the_private_blob() { - let (_td, bare, secret, public) = fixture(); - let rules = [rule("/secret/**", &["did:key:zFriend"])]; - let withheld = - withheld_blob_oids(&bare, &rules, true, OWNER, Some("did:key:zStranger")).unwrap(); - assert!(withheld.contains(&secret), "secret blob must be withheld"); - assert!(!withheld.contains(&public), "public blob must NOT be withheld"); - } - - #[test] - fn owner_withholds_nothing() { - let (_td, bare, secret, public) = fixture(); - let rules = [rule("/secret/**", &["did:key:zFriend"])]; - let withheld = withheld_blob_oids(&bare, &rules, true, OWNER, Some(OWNER)).unwrap(); - assert!(withheld.is_empty(), "owner sees everything"); - let _ = (secret, public); - } - - 
#[test] - fn listed_reader_withholds_nothing() { - let (_td, bare, _secret, _public) = fixture(); - let rules = [rule("/secret/**", &["did:key:zFriend"])]; - let withheld = - withheld_blob_oids(&bare, &rules, true, OWNER, Some("did:key:zFriend")).unwrap(); - assert!(withheld.is_empty(), "listed reader sees the subtree"); - } - - #[test] - fn no_subtree_rules_withholds_nothing() { - let (_td, bare, _secret, _public) = fixture(); - let withheld = withheld_blob_oids(&bare, &[], true, OWNER, None).unwrap(); - assert!(withheld.is_empty(), "public repo, no rules, nothing withheld"); - } -} -``` - -- [ ] **Step 3: Run the test to verify it fails** - -Run: `cargo test -p gitlawb-node visibility_pack:: -- --nocapture` -Expected: FAIL to compile with "cannot find function `withheld_blob_oids`". - -- [ ] **Step 4: Implement `withheld_blob_oids`** - -Add above the `#[cfg(test)]` block in `visibility_pack.rs`: -```rust -/// List every (blob_oid, "/repo/relative/path") pair reachable from any branch -/// ref in `repo_path`. Uses `git ls-tree -r` per ref so each path a blob lives -/// at is represented (the same blob content can appear at several paths). Paths -/// are returned with a leading "/" to match the glob form used by visibility -/// rules ("/secret/**"). 
-fn blob_paths(repo_path: &Path) -> Result> { - let refs = store::list_refs(repo_path).context("list_refs failed")?; - let mut out = Vec::new(); - for (refname, _oid) in refs { - if !refname.starts_with("refs/heads/") && !refname.starts_with("refs/tags/") { - continue; - } - let listing = std::process::Command::new("git") - .args(["ls-tree", "-r", &refname]) - .current_dir(repo_path) - .output() - .context("git ls-tree -r failed")?; - if !listing.status.success() { - continue; - } - for line in String::from_utf8_lossy(&listing.stdout).lines() { - // " blob \t" - let Some((meta, path)) = line.split_once('\t') else { - continue; - }; - let mut parts = meta.split_whitespace(); - let _mode = parts.next(); - let kind = parts.next(); - let oid = parts.next(); - if kind == Some("blob") { - if let Some(oid) = oid { - out.push((oid.to_string(), format!("/{path}"))); - } - } - } - } - Ok(out) -} - -/// Blob OIDs the caller may not read. A blob is withheld only if visibility -/// denies the caller at *every* path the blob appears at; a blob that is also -/// reachable through an allowed path is sent (its content is public elsewhere). -/// -/// The whole-repo "/" gate is handled by the caller before this function runs: -/// if "/" denies, the caller gets a 404 and never reaches the filtered serve. -pub fn withheld_blob_oids( - repo_path: &Path, - rules: &[VisibilityRule], - is_public: bool, - owner_did: &str, - caller: Option<&str>, -) -> Result> { - let mut denied: HashSet = HashSet::new(); - let mut allowed: HashSet = HashSet::new(); - for (oid, path) in blob_paths(repo_path)? { - match visibility_check(rules, is_public, owner_did, caller, &path) { - Decision::Deny => { - denied.insert(oid); - } - Decision::Allow => { - allowed.insert(oid); - } - } - } - Ok(denied.difference(&allowed).cloned().collect()) -} -``` - -- [ ] **Step 5: Run the tests to verify they pass** - -Run: `cargo test -p gitlawb-node visibility_pack::` -Expected: PASS (4 tests). 
- -- [ ] **Step 6: Commit** - -```bash -git add crates/gitlawb-node/src/git/visibility_pack.rs crates/gitlawb-node/src/git/mod.rs -git commit -m "feat(node): resolve withheld blob OIDs for path-scoped visibility" -``` - ---- - -## Task 2: Filtered upload-pack serve (`upload_pack_excluding`) - -**Files:** -- Modify: `crates/gitlawb-node/src/git/smart_http.rs` - -Implement using the mechanism recorded in **Task 0 Findings**. The code below is written for **Option B (hand-built pack)** because it is self-contained and deterministic; if Task 0 recorded Option A, implement that instead behind the identical signature and adjust the test in Step 2 only where it inspects framing (the object-content assertion stays). - -- [ ] **Step 1: Add the test module with a pack-inspection helper and the failing test** - -At the bottom of `smart_http.rs`, add a `#[cfg(test)] mod tests` containing the pack-inspection helper (lists the OIDs inside a raw pack so tests can assert membership) and the first failing test: -```rust -#[cfg(test)] -mod tests { - use super::*; - use std::process::Command; - use tempfile::TempDir; - - /// List OIDs in a pack by writing it to a temp dir and running verify-pack. - pub(super) fn pack_object_ids(pack: &[u8]) -> std::collections::HashSet { - let dir = TempDir::new().unwrap(); - let path = dir.path().join("test.pack"); - std::fs::write(&path, pack).unwrap(); - // index-pack creates the matching .idx next to the pack. 
- let ok = Command::new("git") - .args(["index-pack", path.to_str().unwrap()]) - .status() - .unwrap() - .success(); - assert!(ok, "index-pack failed"); - let out = Command::new("git") - .args(["verify-pack", "-v", path.to_str().unwrap()]) - .output() - .unwrap(); - String::from_utf8_lossy(&out.stdout) - .lines() - .filter_map(|l| l.split_whitespace().next()) - .filter(|t| t.len() == 40 && t.chars().all(|c| c.is_ascii_hexdigit())) - .map(|s| s.to_string()) - .collect() - } - - #[tokio::test] - async fn filtered_serve_excludes_withheld_blob() { - // Build a bare repo, capture the secret + public blob OIDs. - let td = TempDir::new().unwrap(); - let work = td.path().join("work"); - let bare = td.path().join("bare.git"); - let g = |args: &[&str], dir: &std::path::Path| { - assert!(Command::new("git").args(args).current_dir(dir).status().unwrap().success()); - }; - std::fs::create_dir_all(work.join("secret")).unwrap(); - std::fs::create_dir_all(work.join("public")).unwrap(); - std::fs::write(work.join("public/a.txt"), b"pub\n").unwrap(); - std::fs::write(work.join("secret/b.txt"), b"SECRET\n").unwrap(); - g(&["init", "-q"], &work); - g(&["config", "user.email", "t@t"], &work); - g(&["config", "user.name", "t"], &work); - g(&["add", "."], &work); - g(&["commit", "-qm", "init"], &work); - let oid = |p: &str| { - let o = Command::new("git").args(["rev-parse", &format!("HEAD:{p}")]) - .current_dir(&work).output().unwrap(); - String::from_utf8_lossy(&o.stdout).trim().to_string() - }; - let secret = oid("secret/b.txt"); - let public = oid("public/a.txt"); - g(&["clone", "-q", "--bare", work.to_str().unwrap(), bare.to_str().unwrap()], td.path()); - - let mut withheld = std::collections::HashSet::new(); - withheld.insert(secret.clone()); - - let pack = build_filtered_pack(&bare, &withheld).unwrap(); - let ids = pack_object_ids(&pack); - assert!(ids.contains(&public), "public blob must be in the pack"); - assert!(!ids.contains(&secret), "secret blob must NOT be in the pack"); - 
} -``` - -- [ ] **Step 2: Run the test to verify it fails** - -Run: `cargo test -p gitlawb-node smart_http::tests::filtered_serve_excludes_withheld_blob` -Expected: FAIL to compile with "cannot find function `build_filtered_pack`". - -- [ ] **Step 3: Implement `build_filtered_pack` and `upload_pack_excluding`** - -Add to `smart_http.rs` (above the `#[cfg(test)]` block). `build_filtered_pack` is the deterministic core (unit-tested in Step 1); `upload_pack_excluding` frames it as an HTTP response using the markers recorded in Task 0 Findings: -```rust -use std::collections::HashSet; - -/// Build a packfile containing every object reachable from all refs EXCEPT the -/// given blob OIDs. Commits and trees are always included, so SHAs stay intact; -/// only the named blobs are dropped. -pub fn build_filtered_pack(repo_path: &Path, withheld: &HashSet) -> Result> { - // All reachable objects as "oid [path]" lines. - let rev = std::process::Command::new("git") - .args(["rev-list", "--objects", "--all"]) - .current_dir(repo_path) - .output()?; - if !rev.status.success() { - bail!("git rev-list failed: {}", String::from_utf8_lossy(&rev.stderr)); - } - let mut keep = Vec::new(); - for line in String::from_utf8_lossy(&rev.stdout).lines() { - let oid = line.split_whitespace().next().unwrap_or(""); - if oid.is_empty() || withheld.contains(oid) { - continue; - } - keep.push(oid.to_string()); - } - let mut child = std::process::Command::new("git") - .args(["pack-objects", "--stdout"]) - .current_dir(repo_path) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn()?; - { - use std::io::Write as _; - let mut stdin = child.stdin.take().expect("stdin"); - stdin.write_all(keep.join("\n").as_bytes())?; - stdin.write_all(b"\n")?; - } - let out = child.wait_with_output()?; - if !out.status.success() { - bail!("git pack-objects failed: {}", String::from_utf8_lossy(&out.stderr)); - } - Ok(out.stdout) -} - -/// Serve a clone/fetch with the withheld blobs 
removed from the response pack. -/// Framing follows Task 0 Findings; the body wraps `build_filtered_pack` output -/// in the upload-pack `packfile` section with sideband-64k, terminated by flush. -pub async fn upload_pack_excluding( - repo_path: &Path, - _request_body: Bytes, - withheld: &HashSet, -) -> Result { - let pack = build_filtered_pack(repo_path, withheld)?; - let mut body = Vec::new(); - body.extend_from_slice(&pkt_line("packfile\n")); - // sideband-64k: band 1 carries pack data, chunked under the pkt-line limit. - for chunk in pack.chunks(65515) { - let mut framed = Vec::with_capacity(chunk.len() + 1); - framed.push(0x01); - framed.extend_from_slice(chunk); - let len = framed.len() + 4; - body.extend_from_slice(format!("{len:04x}").as_bytes()); - body.extend_from_slice(&framed); - } - body.extend_from_slice(b"0000"); - Ok(Response::builder() - .status(StatusCode::OK) - .header("Content-Type", "application/x-git-upload-pack-result") - .header("Cache-Control", "no-cache") - .body(Body::from(body))?) -} -``` -> If Task 0 recorded **Option A**, replace the two functions above with the injected-filter delegation to `git upload-pack`, keeping the `build_filtered_pack` name as a thin wrapper so the Step 1 test still drives the OID-exclusion guarantee. - -- [ ] **Step 4: Run the tests to verify they pass** - -Run: `cargo test -p gitlawb-node smart_http::tests::filtered_serve_excludes_withheld_blob` -Expected: PASS. 
- -- [ ] **Step 5: Commit** - -```bash -git add crates/gitlawb-node/src/git/smart_http.rs -git commit -m "feat(node): filtered upload-pack serve that omits withheld blobs" -``` - ---- - -## Task 3: Wire filtered serve into the upload-pack handler - -**Files:** -- Modify: `crates/gitlawb-node/src/api/repos.rs` (`git_upload_pack`, lines ~368-407) - -- [ ] **Step 1: Add the imports** - -At the top of `repos.rs`, in the existing `use crate::git::{...}` group, add `visibility_pack`: -```rust -use crate::git::{smart_http, store, visibility_pack}; -``` -(If `store` is not already in that group, keep whatever is there and append `visibility_pack`.) - -- [ ] **Step 2: Branch to the filtered serve** - -In `git_upload_pack`, the current body computes `rules`, runs the whole-repo `visibility_check(..., "/")` 404 gate, acquires `disk_path`, then calls `smart_http::upload_pack(&disk_path, body)`. Keep the 404 gate and the `acquire` exactly as they are. Replace only the single serve call: -```rust - let disk_path = state - .repo_store - .acquire(&record.owner_did, &record.name) - .await - .map_err(|e| AppError::Git(e.to_string()))?; - let body_len = body.len(); - - let withheld = - visibility_pack::withheld_blob_oids(&disk_path, &rules, record.is_public, &record.owner_did, caller) - .map_err(|e| AppError::Git(e.to_string()))?; - - let resp = if withheld.is_empty() { - smart_http::upload_pack(&disk_path, body).await - } else { - tracing::info!(repo = %name, caller = ?caller, withheld = withheld.len(), "serving filtered pack"); - smart_http::upload_pack_excluding(&disk_path, body, &withheld).await - } - .map_err(|e| { - let msg = e.to_string(); - if msg.contains("bad line length") || msg.contains("protocol error") { - tracing::warn!(repo = %name, err = %msg, "git-upload-pack: bad client request"); - AppError::BadRequest(msg) - } else { - tracing::error!(repo = %name, err = %msg, "git-upload-pack failed"); - AppError::Git(msg) - } - })?; -``` -Leave the 
`crate::metrics::record_fetch(...)` line and everything after it unchanged. - -- [ ] **Step 3: Verify the crate builds and existing tests pass** - -Run: `cargo test -p gitlawb-node` -Expected: PASS, including the Phase 1 whole-repo visibility tests (no regression). The new fast-path (`withheld.is_empty()`) must keep public and fully-authorized clones byte-identical to before. - -- [ ] **Step 4: Commit** - -```bash -git add crates/gitlawb-node/src/api/repos.rs -git commit -m "feat(node): serve filtered pack when caller has withheld subtree blobs" -``` - ---- - -## Task 4: End-to-end clone test through a real git client - -**Files:** -- Modify: `crates/gitlawb-node/src/git/smart_http.rs` (extend `mod tests`) - -This proves the served body is a clone a real `git` accepts and that the private bytes are absent from the resulting object store, which is the security guarantee. - -- [ ] **Step 1: Write the failing end-to-end test** - -Add to `smart_http.rs` `mod tests`: -```rust - #[tokio::test] - async fn client_clone_lacks_withheld_blob_bytes() { - use axum::body::to_bytes; - let td = TempDir::new().unwrap(); - let work = td.path().join("work"); - let bare = td.path().join("bare.git"); - let g = |args: &[&str], dir: &std::path::Path| { - assert!(Command::new("git").args(args).current_dir(dir).status().unwrap().success()); - }; - std::fs::create_dir_all(work.join("secret")).unwrap(); - std::fs::create_dir_all(work.join("public")).unwrap(); - std::fs::write(work.join("public/a.txt"), b"pub\n").unwrap(); - std::fs::write(work.join("secret/b.txt"), b"SECRET\n").unwrap(); - g(&["init", "-q"], &work); - g(&["config", "user.email", "t@t"], &work); - g(&["config", "user.name", "t"], &work); - g(&["add", "."], &work); - g(&["commit", "-qm", "init"], &work); - let secret_oid = { - let o = Command::new("git").args(["rev-parse", "HEAD:secret/b.txt"]) - .current_dir(&work).output().unwrap(); - String::from_utf8_lossy(&o.stdout).trim().to_string() - }; - g(&["clone", "-q", "--bare", 
work.to_str().unwrap(), bare.to_str().unwrap()], td.path()); - - let mut withheld = std::collections::HashSet::new(); - withheld.insert(secret_oid.clone()); - - let resp = upload_pack_excluding(&bare, Bytes::new(), &withheld).await.unwrap(); - let body = to_bytes(resp.into_body(), usize::MAX).await.unwrap(); - let ids = pack_object_ids(&extract_pack(&body)); - assert!(!ids.contains(&secret_oid), "withheld blob must be absent from served pack"); - } - - /// Strip the upload-pack `packfile` section framing, returning the raw pack. - /// Mirrors how a client de-frames the sideband-64k band-1 stream. - fn extract_pack(body: &[u8]) -> Vec { - let mut out = Vec::new(); - let mut i = 0; - while i + 4 <= body.len() { - let len = usize::from_str_radix( - std::str::from_utf8(&body[i..i + 4]).unwrap_or("0000"), - 16, - ) - .unwrap_or(0); - if len == 0 { - i += 4; - continue; - } - let chunk = &body[i + 4..i + len]; - // band 1 = pack data; skip "packfile\n" control line and other bands. - if chunk.first() == Some(&0x01) { - out.extend_from_slice(&chunk[1..]); - } - i += len; - } - out - } -``` -> If Task 0 chose Option A (delegated framing), `extract_pack` may need adjusting to the exact bands git emits; use the trace from Task 0 Step 3 to confirm. - -- [ ] **Step 2: Run the test to verify it fails (then passes once framing is right)** - -Run: `cargo test -p gitlawb-node smart_http::tests::client_clone_lacks_withheld_blob_bytes` -Expected: initially may FAIL if framing constants are off; iterate `extract_pack` / framing against Task 0 findings until PASS. Success criterion: the withheld OID is absent from the served pack. 
- -- [ ] **Step 3: Commit** - -```bash -git add crates/gitlawb-node/src/git/smart_http.rs -git commit -m "test(node): end-to-end assert served pack omits withheld blob" -``` - ---- - -## Task 5: Confirm `info/refs` does not leak and stays consistent - -**Files:** -- Modify: `crates/gitlawb-node/src/api/repos.rs` (no logic change to `git_info_refs`; add a confirming comment only if needed) - -The ref advertisement lists commit tips, not blob content, so a mode-B subtree does not require hiding any ref: a non-reader still clones the same commits, just without the private blobs. This task records that decision so a future reader does not "fix" it by gating `info/refs` on subtree rules. - -- [ ] **Step 1: Add a clarifying comment** - -In `git_info_refs`, next to the existing whole-repo gate (the `if service == "git-upload-pack"` block around line 330), append one line after the existing comment: -```rust - // Subtree (mode B) rules do not gate the advertisement: refs expose commit - // tips only, and blob withholding happens in the upload-pack pack build. -``` - -- [ ] **Step 2: Verify nothing else changed** - -Run: `git diff crates/gitlawb-node/src/api/repos.rs` -Expected: only the one comment line added in `git_info_refs`; the whole-repo 404 gate is untouched. - -- [ ] **Step 3: Commit** - -```bash -git add crates/gitlawb-node/src/api/repos.rs -git commit -m "docs(node): note why info/refs is not gated on subtree visibility" -``` - ---- - -## Task 6: Full verification gate - -**Files:** none (verification only) - -- [ ] **Step 1: Format** - -Run: `cargo fmt --all && cargo fmt --all --check` -Expected: clean (no diff). - -- [ ] **Step 2: Lint** - -Run: `cargo clippy --all-targets -- -D warnings` -Expected: no warnings. - -- [ ] **Step 3: Full test suite** - -Run: `cargo test -p gitlawb-node` -Expected: all pass, including Phase 1 visibility tests and the new `visibility_pack` and `smart_http` tests. 
- -- [ ] **Step 4: Manual smoke (optional but recommended)** - -Set a subtree rule on a local repo via `gl visibility`, clone as a non-reader through the node, and confirm the private file's bytes are absent (`git cat-file -p HEAD:secret/b.txt` fails or the file is missing) while the tree entry / SHA is still listed (`git ls-tree HEAD secret/`). - ---- - -## Out of scope / follow-ups (separate plans) - -1. **`git-remote-gitlawb` partial-clone UX.** Make a non-reader's clone Just Work without the user passing `--filter`: the helper requests partial-clone semantics, advertises the `filter` capability cleanly (so there is no "filtering not recognized by server, ignoring" warning), and treats withheld blobs as deliberately absent. Without this, a stock full `git clone` of a repo with a withheld blob is refused at fetch time ("remote did not send all necessary objects"); only `git clone --filter=...` succeeds. The security guarantee (bytes never sent) holds regardless; this is purely UX. -2. **Filtered-pack caching.** `build_filtered_pack` recomputes per request. If hot, cache by (repo, tip-OIDs, withheld-set) and invalidate on push. -3. **Incremental fetch efficiency.** Verified during execution: an incremental `git fetch` after a partial clone is already correct and still withholds the private blob (covered by `real_git_fetch_after_partial_clone_still_withholds`). The serve ignores the client's `have`/`want` negotiation and always sends a self-contained pack of all refs minus the withheld blobs, replying `NAK`; the client de-duplicates, so nothing breaks. The only cost is that a fetch re-sends the full object set instead of a thin delta. Honoring negotiation to produce smaller fetch packs is the optimization left here. -4. **Replication-path enforcement (Phase 2).** Still blocked on the maintainer A/B decision; unrelated to this HTTP-path work. 
From 0c8a1b7ffd7b1a6eb932ef267e162381c34d84f8 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 6 Jun 2026 22:08:35 -0500 Subject: [PATCH 12/19] chore: gitignore local planning docs (docs/superpowers/) --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 404c87b..a36d8f7 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,6 @@ keys/ # Logs *.log .openclaude-profile.json + +# Local planning / scratch docs (never commit) +docs/superpowers/ From 26e65f53451200f92386047a1f605d748f45ce8e Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Mon, 8 Jun 2026 09:54:57 -0500 Subject: [PATCH 13/19] feat(node): replicable_objects filter for replication enforcement --- .../gitlawb-node/src/git/visibility_pack.rs | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs index d386415..693a7e7 100644 --- a/crates/gitlawb-node/src/git/visibility_pack.rs +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -77,6 +77,16 @@ pub fn withheld_blob_oids( Ok(denied.difference(&allowed).cloned().collect()) } +/// Objects that may replicate to the public: everything not in `withheld`. +/// Order-preserving. The single seam every replication site (IPFS, Pinata) +/// passes its object list through; option B would later reroute the withheld +/// ones through encrypt-then-pin instead of dropping them. 
+pub fn replicable_objects(all: Vec, withheld: &HashSet) -> Vec { + all.into_iter() + .filter(|oid| !withheld.contains(oid)) + .collect() +} + #[cfg(test)] mod tests { use super::*; @@ -186,4 +196,20 @@ mod tests { "public repo, no rules, nothing withheld" ); } + + #[test] + fn replicable_objects_drops_withheld_keeps_rest() { + let all = vec!["aaa".to_string(), "bbb".to_string(), "ccc".to_string()]; + let withheld: HashSet = ["bbb".to_string()].into_iter().collect(); + let got = replicable_objects(all, &withheld); + assert_eq!(got, vec!["aaa".to_string(), "ccc".to_string()]); + } + + #[test] + fn replicable_objects_empty_withheld_keeps_all() { + let all = vec!["aaa".to_string(), "bbb".to_string()]; + let withheld: HashSet = HashSet::new(); + let got = replicable_objects(all.clone(), &withheld); + assert_eq!(got, all); + } } From eb7c7641bebf4624f6ad18892dd850a2f896fd47 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Mon, 8 Jun 2026 09:57:43 -0500 Subject: [PATCH 14/19] test(node): pin anonymous-caller contract of withheld_blob_oids --- crates/gitlawb-node/src/git/visibility_pack.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs index 693a7e7..c9c6d6b 100644 --- a/crates/gitlawb-node/src/git/visibility_pack.rs +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -156,6 +156,24 @@ mod tests { (td, bare, secret, public) } + #[test] + fn anonymous_caller_withholds_only_private_blob() { + let (_td, bare, secret_oid, public_oid) = fixture(); + let rules = [rule("/secret/**", &[])]; + // caller = None models the public / any peer: what must not replicate. 
+ let withheld = withheld_blob_oids(&bare, &rules, true, OWNER, None).unwrap(); + assert!( + withheld.contains(&secret_oid), + "secret blob must be withheld" + ); + assert!( + !withheld.contains(&public_oid), + "public blob must replicate" + ); + // Trees and commits are never withheld; the set holds only the secret blob. + assert_eq!(withheld.len(), 1, "only the secret blob OID is withheld"); + } + #[test] fn non_reader_withholds_only_the_private_blob() { let (_td, bare, secret, public) = fixture(); From c2c287ebf69ee233558adb89dd48058bfc83c905 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Mon, 8 Jun 2026 10:00:12 -0500 Subject: [PATCH 15/19] feat(node): IPFS pinning skips withheld blob OIDs --- crates/gitlawb-node/src/api/repos.rs | 9 +++++++-- crates/gitlawb-node/src/ipfs_pin.rs | 5 +++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 4522e8d..1ddf44b 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -614,8 +614,13 @@ pub async fn git_receive_pack( let repo_path_clone = disk_path.clone(); let db_clone = state.db.clone(); tokio::spawn(async move { - let pinned = - crate::ipfs_pin::pin_new_objects(&ipfs_api, &repo_path_clone, &db_clone).await; + let pinned = crate::ipfs_pin::pin_new_objects( + &ipfs_api, + &repo_path_clone, + &db_clone, + &std::collections::HashSet::new(), + ) + .await; if !pinned.is_empty() { tracing::info!(count = pinned.len(), "pinned git objects to IPFS"); for (sha, cid) in &pinned { diff --git a/crates/gitlawb-node/src/ipfs_pin.rs b/crates/gitlawb-node/src/ipfs_pin.rs index 831f1ad..96d6abd 100644 --- a/crates/gitlawb-node/src/ipfs_pin.rs +++ b/crates/gitlawb-node/src/ipfs_pin.rs @@ -7,6 +7,8 @@ //! If `ipfs_api` is empty the functions are no-ops, so the node works fine //! without a local IPFS daemon. 
+use std::collections::HashSet; + use anyhow::Result; use gitlawb_core::cid::Cid; @@ -78,6 +80,7 @@ pub async fn pin_new_objects( ipfs_api: &str, repo_path: &std::path::Path, db: &crate::db::Db, + withheld: &HashSet, ) -> Vec<(String, String)> { if ipfs_api.is_empty() { return vec![]; @@ -92,6 +95,8 @@ pub async fn pin_new_objects( } }; + let object_list = crate::git::visibility_pack::replicable_objects(object_list, withheld); + let mut pinned = Vec::new(); for sha in object_list { From d305af738c4b4e2fda262bd57e4959755fcc543c Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Mon, 8 Jun 2026 10:03:05 -0500 Subject: [PATCH 16/19] feat(node): Pinata pinning skips withheld blob OIDs --- crates/gitlawb-node/src/api/repos.rs | 1 + crates/gitlawb-node/src/pinata.rs | 3 +++ 2 files changed, 4 insertions(+) diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 1ddf44b..1efde93 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -666,6 +666,7 @@ pub async fn git_receive_pack( &pinata_jwt, &repo_path_clone, &db_clone, + &std::collections::HashSet::new(), ) .await; diff --git a/crates/gitlawb-node/src/pinata.rs b/crates/gitlawb-node/src/pinata.rs index ee9d416..90bddad 100644 --- a/crates/gitlawb-node/src/pinata.rs +++ b/crates/gitlawb-node/src/pinata.rs @@ -7,6 +7,7 @@ //! no-op, so nodes without Pinata backing work fine. use anyhow::Result; +use std::collections::HashSet; /// Pin a single git object's raw bytes on Pinata (v3 API). 
/// @@ -76,6 +77,7 @@ pub async fn pin_new_objects( jwt: &str, repo_path: &std::path::Path, db: &crate::db::Db, + withheld: &HashSet<String>, ) -> Vec<(String, String)> { if jwt.is_empty() { return vec![]; @@ -92,6 +94,7 @@ pub async fn pin_new_objects( return vec![]; } }; + let object_list = crate::git::visibility_pack::replicable_objects(object_list, withheld); let mut pinned = Vec::new(); From e670ca3557b39ca785287c411d557cde06597ec6 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Mon, 8 Jun 2026 10:12:52 -0500 Subject: [PATCH 17/19] feat(node): enforce visibility on push replication (IPFS/Pinata/gossip/Arweave) --- crates/gitlawb-node/src/api/repos.rs | 202 +++++++++++++++++---------- 1 file changed, 125 insertions(+), 77 deletions(-) diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 1efde93..767d76a 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -608,8 +608,44 @@ pub async fn git_receive_pack( } } - // Pin new git objects to the local IPFS node (no-op if ipfs_api is empty) - { + // Replication enforcement (Phase 2): decide once per push whether the public + // may read this repo at all and, if so, which blob OIDs must not leave the + // node. `withheld == None` means replicate nothing (private / mode A / + // undetermined): skip every pin so even commit and tree objects (which + // withheld_blob_oids never lists) stay local. `announce` gates the + // network-facing announcements. Fail closed: a private or undetermined repo + // never leaks. 
+ let rules_opt = state.db.list_visibility_rules(&record.id).await.ok(); + let announce = match &rules_opt { + Some(rules) => { + visibility_check(rules, record.is_public, &record.owner_did, None, "/") + == Decision::Allow + } + None => false, + }; + let withheld: Option<std::collections::HashSet<String>> = if !announce { + None + } else { + match &rules_opt { + Some(rules) if rules.is_empty() => Some(std::collections::HashSet::new()), + Some(rules) => crate::git::visibility_pack::withheld_blob_oids( + &disk_path, + rules, + record.is_public, + &record.owner_did, + None, + ) + .map_err(|e| { + tracing::warn!(err = %e, "withheld_blob_oids failed; skipping replication for this push") + }) + .ok(), + None => None, + } + }; + + // Pin new git objects to the local IPFS node (no-op if ipfs_api is empty). + // Skipped entirely when the public cannot read the repo (withheld == None). + if let Some(withheld_ipfs) = withheld.clone() { let ipfs_api = state.config.ipfs_api.clone(); let repo_path_clone = disk_path.clone(); let db_clone = state.db.clone(); @@ -618,7 +654,7 @@ pub async fn git_receive_pack( &ipfs_api, &repo_path_clone, &db_clone, - &std::collections::HashSet::new(), + &withheld_ipfs, ) .await; if !pinned.is_empty() { @@ -659,16 +695,22 @@ pub async fn git_receive_pack( let owner_did_for_arweave = record.owner_did.clone(); let self_public_url = state.config.public_url.clone(); let node_keypair = Arc::clone(&state.node_keypair); + let withheld_pinata = withheld; tokio::spawn(async move { - let pinned = crate::pinata::pin_new_objects( - &http_client, - &pinata_upload_url, - &pinata_jwt, - &repo_path_clone, - &db_clone, - &std::collections::HashSet::new(), - ) - .await; + let pinned = match &withheld_pinata { + Some(withheld) => { + crate::pinata::pin_new_objects( + &http_client, + &pinata_upload_url, + &pinata_jwt, + &repo_path_clone, + &db_clone, + withheld, + ) + .await + } + None => Vec::new(), + }; if !pinned.is_empty() { tracing::info!(count = pinned.len(), 
"pinned git objects to Pinata"); @@ 
-687,77 +729,82 @@ pub async fn git_receive_pack( .await; } - if let Some(p2p) = &p2p_handle { - p2p.publish_ref_update(crate::p2p::RefUpdateEvent { - node_did: node_did_str.clone(), - pusher_did: pusher_did_clone.clone(), - repo: repo_slug.clone(), - ref_name: ref_name.clone(), - old_sha: "".to_string(), - new_sha: new_sha.clone(), - timestamp: chrono::Utc::now().to_rfc3339(), - cert_id: None, - cid: cid.map(|s| s.to_string()), - }) - .await; + if announce { + if let Some(p2p) = &p2p_handle { + p2p.publish_ref_update(crate::p2p::RefUpdateEvent { + node_did: node_did_str.clone(), + pusher_did: pusher_did_clone.clone(), + repo: repo_slug.clone(), + ref_name: ref_name.clone(), + old_sha: "".to_string(), + new_sha: new_sha.clone(), + timestamp: chrono::Utc::now().to_rfc3339(), + cert_id: None, + cid: cid.map(|s| s.to_string()), + }) + .await; + } } } // HTTP peer notification — notify all known peers to pull from us. // This is the reliable fallback when Gossipsub p2p is not yet connected. - if let Ok(peers) = db_for_peers.list_peers().await { - for peer in peers { - if peer.http_url.is_empty() { - continue; - } - let peer_url = peer.http_url.trim_end_matches('/'); - if let Some(self_url) = self_public_url.as_deref() { - if peer_url == self_url.trim_end_matches('/') { + // Suppressed for repos the public cannot read. 
+ if announce { + if let Ok(peers) = db_for_peers.list_peers().await { + for peer in peers { + if peer.http_url.is_empty() { continue; } - let path = "/api/v1/sync/notify"; - let notify_url = format!("{peer_url}{path}"); - let body = serde_json::json!({ - "repo": repo_slug.clone(), - "ref_name": ref_updates_clone.first().map(|(r, _)| r).unwrap_or(&String::new()), - "new_sha": ref_updates_clone.first().map(|(_, s)| s).unwrap_or(&String::new()), - "node_did": node_did_str.clone(), - "pusher_did": pusher_did_clone.clone(), - "old_sha": "0000000000000000000000000000000000000000", - "timestamp": chrono::Utc::now().to_rfc3339(), - }); - let body_bytes = match serde_json::to_vec(&body) { - Ok(bytes) => bytes, - Err(e) => { - tracing::warn!(peer = %peer.did, err = %e, "failed to serialize peer sync notify"); - continue; - } - }; - let signed = gitlawb_core::http_sig::sign_request( - node_keypair.as_ref(), - "POST", - path, - &body_bytes, - ); - match http_client - .post(&notify_url) - .header("Content-Type", "application/json") - .header("Content-Digest", signed.content_digest) - .header("Signature-Input", signed.signature_input) - .header("Signature", signed.signature) - .body(body_bytes) - .send() - .await - { - Ok(r) if r.status().is_success() => { - tracing::info!(peer = %peer.did, repo = %repo_slug, "notified peer to sync") - } - Ok(r) => { - tracing::warn!(peer = %peer.did, status = %r.status(), "peer sync notify returned error") + let peer_url = peer.http_url.trim_end_matches('/'); + if let Some(self_url) = self_public_url.as_deref() { + if peer_url == self_url.trim_end_matches('/') { + continue; + } } - Err(e) => { - tracing::warn!(peer = %peer.did, err = %e, "failed to notify peer") + let path = "/api/v1/sync/notify"; + let notify_url = format!("{peer_url}{path}"); + let body = serde_json::json!({ + "repo": repo_slug.clone(), + "ref_name": ref_updates_clone.first().map(|(r, _)| r).unwrap_or(&String::new()), + "new_sha": ref_updates_clone.first().map(|(_, s)| 
s).unwrap_or(&String::new()), + "node_did": node_did_str.clone(), + "pusher_did": pusher_did_clone.clone(), + "old_sha": "0000000000000000000000000000000000000000", + "timestamp": chrono::Utc::now().to_rfc3339(), + }); + let body_bytes = match serde_json::to_vec(&body) { + Ok(bytes) => bytes, + Err(e) => { + tracing::warn!(peer = %peer.did, err = %e, "failed to serialize peer sync notify"); + continue; + } + }; + let signed = gitlawb_core::http_sig::sign_request( + node_keypair.as_ref(), + "POST", + path, + &body_bytes, + ); + match http_client + .post(&notify_url) + .header("Content-Type", "application/json") + .header("Content-Digest", signed.content_digest) + .header("Signature-Input", signed.signature_input) + .header("Signature", signed.signature) + .body(body_bytes) + .send() + .await + { + Ok(r) if r.status().is_success() => { + tracing::info!(peer = %peer.did, repo = %repo_slug, "notified peer to sync") + } + Ok(r) => { + tracing::warn!(peer = %peer.did, status = %r.status(), "peer sync notify returned error") + } + Err(e) => { + tracing::warn!(peer = %peer.did, err = %e, "failed to notify peer") + } } } } @@ -781,8 +828,9 @@ pub async fn git_receive_pack( timestamp: now_ts.clone(), }); - // Arweave permanent anchoring — fire for each ref update - if !irys_url.is_empty() { + // Arweave permanent anchoring — fire for each ref update. + // Suppressed for repos the public cannot read (public permanent ledger). 
+ if announce && !irys_url.is_empty() { for (ref_name, new_sha) in &ref_updates_clone { let cid = cid_map.get(new_sha).cloned(); let anchor = crate::arweave::RefAnchor { From 949d131c5dfa7bec40687e31363f7b94bb383dc7 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Mon, 8 Jun 2026 10:29:54 -0500 Subject: [PATCH 18/19] test(node): announce gate matches anonymous repo readability --- crates/gitlawb-node/src/visibility.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/crates/gitlawb-node/src/visibility.rs b/crates/gitlawb-node/src/visibility.rs index b246dbf..1107de7 100644 --- a/crates/gitlawb-node/src/visibility.rs +++ b/crates/gitlawb-node/src/visibility.rs @@ -242,4 +242,24 @@ mod tests { Decision::Allow ); } + + // Mirrors the gossip-announce gate in git_receive_pack: announce iff an + // anonymous caller can read "/". + #[test] + fn announce_gate_matches_public_readability() { + let announce = |rules: &[VisibilityRule], is_public: bool| { + visibility_check(rules, is_public, OWNER, None, "/") == Decision::Allow + }; + // Public repo, no rules → announce. + assert!(announce(&[], true)); + // Legacy private repo (is_public false, no rules) → silent. + assert!(!announce(&[], false)); + // Mode A whole-repo rule with no public readers → silent. + assert!(!announce(&[rule("/", VisibilityMode::A, &[])], true)); + // Mode B public repo with a private subtree → still announce. + assert!(announce( + &[rule("/secret/**", VisibilityMode::B, &[])], + true + )); + } } From 083293d507d56bbd58b041a20d0037df7ccfbc2f Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:51:01 -0500 Subject: [PATCH 19/19] fix(node): run withheld_blob_oids off the async worker on push The receive-pack replication chokepoint called withheld_blob_oids directly on the tokio worker, where its blocking git ls-tree walk can stall the runtime for repos with many refs. 
Wrap it in spawn_blocking to match the upload-pack serve path. --- crates/gitlawb-node/src/api/repos.rs | 35 +++++++++++++++++++--------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 767d76a..884d7ca 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -628,17 +628,30 @@ pub async fn git_receive_pack( } else { match &rules_opt { Some(rules) if rules.is_empty() => Some(std::collections::HashSet::new()), - Some(rules) => crate::git::visibility_pack::withheld_blob_oids( - &disk_path, - rules, - record.is_public, - &record.owner_did, - None, - ) - .map_err(|e| { - tracing::warn!(err = %e, "withheld_blob_oids failed; skipping replication for this push") - }) - .ok(), + // withheld_blob_oids walks every ref with blocking `git ls-tree`; + // keep that off the async worker thread. + Some(rules) => { + let path = disk_path.clone(); + let rules = rules.clone(); + let owner_did = record.owner_did.clone(); + let is_public = record.is_public; + tokio::task::spawn_blocking(move || { + crate::git::visibility_pack::withheld_blob_oids( + &path, &rules, is_public, &owner_did, None, + ) + }) + .await + .map_err(|e| { + tracing::warn!(err = %e, "withheld_blob_oids task panicked; skipping replication for this push") + }) + .ok() + .and_then(|r| { + r.map_err(|e| { + tracing::warn!(err = %e, "withheld_blob_oids failed; skipping replication for this push") + }) + .ok() + }) + } None => None, } };