From 6dd7002b8d996d9cddec40db7d971627662aa325 Mon Sep 17 00:00:00 2001 From: Sephyi Date: Sun, 19 Apr 2026 19:01:09 +0200 Subject: [PATCH] perf(services): bound concurrent git show spawning with semaphore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `fetch_file_contents` previously spawned one `git show` task per staged file with no upper bound on concurrency. Each file's task runs two processes back to back (staged blob, then HEAD blob), so a 50-file stage launched ~100 subprocesses in total, with up to ~50 alive at once — enough to cause fork/FD pressure on machines with a modest core count. Wrap the `JoinSet` spawns in a `tokio::sync::Semaphore` whose permit count is `available_parallelism() * 2`, clamped to `16..=32`. Permits are acquired inside each spawned task and held for the task's lifetime, so parallelism scales with the host but never exceeds a safe ceiling. The public signature of `fetch_file_contents` is unchanged; callers see identical behavior for small stages and smoother subprocess spawning on large ones. Uses `std::thread::available_parallelism()` to avoid adding a `num_cpus` dependency. Closes audit entry F-020 from #3. --- src/services/git.rs | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/services/git.rs b/src/services/git.rs index 5d3ee8a..a21c045 100644 --- a/src/services/git.rs +++ b/src/services/git.rs @@ -222,19 +222,41 @@ impl GitService { // ─── File Content ─── - /// Fetch staged and HEAD content for multiple files concurrently. - /// Spawns all git-show processes in parallel instead of sequentially. + /// Concurrency ceiling for the `git show` subprocesses spawned by + /// [`Self::fetch_file_contents`]. Each staged file spawns two processes + /// (staged + HEAD); capping at `cores * 2` (clamped to 16..=32) keeps + /// parallelism high on beefy machines without causing fork/FD pressure on + /// large stages. 
+ fn git_show_concurrency_limit() -> usize { + let cores = std::thread::available_parallelism() + .map(std::num::NonZeroUsize::get) + .unwrap_or(4); + (cores * 2).clamp(16, 32) + } + + /// Fetch staged and HEAD content for multiple files concurrently, bounded + /// by a [`tokio::sync::Semaphore`] to avoid unbounded `git show` spawning + /// on large stages. pub async fn fetch_file_contents( &self, paths: &[PathBuf], ) -> (HashMap, HashMap) { let mut set = tokio::task::JoinSet::new(); let work_dir: Arc = Arc::new(self.work_dir.clone()); + let semaphore = Arc::new(tokio::sync::Semaphore::new( + Self::git_show_concurrency_limit(), + )); for path in paths { let work_dir = Arc::clone(&work_dir); + let semaphore = Arc::clone(&semaphore); let path = path.clone(); set.spawn(async move { + // Semaphore is never closed, so acquire cannot fail. + let _permit = semaphore + .acquire_owned() + .await + .expect("git-show semaphore closed unexpectedly"); let staged = Self::fetch_git_show(&work_dir, &format!(":0:{}", path.display())).await; let head =