From a8649ff4ec0e08f7bb0a377d4ee189434b5ed139 Mon Sep 17 00:00:00 2001
From: AD2000X <thecausticfinale@gmail.com>
Date: Wed, 3 Jun 2026 17:17:55 +0100
Subject: [PATCH] chore: Phase 4 tidy pass (docstrings, DEVLOG/PLAN/README
 wording)

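Wording and docstring cleanups across the Phase 4 files. No functional change; the only
runtime-visible difference is reworded demo/CLI text (Gradio title and headings, argparse
description). Verified: pytest 246 green, reports/phase4_metrics.md unchanged on rebuild
(no-drift), demo still imports without gradio.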
---
 DEVLOG.md                       | 19 ++++++++++++++++++-
 PLAN.md                         |  9 +++++----
 README.md                       |  2 +-
 docs/phase4_brief.md            | 20 ++++++++++----------
 notebooks/06_demo.ipynb         |  2 +-
 notebooks/07_final_report.ipynb |  6 +++---
 reports/final_report.md         |  6 +++---
 scripts/build_phase4_summary.py |  4 ++--
 scripts/run_demo.py             |  8 ++++----
 src/phase4_summary.py           |  2 +-
 tests/test_phase4_summary.py    |  2 +-
 11 files changed, 49 insertions(+), 31 deletions(-)

diff --git a/DEVLOG.md b/DEVLOG.md
index b214114..275110a 100644
--- a/DEVLOG.md
+++ b/DEVLOG.md
@@ -181,6 +181,22 @@ Decisions outgrow this file, split them into `DECISIONS.md` (or `docs/adr/`).
 
 ---
 
+## 2026-06-03 - Phase 4 final demo readiness pass
+
+### Result - branch references in docs and notebooks now point at the integrated final-demo branch
+
+- **What changed:** aligned Phase 4 references from the earlier split branches to the integrated
+  `feature/phase4-demo` branch, where PR-A/PR-B/PR-C are now present together.
+- **Demo fix:** `scripts/run_demo.py` launches Gradio with `allowed_paths` for
+  `outputs/layout/crops`, so Colab Drive-resident layout crop PNGs can be displayed in the
+  gallery without Gradio's `InvalidPathError`.
+- **Naming cleanup:** replaced the "capstone" label with "final demo", "final integration", or
+  "Phase 4 summary" to keep the project description practical.
+- **Scope hygiene:** raw data and generated machine artifacts remain gitignored under `data/` and
+  `outputs/`; committed report artifacts stay under `reports/`.
+
+---
+
 ## 2026-06-03 - Phase 4 eval-summary backbone (PR-A)
 
 ### Result - one summary aggregated from the per-phase artifacts; report numbers never hand-copied
@@ -203,7 +219,8 @@ Decisions outgrow this file, split them into `DECISIONS.md` (or `docs/adr/`).
   relevant chunk per question, `src/eval_retrieval.py`); a missing artifact degrades to
   `{"available": false}` rather than failing.
 - **Result:** full `pytest` green (246, +10). Headline echoes: FUNSD `test_50.qa_links` F1 0.727;
-  QA `gt_markdown` answer_exact 0.675. PR-B (report) and PR-C (Gradio demo) follow.
+  QA `gt_markdown` answer_exact 0.675. PR-B (report) and PR-C (Gradio demo) later landed on the
+  integrated Phase 4 demo branch.
 
 ---
 
diff --git a/PLAN.md b/PLAN.md
index 3a539c0..f228aa1 100644
--- a/PLAN.md
+++ b/PLAN.md
@@ -545,15 +545,16 @@ Implementation details:
 **Phases 0 through 3 are complete and merged** (v1 = table-only RAG; Phase 2 = DocLayNet
 layout-crop integration; Phase 3 = FUNSD relation baseline, both merged to `main`
 2026-06-03). **Phase 4 (full demo + evaluation + report) is in progress** on
-`feature/phase4-demo-eval-report`; PR-A (the eval-summary backbone) has landed.
+`feature/phase4-demo`; PR-A/PR-B/PR-C are implemented on the branch and ready for final review.
 
-Phase 4 PR-A delivered (capstone summary backbone; see `docs/phase4_brief.md`):
+Phase 4 PR-A delivered (eval-summary backbone; see `docs/phase4_brief.md`):
 `src/phase4_summary.py` (pure per-phase summarizers + inline layout-CSV aggregation + markdown
 render), `scripts/build_phase4_summary.py` (writes `outputs/evaluation/phase4_summary.json` and
 the committed `reports/phase4_metrics.md`), `tests/test_phase4_summary.py` (10 synthetic tests).
 Report numbers are generated from the summary (never hand-copied), guarded by a no-drift gate.
-Next: PR-B (`reports/final_report.md` + `notebooks/07_final_report.ipynb`) and PR-C
-(`scripts/run_demo.py` + `notebooks/06_demo.ipynb`, key-optional Gradio demo).
+PR-B (`reports/final_report.md` + `notebooks/07_final_report.ipynb`) and PR-C
+(`scripts/run_demo.py` + `notebooks/06_demo.ipynb`, key-optional Gradio demo) are now present on
+the integrated Phase 4 demo branch.
 
 Phase 3 V1 delivered (annotation-only deterministic relation baseline; see
 `docs/phase3_brief.md`): `src/funsd_extraction.py` (parse + dedupe + per-answer-argmax
diff --git a/README.md b/README.md
index a6a84fb..79b5f98 100644
--- a/README.md
+++ b/README.md
@@ -58,6 +58,6 @@ pipeline); and Phase 3 FUNSD relation-linking baseline (annotation-only determin
 predictor, held-out `test_50.qa_links` F1 0.727).
 
 Current phase: Phase 4 (full demo + evaluation + report) is in progress on
-`feature/phase4-demo-eval-report` — a capstone that aggregates the per-phase metrics into
+`feature/phase4-demo` — a final integration phase that aggregates the per-phase metrics into
 one summary, a key-optional Gradio demo, and a written report. See [PLAN.md](PLAN.md) for
 the phase roadmap.
diff --git a/docs/phase4_brief.md b/docs/phase4_brief.md
index cc598ad..3a659d0 100644
--- a/docs/phase4_brief.md
+++ b/docs/phase4_brief.md
@@ -1,17 +1,16 @@
-# Phase 4 — Demo + Eval Summary + Final Report (capstone)
+# Phase 4 — Final Demo + Eval Summary + Final Report
 
 > Implementation brief for Phase 4. Committed in the repo (travels with `git pull` to Colab) so
 > the references to it in `DEVLOG.md` and the `src/phase4_summary.py` /
-> `scripts/build_phase4_summary.py` docstrings resolve. Status: PR-A (the eval-summary backbone)
-> implemented on `feature/phase4-demo-eval-report` — `src/phase4_summary.py`,
-> `scripts/build_phase4_summary.py`, `tests/test_phase4_summary.py`, and the generated
-> `reports/phase4_metrics.md`. PR-B (report) and PR-C (demo) follow.
+> `scripts/build_phase4_summary.py` docstrings resolve. Status: PR-A/PR-B/PR-C are implemented
+> on `feature/phase4-demo` — summary backbone, generated metrics, final report, report notebook,
+> key-optional Gradio demo, and demo notebook.
 
 ## Context
 
 Phases 0-3 are merged to `main` (FinTabNet.c table topology + OCR content + table-only RAG +
-DocLayNet layout + FUNSD relations). Phase 4 is the **capstone**: make the work presentable,
-reportable, and reproducible. It is explicitly **not new research** — it assembles the existing
+DocLayNet layout + FUNSD relations). Phase 4 is the **final integration**: make the work
+presentable, reportable, and reproducible. It is explicitly **not new research** — it assembles the existing
 deterministic/custom metrics into one summary, a Gradio demo, and a written report.
 GriTS/Ragas/DeepEval are future work.
 
@@ -97,8 +96,9 @@ cross-encoder reranker / learned query routing; live PDF -> pipeline; HF Spaces
 ## Build order (TDD) + PR boundaries
 - **PR-A (core, done):** tests -> `src/phase4_summary.py` -> `scripts/build_phase4_summary.py` ->
   generated `reports/phase4_metrics.md`; + README/DEVLOG/PLAN docs.
-- **PR-B (report):** `reports/final_report.md` + `notebooks/07_final_report.ipynb`.
-- **PR-C (demo):** `scripts/run_demo.py` + `notebooks/06_demo.ipynb`.
+- **PR-B (report, done):** `reports/final_report.md` + `notebooks/07_final_report.ipynb`.
+- **PR-C (demo, done):** `scripts/run_demo.py` + `notebooks/06_demo.ipynb`.
 
 ## Branch
-`feature/phase4-demo-eval-report` cut from the latest `origin/main` after `git fetch`.
+`feature/phase4-demo` integrates PR-A/PR-B/PR-C and was cut from the latest `origin/main` after
+`git fetch`.
diff --git a/notebooks/06_demo.ipynb b/notebooks/06_demo.ipynb
index 1cd4d2f..2d448f7 100644
--- a/notebooks/06_demo.ipynb
+++ b/notebooks/06_demo.ipynb
@@ -45,7 +45,7 @@
     "import os\n",
     "\n",
     "REPO = '/content/FinDocStructRAG'\n",
-    "BRANCH = 'feature/phase4-demo'  # PR-C; flip to 'main' after merge\n",
+    "BRANCH = 'feature/phase4-demo'  # integrated Phase 4 branch; flip to 'main' after merge\n",
     "\n",
     "if not os.path.isdir(f'{REPO}/.git'):\n",
     "    !git clone --quiet https://github.com/AD2000X/FinDocStructRAG.git {REPO}\n",
diff --git a/notebooks/07_final_report.ipynb b/notebooks/07_final_report.ipynb
index d78fe45..43a7a8c 100644
--- a/notebooks/07_final_report.ipynb
+++ b/notebooks/07_final_report.ipynb
@@ -6,7 +6,7 @@
    "source": [
     "# Phase 4 - Final report (Colab runner)\n",
     "\n",
-    "Runner only: mount Drive, pull the Phase 4 branch, regenerate the capstone summary from the staged\n",
+    "Runner only: mount Drive, pull the Phase 4 branch, regenerate the Phase 4 summary from the staged\n",
     "evaluation artifacts, then render the final report and the generated metrics table inline. Logic\n",
     "lives in `src/` and `scripts/`, not in this notebook (P1/P2).\n",
     "\n",
@@ -43,7 +43,7 @@
     "import os\n",
     "\n",
     "REPO = '/content/FinDocStructRAG'\n",
-    "BRANCH = 'feature/phase4-report'  # PR-B; flip to 'main' after merge\n",
+    "BRANCH = 'feature/phase4-demo'  # integrated Phase 4 branch; flip to 'main' after merge\n",
     "\n",
     "if not os.path.isdir(f'{REPO}/.git'):\n",
     "    !git clone --quiet https://github.com/AD2000X/FinDocStructRAG.git {REPO}\n",
@@ -79,7 +79,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Step 1 - build the capstone summary\n",
+    "## Step 1 - build the Phase 4 summary\n",
     "\n",
     "Aggregates the per-phase artifacts on Drive into `outputs/evaluation/phase4_summary.json` and the\n",
     "committed `reports/phase4_metrics.md`. Re-running is idempotent (no-drift)."
diff --git a/reports/final_report.md b/reports/final_report.md
index 03a9e3f..c97c442 100644
--- a/reports/final_report.md
+++ b/reports/final_report.md
@@ -2,7 +2,7 @@
 
 A layout-aware pipeline for extracting structured tables from financial-report PDFs and
 answering questions over them, plus a standalone form relation-linking baseline. This report
-is the Phase 4 capstone: it states what was built, how it was evaluated, and what the results
+is the Phase 4 final report: it states what was built, how it was evaluated, and what the results
 mean. **All metric numbers are generated** by `scripts/build_phase4_summary.py` into
 `reports/phase4_metrics.md` and are never hand-copied into this prose;
 `notebooks/07_final_report.ipynb` renders that generated table inline beneath this report.
@@ -100,7 +100,7 @@ per-phase notebooks (`notebooks/01`-`05`) are the runners for steps 1-6.
 5. **Phase 2 layout.** `run_layout_batch.py` -> `eval_layout_iou.py --require-table-gt` (pos) and
    `--exclude-table-gt` (neg) -> `smoke_structure.py`.
 6. **Phase 3 relations.** `evaluate_funsd.py`.
-7. **Capstone summary.** `python scripts/build_phase4_summary.py` ->
+7. **Phase 4 summary.** `python scripts/build_phase4_summary.py` ->
    `reports/phase4_metrics.md` + `outputs/evaluation/phase4_summary.json` (this report reads the
    former).
-8. **Demo.** `python scripts/run_demo.py` (key-optional Gradio; PR-C).
+8. **Demo.** `python scripts/run_demo.py` (key-optional Gradio final demo).
diff --git a/scripts/build_phase4_summary.py b/scripts/build_phase4_summary.py
index 32542ab..5afd3be 100644
--- a/scripts/build_phase4_summary.py
+++ b/scripts/build_phase4_summary.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Build the Phase 4 capstone summary from the per-phase evaluation artifacts.
+"""Build the Phase 4 summary from the per-phase evaluation artifacts.
 
 Reads the five metrics JSONs + the three Phase 2 layout CSVs from outputs/, aggregates them with
 the pure helpers in src/phase4_summary.py, and writes:
@@ -50,7 +50,7 @@ def _layout_part(layout_dir: Path):
 
 
 def main() -> None:
-    ap = argparse.ArgumentParser(description="Build the Phase 4 capstone summary.")
+    ap = argparse.ArgumentParser(description="Build the Phase 4 summary.")
     ap.add_argument("--run-id", default="mvp_rand",
                     help="run-id suffix of the Phase 1A/1B deliverable artifacts")
     args = ap.parse_args()
diff --git a/scripts/run_demo.py b/scripts/run_demo.py
index a05bce5..000880e 100644
--- a/scripts/run_demo.py
+++ b/scripts/run_demo.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Phase 4 demo: artifact-backed Gradio app for the FinDocStructRAG capstone.
+"""Phase 4 final demo: artifact-backed Gradio app for FinDocStructRAG.
 
 Serves the already-produced evaluation artifacts (metrics, table outputs, layout crops, FUNSD
 results) and does live BM25 retrieval + (optional) grounded answer generation over the existing
@@ -238,7 +238,7 @@ def funsd_view() -> str:
 
 def overview_view() -> str:
     summary = _load_json(config.EVALUATION / "phase4_summary.json")
-    parts = ["## Capstone overview", ""]
+    parts = ["## Project overview", ""]
     if summary:
         parts.append("**Artifact availability:** " + ", ".join(
             f"{name}={'OK' if part.get('available') else 'MISSING'}" for name, part in summary.items()))
@@ -277,8 +277,8 @@ def main() -> None:
     pages = list_layout_pages()
     answer_gen = "enabled" if HAS_KEY else "disabled (no OPENROUTER_API_KEY)"
 
-    with gr.Blocks(title="FinDocStructRAG capstone demo") as demo:
-        gr.Markdown(f"# FinDocStructRAG - capstone demo\n"
+    with gr.Blocks(title="FinDocStructRAG final demo") as demo:
+        gr.Markdown(f"# FinDocStructRAG - final demo\n"
                     f"Artifact-backed. Retrieval: {', '.join(RETRIEVAL_METHODS)}. "
                     f"Answer generation: {answer_gen}.")
 
diff --git a/src/phase4_summary.py b/src/phase4_summary.py
index 5f04afc..d216675 100644
--- a/src/phase4_summary.py
+++ b/src/phase4_summary.py
@@ -1,4 +1,4 @@
-"""Phase 4 capstone: aggregate the per-phase evaluation artifacts into one summary.
+"""Phase 4 summary: aggregate the per-phase evaluation artifacts into one summary.
 
 Pure helpers only - no file IO, no Drive, no gradio. Each summarizer takes an already-loaded
 metrics dict (the per-phase evaluation JSON) or parsed CSV rows (layout) and returns a normalized
diff --git a/tests/test_phase4_summary.py b/tests/test_phase4_summary.py
index f483074..5595332 100644
--- a/tests/test_phase4_summary.py
+++ b/tests/test_phase4_summary.py
@@ -1,4 +1,4 @@
-"""Phase 4 capstone summary tests (CPU, synthetic) - Phase 4.
+"""Phase 4 summary tests (CPU, synthetic).
 
 The summarizers take already-loaded metrics dicts (the per-phase evaluation JSONs) or parsed
 CSV rows (layout) and return normalized summary dicts; no file IO, no Drive, no gradio is