From faeaafcff1bbed9d478f2dcdab035d118d6f85d9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 23:42:36 +0000 Subject: [PATCH 1/3] Add CodeGraph shared workflow, reference docs, and code search example workflow Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/workflows/codegraph-code-search.md | 181 ++++++++++ .github/workflows/shared/mcp/codegraph.md | 244 +++++++++++++ DICTATION.md | 1 + docs/src/content/docs/reference/codegraph.md | 354 +++++++++++++++++++ docs/src/content/docs/reference/tools.md | 1 + 5 files changed, 781 insertions(+) create mode 100644 .github/workflows/codegraph-code-search.md create mode 100644 .github/workflows/shared/mcp/codegraph.md create mode 100644 docs/src/content/docs/reference/codegraph.md diff --git a/.github/workflows/codegraph-code-search.md b/.github/workflows/codegraph-code-search.md new file mode 100644 index 00000000000..91f917219c6 --- /dev/null +++ b/.github/workflows/codegraph-code-search.md @@ -0,0 +1,181 @@ +--- +name: CodeGraph Code Search Analysis +description: On-demand deep code search using CodeGraph's semantic knowledge graph — finds code by concept, traces dependencies, and maps architecture from a pull request comment command +on: + slash_command: + name: codegraph + events: [pull_request_comment, issue_comment] + workflow_dispatch: + inputs: + query: + description: "Code search query (e.g. 
'how does authentication work?')" + required: true + type: string +permissions: + contents: read + pull-requests: read + issues: read +engine: copilot +strict: true +timeout-minutes: 30 +imports: + - uses: shared/mcp/codegraph.md + with: + index-tier: fast + cache-key: "codegraph-${{ github.repository }}-${{ hashFiles('**/*.go', '**/*.ts', '**/*.rs', '**/*.py') }}" + - shared/observability-otlp.md +tools: + cli-proxy: true + bash: + - "cat *" + - "ls *" + - "echo *" + - "find *" + - "grep *" + - "wc -l *" + github: + mode: gh-proxy + toolsets: [default] +safe-outputs: + add-comment: + hide-older-comments: true + max: 3 + messages: + footer: "> 🔍 *CodeGraph analysis by [{workflow_name}]({run_url})*{effective_tokens_suffix}{history_link}" + run-started: "🔍 [{workflow_name}]({run_url}) is indexing the codebase and searching..." + run-success: "✅ [{workflow_name}]({run_url}) code search complete." + run-failure: "⚠️ [{workflow_name}]({run_url}) {status} during code search." +network: + allowed: + - defaults + - github + - api.anthropic.com + - api.openai.com + - jina.ai +--- + +# CodeGraph Code Search Agent 🔍 + +You are a code search and analysis agent powered by **CodeGraph** — a semantic knowledge +graph that understands code structure, dependencies, and relationships across the entire codebase. + +## Context + +- **Repository**: ${{ github.repository }} +- **Workspace**: ${{ github.workspace }} +- **Query** (if manual dispatch): `${{ github.event.inputs.query }}` +- **Comment** (if slash command): `${{ steps.sanitized.outputs.text }}` + +## Initialization + +Always start by loading CodeGraph's guidance: + +``` +Tool: read_initial_instructions +(from the codegraph MCP server) +``` + +## Determine the Search Task + +Parse the user's query from either: +1. The `${{ github.event.inputs.query }}` input (workflow_dispatch) +2. 
The comment text after `/codegraph` (slash_command) + +If no specific query is provided, default to: "Give me an overview of this codebase's architecture." + +## Code Search Strategy + +Select the most appropriate CodeGraph tool for the task: + +### For "find code" queries (e.g., "where is X implemented?", "show me how Y works") +``` +Tool: agentic_context +Args: { "query": "<user query>", "focus": "search" } +``` + +### For impact/dependency questions (e.g., "what calls X?", "what breaks if I change Y?") +``` +Tool: agentic_impact +Args: { "query": "<user query>", "focus": "dependencies" } +``` + +### For execution flow questions (e.g., "trace the path from A to B") +``` +Tool: agentic_impact +Args: { "query": "<user query>", "focus": "call_chain" } +``` + +### For architecture questions (e.g., "how is this project structured?", "what's the API surface of X?") +``` +Tool: agentic_architecture +Args: { "query": "<user query>" } +``` + +### For quality/complexity questions (e.g., "what's the most complex module?", "where should I refactor first?") +``` +Tool: agentic_quality +Args: { "query": "<user query>" } +``` + +### For pre-implementation context (e.g., "I need to add X, what should I know?") +``` +Tool: agentic_context +Args: { "query": "<user query>", "focus": "builder" } +``` + +### For cross-cutting questions (e.g., "how is error handling done across the codebase?") +``` +Tool: agentic_context +Args: { "query": "<user query>", "focus": "question" } +``` + +## Supplemental Bash Commands + +Use bash to verify or enrich CodeGraph's results when helpful: + +```bash +# Confirm a file/symbol exists +find . -name "*.go" | xargs grep -l "FunctionName" 2>/dev/null | head -5 + +# Count related files +find pkg/ -name "*.go" ! -name "*_test.go" | wc -l + +# Quick pattern check +grep -r "pattern" --include="*.go" -l | head -10 +``` + +## Output Format + +Post a **concise, actionable comment** on the issue or pull request. 
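Structure: + +```markdown +## 🔍 CodeGraph Analysis: <query summary> + +<1-2 sentence answer to the query> + +### Key Findings + +- **<finding 1>**: <explanation> +- **<finding 2>**: <explanation> +- **<finding 3>**: <explanation> + +<optional code snippet or file references> + +<details>
+<summary>Full Analysis Details</summary> + +<detailed analysis from CodeGraph> + +</details>
+``` + +## Important Notes + +- Keep the top-level comment **short** — 3-5 bullet points, expandable details in `<details>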
` +- Reference specific files and functions by path (e.g., `pkg/workflow/compiler.go:142`) +- If CodeGraph returns references to internal node IDs, resolve them to human-readable file paths using bash `find` or `cat` before reporting +- Call exactly one safe-output tool: `add_comment` for slash commands, or `noop` if no useful result found + +```json +{"noop": {"message": "No code search query provided or query could not be interpreted."}} +``` diff --git a/.github/workflows/shared/mcp/codegraph.md b/.github/workflows/shared/mcp/codegraph.md new file mode 100644 index 00000000000..ea40c1d2f06 --- /dev/null +++ b/.github/workflows/shared/mcp/codegraph.md @@ -0,0 +1,244 @@ +--- +# CodeGraph MCP Server - Semantic Code Knowledge Graph +# Transforms your codebase into a semantically searchable knowledge graph +# that AI agents can reason about using graph traversal + vector embeddings. +# +# Documentation: https://github.com/Jakedismo/codegraph-rust +# +# Prerequisites (secrets): +# ANTHROPIC_API_KEY (or OPENAI_API_KEY / JINA_API_KEY) for the internal +# reasoning agents that power CodeGraph's agentic tools. +# +# Usage: +# imports: +# - uses: shared/mcp/codegraph.md +# with: +# index-tier: fast # fast | balanced | full (default: fast) +# cache-key: "codegraph-${{ github.repository }}" # optional; enables caching +# +# The shared workflow: +# 1. Installs SurrealDB (graph + vector database) +# 2. Builds/restores the codegraph binary (Rust; cached after first build) +# 3. Applies the CodeGraph schema to SurrealDB +# 4. Restores the code index from cache (if cache-key is set and cache exists) +# 5. Indexes the workspace (skipped on cache hit) +# 6. Saves the index to cache (if cache-key is set and cache was missed) +# 7. Starts the codegraph MCP server in stdio mode + +import-schema: + index-tier: + type: string + enum: [fast, balanced, full] + default: fast + description: > + Indexing tier controlling speed vs. graph richness. 
+ fast: AST nodes + core edges only (no LSP or enrichment). Fastest, lowest storage. + balanced: LSP symbols + docs/enrichment + module linking. Best for agentic workflows. + full: All analyzers + LSP definitions + dataflow + architecture. Maximum accuracy. + cache-key: + type: string + default: "" + description: > + GitHub Actions cache key for the SurrealDB index. When set, the index is cached + across workflow runs. On a cache hit the indexing step is skipped (read-only mode). + Use a key that includes the content hash to invalidate on source changes, e.g.: + "codegraph-${{ github.repository }}-${{ hashFiles('**/*.rs', '**/*.go', '**/*.ts') }}" + +steps: + - name: Restore codegraph binary from cache + id: binary-cache + uses: actions/cache/restore@v4 + with: + path: ~/.cargo/bin/codegraph + key: codegraph-bin-${{ runner.os }}-${{ runner.arch }} + + - name: Install Rust toolchain + if: steps.binary-cache.outputs.cache-hit != 'true' + uses: dtolnay/rust-toolchain@stable + + - name: Build and install codegraph from source + if: steps.binary-cache.outputs.cache-hit != 'true' + run: | + set -euo pipefail + echo "Building codegraph from source (this takes a few minutes on first run)..." + cargo install \ + --git https://github.com/Jakedismo/codegraph-rust \ + --bin codegraph \ + --all-features \ + --quiet + echo "codegraph installed: $(codegraph --version 2>/dev/null || echo 'version unknown')" + + - name: Save codegraph binary to cache + if: steps.binary-cache.outputs.cache-hit != 'true' + uses: actions/cache/save@v4 + with: + path: ~/.cargo/bin/codegraph + key: codegraph-bin-${{ runner.os }}-${{ runner.arch }} + + - name: Install SurrealDB + run: | + set -euo pipefail + if ! 
command -v surreal &>/dev/null; then + curl -sSf https://install.surrealdb.com | sh + echo "$HOME/.surrealdb" >> "$GITHUB_PATH" + export PATH="$HOME/.surrealdb:$PATH" + fi + echo "SurrealDB version: $(surreal version 2>/dev/null || surreal --version 2>/dev/null)" + + - name: Restore codegraph index from cache + id: index-cache + if: "${{ github.aw.import-inputs['cache-key'] != '' }}" + uses: actions/cache/restore@v4 + with: + path: /tmp/codegraph-data + key: "${{ github.aw.import-inputs['cache-key'] }}" + restore-keys: "codegraph-index-${{ github.repository }}-" + + - name: Start SurrealDB (in-memory or file-backed) + run: | + set -euo pipefail + mkdir -p /tmp/codegraph-data /tmp/gh-aw/codegraph/logs + + SURREAL_BIN="${HOME}/.surrealdb/surreal" + if ! command -v "$SURREAL_BIN" &>/dev/null; then + SURREAL_BIN="surreal" + fi + + # Use file-backed mode when caching is enabled for persistence between steps + if [ -n "$CODEGRAPH_CACHE_KEY" ]; then + DB_BACKEND="file:///tmp/codegraph-data/surreal.db" + else + DB_BACKEND="memory" + fi + + "$SURREAL_BIN" start \ + --bind "0.0.0.0:3004" \ + --user root \ + --pass root \ + "$DB_BACKEND" \ + > /tmp/gh-aw/codegraph/logs/surrealdb.log 2>&1 & + + SURREAL_PID=$! + echo "$SURREAL_PID" > /tmp/gh-aw/codegraph/surrealdb.pid + + # Wait for SurrealDB to become ready + for i in $(seq 1 30); do + if "$SURREAL_BIN" is-ready --endpoint "http://localhost:3004" 2>/dev/null; then + echo "SurrealDB is ready (PID: $SURREAL_PID)" + break + fi + if [ "$i" -eq 30 ]; then + echo "SurrealDB failed to start. Logs:" + cat /tmp/gh-aw/codegraph/logs/surrealdb.log + exit 1 + fi + echo "Waiting for SurrealDB... ($i/30)" + sleep 2 + done + env: + CODEGRAPH_CACHE_KEY: "${{ github.aw.import-inputs['cache-key'] }}" + + - name: Apply CodeGraph schema + run: | + set -euo pipefail + SURREAL_BIN="${HOME}/.surrealdb/surreal" + if ! command -v "$SURREAL_BIN" &>/dev/null; then + SURREAL_BIN="surreal" + fi + + echo "Applying CodeGraph schema..." 
+ curl -fsSL \ + https://raw.githubusercontent.com/Jakedismo/codegraph-rust/main/schema/codegraph.surql \ + | "$SURREAL_BIN" sql \ + --endpoint "ws://localhost:3004" \ + --namespace ouroboros \ + --database codegraph \ + --username root \ + --password root + echo "Schema applied successfully" + + - name: Index repository with CodeGraph + if: steps.index-cache.outputs.cache-hit != 'true' + run: | + set -euo pipefail + TIER="${CODEGRAPH_INDEX_TIER:-fast}" + echo "Indexing ${GITHUB_WORKSPACE} with tier: ${TIER}" + + codegraph index \ + --path "${GITHUB_WORKSPACE}" \ + --index-tier "${TIER}" \ + 2>&1 | tee /tmp/gh-aw/codegraph/logs/index.log + + echo "Indexing complete. Log tail:" + tail -5 /tmp/gh-aw/codegraph/logs/index.log + env: + SURREAL_URL: "ws://localhost:3004" + SURREAL_USER: root + SURREAL_PASS: root + SURREAL_NAMESPACE: ouroboros + SURREAL_DATABASE: codegraph + CODEGRAPH_INDEX_TIER: "${{ github.aw.import-inputs['index-tier'] }}" + ANTHROPIC_API_KEY: "${{ secrets.CODEGRAPH_ANTHROPIC_API_KEY || secrets.ANTHROPIC_API_KEY }}" + OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}" + JINA_API_KEY: "${{ secrets.JINA_API_KEY }}" + + - name: Save codegraph index to cache + if: | + steps.index-cache.outputs.cache-hit != 'true' && + github.aw.import-inputs['cache-key'] != '' + uses: actions/cache/save@v4 + with: + path: /tmp/codegraph-data + key: "${{ github.aw.import-inputs['cache-key'] }}" + +mcp-servers: + codegraph: + command: "codegraph" + args: ["start", "stdio"] + env: + SURREAL_URL: "ws://localhost:3004" + SURREAL_USER: "root" + SURREAL_PASS: "root" + SURREAL_NAMESPACE: "ouroboros" + SURREAL_DATABASE: "codegraph" + ANTHROPIC_API_KEY: "${{ secrets.CODEGRAPH_ANTHROPIC_API_KEY || secrets.ANTHROPIC_API_KEY }}" + OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}" + JINA_API_KEY: "${{ secrets.JINA_API_KEY }}" + CODEGRAPH_ARCH_BOOTSTRAP: "true" + CODEGRAPH_INDEX_TIER: "${{ github.aw.import-inputs['index-tier'] }}" +--- + +## CodeGraph Semantic Code Analysis + +The CodeGraph 
MCP server is connected. The `${{ github.repository }}` codebase has been +indexed using the **${{ github.aw.import-inputs.index-tier }}** tier. + +- **Workspace**: `${{ github.workspace }}` +- **Index tier**: `${{ github.aw.import-inputs.index-tier }}` +- **Caching**: ${{ github.aw.import-inputs['cache-key'] != '' && 'enabled' || 'disabled (single-run)' }} +- **Logs**: `/tmp/gh-aw/codegraph/logs/` + +### Initialize Session + +Load CodeGraph's context and guidance before your first tool call: + +``` +Tool: read_initial_instructions +``` + +### Available Agentic Tools + +| Tool | Best For | +|------|----------| +| `agentic_context` | Finding code, exploring patterns, building pre-implementation context | +| `agentic_impact` | Dependency analysis, impact mapping before refactoring | +| `agentic_architecture` | Big-picture structure, API surface analysis | +| `agentic_quality` | Complexity hotspots, coupling metrics, refactoring priorities | + +### Analysis Strategy + +1. **Always call `read_initial_instructions` first** — loads per-tool guidance +2. **Use `agentic_context` with `focus: "search"`** for concept-based code search +3. **Use `agentic_impact` with `focus: "dependencies"`** to trace what a change would break +4. **Use `agentic_context` with `focus: "builder"`** to gather pre-implementation context +5. **Use `agentic_quality`** to identify where complexity accumulates before refactoring diff --git a/DICTATION.md b/DICTATION.md index a9b3c2b45e1..6b299317df8 100644 --- a/DICTATION.md +++ b/DICTATION.md @@ -312,6 +312,7 @@ When fixing dictated text, correct these common misrecognitions: - "cloud" → claude (when referring to the AI engine) - "gem ini" → gemini (when referring to the AI engine) - "serena" → serena (code intelligence MCP server) +- "code graph" → codegraph (semantic code knowledge graph MCP server) ### Commands and Operations - "G.H. A.W." 
→ gh-aw or `gh aw` (depending on context) diff --git a/docs/src/content/docs/reference/codegraph.md b/docs/src/content/docs/reference/codegraph.md new file mode 100644 index 00000000000..2230c281600 --- /dev/null +++ b/docs/src/content/docs/reference/codegraph.md @@ -0,0 +1,354 @@ +--- +title: CodeGraph Code Search +description: Configure a semantic code knowledge graph for AI-powered code search, dependency analysis, and architecture exploration in agentic workflows. +sidebar: + order: 821 +--- + +CodeGraph Code Search provides a semantically searchable knowledge graph over your codebase. It runs [Jakedismo/codegraph-rust](https://github.com/Jakedismo/codegraph-rust) as an MCP server so agents can search, reason about, and navigate code by meaning rather than by text pattern. + +Unlike text search or basic semantic embedding tools, CodeGraph builds a **real knowledge graph**: AST nodes and edges enriched with relationships (calls, defines, uses, returns, mutates) that allow agents to traverse dependencies, trace call chains, and understand architecture — not just find matching strings. + +The index is built in a dedicated indexing step (with a separate `contents: read` job or pre-agent steps) and shared with the agent via GitHub Actions cache, so the agent can re-use the same index across runs without re-indexing. + +:::caution[Experimental] +CodeGraph Code Search is an experimental integration. The `codegraph-rust` project does not yet publish pre-built binaries, so the binary is compiled from source on first use (cached thereafter). The MCP API may change as the project evolves. +::: + +## Prerequisites + +CodeGraph's internal reasoning agents require an LLM API key. 
Configure at least one of the following repository secrets: + +| Secret | Provider | +|--------|----------| +| `ANTHROPIC_API_KEY` | Claude (recommended) | +| `OPENAI_API_KEY` | GPT models | +| `JINA_API_KEY` | Jina AI embeddings (free tier available) | + +You can also use `CODEGRAPH_ANTHROPIC_API_KEY` as a scoped alias for `ANTHROPIC_API_KEY` if you want to isolate the key used by CodeGraph from other workflows. + +## Basic Configuration + +```aw wrap +--- +engine: copilot +permissions: + contents: read +imports: + - uses: shared/mcp/codegraph.md + with: + index-tier: fast +--- +``` + +## Configuration Options + +### `index-tier` + +Controls the depth of analysis during indexing. Defaults to `fast`. + +| Tier | What it enables | Typical use | +|------|-----------------|-------------| +| `fast` | AST nodes + core edges only | Quick CI runs, low storage | +| `balanced` | LSP symbols + docs/enrichment + module linking | Best agentic accuracy | +| `full` | All analyzers + LSP definitions + dataflow + architecture | Maximum richness | + +```aw wrap +--- +imports: + - uses: shared/mcp/codegraph.md + with: + index-tier: balanced +--- +``` + +The `balanced` tier requires language server tools to be available on the runner: + +| Language | Requires | +|----------|---------| +| Rust | `rust-analyzer` | +| TypeScript/JavaScript | `node` + `typescript-language-server` | +| Python | `node` + `pyright-langserver` | +| Go | `gopls` | + +### `cache-key` + +A GitHub Actions cache key used to persist the SurrealDB index across workflow runs. When set and a cache hit occurs, the indexing step is skipped (read-only mode), making the agent job significantly faster. + +```aw wrap +--- +imports: + - uses: shared/mcp/codegraph.md + with: + cache-key: "codegraph-${{ github.repository }}-${{ hashFiles('src/**', '*.rs') }}" +--- +``` + +When the cache key does not match (cache miss), CodeGraph re-indexes and saves the new index under the given key. 
Use content-based hashing in the key to invalidate the cache when source files change. + +**Read-only mode** (restore without re-indexing): set `cache-key` to a fixed key that was previously saved: + +```aw wrap +--- +imports: + - uses: shared/mcp/codegraph.md + with: + cache-key: "codegraph-my-project-stable" +--- +``` + +If the cache key does not exist at all, indexing runs normally and the result is saved. + +## Agentic Tools + +CodeGraph exposes four consolidated agentic tools. Each tool runs an internal reasoning agent that plans, searches the graph, and synthesizes an answer — not just a list of files. + +| Tool | Focus values | Best for | +|------|-------------|----------| +| `agentic_context` | `search`, `builder`, `question` | Finding code by concept; pre-implementation context | +| `agentic_impact` | `dependencies`, `call_chain` | Impact analysis before refactoring; tracing callers | +| `agentic_architecture` | `structure`, `api_surface` | Architecture overview; public API enumeration | +| `agentic_quality` | `complexity`, `coupling`, `hotspots` | Risk assessment; refactoring targets | + +Each tool accepts an optional `focus` parameter. Without it, the tool auto-selects the most appropriate reasoning strategy based on the query. + +## Example: Code Search on Pull Request + +```aw wrap +--- +on: + pull_request: +engine: copilot +permissions: + contents: read + pull-requests: write +imports: + - uses: shared/mcp/codegraph.md + with: + index-tier: fast + cache-key: "codegraph-${{ github.repository }}-${{ hashFiles('**/*.go', '**/*.ts', '**/*.rs') }}" +safe-outputs: + add-comment: + hide-older-comments: true +--- + +Call `read_initial_instructions` from the codegraph MCP server, then analyze the pull request diff +to answer: which functions and modules are affected by these changes? 
+ +Use `agentic_impact` with `focus: "dependencies"` for each changed file to understand +the blast radius, then use `agentic_context` with `focus: "search"` to find any tests +or related patterns that should be updated. + +Post a concise comment summarizing the impact. +``` + +## Example: Weekly Architecture Analysis + +```aw wrap +--- +on: + schedule: weekly on monday around 09:00 + workflow_dispatch: +engine: copilot +permissions: + contents: read + issues: write +imports: + - uses: shared/mcp/codegraph.md + with: + index-tier: balanced + cache-key: "codegraph-${{ github.repository }}-arch" +safe-outputs: + create-issue: + title-prefix: "[arch-analysis] " + labels: [architecture, automated-analysis] + max: 1 + close-older-issues: true +--- + +Call `read_initial_instructions`, then perform a weekly architecture health review: + +1. Use `agentic_quality` with `focus: "hotspots"` to identify the top 3 complexity hotspots +2. Use `agentic_quality` with `focus: "coupling"` to find tightly coupled modules +3. Use `agentic_architecture` to describe the overall layer structure + +Create a GitHub issue summarizing findings and actionable improvement suggestions. 
+``` + +## Example: Separate Indexing Job + +For large codebases or workflows where the agent job should not need `contents: read`, +use a custom indexing job to build the index and pass it as an artifact: + +```aw wrap +--- +on: + push: + branches: [main] + workflow_dispatch: +engine: copilot +permissions: + contents: read + issues: write +jobs: + codegraph-index: + runs-on: ubuntu-latest + needs: [activation] + permissions: + contents: read + steps: + - name: Checkout repository + uses: actions/checkout@v6.0.2 + with: + persist-credentials: false + + - name: Restore codegraph binary from cache + id: binary-cache + uses: actions/cache/restore@v4 + with: + path: ~/.cargo/bin/codegraph + key: codegraph-bin-${{ runner.os }}-${{ runner.arch }} + + - name: Install Rust and build codegraph + if: steps.binary-cache.outputs.cache-hit != 'true' + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + source "$HOME/.cargo/env" + cargo install \ + --git https://github.com/Jakedismo/codegraph-rust \ + --bin codegraph --all-features --quiet + + - name: Save codegraph binary to cache + if: steps.binary-cache.outputs.cache-hit != 'true' + uses: actions/cache/save@v4 + with: + path: ~/.cargo/bin/codegraph + key: codegraph-bin-${{ runner.os }}-${{ runner.arch }} + + - name: Install SurrealDB + run: | + curl -sSf https://install.surrealdb.com | sh + echo "$HOME/.surrealdb" >> "$GITHUB_PATH" + + - name: Start SurrealDB and index repository + run: | + mkdir -p /tmp/codegraph-data /tmp/gh-aw/codegraph/logs + surreal start \ + --bind 0.0.0.0:3004 \ + --user root --pass root \ + file:///tmp/codegraph-data/surreal.db \ + > /tmp/gh-aw/codegraph/logs/surrealdb.log 2>&1 & + for i in $(seq 1 30); do + surreal is-ready --endpoint http://localhost:3004 2>/dev/null && break + sleep 2 + done + curl -fsSL https://raw.githubusercontent.com/Jakedismo/codegraph-rust/main/schema/codegraph.surql \ + | surreal sql \ + --endpoint ws://localhost:3004 \ + --namespace ouroboros 
--database codegraph \ + --username root --password root + codegraph index --path "$GITHUB_WORKSPACE" --index-tier fast \ + 2>&1 | tee /tmp/gh-aw/codegraph/logs/index.log + env: + SURREAL_URL: ws://localhost:3004 + SURREAL_USER: root + SURREAL_PASS: root + SURREAL_NAMESPACE: ouroboros + SURREAL_DATABASE: codegraph + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + + - name: Upload codegraph index artifact + uses: actions/upload-artifact@v7.0.1 + with: + name: codegraph-index + path: /tmp/codegraph-data + retention-days: 1 + +steps: + - name: Download codegraph index artifact + uses: actions/download-artifact@v8.0.1 + with: + name: codegraph-index + path: /tmp/codegraph-data + + - name: Install SurrealDB and start with indexed data + run: | + curl -sSf https://install.surrealdb.com | sh + echo "$HOME/.surrealdb" >> "$GITHUB_PATH" + mkdir -p /tmp/gh-aw/codegraph/logs + surreal start \ + --bind 0.0.0.0:3004 \ + --user root --pass root \ + file:///tmp/codegraph-data/surreal.db \ + > /tmp/gh-aw/codegraph/logs/surrealdb.log 2>&1 & + for i in $(seq 1 30); do + surreal is-ready --endpoint http://localhost:3004 2>/dev/null && break + sleep 2 + done + + - name: Restore codegraph binary from cache + uses: actions/cache/restore@v4 + with: + path: ~/.cargo/bin/codegraph + key: codegraph-bin-${{ runner.os }}-${{ runner.arch }} + +mcp-servers: + codegraph: + command: "codegraph" + args: ["start", "stdio"] + env: + SURREAL_URL: ws://localhost:3004 + SURREAL_USER: root + SURREAL_PASS: root + SURREAL_NAMESPACE: ouroboros + SURREAL_DATABASE: codegraph + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + CODEGRAPH_ARCH_BOOTSTRAP: "true" +--- + +Call `read_initial_instructions` first, then perform your code analysis task. +``` + +In the separate-job pattern the indexing job runs with `contents: read` while the +agent job can operate without repository access, downloading only the pre-built index artifact. 
+ +## Telemetry + +Use `shared/observability-otlp.md` to record CodeGraph index and query metrics alongside the workflow's distributed trace: + +```yaml wrap title=".github/workflows/shared/codegraph-otlp.md" +--- +# Shared import: emit codegraph index stats after the agent job. + +steps: + - name: Record codegraph telemetry + id: codegraph-otlp + uses: actions/github-script@v8 + with: + script: | + const fs = require('fs'); + const otlp = require('/tmp/gh-aw/actions/otlp.cjs'); + + // codegraph writes index stats to /tmp/gh-aw/codegraph/logs/index.log + let nodesIndexed = 0; + try { + const log = fs.readFileSync('/tmp/gh-aw/codegraph/logs/index.log', 'utf8'); + const match = log.match(/nodes[:\s]+(\d+)/i); + if (match) nodesIndexed = parseInt(match[1], 10); + } catch { /* index not available */ } + + await otlp.logSpan('codegraph', { + 'codegraph.nodes.indexed': nodesIndexed, + }); +--- +``` + +## Related Documentation + +- [Tools](/gh-aw/reference/tools/) - Overview of all available tools and configuration +- [Imports](/gh-aw/reference/imports/) - Importing shared workflow components +- [QMD Documentation Search](/gh-aw/reference/qmd/) - Vector search over documentation files +- [Cache Memory](/gh-aw/reference/cache-memory/) - Persistent memory across workflow runs +- [GitHub Tools](/gh-aw/reference/github-tools/) - GitHub API operations +- [Custom OTLP Attributes](/gh-aw/guides/custom-otlp-attributes/) - Emit telemetry from shared imports +- [Serena](/gh-aw/guides/serena/) - LSP-based semantic code analysis (alternative/complement) diff --git a/docs/src/content/docs/reference/tools.md b/docs/src/content/docs/reference/tools.md index 4f1fe6a3b81..2a243537842 100644 --- a/docs/src/content/docs/reference/tools.md +++ b/docs/src/content/docs/reference/tools.md @@ -214,6 +214,7 @@ mcp-servers: - [Cache Memory](/gh-aw/reference/cache-memory/) - Persistent memory across workflow runs - [Repo Memory](/gh-aw/reference/repo-memory/) - Repository-specific memory storage - 
[QMD Documentation Search](/gh-aw/reference/qmd/) - Vector similarity search over documentation files +- [CodeGraph Code Search](/gh-aw/reference/codegraph/) - Semantic code knowledge graph for AI-powered code search - [MCP Scripts](/gh-aw/reference/mcp-scripts/) - Define custom inline tools with JavaScript or shell scripts - [Frontmatter](/gh-aw/reference/frontmatter/) - All frontmatter configuration options - [Network Permissions](/gh-aw/reference/network/) - Network access control for AI engines From 33b13754094cddf71d7179a2e3ec8685382ba79e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 01:24:32 +0000 Subject: [PATCH 2/3] Plan: add codegraph to daily-compiler-quality workflow Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/aw/actions-lock.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/aw/actions-lock.json b/.github/aw/actions-lock.json index cda10e31605..858a91139e0 100644 --- a/.github/aw/actions-lock.json +++ b/.github/aw/actions-lock.json @@ -33,11 +33,21 @@ "version": "v4.1.0", "sha": "a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32" }, + "actions/cache/restore@v4": { + "repo": "actions/cache/restore", + "version": "v4", + "sha": "0057852bfaa89a56745cba8c7296529d2fc39830" + }, "actions/cache/restore@v5.0.5": { "repo": "actions/cache/restore", "version": "v5.0.5", "sha": "27d5ce7f107fe9357f9df03efb73ab90386fccae" }, + "actions/cache/save@v4": { + "repo": "actions/cache/save", + "version": "v4", + "sha": "0057852bfaa89a56745cba8c7296529d2fc39830" + }, "actions/cache/save@v5.0.5": { "repo": "actions/cache/save", "version": "v5.0.5", From 1f15bb64ff30c58ff73c93bfeffa66f286a65277 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 01:27:32 +0000 Subject: [PATCH 3/3] feat: plug codegraph into daily-compiler-quality workflow Co-authored-by: pelikhan 
<4175913+pelikhan@users.noreply.github.com> --- .github/workflows/daily-compiler-quality.md | 67 ++++++++++++++++++--- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/.github/workflows/daily-compiler-quality.md b/.github/workflows/daily-compiler-quality.md index e1cefee1a74..4f8e19d9bad 100644 --- a/.github/workflows/daily-compiler-quality.md +++ b/.github/workflows/daily-compiler-quality.md @@ -17,6 +17,10 @@ imports: title-prefix: "[daily-compiler-quality] " expires: 1d - shared/go-source-analysis.md + - uses: shared/mcp/codegraph.md + with: + index-tier: balanced + cache-key: "codegraph-${{ github.repository }}-${{ hashFiles('pkg/workflow/compiler*.go') }}" - shared/observability-otlp.md tools: cli-proxy: true @@ -62,7 +66,7 @@ You are the Daily Compiler Quality Check Agent - a code quality specialist that ## Mission -Analyze a rotating subset of compiler files daily using Serena's semantic analysis capabilities to assess code quality. Generate comprehensive reports identifying areas that meet or fall short of "human-written quality" standards. Use cache memory to track analysis history and avoid re-analyzing unchanged files. +Analyze a rotating subset of compiler files daily using Serena's semantic analysis capabilities and CodeGraph's call-chain/dependency search to assess code quality. Generate comprehensive reports identifying areas that meet or fall short of "human-written quality" standards. Use cache memory to track analysis history and avoid re-analyzing unchanged files. ## Current Context @@ -148,9 +152,16 @@ Organize analysis state in `/tmp/gh-aw/cache-memory/`: 4. **Update rotation state** in `rotation.json` -## Phase 2: Analyze Code Quality with Serena +## Phase 2: Analyze Code Quality with Serena and CodeGraph + +For each selected file, use both the Serena MCP server (LSP-based symbol analysis) and the +CodeGraph MCP server (call-chain and dependency graph) to perform deep semantic analysis. 
-For each selected file, use Serena MCP server to perform deep semantic analysis: +**Before your first CodeGraph tool call**, initialize the session: +``` +Tool: read_initial_instructions +(from the codegraph MCP server) +``` ### Quality Assessment Criteria @@ -228,6 +239,32 @@ Use Serena's semantic understanding to identify: - Appropriate error handling patterns ``` +#### 6. Call-Chain & Dependency Health (bonus insight — CodeGraph) + +Use the CodeGraph MCP server to enrich the structural picture beyond what LSP provides: + +- **Blast radius**: Which functions/packages call into this file? +- **Call depth**: Are there deep call chains that make the code hard to follow? +- **Coupling hotspots**: Are any functions referenced from an unusually large number of callers? +- **Architecture alignment**: Does the file's role in the call graph match its stated responsibility? + +**CodeGraph Analysis**: +``` +# Dependency / blast-radius: who calls functions in this file? +Tool: agentic_impact +Args: { "query": "what depends on <file> and what breaks if it changes?", "focus": "dependencies" } + +# Trace the call chain into a key function (e.g., compileWorkflow): +Tool: agentic_impact +Args: { "query": "trace the execution path from entry points into <function>", "focus": "call_chain" } + +# Surface coupling and complexity hotspots within the file: +Tool: agentic_quality +Args: { "query": "what are the highest-coupling and most complex functions in <file>?" } +``` + +Record findings under a `codegraph_analysis` key alongside `serena_analysis` in the cache JSON. 
+ ### Scoring System Each dimension is scored out of its point allocation: @@ -283,6 +320,12 @@ For each analyzed file, document: "max_function_length": 78, "comment_density": "12%", "complexity_score": 7.2 + }, + "codegraph_analysis": { + "caller_count": 12, + "max_call_depth": 4, + "coupling_hotspots": ["compileWorkflow", "OrchestrateCompilation"], + "blast_radius_summary": "Changes here affect 3 packages" } } ``` @@ -579,7 +622,7 @@ The compiler codebase maintains **good overall quality** with an average score o --- *Report generated by Daily Compiler Quality Check workflow* -*Analysis powered by Serena MCP Server* +*Analysis powered by Serena MCP Server and CodeGraph* *Cache memory: `/tmp/gh-aw/cache-memory/compiler-quality/`* ``` @@ -591,7 +634,7 @@ The compiler codebase maintains **good overall quality** with an average score o ### Analysis Best Practices -- **Be Objective**: Use concrete metrics from Serena, not subjective opinions +- **Be Objective**: Use concrete metrics from Serena and CodeGraph, not subjective opinions - **Be Specific**: Reference exact line numbers, function names, and code patterns - **Be Actionable**: Provide clear recommendations with estimated effort - **Be Constructive**: Highlight strengths alongside areas for improvement @@ -604,6 +647,14 @@ The compiler codebase maintains **good overall quality** with an average score o 3. **Cache Results**: Store Serena findings in cache memory for future reference 4. **Validate Findings**: Cross-check Serena analysis with actual code +### CodeGraph Usage + +1. **Initialize Session**: Call `read_initial_instructions` (from the codegraph MCP server) before the first CodeGraph tool +2. **Dependency Map**: Use `agentic_impact` with `{ "focus": "dependencies" }` for each analyzed file to surface callers and blast radius +3. **Call Chain**: Use `agentic_impact` with `{ "focus": "call_chain" }` on complex or high-coupling functions to trace execution paths +4. 
**Quality Hotspots**: Use `agentic_quality` to get coupling and complexity rankings across the file +5. **Cache Results**: Store `codegraph_analysis` findings in the per-file JSON cache alongside `serena_analysis` + ### Cache Memory Management 1. **Check for Changes**: Always compare git hashes before re-analyzing @@ -614,6 +665,7 @@ The compiler codebase maintains **good overall quality** with an average score o ### Error Handling - If Serena is unavailable, fall back to basic static analysis with bash/grep +- If CodeGraph is unavailable, skip the call-chain/dependency section and note it in the report - If a file cannot be analyzed, document the issue and skip to next file - If cache is corrupted, reinitialize and start fresh analysis @@ -628,8 +680,9 @@ The compiler codebase maintains **good overall quality** with an average score o ## Success Criteria A successful analysis run: -- ✅ Analyzes 2-3 compiler files using Serena +- ✅ Analyzes 2-3 compiler files using Serena and CodeGraph - ✅ Generates comprehensive quality scores across all dimensions +- ✅ Enriches each file analysis with CodeGraph call-chain and dependency data - ✅ Saves analysis to cache memory with git hashes - ✅ Creates detailed discussion report with findings - ✅ Provides actionable recommendations @@ -638,6 +691,6 @@ A successful analysis run: --- -Begin your analysis now. Remember to use Serena's semantic capabilities to provide deep, meaningful insights into code quality beyond surface-level metrics. +Begin your analysis now. Remember to use Serena's LSP capabilities for symbol-level quality assessment and CodeGraph's semantic search for call-chain and dependency insights. {{#runtime-import shared/noop-reminder.md}}