-
Notifications
You must be signed in to change notification settings - Fork 9
126 lines (116 loc) · 5.08 KB
/
update-graph.yml
File metadata and controls
126 lines (116 loc) · 5.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Incrementally updates the FalkorDB docs knowledge graph on PR merge.
#
# On merge to main of a PR touching .md files, this workflow:
#   1. Computes the diff (added / modified / deleted .md files).
#   2. Reads the content of added + modified files.
#   3. POSTs the payload to GraphRAG-UI's /api/admin/update-graph endpoint.
#   4. The endpoint does all the SDK / FalkorDB / smoke-test work server-side
#      using its existing credentials. This workflow only needs ONE secret:
#      ``UPDATE_GRAPH_TOKEN`` — a shared-secret bearer token.
#
# No Azure OpenAI keys, no FalkorDB credentials, no PAT for cross-repo
# checkout. Failures from the server come back as HTTP non-2xx with a
# detail message; curl --fail-with-body bubbles them up as a CI failure.
name: Update graph (incremental)

# Trigger: a pull request against main is closed and its change set includes
# at least one Markdown file. Whether the PR was actually merged (as opposed
# to closed unmerged) is checked by the job-level `if`, not here.
on:
  pull_request:
    types:
      - closed
    branches:
      - main
    paths:
      - '**/*.md'
# Serialize graph updates. Every run targets main, so keying the group on the
# PR's base branch puts all runs in a single group and prevents two updates
# from racing each other. In-progress runs are never cancelled: each run
# consumes LLM credit, so waiting is cheaper than redoing the work.
#
# Why not ``github.ref``: in a pull_request event it is PR-specific
# (``refs/pull/<N>/merge``), which would give every PR its own group and
# defeat the serialization. The base branch ref is shared by all merges.
#
# NOTE(review): GitHub keeps at most one *pending* run per concurrency group;
# a newly queued run replaces an older pending one. If several PRs merge
# while an update is running, intermediate runs may be cancelled rather than
# queued — confirm this is acceptable for incremental graph updates.
concurrency:
  group: update-graph-${{ github.event.pull_request.base.ref }}
  cancel-in-progress: false
jobs:
  update-graph:
    # The `closed` trigger also fires for PRs closed without merging; only
    # merged PRs should update the graph.
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    # NOTE(review): the curl call in the last step uses --max-time 1800
    # (30 min), the same budget as this job-level timeout, so on a hung
    # request the job timeout will normally fire before curl's own limit.
    # Confirm that is intended.
    timeout-minutes: 30
    # Least-privilege GITHUB_TOKEN: this job only needs to read repo
    # source to compute the diff. No writes back to the repo, no comments,
    # no status updates. Closes CodeQL "workflow does not contain
    # permissions" alerts.
    permissions:
      contents: read
    env:
      GRAPH_ID: docs_benchmark
      # The base URL of the GraphRAG-UI deployment. Set this as a repo or
      # environment variable so it can differ between staging and prod.
      GRAPHRAG_UI_URL: ${{ vars.GRAPHRAG_UI_URL }}
    steps:
      - name: Checkout docs
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # need history for diff against the PR base

      # Writes payload.json (graph_id + added/modified/deleted .md files) and
      # sets the step output `skip` to 'true' when there is nothing to send.
      - name: Build diff payload
        id: payload
        env:
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.sha }}
        run: |
          python3 - <<'PY'
          import json, os, pathlib, subprocess, sys

          base, head = os.environ["BASE_SHA"], os.environ["HEAD_SHA"]


          def read_md(path):
              """Return the checked-out file's text, decoded as UTF-8."""
              return pathlib.Path(path).read_text(encoding="utf-8")


          # core.quotePath=false: by default git C-quotes paths containing
          # non-ASCII bytes ("caf\303\251.md"), which would both defeat the
          # .md suffix test and yield a path that does not exist on disk.
          out = subprocess.run(
              ["git", "-c", "core.quotePath=false",
               "diff", "--name-status", base, head],
              capture_output=True, text=True, encoding="utf-8", check=True,
          ).stdout

          added, modified, deleted = {}, {}, []
          for line in out.splitlines():
              parts = line.split("\t")
              # Slice instead of index so a blank or malformed line yields an
              # empty status and is skipped, rather than raising IndexError.
              status = parts[0][:1]  # R100 -> R
              if status == "R" and len(parts) >= 3:
                  # A rename is reported as: delete the old path, add the new.
                  old, new = parts[1], parts[2]
                  if old.endswith(".md"):
                      deleted.append(old)
                  if new.endswith(".md"):
                      try:
                          added[new] = read_md(new)
                      except FileNotFoundError:
                          # Still best-effort, but no longer silent.
                          print(f"::warning::Renamed file {new} not found in checkout; skipping its content.")
                  continue
              if len(parts) < 2 or not parts[1].endswith(".md"):
                  continue
              path = parts[1]
              if status == "A":
                  added[path] = read_md(path)
              elif status == "M":
                  modified[path] = read_md(path)
              elif status == "D":
                  deleted.append(path)

          payload = {
              "graph_id": os.environ.get("GRAPH_ID", "docs_benchmark"),
              "files": {"added": added, "modified": modified, "deleted": deleted},
          }
          if not (added or modified or deleted):
              # Path filter on the workflow trigger should make this rare, but
              # be explicit: no work to do, exit clean before the POST.
              print("::notice::No .md changes after filtering — skipping graph update.", file=sys.stderr)
              with open(os.environ["GITHUB_OUTPUT"], "a") as f:
                  f.write("skip=true\n")
              sys.exit(0)

          pathlib.Path("payload.json").write_text(json.dumps(payload))
          print(f"::notice::Diff: +{len(added)} ~{len(modified)} -{len(deleted)} files")
          with open(os.environ["GITHUB_OUTPUT"], "a") as f:
              f.write("skip=false\n")
          PY

      - name: Call admin update-graph endpoint
        if: steps.payload.outputs.skip != 'true'
        env:
          # Hand the secret to the shell through the environment instead of
          # expanding it into the script text, so its value is never parsed
          # as shell syntax.
          UPDATE_GRAPH_TOKEN: ${{ secrets.UPDATE_GRAPH_TOKEN }}
        run: |
          # Fail fast with a clear message instead of an opaque curl error or
          # a 401 when either value is missing from this run.
          : "${GRAPHRAG_UI_URL:?GRAPHRAG_UI_URL variable is empty or not set}"
          : "${UPDATE_GRAPH_TOKEN:?UPDATE_GRAPH_TOKEN secret is empty or not available to this run}"
          curl -X POST "$GRAPHRAG_UI_URL/api/admin/update-graph" \
            -H "Authorization: Bearer $UPDATE_GRAPH_TOKEN" \
            -H "Content-Type: application/json" \
            --data-binary @payload.json \
            --fail-with-body \
            --show-error \
            --max-time 1800