Skip to content

Commit 2446f25

Browse files
committed
ci: Push to cache in a separate job to isolate secrets (leanprover-community#34847)
Stage cache artifacts in the build job and upload them in a separate job. The motivation is to only ever use the cache secrets in the upload job. Changes: - Add cache-artifact staging support to `lake exe cache`: - `stage-unpacked --staging-dir=<dir>` - Packs any unpacked cache entries (i.e., ones not yet in .ltar form) into .ltar files and copies those .ltars into `<dir>`. Not very useful outside of our CI, but it could be used for p2p sharing of cache artifacts. - `put-staged --staging-dir=<dir> [ARGS]` Uploads all .ltar files found in `<dir>` to the cache server. No packing happens here: the point is to be able to just run this on the 'publishing' job with the artifact produced before. - `unstage --staging-dir=<dir>` Copies absent .ltar files from `<dir>` into the local cache directory. Useful to consume the PR artifact locally if downloaded manually. - `unstage! --staging-dir=<dir>`. Same as `unstage`, but overwrites existing `.ltar` files in the local cache. - Update CI to stage .ltar files on the build runner, upload them as a cache-staging artifact only if non‑empty, and upload to Azure in a new upload_cache job that holds secrets.
1 parent 7c1ce07 commit 2446f25

File tree

3 files changed

+214
-75
lines changed

3 files changed

+214
-75
lines changed

.github/workflows/build_template.yml

Lines changed: 99 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ jobs:
3737
build-outcome: ${{ steps.build.outcome }}
3838
archive-outcome: ${{ steps.archive.outcome }}
3939
counterexamples-outcome: ${{ steps.counterexamples.outcome }}
40+
cache-staging-has-files: ${{ steps.cache_staging_check.outputs.has_files }}
4041
get-cache-outcome: ${{ steps.get.outcome }}
4142
lint-outcome: ${{ steps.lint.outcome }}
4243
mk_all-outcome: ${{ steps.mk_all.outcome }}
@@ -367,31 +368,6 @@ jobs:
367368
cd pr-branch
368369
du .lake/build/lib/lean/Mathlib || echo "This code should be unreachable"
369370
370-
# The cache secrets are available here, so we must not run any untrusted code.
371-
- name: Upload cache to Azure
372-
# We only upload the cache if the build started (whether succeeding, failing, or cancelled)
373-
# but not if any earlier step failed or was cancelled.
374-
# See discussion at https://leanprover.zulipchat.com/#narrow/stream/287929-mathlib4/topic/Some.20files.20not.20found.20in.20the.20cache/near/407183836
375-
if: ${{ always() && steps.get.outcome == 'success' }}
376-
shell: bash
377-
run: |
378-
cd pr-branch
379-
380-
# Trim trailing whitespace from secrets to prevent issues with accidentally added spaces
381-
export MATHLIB_CACHE_SAS="${MATHLIB_CACHE_SAS_RAW%"${MATHLIB_CACHE_SAS_RAW##*[![:space:]]}"}"
382-
383-
# Use the trusted cache tool from tools-branch
384-
# TODO: this is not doing anything currently, and needs to be integrated with put-unpacked
385-
# ../tools-branch/.lake/build/bin/cache commit || true
386-
# run this in CI if it gets an incorrect lake hash for existing cache files somehow
387-
# ../tools-branch/.lake/build/bin/cache put!
388-
# do not try to upload files just downloaded
389-
390-
echo "Uploading cache to Azure..."
391-
MATHLIB_CACHE_USE_CLOUDFLARE=0 ../tools-branch/.lake/build/bin/cache --repo=${{ github.event.pull_request.head.repo.full_name || github.repository }} put-unpacked
392-
env:
393-
MATHLIB_CACHE_SAS_RAW: ${{ secrets.MATHLIB_CACHE_SAS }}
394-
395371
# Note: we should not be including `Archive` and `Counterexamples` in the cache.
396372
# We do this for now for the sake of not rebuilding them in every CI run
397373
# even when they are not touched.
@@ -423,6 +399,52 @@ jobs:
423399
../tools-branch/scripts/lake-build-with-retry.sh Counterexamples
424400
# results of build at pr-branch/.lake/build_summary_Counterexamples.json
425401
402+
- name: prepare cache staging directory
403+
if: ${{ steps.build.outcome == 'success' }}
404+
shell: bash
405+
run: |
406+
rm -rf cache-staging
407+
mkdir -p cache-staging
408+
409+
- name: stage Mathlib cache files
410+
if: ${{ steps.build.outcome == 'success' }}
411+
shell: bash
412+
run: |
413+
cd pr-branch
414+
lake env ../tools-branch/.lake/build/bin/cache --staging-dir="../cache-staging" stage
415+
416+
- name: stage Archive cache files
417+
if: ${{ steps.archive.outcome == 'success' }}
418+
shell: bash
419+
run: |
420+
cd pr-branch
421+
lake env ../tools-branch/.lake/build/bin/cache --staging-dir="../cache-staging" stage Archive.lean
422+
423+
- name: stage Counterexamples cache files
424+
if: ${{ steps.counterexamples.outcome == 'success' }}
425+
shell: bash
426+
run: |
427+
cd pr-branch
428+
lake env ../tools-branch/.lake/build/bin/cache --staging-dir="../cache-staging" stage Counterexamples.lean
429+
430+
- name: check cache staging contents
431+
id: cache_staging_check
432+
if: ${{ steps.build.outcome == 'success' }}
433+
shell: bash
434+
run: |
435+
if find cache-staging -type f -name '*.ltar' -print -quit | grep -q .; then
436+
echo "has_files=true" >> "$GITHUB_OUTPUT"
437+
else
438+
echo "has_files=false" >> "$GITHUB_OUTPUT"
439+
fi
440+
441+
- name: upload cache staging artifact
442+
if: ${{ steps.cache_staging_check.outputs.has_files == 'true' }}
443+
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
444+
with:
445+
name: cache-staging
446+
path: cache-staging/
447+
426448
- name: Check if building Archive or Counterexamples failed
427449
if: steps.archive.outcome == 'failure' || steps.counterexamples.outcome == 'failure'
428450
run: |
@@ -434,21 +456,6 @@ jobs:
434456
fi
435457
exit 1
436458
437-
# The cache secrets are available here, so we must not run any untrusted code.
438-
- name: Upload Archive and Counterexamples cache to Azure
439-
shell: bash
440-
run: |
441-
cd pr-branch
442-
443-
# Trim trailing whitespace from secrets to prevent issues with accidentally added spaces
444-
export MATHLIB_CACHE_SAS="${MATHLIB_CACHE_SAS_RAW%"${MATHLIB_CACHE_SAS_RAW##*[![:space:]]}"}"
445-
446-
echo "Uploading Archive and Counterexamples cache to Azure..."
447-
MATHLIB_CACHE_USE_CLOUDFLARE=0 ../tools-branch/.lake/build/bin/cache --repo=${{ github.event.pull_request.head.repo.full_name || github.repository }} put-unpacked Archive.lean
448-
MATHLIB_CACHE_USE_CLOUDFLARE=0 ../tools-branch/.lake/build/bin/cache --repo=${{ github.event.pull_request.head.repo.full_name || github.repository }} put-unpacked Counterexamples.lean
449-
env:
450-
MATHLIB_CACHE_SAS_RAW: ${{ secrets.MATHLIB_CACHE_SAS }}
451-
452459
- name: Check {Mathlib, Tactic, Counterexamples, Archive}.lean
453460
if: ${{ always() && steps.mk_all.outcome != 'skipped' }}
454461
run: |
@@ -595,9 +602,59 @@ jobs:
595602
tools-branch/scripts/lean-pr-testing-comments.sh lean
596603
tools-branch/scripts/lean-pr-testing-comments.sh batteries
597604
605+
upload_cache:
606+
name: Upload to cache
607+
needs: [build]
608+
runs-on: ubuntu-latest # These steps run on a GitHub runner; no landrun sandboxing is needed.
609+
# We only upload the cache if the build started (whether succeeding, failing, or cancelled)
610+
# but not if any earlier step failed or was cancelled.
611+
# See discussion at https://leanprover.zulipchat.com/#narrow/stream/287929-mathlib4/topic/Some.20files.20not.20found.20in.20the.20cache/near/407183836
612+
if: ${{ always() && needs.build.outputs.build-outcome == 'success' && needs.build.outputs.get-cache-outcome == 'success' && needs.build.outputs.cache-staging-has-files == 'true' }}
613+
steps:
614+
- name: Checkout tools branch
615+
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
616+
with:
617+
ref: ${{ inputs.tools_branch_ref != '' && inputs.tools_branch_ref || (github.repository == 'leanprover-community/mathlib4-nightly-testing' && 'nightly-testing-green' || 'master') }}
618+
fetch-depth: 1
619+
620+
- name: Configure Lean
621+
uses: leanprover/lean-action@c544e89643240c6b398f14a431bcdc6309e36b3e # v1.4.0
622+
with:
623+
auto-config: false # Don't run `lake build`, `lake test`, or `lake lint` automatically.
624+
use-github-cache: false
625+
use-mathlib-cache: false # This can be re-enabled once we are confident in the cache again.
626+
reinstall-transient-toolchain: true
627+
628+
- name: build cache executable
629+
run: |
630+
lake build cache
631+
CACHE_BIN="$(pwd)/.lake/build/bin/cache"
632+
if [ ! -x "$CACHE_BIN" ]; then
633+
echo "cache binary not found: $CACHE_BIN"
634+
exit 1
635+
fi
636+
echo "CACHE_BIN=$CACHE_BIN" >> "$GITHUB_ENV"
637+
638+
- name: Download cache staging artifact
639+
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
640+
with:
641+
name: cache-staging
642+
path: cache-staging
643+
644+
- name: Upload staged cache to Azure
645+
shell: bash
646+
run: |
647+
# Trim trailing whitespace from secrets to prevent issues with accidentally added spaces
648+
export MATHLIB_CACHE_SAS="${MATHLIB_CACHE_SAS_RAW%"${MATHLIB_CACHE_SAS_RAW##*[![:space:]]}"}"
649+
650+
echo "Uploading cache to Azure..."
651+
MATHLIB_CACHE_USE_CLOUDFLARE=0 lake env "$CACHE_BIN" put-staged --staging-dir="cache-staging" --repo=${{ github.event.pull_request.head.repo.full_name || github.repository }}
652+
env:
653+
MATHLIB_CACHE_SAS_RAW: ${{ secrets.MATHLIB_CACHE_SAS }}
654+
598655
post_steps:
599656
name: Post-Build Step
600-
needs: [build]
657+
needs: [build, upload_cache]
601658
runs-on: ubuntu-latest # Note these steps run on disposable GitHub runners, so no landrun sandboxing is needed.
602659
steps:
603660

@@ -674,6 +731,7 @@ jobs:
674731
675732
# We no longer run `lean4checker` in regular CI, as it is quite expensive for little benefit.
676733
# Instead we run it in a cron job on master: see `daily.yml`.
734+
677735
style_lint:
678736
name: Lint style
679737
runs-on: ubuntu-latest

Cache/Main.lean

Lines changed: 56 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,34 @@ Usage: cache [OPTIONS] [COMMAND]
1111
1212
Commands:
1313
# No privilege required
14-
get [ARGS] Download linked files missing on the local cache and decompress
15-
get! [ARGS] Download all linked files and decompress
16-
get- [ARGS] Download linked files missing to the local cache, but do not decompress
17-
pack Compress non-compressed build files into the local cache
18-
pack! Compress build files into the local cache (no skipping)
19-
unpack Decompress linked already downloaded files
20-
unpack! Decompress linked already downloaded files (no skipping)
21-
clean Delete non-linked files
22-
clean! Delete everything on the local cache
23-
lookup [ARGS] Show information about cache files for the given Lean files
14+
get [ARGS] Download linked files missing on the local cache and decompress
15+
get! [ARGS] Download all linked files and decompress
16+
get- [ARGS] Download linked files missing to the local cache, but do not decompress
17+
pack Compress non-compressed build files into the local cache
18+
pack! Compress build files into the local cache (no skipping)
19+
unpack Decompress linked already downloaded files
20+
unpack! Decompress linked already downloaded files (no skipping)
21+
clean Delete non-linked files
22+
clean! Delete everything on the local cache
23+
lookup [ARGS] Show information about cache files for the given Lean files
2424
2525
# Privilege required
2626
put Run 'pack' then upload linked files missing on the server
2727
put! Run 'pack' then upload all linked files
28-
put-unpacked 'put' only files not already 'pack'ed; intended for CI use
2928
commit Write a commit on the server
3029
commit! Overwrite a commit on the server
3130
31+
# Intended for CI use
32+
unstage Copy *.ltar files from the staging directory to the local cache
33+
unstage! Copy *.ltar files from the staging directory to the local cache (overwrite existing files)
34+
stage Move files not already 'pack'ed to an output directory
35+
stage! Move all linked cache files to an output directory
36+
put-staged Upload *.ltar files from the staging directory (privilege required)
37+
put-unpacked Run 'put' only for files not already 'pack'ed (privilege required)
38+
3239
Options:
3340
--repo=OWNER/REPO Override the repository to fetch/push cache from
41+
--staging-dir=<output-directory> Required for 'stage', 'stage!', 'unstage' and 'put-staged': staging directory.
3442
3543
* Linked files refer to local cache files with corresponding Lean sources
3644
* Commands ending with '!' should be used manually, when hot-fixes are needed
@@ -61,21 +69,19 @@ See Cache/README.md for more details.
6169

6270
/-- Commands which (potentially) call `curl` for downloading files -/
6371
def curlArgs : List String :=
64-
["get", "get!", "get-", "put", "put!", "put-unpacked", "commit", "commit!"]
72+
["get", "get!", "get-", "put", "put!", "put-unpacked", "put-staged", "commit", "commit!"]
6573

6674
/-- Commands which (potentially) call `leantar` for compressing or decompressing files -/
6775
def leanTarArgs : List String :=
68-
["get", "get!", "put", "put!", "put-unpacked", "pack", "pack!", "unpack", "lookup"]
69-
70-
/-- Parses an optional `--repo` option. -/
71-
def parseRepo (args : List String) : IO (Option String × List String) := do
72-
if let arg :: args := args then
73-
if arg.startsWith "--" then
74-
if let some repo := arg.dropPrefix? "--repo=" then
75-
return (some repo.toString, args)
76-
else
77-
throw <| IO.userError s!"unknown option: {arg}"
78-
return (none, args)
76+
["get", "get!", "put", "put!", "put-unpacked", "pack", "pack!", "unpack", "lookup", "stage", "stage!"]
77+
78+
/-- Parses an optional `--foo=bar` option. -/
79+
def parseNamedOpt (opt : String) (args : List String) : IO (Option String) := do
80+
let pref := s!"--{opt}="
81+
if let some a := args.findRev? (fun a => a.startsWith pref) then
82+
let val := a.drop pref.length
83+
return some val.toString
84+
return none
7985

8086
open Cache IO Hashing Requests System in
8187
def main (args : List String) : IO Unit := do
@@ -84,7 +90,13 @@ def main (args : List String) : IO Unit := do
8490
Process.exit 0
8591
CacheM.run do
8692

87-
let (repo?, args) ← parseRepo args
93+
-- split args and named options
94+
let (options, args) := args.partition (·.startsWith "--")
95+
96+
-- parse relevant options, ignore the rest
97+
let repo? ← parseNamedOpt "repo" options
98+
let stagingDir? ← parseNamedOpt "staging-dir" options
99+
88100
let mut roots : Std.HashMap Lean.Name FilePath ← parseArgs args
89101
if roots.isEmpty then do
90102
-- No arguments means to start from `Mathlib.lean`
@@ -106,6 +118,18 @@ def main (args : List String) : IO Unit := do
106118
let put (overwrite unpackedOnly := false) := do
107119
let repo := repo?.getD MATHLIBREPO
108120
putFiles repo (← pack overwrite (verbose := true) unpackedOnly) overwrite (← getToken)
121+
let stage outDir (unpackedOnly := true) := do
122+
stageFiles outDir (← pack (verbose := true) (unpackedOnly := unpackedOnly))
123+
let unstage (overwrite := false) := do
124+
if stagingDir?.isNone then IO.println "unstage requires --staging-dir=" return else
125+
unstageFiles stagingDir?.get! overwrite
126+
let putStaged (stagingDir : FilePath) := do
127+
let repo := repo?.getD MATHLIBREPO
128+
if !(←stagingDir.isDir) then IO.println "--staging-dir must be a directory" return
129+
else
130+
let fileSet ← getFilesWithExtension stagingDir "ltar"
131+
putFilesAbsolute repo fileSet (tempConfigFilePath := stagingDir / "curl.config") (overwrite := false) (← getToken)
132+
109133
match args with
110134
| "get" :: args => get args
111135
| "get!" :: args => get args (force := true)
@@ -114,13 +138,21 @@ def main (args : List String) : IO Unit := do
114138
| ["pack!"] => discard <| pack (overwrite := true)
115139
| ["unpack"] => unpackCache hashMap false
116140
| ["unpack!"] => unpackCache hashMap true
141+
| ["unstage"] => unstage
142+
| ["unstage!"] => unstage (overwrite := true)
117143
| ["clean"] =>
118144
cleanCache <| hashMap.fold (fun acc _ hash => acc.insert <| CACHEDIR / hash.asLTar) .empty
119145
| ["clean!"] => cleanCache
120146
-- We allow arguments for `put*` so they can be added to the `roots`.
121147
| "put" :: _ => put
122148
| "put!" :: _ => put (overwrite := true)
123149
| "put-unpacked" :: _ => put (unpackedOnly := true)
150+
| "stage" :: _ => if (stagingDir?.isNone) then IO.println "stage requires --staging-dir=" return else
151+
stage stagingDir?.get!
152+
| "stage!" :: _ => if (stagingDir?.isNone) then IO.println "stage! requires --staging-dir=" return else
153+
stage stagingDir?.get! (unpackedOnly := false)
154+
| "put-staged" :: _ => if (stagingDir?.isNone) then IO.println "put-staged requires --staging-dir=" return else
155+
putStaged stagingDir?.get!
124156
| ["commit"] =>
125157
if !(← isGitStatusClean) then IO.println "Please commit your changes first" return else
126158
commit hashMap false (← getToken)

0 commit comments

Comments
 (0)