@@ -7,6 +7,7 @@ Authors: Arthur Paulino
77import Lean.Data.HashMap
88import Lean.Data.RBMap
99import Lean.Data.RBTree
10+ import Lean.Data.Json.Printer
1011
1112/-- Removes a parent path from the beginning of a path -/
1213def System.FilePath.withoutParent (path parent : FilePath) : FilePath :=
@@ -16,8 +17,15 @@ def System.FilePath.withoutParent (path parent : FilePath) : FilePath :=
1617 | x, [] => x
1718 mkFilePath $ aux path.components parent.components
1819
19- def UInt64.asTarGz (n : UInt64) : String :=
20- s! "{ n} .tar.gz"
20+ def Nat.toHexDigits (n : Nat) : Nat → (res : String := "" ) → String
21+ | 0 , s => s
22+ | len+1 , s =>
23+ let b := UInt8.ofNat (n >>> (len * 8 ))
24+ Nat.toHexDigits n len <|
25+ s.push (Nat.digitChar (b >>> 4 ).toNat) |>.push (Nat.digitChar (b &&& 15 ).toNat)
26+
27+ def UInt64.asLTar (n : UInt64) : String :=
28+ s! "{ Nat.toHexDigits n.toNat 8 } .ltar"
2129
2230namespace Cache.IO
2331
@@ -51,12 +59,23 @@ def CURLBIN :=
5159 -- change file name if we ever need a more recent version to trigger re-download
5260 IO.CACHEDIR / s! "curl-{ CURLVERSION} "
5361
62+ /-- leantar version at https://github.com/digama0/leangz -/
63+ def LEANTARVERSION :=
64+ "0.1.1"
65+
66+ def LEANTARBIN :=
67+ -- change file name if we ever need a more recent version to trigger re-download
68+ IO.CACHEDIR / s! "leantar-{ LEANTARVERSION} "
69+
5470def LAKEPACKAGESDIR : FilePath :=
5571 ⟨"lake-packages" ⟩
5672
5773def getCurl : IO String := do
5874 return if (← CURLBIN.pathExists) then CURLBIN.toString else "curl"
5975
76+ def getLeanTar : IO String := do
77+ return if (← LEANTARBIN.pathExists) then LEANTARBIN.toString else "leantar"
78+
6079abbrev PackageDirs := Lean.RBMap String FilePath compare
6180
6281/-- Whether this is running on Mathlib repo or not -/
@@ -136,6 +155,44 @@ def validateCurl : IO Bool := do
136155 | _ => throw $ IO.userError "Invalidly formatted version of `curl`"
137156 | _ => throw $ IO.userError "Invalidly formatted response from `curl --version`"
138157
158+ def Version := Nat × Nat × Nat
159+ deriving Inhabited, DecidableEq
160+
161+ instance : Ord Version := let _ := @lexOrd; lexOrd
162+ instance : LE Version := leOfOrd
163+
164+ def parseVersion (s : String) : Option Version := do
165+ let [maj, min, patch] := s.splitOn "." | none
166+ some (maj.toNat!, min.toNat!, patch.toNat!)
167+
168+ def validateLeanTar : IO Unit := do
169+ if (← LEANTARBIN.pathExists) then return
170+ if let some version ← some <$> runCmd "leantar" #["--version" ] <|> pure none then
171+ let "leantar" :: v :: _ := version.splitOn " "
172+ | throw $ IO.userError "Invalidly formatted response from `leantar --version`"
173+ let some v := parseVersion v | throw $ IO.userError "Invalidly formatted version of `leantar`"
174+ -- currently we need exactly one version of leantar, change this to reflect compatibility
175+ if v = (parseVersion LEANTARVERSION).get! then return
176+ let win := System.Platform.getIsWindows ()
177+ let target ← if win then
178+ pure "x86_64-pc-windows-msvc"
179+ else
180+ let arch ← (·.trim) <$> runCmd "uname" #["-m" ] false
181+ unless arch ∈ ["x86_64" , "aarch64" ] do
182+ throw $ IO.userError s! "unsupported architecture { arch} "
183+ pure <|
184+ if System.Platform.getIsOSX () then s! "{ arch} -apple-darwin"
185+ else s! "{ arch} -unknown-linux-gnu"
186+ IO.println s! "leantar is too old; downloading more recent version"
187+ IO.FS.createDirAll IO.CACHEDIR
188+ let ext := if win then "zip" else "tar.xz"
189+ let _ ← runCmd "curl" #[
190+ s! "https://github.com/digama0/leangz/releases/download/v{ LEANTARVERSION} /leantar-v{ LEANTARVERSION} -{ target} .{ ext} " ,
191+ "-L" , "-o" , s! "{ LEANTARBIN} .{ ext} " ]
192+ let _ ← runCmd "tar" #["-xf" , s! "{ LEANTARBIN} .{ ext} " ,
193+ "-C" , IO.CACHEDIR.toString, "--strip-components=1" ]
194+ let _ ← runCmd "mv" #[(IO.CACHEDIR / s! "leantar" ).toString, LEANTARBIN.toString]
195+
139196/-- Recursively gets all files from a directory with a certain extension -/
140197partial def getFilesWithExtension
141198 (fp : FilePath) (extension : String) (acc : Array FilePath := #[]) :
@@ -150,7 +207,7 @@ namespace HashMap
150207
151208def filter (hashMap : HashMap) (set : Lean.RBTree String compare) (keep : Bool) : HashMap :=
152209 hashMap.fold (init := default) fun acc path hash =>
153- let contains := set.contains hash.asTarGz
210+ let contains := set.contains hash.asLTar
154211 let add := if keep then contains else !contains
155212 if add then acc.insert path hash else acc
156213
@@ -169,9 +226,9 @@ Each build file also has a `Bool` indicating whether that file is required for c
169226def mkBuildPaths (path : FilePath) : IO $ Array (FilePath × Bool) := do
170227 let packageDir ← getPackageDir path
171228 return #[
229+ (packageDir / LIBDIR / path.withExtension "trace" , true ),
172230 (packageDir / LIBDIR / path.withExtension "olean" , true ),
173231 (packageDir / LIBDIR / path.withExtension "ilean" , true ),
174- (packageDir / LIBDIR / path.withExtension "trace" , true ),
175232 (packageDir / IRDIR / path.withExtension "c" , true ),
176233 (packageDir / LIBDIR / path.withExtension "extra" , false )]
177234
@@ -185,21 +242,25 @@ def allExist (paths : Array (FilePath × Bool)) : IO Bool := do
185242def packCache (hashMap : HashMap) (overwrite : Bool) : IO $ Array String := do
186243 mkDir CACHEDIR
187244 IO.println "Compressing cache"
188- let mut acc := default
245+ let mut acc := #[]
246+ let mut tasks := #[]
189247 for (path, hash) in hashMap.toList do
190- let zip := hash.asTarGz
248+ let zip := hash.asLTar
191249 let zipPath := CACHEDIR / zip
192250 let buildPaths ← mkBuildPaths path
193251 if ← allExist buildPaths then
194252 if overwrite || !(← zipPath.pathExists) then
195- discard $ runCmd "tar" $ #["-I" , "gzip -9" , "-cf" , zipPath.toString] ++
196- ((← buildPaths.filterM (·.1 .pathExists)) |>.map (·.1 .toString))
253+ tasks := tasks.push <| ← IO.asTask do
254+ runCmd (← getLeanTar) $ #[zipPath.toString] ++
255+ ((← buildPaths.filterM (·.1 .pathExists)) |>.map (·.1 .toString))
197256 acc := acc.push zip
257+ for task in tasks do
258+ _ ← IO.ofExcept task.get
198259 return acc
199260
200261/-- Gets the set of all cached files -/
201262def getLocalCacheSet : IO $ Lean.RBTree String compare := do
202- let paths ← getFilesWithExtension CACHEDIR "gz "
263+ let paths ← getFilesWithExtension CACHEDIR "ltar "
203264 return .fromList (paths.data.map (·.withoutParent CACHEDIR |>.toString)) _
204265
205266def isPathFromMathlib (path : FilePath) : Bool :=
@@ -215,19 +276,22 @@ def unpackCache (hashMap : HashMap) : IO Unit := do
215276 let hashMap := hashMap.filter (← getLocalCacheSet) true
216277 let size := hashMap.size
217278 if size > 0 then
279+ let now ← IO.monoMsNow
218280 IO.println s! "Decompressing { size} file(s)"
219281 let isMathlibRoot ← isMathlibRoot
220- hashMap.forM fun path hash => do
221- let _ ← IO.asTask do
222- match path.parent with
223- | none | some path => do
224- let packageDir ← getPackageDir path
225- mkDir $ packageDir / LIBDIR / path
226- mkDir $ packageDir / IRDIR / path
282+ let child ← IO.Process.spawn
283+ { cmd := ← getLeanTar, args := #["-x" , "-j" , "-" ], stdin := .piped }
284+ let (stdin, child) ← child.takeStdin
285+ let config : Array Lean.Json := hashMap.fold (init := #[]) fun config path hash =>
286+ let pathStr := s! "{ CACHEDIR / hash.asLTar} "
227287 if isMathlibRoot || !isPathFromMathlib path then
228- runCmd "tar" #[ "-xzf" , s! " { CACHEDIR / hash.asTarGz } " ]
288+ config.push <| .str pathStr
229289 else -- only mathlib files, when not in the mathlib4 repo, need to be redirected
230- runCmd "tar" #["-xzf" , s! "{ CACHEDIR / hash.asTarGz} " , "-C" , mathlibDepPath.toString]
290+ config.push <| .mkObj [("file" , pathStr), ("base" , mathlibDepPath.toString)]
291+ stdin.putStr <| Lean.Json.compress <| .arr config
292+ let exitCode ← child.wait
293+ if exitCode != 0 then throw $ IO.userError s! "leantar failed with error code { exitCode} "
294+ IO.println s! "unpacked in { (← IO.monoMsNow) - now} ms"
231295 else IO.println "No cache files to decompress"
232296
233297/-- Retrieves the azure token from the environment -/
@@ -241,7 +305,7 @@ instance : Ord FilePath where
241305
242306/-- Removes all cache files except for what's in the `keep` set -/
243307def cleanCache (keep : Lean.RBTree FilePath compare := default) : IO Unit := do
244- for path in ← getFilesWithExtension CACHEDIR "gz " do
308+ for path in ← getFilesWithExtension CACHEDIR "ltar " do
245309 if !keep.contains path then IO.FS.removeFile path
246310
247311end Cache.IO
0 commit comments