Skip to content

Commit e863269

Browse files
authored
ref(plugins/languages): indepth mode (lowlighter#1118)
1 parent 85d8187 commit e863269

File tree

15 files changed

+779
-351
lines changed

15 files changed

+779
-351
lines changed

.github/actions/spelling/allow.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1+
gpgarmor
12
github
23
https
34
leetcode
45
pgn
6+
scm
7+
shas
58
ssh
69
ubuntu
10+
yargsparser

package-lock.json

Lines changed: 3 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@
6868
"twemoji-parser": "^14.0.0",
6969
"vue": "^2.7.1",
7070
"vue-prism-component": "^1.2.0",
71-
"xml-formatter": "^2.6.1"
71+
"xml-formatter": "^2.6.1",
72+
"yargs-parser": "^21.1.1"
7273
},
7374
"devDependencies": {
7475
"eslint": "^8.25.0",

source/app/metrics/utils.mjs

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -225,17 +225,19 @@ export async function language({filename, patch}) {
225225
}
226226

227227
/**Run command (use this to execute commands and process whole output at once, may not be suitable for large outputs) */
228-
export async function run(command, options, {prefixed = true, log = true} = {}) {
228+
export async function run(command, options, {prefixed = true, log = true, debug = true} = {}) {
229229
const prefix = {win32: "wsl"}[process.platform] ?? ""
230230
command = `${prefixed ? prefix : ""} ${command}`.trim()
231231
return new Promise((solve, reject) => {
232-
console.debug(`metrics/command/run > ${command}`)
232+
if (debug)
233+
console.debug(`metrics/command/run > ${command}`)
233234
const child = processes.exec(command, options)
234235
let [stdout, stderr] = ["", ""]
235236
child.stdout.on("data", data => stdout += data)
236237
child.stderr.on("data", data => stderr += data)
237238
child.on("close", code => {
238-
console.debug(`metrics/command/run > ${command} > exited with code ${code}`)
239+
if (debug)
240+
console.debug(`metrics/command/run > ${command} > exited with code ${code}`)
239241
if (log) {
240242
console.debug(stdout)
241243
console.debug(stderr)
@@ -246,7 +248,7 @@ export async function run(command, options, {prefixed = true, log = true} = {})
246248
}
247249

248250
/**Spawn command (use this to execute commands and process output on the fly) */
249-
export async function spawn(command, args = [], options = {}, {prefixed = true, timeout = 300 * 1000, stdout} = {}) { //eslint-disable-line max-params
251+
export async function spawn(command, args = [], options = {}, {prefixed = true, timeout = 300 * 1000, stdout, debug = true} = {}) { //eslint-disable-line max-params
250252
const prefix = {win32: "wsl"}[process.platform] ?? ""
251253
if ((prefixed) && (prefix)) {
252254
args.unshift(command)
@@ -255,15 +257,18 @@ export async function spawn(command, args = [], options = {}, {prefixed = true,
255257
if (!stdout)
256258
throw new Error("`stdout` argument was not provided, use run() instead of spawn() if processing output is not needed")
257259
return new Promise((solve, reject) => {
258-
console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")}`)
260+
if (debug)
261+
console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")}`)
259262
const child = processes.spawn(command, args, {...options, shell: true, timeout})
260263
const reader = readline.createInterface({input: child.stdout})
261264
reader.on("line", stdout)
262265
const closed = new Promise(close => reader.on("close", close))
263266
child.on("close", async code => {
264-
console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")} > exited with code ${code}`)
267+
if (debug)
268+
console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")} > exited with code ${code}`)
265269
await closed
266-
console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")} > reader closed`)
270+
if (debug)
271+
console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")} > reader closed`)
267272
return code === 0 ? solve() : reject()
268273
})
269274
})
@@ -372,7 +377,7 @@ export const filters = {
372377
return result
373378
},
374379
/**Repository filter*/
375-
repo(repository, patterns) {
380+
repo(repository, patterns, {debug = true} = {}) {
376381
//Disable filtering when no pattern is provided
377382
if (!patterns.length)
378383
return true
@@ -390,11 +395,12 @@ export const filters = {
390395

391396
//Basic pattern matching
392397
const include = (!patterns.includes(repo)) && (!patterns.includes(`${user}/${repo}`))
393-
console.debug(`metrics/filters/repo > filter ${repo} (${include ? "included" : "excluded"})`)
398+
if (debug)
399+
console.debug(`metrics/filters/repo > filter ${repo} (${include ? "included" : "excluded"})`)
394400
return include
395401
},
396402
/**Text filter*/
397-
text(text, patterns) {
403+
text(text, patterns, {debug = true} = {}) {
398404
//Disable filtering when no pattern is provided
399405
if (!patterns.length)
400406
return true
@@ -404,7 +410,8 @@ export const filters = {
404410

405411
//Basic pattern matching
406412
const include = !patterns.includes(text)
407-
console.debug(`metrics/filters/text > filter ${text} (${include ? "included" : "excluded"})`)
413+
if (debug)
414+
console.debug(`metrics/filters/text > filter ${text} (${include ? "included" : "excluded"})`)
408415
return include
409416
},
410417
}

source/plugins/languages/README.md

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -236,8 +236,7 @@ It will be automatically hidden if empty.</p>
236236

237237
## 🔎 `indepth` mode
238238

239-
The default algorithm use the top languages provided of each repository you contributed to.
240-
When working in collaborative projects with a lot of people, these numbers may be less representative of your actual work.
239+
The default algorithm uses the top languages from each repository you contributed to using GitHub GraphQL API (which is similar to the displayed languages bar on github.com). When working in collaborative projects with a lot of people, these numbers may be less representative of your actual work.
241240

242241
The `plugin_languages_indepth` option lets you use a more advanced algorithm for more accurate statistics.
243242
Under the hood, it will clone your repositories, run [linguist-js](https://github.com/Nixinova/Linguist) (a JavaScript port of [GitHub linguist](https://github.com/github/linguist)) and iterate over patches matching your `commits_authoring` setting.
@@ -257,12 +256,52 @@ Since git lets you use any email and username for commits, *metrics* may not be
257256

258257
> ⚠️ This feature significantly increase workflow time
259258

260-
> ⚠️ Since this mode iterates over **each commit of each repository**, it is not suited for large code base, especially those with a large amount of commits and the ones containing binaries. While `plugin_languages_analysis_timeout` can be used to increase the default timeout for analysis, please be responsible and keep this feature disabled if it cannot work on your account to save GitHub resources and our planet 🌏
259+
> ⚠️ Since this mode iterates over **each matching commit of each repository**, it is not suited for large code bases, especially those with a large number of commits and the ones containing binaries. While `plugin_languages_analysis_timeout` and `plugin_languages_analysis_timeout_repositories` can be used to increase the default timeout for analysis, please be responsible and keep this feature disabled if it cannot work on your account to save GitHub resources and our planet 🌏
261260

262261
> ⚠️ Although *metrics* does not send any code to external sources, repositories are temporarily cloned on the GitHub Action runner. It is advised to keep this option disabled when working with sensitive data or company code. Use at your own risk, *metrics* and its authors **cannot** be held responsible for any resulting code leaks. Source code is available for auditing at [analyzers.mjs](/source/plugins/languages/analyzers.mjs).
263262

264263
> 🌐 Web instances must enable this feature in `settings.json`
265264

265+
Below is a summary of the process used to compute indepth statistics:
266+
267+
## Most used mode
268+
269+
1. Fetch GPG keys linked to your GitHub account
270+
- automatically add attached emails to `commits_authoring`
271+
- *web-flow* (GitHub's public key for changes made through web-ui) is also fetched
272+
2. Import GPG keys so they can be used to verify commits later
273+
3. Iterate through repositories
274+
- early break if `plugin_languages_analysis_timeout` is reached
275+
- skip repository if it matches `plugin_languages_skipped`
276+
- include repositories from `plugin_languages_indepth_custom`
277+
- a specific branch and commit range can be used
278+
- a source other than github.com can be used
279+
4. Clone repository
280+
- target branch is checked out
281+
5. List of authored commits is computed
282+
- using `git log --author` and `commits_authoring` to search in commit headers
283+
- using `git log --grep` and `commits_authoring` to search in commit body
284+
- ensure these are within the range specified by `plugin_languages_indepth_custom` (if applicable)
285+
6. Process authored commits
286+
- early break if `plugin_languages_analysis_timeout_repositories` is reached
287+
- using `git verify-commit` to check authenticity against imported GPG keys
288+
- using `git log --patch` to extract added/deleted lines/bytes from each file
289+
- using [GitHub linguist](https://github.com/github/linguist) ([linguist-js](https://github.com/Nixinova/LinguistJS)) to detect language for each file
290+
- respect `plugin_languages_categories` option
291+
- if a file has since been deleted or moved, check out the last commit in which the file was present and run linguist again
292+
7. Aggregate results
293+
294+
## Recently used mode
295+
296+
1. Fetch push events linked to your account (or target repository)
297+
- matching `plugin_languages_recent_load` and `plugin_languages_recent_days` options
298+
- matching committer emails from `commits_authoring`
299+
2. Process authored commits
300+
- using [GitHub linguist](https://github.com/github/linguist) ([linguist-js](https://github.com/Nixinova/LinguistJS)) to detect language for each file
301+
- respect `plugin_languages_recent_categories` option
302+
- directly pass file content rather than performing I/O and simulating a git repository
303+
3. Aggregate results
304+
266305
## 📅 Recently used languages
267306

268307
This feature uses an algorithm similar to `indepth` mode, but uses patches from your events feed instead.
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
//Imports
2+
import fs from "fs/promises"
3+
import os from "os"
4+
import paths from "path"
5+
import git from "simple-git"
6+
import {filters} from "../../../app/metrics/utils.mjs"
7+
8+
/**Analyzer */
9+
export class Analyzer {
10+
11+
/**Constructor */
12+
constructor(login, {account = "bypass", authoring = [], uid = Math.random(), shell, rest = null, context = {mode:"user"}, skipped = [], categories = ["programming", "markup"], timeout = {global:NaN, repositories:NaN}}) {
13+
//User informations
14+
this.login = login
15+
this.account = account
16+
this.authoring = authoring
17+
this.uid = uid
18+
this.gpg = []
19+
20+
//Utilities
21+
this.shell = shell
22+
this.rest = rest
23+
this.context = context
24+
this.markers = {
25+
hash:/\b[0-9a-f]{40}\b/,
26+
file:/^[+]{3}\sb[/](?<file>[\s\S]+)$/,
27+
line:/^(?<op>[-+])\s*(?<content>[\s\S]+)$/,
28+
}
29+
this.parser = /^(?<login>[\s\S]+?)\/(?<name>[\s\S]+?)(?:@(?<branch>[\s\S]+?)(?::(?<ref>[\s\S]+))?)?$/
30+
this.consumed = false
31+
32+
//Options
33+
this.skipped = skipped
34+
this.categories = categories
35+
this.timeout = timeout
36+
37+
//Results
38+
this.results = {partial: {global:false, repositories:false}, total: 0, lines: {}, stats: {}, colors: {}, commits: 0, files: 0, missed: {lines: 0, bytes: 0, commits: 0}, elapsed:0}
39+
this.debug(`instantiated a new ${this.constructor.name}`)
40+
}
41+
42+
/**Run analyzer */
43+
async run(runner) {
44+
if (this.consumed)
45+
throw new Error("This analyzer has already been consumed, another instance needs to be created to perform a new analysis")
46+
this.consumed = true
47+
const results = await new Promise(async solve => {
48+
let completed = false
49+
if (Number.isFinite(this.timeout.global)) {
50+
this.debug(`timeout set to ${this.timeout.global}m`)
51+
setTimeout(() => {
52+
if (!completed) {
53+
try {
54+
this.debug(`reached maximum execution time of ${this.timeout.global}m for analysis`)
55+
this.results.partial.global = true
56+
solve(this.results)
57+
}
58+
catch {
59+
//Ignore errors
60+
}
61+
}
62+
}, this.timeout.global * 60 * 1000)
63+
}
64+
await runner()
65+
completed = true
66+
solve(this.results)
67+
})
68+
results.partial = (results.partial.global)||(results.partial.repositories)
69+
return results
70+
}
71+
72+
/**Parse repository */
73+
parse(repository) {
74+
let branch = null, ref = null
75+
if (typeof repository === "string") {
76+
if (!this.parser.test(repository))
77+
throw new TypeError(`"${repository}" pattern is not supported`)
78+
const {login, name, ...groups} = repository.match(this.parser)?.groups ?? {}
79+
repository = {owner:{login}, name}
80+
branch = groups.branch ?? null
81+
ref = groups.ref ?? null
82+
}
83+
const repo = `${repository.owner.login}/${repository.name}`
84+
const path = paths.join(os.tmpdir(), `${this.uid}-${repo.replace(/[^\w]/g, "_")}`)
85+
return {repo, path, branch, ref}
86+
}
87+
88+
/**Clone a repository */
89+
async clone(repository) {
90+
const {repo, branch, path} = this.parse(repository)
91+
let url = /^https?:\/\//.test(repo) ? repo : `https://github.com/${repo}`
92+
try {
93+
this.debug(`cloning ${url} to ${path}`)
94+
await fs.rm(path, {recursive: true, force: true})
95+
await fs.mkdir(path, {recursive: true})
96+
await git(path).clone(url, ".", ["--single-branch"]).status()
97+
this.debug(`cloned ${url} to ${path}`)
98+
if (branch) {
99+
this.debug(`switching to branch ${branch} for ${repo}`)
100+
await git(path).branch(branch)
101+
}
102+
return true
103+
}
104+
catch (error) {
105+
this.debug(`failed to clone ${url} (${error})`)
106+
this.clean(path)
107+
return false
108+
}
109+
}
110+
111+
/**Analyze a repository */
112+
async analyze(path, {commits = []} = {}) {
113+
const cache = {files:{}, languages:{}}
114+
const start = Date.now()
115+
let elapsed = 0, processed = 0
116+
if (this.timeout.repositories)
117+
this.debug(`timeout for repository analysis set to ${this.timeout.repositories}m`)
118+
for (const commit of commits) {
119+
elapsed = (Date.now() - start)/1000/60
120+
if ((this.timeout.repositories)&&(elapsed > this.timeout.repositories)) {
121+
this.results.partial.repositories = true
122+
this.debug(`reached maximum execution time of ${this.timeout.repositories}m for repository analysis (${elapsed}m elapsed)`)
123+
break
124+
}
125+
try {
126+
const {total, files, missed, lines, stats} = await this.linguist(path, {commit, cache})
127+
this.results.commits++
128+
this.results.total += total
129+
this.results.files += files
130+
this.results.missed.lines += missed.lines
131+
this.results.missed.bytes += missed.bytes
132+
for (const language in lines) {
133+
if (this.categories.includes(cache.languages[language]?.type))
134+
this.results.lines[language] = (this.results.lines[language] ?? 0) + lines[language]
135+
}
136+
for (const language in stats) {
137+
if (this.categories.includes(cache.languages[language]?.type))
138+
this.results.stats[language] = (this.results.stats[language] ?? 0) + stats[language]
139+
}
140+
}
141+
catch (error) {
142+
this.debug(`skipping commit ${commit.sha} (${error})`)
143+
this.results.missed.commits++
144+
}
145+
finally {
146+
this.results.elapsed += elapsed
147+
processed++
148+
if ((processed%50 === 0)||(processed === commits.length))
149+
this.debug(`at commit ${processed}/${commits.length} (${(100*processed/commits.length).toFixed(2)}%, ${elapsed.toFixed(2)}m elapsed)`)
150+
}
151+
}
152+
this.results.colors = Object.fromEntries(Object.entries(cache.languages).map(([lang, {color}]) => [lang, color]))
153+
}
154+
155+
/**Clean a path */
156+
async clean(path) {
157+
try {
158+
this.debug(`cleaning ${path}`)
159+
await fs.rm(path, {recursive: true, force: true})
160+
this.debug(`cleaned ${path}`)
161+
return true
162+
}
163+
catch (error) {
164+
this.debug(`failed to clean (${error})`)
165+
return false
166+
}
167+
}
168+
169+
/**Whether to skip a repository or not */
170+
ignore(repository) {
171+
const ignored = !filters.repo(repository, this.skipped)
172+
if (ignored)
173+
this.debug(`skipping ${typeof repository === "string" ? repository : `${repository?.owner?.login}/${repository?.name}`} as it matches skipped repositories`)
174+
return ignored
175+
}
176+
177+
/**Debug log */
178+
debug(message) {
179+
return console.debug(`metrics/compute/${this.login}/plugins > languages > ${this.constructor.name.replace(/([a-z])([A-Z])/, (_, a, b) => `${a} ${b.toLocaleLowerCase()}`).toLocaleLowerCase()} > ${message}`)
180+
}
181+
182+
}

0 commit comments

Comments
 (0)