diff --git a/repository-integrity-ledger/README.md b/repository-integrity-ledger/README.md new file mode 100644 index 0000000..647f0fb --- /dev/null +++ b/repository-integrity-ledger/README.md @@ -0,0 +1,70 @@ +# Repository Integrity Ledger + +Self-contained project repository and version-control milestone for [SCIBASE.AI issue #10](https://github.com/SCIBASE-AI/SCIBASE.AI/issues/10). + +The issue asks for scientific project repositories with manifests, versioning, forks, merge requests, reproducibility checks, citations, and exports. This module provides a deterministic integrity ledger that reviewers can run locally without external Git, DOI, or storage services. + +## What It Adds + +- Repository component manifest for manuscript, data, code, notebooks, results, protocols, and metadata. +- Content hashes and Git LFS-style large-file candidate detection. +- Parent-linked commit records with changed components and commit hashes. +- Semantic tags with DOI metadata and citation generation. +- Fork attribution records preserving source DOI, authors, base commit, and fork hash. +- Merge-request evaluation for source/target existence, approvals, discussions, and reproducibility blockers. +- Branch protection report with protected-branch counts, required reviews, required status checks, force-push policy, stale branch detection, and head-commit validation. +- Editor/diff summary for scientific text, Jupyter notebooks, structured data, code-aware diffs, and rollback commands. +- Reproducibility run status with environment, check pass rate, and hash. +- Release readiness packet with required component, semantic tag, DOI, reproducibility, dataset-diff risk, export API, and CLI gates. +- Export bundle with manifest, reproducibility summary, API route list, CLI commands, and bundle hash. +- Sample repository fixture, tests, requirement map, CLI demo, and short demo GIF. 
+ +## Run + +```bash +cd repository-integrity-ledger +npm run check +npm test +npm run demo +``` + +Expected demo shape (abridged): + +```json +{ + "repository": "Coastal flooding microbiome atlas", + "missingComponents": [], + "lfsComponents": ["data/samples.csv"], + "reproducibility": "passed", + "mergeableRequests": 1, + "branchProtection": { + "protectedBranchCount": 1, + "blockedBranchCount": 1 + }, + "editorModes": ["scientific-text", "structured-data", "code-aware", "jupyter-notebook", "binary-or-metadata", "scientific-text", "structured-data"], + "rollbackCommand": "scibase restore repo-flood-microbiome --commit commit-2", + "releaseReadiness": { + "status": "ready", + "datasetDiffRisk": { + "mediumRiskCount": 1 + } + }, + "bundleHash": "..." +} +``` + +## Demo Artifact + +See [docs/demo.gif](docs/demo.gif) for a short visual walkthrough. The SVG source is included at [docs/demo.svg](docs/demo.svg). + +## Files + +- `src/repository-ledger.js` - manifests, commits, tags, forks, merge requests, branch protection, reproducibility, release readiness, citation, exports. +- `data/sample-repository.json` - reviewable scientific repository fixture. +- `test/repository-ledger.test.js` - dependency-free Node tests. +- `scripts/demo.js` - CLI demo. +- `docs/issue-10-requirement-map.md` - maps the implementation to bounty requirements. + +## AI-Assisted Disclosure + +This contribution was produced with AI assistance and manually verified with the local commands above. 
diff --git a/repository-integrity-ledger/data/sample-repository.json b/repository-integrity-ledger/data/sample-repository.json new file mode 100644 index 0000000..d00c1d0 --- /dev/null +++ b/repository-integrity-ledger/data/sample-repository.json @@ -0,0 +1,124 @@ +{ + "id": "repo-flood-microbiome", + "title": "Coastal flooding microbiome atlas", + "metadata": { + "doi": "10.5555/scibase.flood.microbiome", + "year": 2026, + "authors": ["Principal Investigator", "Data Analyst"], + "funding": ["Northstar Climate Health Fund"], + "tags": ["microbiome", "flooding", "reproducibility"] + }, + "components": [ + { "id": "c-manuscript", "kind": "manuscript", "path": "manuscript/main.md", "content": "# Coastal flooding microbiome atlas", "sizeBytes": 12000 }, + { + "id": "c-data", + "kind": "data", + "path": "data/samples.csv", + "content": "sample,diversity", + "sizeBytes": 25000000, + "diffStats": { + "rowsAdded": 48, + "rowsRemoved": 2, + "schemaChanged": false, + "hashChanged": true + } + }, + { "id": "c-code", "kind": "code", "path": "code/analyze.py", "content": "print('analysis')", "sizeBytes": 2000 }, + { "id": "c-notebook", "kind": "notebooks", "path": "notebooks/run_analysis.ipynb", "content": "{}", "sizeBytes": 8000 }, + { "id": "c-results", "kind": "results", "path": "results/figure1.png", "content": "binary", "sizeBytes": 400000 }, + { "id": "c-protocol", "kind": "protocols", "path": "protocols/sequencing.md", "content": "Protocol v2.1", "sizeBytes": 3000 }, + { "id": "c-metadata", "kind": "metadata", "path": "metadata.json", "content": "{\"doi\":\"10.5555/scibase.flood.microbiome\"}", "sizeBytes": 1000 } + ], + "commits": [ + { + "id": "commit-1", + "parentId": null, + "authorId": "u-owner", + "message": "Initial repository import", + "changedComponents": ["c-manuscript", "c-data", "c-code"], + "createdAt": "2026-05-01T00:00:00Z", + "commitHash": "initialhash" + }, + { + "id": "commit-2", + "parentId": "commit-1", + "authorId": "u-analyst", + "message": "Add 
reproducibility outputs", + "changedComponents": ["c-notebook", "c-results", "c-protocol", "c-metadata"], + "createdAt": "2026-05-07T00:00:00Z", + "commitHash": "outputhash" + } + ], + "branches": [ + { + "id": "main", + "headCommitId": "commit-2", + "statusChecks": [ + { "id": "reproducibility", "status": "passed" }, + { "id": "metadata", "status": "passed" } + ] + }, + { + "id": "hypothesis-moisture", + "headCommitId": "commit-1", + "statusChecks": [ + { "id": "reproducibility", "status": "passed" }, + { "id": "metadata", "status": "passed" } + ] + } + ], + "tags": [ + { + "id": "preprint-v1", + "version": "preprint-v1.0", + "label": "Initial preprint", + "commitId": "commit-2", + "doi": "10.5555/scibase.flood.microbiome.v1", + "tagHash": "taghash" + } + ], + "forks": [ + { + "id": "fork-1", + "sourceRepositoryId": "repo-flood-microbiome", + "forkRepositoryId": "repo-flood-microbiome-reanalysis", + "createdBy": "u-reviewer", + "baseCommitId": "commit-2" + } + ], + "mergeRequests": [ + { + "id": "mr-1", + "sourceBranchId": "hypothesis-moisture", + "targetBranchId": "main", + "sourceCommitId": "commit-2", + "targetCommitId": "commit-1", + "changedComponents": [{ "kind": "code", "id": "c-code" }], + "reviews": [{ "reviewerId": "u-reviewer", "state": "approved" }], + "discussions": [{ "id": "disc-1", "body": "Looks reproducible." 
}] + } + ], + "reproducibilityRuns": [ + { + "id": "run-1", + "environment": "docker://python:3.12", + "checks": [ + { "id": "environment", "status": "passed" }, + { "id": "notebook-execution", "status": "passed" }, + { "id": "results-hash", "status": "passed" } + ] + } + ], + "releasePolicy": { + "requiredTagId": "preprint-v1", + "datasetRemovalWarnThreshold": 100 + }, + "branchProtection": { + "asOf": "2026-05-14T00:00:00Z", + "protectedBranches": ["main"], + "requiredReviews": 1, + "requiredStatusChecks": ["reproducibility", "metadata"], + "allowForcePushes": false, + "staleAfterDays": 7 + } +} diff --git a/repository-integrity-ledger/docs/demo.gif b/repository-integrity-ledger/docs/demo.gif new file mode 100644 index 0000000..67b94f1 Binary files /dev/null and b/repository-integrity-ledger/docs/demo.gif differ diff --git a/repository-integrity-ledger/docs/demo.mp4 b/repository-integrity-ledger/docs/demo.mp4 new file mode 100644 index 0000000..ab14cf7 Binary files /dev/null and b/repository-integrity-ledger/docs/demo.mp4 differ diff --git a/repository-integrity-ledger/docs/demo.svg b/repository-integrity-ledger/docs/demo.svg new file mode 100644 index 0000000..5fb8be1 --- /dev/null +++ b/repository-integrity-ledger/docs/demo.svg @@ -0,0 +1,33 @@ + + Repository Integrity Ledger Demo + Visual demo for scientific repository manifests, commits, reproducibility, citations, and export bundles. + + + Repository Integrity Ledger + Manifests · commits · reproducibility · citations · exports + + Manifest + 7 / 7 + required components present + + Reproducibility + passed + Docker run checks passed + + Export + ready + API + CLI bundle hash + + Citation + Principal Investigator, Data Analyst (2026). Coastal flooding microbiome atlas... + Large data component is flagged for Git LFS-style storage. 
+ diff --git a/repository-integrity-ledger/docs/issue-10-requirement-map.md b/repository-integrity-ledger/docs/issue-10-requirement-map.md new file mode 100644 index 0000000..4a96318 --- /dev/null +++ b/repository-integrity-ledger/docs/issue-10-requirement-map.md @@ -0,0 +1,30 @@ +# Issue #10 Requirement Map + +This module is a deterministic milestone for SCIBASE issue #10, Project Repository & Version Control. It focuses on repository manifests, content integrity, commit/tag metadata, forks, merge requests, reproducibility checks, release readiness, citations, and export bundles. + +| Issue requirement | Implementation | +| --- | --- | +| Repository structure and components | `buildComponentManifest()` validates manuscript, data, code, notebooks, results, protocols, and metadata components. | +| Git LFS and content integrity | Components receive hashes and large data files are marked as LFS candidates. | +| Version control and commit history | `createCommit()` creates parent-linked commits with changed component lists and commit hashes. | +| Semantic versioning and tags | `createSemanticTag()` attaches semantic tags, DOI metadata, and tag hashes to commits. | +| Forking and attribution | `buildForkRecord()` records source repository, source DOI, authors, base commit, and fork hash. | +| Merge requests and review | `evaluateMergeRequest()` checks source/target commits, discussions, approvals, and data-change reproducibility blockers. | +| Branch protection and review gates | `buildBranchProtectionReport()` reports protected branches, required reviews, required status checks, force-push policy, stale branch blockers, and unknown-head blockers. | +| In-browser editors and diffs | `buildEditorDiffSummary()` maps components to scientific text, Jupyter notebook, structured-data, code-aware, or hash-only editor/diff modes. | +| Visual revision timeline and rollback | `buildEditorDiffSummary()` emits a commit rollback timeline with deterministic restore commands. 
| +| Reproducibility pipelines | `evaluateReproducibility()` reports execution environment, check pass rate, status, and reproducibility hash. | +| Dataset diffs and release gates | `buildDatasetDiffSummary()` and `buildReleaseReadiness()` report data change risk plus required component, semantic tag, DOI, reproducibility, export API, and CLI gates before release. | +| Repository identifiers and citation | `generateCitation()` produces APA and BibTeX-style citations from repository/tag metadata. | +| Programmatic access and export | `buildExportBundle()` emits API routes, CLI commands, manifest, reproducibility status, and export bundle hash. | +| Reviewer demo | `npm run demo` prints manifest, LFS components, reproducibility status, mergeability, release gates, citation, and bundle hash. | + +## Verification + +```bash +npm run check +npm test +npm run demo +``` + +The module is dependency-free and isolated under `repository-integrity-ledger/`. diff --git a/repository-integrity-ledger/package.json b/repository-integrity-ledger/package.json new file mode 100644 index 0000000..9a630ae --- /dev/null +++ b/repository-integrity-ledger/package.json @@ -0,0 +1,12 @@ +{ + "name": "scibase-repository-integrity-ledger", + "version": "0.1.0", + "private": true, + "description": "Scientific repository integrity and version-control module for SCIBASE issue #10.", + "type": "commonjs", + "scripts": { + "check": "node --check src/repository-ledger.js && node --check scripts/demo.js && node --check test/repository-ledger.test.js", + "demo": "node scripts/demo.js", + "test": "node test/repository-ledger.test.js" + } +} diff --git a/repository-integrity-ledger/scripts/demo.js b/repository-integrity-ledger/scripts/demo.js new file mode 100644 index 0000000..472771c --- /dev/null +++ b/repository-integrity-ledger/scripts/demo.js @@ -0,0 +1,47 @@ +"use strict"; + +const repository = require("../data/sample-repository.json"); +const { buildRepositoryIntegrityPacket } = 
require("../src/repository-ledger"); + +const packet = buildRepositoryIntegrityPacket(repository); + +console.log( + JSON.stringify( + { + repository: packet.repository.title, + missingComponents: packet.manifest.missingRequiredKinds, + lfsComponents: packet.manifest.components.filter((component) => component.lfs).map((component) => component.path), + reproducibility: packet.reproducibility.status, + mergeableRequests: packet.mergeRequests.filter((request) => request.mergeable).length, + branchProtection: { + protectedBranchCount: packet.branchProtection.protectedBranchCount, + readyBranchCount: packet.branchProtection.readyBranchCount, + blockedBranchCount: packet.branchProtection.blockedBranchCount, + branches: packet.branchProtection.branches.map((branch) => ({ + branchId: branch.branchId, + status: branch.status, + blockers: branch.blockers, + statusChecks: branch.statusChecks, + })), + }, + editorModes: packet.editorDiff.componentEditors.map((item) => item.editorMode), + rollbackCommand: packet.editorDiff.rollbackTimeline.at(-1).rollbackCommand, + releaseReadiness: { + status: packet.releaseReadiness.status, + gates: packet.releaseReadiness.gates.map((gate) => ({ + id: gate.id, + status: gate.status, + })), + datasetDiffRisk: { + highRiskCount: packet.releaseReadiness.datasetDiffs.highRiskCount, + mediumRiskCount: packet.releaseReadiness.datasetDiffs.mediumRiskCount, + }, + releaseHash: packet.releaseReadiness.releaseHash, + }, + citation: packet.citations.apa, + bundleHash: packet.exportBundle.bundleHash, + }, + null, + 2, + ), +); diff --git a/repository-integrity-ledger/src/repository-ledger.js b/repository-integrity-ledger/src/repository-ledger.js new file mode 100644 index 0000000..c4e251b --- /dev/null +++ b/repository-integrity-ledger/src/repository-ledger.js @@ -0,0 +1,504 @@ +"use strict"; + +const crypto = require("crypto"); + +const REQUIRED_COMPONENTS = [ + "manuscript", + "data", + "code", + "notebooks", + "results", + "protocols", + "metadata", 
/**
 * Returns `value` unchanged when it is an array and a fresh empty array for
 * anything else, so optional list fields can always be iterated.
 *
 * @param {*} value - Candidate list.
 * @returns {Array} The same array instance, or `[]`.
 */
function asArray(value) {
  return Array.isArray(value) ? value : [];
}

/**
 * Fingerprints a JSON-serializable value.
 *
 * The value is serialized with `JSON.stringify`, hashed with SHA-256, and the
 * first 20 hex characters of the digest are returned. Serialization follows
 * property insertion order, so two objects holding the same entries in a
 * different order produce different fingerprints.
 *
 * @param {*} value - Value to fingerprint.
 * @returns {string} 20 hexadecimal characters.
 * @throws {TypeError} When `JSON.stringify` itself throws (circular
 *   structures, BigInt values).
 */
function hashRecord(value) {
  // JSON.stringify returns undefined (not a string) for undefined, functions
  // and symbols, and hash.update() rejects that with a TypeError. Hash those
  // inputs as "null", which is how JSON encodes them inside arrays.
  const serialized = JSON.stringify(value) ?? "null";
  return crypto.createHash("sha256").update(serialized).digest("hex").slice(0, 20);
}

/**
 * Produces a repository record in which every collection field is an array
 * and every optional object field is an object.
 *
 * @param {object} repository - Raw repository description.
 * @returns {object} Record with `id`, `title`, `metadata`, `components`,
 *   `commits`, `branches`, `tags`, `forks`, `mergeRequests`,
 *   `reproducibilityRuns`, `releasePolicy` and `branchProtection`.
 * @throws {TypeError} When `repository` is not a plain (non-array) object.
 */
function normalizeRepository(repository) {
  // Arrays pass a bare `typeof === "object"` test; without the explicit check
  // `[]` would silently normalize to an empty "repo-unknown" repository.
  if (!repository || typeof repository !== "object" || Array.isArray(repository)) {
    throw new TypeError("repository must be an object");
  }
  return {
    id: repository.id || "repo-unknown",
    title: repository.title || "Untitled scientific repository",
    metadata: repository.metadata || {},
    components: asArray(repository.components),
    commits: asArray(repository.commits),
    branches: asArray(repository.branches),
    tags: asArray(repository.tags),
    forks: asArray(repository.forks),
    mergeRequests: asArray(repository.mergeRequests),
    reproducibilityRuns: asArray(repository.reproducibilityRuns),
    releasePolicy: repository.releasePolicy || {},
    branchProtection: repository.branchProtection || {},
  };
}

/**
 * Builds the component manifest of a repository.
 *
 * Each component is reduced to `id`, `kind`, `path`, a numeric `sizeBytes`,
 * an `lfs` flag, a content `hash` and its `schema` (or `null`). Required
 * kinds that no component provides are listed in `missingRequiredKinds`.
 *
 * @param {object} repositoryInput - Raw repository description.
 * @returns {{repositoryId: string, components: object[],
 *   missingRequiredKinds: string[], manifestHash: string}}
 * @throws {TypeError} When `repositoryInput` is not an object.
 */
function buildComponentManifest(repositoryInput) {
  const repository = normalizeRepository(repositoryInput);
  const components = repository.components.map((component) => {
    const sizeBytes = Number(component.sizeBytes || 0);
    return {
      id: component.id,
      kind: component.kind,
      path: component.path,
      sizeBytes,
      // Flagged when larger than 10,000,000 bytes or explicitly marked.
      lfs: sizeBytes > 10_000_000 || Boolean(component.lfs),
      // A caller-supplied hash wins; otherwise fingerprint the descriptor.
      hash:
        component.hash ||
        hashRecord({
          kind: component.kind,
          path: component.path,
          content: component.content || "",
          sizeBytes: component.sizeBytes || 0,
        }),
      schema: component.schema || null,
    };
  });
  const presentKinds = new Set(components.map((component) => component.kind));

  return {
    repositoryId: repository.id,
    components,
    missingRequiredKinds: REQUIRED_COMPONENTS.filter((kind) => !presentKinds.has(kind)),
    manifestHash: hashRecord({ repositoryId: repository.id, components }),
  };
}

/**
 * Returns a copy of the repository with one more commit appended.
 *
 * Defaults: `id` is `commit-<count + 1>`, `parentId` is the id of the current
 * last commit (or `null`), `message` is "Update scientific repository" and
 * `createdAt` is the current time in ISO-8601 form. The input repository is
 * not mutated.
 *
 * @param {object} repositoryInput - Raw repository description.
 * @param {object} [commitInput={}] - Commit fields: `id`, `parentId`,
 *   `authorId`, `message`, `changedComponents`, `createdAt`.
 * @returns {object} Normalized repository whose `commits` ends with the new
 *   commit, including its `commitHash`.
 * @throws {TypeError} When `repositoryInput` is not an object.
 */
function createCommit(repositoryInput, commitInput = {}) {
  const repository = normalizeRepository(repositoryInput);
  const parent = repository.commits.at(-1);
  const changedComponents = asArray(commitInput.changedComponents);
  const commit = {
    id: commitInput.id || `commit-${repository.commits.length + 1}`,
    parentId: commitInput.parentId || parent?.id || null,
    authorId: commitInput.authorId,
    message: commitInput.message || "Update scientific repository",
    changedComponents,
    createdAt: commitInput.createdAt || new Date().toISOString(),
  };
  // The hash covers everything except the commit id itself.
  commit.commitHash = hashRecord({
    parentId: commit.parentId,
    authorId: commit.authorId,
    message: commit.message,
    changedComponents,
    createdAt: commit.createdAt,
  });

  return {
    ...repository,
    commits: [...repository.commits, commit],
  };
}
/**
 * Returns a copy of the repository with a new tag attached to an existing
 * commit.
 *
 * The tag id and label default to `tagInput.version`, the DOI to `null` and
 * the citation style to "apa". The tag hash covers the version and the hash
 * of the tagged commit. The input repository is not mutated.
 *
 * @param {object} repositoryInput - Raw repository description.
 * @param {object} tagInput - Tag fields: `version`, `commitId`, and
 *   optionally `id`, `label`, `doi`, `citationStyle`.
 * @returns {object} Normalized repository whose `tags` ends with the new tag.
 * @throws {Error} When `tagInput.commitId` matches no commit.
 */
function createSemanticTag(repositoryInput, tagInput) {
  const repository = normalizeRepository(repositoryInput);
  const commit = repository.commits.find((candidate) => candidate.id === tagInput.commitId);
  if (!commit) {
    throw new Error(`unknown commit: ${tagInput.commitId}`);
  }

  const { version } = tagInput;
  const tag = {
    id: tagInput.id || version,
    version,
    label: tagInput.label || version,
    commitId: commit.id,
    doi: tagInput.doi || null,
    citationStyle: tagInput.citationStyle || "apa",
    tagHash: hashRecord({ version, commitHash: commit.commitHash }),
  };

  return {
    ...repository,
    tags: [...repository.tags, tag],
  };
}

/**
 * Describes a fork of the repository together with its attribution data
 * (source title, DOI and authors).
 *
 * @param {object} repositoryInput - Raw description of the source repository.
 * @param {object} forkInput - Fork fields: `id`, `forkRepositoryId`,
 *   `createdBy`, and optionally `baseCommitId`.
 * @returns {object} Fork record. `baseCommitId` is the requested commit, else
 *   the id of the last commit of the source, else `null`.
 */
function buildForkRecord(repositoryInput, forkInput) {
  const repository = normalizeRepository(repositoryInput);
  const latestCommit = repository.commits.at(-1);

  return {
    id: forkInput.id,
    sourceRepositoryId: repository.id,
    forkRepositoryId: forkInput.forkRepositoryId,
    createdBy: forkInput.createdBy,
    attribution: {
      sourceTitle: repository.title,
      sourceDoi: repository.metadata.doi || null,
      sourceAuthors: asArray(repository.metadata.authors),
    },
    // Fall back to an explicit null: an undefined value would be dropped when
    // the record is serialized, unlike every other optional field here.
    baseCommitId: forkInput.baseCommitId || latestCommit?.id || null,
    // The hash covers the raw fork input, not the resolved record.
    forkHash: hashRecord({ sourceRepositoryId: repository.id, forkInput }),
  };
}

/**
 * Evaluates whether a merge request can be merged.
 *
 * A request is mergeable when its source and target commits both exist in the
 * repository, at least one review has state "approved", and no changed
 * component of kind "data" lacks a `reproducibilityRunId`.
 *
 * @param {object} repositoryInput - Raw repository description.
 * @param {object} mergeRequestInput - Merge request with `id`,
 *   `sourceCommitId`, `targetCommitId`, and optional `reviews`,
 *   `discussions`, `changedComponents` lists.
 * @returns {object} Evaluation with existence flags, discussion and approval
 *   counts, the blocking changes and the `mergeable` verdict.
 */
function evaluateMergeRequest(repositoryInput, mergeRequestInput) {
  const repository = normalizeRepository(repositoryInput);
  const { sourceCommitId, targetCommitId } = mergeRequestInput;
  const hasCommit = (commitId) => repository.commits.some((commit) => commit.id === commitId);

  const sourceExists = hasCommit(sourceCommitId);
  const targetExists = hasCommit(targetCommitId);
  const approvals = asArray(mergeRequestInput.reviews).filter((review) => review.state === "approved").length;
  // Data changes must reference the reproducibility run that validated them.
  const blockingChanges = asArray(mergeRequestInput.changedComponents).filter(
    (component) => component.kind === "data" && !component.reproducibilityRunId,
  );

  return {
    id: mergeRequestInput.id,
    repositoryId: repository.id,
    sourceCommitId,
    targetCommitId,
    sourceExists,
    targetExists,
    discussionCount: asArray(mergeRequestInput.discussions).length,
    approvals,
    blockingChanges,
    mergeable: sourceExists && targetExists && approvals > 0 && blockingChanges.length === 0,
  };
}
/**
 * Summarizes the most recent reproducibility run of a repository.
 *
 * Status is "missing" when there is no run or the latest run has no checks,
 * "passed" when every check passed, "partial" when at least half of the
 * checks (rounded up) passed, and "failed" otherwise.
 *
 * @param {object} repositoryInput - Raw repository description.
 * @returns {object} Without any run: `status`, `checks` and
 *   `reproducibilityHash` only. Otherwise also `runId`, `environment` (or
 *   `null`) and `passRate` rounded to four decimals.
 */
function evaluateReproducibility(repositoryInput) {
  const repository = normalizeRepository(repositoryInput);
  const latest = repository.reproducibilityRuns.at(-1);
  if (!latest) {
    return {
      status: "missing",
      checks: [],
      reproducibilityHash: hashRecord({ repositoryId: repository.id, runs: [] }),
    };
  }

  const checks = asArray(latest.checks);
  // A run without checks proves nothing, so it is reported as "missing".
  let status = "missing";
  let passRate = 0;
  if (checks.length > 0) {
    const passed = checks.filter((check) => check.status === "passed").length;
    if (passed === checks.length) {
      status = "passed";
    } else if (passed >= Math.ceil(checks.length / 2)) {
      status = "partial";
    } else {
      status = "failed";
    }
    passRate = Number((passed / checks.length).toFixed(4));
  }

  return {
    status,
    runId: latest.id,
    environment: latest.environment || null,
    checks,
    passRate,
    reproducibilityHash: hashRecord({ repositoryId: repository.id, latest }),
  };
}

/**
 * Picks the editor mode for a component from its kind and file extension.
 *
 * Rules are tried in order and the first match wins; components matching no
 * rule get "binary-or-metadata".
 *
 * NOTE(review): the rule order here (text, notebook, data, code) differs from
 * diffModeFor (data, notebook, code, text), so a component whose kind and
 * extension disagree (e.g. kind "code" with a ".md" path) gets an editor mode
 * and a diff mode from different families. Confirm whether that is intended.
 *
 * @param {{kind?: string, path?: string}} component - Component descriptor.
 * @returns {string} One of "scientific-text", "jupyter-notebook",
 *   "structured-data", "code-aware", "binary-or-metadata".
 */
function editorModeFor(component) {
  const path = String(component.path || "").toLowerCase();
  const { kind } = component;
  const rules = [
    ["scientific-text", kind === "manuscript" || /\.(md|tex)$/.test(path)],
    ["jupyter-notebook", kind === "notebooks" || path.endsWith(".ipynb")],
    ["structured-data", kind === "data" || /\.(csv|tsv|json|parquet)$/.test(path)],
    ["code-aware", kind === "code" || /\.(py|r|jl|js|ts)$/.test(path)],
  ];
  const match = rules.find(([, applies]) => applies);
  return match ? match[0] : "binary-or-metadata";
}

/**
 * Picks the diff mode for a component from its kind and file extension.
 *
 * Rules are tried in order and the first match wins; components matching no
 * rule get "hash-only-diff". See the review note on editorModeFor about the
 * differing rule order.
 *
 * @param {{kind?: string, path?: string}} component - Component descriptor.
 * @returns {string} One of "rich-data-diff", "notebook-output-diff",
 *   "code-aware-diff", "text-diff", "hash-only-diff".
 */
function diffModeFor(component) {
  const path = String(component.path || "").toLowerCase();
  const { kind } = component;
  const rules = [
    ["rich-data-diff", kind === "data" || /\.(csv|tsv|json|parquet)$/.test(path)],
    ["notebook-output-diff", kind === "notebooks" || path.endsWith(".ipynb")],
    ["code-aware-diff", kind === "code" || /\.(py|r|jl|js|ts)$/.test(path)],
    ["text-diff", kind === "manuscript" || /\.(md|tex)$/.test(path)],
  ];
  const match = rules.find(([, applies]) => applies);
  return match ? match[0] : "hash-only-diff";
}

/**
 * Summarizes how each component is edited and diffed, how each merge request
 * change is diffed, and how each commit can be restored.
 *
 * @param {object} repositoryInput - Raw repository description.
 * @returns {{componentEditors: object[], mergeRequestDiffs: object[],
 *   rollbackTimeline: object[], summaryHash: string}}
 */
function buildEditorDiffSummary(repositoryInput) {
  const repository = normalizeRepository(repositoryInput);

  const componentsById = new Map();
  for (const component of repository.components) {
    componentsById.set(component.id, component);
  }

  const componentEditors = repository.components.map((component) => ({
    componentId: component.id,
    path: component.path,
    kind: component.kind,
    editorMode: editorModeFor(component),
    diffMode: diffModeFor(component),
  }));

  // A change that references no known component is classified from the
  // change record itself.
  const describeChange = (change) => {
    const component = componentsById.get(change.id) || change;
    return {
      componentId: change.id,
      kind: change.kind || component.kind,
      diffMode: diffModeFor(component),
    };
  };
  const mergeRequestDiffs = repository.mergeRequests.map((mergeRequest) => ({
    mergeRequestId: mergeRequest.id,
    changedComponents: asArray(mergeRequest.changedComponents).map(describeChange),
  }));

  const rollbackTimeline = repository.commits.map((commit) => ({
    commitId: commit.id,
    parentId: commit.parentId || null,
    message: commit.message,
    createdAt: commit.createdAt,
    rollbackCommand: `scibase restore ${repository.id} --commit ${commit.id}`,
  }));

  return {
    componentEditors,
    mergeRequestDiffs,
    rollbackTimeline,
    summaryHash: hashRecord({ componentEditors, mergeRequestDiffs, rollbackTimeline }),
  };
}
/**
 * Formats a citation for a tagged release of the repository.
 *
 * The tag is looked up by id or by version. The DOI comes from the tag, then
 * from the repository metadata, else "DOI pending". The year comes from the
 * repository metadata, else the current UTC year.
 *
 * @param {object} repositoryInput - Raw repository description.
 * @param {string} tagId - Tag id or tag version.
 * @param {string} [style="apa"] - "bibtex" for a BibTeX `@misc` entry; any
 *   other value yields the APA-style sentence.
 * @returns {string} The formatted citation.
 * @throws {Error} When no tag matches `tagId`.
 */
function generateCitation(repositoryInput, tagId, style = "apa") {
  const repository = normalizeRepository(repositoryInput);
  const tag = repository.tags.find((candidate) => candidate.id === tagId || candidate.version === tagId);
  if (!tag) {
    throw new Error(`unknown tag: ${tagId}`);
  }

  const authorNames = asArray(repository.metadata.authors);
  const year = repository.metadata.year || new Date().getUTCFullYear();
  const doi = tag.doi || repository.metadata.doi || "DOI pending";

  if (style === "bibtex") {
    // BibTeX separates names with " and "; a comma-joined list would be read
    // as a single "Last, First" name.
    const authors = authorNames.join(" and ") || "Unknown authors";
    return `@misc{${repository.id}-${tag.version}, title={${repository.title}}, author={${authors}}, year={${year}}, doi={${doi}}}`;
  }

  const authors = authorNames.join(", ") || "Unknown authors";
  return `${authors} (${year}). ${repository.title} (${tag.version}). ${doi}.`;
}
/**
 * Assembles the export bundle for a tag: manifest, reproducibility summary,
 * API routes and CLI commands.
 *
 * The tag is looked up by id or by version; when nothing matches, `tag` is
 * `null` but the routes and commands are still built from `tagId` as given.
 *
 * @param {object} repositoryInput - Raw repository description.
 * @param {string} tagId - Tag id or tag version to export.
 * @returns {object} Bundle with `repositoryId`, `title`, `tag`, `manifest`,
 *   `reproducibility`, `apiRoutes`, `cliCommands` and `bundleHash`.
 */
function buildExportBundle(repositoryInput, tagId) {
  const repository = normalizeRepository(repositoryInput);
  const manifest = buildComponentManifest(repository);
  const reproducibility = evaluateReproducibility(repository);
  const tag = repository.tags.find((candidate) => candidate.id === tagId || candidate.version === tagId);
  const base = `/repositories/${repository.id}`;

  return {
    repositoryId: repository.id,
    title: repository.title,
    tag: tag || null,
    manifest,
    reproducibility,
    apiRoutes: [
      `GET ${base}`,
      `GET ${base}/components`,
      `POST ${base}/merge-requests`,
      `GET ${base}/exports/${tagId}`,
    ],
    cliCommands: [
      `scibase clone ${repository.id}`,
      `scibase export ${repository.id} --tag ${tagId}`,
    ],
    bundleHash: hashRecord({ repositoryId: repository.id, tag, manifest, reproducibility }),
  };
}

/**
 * Reads the removed-row threshold from a release policy.
 *
 * @param {object} releasePolicy - Release policy object.
 * @returns {number} The configured threshold when it is a finite number
 *   (including 0), otherwise the default of 100.
 */
function resolveRemovalWarnThreshold(releasePolicy) {
  const fallback = 100;
  const configured = releasePolicy.datasetRemovalWarnThreshold;
  if (configured == null || configured === "") {
    return fallback;
  }
  const threshold = Number(configured);
  return Number.isFinite(threshold) ? threshold : fallback;
}

/**
 * Rates the risk of the recorded changes to every data component.
 *
 * Risk is "high" when the schema changed or more rows were removed than the
 * policy threshold allows, "medium" when rows were added or removed or the
 * hash changed, and "low" otherwise.
 *
 * @param {object} repositoryInput - Raw repository description.
 * @returns {{dataDiffs: object[], highRiskCount: number,
 *   mediumRiskCount: number, summaryHash: string}}
 */
function buildDatasetDiffSummary(repositoryInput) {
  const repository = normalizeRepository(repositoryInput);
  // An explicit threshold of 0 ("any removal is high risk") must be honoured,
  // so the default applies only when the setting is absent or not a number.
  const removalWarnThreshold = resolveRemovalWarnThreshold(repository.releasePolicy);

  const dataDiffs = repository.components
    .filter((component) => component.kind === "data")
    .map((component) => {
      const stats = component.diffStats || {};
      const rowsAdded = Number(stats.rowsAdded || 0);
      const rowsRemoved = Number(stats.rowsRemoved || 0);
      const schemaChanged = Boolean(stats.schemaChanged);
      const hashChanged = Boolean(stats.hashChanged);

      let risk = "low";
      if (schemaChanged || rowsRemoved > removalWarnThreshold) {
        risk = "high";
      } else if (rowsAdded > 0 || rowsRemoved > 0 || hashChanged) {
        risk = "medium";
      }

      return {
        componentId: component.id,
        path: component.path,
        rowsAdded,
        rowsRemoved,
        schemaChanged,
        hashChanged,
        risk,
        diffHash: hashRecord({ id: component.id, path: component.path, stats }),
      };
    });

  const countByRisk = (risk) => dataDiffs.filter((diff) => diff.risk === risk).length;
  return {
    dataDiffs,
    highRiskCount: countByRisk("high"),
    mediumRiskCount: countByRisk("medium"),
    summaryHash: hashRecord(dataDiffs),
  };
}

/**
 * Evaluates the release gates for a tag.
 *
 * The tag is resolved from `tagId`, then from `releasePolicy.requiredTagId`,
 * then from the last tag of the repository. Any failing gate makes the
 * release "blocked"; otherwise a gate in "review" makes it "review";
 * otherwise it is "ready".
 *
 * @param {object} repositoryInput - Raw repository description.
 * @param {string} [tagId] - Tag id or version to evaluate.
 * @returns {object} Readiness report with `repositoryId`, `tagId`, `status`,
 *   `gates`, `datasetDiffs`, `exportBundle` (or `null` without a tag) and
 *   `releaseHash`.
 */
function buildReleaseReadiness(repositoryInput, tagId) {
  const repository = normalizeRepository(repositoryInput);
  const manifest = buildComponentManifest(repository);
  const reproducibility = evaluateReproducibility(repository);
  const datasetDiffs = buildDatasetDiffSummary(repository);

  const lastTag = repository.tags.at(-1);
  const requestedTagId = tagId || repository.releasePolicy.requiredTagId || (lastTag && lastTag.id);
  const tag = repository.tags.find(
    (candidate) => candidate.id === requestedTagId || candidate.version === requestedTagId,
  );
  const exportBundle = tag ? buildExportBundle(repository, tag.id) : null;
  const doi = tag ? tag.doi || repository.metadata.doi || null : null;
  const hasExportContract = Boolean(
    exportBundle && exportBundle.apiRoutes.length > 0 && exportBundle.cliCommands.length > 0,
  );
  const passOrFail = (passed) => (passed ? "pass" : "fail");

  const gates = [
    {
      id: "required-components",
      status: passOrFail(manifest.missingRequiredKinds.length === 0),
      evidence: { missingRequiredKinds: manifest.missingRequiredKinds },
    },
    {
      id: "semantic-tag",
      status: passOrFail(tag),
      evidence: { tagId: requestedTagId || null },
    },
    {
      id: "doi-metadata",
      status: passOrFail(doi),
      evidence: { doi },
    },
    {
      id: "reproducibility",
      status: passOrFail(reproducibility.status === "passed"),
      evidence: {
        status: reproducibility.status,
        runId: reproducibility.runId || null,
        passRate: reproducibility.passRate || 0,
      },
    },
    {
      // High-risk data changes ask for a human review instead of failing.
      id: "dataset-diff-risk",
      status: datasetDiffs.highRiskCount === 0 ? "pass" : "review",
      evidence: {
        highRiskCount: datasetDiffs.highRiskCount,
        mediumRiskCount: datasetDiffs.mediumRiskCount,
      },
    },
    {
      id: "export-contract",
      status: passOrFail(hasExportContract),
      evidence: {
        apiRoutes: exportBundle ? exportBundle.apiRoutes : [],
        cliCommands: exportBundle ? exportBundle.cliCommands : [],
      },
    },
  ];

  let status = "ready";
  if (gates.some((gate) => gate.status === "fail")) {
    status = "blocked";
  } else if (gates.some((gate) => gate.status === "review")) {
    status = "review";
  }

  return {
    repositoryId: repository.id,
    tagId: tag ? tag.id : null,
    status,
    gates,
    datasetDiffs,
    exportBundle,
    releaseHash: hashRecord({
      repositoryId: repository.id,
      tagId: tag && tag.id,
      gates,
      datasetDiffs: datasetDiffs.summaryHash,
      exportBundle: exportBundle && exportBundle.bundleHash,
    }),
  };
}
/**
 * Reads the staleness limit, in days, from a branch-protection policy.
 *
 * @param {object} branchProtection - Branch-protection policy object.
 * @returns {number} The configured limit when it is a finite number
 *   (including 0), otherwise the default of 30.
 */
function resolveStaleAfterDays(branchProtection) {
  const fallback = 30;
  const configured = branchProtection.staleAfterDays;
  if (configured == null || configured === "") {
    return fallback;
  }
  const days = Number(configured);
  return Number.isFinite(days) ? days : fallback;
}

/**
 * Reports, per branch, whether the branch-protection policy is satisfied.
 *
 * Blockers:
 * - "unknown-head-commit": the head commit id is not in the repository;
 * - "force-pushes-enabled": a protected branch allows force pushes;
 * - "insufficient-review-approvals": a protected branch has fewer approved
 *   reviews (summed over merge requests that use it as source or target)
 *   than required;
 * - "required-status-check-failed": a required check is missing or not
 *   "passed" (applies to every branch, protected or not);
 * - "stale-branch": the head commit is older than `staleAfterDays` days at
 *   `asOf`.
 *
 * `asOf` defaults to the current time. `staleDays` is `null` when either
 * timestamp cannot be parsed, and the staleness check is then skipped.
 *
 * @param {object} repositoryInput - Raw repository description.
 * @returns {{asOf: string, branches: object[], protectedBranchCount: number,
 *   blockedBranchCount: number, readyBranchCount: number,
 *   protectionHash: string}}
 */
function buildBranchProtectionReport(repositoryInput) {
  const repository = normalizeRepository(repositoryInput);
  const policy = repository.branchProtection;

  // Index commits once; the first commit wins when ids are duplicated.
  const commitsById = new Map();
  for (const commit of repository.commits) {
    if (!commitsById.has(commit.id)) {
      commitsById.set(commit.id, commit);
    }
  }

  const asOf = policy.asOf || new Date().toISOString();
  const asOfTime = new Date(asOf).getTime();
  // An explicit limit of 0 days must be honoured rather than replaced by the
  // default.
  const staleAfterDays = resolveStaleAfterDays(policy);
  const protectedBranches = new Set(asArray(policy.protectedBranches));
  const requiredStatusChecks = asArray(policy.requiredStatusChecks);
  const requiredReviews = Number(policy.requiredReviews || 0);
  const allowForcePushes = Boolean(policy.allowForcePushes);
  const millisecondsPerDay = 1000 * 60 * 60 * 24;

  const branches = repository.branches.map((branch) => {
    const headCommit = commitsById.get(branch.headCommitId);
    const headTime = headCommit ? new Date(headCommit.createdAt).getTime() : NaN;
    const staleDays =
      Number.isFinite(asOfTime) && Number.isFinite(headTime)
        ? Math.max(0, Math.floor((asOfTime - headTime) / millisecondsPerDay))
        : null;

    const approvedReviews = repository.mergeRequests
      .filter((mergeRequest) => mergeRequest.sourceBranchId === branch.id || mergeRequest.targetBranchId === branch.id)
      .reduce(
        (sum, mergeRequest) => sum + asArray(mergeRequest.reviews).filter((review) => review.state === "approved").length,
        0,
      );

    const reportedChecks = asArray(branch.statusChecks);
    const checkStatuses = requiredStatusChecks.map((checkId) => {
      const check = reportedChecks.find((candidate) => candidate.id === checkId);
      return { id: checkId, status: check ? check.status : "missing" };
    });

    const isProtected = protectedBranches.has(branch.id);
    const blockers = [];
    if (!headCommit) blockers.push("unknown-head-commit");
    if (isProtected && allowForcePushes) blockers.push("force-pushes-enabled");
    if (isProtected && approvedReviews < requiredReviews) blockers.push("insufficient-review-approvals");
    if (checkStatuses.some((check) => check.status !== "passed")) blockers.push("required-status-check-failed");
    if (staleDays !== null && staleDays > staleAfterDays) blockers.push("stale-branch");

    return {
      branchId: branch.id,
      headCommitId: branch.headCommitId,
      protected: isProtected,
      staleDays,
      requiredReviews,
      approvedReviews,
      statusChecks: checkStatuses,
      blockers,
      status: blockers.length ? "blocked" : "ready",
      protectionHash: hashRecord({ branch, checkStatuses, blockers, asOf }),
    };
  });

  const countWhere = (predicate) => branches.filter(predicate).length;
  return {
    asOf,
    branches,
    protectedBranchCount: countWhere((branch) => branch.protected),
    blockedBranchCount: countWhere((branch) => branch.status === "blocked"),
    readyBranchCount: countWhere((branch) => branch.status === "ready"),
    protectionHash: hashRecord({ branches, asOf, requiredStatusChecks, requiredReviews }),
  };
}

/**
 * Builds the complete integrity packet for a repository: manifest,
 * reproducibility, editor/diff summary, branch protection, forks, evaluated
 * merge requests, citations, release readiness and export bundle.
 *
 * Citations and the export bundle refer to the last tag of the repository and
 * are `null` when there are no tags.
 *
 * NOTE(review): release readiness is also evaluated against the last tag, so
 * `releasePolicy.requiredTagId` only takes effect when the repository has no
 * tags at all. Confirm whether the policy tag should take precedence here.
 *
 * @param {object} repositoryInput - Raw repository description.
 * @returns {object} The assembled packet.
 */
function buildRepositoryIntegrityPacket(repositoryInput) {
  const repository = normalizeRepository(repositoryInput);
  const latestTag = repository.tags.at(-1);

  let citations = null;
  let exportBundle = null;
  let releaseReadiness;
  if (latestTag) {
    releaseReadiness = buildReleaseReadiness(repository, latestTag.id);
    citations = {
      apa: generateCitation(repository, latestTag.id, "apa"),
      bibtex: generateCitation(repository, latestTag.id, "bibtex"),
    };
    exportBundle = buildExportBundle(repository, latestTag.id);
  } else {
    releaseReadiness = buildReleaseReadiness(repository);
  }

  return {
    repository: {
      id: repository.id,
      title: repository.title,
      doi: repository.metadata.doi || null,
    },
    manifest: buildComponentManifest(repository),
    reproducibility: evaluateReproducibility(repository),
    editorDiff: buildEditorDiffSummary(repository),
    branchProtection: buildBranchProtectionReport(repository),
    forks: repository.forks,
    mergeRequests: repository.mergeRequests.map((mergeRequest) => evaluateMergeRequest(repository, mergeRequest)),
    citations,
    releaseReadiness,
    exportBundle,
  };
}
buildExportBundle(repository, latestTag.id) : null, + }; +} + +module.exports = { + REQUIRED_COMPONENTS, + buildComponentManifest, + buildBranchProtectionReport, + buildDatasetDiffSummary, + buildEditorDiffSummary, + buildExportBundle, + buildReleaseReadiness, + buildForkRecord, + buildRepositoryIntegrityPacket, + createCommit, + createSemanticTag, + evaluateMergeRequest, + evaluateReproducibility, + generateCitation, + hashRecord, + normalizeRepository, +}; diff --git a/repository-integrity-ledger/test/repository-ledger.test.js b/repository-integrity-ledger/test/repository-ledger.test.js new file mode 100644 index 0000000..c9078db --- /dev/null +++ b/repository-integrity-ledger/test/repository-ledger.test.js @@ -0,0 +1,179 @@ +"use strict"; + +const assert = require("assert"); +const repository = require("../data/sample-repository.json"); +const { + buildBranchProtectionReport, + buildComponentManifest, + buildDatasetDiffSummary, + buildEditorDiffSummary, + buildExportBundle, + buildForkRecord, + buildReleaseReadiness, + buildRepositoryIntegrityPacket, + createCommit, + createSemanticTag, + evaluateMergeRequest, + evaluateReproducibility, + generateCitation, +} = require("../src/repository-ledger"); + +function testComponentManifest() { + const manifest = buildComponentManifest(repository); + + assert.deepStrictEqual(manifest.missingRequiredKinds, []); + assert.strictEqual(manifest.components.find((component) => component.kind === "data").lfs, true); + assert.ok(manifest.manifestHash.length >= 12); +} + +function testCommitAndTag() { + const withCommit = createCommit(repository, { + authorId: "u-owner", + message: "Add DataCite metadata", + changedComponents: ["c-metadata"], + createdAt: "2026-05-08T00:00:00Z", + }); + const withTag = createSemanticTag(withCommit, { + version: "v1.1.0", + commitId: withCommit.commits.at(-1).id, + doi: "10.5555/scibase.flood.microbiome.v1.1", + }); + + assert.strictEqual(withCommit.commits.length, repository.commits.length + 1); + 
assert.strictEqual(withTag.tags.at(-1).version, "v1.1.0"); + assert.ok(withTag.tags.at(-1).tagHash); +} + +function testForkRecord() { + const fork = buildForkRecord(repository, { + id: "fork-2", + forkRepositoryId: "repo-reanalysis", + createdBy: "u-reviewer", + }); + + assert.strictEqual(fork.sourceRepositoryId, repository.id); + assert.strictEqual(fork.attribution.sourceDoi, repository.metadata.doi); + assert.ok(fork.forkHash); +} + +function testMergeRequestAndReproducibility() { + const merge = evaluateMergeRequest(repository, repository.mergeRequests[0]); + const reproducibility = evaluateReproducibility(repository); + const emptyChecks = evaluateReproducibility({ + ...repository, + reproducibilityRuns: [{ id: "run-empty", environment: "docker://node:22", checks: [] }], + }); + + assert.strictEqual(merge.mergeable, true); + assert.strictEqual(merge.approvals, 1); + assert.strictEqual(reproducibility.status, "passed"); + assert.strictEqual(reproducibility.passRate, 1); + assert.strictEqual(emptyChecks.status, "missing"); + assert.strictEqual(emptyChecks.passRate, 0); +} + +function testCitationAndExport() { + const citation = generateCitation(repository, "preprint-v1", "apa"); + const bibtex = generateCitation(repository, "preprint-v1", "bibtex"); + const bundle = buildExportBundle(repository, "preprint-v1"); + + assert.ok(citation.includes("Coastal flooding microbiome atlas")); + assert.ok(bibtex.startsWith("@misc")); + assert.ok(bundle.apiRoutes.length >= 4); + assert.ok(bundle.bundleHash.length >= 12); +} + +function testDatasetDiffSummary() { + const summary = buildDatasetDiffSummary(repository); + + assert.strictEqual(summary.dataDiffs.length, 1); + assert.strictEqual(summary.dataDiffs[0].risk, "medium"); + assert.strictEqual(summary.highRiskCount, 0); + assert.strictEqual(summary.mediumRiskCount, 1); + assert.ok(summary.summaryHash.length >= 12); +} + +function testReleaseReadiness() { + const readiness = buildReleaseReadiness(repository, "preprint-v1"); 
+ const blocked = buildReleaseReadiness({ + ...repository, + tags: [], + reproducibilityRuns: [{ id: "run-empty", checks: [] }], + }, "missing-tag"); + + assert.strictEqual(readiness.status, "ready"); + assert.strictEqual(readiness.gates.every((gate) => gate.status === "pass"), true); + assert.strictEqual(readiness.datasetDiffs.mediumRiskCount, 1); + assert.ok(readiness.exportBundle.bundleHash); + assert.ok(readiness.releaseHash.length >= 12); + assert.strictEqual(blocked.status, "blocked"); + assert.ok(blocked.gates.find((gate) => gate.id === "semantic-tag" && gate.status === "fail")); +} + +function testBranchProtectionReport() { + const protection = buildBranchProtectionReport(repository); + const main = protection.branches.find((branch) => branch.branchId === "main"); + const stale = protection.branches.find((branch) => branch.branchId === "hypothesis-moisture"); + const broken = buildBranchProtectionReport({ + ...repository, + branchProtection: { + ...repository.branchProtection, + allowForcePushes: true, + }, + branches: [ + { + id: "main", + headCommitId: "missing-commit", + statusChecks: [{ id: "reproducibility", status: "failed" }], + }, + ], + }); + + assert.strictEqual(protection.protectedBranchCount, 1); + assert.strictEqual(main.status, "ready"); + assert.strictEqual(main.approvedReviews, 1); + assert.strictEqual(stale.status, "blocked"); + assert.ok(stale.blockers.includes("stale-branch")); + assert.ok(broken.branches[0].blockers.includes("unknown-head-commit")); + assert.ok(broken.branches[0].blockers.includes("force-pushes-enabled")); + assert.ok(broken.branches[0].blockers.includes("required-status-check-failed")); + assert.ok(protection.protectionHash.length >= 12); +} + +function testEditorDiffSummary() { + const summary = buildEditorDiffSummary(repository); + const dataEditor = summary.componentEditors.find((item) => item.componentId === "c-data"); + const codeEditor = summary.componentEditors.find((item) => item.componentId === "c-code"); + + 
assert.strictEqual(dataEditor.editorMode, "structured-data"); + assert.strictEqual(dataEditor.diffMode, "rich-data-diff"); + assert.strictEqual(codeEditor.diffMode, "code-aware-diff"); + assert.strictEqual(summary.mergeRequestDiffs[0].changedComponents[0].diffMode, "code-aware-diff"); + assert.ok(summary.rollbackTimeline[0].rollbackCommand.includes("scibase restore")); + assert.ok(summary.summaryHash.length >= 12); +} + +function testFullPacket() { + const packet = buildRepositoryIntegrityPacket(repository); + + assert.strictEqual(packet.repository.id, repository.id); + assert.strictEqual(packet.mergeRequests[0].mergeable, true); + assert.strictEqual(packet.branchProtection.protectedBranchCount, 1); + assert.ok(packet.editorDiff.componentEditors.length >= 7); + assert.ok(packet.citations.apa); + assert.strictEqual(packet.releaseReadiness.status, "ready"); + assert.ok(packet.exportBundle.bundleHash); +} + +testComponentManifest(); +testCommitAndTag(); +testForkRecord(); +testMergeRequestAndReproducibility(); +testCitationAndExport(); +testDatasetDiffSummary(); +testReleaseReadiness(); +testBranchProtectionReport(); +testEditorDiffSummary(); +testFullPacket(); + +console.log("repository-integrity-ledger tests passed");