Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions repository-integrity-ledger/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Repository Integrity Ledger

Self-contained project repository and version-control milestone for [SCIBASE.AI issue #10](https://github.com/SCIBASE-AI/SCIBASE.AI/issues/10).

The issue asks for scientific project repositories with manifests, versioning, forks, merge requests, reproducibility checks, citations, and exports. This module provides a deterministic integrity ledger that reviewers can run locally without external Git, DOI, or storage services.

## What It Adds

- Repository component manifest for manuscript, data, code, notebooks, results, protocols, and metadata.
- Content hashes and Git LFS-style large-file candidate detection.
- Parent-linked commit records with changed components and commit hashes.
- Semantic tags with DOI metadata and citation generation.
- Fork attribution records preserving source DOI, authors, base commit, and fork hash.
- Merge-request evaluation for source/target existence, approvals, discussions, and reproducibility blockers.
- Branch protection report with protected-branch counts, required reviews, required status checks, force-push policy, stale branch detection, and head-commit validation.
- Editor/diff summary for scientific text, Jupyter notebooks, structured data, code-aware diffs, and rollback commands.
- Reproducibility run status with environment, check pass rate, and hash.
- Release readiness packet with required component, semantic tag, DOI, reproducibility, dataset-diff risk, export API, and CLI gates.
- Export bundle with manifest, reproducibility summary, API route list, CLI commands, and bundle hash.
- Sample repository fixture, tests, requirement map, CLI demo, and a short demo GIF.

## Run

```bash
cd repository-integrity-ledger
npm run check
npm test
npm run demo
```

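Expected demo shape (abridged; the actual output also includes the ready-branch count, per-branch protection details, release gates, the high-risk dataset-diff count, a release hash, and an APA citation):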

```json
{
"repository": "Coastal flooding microbiome atlas",
"missingComponents": [],
"lfsComponents": ["data/samples.csv"],
"reproducibility": "passed",
"mergeableRequests": 1,
"branchProtection": {
"protectedBranchCount": 1,
"blockedBranchCount": 1
},
"editorModes": ["scientific-text", "structured-data", "code-aware"],
"rollbackCommand": "scibase restore repo-flood-microbiome --commit commit-2",
"releaseReadiness": {
"status": "ready",
"datasetDiffRisk": {
"mediumRiskCount": 1
}
},
"bundleHash": "..."
}
```

## Demo Artifact

See [docs/demo.gif](docs/demo.gif) for a short visual walkthrough. The SVG source is included at [docs/demo.svg](docs/demo.svg).

## Files

- `src/repository-ledger.js` - manifests, commits, tags, forks, merge requests, branch protection, editor/diff summaries, reproducibility, dataset diffs, release readiness, citation, exports.
- `data/sample-repository.json` - reviewable scientific repository fixture.
- `test/repository-ledger.test.js` - dependency-free Node tests.
- `scripts/demo.js` - CLI demo.
- `docs/issue-10-requirement-map.md` - maps the implementation to bounty requirements.

## AI-Assisted Disclosure

This contribution was produced with AI assistance and manually verified with the local commands above.
124 changes: 124 additions & 0 deletions repository-integrity-ledger/data/sample-repository.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
{
"id": "repo-flood-microbiome",
"title": "Coastal flooding microbiome atlas",
"metadata": {
"doi": "10.5555/scibase.flood.microbiome",
"year": 2026,
"authors": ["Principal Investigator", "Data Analyst"],
"funding": ["Northstar Climate Health Fund"],
"tags": ["microbiome", "flooding", "reproducibility"]
},
"components": [
{ "id": "c-manuscript", "kind": "manuscript", "path": "manuscript/main.md", "content": "# Coastal flooding microbiome atlas", "sizeBytes": 12000 },
{
"id": "c-data",
"kind": "data",
"path": "data/samples.csv",
"content": "sample,diversity",
"sizeBytes": 25000000,
"diffStats": {
"rowsAdded": 48,
"rowsRemoved": 2,
"schemaChanged": false,
"hashChanged": true
}
},
{ "id": "c-code", "kind": "code", "path": "code/analyze.py", "content": "print('analysis')", "sizeBytes": 2000 },
{ "id": "c-notebook", "kind": "notebooks", "path": "notebooks/run_analysis.ipynb", "content": "{}", "sizeBytes": 8000 },
{ "id": "c-results", "kind": "results", "path": "results/figure1.png", "content": "binary", "sizeBytes": 400000 },
{ "id": "c-protocol", "kind": "protocols", "path": "protocols/sequencing.md", "content": "Protocol v2.1", "sizeBytes": 3000 },
{ "id": "c-metadata", "kind": "metadata", "path": "metadata.json", "content": "{\"doi\":\"10.5555/scibase.flood.microbiome\"}", "sizeBytes": 1000 }
],
"commits": [
{
"id": "commit-1",
"parentId": null,
"authorId": "u-owner",
"message": "Initial repository import",
"changedComponents": ["c-manuscript", "c-data", "c-code"],
"createdAt": "2026-05-01T00:00:00Z",
"commitHash": "initialhash"
},
{
"id": "commit-2",
"parentId": "commit-1",
"authorId": "u-analyst",
"message": "Add reproducibility outputs",
"changedComponents": ["c-notebook", "c-results", "c-protocol", "c-metadata"],
"createdAt": "2026-05-07T00:00:00Z",
"commitHash": "outputhash"
}
],
"branches": [
{
"id": "main",
"headCommitId": "commit-2",
"statusChecks": [
{ "id": "reproducibility", "status": "passed" },
{ "id": "metadata", "status": "passed" }
]
},
{
"id": "hypothesis-moisture",
"headCommitId": "commit-1",
"statusChecks": [
{ "id": "reproducibility", "status": "passed" },
{ "id": "metadata", "status": "passed" }
]
}
],
"tags": [
{
"id": "preprint-v1",
"version": "preprint-v1.0",
"label": "Initial preprint",
"commitId": "commit-2",
"doi": "10.5555/scibase.flood.microbiome.v1",
"tagHash": "taghash"
}
],
"forks": [
{
"id": "fork-1",
"sourceRepositoryId": "repo-flood-microbiome",
"forkRepositoryId": "repo-flood-microbiome-reanalysis",
"createdBy": "u-reviewer",
"baseCommitId": "commit-2"
}
],
"mergeRequests": [
{
"id": "mr-1",
"sourceBranchId": "hypothesis-moisture",
"targetBranchId": "main",
"sourceCommitId": "commit-2",
"targetCommitId": "commit-1",
"changedComponents": [{ "kind": "code", "id": "c-code" }],
"reviews": [{ "reviewerId": "u-reviewer", "state": "approved" }],
"discussions": [{ "id": "disc-1", "body": "Looks reproducible." }]
}
],
"reproducibilityRuns": [
{
"id": "run-1",
"environment": "docker://python:3.12",
"checks": [
{ "id": "environment", "status": "passed" },
{ "id": "notebook-execution", "status": "passed" },
{ "id": "results-hash", "status": "passed" }
]
}
],
"releasePolicy": {
"requiredTagId": "preprint-v1",
"datasetRemovalWarnThreshold": 100
},
"branchProtection": {
"asOf": "2026-05-14T00:00:00Z",
"protectedBranches": ["main"],
"requiredReviews": 1,
"requiredStatusChecks": ["reproducibility", "metadata"],
"allowForcePushes": false,
"staleAfterDays": 7
}
}
Binary file added repository-integrity-ledger/docs/demo.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added repository-integrity-ledger/docs/demo.mp4
Binary file not shown.
33 changes: 33 additions & 0 deletions repository-integrity-ledger/docs/demo.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
30 changes: 30 additions & 0 deletions repository-integrity-ledger/docs/issue-10-requirement-map.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Issue #10 Requirement Map

This module is a deterministic milestone for SCIBASE issue #10, Project Repository & Version Control. It focuses on repository manifests, content integrity, commit/tag metadata, forks, merge requests, reproducibility checks, release readiness, citations, and export bundles.

| Issue requirement | Implementation |
| --- | --- |
| Repository structure and components | `buildComponentManifest()` validates manuscript, data, code, notebooks, results, protocols, and metadata components. |
| Git LFS and content integrity | Components receive hashes and large data files are marked as LFS candidates. |
| Version control and commit history | `createCommit()` creates parent-linked commits with changed component lists and commit hashes. |
| Semantic versioning and tags | `createSemanticTag()` attaches semantic tags, DOI metadata, and tag hashes to commits. |
| Forking and attribution | `buildForkRecord()` records source repository, source DOI, authors, base commit, and fork hash. |
| Merge requests and review | `evaluateMergeRequest()` checks source/target commits, discussions, approvals, and data-change reproducibility blockers. |
| Branch protection and review gates | `buildBranchProtectionReport()` reports protected branches, required reviews, required status checks, force-push policy, stale branch blockers, and unknown-head blockers. |
| In-browser editors and diffs | `buildEditorDiffSummary()` maps components to scientific text, Jupyter notebook, structured-data, code-aware, or hash-only editor/diff modes. |
| Visual revision timeline and rollback | `buildEditorDiffSummary()` emits a commit rollback timeline with deterministic restore commands. |
| Reproducibility pipelines | `evaluateReproducibility()` reports execution environment, check pass rate, status, and reproducibility hash. |
| Dataset diffs and release gates | `buildDatasetDiffSummary()` and `buildReleaseReadiness()` report data change risk plus required component, semantic tag, DOI, reproducibility, export API, and CLI gates before release. |
| Repository identifiers and citation | `generateCitation()` produces APA and BibTeX-style citations from repository/tag metadata. |
| Programmatic access and export | `buildExportBundle()` emits API routes, CLI commands, manifest, reproducibility status, and export bundle hash. |
| Reviewer demo | `npm run demo` prints missing components, LFS components, reproducibility status, mergeability, branch protection, editor modes, the latest rollback command, release gates, citation, and bundle hash. |

## Verification

```bash
npm run check
npm test
npm run demo
```

The module is dependency-free and isolated under `repository-integrity-ledger/`.
12 changes: 12 additions & 0 deletions repository-integrity-ledger/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"name": "scibase-repository-integrity-ledger",
"version": "0.1.0",
"private": true,
"description": "Scientific repository integrity and version-control module for SCIBASE issue #10.",
"type": "commonjs",
"scripts": {
"check": "node --check src/repository-ledger.js && node --check scripts/demo.js && node --check test/repository-ledger.test.js",
"demo": "node scripts/demo.js",
"test": "node test/repository-ledger.test.js"
}
}
47 changes: 47 additions & 0 deletions repository-integrity-ledger/scripts/demo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"use strict";

const repository = require("../data/sample-repository.json");
const { buildRepositoryIntegrityPacket } = require("../src/repository-ledger");

// Run the ledger over the bundled fixture; everything printed below is
// derived from this single packet.
const packet = buildRepositoryIntegrityPacket(repository);

// Projects one branch-protection entry down to the fields shown in the demo.
const toBranchSummary = ({ branchId, status, blockers, statusChecks }) => ({
  branchId,
  status,
  blockers,
  statusChecks,
});

// Projects one release gate down to its id and status.
const toGateSummary = ({ id, status }) => ({ id, status });

// Reviewer-facing summary. Key order is significant: it is the order in
// which JSON.stringify emits the fields.
const summary = {
  repository: packet.repository.title,
  missingComponents: packet.manifest.missingRequiredKinds,
  // Paths of the manifest components whose `lfs` flag is set.
  lfsComponents: packet.manifest.components
    .filter(({ lfs }) => lfs)
    .map(({ path }) => path),
  reproducibility: packet.reproducibility.status,
  // Count of merge requests the packet marked as mergeable.
  mergeableRequests: packet.mergeRequests.filter(({ mergeable }) => mergeable).length,
  branchProtection: {
    protectedBranchCount: packet.branchProtection.protectedBranchCount,
    readyBranchCount: packet.branchProtection.readyBranchCount,
    blockedBranchCount: packet.branchProtection.blockedBranchCount,
    branches: packet.branchProtection.branches.map(toBranchSummary),
  },
  editorModes: packet.editorDiff.componentEditors.map(({ editorMode }) => editorMode),
  // Rollback command of the last entry in the rollback timeline.
  rollbackCommand: packet.editorDiff.rollbackTimeline.at(-1).rollbackCommand,
  releaseReadiness: {
    status: packet.releaseReadiness.status,
    gates: packet.releaseReadiness.gates.map(toGateSummary),
    datasetDiffRisk: {
      highRiskCount: packet.releaseReadiness.datasetDiffs.highRiskCount,
      mediumRiskCount: packet.releaseReadiness.datasetDiffs.mediumRiskCount,
    },
    releaseHash: packet.releaseReadiness.releaseHash,
  },
  citation: packet.citations.apa,
  bundleHash: packet.exportBundle.bundleHash,
};

// Pretty-print with a two-space indent so the output is easy to review.
console.log(JSON.stringify(summary, null, 2));
Loading