parsing: Improve parsing coverage based on the corpus files #4

Workflow file for this run

	name: Corpus Parsing Tests

	# pull_request_target runs the workflow definition from the BASE branch (main),
	# so a fork PR cannot modify this workflow to exfiltrate secrets.
	# The PR's code is still checked out and executed below, which is why fork PRs
	# are gated behind an approval environment and why no checkout step leaves
	# credentials behind in the workspace.
	on:
	  push:
	    branches:
	      - main
	  pull_request_target:
	    types: [opened, synchronize]

	jobs:
	  corpus:
	    name: Corpus Parsing Tests
	    runs-on: ubuntu-latest
	    # Require approval via environment for fork PRs (secrets access).
	    # Evaluates to 'corpus-tests' for fork PRs and to '' otherwise.
	    environment: >-
	      ${{ github.event_name == 'pull_request_target'
	      && github.event.pull_request.head.repo.full_name != github.repository
	      && 'corpus-tests' || '' }}
	    permissions:
	      # Listing permissions explicitly drops every scope not named here.
	      contents: read
	      pull-requests: write
	      actions: read
	    steps:
	      # Checkout the PR head (or push sha) — NOT the base branch.
	      # persist-credentials: false keeps the job token out of .git/config,
	      # where the PR's own code (run by cargo test) could otherwise read it.
	      - uses: actions/checkout@v4
	        with:
	          ref: ${{ github.event.pull_request.head.sha || github.sha }}
	          persist-credentials: false

	      # Private corpus checkout; the deploy key must not stay on disk either.
	      - uses: actions/checkout@v4
	        with:
	          repository: getsynq/kernel-cll-corpus
	          ssh-key: ${{ secrets.CORPUS_DEPLOY_KEY }}
	          ref: main
	          path: tests/corpus-repo
	          persist-credentials: false
	      - run: ln -s "$PWD/tests/corpus-repo/corpus" tests/corpus
	      - uses: dtolnay/rust-toolchain@stable

	      # Test failures must not stop the job: the report is still uploaded
	      # and summarised in the PR comment below.
	      - name: Run corpus tests
	        continue-on-error: true
	        run: cargo test --test sqlparser_corpus

	      - name: Upload corpus report
	        if: always()
	        uses: actions/upload-artifact@v4
	        with:
	          name: corpus-report
	          path: target/corpus-report.json

	      # Best effort: a missing baseline only removes the Delta column.
	      - name: Download baseline from main
	        if: github.event_name == 'pull_request_target'
	        id: baseline
	        continue-on-error: true
	        uses: dawidd6/action-download-artifact@v6
	        with:
	          name: corpus-report
	          path: target/baseline
	          branch: main
	          # Only accept reports produced by pushes to main, never by PR runs.
	          event: push
	          workflow: corpus.yml
	          search_artifacts: true

	      - name: Post PR comment
	        if: github.event_name == 'pull_request_target'
	        uses: actions/github-script@v7
	        with:
	          script: |
	            const fs = require('fs');

	            const reportPath = 'target/corpus-report.json';
	            const baselinePath = 'target/baseline/corpus-report.json';
	            // The heading doubles as the marker used to find our own comment.
	            const marker = '## Corpus Parsing Report';

	            // One-decimal percentage; 'n/a' instead of "NaN%" when total is 0.
	            const pct = (passed, total) =>
	              total > 0 ? `${(passed / total * 100).toFixed(1)}%` : 'n/a';

	            let current;
	            try {
	              current = JSON.parse(fs.readFileSync(reportPath, 'utf8'));
	            } catch (e) {
	              // Still fail the step, but say why instead of a raw stack trace.
	              core.setFailed(`Cannot read ${reportPath}: ${e.message}`);
	              return;
	            }

	            let baseline = null;
	            try {
	              baseline = JSON.parse(fs.readFileSync(baselinePath, 'utf8'));
	            } catch (e) {
	              // no baseline available
	            }
	            const hasBaseline = baseline !== null;

	            let body = `${marker}\n\n`;
	            const totalTests = current.total_passed + current.total_failed;
	            body += `Total: ${current.total_passed} passed, ${current.total_failed} failed (${pct(current.total_passed, totalTests)} pass rate)\n\n`;

	            body += '| Dialect | Passed | Failed | Total | Pass Rate |';
	            if (hasBaseline) body += ' Delta |';
	            body += '\n';

	            body += '|---------|-------:|-------:|------:|----------:|';
	            if (hasBaseline) body += '------:|';
	            body += '\n';

	            for (const [dialect, stats] of Object.entries(current.dialects ?? {})) {
	              const total = stats.passed + stats.failed;
	              body += `| ${dialect} | ${stats.passed} | ${stats.failed} | ${total} | ${pct(stats.passed, total)} |`;

	              if (hasBaseline) {
	                const base = baseline.dialects?.[dialect];
	                if (!base) {
	                  // Dialect did not exist in the baseline report.
	                  body += ' new |';
	                } else {
	                  const diff = stats.passed - base.passed;
	                  if (diff > 0) body += ` +${diff} |`;
	                  else if (diff < 0) body += ` ${diff} |`;
	                  else body += ' - |';
	                }
	              }

	              body += '\n';
	            }

	            if (hasBaseline) {
	              const totalDiff = current.total_passed - baseline.total_passed;
	              body += '\n';
	              if (totalDiff > 0) {
	                body += `Improvement: +${totalDiff} more files parsed successfully\n`;
	              } else if (totalDiff < 0) {
	                body += `Regression: ${totalDiff} fewer files parsed successfully\n`;
	              } else {
	                body += 'No change in parsing coverage\n';
	              }
	            } else {
	              body += '\nNo baseline available for comparison (first run on this branch)\n';
	            }

	            // Find an existing report comment to update. Paginate so the
	            // comment is found even on PRs with more than one page of
	            // comments; otherwise a duplicate would be posted.
	            const issue = {
	              owner: context.repo.owner,
	              repo: context.repo.repo,
	              issue_number: context.issue.number,
	            };
	            const comments = await github.paginate(
	              github.rest.issues.listComments,
	              { ...issue, per_page: 100 },
	            );
	            const existing = comments.find(c => c.body?.startsWith(marker));

	            if (existing) {
	              await github.rest.issues.updateComment({
	                owner: issue.owner,
	                repo: issue.repo,
	                comment_id: existing.id,
	                body,
	              });
	            } else {
	              await github.rest.issues.createComment({ ...issue, body });
	            }

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

parsing: Improve parsing coverage based on the corpus files #4

Workflow file

parsing: Improve parsing coverage based on the corpus files #4

Uh oh!

Workflow file for this run