parsing: Improve parsing coverage based on the corpus files #4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Corpus Parsing Tests

# Runs the SQL parser against the external corpus and, for pull requests,
# posts (or updates) a PR comment comparing the results with the latest
# report uploaded from main.
#
# pull_request_target runs the workflow from the BASE branch (main),
# so a fork PR cannot modify this workflow to exfiltrate secrets.
# The PR's code is checked out explicitly below. That code is still
# untrusted and is executed by `cargo test`, so no credentials may be left
# on disk by the checkout steps (see `persist-credentials: false`).
on:
  push:
    branches:
      - main
  pull_request_target:
    types: [opened, synchronize]

jobs:
  corpus:
    name: Corpus Parsing Tests
    runs-on: ubuntu-latest
    # Require approval via environment for fork PRs (secrets access).
    # Evaluates to 'corpus-tests' only for PRs whose head repo differs from
    # this repository; otherwise to the empty string (no environment).
    environment: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name != github.repository && 'corpus-tests' || '' }}
    permissions:
      pull-requests: write
      actions: read
    steps:
      # Checkout the PR head (or push sha), NOT the base branch.
      - uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha || github.sha }}
          # Do not leave the job token in .git/config where the PR's
          # build scripts and tests could read it.
          persist-credentials: false

      # Private corpus repository, fetched with a read-only deploy key.
      - uses: actions/checkout@v4
        with:
          repository: getsynq/kernel-cll-corpus
          ssh-key: ${{ secrets.CORPUS_DEPLOY_KEY }}
          ref: main
          path: tests/corpus-repo
          # The deploy key is only needed for the fetch itself; remove it
          # before any untrusted code runs.
          persist-credentials: false

      # Expose the corpus directory at tests/corpus.
      - run: ln -s "$PWD/tests/corpus-repo/corpus" tests/corpus

      - uses: dtolnay/rust-toolchain@stable

      # A failing test run must not stop the job: the report is still
      # uploaded and summarized in the steps below.
      - name: Run corpus tests
        continue-on-error: true
        run: cargo test --test sqlparser_corpus

      - name: Upload corpus report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: corpus-report
          path: target/corpus-report.json

      # Best effort: when no baseline artifact can be downloaded the comment
      # is posted without the delta column.
      - name: Download baseline from main
        if: github.event_name == 'pull_request_target'
        id: baseline
        continue-on-error: true
        uses: dawidd6/action-download-artifact@v6
        with:
          name: corpus-report
          path: target/baseline
          branch: main
          workflow: corpus.yml
          search_artifacts: true

      - name: Post PR comment
        if: github.event_name == 'pull_request_target'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');

            // Heading of the report; also the key used to find a previously
            // posted report comment so it is updated instead of duplicated.
            const marker = '## Corpus Parsing Report';

            // Percentage with one decimal place. An empty bucket reports 0.0
            // rather than NaN from a division by zero.
            const formatRate = (passed, total) =>
              (total > 0 ? (passed / total) * 100 : 0).toFixed(1);

            // Report produced by this run; a missing or malformed file
            // throws and fails the step.
            const current = JSON.parse(fs.readFileSync('target/corpus-report.json', 'utf8'));

            // Baseline report is optional: fall back to a report without deltas.
            let baseline = null;
            try {
              baseline = JSON.parse(fs.readFileSync('target/baseline/corpus-report.json', 'utf8'));
            } catch (err) {
              core.info(`No usable baseline report: ${err.message}`);
            }
            const hasBaseline = baseline !== null;

            const totalTests = current.total_passed + current.total_failed;
            const passRate = formatRate(current.total_passed, totalTests);

            let body = `${marker}\n\n`;
            body += `**Total: ${current.total_passed} passed, ${current.total_failed} failed** (${passRate}% pass rate)\n\n`;

            // Table header; the Delta column exists only when a baseline was loaded.
            body += '| Dialect | Passed | Failed | Total | Pass Rate |';
            if (hasBaseline) body += ' Delta |';
            body += '\n';
            body += '|---------|-------:|-------:|------:|----------:|';
            if (hasBaseline) body += '------:|';
            body += '\n';

            for (const [dialect, stats] of Object.entries(current.dialects)) {
              const total = stats.passed + stats.failed;
              const rate = formatRate(stats.passed, total);
              body += `| ${dialect} | ${stats.passed} | ${stats.failed} | ${total} | ${rate}% |`;
              if (hasBaseline) {
                const base = baseline.dialects?.[dialect];
                if (base) {
                  const diff = stats.passed - base.passed;
                  if (diff > 0) body += ` **+${diff}** |`;
                  else if (diff < 0) body += ` **${diff}** |`;
                  else body += ` - |`;
                } else {
                  // Dialect is absent from the baseline report.
                  body += ` *new* |`;
                }
              }
              body += '\n';
            }

            if (hasBaseline) {
              const totalDiff = current.total_passed - baseline.total_passed;
              body += '\n';
              if (totalDiff > 0) {
                body += `**Improvement: +${totalDiff} more files parsed successfully**\n`;
              } else if (totalDiff < 0) {
                body += `**Regression: ${totalDiff} fewer files parsed successfully**\n`;
              } else {
                body += '**No change in parsing coverage**\n';
              }
            } else {
              body += '\n*No baseline available for comparison (first run on this branch)*\n';
            }

            // Find an existing report comment to update. Paginate so that it
            // is still found when the PR has more comments than one page.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              per_page: 100,
            });
            const existing = comments.find((c) => c.body?.startsWith(marker));

            if (existing) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: existing.id,
                body,
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body,
              });
            }