Skip to content

Commit cb692fe

Browse files
grokifyclaude
andcommitted
feat(render): add terminal and markdown renderers
Add new render packages for evaluation reports:

- render/terminal: ANSI-colored output with UTF-8 icons
  - Color-coded pass/partial/fail status
  - Box drawing for visual structure
  - Severity-colored findings
- render/markdown: Markdown report format
  - Summary table with category and finding counts
  - Expandable category details
  - Formatted findings with severity badges

Both renderers use the new categorical evaluation types.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 4d0d9e6 commit cb692fe

4 files changed

Lines changed: 1138 additions & 0 deletions

File tree

render/markdown/renderer.go

Lines changed: 378 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,378 @@
1+
// Package markdown provides Markdown rendering for evaluation reports.
2+
package markdown
3+
4+
import (
5+
"fmt"
6+
"io"
7+
"strings"
8+
9+
"github.com/plexusone/structured-evaluation/evaluation"
10+
)
11+
12+
// Renderer turns evaluation reports into Markdown documents and writes them
// to a caller-supplied destination.
type Renderer struct {
	w io.Writer // destination that receives the finished Markdown
}

// New returns a Renderer whose output is written to w.
func New(w io.Writer) *Renderer {
	r := Renderer{w: w}
	return &r
}
21+
22+
// Render outputs the evaluation report as Markdown.
23+
func (r *Renderer) Render(report *evaluation.EvaluationReport) error {
24+
var b strings.Builder
25+
26+
// Title
27+
b.WriteString("## Evaluation Report")
28+
if report.Metadata.DocumentTitle != "" {
29+
b.WriteString(": " + report.Metadata.DocumentTitle)
30+
}
31+
b.WriteString("\n\n")
32+
33+
// Summary section
34+
b.WriteString("### Summary\n\n")
35+
b.WriteString(fmt.Sprintf("**Overall Decision: %s** %s\n\n",
36+
strings.ToUpper(string(report.Decision.Status)),
37+
decisionIcon(report.Decision.Status)))
38+
39+
// Summary table
40+
catCounts := report.Decision.CategoryCounts
41+
findCounts := report.Decision.FindingCounts
42+
43+
b.WriteString("| Metric | Value |\n")
44+
b.WriteString("|--------|-------|\n")
45+
b.WriteString(fmt.Sprintf("| Categories | %d pass, %d partial, %d fail |\n",
46+
catCounts.Pass, catCounts.Partial, catCounts.Fail))
47+
b.WriteString(fmt.Sprintf("| Findings | %d critical, %d high, %d medium |\n",
48+
findCounts.Critical, findCounts.High, findCounts.Medium))
49+
b.WriteString(fmt.Sprintf("| Decision | %s |\n", strings.Title(string(report.Decision.Status))))
50+
b.WriteString("\n---\n\n")
51+
52+
// Metadata
53+
b.WriteString("### Metadata\n\n")
54+
b.WriteString(fmt.Sprintf("- **Document**: %s\n", report.Metadata.Document))
55+
if report.Metadata.DocumentTitle != "" {
56+
b.WriteString(fmt.Sprintf("- **Title**: %s\n", report.Metadata.DocumentTitle))
57+
}
58+
if report.RubricID != "" {
59+
b.WriteString(fmt.Sprintf("- **Rubric**: %s", report.RubricID))
60+
if report.RubricVersion != "" {
61+
b.WriteString(fmt.Sprintf(" v%s", report.RubricVersion))
62+
}
63+
b.WriteString("\n")
64+
}
65+
b.WriteString(fmt.Sprintf("- **Review Type**: %s\n", report.ReviewType))
66+
b.WriteString(fmt.Sprintf("- **Generated**: %s\n", report.Metadata.GeneratedAt.Format("2006-01-02 15:04:05 UTC")))
67+
b.WriteString("\n---\n\n")
68+
69+
// Category Results
70+
b.WriteString("### Category Results\n\n")
71+
b.WriteString("| Category | Score | Weight | Required | Reasoning |\n")
72+
b.WriteString("|----------|-------|--------|----------|----------|\n")
73+
74+
for _, cr := range report.Categories {
75+
icon := scoreIcon(cr.Score)
76+
required := ""
77+
// Note: We don't have required info in CategoryResult, would need rubric
78+
b.WriteString(fmt.Sprintf("| **%s** | %s %s | - | %s | %s |\n",
79+
cr.Category,
80+
icon,
81+
strings.Title(string(cr.Score)),
82+
required,
83+
truncate(cr.Reasoning, 60)))
84+
}
85+
b.WriteString("\n---\n\n")
86+
87+
// Evidence section
88+
hasEvidence := false
89+
for _, cr := range report.Categories {
90+
if len(cr.Evidence) > 0 {
91+
hasEvidence = true
92+
break
93+
}
94+
}
95+
96+
if hasEvidence {
97+
b.WriteString("### Evidence\n\n")
98+
for _, cr := range report.Categories {
99+
if len(cr.Evidence) > 0 {
100+
b.WriteString(fmt.Sprintf("**%s:**\n", cr.Category))
101+
for _, ev := range cr.Evidence {
102+
b.WriteString(fmt.Sprintf("- `%s`\n", ev))
103+
}
104+
b.WriteString("\n")
105+
}
106+
}
107+
b.WriteString("---\n\n")
108+
}
109+
110+
// Findings section
111+
if len(report.Findings) > 0 {
112+
b.WriteString("### Findings\n\n")
113+
114+
// Group by severity
115+
for _, sev := range evaluation.AllSeverities() {
116+
findings := filterBySeverity(report.Findings, sev)
117+
if len(findings) == 0 {
118+
continue
119+
}
120+
121+
b.WriteString(fmt.Sprintf("#### %s %s (%d)\n\n",
122+
severityIcon(sev),
123+
strings.Title(string(sev)),
124+
len(findings)))
125+
126+
for _, f := range findings {
127+
b.WriteString(fmt.Sprintf("**%s** [%s]\n", f.Title, f.Category))
128+
b.WriteString(fmt.Sprintf("- %s\n", f.Description))
129+
if f.Recommendation != "" {
130+
b.WriteString(fmt.Sprintf("- **Recommendation**: %s\n", f.Recommendation))
131+
}
132+
b.WriteString("\n")
133+
}
134+
}
135+
b.WriteString("---\n\n")
136+
} else {
137+
b.WriteString("### Findings\n\n")
138+
b.WriteString("**None** - No issues identified.\n\n")
139+
b.WriteString("---\n\n")
140+
}
141+
142+
// Next Steps
143+
writeNextSteps(&b, report)
144+
145+
// Footer
146+
b.WriteString("---\n\n")
147+
b.WriteString("*Generated by structured-evaluation*\n")
148+
149+
_, err := fmt.Fprint(r.w, b.String())
150+
return err
151+
}
152+
153+
// RenderWithRubric renders with additional rubric context (weights, required flags).
154+
func (r *Renderer) RenderWithRubric(report *evaluation.EvaluationReport, rubric *evaluation.RubricSet) error {
155+
var b strings.Builder
156+
157+
// Title
158+
b.WriteString("## Evaluation Report")
159+
if report.Metadata.DocumentTitle != "" {
160+
b.WriteString(": " + report.Metadata.DocumentTitle)
161+
}
162+
b.WriteString("\n\n")
163+
164+
// Summary section
165+
b.WriteString("### Summary\n\n")
166+
b.WriteString(fmt.Sprintf("**Overall Decision: %s** %s\n\n",
167+
strings.ToUpper(string(report.Decision.Status)),
168+
decisionIcon(report.Decision.Status)))
169+
170+
// Summary table
171+
catCounts := report.Decision.CategoryCounts
172+
findCounts := report.Decision.FindingCounts
173+
174+
b.WriteString("| Metric | Value |\n")
175+
b.WriteString("|--------|-------|\n")
176+
b.WriteString(fmt.Sprintf("| Categories | %d pass, %d partial, %d fail |\n",
177+
catCounts.Pass, catCounts.Partial, catCounts.Fail))
178+
b.WriteString(fmt.Sprintf("| Findings | %d critical, %d high, %d medium |\n",
179+
findCounts.Critical, findCounts.High, findCounts.Medium))
180+
b.WriteString(fmt.Sprintf("| Decision | %s |\n", strings.Title(string(report.Decision.Status))))
181+
b.WriteString("\n---\n\n")
182+
183+
// Category Results with rubric info
184+
b.WriteString("### Category Results\n\n")
185+
b.WriteString("| Category | Score | Weight | Required | Reasoning |\n")
186+
b.WriteString("|----------|-------|--------|----------|----------|\n")
187+
188+
for _, cr := range report.Categories {
189+
icon := scoreIcon(cr.Score)
190+
weight := "-"
191+
required := ""
192+
193+
// Look up rubric info
194+
if rubric != nil {
195+
if cat := rubric.GetCategory(cr.Category); cat != nil {
196+
weight = fmt.Sprintf("%.1f", cat.Weight)
197+
if cat.Required {
198+
required = "✅"
199+
} else {
200+
required = "❌"
201+
}
202+
}
203+
}
204+
205+
b.WriteString(fmt.Sprintf("| **%s** | %s %s | %s | %s | %s |\n",
206+
cr.Category,
207+
icon,
208+
strings.Title(string(cr.Score)),
209+
weight,
210+
required,
211+
truncate(cr.Reasoning, 50)))
212+
}
213+
b.WriteString("\n---\n\n")
214+
215+
// Evidence section
216+
hasEvidence := false
217+
for _, cr := range report.Categories {
218+
if len(cr.Evidence) > 0 {
219+
hasEvidence = true
220+
break
221+
}
222+
}
223+
224+
if hasEvidence {
225+
b.WriteString("### Evidence\n\n")
226+
for _, cr := range report.Categories {
227+
if len(cr.Evidence) > 0 {
228+
b.WriteString(fmt.Sprintf("**%s:**\n", cr.Category))
229+
for _, ev := range cr.Evidence {
230+
b.WriteString(fmt.Sprintf("- `%s`\n", ev))
231+
}
232+
b.WriteString("\n")
233+
}
234+
}
235+
b.WriteString("---\n\n")
236+
}
237+
238+
// Findings section
239+
if len(report.Findings) > 0 {
240+
b.WriteString("### Findings\n\n")
241+
242+
for _, sev := range evaluation.AllSeverities() {
243+
findings := filterBySeverity(report.Findings, sev)
244+
if len(findings) == 0 {
245+
continue
246+
}
247+
248+
b.WriteString(fmt.Sprintf("#### %s %s (%d)\n\n",
249+
severityIcon(sev),
250+
strings.Title(string(sev)),
251+
len(findings)))
252+
253+
for _, f := range findings {
254+
b.WriteString(fmt.Sprintf("**%s** [%s]\n", f.Title, f.Category))
255+
b.WriteString(fmt.Sprintf("- %s\n", f.Description))
256+
if f.Recommendation != "" {
257+
b.WriteString(fmt.Sprintf("- **Recommendation**: %s\n", f.Recommendation))
258+
}
259+
b.WriteString("\n")
260+
}
261+
}
262+
b.WriteString("---\n\n")
263+
} else {
264+
b.WriteString("### Findings\n\n")
265+
b.WriteString("**None** - No issues identified.\n\n")
266+
b.WriteString("---\n\n")
267+
}
268+
269+
// Next Steps
270+
writeNextSteps(&b, report)
271+
272+
// Footer
273+
b.WriteString("---\n\n")
274+
b.WriteString("*Generated by structured-evaluation*\n")
275+
276+
_, err := fmt.Fprint(r.w, b.String())
277+
return err
278+
}
279+
280+
// Helper functions
281+
282+
func writeNextSteps(b *strings.Builder, report *evaluation.EvaluationReport) {
283+
if len(report.NextSteps.Immediate) == 0 && len(report.NextSteps.Recommended) == 0 {
284+
return
285+
}
286+
287+
b.WriteString("### Next Steps\n\n")
288+
289+
if len(report.NextSteps.Immediate) > 0 {
290+
b.WriteString("**Immediate Actions:**\n\n")
291+
for _, action := range report.NextSteps.Immediate {
292+
b.WriteString(fmt.Sprintf("- [ ] %s", action.Action))
293+
if action.Category != "" {
294+
b.WriteString(fmt.Sprintf(" [%s]", action.Category))
295+
}
296+
b.WriteString("\n")
297+
}
298+
b.WriteString("\n")
299+
}
300+
301+
if len(report.NextSteps.Recommended) > 0 {
302+
b.WriteString("**Recommended:**\n\n")
303+
for _, action := range report.NextSteps.Recommended {
304+
b.WriteString(fmt.Sprintf("- [ ] %s", action.Action))
305+
if action.Category != "" {
306+
b.WriteString(fmt.Sprintf(" [%s]", action.Category))
307+
}
308+
b.WriteString("\n")
309+
}
310+
b.WriteString("\n")
311+
}
312+
313+
if report.NextSteps.RerunCommand != "" {
314+
b.WriteString(fmt.Sprintf("**Re-run command**: `%s`\n\n", report.NextSteps.RerunCommand))
315+
}
316+
}
317+
318+
func decisionIcon(status evaluation.DecisionStatus) string {
319+
switch status {
320+
case evaluation.DecisionPass:
321+
return "✅"
322+
case evaluation.DecisionConditional:
323+
return "⚠️"
324+
case evaluation.DecisionFail:
325+
return "❌"
326+
case evaluation.DecisionHumanReview:
327+
return "👤"
328+
default:
329+
return "📋"
330+
}
331+
}
332+
333+
func scoreIcon(score evaluation.ScoreValue) string {
334+
switch score {
335+
case evaluation.ScorePass:
336+
return "🟢"
337+
case evaluation.ScorePartial:
338+
return "🟡"
339+
case evaluation.ScoreFail:
340+
return "🔴"
341+
default:
342+
return "⚪"
343+
}
344+
}
345+
346+
func severityIcon(sev evaluation.Severity) string {
347+
switch sev {
348+
case evaluation.SeverityCritical:
349+
return "🔴"
350+
case evaluation.SeverityHigh:
351+
return "🔴"
352+
case evaluation.SeverityMedium:
353+
return "🟡"
354+
case evaluation.SeverityLow:
355+
return "🟢"
356+
case evaluation.SeverityInfo:
357+
return "ℹ️"
358+
default:
359+
return "⚪"
360+
}
361+
}
362+
363+
func filterBySeverity(findings []evaluation.Finding, sev evaluation.Severity) []evaluation.Finding {
364+
var result []evaluation.Finding
365+
for _, f := range findings {
366+
if f.Severity == sev {
367+
result = append(result, f)
368+
}
369+
}
370+
return result
371+
}
372+
373+
// truncate shortens s to at most maxLen characters, replacing the tail with
// "..." when anything is cut off.
//
// Length is measured in runes rather than bytes so a multi-byte UTF-8
// character is never split in half. When maxLen is too small to hold the
// ellipsis (below 3) the string is cut hard to maxLen characters, and a
// non-positive maxLen yields the empty string instead of a slice-bounds panic.
func truncate(s string, maxLen int) string {
	const ellipsis = "..."

	if maxLen <= 0 {
		return ""
	}
	// Fast path: a byte length within the limit implies the rune count is too.
	if len(s) <= maxLen {
		return s
	}

	runes := []rune(s)
	if len(runes) <= maxLen {
		return s
	}
	if maxLen < len(ellipsis) {
		return string(runes[:maxLen])
	}
	return string(runes[:maxLen-len(ellipsis)]) + ellipsis
}

0 commit comments

Comments
 (0)