Skip to content

Commit 396df3b

Browse files
salmad3 and claude committed
test: prose knowledge extractor coverage
17 tests covering conflicts_with, depends_on, constrains, supersedes extraction with singular/plural inflection, deduplication, security requirements, timing patterns, empty input, spec-doc filtering, and block text fallback. Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 4ce12c5 commit 396df3b

File tree

1 file changed

+185
-0
lines changed

1 file changed

+185
-0
lines changed
Lines changed: 185 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,185 @@
1+
import { describe, it, expect } from 'vitest';
2+
import { buildSpecVocabulary, extractProseKnowledge } from './prose-knowledge.js';
3+
import type { DocumentMetadata } from '../types.js';
4+
5+
/**
 * Builds a synthetic spec document for a single tag.
 *
 * The document and all of its blocks share the path `openapi:<tag>`. Every
 * endpoint string yields one `reference` block that uses the endpoint as both
 * heading and text; the block id is the endpoint with each whitespace run
 * replaced by `-` and then lower-cased.
 *
 * @param tag - Tag name used in the path and the title.
 * @param endpoints - Endpoint strings, one block is created per entry.
 * @returns A document with empty cross-refs, links and headings and a word count of 0.
 */
function makeSpecDoc(tag: string, endpoints: string[]): DocumentMetadata {
  const specPath = `openapi:${tag}`;

  // Annotated so the object literals below stay contextually typed.
  const referenceBlocks: DocumentMetadata['blocks'] = endpoints.map((endpoint) => {
    const slug = endpoint.replace(/\s+/g, '-').toLowerCase();
    return {
      id: slug,
      type: 'reference',
      headingText: endpoint,
      textContent: endpoint,
      annotations: {},
      codeBlocks: [],
      sourcePath: specPath,
    };
  });

  return {
    filePath: specPath,
    title: `API — ${tag}`,
    blocks: referenceBlocks,
    crossRefs: [],
    links: [],
    wordCount: 0,
    headings: [],
  };
}
24+
25+
/**
 * Builds a prose document that carries only a path and a title.
 *
 * All collection fields are fresh empty arrays and the word count is 0, so
 * tests can supply the text separately or spread in their own blocks.
 *
 * @param filePath - Path stored on the document.
 * @param title - Title stored on the document.
 * @returns The otherwise empty document.
 */
function makeProseDoc(filePath: string, title: string): DocumentMetadata {
  const bareDoc: DocumentMetadata = {
    filePath: filePath,
    title: title,
    blocks: [],
    crossRefs: [],
    links: [],
    wordCount: 0,
    headings: [],
  };
  return bareDoc;
}
36+
37+
// Shared spec fixture: one synthetic document per tag, listing that tag's endpoints.
const specDocs = [
  makeSpecDoc('customers', [
    'POST /v2/customers',
    'GET /v2/customers/{id}',
  ]),
  makeSpecDoc('payments', [
    'POST /v2/payments',
    'POST /v2/payments/{id}/confirm',
    'POST /v2/payments/{id}/refund',
  ]),
  makeSpecDoc('subscriptions', [
    'POST /v2/subscriptions',
    'POST /v2/subscriptions/{id}/cancel',
    'POST /v2/subscriptions/{id}/pause',
  ]),
  makeSpecDoc('webhooks', [
    'POST /v2/webhooks',
  ]),
];

// Vocabulary built once from the fixture above and reused by both suites.
const vocabulary = buildSpecVocabulary(specDocs);
45+
46+
// Checks the vocabulary built from the shared spec fixture: tags, endpoints,
// the endpoint-to-tag lookup, and the exclusion of schemas/security documents.
describe('buildSpecVocabulary', () => {
  it('extracts tags from spec documents', () => {
    const expectedTags = ['customers', 'payments', 'subscriptions', 'webhooks'];
    expect(vocabulary.tags).toEqual(expect.arrayContaining(expectedTags));
  });

  it('extracts endpoints from spec documents', () => {
    const expectedEndpoints = ['POST /v2/payments', 'POST /v2/subscriptions'];
    for (const endpoint of expectedEndpoints) {
      expect(vocabulary.endpoints).toContain(endpoint);
    }
  });

  it('maps endpoints to their tags', () => {
    // Lookup keys are written lower-cased here, unlike the fixture's endpoint strings.
    const expectedMapping: Array<[string, string]> = [
      ['post /v2/payments', 'payments'],
      ['post /v2/webhooks', 'webhooks'],
    ];
    for (const [endpointKey, tag] of expectedMapping) {
      expect(vocabulary.endpointToTag.get(endpointKey)).toBe(tag);
    }
  });

  it('skips schemas and security documents', () => {
    const schemasDoc = makeSpecDoc('schemas', ['Customer', 'Payment']);
    const securityDoc = makeSpecDoc('security', ['bearer']);
    const vocab = buildSpecVocabulary([...specDocs, schemasDoc, securityDoc]);

    for (const excludedTag of ['schemas', 'security']) {
      expect(vocab.tags).not.toContain(excludedTag);
    }
  });
});
74+
75+
// Exercises the prose extractor against the shared vocabulary, one sentence
// pattern per test, plus the empty, filtering, deduplication and fallback paths.
describe('extractProseKnowledge', () => {
  const GUIDE_PATH = 'docs/guide.md';
  const guideDoc = makeProseDoc(GUIDE_PATH, 'Payment Guide');
  const proseDocs = [guideDoc];

  /**
   * Runs the extractor with `text` supplied as the raw text for `path`.
   * Defaults target the single guide document used by most tests.
   */
  const extractFrom = (
    text: string,
    docs: DocumentMetadata[] = proseDocs,
    path: string = GUIDE_PATH,
  ) => extractProseKnowledge(docs, vocabulary, new Map([[path, text]]));

  it('extracts conflicts_with from mutual exclusion language', () => {
    const found = extractFrom(
      'You should not call POST /v2/payments directly when using subscriptions.',
    ).find((rel) => rel.type === 'conflicts_with');

    expect(found).toBeDefined();
    expect(found?.sourceDocPath).toBe('openapi:payments');
    expect(found?.targetDocPath).toBe('openapi:subscriptions');
  });

  it('extracts depends_on from workflow ordering language', () => {
    const found = extractFrom(
      'Before processing a payment, you must create a customer first.',
    ).find((rel) => rel.type === 'depends_on');

    expect(found).toBeDefined();
  });

  it('extracts constrains from time constraint language', () => {
    const found = extractFrom(
      'You must confirm the payment within 30 minutes or it expires.',
    ).find((rel) => rel.type === 'constrains' && rel.evidence.includes('time'));

    expect(found).toBeDefined();
    expect(found?.targetDocPath).toBe('openapi:payments');
  });

  it('extracts constrains from failure handling language', () => {
    const found = extractFrom(
      'Failed payments cannot be retried. Create a new payment intent.',
    ).find((rel) => rel.type === 'constrains' && rel.evidence.includes('failure'));

    expect(found).toBeDefined();
    expect(found?.targetDocPath).toBe('openapi:payments');
  });

  it('extracts constrains from retry policy language', () => {
    const found = extractFrom(
      'Subscriptions trigger automatic retry after 1 day, then 3 days, then 7 days.',
    ).find((rel) => rel.type === 'constrains' && rel.evidence.includes('retry'));

    expect(found).toBeDefined();
    expect(found?.targetDocPath).toBe('openapi:subscriptions');
  });

  it('extracts supersedes from deprecation language', () => {
    const found = extractFrom(
      'The cancel endpoint for subscriptions is deprecated. Use pause instead.',
    ).find((rel) => rel.type === 'supersedes');

    expect(found).toBeDefined();
    expect(found?.targetDocPath).toBe('openapi:subscriptions');
  });

  it('extracts depends_on from security requirement language', () => {
    const found = extractFrom(
      'Always verify the webhook signature before processing the event.',
    ).find((rel) => rel.type === 'depends_on' && rel.evidence.includes('security'));

    expect(found).toBeDefined();
    expect(found?.sourceDocPath).toBe('openapi:webhooks');
  });

  it('handles singular/plural inflection (payment → payments tag)', () => {
    const found = extractFrom(
      'A payment must be confirmed within 30 minutes.',
    ).find((rel) => rel.targetDocPath === 'openapi:payments');

    expect(found).toBeDefined();
  });

  it('returns empty array when no spec entities are mentioned', () => {
    const rels = extractFrom(
      'This is a general guide about software engineering best practices.',
    );

    expect(rels).toHaveLength(0);
  });

  it('skips openapi-prefixed documents (spec docs are not prose)', () => {
    // A document whose path carries the openapi: prefix, passed in as if it were prose.
    const specPath = 'openapi:payments';
    const specAsProse = [makeProseDoc(specPath, 'Payments')];

    const rels = extractFrom('Payments cannot be retried.', specAsProse, specPath);

    expect(rels).toHaveLength(0);
  });

  it('deduplicates rules with same type, source, and target', () => {
    const conflicts = extractFrom(
      'Do not use payments when using subscriptions. Never call payments directly with subscriptions.',
    ).filter((rel) => rel.type === 'conflicts_with');

    expect(conflicts).toHaveLength(1);
  });

  it('extracts timing information from "X business days" patterns', () => {
    const found = extractFrom(
      'Refunds take 5-10 business days to appear on the customer statement.',
    ).find((rel) => rel.evidence.includes('timing'));

    expect(found).toBeDefined();
  });

  it('falls back to block text when raw texts are not provided', () => {
    const docsWithBlocks: DocumentMetadata[] = [
      {
        ...guideDoc,
        blocks: [
          {
            id: 'b1',
            type: 'section',
            headingText: 'Payments',
            textContent: 'Do not use payments when using subscriptions directly.',
            annotations: {},
            codeBlocks: [],
            sourcePath: 'docs/guide.md',
          },
        ],
      },
    ];

    // No raw-text map is passed, so only the block content above is available.
    const found = extractProseKnowledge(docsWithBlocks, vocabulary)
      .find((rel) => rel.type === 'conflicts_with');

    expect(found).toBeDefined();
  });
});

0 commit comments

Comments
 (0)