Skip to content

Commit 7bd428e

Browse files
salmad3 and claude committed
feat: spec-prose linker for cross-source knowledge composition
Detects when prose documentation and OpenAPI spec tag groups describe the same domain. Matching signals: endpoint mention in prose text, title/tag similarity, schema name references. Produces composes_with edges that connect structured interface knowledge (from specs) with natural language guidance (from prose). Validated on Petstore: prose guides about pet management and store operations correctly linked to corresponding spec tag groups. Co-Authored-By: Claude <noreply@anthropic.com>
1 parent d5475c4 commit 7bd428e

File tree

3 files changed

+315
-2
lines changed

3 files changed

+315
-2
lines changed

packages/agent-metadata/src/extractors/index.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@ import type { ExtractedMetadata, ExtractedRelationship, DocumentMetadata } from
1010
import { extractDocumentMetadata } from './document.js';
1111
import { extractRelationships } from './relationships.js';
1212
import { extractFromOpenApi } from './openapi.js';
13+
import { linkSpecAndProse } from './spec-prose-linker.js';
1314

1415
export { extractInlineText, extractBlockText, estimateTokens, countWords } from './text.js';
1516
export { extractDocumentMetadata } from './document.js';
1617
export { inferAnnotations } from './infer-annotations.js';
1718
export { extractRelationships } from './relationships.js';
1819
export { extractFromOpenApi } from './openapi.js';
20+
export { linkSpecAndProse } from './spec-prose-linker.js';
1921

2022
export interface ExtractAllOptions {
2123
/** Raw OpenAPI spec objects to extract alongside documentation. */
@@ -41,9 +43,16 @@ export function extractAll(
4143
specRelationships.push(...extracted.relationships);
4244
}
4345

46+
const allDocs = [...docs, ...specDocs];
47+
48+
// Cross-reference prose and spec documents when both exist
49+
const crossSourceLinks = specDocs.length > 0
50+
? linkSpecAndProse(allDocs)
51+
: [];
52+
4453
return {
45-
documents: [...docs, ...specDocs],
46-
relationships: [...proseRelationships, ...specRelationships],
54+
documents: allDocs,
55+
relationships: [...proseRelationships, ...specRelationships, ...crossSourceLinks],
4756
siteTitle: config.title,
4857
siteUrl: config.siteUrl,
4958
baseUrl: config.baseUrl ?? '/',
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
import { describe, it, expect } from 'vitest';
2+
import { linkSpecAndProse } from './spec-prose-linker.js';
3+
import type { DocumentMetadata } from '../types.js';
4+
5+
/**
 * Build a `DocumentMetadata` fixture for the tests below.
 *
 * Every field except `filePath` starts from an empty/neutral default;
 * anything supplied in `overrides` replaces the corresponding default.
 */
function makeDoc(overrides: Partial<DocumentMetadata> & { filePath: string }): DocumentMetadata {
  const defaults: Omit<DocumentMetadata, 'filePath'> = {
    title: 'Untitled',
    description: undefined,
    frontmatter: {},
    blocks: [],
    crossRefs: [],
    links: [],
    wordCount: 0,
    headings: [],
  };

  // Later spreads win, so caller-supplied fields take precedence over defaults.
  return { ...defaults, ...overrides };
}
18+
19+
describe('spec-prose linker', () => {
  type Block = DocumentMetadata['blocks'][number];

  /** Fixture for a prose ("guide") block with no heading. */
  const guideBlock = (
    id: string,
    sourcePath: string,
    textContent: string,
    annotations: Block['annotations'] = {},
  ): Block => ({
    id,
    type: 'guide',
    textContent,
    annotations,
    codeBlocks: [],
    sourcePath,
  });

  /** Fixture for a spec ("reference") block whose heading is the endpoint. */
  const operationBlock = (
    id: string,
    sourcePath: string,
    headingText: string,
    textContent: string,
    annotations: Block['annotations'],
  ): Block => ({
    id,
    type: 'reference',
    headingText,
    textContent,
    annotations,
    codeBlocks: [],
    sourcePath,
  });

  it('links prose to spec by endpoint mention in text', () => {
    // The prose title deliberately does NOT contain the tag name, so only
    // the endpoint signal can produce the link.
    const guidePath = 'docs/api-integration.md';
    const specPath = 'openapi:pet';

    const guide = makeDoc({
      filePath: guidePath,
      title: 'API Integration Guide',
      blocks: [guideBlock('b1', guidePath, 'Use POST /pet to add a new pet.', { audience: 'human' })],
    });
    const spec = makeDoc({
      filePath: specPath,
      title: 'API — pet',
      blocks: [
        operationBlock('op1', specPath, 'POST /pet', 'Add a new pet', {
          type: 'reference',
          audience: 'agent',
        }),
      ],
    });

    const links = linkSpecAndProse([guide, spec]);

    expect(links).toHaveLength(1);
    const [link] = links;
    expect(link!.sourceDocPath).toBe('docs/api-integration.md');
    expect(link!.targetDocPath).toBe('openapi:pet');
    expect(link!.type).toBe('composes_with');
    expect(link!.evidence).toContain('endpoint mention');
  });

  it('links prose to spec by title similarity', () => {
    const guidePath = 'docs/payments.md';

    const guide = makeDoc({
      filePath: guidePath,
      title: 'Payments',
      blocks: [guideBlock('b1', guidePath, 'This guide covers payment processing.')],
    });
    const spec = makeDoc({
      filePath: 'openapi:payments',
      title: 'API — payments',
      blocks: [],
    });

    const links = linkSpecAndProse([guide, spec]);

    expect(links).toHaveLength(1);
    expect(links[0]!.evidence).toContain('title match');
  });

  it('returns empty when no specs present', () => {
    const proseOnly: DocumentMetadata[] = [makeDoc({ filePath: 'docs/guide.md', title: 'Guide' })];

    expect(linkSpecAndProse(proseOnly)).toEqual([]);
  });

  it('returns empty when no prose present', () => {
    const specOnly: DocumentMetadata[] = [makeDoc({ filePath: 'openapi:pet', title: 'Pet' })];

    expect(linkSpecAndProse(specOnly)).toEqual([]);
  });

  it('skips openapi:schemas and openapi:security documents', () => {
    const authPath = 'docs/auth.md';

    const links = linkSpecAndProse([
      makeDoc({
        filePath: authPath,
        title: 'Authentication',
        blocks: [guideBlock('b1', authPath, 'Configure security for your API.')],
      }),
      makeDoc({ filePath: 'openapi:schemas', title: 'Schemas' }),
      makeDoc({ filePath: 'openapi:security', title: 'Security' }),
    ]);

    expect(links).toEqual([]);
  });

  it('links multiple prose docs to different spec tags', () => {
    const petGuidePath = 'docs/pet-guide.md';
    const storeGuidePath = 'docs/store-guide.md';
    const petSpecPath = 'openapi:pet';
    const storeSpecPath = 'openapi:store';

    const proseDocs: DocumentMetadata[] = [
      makeDoc({
        filePath: petGuidePath,
        title: 'Pet Guide',
        blocks: [guideBlock('b1', petGuidePath, 'Use GET /pet/findByStatus to search.')],
      }),
      makeDoc({
        filePath: storeGuidePath,
        title: 'Store Guide',
        blocks: [guideBlock('b2', storeGuidePath, 'Check inventory with GET /store/inventory.')],
      }),
    ];
    const specDocs: DocumentMetadata[] = [
      makeDoc({
        filePath: petSpecPath,
        title: 'API — pet',
        blocks: [
          operationBlock('op1', petSpecPath, 'GET /pet/findByStatus', 'Find pets by status', {
            type: 'reference',
          }),
        ],
      }),
      makeDoc({
        filePath: storeSpecPath,
        title: 'API — store',
        blocks: [
          operationBlock('op2', storeSpecPath, 'GET /store/inventory', 'Returns pet inventories', {
            type: 'reference',
          }),
        ],
      }),
    ];

    const links = linkSpecAndProse([...proseDocs, ...specDocs]);
    expect(links).toHaveLength(2);

    const targetFor = (sourcePath: string): string | undefined =>
      links.find((candidate) => candidate.sourceDocPath === sourcePath)?.targetDocPath;

    expect(targetFor('docs/pet-guide.md')).toBe('openapi:pet');
    expect(targetFor('docs/store-guide.md')).toBe('openapi:store');
  });
});
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/**
2+
* Spec-prose linker.
3+
*
4+
* Detects when a prose document and an OpenAPI-derived document describe
5+
* the same domain, producing cross-source relationships that connect
6+
* structured interface knowledge (from specs) with natural language
7+
* guidance (from prose).
8+
*
9+
* Three matching signals:
10+
* 1. Title/tag similarity (prose "Payment Guide" matches spec tag "payments")
11+
* 2. Endpoint mention (prose text contains "POST /v1/payment_intents")
12+
* 3. Schema mention (prose text references "PaymentIntent" or "payment_intent")
13+
*/
14+
15+
import type {
16+
DocumentMetadata,
17+
ExtractedRelationship,
18+
} from '../types.js';
19+
20+
21+
/** File-path prefix that identifies a spec-derived document. */
const SPEC_PATH_PREFIX = 'openapi:';

/** Spec documents that are never used as link targets. */
const SKIPPED_SPEC_TAGS: ReadonlySet<string> = new Set(['schemas', 'security']);

/**
 * Candidate schema names inside a block's `answers` annotation:
 * PascalCase words or snake_case identifiers.
 *
 * NOTE(review): the PascalCase branch also captures any capitalised
 * ordinary word (e.g. a sentence opener). Confirm the format of
 * `annotations.answers` and tighten this if that produces noisy links.
 */
const SCHEMA_NAME_PATTERN = /\b([A-Z][a-zA-Z]+|[a-z]+_[a-z_]+)\b/g;

/** Schema names shorter than this are too generic to link on. */
const MIN_SCHEMA_NAME_LENGTH = 4;

/** The leading title word must be at least this long to count as a tag match. */
const MIN_TITLE_WORD_LENGTH = 3;

/** Optional HTTP method followed by a path that starts with "/". */
const ENDPOINT_PATTERN = /^\s*(?:[A-Za-z]+\s+)?(\/\S*)/;

const HAS_LETTER_OR_DIGIT = /[\p{L}\p{N}]/u;
const WORD_CHAR = /[\p{L}\p{N}_]/u;
const CAMEL_CASE_BOUNDARY = /([\p{Ll}\p{N}])(\p{Lu})/gu;
const NON_WORD_RUN = /[^\p{L}\p{N}]+/u;

interface SpecEndpoint {
  /** Heading exactly as it appears in the spec block (used in evidence). */
  readonly label: string;
  /** Lower-cased path portion of the heading, e.g. "/pet/findbystatus". */
  readonly path: string;
}

interface SpecSchema {
  /** Schema name exactly as extracted (used in evidence). */
  readonly name: string;
  /** Space-padded word phrases the name may appear as in prose. */
  readonly phrases: readonly string[];
}

interface SpecTagInfo {
  readonly doc: DocumentMetadata;
  readonly tagWords: readonly string[];
  readonly endpoints: readonly SpecEndpoint[];
  readonly schemas: readonly SpecSchema[];
}

interface ProseView {
  readonly title: string;
  readonly titleWords: readonly string[];
  /** Lower-cased heading + body text, used for endpoint matching. */
  readonly lowerText: string;
  /** Space-padded word phrases of the text, used for schema matching. */
  readonly textPhrases: readonly string[];
}

/**
 * Link prose documents to spec-derived documents by detecting shared
 * domain references.
 *
 * A document is spec-derived when its `filePath` starts with `openapi:`;
 * the remainder of the path is its tag. The `schemas` and `security`
 * spec documents are ignored. For every (prose, spec tag) pair the
 * signals below are tried in order and the first hit wins, so a pair
 * yields at most one relationship:
 *
 *   1. Title/tag: the tag's words appear as whole words in the prose
 *      title, or the title's first word (3+ characters) is one of the
 *      tag's words. Comparison is case-insensitive and ignores a
 *      trailing plural "s", so "Payment Guide" matches tag "payments".
 *   2. Endpoint: the path of a spec block heading ("POST /pet" -> "/pet")
 *      occurs in the prose text and is not immediately followed by a
 *      word character (so "/pet" does not match "/pets"). Headings that
 *      do not contain a "/"-prefixed path, and the bare root path, are
 *      ignored.
 *   3. Schema: a schema name taken from a spec block's `answers`
 *      annotation occurs in the prose as whole words. `PaymentIntent`,
 *      `payment_intent` and "payment intent" are treated as the same name.
 *
 * @param documents Prose and spec-derived documents, in any order.
 * @returns One `composes_with` relationship (confidence 0.8) per matched
 *   pair, prose document as source and spec document as target. Empty
 *   when either kind of document is missing.
 */
export function linkSpecAndProse(
  documents: readonly DocumentMetadata[],
): readonly ExtractedRelationship[] {
  const proseDocs = documents.filter((d) => !d.filePath.startsWith(SPEC_PATH_PREFIX));
  const specDocs = documents.filter((d) => d.filePath.startsWith(SPEC_PATH_PREFIX));

  if (proseDocs.length === 0 || specDocs.length === 0) return [];

  // Index each usable spec document by tag, pre-computing everything the
  // matching loop needs so it is not rebuilt for every prose document.
  const specTags = new Map<string, SpecTagInfo>();
  for (const specDoc of specDocs) {
    const tag = specDoc.filePath.slice(SPEC_PATH_PREFIX.length);
    if (SKIPPED_SPEC_TAGS.has(tag)) continue;

    specTags.set(tag, {
      doc: specDoc,
      tagWords: toWords(tag),
      endpoints: collectEndpoints(specDoc),
      schemas: collectSchemas(specDoc),
    });
  }

  const relationships: ExtractedRelationship[] = [];

  for (const proseDoc of proseDocs) {
    const rawText = proseDoc.blocks
      .map((b) => `${b.headingText ?? ''} ${b.textContent}`)
      .join(' ');

    const prose: ProseView = {
      title: proseDoc.title,
      titleWords: toWords(proseDoc.title),
      lowerText: rawText.toLowerCase(),
      // Both forms are kept so "PaymentIntent" in prose can match a schema
      // written either as one word or as separate words.
      textPhrases: [toPhrase(toWords(rawText, true)), toPhrase(toWords(rawText))],
    };

    for (const [tag, specInfo] of specTags) {
      const matchSignal = findMatchSignal(prose, tag, specInfo);
      if (matchSignal === undefined) continue;

      relationships.push({
        sourceDocPath: proseDoc.filePath,
        sourceSection: proseDoc.title,
        targetDocPath: specInfo.doc.filePath,
        targetSection: specInfo.doc.title,
        type: 'composes_with',
        confidence: 0.8,
        evidence: `spec-prose link: ${matchSignal}`,
      });
    }
  }

  return relationships;
}

/** Return the description of the first matching signal, or undefined when none match. */
function findMatchSignal(prose: ProseView, tag: string, spec: SpecTagInfo): string | undefined {
  // Signal 1: title/tag similarity on whole words. The length floor keeps
  // an empty title or an article such as "A" from matching every tag.
  const leadWord = prose.titleWords[0];
  const leadWordIsTagWord =
    leadWord !== undefined &&
    leadWord.length >= MIN_TITLE_WORD_LENGTH &&
    spec.tagWords.includes(leadWord);
  if (containsPhrase(toPhrase(prose.titleWords), toPhrase(spec.tagWords)) || leadWordIsTagWord) {
    return `title match: "${prose.title}" ↔ tag "${tag}"`;
  }

  // Signal 2: endpoint path mentioned in the prose text.
  const endpoint = spec.endpoints.find((e) => mentionsPath(prose.lowerText, e.path));
  if (endpoint !== undefined) {
    return `endpoint mention: "${endpoint.label}" in "${prose.title}"`;
  }

  // Signal 3: schema name mentioned in the prose text.
  const schema = spec.schemas.find((s) =>
    s.phrases.some((phrase) => prose.textPhrases.some((text) => containsPhrase(text, phrase))),
  );
  if (schema !== undefined) {
    return `schema mention: "${schema.name}" in "${prose.title}"`;
  }

  return undefined;
}

/** Collect the endpoint paths found in a spec document's block headings. */
function collectEndpoints(specDoc: DocumentMetadata): SpecEndpoint[] {
  const endpoints: SpecEndpoint[] = [];
  for (const block of specDoc.blocks) {
    const label = block.headingText ?? '';
    const path = ENDPOINT_PATTERN.exec(label)?.[1]?.toLowerCase();
    // A path with no letters or digits (the root "/") would match nearly any text.
    if (path !== undefined && HAS_LETTER_OR_DIGIT.test(path)) {
      endpoints.push({ label, path });
    }
  }
  return endpoints;
}

/** Collect distinct schema names from the `answers` annotation of a spec document's blocks. */
function collectSchemas(specDoc: DocumentMetadata): SpecSchema[] {
  const names = new Set<string>();
  for (const block of specDoc.blocks) {
    const answers = block.annotations.answers;
    if (!answers) continue;
    for (const match of answers.matchAll(SCHEMA_NAME_PATTERN)) {
      const name = match[1];
      if (name !== undefined && name.length >= MIN_SCHEMA_NAME_LENGTH) names.add(name);
    }
  }

  return [...names].map((name) => ({
    name,
    // "PaymentIntent" -> " payment intent " and " paymentintent ".
    phrases: [...new Set([toPhrase(toWords(name, true)), toPhrase(toWords(name))])],
  }));
}

/**
 * Split text into lower-cased words on any run of non-alphanumeric
 * characters, dropping a trailing plural "s" from words longer than three
 * characters. With `splitCamelCase`, "PaymentIntent" becomes two words.
 */
function toWords(text: string, splitCamelCase = false): string[] {
  const spaced = splitCamelCase ? text.replace(CAMEL_CASE_BOUNDARY, '$1 $2') : text;
  return spaced
    .toLowerCase()
    .split(NON_WORD_RUN)
    .filter((word) => word.length > 0)
    .map((word) =>
      word.length > 3 && word.endsWith('s') && !word.endsWith('ss') ? word.slice(0, -1) : word,
    );
}

/** Join words into a space-padded phrase; an empty word list gives ''. */
function toPhrase(words: readonly string[]): string {
  return words.length === 0 ? '' : ` ${words.join(' ')} `;
}

/** Whole-word containment between two space-padded phrases; an empty needle never matches. */
function containsPhrase(haystack: string, needle: string): boolean {
  return needle !== '' && haystack.includes(needle);
}

/**
 * True when `path` occurs in `text` and the occurrence is not continued by
 * a word character. A following "/" is allowed, so "/pet" is found in
 * "/pet/findbystatus" but not in "/pets".
 */
function mentionsPath(text: string, path: string): boolean {
  const endsInWordChar = WORD_CHAR.test(path.slice(-1));
  for (let at = text.indexOf(path); at !== -1; at = text.indexOf(path, at + 1)) {
    const next = text.charAt(at + path.length);
    if (!endsInWordChar || next === '' || !WORD_CHAR.test(next)) return true;
  }
  return false;
}

0 commit comments

Comments
 (0)