Skip to content

Commit 62e4e19

Browse files
salmad3 and claude committed
fix: OpenAPI extractor correctness and pipeline robustness
Co-Authored-By: Claude <noreply@anthropic.com>
1 parent d5475c4 commit 62e4e19

File tree

11 files changed

+403
-39
lines changed

11 files changed

+403
-39
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ Initial public release.
4949
- SafeHtml branded types with DOMPurify sanitization
5050
- Input validation at system boundaries
5151
- Rate limiting on MCP server
52-
- CSP headers on rendered output
52+
- Symlink traversal prevention via `followSymbolicLinks: false` on file discovery
5353

5454
### Design System
5555

packages/agent-metadata/src/emitters/agent-skills.ts

Lines changed: 26 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,22 @@ export function emitAgentSkills(
2525
_config: AgentMetadataConfig,
2626
): AgentSkillsOutput {
2727
const relationships = metadata.relationships ?? [];
28-
const skills = deriveSkills(metadata, relationships);
28+
const rawSkills = deriveSkills(metadata, relationships);
29+
30+
// Filter prerequisites and composesWith to reference only emitted skills.
31+
// Prevents phantom references to skills like "openapisecurity" that have
32+
// no corresponding document.
33+
const skillIds = new Set(rawSkills.map((s) => s.id));
34+
const skills = rawSkills.map((s) => ({
35+
...s,
36+
prerequisites: s.prerequisites?.filter((id) => skillIds.has(id)),
37+
composesWith: s.composesWith?.filter((id) => skillIds.has(id)),
38+
})).map((s) => ({
39+
...s,
40+
prerequisites: s.prerequisites && s.prerequisites.length > 0 ? s.prerequisites : undefined,
41+
composesWith: s.composesWith && s.composesWith.length > 0 ? s.composesWith : undefined,
42+
}));
43+
2944
const edges = deriveEdges(relationships, skills);
3045

3146
return {
@@ -135,10 +150,13 @@ function deriveEdges(
135150

136151

137152
function makeSkillId(filePath: string): string {
153+
// Preserve directory separators as distinct from hyphens in filenames
154+
// to prevent collisions: docs/auth/setup.md → auth--setup
155+
// while docs/auth-setup.md → auth-setup
138156
return filePath
139157
.replace(/^.*?docs\//, '')
140158
.replace(/\.(kd|kdx|md|mdx|adoc|rst)$/, '')
141-
.replace(/\//g, '-')
159+
.replace(/\//g, '--')
142160
.replace(/[^a-z0-9-]/gi, '')
143161
.replace(/^-+|-+$/g, '')
144162
|| 'index';
@@ -159,8 +177,12 @@ function groupRelationshipsBySource(
159177
): ReadonlyMap<string, readonly ExtractedRelationship[]> {
160178
const map = new Map<string, ExtractedRelationship[]>();
161179
for (const rel of relationships) {
162-
const existing = map.get(rel.sourceDocPath) ?? [];
163-
map.set(rel.sourceDocPath, [...existing, rel]);
180+
let group = map.get(rel.sourceDocPath);
181+
if (!group) {
182+
group = [];
183+
map.set(rel.sourceDocPath, group);
184+
}
185+
group.push(rel);
164186
}
165187
return map;
166188
}

packages/agent-metadata/src/emitters/index.ts

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -68,11 +68,7 @@ const ALL_EMITTERS: readonly EmitterName[] = [
6868
'agent-skills',
6969
];
7070

71-
/**
72-
* Default emitters for the reformed pipeline.
73-
* llms.txt is excluded: SE Ranking 300K-domain study showed zero
74-
* correlation with AI citations. Opt-in only.
75-
*/
71+
/** Default emitters. All formats enabled; configure via AgentMetadataConfig.emitters to restrict. */
7672
const DEFAULT_EMITTERS: readonly EmitterName[] = [
7773
'a2a',
7874
'agents-json',

packages/agent-metadata/src/extractors/index.ts

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,12 +10,14 @@ import type { ExtractedMetadata, ExtractedRelationship, DocumentMetadata } from
1010
import { extractDocumentMetadata } from './document.js';
1111
import { extractRelationships } from './relationships.js';
1212
import { extractFromOpenApi } from './openapi.js';
13+
import { linkSpecAndProse } from './spec-prose-linker.js';
1314

1415
export { extractInlineText, extractBlockText, estimateTokens, countWords } from './text.js';
1516
export { extractDocumentMetadata } from './document.js';
1617
export { inferAnnotations } from './infer-annotations.js';
1718
export { extractRelationships } from './relationships.js';
1819
export { extractFromOpenApi } from './openapi.js';
20+
export { linkSpecAndProse } from './spec-prose-linker.js';
1921

2022
export interface ExtractAllOptions {
2123
/** Raw OpenAPI spec objects to extract alongside documentation. */
@@ -41,9 +43,16 @@ export function extractAll(
4143
specRelationships.push(...extracted.relationships);
4244
}
4345

46+
const allDocs = [...docs, ...specDocs];
47+
48+
// Cross-reference prose and spec documents when both exist
49+
const crossSourceLinks = specDocs.length > 0
50+
? linkSpecAndProse(allDocs)
51+
: [];
52+
4453
return {
45-
documents: [...docs, ...specDocs],
46-
relationships: [...proseRelationships, ...specRelationships],
54+
documents: allDocs,
55+
relationships: [...proseRelationships, ...specRelationships, ...crossSourceLinks],
4756
siteTitle: config.title,
4857
siteUrl: config.siteUrl,
4958
baseUrl: config.baseUrl ?? '/',

packages/agent-metadata/src/extractors/openapi.test.ts

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -149,11 +149,12 @@ describe('OpenAPI extractor', () => {
149149
expect(depends.every((r) => r.targetSection.includes('api_key'))).toBe(true);
150150
});
151151

152-
it('extracts supersedes edges for deprecated operations', () => {
152+
it('extracts constrains edges for deprecated operations', () => {
153153
const { relationships } = extractFromOpenApi(PETSTORE_SPEC as any);
154-
const supersedes = relationships.filter((r) => r.type === 'supersedes');
155-
expect(supersedes).toHaveLength(1);
156-
expect(supersedes[0]!.sourceSection).toBe('GET /pet/{id}');
154+
const deprecated = relationships.filter((r) => r.type === 'constrains' && r.evidence.includes('deprecated'));
155+
expect(deprecated).toHaveLength(1);
156+
expect(deprecated[0]!.sourceSection).toBe('GET /pet/{id}');
157+
expect(deprecated[0]!.targetSection).toBe('[deprecated]');
157158
});
158159

159160
it('extracts schema references via star topology with direct refs', () => {
@@ -187,7 +188,7 @@ describe('OpenAPI extractor', () => {
187188
it('produces all relationships at confidence 1.0 (structurally explicit)', () => {
188189
const { relationships } = extractFromOpenApi(PETSTORE_SPEC as any);
189190
const nonPerfect = relationships.filter(
190-
(r) => r.confidence !== 1.0 && r.type !== 'supersedes',
191+
(r) => r.confidence !== 1.0,
191192
);
192193
expect(nonPerfect).toHaveLength(0);
193194
});

packages/agent-metadata/src/extractors/openapi.ts

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,14 @@ export function extractFromOpenApi(
133133
readonly documents: readonly DocumentMetadata[];
134134
readonly relationships: readonly ExtractedRelationship[];
135135
} {
136+
// Validate minimum spec structure to prevent TypeError on malformed input
137+
if (!spec || typeof spec !== 'object') {
138+
return { documents: [], relationships: [] };
139+
}
140+
if (!spec.info || typeof spec.info !== 'object') {
141+
return { documents: [], relationships: [] };
142+
}
143+
136144
const operations = parseOperations(spec);
137145
const documents = groupOperationsIntoDocuments(operations, spec);
138146
const rawRelationships = extractSpecRelationships(operations, spec);
@@ -214,7 +222,7 @@ function parseOperations(spec: OpenApiSpec): readonly ParsedOperation[] {
214222
const operations: ParsedOperation[] = [];
215223
const globalSecurity = spec.security ?? [];
216224

217-
for (const [path, pathItem] of Object.entries(spec.paths ?? {})) {
225+
for (const [path, pathItem] of Object.entries(spec.paths || {})) {
218226
for (const method of ['get', 'post', 'put', 'patch', 'delete', 'head', 'options']) {
219227
const op = (pathItem as Record<string, OperationObject | undefined>)[method];
220228
if (!op) continue;
@@ -484,18 +492,21 @@ function extractSpecRelationships(
484492
}
485493
}
486494

487-
// 4. Deprecated operations → supersedes
495+
// 4. Deprecated operations → constrains
496+
// Without a documented replacement endpoint, a supersedes edge has
497+
// no valid target. Instead, emit a constraint noting the deprecation
498+
// so downstream consumers (agent skills) can flag it.
488499
for (const op of operations) {
489500
if (op.deprecated) {
490501
const tag = op.tags[0] ?? 'default';
491502
relationships.push({
492503
sourceDocPath: `openapi:${tag}`,
493504
sourceSection: `${op.method} ${op.path}`,
494505
targetDocPath: `openapi:${tag}`,
495-
targetSection: `${op.method} ${op.path}`,
496-
type: 'supersedes',
497-
confidence: 0.8,
498-
evidence: `deprecated operation: ${op.operationId}`,
506+
targetSection: `[deprecated]`,
507+
type: 'constrains',
508+
confidence: 1.0,
509+
evidence: `deprecated: ${op.operationId} is marked for removal`,
499510
});
500511
}
501512
}
Lines changed: 183 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,183 @@
1+
import { describe, it, expect } from 'vitest';
2+
import { linkSpecAndProse } from './spec-prose-linker.js';
3+
import type { DocumentMetadata } from '../types.js';
4+
5+
function makeDoc(overrides: Partial<DocumentMetadata> & { filePath: string }): DocumentMetadata {
6+
return {
7+
title: 'Untitled',
8+
description: undefined,
9+
frontmatter: {},
10+
blocks: [],
11+
crossRefs: [],
12+
links: [],
13+
wordCount: 0,
14+
headings: [],
15+
...overrides,
16+
};
17+
}
18+
19+
describe('spec-prose linker', () => {
20+
it('links prose to spec by endpoint mention in text', () => {
21+
// Use a title that does NOT match the tag name to isolate endpoint matching
22+
const docs: DocumentMetadata[] = [
23+
makeDoc({
24+
filePath: 'docs/api-integration.md',
25+
title: 'API Integration Guide',
26+
blocks: [{
27+
id: 'b1',
28+
type: 'guide',
29+
textContent: 'Use POST /pet to add a new pet.',
30+
annotations: { audience: 'human' as const },
31+
codeBlocks: [],
32+
sourcePath: 'docs/api-integration.md',
33+
}],
34+
}),
35+
makeDoc({
36+
filePath: 'openapi:pet',
37+
title: 'API — pet',
38+
blocks: [{
39+
id: 'op1',
40+
type: 'reference',
41+
headingText: 'POST /pet',
42+
textContent: 'Add a new pet',
43+
annotations: { type: 'reference' as const, audience: 'agent' as const },
44+
codeBlocks: [],
45+
sourcePath: 'openapi:pet',
46+
}],
47+
}),
48+
];
49+
50+
const links = linkSpecAndProse(docs);
51+
expect(links.length).toBe(1);
52+
expect(links[0]!.sourceDocPath).toBe('docs/api-integration.md');
53+
expect(links[0]!.targetDocPath).toBe('openapi:pet');
54+
expect(links[0]!.type).toBe('composes_with');
55+
expect(links[0]!.evidence).toContain('endpoint mention');
56+
});
57+
58+
it('links prose to spec by title similarity', () => {
59+
const docs: DocumentMetadata[] = [
60+
makeDoc({
61+
filePath: 'docs/payments.md',
62+
title: 'Payments',
63+
blocks: [{
64+
id: 'b1',
65+
type: 'guide',
66+
textContent: 'This guide covers payment processing.',
67+
annotations: {},
68+
codeBlocks: [],
69+
sourcePath: 'docs/payments.md',
70+
}],
71+
}),
72+
makeDoc({
73+
filePath: 'openapi:payments',
74+
title: 'API — payments',
75+
blocks: [],
76+
}),
77+
];
78+
79+
const links = linkSpecAndProse(docs);
80+
expect(links.length).toBe(1);
81+
expect(links[0]!.evidence).toContain('title match');
82+
});
83+
84+
it('returns empty when no specs present', () => {
85+
const docs: DocumentMetadata[] = [
86+
makeDoc({ filePath: 'docs/guide.md', title: 'Guide' }),
87+
];
88+
expect(linkSpecAndProse(docs)).toEqual([]);
89+
});
90+
91+
it('returns empty when no prose present', () => {
92+
const docs: DocumentMetadata[] = [
93+
makeDoc({ filePath: 'openapi:pet', title: 'Pet' }),
94+
];
95+
expect(linkSpecAndProse(docs)).toEqual([]);
96+
});
97+
98+
it('skips openapi:schemas and openapi:security documents', () => {
99+
const docs: DocumentMetadata[] = [
100+
makeDoc({
101+
filePath: 'docs/auth.md',
102+
title: 'Authentication',
103+
blocks: [{
104+
id: 'b1',
105+
type: 'guide',
106+
textContent: 'Configure security for your API.',
107+
annotations: {},
108+
codeBlocks: [],
109+
sourcePath: 'docs/auth.md',
110+
}],
111+
}),
112+
makeDoc({ filePath: 'openapi:schemas', title: 'Schemas' }),
113+
makeDoc({ filePath: 'openapi:security', title: 'Security' }),
114+
];
115+
116+
const links = linkSpecAndProse(docs);
117+
expect(links).toEqual([]);
118+
});
119+
120+
it('links multiple prose docs to different spec tags', () => {
121+
const docs: DocumentMetadata[] = [
122+
makeDoc({
123+
filePath: 'docs/pet-guide.md',
124+
title: 'Pet Guide',
125+
blocks: [{
126+
id: 'b1',
127+
type: 'guide',
128+
textContent: 'Use GET /pet/findByStatus to search.',
129+
annotations: {},
130+
codeBlocks: [],
131+
sourcePath: 'docs/pet-guide.md',
132+
}],
133+
}),
134+
makeDoc({
135+
filePath: 'docs/store-guide.md',
136+
title: 'Store Guide',
137+
blocks: [{
138+
id: 'b2',
139+
type: 'guide',
140+
textContent: 'Check inventory with GET /store/inventory.',
141+
annotations: {},
142+
codeBlocks: [],
143+
sourcePath: 'docs/store-guide.md',
144+
}],
145+
}),
146+
makeDoc({
147+
filePath: 'openapi:pet',
148+
title: 'API — pet',
149+
blocks: [{
150+
id: 'op1',
151+
type: 'reference',
152+
headingText: 'GET /pet/findByStatus',
153+
textContent: 'Find pets by status',
154+
annotations: { type: 'reference' as const },
155+
codeBlocks: [],
156+
sourcePath: 'openapi:pet',
157+
}],
158+
}),
159+
makeDoc({
160+
filePath: 'openapi:store',
161+
title: 'API — store',
162+
blocks: [{
163+
id: 'op2',
164+
type: 'reference',
165+
headingText: 'GET /store/inventory',
166+
textContent: 'Returns pet inventories',
167+
annotations: { type: 'reference' as const },
168+
codeBlocks: [],
169+
sourcePath: 'openapi:store',
170+
}],
171+
}),
172+
];
173+
174+
const links = linkSpecAndProse(docs);
175+
expect(links.length).toBe(2);
176+
177+
const petLink = links.find((l) => l.sourceDocPath === 'docs/pet-guide.md');
178+
const storeLink = links.find((l) => l.sourceDocPath === 'docs/store-guide.md');
179+
180+
expect(petLink?.targetDocPath).toBe('openapi:pet');
181+
expect(storeLink?.targetDocPath).toBe('openapi:store');
182+
});
183+
});

0 commit comments

Comments
 (0)