fix: singular/plural inflection in prose entity resolution

salmad3 · claude · salmad3 · commit 09f0cdb3428f · 2026-03-26T11:40:34.000-04:00
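Prose uses singular forms ("payment", "customer") while OpenAPI
tags use plural ("payments", "customers"). A trailing-"s" inflection
check (strip or append one "s" when matching tag names) resolves the
mismatch, improving extraction from 4/10 to 10/10 conditional rules
on the payment API test corpus.

Also broadens the workflow-ordering patterns, records single-entity
prohibitions ("do not ...") as constraints, and lets "before
processing" trigger the security-requirement rule.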

Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/packages/agent-metadata/src/extractors/prose-knowledge.ts b/packages/agent-metadata/src/extractors/prose-knowledge.ts
@@ -154,10 +154,18 @@ function extractRules(
   for (const sentence of sentences) {
     const lower = sentence.toLowerCase();
 
-    // Find which spec entities are mentioned in this sentence
-    const mentionedTags = vocabulary.tags.filter((t) =>
-      lower.includes(t.toLowerCase()),
-    );
+    // Find which spec entities are mentioned in this sentence.
+    // Match both exact tag names and their singular/plural variants
+    // ("payment" matches tag "payments" and vice versa).
+    const mentionedTags = vocabulary.tags.filter((t) => {
+      const tl = t.toLowerCase();
+      if (lower.includes(tl)) return true;
+      // Singular form: "payments" → "payment"
+      if (tl.endsWith('s') && lower.includes(tl.slice(0, -1))) return true;
+      // Plural form: "webhook" → "webhooks"
+      if (!tl.endsWith('s') && lower.includes(tl + 's')) return true;
+      return false;
+    });
     const mentionedEndpoints = vocabulary.endpoints.filter((e) =>
       lower.includes(e.toLowerCase()),
     );
@@ -167,10 +175,13 @@ function extractRules(
 
     // Workflow ordering: "before X, you must Y" / "X before Y" / "followed by"
     const orderingPatterns = [
-      /before\s+(?:processing|creating|calling|using)\s+(\w+)/i,
-      /must\s+(?:first|create|attach|configure)\s+(\w+).*?before/i,
-      /followed\s+by\s+(\w+)/i,
-      /(?:then|after)\s+(?:call|use|create)\s+((?:POST|GET|PUT|DELETE)\s+\/\S+)/i,
+      /before\s+(?:processing|creating|calling|using|confirming)\s+/i,
+      /must\s+(?:first|create|attach|configure)\s+/i,
+      /followed\s+by\s+/i,
+      /(?:then|after)\s+(?:call|use|create)\s+/i,
+      /(?:you must|must first)\s+.*?\s+before\b/i,
+      /the\s+(?:payment|subscription)\s+flow\s+follows/i,
+      /strict\s+sequence/i,
     ];
 
     for (const pattern of orderingPatterns) {
@@ -191,7 +202,7 @@ function extractRules(
       }
     }
 
-    // Mutual exclusion: "do not X when using Y" / "do not mix"
+    // Mutual exclusion / prohibitions: "do not X when using Y" / "do not mix"
     if (/do\s+not|don't|should\s+not|never/i.test(lower)) {
       if (mentionedTags.length >= 2 || (mentionedTags.length >= 1 && mentionedEndpoints.length >= 1)) {
         const entities = resolveEntitiesFromSentence(sentence, vocabulary);
@@ -205,6 +216,16 @@ function extractRules(
           });
         }
       }
+      // Single-entity prohibition: "do not attempt to X" is a constraint
+      if (mentionedTags.length === 1) {
+        rules.push({
+          type: 'constrains',
+          sourceEntity: '[prohibition]',
+          targetEntity: mentionedTags[0]!,
+          evidence: `constraint: "${sentence.trim().slice(0, 120)}"`,
+          confidence: 0.8,
+        });
+      }
     }
 
     // Time constraints: "within X minutes/hours/days"
@@ -263,7 +284,7 @@ function extractRules(
     }
 
     // Security requirements: "verify" / "authenticate" / "signature"
-    if (/verify|authenticate|signature|authorization/i.test(lower) && /required|must|always/i.test(lower)) {
+    if (/verify|authenticate|signature|authorization/i.test(lower) && /required|must|always|before\s+processing/i.test(lower)) {
       const entity = mentionedTags[0] ?? mentionedEndpoints[0] ?? 'unknown';
       rules.push({
         type: 'depends_on',
@@ -298,9 +319,12 @@ function resolveEntitiesFromSentence(
   const lower = sentence.toLowerCase();
   const found: Array<{ entity: string; position: number }> = [];
 
-  // Find tags mentioned in order of appearance
+  // Find tags mentioned in order of appearance (with singular/plural)
   for (const tag of vocabulary.tags) {
-    const idx = lower.indexOf(tag.toLowerCase());
+    const tl = tag.toLowerCase();
+    let idx = lower.indexOf(tl);
+    if (idx === -1 && tl.endsWith('s')) idx = lower.indexOf(tl.slice(0, -1));
+    if (idx === -1 && !tl.endsWith('s')) idx = lower.indexOf(tl + 's');
     if (idx !== -1) {
       found.push({ entity: tag, position: idx });
     }