From 27288549ed3e6f6b7761a73615b466e05e7f1f53 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Mon, 25 May 2026 12:25:40 +0200 Subject: [PATCH] feat(title-normalization): broader leading bracket + external-ID strip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generalise the leading bracketed-tag regex in the template (and the matching example cascade in security-cve-allocate/SKILL.md) so it catches any square- or round-bracketed leading tag whose body contains the word "security" or "important", case-insensitive — e.g. (Security Issue), [ Security Vulnerability ], [IMPORTANT], (Important - please read), in addition to the existing four [Security X] forms. Add a new pattern #9 to strip trailing IDs from known external bug-bounty / disclosure trackers — (ZDRES-NNN), [HUNTR-NNN], (GHSL-YYYY-NNN) — in either bracket style. Extend the alternation per project as new reporter brands surface (SNYK-, BDSA-, etc.). Real motivating example: an airflow-s tracker whose subject was "[ Security Report ] LDAP Filter Injection in FAB Auth Manager _search_ldap reachable via /auth/token (ZDRES-223)" — the leading prefix matched the old pattern but no pattern covered the trailing (ZDRES-223). Verified the new cascade collapses this to "LDAP Filter Injection in FAB Auth Manager _search_ldap reachable via /auth/token". 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude/skills/security-cve-allocate/SKILL.md | 7 ++++++- projects/_template/title-normalization.md | 20 +++++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/.claude/skills/security-cve-allocate/SKILL.md b/.claude/skills/security-cve-allocate/SKILL.md index aeefa869..28c7c7fd 100644 --- a/.claude/skills/security-cve-allocate/SKILL.md +++ b/.claude/skills/security-cve-allocate/SKILL.md @@ -294,7 +294,9 @@ t = subprocess.check_output( ).strip() patterns_leading = [ - r"^[ \t]*\[ ?Security (?:Report|Issue|Vulnerability|Bug) ?\][ \t:|\-–—]*", + # Any [...] or (...) leading tag whose body contains "security" + # or "important" (case-insensitive). + r"^[ \t]*(?:\[[^\]]*\b(?:Security|Important)\b[^\]]*\]|\([^)]*\b(?:Security|Important)\b[^)]*\))[ \t:|\-–—]*", r"^[ \t]*Security (?:Report|Issue|Vulnerability|Bug)[ \t:|\-–—]+", r"^[ \t]*Apache[ \t]+Airflow(?:[ \t]+v?\d+(?:\.\d+)*(?:\.x)?)?[ \t]*[:|\-–—]?[ \t]*", r"^[ \t]*Airflow(?:[ \t]+v?\d+(?:\.\d+)*(?:\.x)?)?[ \t]*[:|\-–—][ \t]*", @@ -303,6 +305,9 @@ patterns_trailing = [ r"[ \t]+in[ \t]+(?:Apache[ \t]+)?Airflow[ \t]*\.?$", r"[ \t]*\((?:Apache[ \t]+)?Airflow(?:[ \t]+v?\d+(?:\.\d+)*(?:\.x)?)?\)\.?[ \t]*$", r"[ \t]*\(GHSA-[\w-]+\)\.?[ \t]*$", + # Trailing IDs from known external trackers, square or round + # brackets. Extend the alternation per project. + r"[ \t]*(?:\[(?:ZDRES|HUNTR|GHSL)-[\w-]+\]|\((?:ZDRES|HUNTR|GHSL)-[\w-]+\))\.?[ \t]*$", r"[ \t]*\([^)]*split from #\d+[^)]*\)\.?[ \t]*$", ] diff --git a/projects/_template/title-normalization.md b/projects/_template/title-normalization.md index 13432ea7..c1d63120 100644 --- a/projects/_template/title-normalization.md +++ b/projects/_template/title-normalization.md @@ -34,7 +34,13 @@ stripping step. Otherwise, list the regex cascade below. TODO: one rule per bullet, applied in order. Typical patterns: -1. Leading bracketed tags — e.g. 
`^[ \t]*\[ ?Security (Report|Issue|Vulnerability|Bug) ?\][ \t:|\-–—]*` +1. Leading bracketed `security` / `important` tag — + `^[ \t]*(?:\[[^\]]*\b(?:Security|Important)\b[^\]]*\]|\([^)]*\b(?:Security|Important)\b[^)]*\))[ \t:|\-–—]*` + Matches any square- or round-bracketed leading tag whose body + contains the word *security* or *important* (case-insensitive) — + e.g. `[Security Report]`, `(Security Issue)`, `[ Security + Vulnerability ]`, `[IMPORTANT]`, `(Important — please read)`. + Followed by an optional separator. Apply with `re.IGNORECASE`. 2. Leading plain tags — `^[ \t]*Security (Report|Issue|Vulnerability|Bug)[ \t:|\-–—]+` 3. Leading `<vendor> <product>` (optional version, optional separator) — TODO 4. Leading bare product name (optional version) — TODO @@ -43,10 +49,16 @@ TODO: one rule per bullet, applied in order. Typical patterns: 6. Trailing `in <product>` — TODO 7. Trailing bare version parens — TODO 8. Trailing GHSA ID paren — `[ \t]*\(GHSA-[\w-]+\)\.?[ \t]*$` -9. Trailing *"split from #NNN"* paren — `[ \t]*\([^)]*split from #\d+[^)]*\)\.?[ \t]*$` -10. Trailing trivia — strip trailing whitespace, trailing `.`, +9. Trailing known external-tracker IDs (square or round brackets) — + `[ \t]*(?:\[(?:ZDRES|HUNTR|GHSL)-[\w-]+\]|\((?:ZDRES|HUNTR|GHSL)-[\w-]+\))\.?[ \t]*$` + Strips trailing IDs from known external trackers — `(ZDRES-223)`, + `[HUNTR-456]`, `(GHSL-2024-001)` — in either bracket style. Extend + the alternation in both bracket branches per project when a new reporter brand surfaces + (e.g. `SNYK-…`, `BDSA-…`, internal bug-bounty platforms). +10. Trailing *"split from #NNN"* paren — `[ \t]*\([^)]*split from #\d+[^)]*\)\.?[ \t]*$` +11. Trailing trivia — strip trailing whitespace, trailing `.`, collapse internal whitespace. -11. Capitalise — upper-case the first letter; leave the rest alone +12. Capitalise — upper-case the first letter; leave the rest alone so acronyms stay intact. ## Implementation recipe