Skip to content

Commit b6a9648

Browse files
committed
fix(core): seal group collision loopholes and harden varargs validation
Finalized architectural safety before v1.6.0 release: - Implemented stealth metadata extraction for nested SiftPattern injections in addPattern and addNamedCapture to prevent group name collisions. - Added pre-emptive fail-fast null validation for followedBy varargs to prevent corrupted builder states. - Achieved 100% Jacoco coverage with extensive nested group collision and happy-path scenarios.
1 parent 8bee547 commit b6a9648

File tree

4 files changed

+136
-26
lines changed

4 files changed

+136
-26
lines changed

sift-core/src/main/java/com/mirkoddd/sift/core/PatternAssembler.java

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ class PatternAssembler {
4444
mainPattern.append(RegexSyntax.GROUP_CLOSE);
4545
}
4646

47+
Set<String> getRegisteredGroups() {
48+
return registeredGroups;
49+
}
50+
4751
void setQuantifier(String quantifier) {
4852
this.currentQuantifier = quantifier;
4953
}
@@ -59,28 +63,39 @@ void addClassRange(String range) {
5963
}
6064

6165
void addClassInclusion(char c, char... additionalExtras) {
62-
RegexEscaper.escapeInsideBrackets(c, pendingClass);
63-
for (char extra : additionalExtras) {
64-
RegexEscaper.escapeInsideBrackets(extra, pendingClass);
65-
}
66+
RegexEscaper.escapeInsideBrackets(c, pendingClass);
67+
for (char extra : additionalExtras) {
68+
RegexEscaper.escapeInsideBrackets(extra, pendingClass);
69+
}
6670
}
6771

6872
void addClassExclusion(char excluded, char... additionalExcluded) {
69-
pendingClass.append(RegexSyntax.CLASS_INTERSECTION_NEGATION);
70-
RegexEscaper.escapeInsideBrackets(excluded, pendingClass);
71-
for (char c : additionalExcluded) {
72-
RegexEscaper.escapeInsideBrackets(c, pendingClass);
73-
}
74-
pendingClass.append(RegexSyntax.CLASS_CLOSE);
73+
pendingClass.append(RegexSyntax.CLASS_INTERSECTION_NEGATION);
74+
RegexEscaper.escapeInsideBrackets(excluded, pendingClass);
75+
for (char c : additionalExcluded) {
76+
RegexEscaper.escapeInsideBrackets(c, pendingClass);
77+
}
78+
pendingClass.append(RegexSyntax.CLASS_CLOSE);
7579
}
7680

7781
void addNamedCapture(NamedCapture group) {
7882
boolean isAdded = registeredGroups.add(group.getName());
79-
if (!isAdded){
83+
if (!isAdded) {
8084
throw new IllegalStateException("A capturing group with the name '" + group.getName() +
8185
"' has already been defined. Each group name must be unique.");
8286
}
8387

88+
if (group.getPattern() instanceof SiftBuilder) {
89+
SiftBuilder subBuilder = (SiftBuilder) group.getPattern();
90+
for (String incomingGroup : subBuilder.getRegisteredGroupNames()) {
91+
if (!registeredGroups.add(incomingGroup)) {
92+
throw new IllegalStateException("Collision detected: The pattern inside capturing group '" +
93+
group.getName() + "' contains a nested group named '" + incomingGroup +
94+
"' which has already been defined in the current builder.");
95+
}
96+
}
97+
}
98+
8499
mainPattern.append(RegexSyntax.NAMED_GROUP_OPEN)
85100
.append(group.getName())
86101
.append(RegexSyntax.NAMED_GROUP_NAME_CLOSE)
@@ -116,6 +131,18 @@ void addCharacter(char literal) {
116131

117132
void addPattern(SiftPattern pattern) {
118133
flush();
134+
135+
if (pattern instanceof SiftBuilder) {
136+
SiftBuilder subBuilder = (SiftBuilder) pattern;
137+
for (String incomingGroup : subBuilder.getRegisteredGroupNames()) {
138+
boolean isAdded = registeredGroups.add(incomingGroup);
139+
if (!isAdded) {
140+
throw new IllegalStateException("Collision detected: The sub-pattern contains a capturing group named '" +
141+
incomingGroup + "' which has already been defined in the current pattern.");
142+
}
143+
}
144+
}
145+
119146
if (!currentQuantifier.isEmpty()) {
120147
mainPattern.append(RegexSyntax.NON_CAPTURING_GROUP_OPEN)
121148
.append(pattern.shake())

sift-core/src/main/java/com/mirkoddd/sift/core/SiftBuilder.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import com.mirkoddd.sift.core.dsl.*;
1919

2020
import java.util.Objects;
21+
import java.util.Set;
2122

2223
/**
2324
* Internal implementation of the State Machine.
@@ -52,13 +53,12 @@ public SiftBuilder anchorStart() {
5253
// QUANTIFIERS
5354
// ===================================================================================
5455

55-
@SuppressWarnings("unchecked")
5656
@Override
5757
public TypeStep<ConnectorStep, CharacterClassConnectorStep> exactly(int n) {
5858
if (n < 0) throw new IllegalArgumentException("Quantity cannot be negative: " + n);
5959
assembler.setQuantifier((n == 1) ? RegexSyntax.EMPTY :
6060
RegexSyntax.QUANTIFIER_OPEN + n + RegexSyntax.QUANTIFIER_CLOSE);
61-
return (TypeStep<ConnectorStep, CharacterClassConnectorStep>) (Object) this;
61+
return this;
6262
}
6363

6464
@SuppressWarnings("unchecked")
@@ -306,6 +306,10 @@ public ConnectorStep followedBy(SiftPattern pattern, SiftPattern... additionalPa
306306
Objects.requireNonNull(pattern, "First SiftPattern cannot be null");
307307
Objects.requireNonNull(additionalPatterns, "Additional SiftPatterns array cannot be null");
308308

309+
for (int i = 0; i < additionalPatterns.length; i++) {
310+
Objects.requireNonNull(additionalPatterns[i], "SiftPattern at index " + i + " cannot be null");
311+
}
312+
309313
ConnectorStep current = this.then().exactly(1).pattern(pattern);
310314

311315
for (SiftPattern p : additionalPatterns) {
@@ -355,4 +359,8 @@ public SiftPattern andNothingElse() {
355359
public String shake() {
356360
return assembler.build();
357361
}
362+
363+
Set<String> getRegisteredGroupNames() {
364+
return assembler.getRegisteredGroups();
365+
}
358366
}

sift-core/src/main/java/com/mirkoddd/sift/core/dsl/SiftPattern.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
*/
1616
package com.mirkoddd.sift.core.dsl;
1717

18+
import java.util.Collections;
19+
import java.util.Set;
20+
1821
/**
1922
* Represents a component that can be converted into a valid Regex string.
2023
* <p>

sift-core/src/test/java/com/mirkoddd/sift/core/SiftTest.java

Lines changed: 85 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,17 @@ void throwOnDuplicateGroupNames() {
384384
assertTrue(exception.getMessage().contains(duplicateName));
385385
}
386386

387+
@Test
388+
@DisplayName("Should throw NullPointerException when a null pattern is passed in followedBy varargs")
389+
void followedByNullVarargs() {
390+
SiftPattern validPattern = SiftPatterns.literal("valid");
391+
392+
NullPointerException exception = assertThrows(NullPointerException.class, () ->
393+
Sift.fromAnywhere().letters().followedBy(validPattern, validPattern, null, validPattern)
394+
);
395+
396+
assertTrue(exception.getMessage().contains("cannot be null"));
397+
}
387398
}
388399

389400
@Nested
@@ -1037,20 +1048,81 @@ void negativeLookbehindTest() {
10371048
assertRegexMatches(regex, "tomcat");
10381049
assertRegexMatches(regex, "cat");
10391050
assertRegexDoesNotMatch(regex, "supercat");
1051+
}
1052+
}
10401053

1041-
String rege = fromStart()
1042-
.exactly(1).unicodeLettersUppercaseOnly() // Must start with an uppercase letter
1043-
.then()
1044-
.between(3, 15).unicodeWordCharacters().withoutBacktracking() // Secure against ReDoS
1045-
.then()
1046-
.optional().digits() // May end with an ASCII number
1047-
.andNothingElse()
1048-
.shake();
1054+
@Test
1055+
@DisplayName("Should detect group name collisions when injecting nested patterns")
1056+
void nestedPatternGroupCollision() {
1057+
SiftPattern nestedModule = Sift.fromAnywhere()
1058+
.namedCapture(SiftPatterns.capture("id", literal("foo")))
1059+
.andNothingElse();
1060+
1061+
IllegalStateException exception = assertThrows(IllegalStateException.class, () ->
1062+
Sift.fromAnywhere()
1063+
.namedCapture(SiftPatterns.capture("id", literal("bar")))
1064+
.then()
1065+
.pattern(nestedModule) // FAIL!
1066+
.shake()
1067+
);
1068+
1069+
assertTrue(exception.getMessage().contains("Collision detected"));
1070+
assertTrue(exception.getMessage().contains("id"));
1071+
}
10491072

1050-
// Result from README: ^\p{Lu}[\p{L}\p{Nd}_]{3,15}+[0-9]?$
1051-
// Real result from test: ^[\p{Lu}][\p{L}\p{Nd}_]{3,15}+[0-9]?$
1052-
// Result: ^[\p{Lu}][\p{L}\p{Nd}_]{3,15}+[0-9]?$
1053-
System.out.println(rege);
1054-
}
1073+
@Test
1074+
@DisplayName("Should successfully merge nested patterns with non-colliding groups")
1075+
void nestedPatternGroupMergeSuccess() {
1076+
SiftPattern nestedModule = Sift.fromAnywhere()
1077+
.namedCapture(SiftPatterns.capture("nestedId", fromAnywhere().oneOrMore().digits()))
1078+
.andNothingElse();
1079+
1080+
String regex = Sift.fromAnywhere()
1081+
.namedCapture(SiftPatterns.capture("mainId", fromAnywhere().oneOrMore().letters()))
1082+
.then()
1083+
.pattern(nestedModule)
1084+
.shake();
1085+
1086+
assertEquals("(?<mainId>[a-zA-Z]+)(?<nestedId>[0-9]+)$", regex);
1087+
}
1088+
1089+
@Test
1090+
@DisplayName("Should detect collisions when a NamedCapture contains nested groups")
1091+
void namedCaptureNestedGroupCollision() {
1092+
SiftPattern innerPattern = Sift.fromAnywhere()
1093+
.namedCapture(SiftPatterns.capture("sharedId", Sift.fromAnywhere().oneOrMore().digits()))
1094+
.andNothingElse();
1095+
1096+
NamedCapture wrapperGroup = SiftPatterns.capture("wrapper", innerPattern);
1097+
1098+
IllegalStateException exception = assertThrows(IllegalStateException.class, () ->
1099+
Sift.fromAnywhere()
1100+
.namedCapture(SiftPatterns.capture("sharedId", Sift.fromAnywhere().oneOrMore().letters()))
1101+
.then()
1102+
.namedCapture(wrapperGroup) // FAIL!
1103+
.shake()
1104+
);
1105+
1106+
assertTrue(exception.getMessage().contains("Collision detected"));
1107+
assertTrue(exception.getMessage().contains("wrapper"));
1108+
assertTrue(exception.getMessage().contains("sharedId"));
1109+
}
1110+
1111+
@Test
1112+
@DisplayName("Should successfully merge NamedCapture containing non-colliding nested groups")
1113+
void namedCaptureNestedGroupMergeSuccess() {
1114+
SiftPattern innerPattern = Sift.fromAnywhere()
1115+
.namedCapture(SiftPatterns.capture("innerId", Sift.fromAnywhere().oneOrMore().digits()))
1116+
.andNothingElse();
1117+
1118+
NamedCapture wrapperGroup = SiftPatterns.capture("wrapper", innerPattern);
1119+
1120+
String regex = Sift.fromAnywhere()
1121+
.namedCapture(SiftPatterns.capture("mainId", Sift.fromAnywhere().oneOrMore().letters()))
1122+
.then()
1123+
.namedCapture(wrapperGroup)
1124+
.shake();
1125+
1126+
assertEquals("(?<mainId>[a-zA-Z]+)(?<wrapper>(?<innerId>[0-9]+)$)", regex);
10551127
}
10561128
}

0 commit comments

Comments
 (0)