Skip to content

Commit 113f598

Browse files
committed
feat(core): implement robust recursion engine for nested structures
- Added package-private recursive() engine for safe regex unrolling (depth 2-10).
- Introduced Delimiter contract to enforce structural symmetry (Dyck languages).
- Implemented NestingAssembler to hide recursion complexity and provide a safe DSL.
- Added comprehensive SiftNestingTest suite including JSON parsing and exception handling.
1 parent ff33320 commit 113f598

File tree

4 files changed

+458
-0
lines changed

4 files changed

+458
-0
lines changed
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Copyright 2026 Mirko Dimartino
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.mirkoddd.sift.core;
17+
18+
import java.util.Objects;
19+
20+
/**
 * Represents a symmetric pair of delimiters used to define boundaries for nested structures.
 * <p>
 * In formal language theory (specifically Dyck languages), balanced structures require a strict
 * 1-to-1 relationship between an opening symbol and a closing symbol. This class bundles both
 * symbols into a single object so that an opening literal always travels together with its
 * matching closing literal (e.g., a parenthesis cannot accidentally be closed by a square bracket).
 * </p>
 * <p>
 * Standard symmetric pairs are provided as immutable constants. For domain-specific parsers,
 * custom boundaries can be generated via the {@link #custom(String, String)} factory method.
 * </p>
 * <p>
 * Instances are immutable value objects: two delimiters are equal when both their opening and
 * closing literals are equal, regardless of whether they were obtained from a constant or from
 * {@link #custom(String, String)}.
 * </p>
 */
public final class Delimiter {

    /**
     * Standard parentheses pair: {@code (} and {@code )}.
     */
    public static final Delimiter PARENTHESES = new Delimiter("(", ")");

    /**
     * Standard square brackets pair: {@code [} and {@code ]}.
     */
    public static final Delimiter BRACKETS = new Delimiter("[", "]");

    /**
     * Standard curly braces pair: <code>&#123;</code> and <code>&#125;</code>.
     */
    public static final Delimiter BRACES = new Delimiter("{", "}");

    /**
     * Standard angle brackets (chevrons) pair: {@code <} and {@code >}.
     */
    public static final Delimiter CHEVRONS = new Delimiter("<", ">");

    private final String open;
    private final String close;

    /**
     * Internal constructor; instances are only created through the constants above or
     * through {@link #custom(String, String)}, which performs the null checks.
     *
     * @param open  The exact literal string that opens the structure.
     * @param close The exact literal string that closes the structure.
     */
    private Delimiter(String open, String close) {
        this.open = open;
        this.close = close;
    }

    /**
     * Factory method to create a custom symmetric pair for domain-specific nested structures
     * (e.g., HTML tags, multi-character comment blocks like <code>/*</code> and
     * <code>*&#47;</code>).
     *
     * @param open  The exact literal string that opens the structure. Cannot be null.
     * @param close The exact literal string that closes the structure. Cannot be null.
     * @return A new {@link Delimiter} instance representing the symmetric contract.
     * @throws NullPointerException if either parameter is null.
     */
    public static Delimiter custom(String open, String close) {
        // The opening literal is validated first so that a double-null call reports it first.
        return new Delimiter(
                Objects.requireNonNull(open, "Opening delimiter cannot be null."),
                Objects.requireNonNull(close, "Closing delimiter cannot be null.")
        );
    }

    /**
     * Retrieves the opening boundary literal.
     *
     * @return The literal string that triggers a new nesting level.
     */
    public String open() {
        return open;
    }

    /**
     * Retrieves the closing boundary literal.
     *
     * @return The literal string that terminates the current nesting level.
     */
    public String close() {
        return close;
    }

    /**
     * Compares this pair with another object by value.
     *
     * @param other The object to compare against.
     * @return {@code true} if {@code other} is a {@link Delimiter} with the same opening
     *         and closing literals.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof Delimiter)) {
            return false;
        }
        Delimiter that = (Delimiter) other;
        return open.equals(that.open) && close.equals(that.close);
    }

    /**
     * Computes a hash code consistent with {@link #equals(Object)}.
     *
     * @return A hash derived from both literals.
     */
    @Override
    public int hashCode() {
        return Objects.hash(open, close);
    }

    /**
     * Produces a human-readable representation, mainly useful in logs and assertion messages.
     *
     * @return A string showing both literals.
     */
    @Override
    public String toString() {
        return "Delimiter[open=" + open + ", close=" + close + "]";
    }
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* Copyright 2026 Mirko Dimartino
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.mirkoddd.sift.core;
17+
18+
import com.mirkoddd.sift.core.dsl.Fragment;
19+
import com.mirkoddd.sift.core.dsl.SiftPattern;
20+
import java.util.Objects;
21+
22+
import static com.mirkoddd.sift.core.SiftPatterns.anyOf;
23+
import static com.mirkoddd.sift.core.SiftPatterns.literal;
24+
import static com.mirkoddd.sift.core.SiftPatterns.recursive;
25+
26+
/**
27+
* A fluent builder designed to safely construct recursive nested structures.
28+
* <p>
29+
* It enforces the use of a symmetric {@link Delimiter} pair and handles the
30+
* complex injection of the 'self' recursive reference, shielding the developer
31+
* from logic errors and unbalanced grammar definitions.
32+
* </p>
33+
*/
34+
public final class NestingAssembler {
35+
private final int depth;
36+
private Delimiter pair;
37+
38+
NestingAssembler(int depth) {
39+
this.depth = depth;
40+
}
41+
42+
/**
43+
* Defines the symmetric boundaries for this nested structure.
44+
*
45+
* @param pair The {@link Delimiter} pair (e.g., {@link Delimiter#PARENTHESES}).
46+
* @return This builder instance for method chaining.
47+
* @throws NullPointerException if the pair is null.
48+
*/
49+
public NestingAssembler using(Delimiter pair) {
50+
this.pair = Objects.requireNonNull(pair, "Delimiter pair cannot be null.");
51+
return this;
52+
}
53+
54+
/**
55+
* Finalizes the nested structure by defining what content is allowed inside.
56+
* <p>
57+
* The builder automatically wraps the provided content pattern (and the hidden
58+
* recursive call to itself) within the specified opening and closing delimiters.
59+
* Both delimiters are strictly treated as literal strings to prevent regex injection.
60+
* </p>
61+
*
62+
* @param content The pattern representing the allowed content inside the structure.
63+
* @return A deeply nested fragment capable of parsing balanced structures.
64+
* @throws NullPointerException if the content is null.
65+
* @throws IllegalStateException if {@code using()} was not called prior to this method.
66+
*/
67+
public SiftPattern<Fragment> containing(SiftPattern<Fragment> content) {
68+
if (this.pair == null) {
69+
throw new IllegalStateException(
70+
"A Delimiter pair must be specified using .using() before defining content."
71+
);
72+
}
73+
Objects.requireNonNull(content, "Content pattern cannot be null.");
74+
75+
// THE SECRET SAUCE:
76+
// We invoke the private unrolling engine. The 'self' parameter representing
77+
// the recursive call is injected as an alternative to the base content,
78+
// strictly bounded by the literal opening and closing delimiters.
79+
return recursive(depth, self ->
80+
Sift.fromAnywhere()
81+
.of(literal(pair.open()))
82+
.then()
83+
.zeroOrMore().of(anyOf(content, self))
84+
.followedBy(literal(pair.close()))
85+
);
86+
}
87+
}
88+

sift-core/src/main/java/com/mirkoddd/sift/core/SiftPatterns.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.mirkoddd.sift.core.dsl.SiftPattern;
2323

2424
import java.util.Objects;
25+
import java.util.function.Function;
2526
import java.util.function.Supplier;
2627

2728
/**
@@ -343,6 +344,69 @@ public static SiftPattern<Fragment> anythingBut(String chars) {
343344
});
344345
}
345346

347+
/**
348+
* Initiates the creation of a safely nested recursive pattern.
349+
* <p>
350+
* This factory method returns a fluent {@link NestingAssembler} that guarantees
351+
* structural symmetry by forcing the use of a valid {@link Delimiter} pair.
352+
* </p>
353+
*
354+
* @param depth The maximum nesting depth to unroll (must be between 2 and 10).
355+
* @return A {@link NestingAssembler} to configure the delimiter and content.
356+
*/
357+
public static NestingAssembler nesting(int depth) {
358+
return new NestingAssembler(depth);
359+
}
360+
361+
/**
362+
* Internal unrolling engine that emulates Regular Expression recursion.
363+
* <p>
364+
* Since Java's native {@link java.util.regex.Pattern} lacks support for true
365+
* PCRE-style recursion (like {@code (?R)}), this method achieves a similar result
366+
* by functionally unrolling the pattern definition from the inside out.
367+
* </p>
368+
* <p>
369+
* <b>JVM Safety Bounds:</b> The depth is strictly clamped between 2 and 10.
370+
* Compiling deeply nested regex strings grows exponentially and will quickly trigger
371+
* a {@code StackOverflowError} within the JVM's regex compiler. A depth of 10 is
372+
* mathematically vast for real-world structured data.
373+
* </p>
374+
*
375+
* @param maxDepth The maximum number of nesting levels to unroll (must be between 2 and 10).
376+
* @param definition A functional block where the parameter represents the recursive
377+
* call to the pattern itself (the 'self' reference).
378+
* @return A deeply nested fragment capable of parsing recursive structures up to {@code maxDepth}.
379+
* @throws IllegalArgumentException if {@code maxDepth} is outside the safe bounds [2, 10].
380+
* @throws NullPointerException if the {@code definition} function is null.
381+
*/
382+
static SiftPattern<Fragment> recursive(
383+
int maxDepth,
384+
Function<SiftPattern<Fragment>, SiftPattern<Fragment>> definition) {
385+
386+
if (maxDepth < 2 || maxDepth > 10) {
387+
throw new IllegalArgumentException(
388+
"Recursion depth must be strictly between 2 and 10 to ensure JVM stability and prevent StackOverflowErrors."
389+
);
390+
}
391+
Objects.requireNonNull(definition, "Recursive definition cannot be null.");
392+
393+
// BASE CASE (The bottom of the Matryoshka):
394+
// If the parsing engine attempts to go deeper than maxDepth, it hits this empty negative lookahead.
395+
// In Regex mathematics, (?!) is a logical contradiction that is guaranteed to ALWAYS fail.
396+
// This ensures the regex stops safely instead of breaking unexpectedly.
397+
SiftPattern<Fragment> current = memoize(() -> "(?!)");
398+
399+
// UNROLLING (Inside-Out Injection):
400+
// We build the nested structure by applying the function repeatedly.
401+
// At each iteration, the previously built layer is injected as the 'self' parameter
402+
// for the next layer up, effectively unrolling the recursion.
403+
for (int i = 0; i < maxDepth; i++) {
404+
current = definition.apply(current);
405+
}
406+
407+
return current;
408+
}
409+
346410
/**
347411
* Memoizes a pattern generation using a supplier, making it generic so it can
348412
* safely produce both Fragments and Assertions without unchecked casts.

0 commit comments

Comments
 (0)