From c86372785c6d32fa4f5c9a7adf5de4c70e79199a Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Fri, 22 May 2026 21:09:51 +0100 Subject: [PATCH 01/14] :bug: align decode list_limit with qs arrayLimit semantics --- src/qs_codec/decode.py | 33 +++++++++++++++++++++----------- tests/comparison/test_cases.json | 10 +++++++++- tests/unit/decode_test.py | 28 ++++++++++++++++++++++++--- 3 files changed, 56 insertions(+), 15 deletions(-) diff --git a/src/qs_codec/decode.py b/src/qs_codec/decode.py index a462e7d..a826a75 100644 --- a/src/qs_codec/decode.py +++ b/src/qs_codec/decode.py @@ -31,6 +31,10 @@ from .utils.utils import Utils +def _list_limit_exceeded_message(limit: int) -> str: + return f"List limit exceeded: Only {limit} element{'' if limit == 1 else 's'} allowed in a list." + + def decode( value: t.Optional[t.Union[str, Mapping[str, t.Any]]], options: t.Optional[DecodeOptions] = None, @@ -243,16 +247,14 @@ def _parse_array_value(value: t.Any, options: DecodeOptions, current_list_length """ if isinstance(value, str) and value and options.comma and "," in value: split_val: t.List[str] = value.split(",") - if options.raise_on_limit_exceeded and len(split_val) > options.list_limit: - raise ValueError( - f"List limit exceeded: Only {options.list_limit} element{'' if options.list_limit == 1 else 's'} allowed in a list." - ) + if len(split_val) > options.list_limit: + if options.raise_on_limit_exceeded: + raise ValueError(_list_limit_exceeded_message(options.list_limit)) + return OverflowDict({str(i): item for i, item in enumerate(split_val)}) return split_val if options.raise_on_limit_exceeded and current_list_length >= options.list_limit: - raise ValueError( - f"List limit exceeded: Only {options.list_limit} element{'' if options.list_limit == 1 else 's'} allowed in a list." - ) + raise ValueError(_list_limit_exceeded_message(options.list_limit)) return value @@ -378,7 +380,12 @@ def _parse_query_string_values( options, len(obj[key]) if key in obj and isinstance(obj[key], (list, tuple)) else 0, ) - if isinstance(parsed_value, (list, tuple)): + if Utils.is_overflow(parsed_value): + parsed_overflow = t.cast(OverflowDict, parsed_value) + val = OverflowDict( + {key: decoder_fn(v, charset, kind=DecodeKind.VALUE) for key, v in parsed_overflow.items()} + ) + elif isinstance(parsed_value, (list, tuple)): val = [decoder_fn(v, charset, kind=DecodeKind.VALUE) for v in parsed_value] else: val = decoder_fn(parsed_value, charset, kind=DecodeKind.VALUE) @@ -518,10 +525,14 @@ def _parse_object( and root != decoded_root and str(index) == decoded_root and parse_lists_enabled - and index <= options.list_limit ): - obj = [UNDEFINED for _ in range(index + 1)] - obj[index] = leaf + if index < options.list_limit: + obj = [UNDEFINED for _ in range(index + 1)] + obj[index] = leaf + elif options.raise_on_limit_exceeded: + raise ValueError(_list_limit_exceeded_message(options.list_limit)) + else: + obj[decoded_root] = leaf else: # Preserve the literal decoded key for non-array roots (e.g. "[01]" -> "01"), # matching Node `qs` behavior for leading-zero numeric-like segments. diff --git a/tests/comparison/test_cases.json b/tests/comparison/test_cases.json index dcf30f8..92c2eef 100644 --- a/tests/comparison/test_cases.json +++ b/tests/comparison/test_cases.json @@ -91,6 +91,14 @@ }, "encoded": "a[0]=f" }, + { + "data": { + "a": { + "20": "f" + } + }, + "encoded": "a[20]=f" + }, { "data": { "a": [ @@ -369,4 +377,4 @@ }, "encoded": "posts[0][id]=1&posts[0][someId]=du761-8bc98&posts[0][text]=Lorem Ipsum Dolor&posts[0][user][firstname]=John&posts[0][user][lastname]=Doe&posts[0][user][age]=25&posts[0][relationships][tags][0][name]=super&posts[0][relationships][tags][1][name]=awesome&posts[1][id]=1&posts[1][someId]=pa813-7jx02&posts[1][text]=Lorem Ipsum Dolor&posts[1][user][firstname]=Mary&posts[1][user][lastname]=Doe&posts[1][user][age]=25&posts[1][relationships][tags][0][name]=super&posts[1][relationships][tags][1][name]=awesome&total=2" } -] \ No newline at end of file +] diff --git a/tests/unit/decode_test.py b/tests/unit/decode_test.py index e29d0df..bcc3207 100644 --- a/tests/unit/decode_test.py +++ b/tests/unit/decode_test.py @@ -406,9 +406,11 @@ def test_allows_to_specify_list_indices( @pytest.mark.parametrize( "query, options, expected", [ - pytest.param("a[20]=a", DecodeOptions(list_limit=20), {"a": ["a"]}, id="at-limit"), + pytest.param("a[19]=a", DecodeOptions(list_limit=20), {"a": ["a"]}, id="below-limit"), + pytest.param("a[20]=a", DecodeOptions(list_limit=20), {"a": {"20": "a"}}, id="at-limit"), pytest.param("a[21]=a", DecodeOptions(list_limit=20), {"a": {"21": "a"}}, id="above-limit"), - pytest.param("a[20]=a", None, {"a": ["a"]}, id="default-at-limit"), + pytest.param("a[19]=a", None, {"a": ["a"]}, id="default-below-limit"), + pytest.param("a[20]=a", None, {"a": {"20": "a"}}, id="default-at-limit"), pytest.param("a[21]=a", None, {"a": {"21": "a"}}, id="default-above-limit"), ], ) @@ -712,7 +714,7 @@ def test_allows_setting_the_parameter_limit_to_infinity(self) -> None: "query, options, expected", [ pytest.param("a[0]=b", DecodeOptions(list_limit=-1), {"a": {"0": "b"}}, id="limit--1-single-index-0"), - pytest.param("a[0]=b", DecodeOptions(list_limit=0), {"a": ["b"]}, id="limit-0-single-index-0"), + pytest.param("a[0]=b", DecodeOptions(list_limit=0), {"a": {"0": "b"}}, id="limit-0-single-index-0"), pytest.param("a[-1]=b", DecodeOptions(list_limit=-1), {"a": {"-1": "b"}}, id="limit--1-negative-index"), pytest.param("a[-1]=b", DecodeOptions(list_limit=0), {"a": {"-1": "b"}}, id="limit-0-negative-index"), pytest.param( @@ -1405,6 +1407,20 @@ def test_current_list_length_calculation(self) -> None: True, id="comma-separated-list-exceed-limit", ), + pytest.param( + "a[1001]=b", + DecodeOptions(list_limit=1000, raise_on_limit_exceeded=True), + None, + True, + id="indexed-notation-exceeds-list-limit-raise", + ), + pytest.param( + "a[0]=1&a[1]=2&a[2]=3&a[10]=4", + DecodeOptions(list_limit=6, raise_on_limit_exceeded=True), + None, + True, + id="sparse-index-exceeds-list-limit-raise", + ), ], ) def test_list_limit( @@ -1416,6 +1432,12 @@ def test_list_limit( else: assert decode(query, options) == expected + def test_comma_list_over_limit_converts_to_overflow_dict(self) -> None: + result = decode("a=1,2,3,4", DecodeOptions(comma=True, list_limit=3)) + + assert isinstance(result["a"], OverflowDict) + assert result == {"a": {"0": "1", "1": "2", "2": "3", "3": "4"}} + # --- Additional tests for decoder kind and parser state isolation --- From e4fb2e1fa5cda43da29a3e95b259effadec1ada5 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Fri, 22 May 2026 21:11:01 +0100 Subject: [PATCH 02/14] :bug: combine bracket-array duplicates regardless of duplicate policy --- src/qs_codec/decode.py | 5 +++-- tests/unit/decode_test.py | 40 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/qs_codec/decode.py b/src/qs_codec/decode.py index a826a75..29a9bce 100644 --- a/src/qs_codec/decode.py +++ b/src/qs_codec/decode.py @@ -401,11 +401,12 @@ def _parse_query_string_values( val = [val] existing: bool = key in obj + part_duplicates = Duplicates.COMBINE if pos != -1 and "[]=" in part else duplicates # Combine/overwrite according to the configured duplicates policy. - if existing and duplicates == Duplicates.COMBINE: + if existing and part_duplicates == Duplicates.COMBINE: obj[key] = Utils.combine(obj[key], val, options) - elif not existing or duplicates == Duplicates.LAST: + elif not existing or part_duplicates == Duplicates.LAST: obj[key] = val return obj diff --git a/tests/unit/decode_test.py b/tests/unit/decode_test.py index bcc3207..415a995 100644 --- a/tests/unit/decode_test.py +++ b/tests/unit/decode_test.py @@ -1199,6 +1199,46 @@ def test_duplicates_option( result = decode(query) if options is None else decode(query, options) assert result == expected + @pytest.mark.parametrize( + "query, options, expected", + [ + pytest.param( + "b[]=1&b[]=2", + DecodeOptions(duplicates=Duplicates.LAST), + {"b": ["1", "2"]}, + id="last-bracket-array-combines", + ), + pytest.param( + "b[]=1&b[]=2", + DecodeOptions(duplicates=Duplicates.FIRST), + {"b": ["1", "2"]}, + id="first-bracket-array-combines", + ), + pytest.param( + "a=1&a=2&b[]=1&b[]=2", + DecodeOptions(duplicates=Duplicates.LAST), + {"a": "2", "b": ["1", "2"]}, + id="last-preserves-flat-policy", + ), + pytest.param( + "a=1&a=2&b[]=1&b[]=2", + DecodeOptions(duplicates=Duplicates.FIRST), + {"a": "1", "b": ["1", "2"]}, + id="first-preserves-flat-policy", + ), + pytest.param( + "b%5B%5D=1&b%5B%5D=2", + DecodeOptions(duplicates=Duplicates.LAST), + {"b": ["1", "2"]}, + id="encoded-bracket-array-combines", + ), + ], + ) + def test_bracket_notation_always_combines_regardless_of_duplicates( + self, query: str, options: DecodeOptions, expected: t.Mapping[str, t.Any] + ) -> None: + assert decode(query, options) == expected + class TestStrictDepthOption: @pytest.mark.parametrize( From f1b06c0caf5835eefd07bf2860ab49d4782b00e1 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Fri, 22 May 2026 21:13:27 +0100 Subject: [PATCH 03/14] :sparkles: add strict_merge decode option --- src/qs_codec/models/decode_options.py | 8 +++++ src/qs_codec/utils/utils.py | 19 ++++++++++- tests/comparison/test_cases.json | 11 +++++++ tests/unit/decode_options_test.py | 1 + tests/unit/decode_test.py | 46 ++++++++++++++++++++++++++- tests/unit/utils_test.py | 13 ++++++-- 6 files changed, 93 insertions(+), 5 deletions(-) diff --git a/src/qs_codec/models/decode_options.py b/src/qs_codec/models/decode_options.py index aa039f0..5bbd1f0 100644 --- a/src/qs_codec/models/decode_options.py +++ b/src/qs_codec/models/decode_options.py @@ -92,6 +92,14 @@ class DecodeOptions: - ``LAST``: keep only the last value seen (``{"a": 2}``). """ + strict_merge: bool = True + """Wrap object/scalar conflicts in a list. + + When ``True`` (default), input such as ``a[b]=c&a=d`` decodes to ``{"a": [{"b": "c"}, "d"]}``. + When ``False``, the decoder restores the legacy behavior and adds non-empty string scalars as object keys, + e.g. ``{"a": {"b": "c", "d": True}}``. + """ + ignore_query_prefix: bool = False """Set to ``True`` to ignore the leading question mark query prefix in the encoded input.""" diff --git a/src/qs_codec/utils/utils.py b/src/qs_codec/utils/utils.py index 4e74987..68bc736 100644 --- a/src/qs_codec/utils/utils.py +++ b/src/qs_codec/utils/utils.py @@ -205,8 +205,25 @@ def merge( last_result = new_target continue + if isinstance(current_source, Undefined) or current_source == "": + stack.pop() + last_result = current_target + continue + + if frame.options.strict_merge: + stack.pop() + last_result = [current_target, current_source] + continue + + if isinstance(current_source, str): + new_target = dict(current_target) + new_target[current_source] = True + stack.pop() + last_result = new_target + continue + stack.pop() - last_result = current_target + last_result = [current_target, current_source] continue if not isinstance(current_target, (list, tuple)) and isinstance(current_source, (list, tuple)): diff --git a/tests/comparison/test_cases.json b/tests/comparison/test_cases.json index 92c2eef..d66e87c 100644 --- a/tests/comparison/test_cases.json +++ b/tests/comparison/test_cases.json @@ -60,6 +60,17 @@ }, "encoded": "a[b]=c" }, + { + "data": { + "a": [ + { + "b": "c" + }, + "d" + ] + }, + "encoded": "a[b]=c&a=d" + }, { "data": { "a": { diff --git a/tests/unit/decode_options_test.py b/tests/unit/decode_options_test.py index dac1bb1..6cdd900 100644 --- a/tests/unit/decode_options_test.py +++ b/tests/unit/decode_options_test.py @@ -13,6 +13,7 @@ def test_defaults_normalize(self) -> None: opts = DecodeOptions() assert opts.decode_dot_in_keys is False assert opts.allow_dots is False + assert opts.strict_merge is True def test_decode_dot_implies_allow_dots(self) -> None: opts = DecodeOptions(decode_dot_in_keys=True) diff --git a/tests/unit/decode_test.py b/tests/unit/decode_test.py index 415a995..b404705 100644 --- a/tests/unit/decode_test.py +++ b/tests/unit/decode_test.py @@ -1240,6 +1240,50 @@ def test_bracket_notation_always_combines_regardless_of_duplicates( assert decode(query, options) == expected +class TestStrictMergeOption: + @pytest.mark.parametrize( + "query, options, expected", + [ + pytest.param("a[b]=c&a=d", DecodeOptions(), {"a": [{"b": "c"}, "d"]}, id="object-then-scalar"), + pytest.param("a=d&a[b]=c", DecodeOptions(), {"a": ["d", {"b": "c"}]}, id="scalar-then-object"), + pytest.param( + "a[b]=c&a=d", + DecodeOptions(strict_merge=False), + {"a": {"b": "c", "d": True}}, + id="legacy-object-then-scalar", + ), + pytest.param( + "a[b]=c&a=", + DecodeOptions(strict_merge=False), + {"a": {"b": "c"}}, + id="legacy-ignores-empty-assigned-scalar", + ), + pytest.param( + "a[b]=c&a", + DecodeOptions(strict_merge=False), + {"a": {"b": "c"}}, + id="legacy-ignores-empty-missing-value-scalar", + ), + pytest.param( + "a[b]=c&a=", + DecodeOptions(), + {"a": {"b": "c"}}, + id="default-ignores-empty-assigned-scalar", + ), + pytest.param( + "a[b]=c&a", + DecodeOptions(strict_null_handling=True), + {"a": {"b": "c"}}, + id="default-ignores-strict-null-scalar", + ), + ], + ) + def test_strict_merge( + self, query: str, options: DecodeOptions, expected: t.Mapping[str, t.Any] + ) -> None: + assert decode(query, options) == expected + + class TestStrictDepthOption: @pytest.mark.parametrize( "query, options, expected, raises_error", @@ -1657,7 +1701,7 @@ class TestDecodeMixedBypassParity: "query, options, expected", [ pytest.param("a=1&a[b]=2", None, {"a": ["1", {"b": "2"}]}, id="flat-before-structured"), - pytest.param("a[b]=2&a=1", None, {"a": {"b": "2"}}, id="structured-before-flat"), + pytest.param("a[b]=2&a=1", None, {"a": [{"b": "2"}, "1"]}, id="structured-before-flat"), pytest.param("0=y&[]=x", None, {"0": "x"}, id="flat-zero-collides-leading-bracket-root"), pytest.param("[]=x&0=y", None, {"0": ["x", "y"]}, id="leading-bracket-root-collides-flat-zero"), pytest.param( diff --git a/tests/unit/utils_test.py b/tests/unit/utils_test.py index b203417..ba675c3 100644 --- a/tests/unit/utils_test.py +++ b/tests/unit/utils_test.py @@ -601,14 +601,21 @@ def test_merges_array_into_object(self) -> None: {"foo": ["xyzzy"]}, ) == {"foo": {"bar": "baz", "0": "xyzzy"}} - def test_merge_mapping_target_with_scalar_source_returns_target_unchanged(self) -> None: + def test_merge_mapping_target_with_scalar_source_wraps_with_strict_merge(self) -> None: target = {"a": "b"} source = "scalar" result = Utils.merge(target, source) # type: ignore[arg-type] - assert result == {"a": "b"} - assert result is target + assert result == [{"a": "b"}, "scalar"] + + def test_merge_mapping_target_with_scalar_source_uses_legacy_strict_merge_false(self) -> None: + target = {"a": "b"} + source = "scalar" + + result = Utils.merge(target, source, DecodeOptions(strict_merge=False)) # type: ignore[arg-type] + + assert result == {"a": "b", "scalar": True} def test_merge_structured_lists_prefers_source_when_target_slot_is_undefined(self) -> None: options = DecodeOptions() From 34b24a3514d94fe897c39189216210621c1c8b84 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Fri, 22 May 2026 21:14:59 +0100 Subject: [PATCH 04/14] :memo: document qs 6.15 parse parity updates --- CHANGELOG.md | 3 +++ docs/README.rst | 42 +++++++++++++++++++++++++++++++++----- tests/unit/example_test.py | 18 ++++++++++++---- 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 039192c..8f82885 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## 1.5.1-dev +* [FEAT] add `DecodeOptions.strict_merge` for Node `qs` 6.15 `strictMerge` parity +* [FIX] align `decode` `list_limit` semantics with Node `qs` `arrayLimit` as a maximum element count +* [FIX] combine bracket-array duplicate assignments regardless of `DecodeOptions.duplicates` * [FIX] align `decode` with Node `qs` 6.15.2 by normalizing dotted keys before preserving `depth=0` input * [FIX] align `encode` with Node `qs` 6.15.2 by using the configured delimiter after `charset_sentinel` diff --git a/docs/README.rst b/docs/README.rst index 8c6f3bc..bd28488 100644 --- a/docs/README.rst +++ b/docs/README.rst @@ -196,6 +196,38 @@ change the behavior when duplicate keys are encountered qs.DecodeOptions(duplicates=qs.Duplicates.LAST), ) == {'foo': 'baz'} +Bracket-array keys always combine, regardless of the duplicate strategy: + +.. code:: python + + import qs_codec as qs + + assert qs.decode( + 'a=1&a=2&b[]=1&b[]=2', + qs.DecodeOptions(duplicates=qs.Duplicates.LAST), + ) == {'a': '2', 'b': ['1', '2']} + +When a key appears as both an object and a scalar, +:py:attr:`strict_merge ` wraps the conflicting values in a +``list`` by default: + +.. code:: python + + import qs_codec as qs + + assert qs.decode('a[b]=c&a=d') == {'a': [{'b': 'c'}, 'd']} + +Set ``strict_merge`` to ``False`` to restore the legacy behavior, where non-empty string scalars become object keys: + +.. code:: python + + import qs_codec as qs + + assert qs.decode( + 'a[b]=c&a=d', + qs.DecodeOptions(strict_merge=False), + ) == {'a': {'b': 'c', 'd': True}} + If you have to deal with legacy browsers or services, there’s also support for decoding percent-encoded octets as :py:attr:`LATIN1 `: @@ -310,11 +342,11 @@ Note that an empty ``str``\ing is also a value and will be preserved: assert qs.decode('a[0]=b&a[1]=&a[2]=c') == {'a': ['b', '', 'c']} :py:attr:`decode ` will also limit specifying indices -in a ``list`` to a maximum index of ``20``. Any ``list`` members with an -index of greater than ``20`` will instead be converted to a ``dict`` with -the index as the key. This is needed to handle cases when someone sent, -for example, ``a[999999999]`` and it will take significant time to iterate -over this huge ``list``. +in a ``list`` to a maximum element count of ``20``. Index ``19`` is the +last index that can create a default ``list``; index ``20`` and higher +are converted to a ``dict`` with the index as the key. This is needed to +handle cases when someone sent, for example, ``a[999999999]`` and it +would take significant time to iterate over this huge ``list``. .. code:: python diff --git a/tests/unit/example_test.py b/tests/unit/example_test.py index 1077b0f..230f8f3 100644 --- a/tests/unit/example_test.py +++ b/tests/unit/example_test.py @@ -79,6 +79,16 @@ def test_dictionaries(self): assert qs_codec.decode("foo=bar&foo=baz", qs_codec.DecodeOptions(duplicates=qs_codec.Duplicates.LAST)) == { "foo": "baz" } + assert qs_codec.decode( + "a=1&a=2&b[]=1&b[]=2", + qs_codec.DecodeOptions(duplicates=qs_codec.Duplicates.LAST), + ) == {"a": "2", "b": ["1", "2"]} + + # When a key appears as both an object and a scalar, `DecodeOptions.strict_merge` wraps the conflict in a list. + assert qs_codec.decode("a[b]=c&a=d") == {"a": [{"b": "c"}, "d"]} + assert qs_codec.decode("a[b]=c&a=d", qs_codec.DecodeOptions(strict_merge=False)) == { + "a": {"b": "c", "d": True} + } # If you have to deal with legacy browsers or services, there's also support for decoding percent-encoded octets # as `Charset.LATIN1`: @@ -126,10 +136,10 @@ def test_lists(self): assert qs_codec.decode("a[]=&a[]=b") == {"a": ["", "b"]} assert qs_codec.decode("a[0]=b&a[1]=&a[2]=c") == {"a": ["b", "", "c"]} - # **qs_codec** will also limit specifying indices in a `list` to a maximum index of `20`. - # Any `list` members with an index of greater than `20` will instead be converted to a `dict` with the index as - # the key. This is needed to handle cases when someone sent, for example, `a[999999999]` and it will take - # significant time to iterate over this huge `list`. + # **qs_codec** will also limit specifying indices in a `list` to a maximum element count of `20`. + # Index `19` is the last index that can create a default `list`; index `20` and higher are converted to a + # `dict` with the index as the key. This is needed to handle cases when someone sent, for example, + # `a[999999999]` and it will take significant time to iterate over this huge `list`. assert qs_codec.decode("a[100]=b") == {"a": {"100": "b"}} # This limit can be overridden by passing an `DecodeOptions.list_limit` option: From ff0a8fb34524033739c6ff56e7f45ed7031ecc12 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 23 May 2026 09:07:19 +0100 Subject: [PATCH 05/14] :bug: preserve comma overflow context for bracket arrays --- src/qs_codec/decode.py | 35 ++++++++++------ src/qs_codec/models/overflow_dict.py | 10 +++-- src/qs_codec/utils/utils.py | 11 +++-- tests/unit/decode_test.py | 60 ++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 18 deletions(-) diff --git a/src/qs_codec/decode.py b/src/qs_codec/decode.py index 29a9bce..167923e 100644 --- a/src/qs_codec/decode.py +++ b/src/qs_codec/decode.py @@ -24,7 +24,7 @@ from .enums.duplicates import Duplicates from .enums.sentinel import Sentinel from .models.decode_options import DecodeOptions -from .models.overflow_dict import OverflowDict +from .models.overflow_dict import CommaOverflowDict, OverflowDict from .models.structured_key_scan import StructuredKeyScan from .models.undefined import UNDEFINED from .utils.decode_utils import DecodeUtils @@ -225,12 +225,20 @@ def _interpret_numeric_entities(value: str) -> str: return re.sub(r"&#(\d+);", lambda match: chr(int(match.group(1))), value) -def _parse_array_value(value: t.Any, options: DecodeOptions, current_list_length: int) -> t.Any: +def _parse_array_value( + value: t.Any, + options: DecodeOptions, + current_list_length: int, + *, + enforce_comma_limit: bool = True, +) -> t.Any: """Post-process a raw scalar for list semantics and enforce ``list_limit``. Behavior -------- - If ``comma=True`` and ``value`` is a string that contains commas, split into a list. + When ``enforce_comma_limit`` is ``True``, over-limit comma values raise or degrade to an ``OverflowDict`` here. + Raw query-string parsing passes ``False`` so the caller can account for bracket-array key context first. - Otherwise, enforce the per-list length limit by comparing ``current_list_length`` to ``options.list_limit``. When ``raise_on_limit_exceeded=True``, violations raise ``ValueError``. - When ``list_limit`` is negative: @@ -247,10 +255,10 @@ def _parse_array_value(value: t.Any, options: DecodeOptions, current_list_length """ if isinstance(value, str) and value and options.comma and "," in value: split_val: t.List[str] = value.split(",") - if len(split_val) > options.list_limit: + if enforce_comma_limit and len(split_val) > options.list_limit: if options.raise_on_limit_exceeded: raise ValueError(_list_limit_exceeded_message(options.list_limit)) - return OverflowDict({str(i): item for i, item in enumerate(split_val)}) + return CommaOverflowDict({str(i): item for i, item in enumerate(split_val)}) return split_val if options.raise_on_limit_exceeded and current_list_length >= options.list_limit: @@ -356,9 +364,11 @@ def _parse_query_string_values( continue bracket_equals_pos: int = part.find("]=") pos: int = part.find("=") if bracket_equals_pos == -1 else (bracket_equals_pos + 1) + bracket_array_assignment = pos != -1 and "[]=" in part # Decode key and value with a key-aware decoder; skip pairs whose key decodes to None raw_key = "" + comma_limit_exceeded = False if pos == -1: key_decoded = decoder_fn(part, charset, kind=DecodeKind.KEY) if key_decoded is None: @@ -379,13 +389,10 @@ def _parse_query_string_values( part[pos + 1 :], options, len(obj[key]) if key in obj and isinstance(obj[key], (list, tuple)) else 0, + enforce_comma_limit=False, ) - if Utils.is_overflow(parsed_value): - parsed_overflow = t.cast(OverflowDict, parsed_value) - val = OverflowDict( - {key: decoder_fn(v, charset, kind=DecodeKind.VALUE) for key, v in parsed_overflow.items()} - ) - elif isinstance(parsed_value, (list, tuple)): + comma_limit_exceeded = isinstance(parsed_value, (list, tuple)) and len(parsed_value) > options.list_limit + if isinstance(parsed_value, (list, tuple)): val = [decoder_fn(v, charset, kind=DecodeKind.VALUE) for v in parsed_value] else: val = decoder_fn(parsed_value, charset, kind=DecodeKind.VALUE) @@ -397,11 +404,15 @@ def _parse_query_string_values( # Upstream parity: if token contains "[]=", only wrap values that are already arrays # (typically produced by comma splitting), preserving list-of-lists semantics. - if parse_lists_enabled and pos != -1 and "[]=" in part and isinstance(val, (list, tuple)): + if bracket_array_assignment and isinstance(val, (list, tuple)): val = [val] + elif comma_limit_exceeded and isinstance(val, (list, tuple)): + if options.raise_on_limit_exceeded: + raise ValueError(_list_limit_exceeded_message(options.list_limit)) + val = CommaOverflowDict({str(i): item for i, item in enumerate(val)}) existing: bool = key in obj - part_duplicates = Duplicates.COMBINE if pos != -1 and "[]=" in part else duplicates + part_duplicates = Duplicates.COMBINE if bracket_array_assignment else duplicates # Combine/overwrite according to the configured duplicates policy. if existing and part_duplicates == Duplicates.COMBINE: diff --git a/src/qs_codec/models/overflow_dict.py b/src/qs_codec/models/overflow_dict.py index 96ca37a..46f2897 100644 --- a/src/qs_codec/models/overflow_dict.py +++ b/src/qs_codec/models/overflow_dict.py @@ -10,16 +10,20 @@ class OverflowDict(dict): def copy(self) -> "OverflowDict": """Return an OverflowDict copy to preserve the overflow marker.""" - return OverflowDict(super().copy()) + return self.__class__(super().copy()) def __copy__(self) -> "OverflowDict": """Return an OverflowDict copy to preserve the overflow marker.""" - return OverflowDict(super().copy()) + return self.__class__(super().copy()) def __deepcopy__(self, memo: dict[int, object]) -> "OverflowDict": """Return an OverflowDict deepcopy to preserve the overflow marker.""" - copied = OverflowDict() + copied = self.__class__() memo[id(self)] = copied for key, value in self.items(): copied[copy.deepcopy(key, memo)] = copy.deepcopy(value, memo) return copied + + +class CommaOverflowDict(OverflowDict): + """Overflow marker for comma-split values that exceeded `list_limit`.""" diff --git a/src/qs_codec/utils/utils.py b/src/qs_codec/utils/utils.py index 68bc736..9dbe9e8 100644 --- a/src/qs_codec/utils/utils.py +++ b/src/qs_codec/utils/utils.py @@ -26,7 +26,7 @@ from enum import Enum from ..models.decode_options import DecodeOptions -from ..models.overflow_dict import OverflowDict +from ..models.overflow_dict import CommaOverflowDict, OverflowDict from ..models.undefined import Undefined @@ -540,12 +540,15 @@ def combine( # a is already an OverflowDict. Append b to a *copy* at the next numeric index. # We assume sequential keys; len(a_copy) gives the next index. orig_a = t.cast(OverflowDict, a) - a_copy = OverflowDict({k: v for k, v in orig_a.items() if not isinstance(v, Undefined)}) + a_copy = orig_a.__class__({k: v for k, v in orig_a.items() if not isinstance(v, Undefined)}) # Use max key + 1 to handle sparse dicts safely, rather than len(a) key_pairs = _numeric_key_pairs(a_copy) idx = (max(key for key, _ in key_pairs) + 1) if key_pairs else 0 - if isinstance(b, (list, tuple)): + if isinstance(orig_a, CommaOverflowDict): + if not isinstance(b, Undefined): + a_copy[str(idx)] = b + elif isinstance(b, (list, tuple)): for item in b: if not isinstance(item, Undefined): a_copy[str(idx)] = item @@ -573,6 +576,8 @@ def combine( # Flatten b, handling OverflowDict as a list source if isinstance(b, (list, tuple)): list_b = [x for x in b if not isinstance(x, Undefined)] + elif isinstance(b, CommaOverflowDict): + list_b = [b] elif Utils.is_overflow(b): b_of = t.cast(OverflowDict, b) list_b = [ diff --git a/tests/unit/decode_test.py b/tests/unit/decode_test.py index b404705..a5e87e7 100644 --- a/tests/unit/decode_test.py +++ b/tests/unit/decode_test.py @@ -871,6 +871,24 @@ def _decoder(s: t.Optional[str], charset: t.Optional[Charset]) -> t.Any: {"foo": [["1", "2", "3"], "a"]}, id="string-second-list", ), + pytest.param( + "foo[]=1,2,3,4", + DecodeOptions(comma=True, list_limit=3), + {"foo": [["1", "2", "3", "4"]]}, + id="bracket-list-comma-value-can-exceed-list-limit", + ), + pytest.param( + "foo[]=1,2,3,4", + DecodeOptions(comma=True, list_limit=3, raise_on_limit_exceeded=True), + {"foo": [["1", "2", "3", "4"]]}, + id="bracket-list-comma-value-does-not-raise-when-over-limit", + ), + pytest.param( + "foo[]=1,2,3,4", + DecodeOptions(comma=True, list_limit=3, parse_lists=False), + {"foo": {"0": [["1", "2", "3", "4"]]}}, + id="bracket-list-comma-value-keeps-nesting-when-list-parsing-disabled", + ), pytest.param( "a[b]=x,y[]=z", DecodeOptions(comma=True), @@ -1522,6 +1540,48 @@ def test_comma_list_over_limit_converts_to_overflow_dict(self) -> None: assert isinstance(result["a"], OverflowDict) assert result == {"a": {"0": "1", "1": "2", "2": "3", "3": "4"}} + @pytest.mark.parametrize( + "query, options, expected", + [ + pytest.param( + "a=5&a=1,2,3,4", + DecodeOptions(comma=True, list_limit=3), + {"a": ["5", {"0": "1", "1": "2", "2": "3", "3": "4"}]}, + id="scalar-then-overflow-comma-list", + ), + pytest.param( + "a=5&a=1,2,3,4", + DecodeOptions(comma=True, list_limit=1), + {"a": {"0": "5", "1": {"0": "1", "1": "2", "2": "3", "3": "4"}}}, + id="scalar-then-overflow-comma-list-over-combined-limit", + ), + pytest.param( + "a=1,2,3,4&a=5,6", + DecodeOptions(comma=True, list_limit=3), + {"a": {"0": "1", "1": "2", "2": "3", "3": "4", "4": ["5", "6"]}}, + id="overflow-comma-list-then-in-limit-comma-list", + ), + pytest.param( + "a=1,2,3,4&a=5,6,7,8", + DecodeOptions(comma=True, list_limit=3), + { + "a": { + "0": "1", + "1": "2", + "2": "3", + "3": "4", + "4": {"0": "5", "1": "6", "2": "7", "3": "8"}, + } + }, + id="overflow-comma-list-then-overflow-comma-list", + ), + ], + ) + def test_comma_overflow_duplicates_keep_overflow_values_nested( + self, query: str, options: DecodeOptions, expected: t.Mapping[str, t.Any] + ) -> None: + assert decode(query, options) == expected + # --- Additional tests for decoder kind and parser state isolation --- From 9e190a1be92a677fe12b9d0b7034b250b4a199d9 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 23 May 2026 09:21:07 +0100 Subject: [PATCH 06/14] :art: simplify test function signatures for strict merge options --- tests/unit/decode_test.py | 4 +--- tests/unit/example_test.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/unit/decode_test.py b/tests/unit/decode_test.py index a5e87e7..1ec3931 100644 --- a/tests/unit/decode_test.py +++ b/tests/unit/decode_test.py @@ -1296,9 +1296,7 @@ class TestStrictMergeOption: ), ], ) - def test_strict_merge( - self, query: str, options: DecodeOptions, expected: t.Mapping[str, t.Any] - ) -> None: + def test_strict_merge(self, query: str, options: DecodeOptions, expected: t.Mapping[str, t.Any]) -> None: assert decode(query, options) == expected diff --git a/tests/unit/example_test.py b/tests/unit/example_test.py index 230f8f3..b27ccc7 100644 --- a/tests/unit/example_test.py +++ b/tests/unit/example_test.py @@ -86,9 +86,7 @@ def test_dictionaries(self): # When a key appears as both an object and a scalar, `DecodeOptions.strict_merge` wraps the conflict in a list. assert qs_codec.decode("a[b]=c&a=d") == {"a": [{"b": "c"}, "d"]} - assert qs_codec.decode("a[b]=c&a=d", qs_codec.DecodeOptions(strict_merge=False)) == { - "a": {"b": "c", "d": True} - } + assert qs_codec.decode("a[b]=c&a=d", qs_codec.DecodeOptions(strict_merge=False)) == {"a": {"b": "c", "d": True}} # If you have to deal with legacy browsers or services, there's also support for decoding percent-encoded octets # as `Charset.LATIN1`: From eb3ffbece65589f4c3d9f79abf9e0be2c6b04aa5 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 23 May 2026 09:25:13 +0100 Subject: [PATCH 07/14] :bug: remove stale parse_lists argument --- src/qs_codec/decode.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/qs_codec/decode.py b/src/qs_codec/decode.py index 167923e..dca98cb 100644 --- a/src/qs_codec/decode.py +++ b/src/qs_codec/decode.py @@ -90,9 +90,7 @@ def decode( parse_lists_effective = False if decode_from_string: - temp_obj: t.Optional[t.Dict[str, t.Any]] = _parse_query_string_values( - str_value, opts, parse_lists=parse_lists_effective - ) + temp_obj: t.Optional[t.Dict[str, t.Any]] = _parse_query_string_values(str_value, opts) else: temp_obj = dict(mapping_value) if not temp_obj: @@ -267,9 +265,7 @@ def _parse_array_value( return value -def _parse_query_string_values( - value: str, options: DecodeOptions, *, parse_lists: t.Optional[bool] = None -) -> t.Dict[str, t.Any]: +def _parse_query_string_values(value: str, options: DecodeOptions) -> t.Dict[str, t.Any]: """Tokenize a raw query string into a flat ``Dict[str, Any]``. Responsibilities @@ -292,7 +288,6 @@ def _parse_query_string_values( ``_parse_keys`` / ``_parse_object``. """ obj: t.Dict[str, t.Any] = {} - parse_lists_enabled = options.parse_lists if parse_lists is None else parse_lists clean_str: str = value.replace("?", "", 1) if options.ignore_query_prefix else value # Normalize %5B/%5D to literal brackets before splitting (case-insensitive). From d59c1b96b5c899dc12f3757f47a2292586add6e6 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 23 May 2026 10:12:29 +0100 Subject: [PATCH 08/14] :bug: append overflow values without flattening --- src/qs_codec/utils/utils.py | 24 +++--------------------- tests/unit/decode_test.py | 6 ++++++ tests/unit/utils_test.py | 11 +++++------ 3 files changed, 14 insertions(+), 27 deletions(-) diff --git a/src/qs_codec/utils/utils.py b/src/qs_codec/utils/utils.py index 9dbe9e8..4f2c78a 100644 --- a/src/qs_codec/utils/utils.py +++ b/src/qs_codec/utils/utils.py @@ -537,33 +537,15 @@ def combine( list to :class:`OverflowDict`. """ if Utils.is_overflow(a): - # a is already an OverflowDict. Append b to a *copy* at the next numeric index. - # We assume sequential keys; len(a_copy) gives the next index. + # a is already an OverflowDict. Append b as one value at the next numeric index. orig_a = t.cast(OverflowDict, a) a_copy = orig_a.__class__({k: v for k, v in orig_a.items() if not isinstance(v, Undefined)}) # Use max key + 1 to handle sparse dicts safely, rather than len(a) key_pairs = _numeric_key_pairs(a_copy) idx = (max(key for key, _ in key_pairs) + 1) if key_pairs else 0 - if isinstance(orig_a, CommaOverflowDict): - if not isinstance(b, Undefined): - a_copy[str(idx)] = b - elif isinstance(b, (list, tuple)): - for item in b: - if not isinstance(item, Undefined): - a_copy[str(idx)] = item - idx += 1 - elif Utils.is_overflow(b): - b = t.cast(OverflowDict, b) - # Iterate in numeric key order to preserve list semantics - for _, k in sorted(_numeric_key_pairs(b), key=lambda item: item[0]): - val = b[k] - if not isinstance(val, Undefined): - a_copy[str(idx)] = val - idx += 1 - else: - if not isinstance(b, Undefined): - a_copy[str(idx)] = b + if not isinstance(b, Undefined): + a_copy[str(idx)] = b return a_copy # Normal combination: flatten lists/tuples diff --git a/tests/unit/decode_test.py b/tests/unit/decode_test.py index 1ec3931..1f16b49 100644 --- a/tests/unit/decode_test.py +++ b/tests/unit/decode_test.py @@ -1573,6 +1573,12 @@ def test_comma_list_over_limit_converts_to_overflow_dict(self) -> None: }, id="overflow-comma-list-then-overflow-comma-list", ), + pytest.param( + "a[]=1&a[]=2&a[]=3,4", + DecodeOptions(comma=True, list_limit=1), + {"a": {"0": "1", "1": "2", "2": [["3", "4"]]}}, + id="bracket-overflow-then-comma-list", + ), ], ) def test_comma_overflow_duplicates_keep_overflow_values_nested( diff --git a/tests/unit/utils_test.py b/tests/unit/utils_test.py index ba675c3..2d91bee 100644 --- a/tests/unit/utils_test.py +++ b/tests/unit/utils_test.py @@ -762,7 +762,7 @@ def test_combine_overflow_dict_with_overflow_dict(self) -> None: combined = Utils.combine(a, b) assert isinstance(combined, OverflowDict) assert combined["0"] == "x" - assert combined["1"] == "y" + assert combined["1"] == {"0": "y"} assert len(combined) == 2 def test_compact_removes_undefined_entries_and_avoids_cycles(self) -> None: @@ -1136,12 +1136,12 @@ def test_combine_list_with_overflow_dict(self) -> None: result = Utils.combine(a, b) assert result == ["start", "x", "y"] - def test_combine_skips_undefined_in_overflow_dict_append(self) -> None: + def test_combine_overflow_dict_appends_list_as_single_value(self) -> None: a = OverflowDict({"0": "x"}) b = ["y", Undefined(), "z"] result = Utils.combine(a, b) assert isinstance(result, OverflowDict) - assert result == {"0": "x", "1": "y", "2": "z"} + assert result == {"0": "x", "1": ["y", Undefined(), "z"]} def test_combine_skips_undefined_in_list_flattening(self) -> None: a = ["x", Undefined()] @@ -1165,13 +1165,12 @@ def test_combine_overflow_dict_skips_existing_undefined_and_ignores_non_numeric_ assert result["skip"] == "keep" assert "1" in a # Original should remain unchanged - def test_combine_overflow_dict_source_skips_non_numeric_keys(self) -> None: + def test_combine_overflow_dict_appends_overflow_source_as_single_value(self) -> None: a = OverflowDict({"0": "x"}) b = OverflowDict({"foo": "bar", "1": "y", "0": "z"}) result = Utils.combine(a, b) assert isinstance(result, OverflowDict) - assert result == {"0": "x", "1": "z", "2": "y"} - assert "foo" not in result + assert result == {"0": "x", "1": {"foo": "bar", "1": "y", "0": "z"}} def test_merge_overflow_dict_source_preserves_non_numeric_keys(self) -> None: target = "a" From cd2ef21fb9ef6ff1dba0fdd48937f53f6b4e9253 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 23 May 2026 10:24:05 +0100 Subject: [PATCH 09/14] :bug: enforce bracket comma list limit after wrapping --- src/qs_codec/decode.py | 7 ++++--- src/qs_codec/models/decode_options.py | 10 ++++++---- tests/unit/decode_test.py | 20 ++++++++++++++++++++ 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/qs_codec/decode.py b/src/qs_codec/decode.py index dca98cb..cfd76a0 100644 --- a/src/qs_codec/decode.py +++ b/src/qs_codec/decode.py @@ -363,7 +363,7 @@ def _parse_query_string_values(value: str, options: DecodeOptions) -> t.Dict[str # Decode key and value with a key-aware decoder; skip pairs whose key decodes to None raw_key = "" - comma_limit_exceeded = False + list_limit_exceeded = False if pos == -1: key_decoded = decoder_fn(part, charset, kind=DecodeKind.KEY) if key_decoded is None: @@ -386,7 +386,7 @@ def _parse_query_string_values(value: str, options: DecodeOptions) -> t.Dict[str len(obj[key]) if key in obj and isinstance(obj[key], (list, tuple)) else 0, enforce_comma_limit=False, ) - comma_limit_exceeded = isinstance(parsed_value, (list, tuple)) and len(parsed_value) > options.list_limit + list_limit_exceeded = isinstance(parsed_value, (list, tuple)) and len(parsed_value) > options.list_limit if isinstance(parsed_value, (list, tuple)): val = [decoder_fn(v, charset, kind=DecodeKind.VALUE) for v in parsed_value] else: @@ -401,7 +401,8 @@ def _parse_query_string_values(value: str, options: DecodeOptions) -> t.Dict[str # (typically produced by comma splitting), preserving list-of-lists semantics. if bracket_array_assignment and isinstance(val, (list, tuple)): val = [val] - elif comma_limit_exceeded and isinstance(val, (list, tuple)): + list_limit_exceeded = len(val) > options.list_limit + if list_limit_exceeded and isinstance(val, (list, tuple)): if options.raise_on_limit_exceeded: raise ValueError(_list_limit_exceeded_message(options.list_limit)) val = CommaOverflowDict({str(i): item for i, item in enumerate(val)}) diff --git a/src/qs_codec/models/decode_options.py b/src/qs_codec/models/decode_options.py index 5bbd1f0..a5dff35 100644 --- a/src/qs_codec/models/decode_options.py +++ b/src/qs_codec/models/decode_options.py @@ -38,12 +38,14 @@ class DecodeOptions: """Set to ``True`` to allow empty ``list`` values inside ``dict``\\s in the encoded input.""" list_limit: int = 20 - """Maximum number of **indexed** items allowed in a single list (default: ``20``). + """Maximum number of items allowed in a single decoded list (default: ``20``). During decoding, keys like ``a[0]``, ``a[1]``, … are treated as list indices. If an - index exceeds this limit, the container is treated as a ``dict`` instead, with the - numeric index kept as a string key (e.g., ``{"999": "x"}``) to prevent creation of - massive sparse lists (e.g., ``a[999999999]``). + index is greater than or equal to this limit, the container is treated as a ``dict`` + instead, with the numeric index kept as a string key (e.g., ``{"999": "x"}``) to + prevent creation of massive sparse lists (e.g., ``a[999999999]``). With the default + limit, index ``19`` is the last index that can create a list; index ``20`` already + overflows to a ``dict``. This limit also applies to comma–split lists when ``comma=True``. Set a larger value if you explicitly need more items, or set a smaller one to harden against abuse. diff --git a/tests/unit/decode_test.py b/tests/unit/decode_test.py index 1f16b49..8c14d12 100644 --- a/tests/unit/decode_test.py +++ b/tests/unit/decode_test.py @@ -883,6 +883,12 @@ def _decoder(s: t.Optional[str], charset: t.Optional[Charset]) -> t.Any: {"foo": [["1", "2", "3", "4"]]}, id="bracket-list-comma-value-does-not-raise-when-over-limit", ), + pytest.param( + "foo[]=1,2,3,4", + DecodeOptions(comma=True, list_limit=0), + {"foo": {"0": ["1", "2", "3", "4"]}}, + id="bracket-list-comma-value-over-zero-limit-becomes-dict", + ), pytest.param( "foo[]=1,2,3,4", DecodeOptions(comma=True, list_limit=3, parse_lists=False), @@ -1507,6 +1513,20 @@ def test_current_list_length_calculation(self) -> None: True, id="comma-separated-list-exceed-limit", ), + pytest.param( + "foo[]=1,2", + DecodeOptions(list_limit=0, raise_on_limit_exceeded=True, comma=True), + None, + True, + id="bracket-comma-list-zero-limit-raise", + ), + pytest.param( + "foo[]=1,2", + DecodeOptions(list_limit=-1, raise_on_limit_exceeded=True, comma=True), + None, + True, + id="bracket-comma-list-negative-limit-raise", + ), pytest.param( "a[1001]=b", DecodeOptions(list_limit=1000, raise_on_limit_exceeded=True), From f8659ff71b9f210a720143909bec257194086091 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 23 May 2026 11:06:34 +0100 Subject: [PATCH 10/14] :bug: preserve DecodeOptions positional arguments --- src/qs_codec/models/decode_options.py | 16 ++++++++-------- tests/unit/decode_options_test.py | 21 ++++++++++++++++++++- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/qs_codec/models/decode_options.py b/src/qs_codec/models/decode_options.py index a5dff35..97255a3 100644 --- a/src/qs_codec/models/decode_options.py +++ b/src/qs_codec/models/decode_options.py @@ -94,14 +94,6 @@ class DecodeOptions: - ``LAST``: keep only the last value seen (``{"a": 2}``). """ - strict_merge: bool = True - """Wrap object/scalar conflicts in a list. - - When ``True`` (default), input such as ``a[b]=c&a=d`` decodes to ``{"a": [{"b": "c"}, "d"]}``. - When ``False``, the decoder restores the legacy behavior and adds non-empty string scalars as object keys, - e.g. ``{"a": {"b": "c", "d": True}}``. - """ - ignore_query_prefix: bool = False """Set to ``True`` to ignore the leading question mark query prefix in the encoded input.""" @@ -150,6 +142,14 @@ class DecodeOptions: Prefer ``decoder`` which may optionally accept a ``kind`` argument. When both are supplied, ``decoder`` takes precedence (mirroring Kotlin/C#/Swift/Dart behavior).""" + strict_merge: bool = True + """Wrap object/scalar conflicts in a list. + + When ``True`` (default), input such as ``a[b]=c&a=d`` decodes to ``{"a": [{"b": "c"}, "d"]}``. + When ``False``, the decoder restores the legacy behavior and adds non-empty string scalars as object keys, + e.g. ``{"a": {"b": "c", "d": True}}``. + """ + def __post_init__(self) -> None: """Post-initialization.""" # Default `decode_dot_in_keys` first, then mirror into `allow_dots` when unspecified. diff --git a/tests/unit/decode_options_test.py b/tests/unit/decode_options_test.py index 6cdd900..eb583ff 100644 --- a/tests/unit/decode_options_test.py +++ b/tests/unit/decode_options_test.py @@ -3,7 +3,7 @@ import pytest -from qs_codec import Charset, DecodeOptions +from qs_codec import Charset, DecodeOptions, Duplicates from qs_codec.enums.decode_kind import DecodeKind from qs_codec.utils.decode_utils import DecodeUtils @@ -15,6 +15,25 @@ def test_defaults_normalize(self) -> None: assert opts.allow_dots is False assert opts.strict_merge is True + def test_strict_merge_append_preserves_existing_positional_slots(self) -> None: + opts = DecodeOptions( + None, + None, + False, + 20, + Charset.UTF8, + False, + False, + "&", + 5, + 1000, + Duplicates.COMBINE, + True, + ) + + assert opts.ignore_query_prefix is True + assert opts.strict_merge is True + def test_decode_dot_implies_allow_dots(self) -> None: opts = DecodeOptions(decode_dot_in_keys=True) assert opts.allow_dots is True From a236ce939c2b8ab1b5fcc4b3b41e5b08ee90abab Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 23 May 2026 17:56:26 +0100 Subject: [PATCH 11/14] :test_tube: cover negative comma list limit parity --- src/qs_codec/decode.py | 12 +++++------- tests/unit/decode_test.py | 12 ++++++++++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/qs_codec/decode.py b/src/qs_codec/decode.py index cfd76a0..4852acc 100644 --- a/src/qs_codec/decode.py +++ b/src/qs_codec/decode.py @@ -239,12 +239,10 @@ def _parse_array_value( Raw query-string parsing passes ``False`` so the caller can account for bracket-array key context first. - Otherwise, enforce the per-list length limit by comparing ``current_list_length`` to ``options.list_limit``. When ``raise_on_limit_exceeded=True``, violations raise ``ValueError``. - - When ``list_limit`` is negative: - * if ``raise_on_limit_exceeded=True``, **any** list-growth operation here (e.g., comma-splitting) - raises immediately; - * if ``raise_on_limit_exceeded=False`` (default), comma-splitting still returns a list; numeric - bracket indices are handled later by ``_parse_object`` (where negative ``list_limit`` disables - numeric-index parsing only). + - When ``list_limit`` is negative, any non-empty comma split exceeds the limit: raising mode raises, + while non-raising mode degrades to an ``OverflowDict``/``CommaOverflowDict``. Raw query-string + parsing temporarily returns the split list when ``enforce_comma_limit=False`` so the caller can + apply bracket-array wrapping before the final limit check. Returns ------- @@ -279,7 +277,7 @@ def _parse_query_string_values(value: str, options: DecodeOptions) -> t.Dict[str * Decode key/value via ``options.decoder`` (default: percent-decoding using the selected ``charset``). Keys are passed with ``kind=DecodeKind.KEY`` and values with ``kind=DecodeKind.VALUE``; a custom decoder may return the raw token or ``None``. - * Apply comma-split list logic to values (handled here). Index-based list growth from bracket segments is applied later in ``_parse_object``. When ``list_limit < 0`` and ``raise_on_limit_exceeded=True``, any comma-split that would increase the list length raises immediately; otherwise the split proceeds. + * Apply comma-split list logic to values (handled here). Index-based list growth from bracket segments is applied later in ``_parse_object``. When ``list_limit < 0``, comma-split values always exceed the limit: they raise under ``raise_on_limit_exceeded=True`` and degrade to overflow dictionaries otherwise. * Interpret numeric entities for Latin-1 when requested. * Handle empty brackets ``[]`` as list markers (wrapping exactly once). * Merge duplicate keys according to ``duplicates`` policy. diff --git a/tests/unit/decode_test.py b/tests/unit/decode_test.py index 8c14d12..3682f02 100644 --- a/tests/unit/decode_test.py +++ b/tests/unit/decode_test.py @@ -1558,6 +1558,18 @@ def test_comma_list_over_limit_converts_to_overflow_dict(self) -> None: assert isinstance(result["a"], OverflowDict) assert result == {"a": {"0": "1", "1": "2", "2": "3", "3": "4"}} + def test_comma_list_negative_limit_converts_to_overflow_dict(self) -> None: + result = decode("a=1,2", DecodeOptions(comma=True, list_limit=-1)) + + assert isinstance(result["a"], OverflowDict) + assert result == {"a": {"0": "1", "1": "2"}} + + def test_bracket_comma_list_negative_limit_converts_wrapped_value_to_overflow_dict(self) -> None: + result = decode("a[]=1,2", DecodeOptions(comma=True, list_limit=-1)) + + assert isinstance(result["a"], OverflowDict) + assert result == {"a": {"0": ["1", "2"]}} + @pytest.mark.parametrize( "query, options, expected", [ From 77d7c0abdb24930ffeca27c514224a7b3f41c55f Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 23 May 2026 17:58:03 +0100 Subject: [PATCH 12/14] :memo: clarify bracket comma list limit docs --- src/qs_codec/models/decode_options.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/qs_codec/models/decode_options.py b/src/qs_codec/models/decode_options.py index 97255a3..871273b 100644 --- a/src/qs_codec/models/decode_options.py +++ b/src/qs_codec/models/decode_options.py @@ -47,8 +47,10 @@ class DecodeOptions: limit, index ``19`` is the last index that can create a list; index ``20`` already overflows to a ``dict``. - This limit also applies to comma–split lists when ``comma=True``. Set a larger value if - you explicitly need more items, or set a smaller one to harden against abuse. + This limit also applies to decoded list growth from comma-split values when ``comma=True``. + For bracket-array assignments such as ``foo[]=1,2,3``, the comma-split payload is wrapped + as a single outer list element, so the inner payload may contain more values than + ``list_limit`` while still respecting the outer container limit. """ charset: Charset = Charset.UTF8 From c6c6e44330300186976c68921559a762683b9297 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 23 May 2026 18:00:17 +0100 Subject: [PATCH 13/14] :bug: avoid aliasing strict merge targets --- src/qs_codec/utils/utils.py | 2 +- tests/unit/utils_test.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/qs_codec/utils/utils.py b/src/qs_codec/utils/utils.py index 4f2c78a..1dd113e 100644 --- a/src/qs_codec/utils/utils.py +++ b/src/qs_codec/utils/utils.py @@ -212,7 +212,7 @@ def merge( if frame.options.strict_merge: stack.pop() - last_result = [current_target, current_source] + last_result = [dict(current_target), current_source] continue if isinstance(current_source, str): diff --git a/tests/unit/utils_test.py b/tests/unit/utils_test.py index 2d91bee..279aa63 100644 --- a/tests/unit/utils_test.py +++ b/tests/unit/utils_test.py @@ -608,6 +608,9 @@ def test_merge_mapping_target_with_scalar_source_wraps_with_strict_merge(self) - result = Utils.merge(target, source) # type: ignore[arg-type] assert result == [{"a": "b"}, "scalar"] + assert target == {"a": "b"} + assert isinstance(result, list) + assert result[0] is not target def test_merge_mapping_target_with_scalar_source_uses_legacy_strict_merge_false(self) -> None: target = {"a": "b"} @@ -616,6 +619,8 @@ def test_merge_mapping_target_with_scalar_source_uses_legacy_strict_merge_false( result = Utils.merge(target, source, DecodeOptions(strict_merge=False)) # type: ignore[arg-type] assert result == {"a": "b", "scalar": True} + assert target == {"a": "b"} + assert result is not target def test_merge_structured_lists_prefers_source_when_target_slot_is_undefined(self) -> None: options = DecodeOptions() From 47ca334f41d36db3541c84633a65d7f9bd9cc42c Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 23 May 2026 21:47:42 +0100 Subject: [PATCH 14/14] :bug: copy overflow append values --- src/qs_codec/utils/utils.py | 10 +++++++++- tests/unit/utils_test.py | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/qs_codec/utils/utils.py b/src/qs_codec/utils/utils.py index 1dd113e..54005f4 100644 --- a/src/qs_codec/utils/utils.py +++ b/src/qs_codec/utils/utils.py @@ -17,6 +17,7 @@ - Several routines use an object-identity `visited` set to avoid infinite recursion when user inputs contain cycles. """ +import copy import typing as t from collections import deque from collections.abc import Mapping as ABCMapping @@ -46,6 +47,13 @@ def _numeric_key_pairs(mapping: t.Mapping[t.Any, t.Any]) -> t.List[t.Tuple[int, return pairs +def _copy_overflow_append_value(value: t.Any) -> t.Any: + """Copy container values before storing them in an overflow append slot.""" + if isinstance(value, (ABCMapping, list, tuple)): + return copy.copy(value) + return value + + @dataclass class _MergeFrame: target: t.Any @@ -545,7 +553,7 @@ def combine( idx = (max(key for key, _ in key_pairs) + 1) if key_pairs else 0 if not isinstance(b, Undefined): - a_copy[str(idx)] = b + a_copy[str(idx)] = _copy_overflow_append_value(b) return a_copy # Normal combination: flatten lists/tuples diff --git a/tests/unit/utils_test.py b/tests/unit/utils_test.py index 279aa63..ef3011b 100644 --- a/tests/unit/utils_test.py +++ b/tests/unit/utils_test.py @@ -1147,6 +1147,7 @@ def test_combine_overflow_dict_appends_list_as_single_value(self) -> None: result = Utils.combine(a, b) assert isinstance(result, OverflowDict) assert result == {"0": "x", "1": ["y", Undefined(), "z"]} + assert result["1"] is not b def test_combine_skips_undefined_in_list_flattening(self) -> None: a = ["x", Undefined()] @@ -1176,6 +1177,8 @@ def test_combine_overflow_dict_appends_overflow_source_as_single_value(self) -> result = Utils.combine(a, b) assert isinstance(result, OverflowDict) assert result == {"0": "x", "1": {"foo": "bar", "1": "y", "0": "z"}} + assert isinstance(result["1"], OverflowDict) + assert result["1"] is not b def test_merge_overflow_dict_source_preserves_non_numeric_keys(self) -> None: target = "a"