diff --git a/CHANGELOG.md b/CHANGELOG.md index 039192c..8f82885 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## 1.5.1-dev +* [FEAT] add `DecodeOptions.strict_merge` for Node `qs` 6.15 `strictMerge` parity +* [FIX] align `decode` `list_limit` semantics with Node `qs` `arrayLimit` as a maximum element count +* [FIX] combine bracket-array duplicate assignments regardless of `DecodeOptions.duplicates` * [FIX] align `decode` with Node `qs` 6.15.2 by normalizing dotted keys before preserving `depth=0` input * [FIX] align `encode` with Node `qs` 6.15.2 by using the configured delimiter after `charset_sentinel` diff --git a/docs/README.rst b/docs/README.rst index 8c6f3bc..bd28488 100644 --- a/docs/README.rst +++ b/docs/README.rst @@ -196,6 +196,38 @@ change the behavior when duplicate keys are encountered qs.DecodeOptions(duplicates=qs.Duplicates.LAST), ) == {'foo': 'baz'} +Bracket-array keys always combine, regardless of the duplicate strategy: + +.. code:: python + + import qs_codec as qs + + assert qs.decode( + 'a=1&a=2&b[]=1&b[]=2', + qs.DecodeOptions(duplicates=qs.Duplicates.LAST), + ) == {'a': '2', 'b': ['1', '2']} + +When a key appears as both an object and a scalar, +:py:attr:`strict_merge ` wraps the conflicting values in a +``list`` by default: + +.. code:: python + + import qs_codec as qs + + assert qs.decode('a[b]=c&a=d') == {'a': [{'b': 'c'}, 'd']} + +Set ``strict_merge`` to ``False`` to restore the legacy behavior, where non-empty string scalars become object keys: + +.. code:: python + + import qs_codec as qs + + assert qs.decode( + 'a[b]=c&a=d', + qs.DecodeOptions(strict_merge=False), + ) == {'a': {'b': 'c', 'd': True}} + If you have to deal with legacy browsers or services, there’s also support for decoding percent-encoded octets as :py:attr:`LATIN1 `: @@ -310,11 +342,11 @@ Note that an empty ``str``\ing is also a value and will be preserved: assert qs.decode('a[0]=b&a[1]=&a[2]=c') == {'a': ['b', '', 'c']} :py:attr:`decode ` will also limit specifying indices -in a ``list`` to a maximum index of ``20``. Any ``list`` members with an -index of greater than ``20`` will instead be converted to a ``dict`` with -the index as the key. This is needed to handle cases when someone sent, -for example, ``a[999999999]`` and it will take significant time to iterate -over this huge ``list``. +in a ``list`` to a maximum element count of ``20``. Index ``19`` is the +last index that can create a default ``list``; index ``20`` and higher +are converted to a ``dict`` with the index as the key. This is needed to +handle cases when someone sent, for example, ``a[999999999]`` and it +would take significant time to iterate over this huge ``list``. .. code:: python diff --git a/src/qs_codec/decode.py b/src/qs_codec/decode.py index a462e7d..4852acc 100644 --- a/src/qs_codec/decode.py +++ b/src/qs_codec/decode.py @@ -24,13 +24,17 @@ from .enums.duplicates import Duplicates from .enums.sentinel import Sentinel from .models.decode_options import DecodeOptions -from .models.overflow_dict import OverflowDict +from .models.overflow_dict import CommaOverflowDict, OverflowDict from .models.structured_key_scan import StructuredKeyScan from .models.undefined import UNDEFINED from .utils.decode_utils import DecodeUtils from .utils.utils import Utils +def _list_limit_exceeded_message(limit: int) -> str: + return f"List limit exceeded: Only {limit} element{'' if limit == 1 else 's'} allowed in a list." + + def decode( value: t.Optional[t.Union[str, Mapping[str, t.Any]]], options: t.Optional[DecodeOptions] = None, @@ -86,9 +90,7 @@ def decode( parse_lists_effective = False if decode_from_string: - temp_obj: t.Optional[t.Dict[str, t.Any]] = _parse_query_string_values( - str_value, opts, parse_lists=parse_lists_effective - ) + temp_obj: t.Optional[t.Dict[str, t.Any]] = _parse_query_string_values(str_value, opts) else: temp_obj = dict(mapping_value) if not temp_obj: @@ -221,20 +223,26 @@ def _interpret_numeric_entities(value: str) -> str: return re.sub(r"&#(\d+);", lambda match: chr(int(match.group(1))), value) -def _parse_array_value(value: t.Any, options: DecodeOptions, current_list_length: int) -> t.Any: +def _parse_array_value( + value: t.Any, + options: DecodeOptions, + current_list_length: int, + *, + enforce_comma_limit: bool = True, +) -> t.Any: """Post-process a raw scalar for list semantics and enforce ``list_limit``. Behavior -------- - If ``comma=True`` and ``value`` is a string that contains commas, split into a list. + When ``enforce_comma_limit`` is ``True``, over-limit comma values raise or degrade to an ``OverflowDict`` here. + Raw query-string parsing passes ``False`` so the caller can account for bracket-array key context first. - Otherwise, enforce the per-list length limit by comparing ``current_list_length`` to ``options.list_limit``. When ``raise_on_limit_exceeded=True``, violations raise ``ValueError``. - - When ``list_limit`` is negative: - * if ``raise_on_limit_exceeded=True``, **any** list-growth operation here (e.g., comma-splitting) - raises immediately; - * if ``raise_on_limit_exceeded=False`` (default), comma-splitting still returns a list; numeric - bracket indices are handled later by ``_parse_object`` (where negative ``list_limit`` disables - numeric-index parsing only). + - When ``list_limit`` is negative, any non-empty comma split exceeds the limit: raising mode raises, + while non-raising mode degrades to an ``OverflowDict``/``CommaOverflowDict``. Raw query-string + parsing temporarily returns the split list when ``enforce_comma_limit=False`` so the caller can + apply bracket-array wrapping before the final limit check. Returns ------- @@ -243,23 +251,19 @@ def _parse_array_value(value: t.Any, options: DecodeOptions, current_list_length """ if isinstance(value, str) and value and options.comma and "," in value: split_val: t.List[str] = value.split(",") - if options.raise_on_limit_exceeded and len(split_val) > options.list_limit: - raise ValueError( - f"List limit exceeded: Only {options.list_limit} element{'' if options.list_limit == 1 else 's'} allowed in a list." - ) + if enforce_comma_limit and len(split_val) > options.list_limit: + if options.raise_on_limit_exceeded: + raise ValueError(_list_limit_exceeded_message(options.list_limit)) + return CommaOverflowDict({str(i): item for i, item in enumerate(split_val)}) return split_val if options.raise_on_limit_exceeded and current_list_length >= options.list_limit: - raise ValueError( - f"List limit exceeded: Only {options.list_limit} element{'' if options.list_limit == 1 else 's'} allowed in a list." - ) + raise ValueError(_list_limit_exceeded_message(options.list_limit)) return value -def _parse_query_string_values( - value: str, options: DecodeOptions, *, parse_lists: t.Optional[bool] = None -) -> t.Dict[str, t.Any]: +def _parse_query_string_values(value: str, options: DecodeOptions) -> t.Dict[str, t.Any]: """Tokenize a raw query string into a flat ``Dict[str, Any]``. Responsibilities @@ -273,7 +277,7 @@ def _parse_query_string_values( * Decode key/value via ``options.decoder`` (default: percent-decoding using the selected ``charset``). Keys are passed with ``kind=DecodeKind.KEY`` and values with ``kind=DecodeKind.VALUE``; a custom decoder may return the raw token or ``None``. - * Apply comma-split list logic to values (handled here). Index-based list growth from bracket segments is applied later in ``_parse_object``. When ``list_limit < 0`` and ``raise_on_limit_exceeded=True``, any comma-split that would increase the list length raises immediately; otherwise the split proceeds. + * Apply comma-split list logic to values (handled here). Index-based list growth from bracket segments is applied later in ``_parse_object``. When ``list_limit < 0``, comma-split values always exceed the limit: they raise under ``raise_on_limit_exceeded=True`` and degrade to overflow dictionaries otherwise. * Interpret numeric entities for Latin-1 when requested. * Handle empty brackets ``[]`` as list markers (wrapping exactly once). * Merge duplicate keys according to ``duplicates`` policy. @@ -282,7 +286,6 @@ def _parse_query_string_values( ``_parse_keys`` / ``_parse_object``. """ obj: t.Dict[str, t.Any] = {} - parse_lists_enabled = options.parse_lists if parse_lists is None else parse_lists clean_str: str = value.replace("?", "", 1) if options.ignore_query_prefix else value # Normalize %5B/%5D to literal brackets before splitting (case-insensitive). @@ -354,9 +357,11 @@ def _parse_query_string_values( continue bracket_equals_pos: int = part.find("]=") pos: int = part.find("=") if bracket_equals_pos == -1 else (bracket_equals_pos + 1) + bracket_array_assignment = pos != -1 and "[]=" in part # Decode key and value with a key-aware decoder; skip pairs whose key decodes to None raw_key = "" + list_limit_exceeded = False if pos == -1: key_decoded = decoder_fn(part, charset, kind=DecodeKind.KEY) if key_decoded is None: @@ -377,7 +382,9 @@ def _parse_query_string_values( part[pos + 1 :], options, len(obj[key]) if key in obj and isinstance(obj[key], (list, tuple)) else 0, + enforce_comma_limit=False, ) + list_limit_exceeded = isinstance(parsed_value, (list, tuple)) and len(parsed_value) > options.list_limit if isinstance(parsed_value, (list, tuple)): val = [decoder_fn(v, charset, kind=DecodeKind.VALUE) for v in parsed_value] else: @@ -390,15 +397,21 @@ def _parse_query_string_values( # Upstream parity: if token contains "[]=", only wrap values that are already arrays # (typically produced by comma splitting), preserving list-of-lists semantics. - if parse_lists_enabled and pos != -1 and "[]=" in part and isinstance(val, (list, tuple)): + if bracket_array_assignment and isinstance(val, (list, tuple)): val = [val] + list_limit_exceeded = len(val) > options.list_limit + if list_limit_exceeded and isinstance(val, (list, tuple)): + if options.raise_on_limit_exceeded: + raise ValueError(_list_limit_exceeded_message(options.list_limit)) + val = CommaOverflowDict({str(i): item for i, item in enumerate(val)}) existing: bool = key in obj + part_duplicates = Duplicates.COMBINE if bracket_array_assignment else duplicates # Combine/overwrite according to the configured duplicates policy. - if existing and duplicates == Duplicates.COMBINE: + if existing and part_duplicates == Duplicates.COMBINE: obj[key] = Utils.combine(obj[key], val, options) - elif not existing or duplicates == Duplicates.LAST: + elif not existing or part_duplicates == Duplicates.LAST: obj[key] = val return obj @@ -518,10 +531,14 @@ def _parse_object( and root != decoded_root and str(index) == decoded_root and parse_lists_enabled - and index <= options.list_limit ): - obj = [UNDEFINED for _ in range(index + 1)] - obj[index] = leaf + if index < options.list_limit: + obj = [UNDEFINED for _ in range(index + 1)] + obj[index] = leaf + elif options.raise_on_limit_exceeded: + raise ValueError(_list_limit_exceeded_message(options.list_limit)) + else: + obj[decoded_root] = leaf else: # Preserve the literal decoded key for non-array roots (e.g. "[01]" -> "01"), # matching Node `qs` behavior for leading-zero numeric-like segments. diff --git a/src/qs_codec/models/decode_options.py b/src/qs_codec/models/decode_options.py index aa039f0..871273b 100644 --- a/src/qs_codec/models/decode_options.py +++ b/src/qs_codec/models/decode_options.py @@ -38,15 +38,19 @@ class DecodeOptions: """Set to ``True`` to allow empty ``list`` values inside ``dict``\\s in the encoded input.""" list_limit: int = 20 - """Maximum number of **indexed** items allowed in a single list (default: ``20``). + """Maximum number of items allowed in a single decoded list (default: ``20``). During decoding, keys like ``a[0]``, ``a[1]``, … are treated as list indices. If an - index exceeds this limit, the container is treated as a ``dict`` instead, with the - numeric index kept as a string key (e.g., ``{"999": "x"}``) to prevent creation of - massive sparse lists (e.g., ``a[999999999]``). - - This limit also applies to comma–split lists when ``comma=True``. Set a larger value if - you explicitly need more items, or set a smaller one to harden against abuse. + index is greater than or equal to this limit, the container is treated as a ``dict`` + instead, with the numeric index kept as a string key (e.g., ``{"999": "x"}``) to + prevent creation of massive sparse lists (e.g., ``a[999999999]``). With the default + limit, index ``19`` is the last index that can create a list; index ``20`` already + overflows to a ``dict``. + + This limit also applies to decoded list growth from comma-split values when ``comma=True``. + For bracket-array assignments such as ``foo[]=1,2,3``, the comma-split payload is wrapped + as a single outer list element, so the inner payload may contain more values than + ``list_limit`` while still respecting the outer container limit. """ charset: Charset = Charset.UTF8 @@ -140,6 +144,14 @@ class DecodeOptions: Prefer ``decoder`` which may optionally accept a ``kind`` argument. When both are supplied, ``decoder`` takes precedence (mirroring Kotlin/C#/Swift/Dart behavior).""" + strict_merge: bool = True + """Wrap object/scalar conflicts in a list. + + When ``True`` (default), input such as ``a[b]=c&a=d`` decodes to ``{"a": [{"b": "c"}, "d"]}``. + When ``False``, the decoder restores the legacy behavior and adds non-empty string scalars as object keys, + e.g. ``{"a": {"b": "c", "d": True}}``. + """ + def __post_init__(self) -> None: """Post-initialization.""" # Default `decode_dot_in_keys` first, then mirror into `allow_dots` when unspecified. diff --git a/src/qs_codec/models/overflow_dict.py b/src/qs_codec/models/overflow_dict.py index 96ca37a..46f2897 100644 --- a/src/qs_codec/models/overflow_dict.py +++ b/src/qs_codec/models/overflow_dict.py @@ -10,16 +10,20 @@ class OverflowDict(dict): def copy(self) -> "OverflowDict": """Return an OverflowDict copy to preserve the overflow marker.""" - return OverflowDict(super().copy()) + return self.__class__(super().copy()) def __copy__(self) -> "OverflowDict": """Return an OverflowDict copy to preserve the overflow marker.""" - return OverflowDict(super().copy()) + return self.__class__(super().copy()) def __deepcopy__(self, memo: dict[int, object]) -> "OverflowDict": """Return an OverflowDict deepcopy to preserve the overflow marker.""" - copied = OverflowDict() + copied = self.__class__() memo[id(self)] = copied for key, value in self.items(): copied[copy.deepcopy(key, memo)] = copy.deepcopy(value, memo) return copied + + +class CommaOverflowDict(OverflowDict): + """Overflow marker for comma-split values that exceeded `list_limit`.""" diff --git a/src/qs_codec/utils/utils.py b/src/qs_codec/utils/utils.py index 4e74987..54005f4 100644 --- a/src/qs_codec/utils/utils.py +++ b/src/qs_codec/utils/utils.py @@ -17,6 +17,7 @@ - Several routines use an object-identity `visited` set to avoid infinite recursion when user inputs contain cycles. """ +import copy import typing as t from collections import deque from collections.abc import Mapping as ABCMapping @@ -26,7 +27,7 @@ from enum import Enum from ..models.decode_options import DecodeOptions -from ..models.overflow_dict import OverflowDict +from ..models.overflow_dict import CommaOverflowDict, OverflowDict from ..models.undefined import Undefined @@ -46,6 +47,13 @@ def _numeric_key_pairs(mapping: t.Mapping[t.Any, t.Any]) -> t.List[t.Tuple[int, return pairs +def _copy_overflow_append_value(value: t.Any) -> t.Any: + """Copy container values before storing them in an overflow append slot.""" + if isinstance(value, (ABCMapping, list, tuple)): + return copy.copy(value) + return value + + @dataclass class _MergeFrame: target: t.Any @@ -205,8 +213,25 @@ def merge( last_result = new_target continue + if isinstance(current_source, Undefined) or current_source == "": + stack.pop() + last_result = current_target + continue + + if frame.options.strict_merge: + stack.pop() + last_result = [dict(current_target), current_source] + continue + + if isinstance(current_source, str): + new_target = dict(current_target) + new_target[current_source] = True + stack.pop() + last_result = new_target + continue + stack.pop() - last_result = current_target + last_result = [current_target, current_source] continue if not isinstance(current_target, (list, tuple)) and isinstance(current_source, (list, tuple)): @@ -520,30 +545,15 @@ def combine( list to :class:`OverflowDict`. """ if Utils.is_overflow(a): - # a is already an OverflowDict. Append b to a *copy* at the next numeric index. - # We assume sequential keys; len(a_copy) gives the next index. + # a is already an OverflowDict. Append b as one value at the next numeric index. orig_a = t.cast(OverflowDict, a) - a_copy = OverflowDict({k: v for k, v in orig_a.items() if not isinstance(v, Undefined)}) + a_copy = orig_a.__class__({k: v for k, v in orig_a.items() if not isinstance(v, Undefined)}) # Use max key + 1 to handle sparse dicts safely, rather than len(a) key_pairs = _numeric_key_pairs(a_copy) idx = (max(key for key, _ in key_pairs) + 1) if key_pairs else 0 - if isinstance(b, (list, tuple)): - for item in b: - if not isinstance(item, Undefined): - a_copy[str(idx)] = item - idx += 1 - elif Utils.is_overflow(b): - b = t.cast(OverflowDict, b) - # Iterate in numeric key order to preserve list semantics - for _, k in sorted(_numeric_key_pairs(b), key=lambda item: item[0]): - val = b[k] - if not isinstance(val, Undefined): - a_copy[str(idx)] = val - idx += 1 - else: - if not isinstance(b, Undefined): - a_copy[str(idx)] = b + if not isinstance(b, Undefined): + a_copy[str(idx)] = _copy_overflow_append_value(b) return a_copy # Normal combination: flatten lists/tuples @@ -556,6 +566,8 @@ def combine( # Flatten b, handling OverflowDict as a list source if isinstance(b, (list, tuple)): list_b = [x for x in b if not isinstance(x, Undefined)] + elif isinstance(b, CommaOverflowDict): + list_b = [b] elif Utils.is_overflow(b): b_of = t.cast(OverflowDict, b) list_b = [ diff --git a/tests/comparison/test_cases.json b/tests/comparison/test_cases.json index dcf30f8..d66e87c 100644 --- a/tests/comparison/test_cases.json +++ b/tests/comparison/test_cases.json @@ -60,6 +60,17 @@ }, "encoded": "a[b]=c" }, + { + "data": { + "a": [ + { + "b": "c" + }, + "d" + ] + }, + "encoded": "a[b]=c&a=d" + }, { "data": { "a": { @@ -91,6 +102,14 @@ }, "encoded": "a[0]=f" }, + { + "data": { + "a": { + "20": "f" + } + }, + "encoded": "a[20]=f" + }, { "data": { "a": [ @@ -369,4 +388,4 @@ }, "encoded": "posts[0][id]=1&posts[0][someId]=du761-8bc98&posts[0][text]=Lorem Ipsum Dolor&posts[0][user][firstname]=John&posts[0][user][lastname]=Doe&posts[0][user][age]=25&posts[0][relationships][tags][0][name]=super&posts[0][relationships][tags][1][name]=awesome&posts[1][id]=1&posts[1][someId]=pa813-7jx02&posts[1][text]=Lorem Ipsum Dolor&posts[1][user][firstname]=Mary&posts[1][user][lastname]=Doe&posts[1][user][age]=25&posts[1][relationships][tags][0][name]=super&posts[1][relationships][tags][1][name]=awesome&total=2" } -] \ No newline at end of file +] diff --git a/tests/unit/decode_options_test.py b/tests/unit/decode_options_test.py index dac1bb1..eb583ff 100644 --- a/tests/unit/decode_options_test.py +++ b/tests/unit/decode_options_test.py @@ -3,7 +3,7 @@ import pytest -from qs_codec import Charset, DecodeOptions +from qs_codec import Charset, DecodeOptions, Duplicates from qs_codec.enums.decode_kind import DecodeKind from qs_codec.utils.decode_utils import DecodeUtils @@ -13,6 +13,26 @@ def test_defaults_normalize(self) -> None: opts = DecodeOptions() assert opts.decode_dot_in_keys is False assert opts.allow_dots is False + assert opts.strict_merge is True + + def test_strict_merge_append_preserves_existing_positional_slots(self) -> None: + opts = DecodeOptions( + None, + None, + False, + 20, + Charset.UTF8, + False, + False, + "&", + 5, + 1000, + Duplicates.COMBINE, + True, + ) + + assert opts.ignore_query_prefix is True + assert opts.strict_merge is True def test_decode_dot_implies_allow_dots(self) -> None: opts = DecodeOptions(decode_dot_in_keys=True) diff --git a/tests/unit/decode_test.py b/tests/unit/decode_test.py index e29d0df..3682f02 100644 --- a/tests/unit/decode_test.py +++ b/tests/unit/decode_test.py @@ -406,9 +406,11 @@ def test_allows_to_specify_list_indices( @pytest.mark.parametrize( "query, options, expected", [ - pytest.param("a[20]=a", DecodeOptions(list_limit=20), {"a": ["a"]}, id="at-limit"), + pytest.param("a[19]=a", DecodeOptions(list_limit=20), {"a": ["a"]}, id="below-limit"), + pytest.param("a[20]=a", DecodeOptions(list_limit=20), {"a": {"20": "a"}}, id="at-limit"), pytest.param("a[21]=a", DecodeOptions(list_limit=20), {"a": {"21": "a"}}, id="above-limit"), - pytest.param("a[20]=a", None, {"a": ["a"]}, id="default-at-limit"), + pytest.param("a[19]=a", None, {"a": ["a"]}, id="default-below-limit"), + pytest.param("a[20]=a", None, {"a": {"20": "a"}}, id="default-at-limit"), pytest.param("a[21]=a", None, {"a": {"21": "a"}}, id="default-above-limit"), ], ) @@ -712,7 +714,7 @@ def test_allows_setting_the_parameter_limit_to_infinity(self) -> None: "query, options, expected", [ pytest.param("a[0]=b", DecodeOptions(list_limit=-1), {"a": {"0": "b"}}, id="limit--1-single-index-0"), - pytest.param("a[0]=b", DecodeOptions(list_limit=0), {"a": ["b"]}, id="limit-0-single-index-0"), + pytest.param("a[0]=b", DecodeOptions(list_limit=0), {"a": {"0": "b"}}, id="limit-0-single-index-0"), pytest.param("a[-1]=b", DecodeOptions(list_limit=-1), {"a": {"-1": "b"}}, id="limit--1-negative-index"), pytest.param("a[-1]=b", DecodeOptions(list_limit=0), {"a": {"-1": "b"}}, id="limit-0-negative-index"), pytest.param( @@ -869,6 +871,30 @@ def _decoder(s: t.Optional[str], charset: t.Optional[Charset]) -> t.Any: {"foo": [["1", "2", "3"], "a"]}, id="string-second-list", ), + pytest.param( + "foo[]=1,2,3,4", + DecodeOptions(comma=True, list_limit=3), + {"foo": [["1", "2", "3", "4"]]}, + id="bracket-list-comma-value-can-exceed-list-limit", + ), + pytest.param( + "foo[]=1,2,3,4", + DecodeOptions(comma=True, list_limit=3, raise_on_limit_exceeded=True), + {"foo": [["1", "2", "3", "4"]]}, + id="bracket-list-comma-value-does-not-raise-when-over-limit", + ), + pytest.param( + "foo[]=1,2,3,4", + DecodeOptions(comma=True, list_limit=0), + {"foo": {"0": ["1", "2", "3", "4"]}}, + id="bracket-list-comma-value-over-zero-limit-becomes-dict", + ), + pytest.param( + "foo[]=1,2,3,4", + DecodeOptions(comma=True, list_limit=3, parse_lists=False), + {"foo": {"0": [["1", "2", "3", "4"]]}}, + id="bracket-list-comma-value-keeps-nesting-when-list-parsing-disabled", + ), pytest.param( "a[b]=x,y[]=z", DecodeOptions(comma=True), @@ -1197,6 +1223,88 @@ def test_duplicates_option( result = decode(query) if options is None else decode(query, options) assert result == expected + @pytest.mark.parametrize( + "query, options, expected", + [ + pytest.param( + "b[]=1&b[]=2", + DecodeOptions(duplicates=Duplicates.LAST), + {"b": ["1", "2"]}, + id="last-bracket-array-combines", + ), + pytest.param( + "b[]=1&b[]=2", + DecodeOptions(duplicates=Duplicates.FIRST), + {"b": ["1", "2"]}, + id="first-bracket-array-combines", + ), + pytest.param( + "a=1&a=2&b[]=1&b[]=2", + DecodeOptions(duplicates=Duplicates.LAST), + {"a": "2", "b": ["1", "2"]}, + id="last-preserves-flat-policy", + ), + pytest.param( + "a=1&a=2&b[]=1&b[]=2", + DecodeOptions(duplicates=Duplicates.FIRST), + {"a": "1", "b": ["1", "2"]}, + id="first-preserves-flat-policy", + ), + pytest.param( + "b%5B%5D=1&b%5B%5D=2", + DecodeOptions(duplicates=Duplicates.LAST), + {"b": ["1", "2"]}, + id="encoded-bracket-array-combines", + ), + ], + ) + def test_bracket_notation_always_combines_regardless_of_duplicates( + self, query: str, options: DecodeOptions, expected: t.Mapping[str, t.Any] + ) -> None: + assert decode(query, options) == expected + + +class TestStrictMergeOption: + @pytest.mark.parametrize( + "query, options, expected", + [ + pytest.param("a[b]=c&a=d", DecodeOptions(), {"a": [{"b": "c"}, "d"]}, id="object-then-scalar"), + pytest.param("a=d&a[b]=c", DecodeOptions(), {"a": ["d", {"b": "c"}]}, id="scalar-then-object"), + pytest.param( + "a[b]=c&a=d", + DecodeOptions(strict_merge=False), + {"a": {"b": "c", "d": True}}, + id="legacy-object-then-scalar", + ), + pytest.param( + "a[b]=c&a=", + DecodeOptions(strict_merge=False), + {"a": {"b": "c"}}, + id="legacy-ignores-empty-assigned-scalar", + ), + pytest.param( + "a[b]=c&a", + DecodeOptions(strict_merge=False), + {"a": {"b": "c"}}, + id="legacy-ignores-empty-missing-value-scalar", + ), + pytest.param( + "a[b]=c&a=", + DecodeOptions(), + {"a": {"b": "c"}}, + id="default-ignores-empty-assigned-scalar", + ), + pytest.param( + "a[b]=c&a", + DecodeOptions(strict_null_handling=True), + {"a": {"b": "c"}}, + id="default-ignores-strict-null-scalar", + ), + ], + ) + def test_strict_merge(self, query: str, options: DecodeOptions, expected: t.Mapping[str, t.Any]) -> None: + assert decode(query, options) == expected + class TestStrictDepthOption: @pytest.mark.parametrize( @@ -1405,6 +1513,34 @@ def test_current_list_length_calculation(self) -> None: True, id="comma-separated-list-exceed-limit", ), + pytest.param( + "foo[]=1,2", + DecodeOptions(list_limit=0, raise_on_limit_exceeded=True, comma=True), + None, + True, + id="bracket-comma-list-zero-limit-raise", + ), + pytest.param( + "foo[]=1,2", + DecodeOptions(list_limit=-1, raise_on_limit_exceeded=True, comma=True), + None, + True, + id="bracket-comma-list-negative-limit-raise", + ), + pytest.param( + "a[1001]=b", + DecodeOptions(list_limit=1000, raise_on_limit_exceeded=True), + None, + True, + id="indexed-notation-exceeds-list-limit-raise", + ), + pytest.param( + "a[0]=1&a[1]=2&a[2]=3&a[10]=4", + DecodeOptions(list_limit=6, raise_on_limit_exceeded=True), + None, + True, + id="sparse-index-exceeds-list-limit-raise", + ), ], ) def test_list_limit( @@ -1416,6 +1552,72 @@ def test_list_limit( else: assert decode(query, options) == expected + def test_comma_list_over_limit_converts_to_overflow_dict(self) -> None: + result = decode("a=1,2,3,4", DecodeOptions(comma=True, list_limit=3)) + + assert isinstance(result["a"], OverflowDict) + assert result == {"a": {"0": "1", "1": "2", "2": "3", "3": "4"}} + + def test_comma_list_negative_limit_converts_to_overflow_dict(self) -> None: + result = decode("a=1,2", DecodeOptions(comma=True, list_limit=-1)) + + assert isinstance(result["a"], OverflowDict) + assert result == {"a": {"0": "1", "1": "2"}} + + def test_bracket_comma_list_negative_limit_converts_wrapped_value_to_overflow_dict(self) -> None: + result = decode("a[]=1,2", DecodeOptions(comma=True, list_limit=-1)) + + assert isinstance(result["a"], OverflowDict) + assert result == {"a": {"0": ["1", "2"]}} + + @pytest.mark.parametrize( + "query, options, expected", + [ + pytest.param( + "a=5&a=1,2,3,4", + DecodeOptions(comma=True, list_limit=3), + {"a": ["5", {"0": "1", "1": "2", "2": "3", "3": "4"}]}, + id="scalar-then-overflow-comma-list", + ), + pytest.param( + "a=5&a=1,2,3,4", + DecodeOptions(comma=True, list_limit=1), + {"a": {"0": "5", "1": {"0": "1", "1": "2", "2": "3", "3": "4"}}}, + id="scalar-then-overflow-comma-list-over-combined-limit", + ), + pytest.param( + "a=1,2,3,4&a=5,6", + DecodeOptions(comma=True, list_limit=3), + {"a": {"0": "1", "1": "2", "2": "3", "3": "4", "4": ["5", "6"]}}, + id="overflow-comma-list-then-in-limit-comma-list", + ), + pytest.param( + "a=1,2,3,4&a=5,6,7,8", + DecodeOptions(comma=True, list_limit=3), + { + "a": { + "0": "1", + "1": "2", + "2": "3", + "3": "4", + "4": {"0": "5", "1": "6", "2": "7", "3": "8"}, + } + }, + id="overflow-comma-list-then-overflow-comma-list", + ), + pytest.param( + "a[]=1&a[]=2&a[]=3,4", + DecodeOptions(comma=True, list_limit=1), + {"a": {"0": "1", "1": "2", "2": [["3", "4"]]}}, + id="bracket-overflow-then-comma-list", + ), + ], + ) + def test_comma_overflow_duplicates_keep_overflow_values_nested( + self, query: str, options: DecodeOptions, expected: t.Mapping[str, t.Any] + ) -> None: + assert decode(query, options) == expected + # --- Additional tests for decoder kind and parser state isolation --- @@ -1595,7 +1797,7 @@ class TestDecodeMixedBypassParity: "query, options, expected", [ pytest.param("a=1&a[b]=2", None, {"a": ["1", {"b": "2"}]}, id="flat-before-structured"), - pytest.param("a[b]=2&a=1", None, {"a": {"b": "2"}}, id="structured-before-flat"), + pytest.param("a[b]=2&a=1", None, {"a": [{"b": "2"}, "1"]}, id="structured-before-flat"), pytest.param("0=y&[]=x", None, {"0": "x"}, id="flat-zero-collides-leading-bracket-root"), pytest.param("[]=x&0=y", None, {"0": ["x", "y"]}, id="leading-bracket-root-collides-flat-zero"), pytest.param( diff --git a/tests/unit/example_test.py b/tests/unit/example_test.py index 1077b0f..b27ccc7 100644 --- a/tests/unit/example_test.py +++ b/tests/unit/example_test.py @@ -79,6 +79,14 @@ def test_dictionaries(self): assert qs_codec.decode("foo=bar&foo=baz", qs_codec.DecodeOptions(duplicates=qs_codec.Duplicates.LAST)) == { "foo": "baz" } + assert qs_codec.decode( + "a=1&a=2&b[]=1&b[]=2", + qs_codec.DecodeOptions(duplicates=qs_codec.Duplicates.LAST), + ) == {"a": "2", "b": ["1", "2"]} + + # When a key appears as both an object and a scalar, `DecodeOptions.strict_merge` wraps the conflict in a list. + assert qs_codec.decode("a[b]=c&a=d") == {"a": [{"b": "c"}, "d"]} + assert qs_codec.decode("a[b]=c&a=d", qs_codec.DecodeOptions(strict_merge=False)) == {"a": {"b": "c", "d": True}} # If you have to deal with legacy browsers or services, there's also support for decoding percent-encoded octets # as `Charset.LATIN1`: @@ -126,10 +134,10 @@ def test_lists(self): assert qs_codec.decode("a[]=&a[]=b") == {"a": ["", "b"]} assert qs_codec.decode("a[0]=b&a[1]=&a[2]=c") == {"a": ["b", "", "c"]} - # **qs_codec** will also limit specifying indices in a `list` to a maximum index of `20`. - # Any `list` members with an index of greater than `20` will instead be converted to a `dict` with the index as - # the key. This is needed to handle cases when someone sent, for example, `a[999999999]` and it will take - # significant time to iterate over this huge `list`. + # **qs_codec** will also limit specifying indices in a `list` to a maximum element count of `20`. + # Index `19` is the last index that can create a default `list`; index `20` and higher are converted to a + # `dict` with the index as the key. This is needed to handle cases when someone sent, for example, + # `a[999999999]` and it will take significant time to iterate over this huge `list`. assert qs_codec.decode("a[100]=b") == {"a": {"100": "b"}} # This limit can be overridden by passing an `DecodeOptions.list_limit` option: diff --git a/tests/unit/utils_test.py b/tests/unit/utils_test.py index b203417..ef3011b 100644 --- a/tests/unit/utils_test.py +++ b/tests/unit/utils_test.py @@ -601,14 +601,26 @@ def test_merges_array_into_object(self) -> None: {"foo": ["xyzzy"]}, ) == {"foo": {"bar": "baz", "0": "xyzzy"}} - def test_merge_mapping_target_with_scalar_source_returns_target_unchanged(self) -> None: + def test_merge_mapping_target_with_scalar_source_wraps_with_strict_merge(self) -> None: target = {"a": "b"} source = "scalar" result = Utils.merge(target, source) # type: ignore[arg-type] - assert result == {"a": "b"} - assert result is target + assert result == [{"a": "b"}, "scalar"] + assert target == {"a": "b"} + assert isinstance(result, list) + assert result[0] is not target + + def test_merge_mapping_target_with_scalar_source_uses_legacy_strict_merge_false(self) -> None: + target = {"a": "b"} + source = "scalar" + + result = Utils.merge(target, source, DecodeOptions(strict_merge=False)) # type: ignore[arg-type] + + assert result == {"a": "b", "scalar": True} + assert target == {"a": "b"} + assert result is not target def test_merge_structured_lists_prefers_source_when_target_slot_is_undefined(self) -> None: options = DecodeOptions() @@ -755,7 +767,7 @@ def test_combine_overflow_dict_with_overflow_dict(self) -> None: combined = Utils.combine(a, b) assert isinstance(combined, OverflowDict) assert combined["0"] == "x" - assert combined["1"] == "y" + assert combined["1"] == {"0": "y"} assert len(combined) == 2 def test_compact_removes_undefined_entries_and_avoids_cycles(self) -> None: @@ -1129,12 +1141,13 @@ def test_combine_list_with_overflow_dict(self) -> None: result = Utils.combine(a, b) assert result == ["start", "x", "y"] - def test_combine_skips_undefined_in_overflow_dict_append(self) -> None: + def test_combine_overflow_dict_appends_list_as_single_value(self) -> None: a = OverflowDict({"0": "x"}) b = ["y", Undefined(), "z"] result = Utils.combine(a, b) assert isinstance(result, OverflowDict) - assert result == {"0": "x", "1": "y", "2": "z"} + assert result == {"0": "x", "1": ["y", Undefined(), "z"]} + assert result["1"] is not b def test_combine_skips_undefined_in_list_flattening(self) -> None: a = ["x", Undefined()] @@ -1158,13 +1171,14 @@ def test_combine_overflow_dict_skips_existing_undefined_and_ignores_non_numeric_ assert result["skip"] == "keep" assert "1" in a # Original should remain unchanged - def test_combine_overflow_dict_source_skips_non_numeric_keys(self) -> None: + def test_combine_overflow_dict_appends_overflow_source_as_single_value(self) -> None: a = OverflowDict({"0": "x"}) b = OverflowDict({"foo": "bar", "1": "y", "0": "z"}) result = Utils.combine(a, b) assert isinstance(result, OverflowDict) - assert result == {"0": "x", "1": "z", "2": "y"} - assert "foo" not in result + assert result == {"0": "x", "1": {"foo": "bar", "1": "y", "0": "z"}} + assert isinstance(result["1"], OverflowDict) + assert result["1"] is not b def test_merge_overflow_dict_source_preserves_non_numeric_keys(self) -> None: target = "a"