From e1984d89000cf1fea30f70439f8f30765d960088 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 20:18:55 +1100 Subject: [PATCH 01/31] Implement the start of rope.base.serializer --- rope/base/serializer.py | 67 +++++++++++++++++++++++++++++++++++++ ropetest/serializer_test.py | 22 ++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 rope/base/serializer.py create mode 100644 ropetest/serializer_test.py diff --git a/rope/base/serializer.py b/rope/base/serializer.py new file mode 100644 index 000000000..1b6ca24ff --- /dev/null +++ b/rope/base/serializer.py @@ -0,0 +1,67 @@ +""" +This module serves to convert a data structure composed of Python primitives +(dict, list, tuple, int, str, None) to JSON-serializable primitives (object, +array, number, str, null). + +A core feature of this serializer is that the produced will round-trip to +identical objects when deserialized by the standard library json module. +In other words, this property always holds: + + >>> original_data = ... any JSON ... + >>> encoded = python_to_json(original_data) + >>> serialized = json.dumps(encoded) + >>> decoded = json.loads(serialized) + >>> rehydrated_data = json_to_python(decoded) + + >>> assert rehydrated_data == original_data + >>> assert encoded == decoded + +Couple challenges in straight serialization that this module helps resolve: + +- json.dumps() maps both Python list and tuple to JSON array. This module + converts Python list `[1, 2, 3]` to `["list", [1, 2, 3]]` and Python tuple + `(1, 2, 3)` to `["tuple", [1, 2, 3]]` + +- Python Dictionary keys can be a tuple, but JSON Object keys must be strings + This module replaces all `dict` keys with refid which can be resolved using + the `encoded["references"][refid]` lookup table. + +- There is currently no support for floating point numbers. + +Note that `json_to_python` only accepts Python objects that can be the output +of `python_to_json`, there is NO guarantee for going the other way around. This +may or may not work: + + >>> python_to_json(json_to_python(original_data)) == original_data + +""" + + +def python_to_json(o): + references = [] + return { + "v": 1, + "data": _py2js(o, references), + "references": references, + } + + +def json_to_python(o): + assert o["v"] == 1 + references = o["references"] + data = _js2py(o["data"], references) + return data + + +def _py2js(o, references): + assert not isinstance(o, list) + if isinstance(o, (str, int)): + return o + assert False, o + + +def _js2py(o, references): + assert not isinstance(o, tuple) + if isinstance(o, (str, int)): + return o + assert False diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py new file mode 100644 index 000000000..0955c01b4 --- /dev/null +++ b/ropetest/serializer_test.py @@ -0,0 +1,22 @@ +import json + +import pytest + +from rope.base.serializer import python_to_json, json_to_python + + +@pytest.mark.parametrize( + "original_data", + [ + 4, + "3", + ], +) +def test_roundtrip(original_data): + encoded = python_to_json(original_data) + serialized = json.dumps(encoded) + decoded = json.loads(serialized) + rehydrated_data = json_to_python(decoded) + + assert encoded == decoded + assert rehydrated_data == original_data From b76b2452e9ea79787dede91d3938451012327e35 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 20:21:20 +1100 Subject: [PATCH 02/31] Implement None <-> null --- rope/base/serializer.py | 4 ++-- ropetest/serializer_test.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index 1b6ca24ff..d2ab846ff 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -55,13 +55,13 @@ def json_to_python(o): def _py2js(o, references): assert not isinstance(o, list) - if isinstance(o, (str, int)): + if isinstance(o, (str, int)) or o is None: return o assert False, o def _js2py(o, references): assert not isinstance(o, tuple) - if isinstance(o, (str, int)): + if isinstance(o, (str, int)) or o is None: return o assert False diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index 0955c01b4..2aacabcbc 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -8,6 +8,7 @@ @pytest.mark.parametrize( "original_data", [ + None, 4, "3", ], From 81e2240f2f674c210818e5f5f50bb3f76bd79a31 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 20:30:03 +1100 Subject: [PATCH 03/31] Reject unrecognized objects --- rope/base/serializer.py | 4 ++-- ropetest/serializer_test.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index d2ab846ff..0d6294ebd 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -57,11 +57,11 @@ def _py2js(o, references): assert not isinstance(o, list) if isinstance(o, (str, int)) or o is None: return o - assert False, o + raise TypeError(f"Object of type {type(o)} is not allowed {o}") def _js2py(o, references): assert not isinstance(o, tuple) if isinstance(o, (str, int)) or o is None: return o - assert False + raise TypeError(f"Object of type {type(o)} is not allowed {o}") diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index 2aacabcbc..7415b679c 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -21,3 +21,15 @@ def test_roundtrip(original_data): assert encoded == decoded assert rehydrated_data == original_data + + +@pytest.mark.parametrize( + "original_data", + [ + object(), + 4.8, + ], +) +def test_rejects_unrecognized_object(original_data): + with pytest.raises(TypeError): + python_to_json(original_data) From b908b2a9eeb1c76cd5644d5aee93eb19363f9bb6 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 20:32:28 +1100 Subject: [PATCH 04/31] Implement tuple <-> array conversion --- rope/base/serializer.py | 4 ++++ ropetest/serializer_test.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index 0d6294ebd..3d9c6cb32 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -57,6 +57,8 @@ def _py2js(o, references): assert not isinstance(o, list) if isinstance(o, (str, int)) or o is None: return o + elif isinstance(o, tuple): + return [_py2js(item, references) for item in o] raise TypeError(f"Object of type {type(o)} is not allowed {o}") @@ -64,4 +66,6 @@ def _js2py(o, references): assert not isinstance(o, tuple) if isinstance(o, (str, int)) or o is None: return o + elif isinstance(o, list): + return tuple(_js2py(item, references) for item in o) raise TypeError(f"Object of type {type(o)} is not allowed {o}") diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index 7415b679c..c7a79a7a5 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -11,6 +11,9 @@ None, 4, "3", + (), + ("hello",), + (1, 2, "hello"), ], ) def test_roundtrip(original_data): From 31108d4e7b73ae29e145f5cbe75c9b3aeefec8ae Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 20:39:23 +1100 Subject: [PATCH 05/31] Implement tuple and list <-> array conversion --- rope/base/serializer.py | 12 +++++++++--- ropetest/serializer_test.py | 4 +++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index 3d9c6cb32..d50b188d0 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -54,11 +54,12 @@ def json_to_python(o): def _py2js(o, references): - assert not isinstance(o, list) if isinstance(o, (str, int)) or o is None: return o elif isinstance(o, tuple): - return [_py2js(item, references) for item in o] + return ["tuple", [_py2js(item, references) for item in o]] + elif isinstance(o, list): + return ["list", [_py2js(item, references) for item in o]] raise TypeError(f"Object of type {type(o)} is not allowed {o}") @@ -67,5 +68,10 @@ def _js2py(o, references): if isinstance(o, (str, int)) or o is None: return o elif isinstance(o, list): - return tuple(_js2py(item, references) for item in o) + typ, data = o + if typ == "tuple": + return tuple(_js2py(item, references) for item in data) + elif typ == "list": + return list(_js2py(item, references) for item in data) + assert False raise TypeError(f"Object of type {type(o)} is not allowed {o}") diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index c7a79a7a5..892fe5448 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -12,8 +12,10 @@ 4, "3", (), + [], ("hello",), - (1, 2, "hello"), + (1, [2], "hello"), + [1, [2], "hello"], ], ) def test_roundtrip(original_data): From 01372f7935d7476d8291360e0d27eea90ec18704 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 20:42:54 +1100 Subject: [PATCH 06/31] Implement dict with string keys <-> object conversion --- rope/base/serializer.py | 12 ++++++++++++ ropetest/serializer_test.py | 1 + 2 files changed, 13 insertions(+) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index d50b188d0..ec31da5a7 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -60,6 +60,12 @@ def _py2js(o, references): return ["tuple", [_py2js(item, references) for item in o]] elif isinstance(o, list): return ["list", [_py2js(item, references) for item in o]] + elif isinstance(o, dict): + result = {} + for k, v in o.items(): + assert isinstance(k, str) + result[_py2js(k, references)] = _py2js(v, references) + return result raise TypeError(f"Object of type {type(o)} is not allowed {o}") @@ -74,4 +80,10 @@ def _js2py(o, references): elif typ == "list": return list(_js2py(item, references) for item in data) assert False + elif isinstance(o, dict): + result = {} + for k, v in o.items(): + assert isinstance(k, str) + result[_js2py(k, references)] = _js2py(v, references) + return result raise TypeError(f"Object of type {type(o)} is not allowed {o}") diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index 892fe5448..fa759c9cb 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -16,6 +16,7 @@ ("hello",), (1, [2], "hello"), [1, [2], "hello"], + {"hello": "world"}, ], ) def test_roundtrip(original_data): From 58558384cfe909eee6e3afbadd479ea7d6610f34 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 20:45:03 +1100 Subject: [PATCH 07/31] Implement dict with complex key <-> object conversion --- rope/base/serializer.py | 12 ++++++++---- ropetest/serializer_test.py | 2 ++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index ec31da5a7..cdcbabfa2 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -63,8 +63,9 @@ def _py2js(o, references): elif isinstance(o, dict): result = {} for k, v in o.items(): - assert isinstance(k, str) - result[_py2js(k, references)] = _py2js(v, references) + refid = len(references) + references.append(_py2js(k, references)) + result[str(refid)] = _py2js(v, references) return result raise TypeError(f"Object of type {type(o)} is not allowed {o}") @@ -82,8 +83,11 @@ def _js2py(o, references): assert False elif isinstance(o, dict): result = {} - for k, v in o.items(): - assert isinstance(k, str) + for refid, v in o.items(): + assert isinstance(refid, str) + refid = int(refid) + assert 0 <= refid < len(references) + k = references[refid] result[_js2py(k, references)] = _js2py(v, references) return result raise TypeError(f"Object of type {type(o)} is not allowed {o}") diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index fa759c9cb..fa0511cd2 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -17,6 +17,8 @@ (1, [2], "hello"), [1, [2], "hello"], {"hello": "world"}, + {"hello": ("hello", 1)}, + {("hello", 1): "world"}, ], ) def test_roundtrip(original_data): From f2f65e554400a25cff83d370743c8235871147fe Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 20:56:49 +1100 Subject: [PATCH 08/31] Add test for the shape of encoded data --- ropetest/serializer_test.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index fa0511cd2..fc9ae8745 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -31,6 +31,32 @@ def test_roundtrip(original_data): assert rehydrated_data == original_data +@pytest.mark.parametrize( + "original_data,expected_encoded", + [ + (None, None), + (4, 4), + ("3", "3"), + ((), ["tuple", []]), + ([], ["list", []]), + (("hello",), ["tuple", ["hello",]]), + ((1, [2], "hello"), ["tuple", [1, ["list", [2]], "hello"]]), + ([1, [2], "hello"], ["list", [1, ["list", [2]], "hello"]]), + ], +) +def test_expected_encoded_simple(original_data, expected_encoded): + encoded = python_to_json(original_data) + serialized = json.dumps(encoded) + decoded = json.loads(serialized) + rehydrated_data = json_to_python(decoded) + + assert encoded == decoded + assert encoded["version"] == 1 + assert encoded["data"] == expected_encoded and encoded["references"] == [] + assert rehydrated_data == original_data + + + @pytest.mark.parametrize( "original_data", [ From 20f1a192f9be57c0cfb46e5d1c3a02f5644c3524 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 21:00:21 +1100 Subject: [PATCH 09/31] Use single letter type code, for compactness --- rope/base/serializer.py | 12 ++++++------ ropetest/serializer_test.py | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index cdcbabfa2..413ba4a2c 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -19,8 +19,8 @@ Couple challenges in straight serialization that this module helps resolve: - json.dumps() maps both Python list and tuple to JSON array. This module - converts Python list `[1, 2, 3]` to `["list", [1, 2, 3]]` and Python tuple - `(1, 2, 3)` to `["tuple", [1, 2, 3]]` + converts Python list `[1, 2, 3]` to `["l", [1, 2, 3]]` and Python tuple + `(1, 2, 3)` to `["t", [1, 2, 3]]` - Python Dictionary keys can be a tuple, but JSON Object keys must be strings This module replaces all `dict` keys with refid which can be resolved using @@ -57,9 +57,9 @@ def _py2js(o, references): if isinstance(o, (str, int)) or o is None: return o elif isinstance(o, tuple): - return ["tuple", [_py2js(item, references) for item in o]] + return ["t", [_py2js(item, references) for item in o]] elif isinstance(o, list): - return ["list", [_py2js(item, references) for item in o]] + return ["l", [_py2js(item, references) for item in o]] elif isinstance(o, dict): result = {} for k, v in o.items(): @@ -76,9 +76,9 @@ def _js2py(o, references): return o elif isinstance(o, list): typ, data = o - if typ == "tuple": + if typ == "t": return tuple(_js2py(item, references) for item in data) - elif typ == "list": + elif typ == "l": return list(_js2py(item, references) for item in data) assert False elif isinstance(o, dict): diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index fc9ae8745..f7ad31617 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -37,11 +37,11 @@ def test_roundtrip(original_data): (None, None), (4, 4), ("3", "3"), - ((), ["tuple", []]), - ([], ["list", []]), - (("hello",), ["tuple", ["hello",]]), - ((1, [2], "hello"), ["tuple", [1, ["list", [2]], "hello"]]), - ([1, [2], "hello"], ["list", [1, ["list", [2]], "hello"]]), + ((), ["t", []]), + ([], ["l", []]), + (("hello",), ["t", ["hello",]]), + ((1, [2], "hello"), ["t", [1, ["l", [2]], "hello"]]), + ([1, [2], "hello"], ["l", [1, ["l", [2]], "hello"]]), ], ) def test_expected_encoded_simple(original_data, expected_encoded): From 1ac83e3ce7e42cc17916e756c3e054dd5605ebc6 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 21:01:53 +1100 Subject: [PATCH 10/31] Add test for version number --- ropetest/serializer_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index f7ad31617..43470c412 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -51,7 +51,7 @@ def test_expected_encoded_simple(original_data, expected_encoded): rehydrated_data = json_to_python(decoded) assert encoded == decoded - assert encoded["version"] == 1 + assert encoded["v"] == 1 assert encoded["data"] == expected_encoded and encoded["references"] == [] assert rehydrated_data == original_data From 90c636ded236c7ab6ade661f60506273add7028e Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 21:05:38 +1100 Subject: [PATCH 11/31] Document the types that can be used for dict keys --- rope/base/serializer.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index 413ba4a2c..1d8393153 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -22,9 +22,14 @@ converts Python list `[1, 2, 3]` to `["l", [1, 2, 3]]` and Python tuple `(1, 2, 3)` to `["t", [1, 2, 3]]` -- Python Dictionary keys can be a tuple, but JSON Object keys must be strings - This module replaces all `dict` keys with refid which can be resolved using - the `encoded["references"][refid]` lookup table. +- Python dict keys can be a tuple/dict, but JSON Object keys must be strings + This module replaces all `dict` keys with `refid` which can be resolved using + the `encoded["references"][refid]` lookup table. Except there's a small + optimisation, if the dict key is a string that isn't only numeric, which is + encoded directly into the object. + +- Python dict keys cannot be another dict because it is unhashable, therefore + there's no encoding for having objects as keys either. - There is currently no support for floating point numbers. From 781c30cc05669fc630165dfed61d29e45dc7e5e0 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 21:11:10 +1100 Subject: [PATCH 12/31] Simplify encoding for dict with non-numeric string key --- rope/base/serializer.py | 20 +++++++++++++------- ropetest/serializer_test.py | 1 + 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index 1d8393153..72f974375 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -68,9 +68,12 @@ def _py2js(o, references): elif isinstance(o, dict): result = {} for k, v in o.items(): - refid = len(references) - references.append(_py2js(k, references)) - result[str(refid)] = _py2js(v, references) + if isinstance(k, str) and not k.isdigit(): + result[k] = _py2js(v, references) + else: + refid = len(references) + references.append(_py2js(k, references)) + result[str(refid)] = _py2js(v, references) return result raise TypeError(f"Object of type {type(o)} is not allowed {o}") @@ -90,9 +93,12 @@ def _js2py(o, references): result = {} for refid, v in o.items(): assert isinstance(refid, str) - refid = int(refid) - assert 0 <= refid < len(references) - k = references[refid] - result[_js2py(k, references)] = _js2py(v, references) + if refid.isdigit(): + refid = int(refid) + assert 0 <= refid < len(references) + k = references[refid] + result[_js2py(k, references)] = _js2py(v, references) + else: + result[refid] = _js2py(v, references) return result raise TypeError(f"Object of type {type(o)} is not allowed {o}") diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index 43470c412..988ea5b15 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -42,6 +42,7 @@ def test_roundtrip(original_data): (("hello",), ["t", ["hello",]]), ((1, [2], "hello"), ["t", [1, ["l", [2]], "hello"]]), ([1, [2], "hello"], ["l", [1, ["l", [2]], "hello"]]), + ({"hello": "world"}, {"hello": "world"}), ], ) def test_expected_encoded_simple(original_data, expected_encoded): From 483792704b55019cf02800508456c98f8d2a8259 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 21:18:23 +1100 Subject: [PATCH 13/31] Split the assertion for better clarity --- ropetest/serializer_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index 988ea5b15..a67db4643 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -53,7 +53,8 @@ def test_expected_encoded_simple(original_data, expected_encoded): assert encoded == decoded assert encoded["v"] == 1 - assert encoded["data"] == expected_encoded and encoded["references"] == [] + assert encoded["data"] == expected_encoded + assert encoded["references"] == [] assert rehydrated_data == original_data From 23f10f3c0682aced3ec8f875b8e18a657af2ddef Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 21:24:37 +1100 Subject: [PATCH 14/31] Add a few more corner cases --- ropetest/serializer_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index a67db4643..fef681586 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -19,6 +19,8 @@ {"hello": "world"}, {"hello": ("hello", 1)}, {("hello", 1): "world"}, + {"4": "hello"}, + {4: "hello"}, ], ) def test_roundtrip(original_data): @@ -43,6 +45,7 @@ def test_roundtrip(original_data): ((1, [2], "hello"), ["t", [1, ["l", [2]], "hello"]]), ([1, [2], "hello"], ["l", [1, ["l", [2]], "hello"]]), ({"hello": "world"}, {"hello": "world"}), + ({"hello": ("hello", 1)}, {"hello": ["t", ["hello", 1]]}), ], ) def test_expected_encoded_simple(original_data, expected_encoded): From 0085530409143f077efa808417c54543ef14fdd4 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 21:37:43 +1100 Subject: [PATCH 15/31] Add test for complex serialization with references --- ropetest/serializer_test.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index fef681586..246ea425e 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -62,6 +62,40 @@ def test_expected_encoded_simple(original_data, expected_encoded): +@pytest.mark.parametrize( + "original_data,expected_encoded,expected_references", + [ + ( + {("hello", 1): "world"}, + {"0": "world"}, + [["t", ["hello", 1]]], + ), + ( + {"4": "hello"}, + {"0": "hello"}, + ["4"], + ), + ( + {4: "hello"}, + {"0": "hello"}, + [4], + ), + ], +) +def test_expected_encoded_with_references(original_data, expected_encoded, expected_references): + encoded = python_to_json(original_data) + serialized = json.dumps(encoded) + decoded = json.loads(serialized) + rehydrated_data = json_to_python(decoded) + + assert encoded == decoded + assert encoded["v"] == 1 + assert encoded["data"] == expected_encoded + assert encoded["references"] == expected_references + assert rehydrated_data == original_data + + + @pytest.mark.parametrize( "original_data", [ From f067b8dbd370151a79576840709b255874c8f749 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 21:40:36 +1100 Subject: [PATCH 16/31] Annotate what can be dict key --- rope/base/serializer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index 72f974375..17d4f5480 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -71,6 +71,7 @@ def _py2js(o, references): if isinstance(k, str) and not k.isdigit(): result[k] = _py2js(v, references) else: + assert isinstance(k, (str, int, list, tuple)) refid = len(references) references.append(_py2js(k, references)) result[str(refid)] = _py2js(v, references) From 1e89b0ea0487dfdfb386f80f809b192e34283509 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 21:42:54 +1100 Subject: [PATCH 17/31] Allow None to be dict key --- rope/base/serializer.py | 2 +- ropetest/serializer_test.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index 17d4f5480..e219867ff 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -71,7 +71,7 @@ def _py2js(o, references): if isinstance(k, str) and not k.isdigit(): result[k] = _py2js(v, references) else: - assert isinstance(k, (str, int, list, tuple)) + assert isinstance(k, (str, int, list, tuple)) or k is None refid = len(references) references.append(_py2js(k, references)) result[str(refid)] = _py2js(v, references) diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index 246ea425e..789441c84 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -80,6 +80,11 @@ def test_expected_encoded_simple(original_data, expected_encoded): {"0": "hello"}, [4], ), + ( + {None: "hello"}, + {"0": "hello"}, + [None], + ), ], ) def test_expected_encoded_with_references(original_data, expected_encoded, expected_references): From d4af8183d4f318dab4466c5690dba36df94c6107 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 22:47:10 +1100 Subject: [PATCH 18/31] Reserve special key "$" --- rope/base/serializer.py | 2 ++ ropetest/serializer_test.py | 11 ++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index e219867ff..87729c30f 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -68,6 +68,8 @@ def _py2js(o, references): elif isinstance(o, dict): result = {} for k, v in o.items(): + if k == "$": + raise ValueError('dict cannot contain reserved key "$"') if isinstance(k, str) and not k.isdigit(): result[k] = _py2js(v, references) else: diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index 789441c84..61c0eab20 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -102,12 +102,13 @@ def test_expected_encoded_with_references(original_data, expected_encoded, expec @pytest.mark.parametrize( - "original_data", + "original_data,exctype", [ - object(), - 4.8, + (object(), TypeError), + (4.8, TypeError), + ({"$": "hello"}, ValueError), ], ) -def test_rejects_unrecognized_object(original_data): - with pytest.raises(TypeError): +def test_rejects_unrecognized_object(original_data, exctype): + with pytest.raises(exctype): python_to_json(original_data) From 266f735b047dc2d619593525d4d68c54a99662c3 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 22:54:17 +1100 Subject: [PATCH 19/31] New list encoding to use object encoding It's easier to parse this because `json.loads(object_hook)` exists, but there's no `json.loads(array_hook)` --- rope/base/serializer.py | 26 ++++++++++++++++---------- ropetest/serializer_test.py | 10 +++++----- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index 87729c30f..ee7beb011 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -62,7 +62,7 @@ def _py2js(o, references): if isinstance(o, (str, int)) or o is None: return o elif isinstance(o, tuple): - return ["t", [_py2js(item, references) for item in o]] + return {"$": "t", "items": [_py2js(item, references) for item in o]} elif isinstance(o, list): return ["l", [_py2js(item, references) for item in o]] elif isinstance(o, dict): @@ -94,14 +94,20 @@ def _js2py(o, references): assert False elif isinstance(o, dict): result = {} - for refid, v in o.items(): - assert isinstance(refid, str) - if refid.isdigit(): - refid = int(refid) - assert 0 <= refid < len(references) - k = references[refid] - result[_js2py(k, references)] = _js2py(v, references) - else: - result[refid] = _js2py(v, references) + if "$" in o: + if o["$"] == "t": + data = o["items"] + return tuple(_js2py(item, references) for item in data) + raise TypeError(f'Unrecognized object of type: {o["$"]} {o}') + else: + for refid, v in o.items(): + assert isinstance(refid, str) + if refid.isdigit(): + refid = int(refid) + assert 0 <= refid < len(references) + k = references[refid] + result[_js2py(k, references)] = _js2py(v, references) + else: + result[refid] = _js2py(v, references) return result raise TypeError(f"Object of type {type(o)} is not allowed {o}") diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index 61c0eab20..6a3743f8a 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -39,13 +39,13 @@ def test_roundtrip(original_data): (None, None), (4, 4), ("3", "3"), - ((), ["t", []]), + ((), {"$": "t", "items": []}), ([], ["l", []]), - (("hello",), ["t", ["hello",]]), - ((1, [2], "hello"), ["t", [1, ["l", [2]], "hello"]]), + (("hello",), {"$": "t", "items": ["hello",]}), + ((1, [2], "hello"), {"$": "t", "items": [1, ["l", [2]], "hello"]}), ([1, [2], "hello"], ["l", [1, ["l", [2]], "hello"]]), ({"hello": "world"}, {"hello": "world"}), - ({"hello": ("hello", 1)}, {"hello": ["t", ["hello", 1]]}), + ({"hello": ("hello", 1)}, {"hello": {"$": "t", "items": ["hello", 1]}}), ], ) def test_expected_encoded_simple(original_data, expected_encoded): @@ -68,7 +68,7 @@ def test_expected_encoded_simple(original_data, expected_encoded): ( {("hello", 1): "world"}, {"0": "world"}, - [["t", ["hello", 1]]], + [{"$": "t", "items": ["hello", 1]}], ), ( {"4": "hello"}, From 06d714192dfa96137fd3f25dec7c24db36b1d27c Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 23:02:02 +1100 Subject: [PATCH 20/31] Simplify list encoding Now that tuple encoding is no longer being overloaded with list, this can become much simpler. --- rope/base/serializer.py | 9 ++------- ropetest/serializer_test.py | 6 +++--- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index ee7beb011..03cc45b18 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -64,7 +64,7 @@ def _py2js(o, references): elif isinstance(o, tuple): return {"$": "t", "items": [_py2js(item, references) for item in o]} elif isinstance(o, list): - return ["l", [_py2js(item, references) for item in o]] + return [_py2js(item, references) for item in o] elif isinstance(o, dict): result = {} for k, v in o.items(): @@ -86,12 +86,7 @@ def _js2py(o, references): if isinstance(o, (str, int)) or o is None: return o elif isinstance(o, list): - typ, data = o - if typ == "t": - return tuple(_js2py(item, references) for item in data) - elif typ == "l": - return list(_js2py(item, references) for item in data) - assert False + return list(_js2py(item, references) for item in o) elif isinstance(o, dict): result = {} if "$" in o: diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index 6a3743f8a..b8c052d30 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -40,10 +40,10 @@ def test_roundtrip(original_data): (4, 4), ("3", "3"), ((), {"$": "t", "items": []}), - ([], ["l", []]), + ([], []), (("hello",), {"$": "t", "items": ["hello",]}), - ((1, [2], "hello"), {"$": "t", "items": [1, ["l", [2]], "hello"]}), - ([1, [2], "hello"], ["l", [1, ["l", [2]], "hello"]]), + ((1, [2], "hello"), {"$": "t", "items": [1, [2], "hello"]}), + ([1, [2], "hello"], [1, [2], "hello"]), ({"hello": "world"}, {"hello": "world"}), ({"hello": ("hello", 1)}, {"hello": {"$": "t", "items": ["hello", 1]}}), ], From d841ad2e4e4e60004b9f433f1023cd2b0c986467 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 23:04:06 +1100 Subject: [PATCH 21/31] Update documentation to reflect current implementation --- rope/base/serializer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index 03cc45b18..b70b7fd9c 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -19,8 +19,8 @@ Couple challenges in straight serialization that this module helps resolve: - json.dumps() maps both Python list and tuple to JSON array. This module - converts Python list `[1, 2, 3]` to `["l", [1, 2, 3]]` and Python tuple - `(1, 2, 3)` to `["t", [1, 2, 3]]` + converts Python list `[1, 2, 3]` as-is and converts Python tuple `(1, 2, 3)` + to special object construct `{"$": "t", "items": [1, 2, 3]}` - Python dict keys can be a tuple/dict, but JSON Object keys must be strings This module replaces all `dict` keys with `refid` which can be resolved using From d4e356cb7f68931b57f7447cc01526c5b2c241c2 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Fri, 16 Dec 2022 23:56:10 +1100 Subject: [PATCH 22/31] Change ScopeInfo.__getstate__() and __setstate__() to use rope.base.serializer --- rope/base/oi/memorydb.py | 25 ++++++++++++++++--- ropetest/objectdbtest.py | 53 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/rope/base/oi/memorydb.py b/rope/base/oi/memorydb.py index f138dd500..0a54fc1a1 100644 --- a/rope/base/oi/memorydb.py +++ b/rope/base/oi/memorydb.py @@ -1,5 +1,7 @@ -from rope.base.oi import objectdb +from rope.base.serializer import python_to_json, json_to_python + from rope.base import utils +from rope.base.oi import objectdb class MemoryDB(objectdb.FileDict): @@ -115,7 +117,24 @@ def add_call(self, parameters, returned): self.call_info[parameters] = returned def __getstate__(self): - return (self.call_info, self.per_name) + import json + original_data = (self.call_info, self.per_name) + encoded = python_to_json(original_data) + serialized = json.dumps(encoded) + decoded = json.loads(serialized) + rehydrated_data = json_to_python(decoded) + + assert encoded == decoded + assert rehydrated_data == original_data + + encoded["$"] = "ScopeInfo" + return encoded def __setstate__(self, data): - self.call_info, self.per_name = data + if isinstance(data, tuple) and len(data) == 2: + # legacy pickle-based serialization + self.call_info, self.per_name = data + else: + # new serialization + assert data["$"] == "ScopeInfo" + self.call_info, self.per_name = json_to_python(data) diff --git a/ropetest/objectdbtest.py b/ropetest/objectdbtest.py index 1a8b6c75a..a5e74a3b1 100644 --- a/ropetest/objectdbtest.py +++ b/ropetest/objectdbtest.py @@ -147,3 +147,56 @@ def test_using_file_list_observer(self, db): db.add_file_list_observer(observer) db.validate_files() self.assertEqual("removed invalid ", observer.log) + + @_do_for_all_dbs + def test_legacy_serialization(self, db): + import pickle + + db.add_callinfo("file", "key", (1, 2), 3) + db.add_pername("file", "key", "name", 1) + scope_info = db._get_scope_info("file", "key") + + pickled_data = b'\x80\x04\x95D\x00\x00\x00\x00\x00\x00\x00\x8c\x15rope.base.oi.memorydb\x94\x8c\tScopeInfo\x94\x93\x94)\x81\x94}\x94K\x01K\x02\x86\x94K\x03s}\x94\x8c\x04name\x94K\x01s\x86\x94b.' # noqa + + assert pickle.loads(pickled_data).call_info == scope_info.call_info + assert pickle.loads(pickled_data).per_name == scope_info.per_name + + @_do_for_all_dbs + def test_new_pickle_serialization(self, db): + import pickle + + db.add_callinfo("file", "key", (1, 2), 3) + db.add_pername("file", "key", "name", 1) + scope_info = db._get_scope_info("file", "key") + + serialized = pickle.dumps(scope_info) + + rehydrated_data = pickle.loads(serialized) + assert rehydrated_data.call_info == scope_info.call_info + assert rehydrated_data.per_name == scope_info.per_name + + @_do_for_all_dbs + def test_new_json_serialization(self, db): + import json + from rope.base.oi.memorydb import ScopeInfo + + db.add_callinfo("file", "key", (1, 2), 3) + db.add_pername("file", "key", "name", 1) + scope_info = db._get_scope_info("file", "key") + + data = {"inside": [scope_info], "other": scope_info, "things": [1, 2, 3]} + + def object_hook(o): + if o.get("$") == "ScopeInfo": + new_o = ScopeInfo.__new__(ScopeInfo) + new_o.__setstate__(o) + return new_o + return o + + serialized = json.dumps(data, default=lambda o: o.__getstate__()) + rehydrated_data = json.loads(serialized, object_hook=object_hook) + + rehydrated_scope_info = rehydrated_data["inside"][0] + assert isinstance(rehydrated_scope_info, ScopeInfo) + assert rehydrated_scope_info.call_info == scope_info.call_info + assert rehydrated_scope_info.per_name == scope_info.per_name From e8fbaa583c67f48159c452ba33f7e314ea3d4f9a Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Sat, 17 Dec 2022 00:08:10 +1100 Subject: [PATCH 23/31] Added version 2 of the serializer --- rope/base/serializer.py | 32 +++++++++++++++++--------------- ropetest/serializer_test.py | 5 +++-- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index b70b7fd9c..8bd0a5b9a 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -42,57 +42,59 @@ """ -def python_to_json(o): +def python_to_json(o, version=1): + assert version in (1, 2) references = [] return { "v": 1, - "data": _py2js(o, references), + "data": _py2js(o, references, version=version), "references": references, } def json_to_python(o): - assert o["v"] == 1 + version = o["v"] + assert version in (1, 2) references = o["references"] - data = _js2py(o["data"], references) + data = _js2py(o["data"], references, version) return data -def _py2js(o, references): +def _py2js(o, references, version): if isinstance(o, (str, int)) or o is None: return o elif isinstance(o, tuple): - return {"$": "t", "items": [_py2js(item, references) for item in o]} + return {"$": "t", "items": [_py2js(item, references, version) for item in o]} elif isinstance(o, list): - return [_py2js(item, references) for item in o] + return [_py2js(item, references, version) for item in o] elif isinstance(o, dict): result = {} for k, v in o.items(): if k == "$": raise ValueError('dict cannot contain reserved key "$"') if isinstance(k, str) and not k.isdigit(): - result[k] = _py2js(v, references) + result[k] = _py2js(v, references, version) else: assert isinstance(k, (str, int, list, tuple)) or k is None refid = len(references) - references.append(_py2js(k, references)) - result[str(refid)] = _py2js(v, references) + references.append(_py2js(k, references, version)) + result[str(refid)] = _py2js(v, references, version) return result raise TypeError(f"Object of type {type(o)} is not allowed {o}") -def _js2py(o, references): +def _js2py(o, references, version): assert not isinstance(o, tuple) if isinstance(o, (str, int)) or o is None: return o elif isinstance(o, list): - return list(_js2py(item, references) for item in o) + return list(_js2py(item, references, version) for item in o) elif isinstance(o, dict): result = {} if "$" in o: if o["$"] == "t": data = o["items"] - return tuple(_js2py(item, references) for item in data) + return tuple(_js2py(item, references, version) for item in data) raise TypeError(f'Unrecognized object of type: {o["$"]} {o}') else: for refid, v in o.items(): @@ -101,8 +103,8 @@ def _js2py(o, references): refid = int(refid) assert 0 <= refid < len(references) k = references[refid] - result[_js2py(k, references)] = _js2py(v, references) + result[_js2py(k, references, version)] = _js2py(v, references, version) else: - result[refid] = _js2py(v, references) + result[refid] = _js2py(v, references, version) return result raise TypeError(f"Object of type {type(o)} is not allowed {o}") diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index b8c052d30..d7b5e66b8 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -5,6 +5,7 @@ from rope.base.serializer import python_to_json, json_to_python +@pytest.mark.parametrize("version", [1, 2]) @pytest.mark.parametrize( "original_data", [ @@ -23,8 +24,8 @@ {4: "hello"}, ], ) -def test_roundtrip(original_data): - encoded = python_to_json(original_data) +def test_roundtrip(original_data, version): + encoded = python_to_json(original_data, version) serialized = json.dumps(encoded) decoded = json.loads(serialized) rehydrated_data = json_to_python(decoded) From b66d7f5176d0d83c93aa4d022dcb3286b7aa3216 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Sat, 17 Dec 2022 00:50:09 +1100 Subject: [PATCH 24/31] Implement version 2 serializer --- rope/base/serializer.py | 34 +++++++++++++++++---- ropetest/serializer_test.py | 59 ++++++++++++++++++++++++------------- 2 files changed, 67 insertions(+), 26 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index 8bd0a5b9a..4cc988a04 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -19,8 +19,15 @@ Couple challenges in straight serialization that this module helps resolve: - json.dumps() maps both Python list and tuple to JSON array. This module - converts Python list `[1, 2, 3]` as-is and converts Python tuple `(1, 2, 3)` - to special object construct `{"$": "t", "items": [1, 2, 3]}` + provides two variants: + + - In version=1, this module converts Python list `[1, 2, 3]` as-is and + converts Python tuple `(1, 2, 3)` to special object construct + `{"$": "t", "items": [1, 2, 3]}` + + - In version=2, it is the other way around, this module converts Python tuple + `(1, 2, 3)` as-is and converts Python list `[1, 2, 3]` to special object + construct `{"$": "l", "items": [1, 2, 3]}` - Python dict keys can be a tuple/dict, but JSON Object keys must be strings This module replaces all `dict` keys with `refid` which can be resolved using @@ -46,7 +53,7 @@ def python_to_json(o, version=1): assert version in (1, 2) references = [] return { - "v": 1, + "v": version, "data": _py2js(o, references, version=version), "references": references, } @@ -64,9 +71,15 @@ def _py2js(o, references, version): if isinstance(o, (str, int)) or o is None: return o elif isinstance(o, tuple): - return {"$": "t", "items": [_py2js(item, references, version) for item in o]} + if version == 1: + return {"$": "t", "items": [_py2js(item, references, version) for item in o]} + else: + return [_py2js(item, references, version) for item in o] elif isinstance(o, list): - return [_py2js(item, references, version) for item in o] + if version == 2: + return {"$": "l", "items": [_py2js(item, references, version) for item in o]} + else: + return [_py2js(item, references, version) for item in o] elif isinstance(o, dict): result = {} for k, v in o.items(): @@ -88,13 +101,22 @@ def _js2py(o, references, version): if isinstance(o, (str, int)) or o is None: return o elif isinstance(o, list): - return list(_js2py(item, references, version) for item in o) + if version == 1: + return list(_js2py(item, references, version) for item in o) + elif version == 2: + return tuple(_js2py(item, references, version) for item in o) + assert False elif isinstance(o, dict): result = {} if "$" in o: if o["$"] == "t": + assert version == 1 data = o["items"] return tuple(_js2py(item, references, version) for item in data) + elif o["$"] == "l": + assert version == 2 + data = o["items"] + return list(_js2py(item, references, version) for item in data) raise TypeError(f'Unrecognized object of type: {o["$"]} {o}') else: for refid, v in o.items(): diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index d7b5e66b8..df17b72df 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -16,7 +16,7 @@ [], ("hello",), (1, [2], "hello"), - [1, [2], "hello"], + [1, (2,), "hello"], {"hello": "world"}, {"hello": ("hello", 1)}, {("hello", 1): "world"}, @@ -34,62 +34,80 @@ def test_roundtrip(original_data, version): assert rehydrated_data == original_data -@pytest.mark.parametrize( - "original_data,expected_encoded", - [ +def expected_encoded_simple_data(version): + data = [ (None, None), (4, 4), ("3", "3"), - ((), {"$": "t", "items": []}), - ([], []), - (("hello",), {"$": "t", "items": ["hello",]}), - ((1, [2], "hello"), {"$": "t", "items": [1, [2], "hello"]}), - ([1, [2], "hello"], [1, [2], "hello"]), ({"hello": "world"}, {"hello": "world"}), - ({"hello": ("hello", 1)}, {"hello": {"$": "t", "items": ["hello", 1]}}), + ] + return [[*d, version] for d in data] + +@pytest.mark.parametrize( + "original_data,expected_encoded,version", + [ + *expected_encoded_simple_data(version=1), + *expected_encoded_simple_data(version=2), + + ((), {"$": "t", "items": []}, 1), + ([], [], 1), + (("hello",), {"$": "t", "items": ["hello"]}, 1), + ((1, [2], "hello"), {"$": "t", "items": [1, [2], "hello"]}, 1), + ([1, (2,), "hello"], [1, {"$": "t", "items": [2]}, "hello"], 1), + ({"hello": ("hello", 1)}, {"hello": {"$": "t", "items": ["hello", 1]}}, 1), + + ((), [], 2), + ([], {"$": "l", "items": []}, 2), + (("hello",), ["hello"], 2), + ((1, [2], "hello"), [1, {"$": "l", "items": [2]}, "hello"], 2), + ([1, (2,), "hello"], {"$": "l", "items": [1, [2], "hello"]}, 2), + ({"hello": ("hello", 1)}, {"hello": ["hello", 1]}, 2), ], ) -def test_expected_encoded_simple(original_data, expected_encoded): - encoded = python_to_json(original_data) +def test_expected_encoded_simple(original_data, expected_encoded, version): + encoded = python_to_json(original_data, version) serialized = json.dumps(encoded) decoded = json.loads(serialized) rehydrated_data = json_to_python(decoded) assert encoded == decoded - assert encoded["v"] == 1 - assert encoded["data"] == expected_encoded + assert encoded["v"] == version + assert encoded["data"] == expected_encoded, (original_data, version) assert encoded["references"] == [] assert rehydrated_data == original_data - @pytest.mark.parametrize( - "original_data,expected_encoded,expected_references", + "original_data,expected_encoded,expected_references,version", [ ( {("hello", 1): "world"}, {"0": "world"}, [{"$": "t", "items": ["hello", 1]}], + 1, ), ( {"4": "hello"}, {"0": "hello"}, ["4"], + 1, ), ( {4: "hello"}, {"0": "hello"}, [4], + 1, ), ( {None: "hello"}, {"0": "hello"}, [None], + 1, ), ], ) -def test_expected_encoded_with_references(original_data, expected_encoded, expected_references): - encoded = python_to_json(original_data) +def test_expected_encoded_with_references(original_data, expected_encoded, expected_references, version): + encoded = python_to_json(original_data, version) serialized = json.dumps(encoded) decoded = json.loads(serialized) rehydrated_data = json_to_python(decoded) @@ -102,6 +120,7 @@ def test_expected_encoded_with_references(original_data, expected_encoded, expec +@pytest.mark.parametrize("version", [1, 2]) @pytest.mark.parametrize( "original_data,exctype", [ @@ -110,6 +129,6 @@ def test_expected_encoded_with_references(original_data, expected_encoded, expec ({"$": "hello"}, ValueError), ], ) -def test_rejects_unrecognized_object(original_data, exctype): +def test_rejects_unrecognized_object(original_data, exctype, version): with pytest.raises(exctype): - python_to_json(original_data) + python_to_json(original_data, version) From 08a1d07d675430b38027defab0c9e910b4106f42 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Sat, 17 Dec 2022 00:55:14 +1100 Subject: [PATCH 25/31] Use version 2 serializer for ScopeInfo --- rope/base/oi/memorydb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rope/base/oi/memorydb.py b/rope/base/oi/memorydb.py index 0a54fc1a1..e915f731a 100644 --- a/rope/base/oi/memorydb.py +++ b/rope/base/oi/memorydb.py @@ -119,7 +119,7 @@ def add_call(self, parameters, returned): def __getstate__(self): import json original_data = (self.call_info, self.per_name) - encoded = python_to_json(original_data) + encoded = python_to_json(original_data, version=2) serialized = json.dumps(encoded) decoded = json.loads(serialized) rehydrated_data = json_to_python(decoded) From 6fd3739cb7708e019c3c19e3d3aa7e2ccba7ad32 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Sat, 17 Dec 2022 01:00:46 +1100 Subject: [PATCH 26/31] Don't store "references" if it's empty --- rope/base/serializer.py | 7 +++++-- ropetest/serializer_test.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index 4cc988a04..53c51caf7 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -52,17 +52,20 @@ def python_to_json(o, version=1): assert version in (1, 2) references = [] - return { + result = { "v": version, "data": _py2js(o, references, version=version), "references": references, } + if not result["references"]: + del result["references"] + return result def json_to_python(o): version = o["v"] assert version in (1, 2) - references = o["references"] + references = o.get("references", {}) data = _js2py(o["data"], references, version) return data diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index df17b72df..0cbfcce6a 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -73,7 +73,7 @@ def test_expected_encoded_simple(original_data, expected_encoded, version): assert encoded == decoded assert encoded["v"] == version assert encoded["data"] == expected_encoded, (original_data, version) - assert encoded["references"] == [] + assert "references" not in encoded assert rehydrated_data == original_data From 86c1e14acfe93ff16b868342d9d8d7a1aef5c45a Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Sat, 17 Dec 2022 01:36:22 +1100 Subject: [PATCH 27/31] Improve error handling --- rope/base/serializer.py | 14 ++++++++------ ropetest/serializer_test.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index 53c51caf7..d9bdcda63 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -50,7 +50,8 @@ def python_to_json(o, version=1): - assert version in (1, 2) + if version not in (1, 2): + raise ValueError(f'Unexpected version {version}') references = [] result = { "v": version, @@ -64,7 +65,8 @@ def python_to_json(o, version=1): def json_to_python(o): version = o["v"] - assert version in (1, 2) + if version not in (1, 2): + raise ValueError(f'Unexpected version {version}') references = o.get("references", {}) data = _js2py(o["data"], references, version) return data @@ -91,7 +93,8 @@ def _py2js(o, references, version): if isinstance(k, str) and not k.isdigit(): result[k] = _py2js(v, references, version) else: - assert isinstance(k, (str, int, list, tuple)) or k is None + assert isinstance(k, (str, int, tuple)) or k is None + assert not isinstance(k, list) refid = len(references) references.append(_py2js(k, references, version)) result[str(refid)] = _py2js(v, references, version) @@ -100,7 +103,6 @@ def _py2js(o, references, version): def _js2py(o, references, version): - assert not isinstance(o, tuple) if isinstance(o, (str, int)) or o is None: return o elif isinstance(o, list): @@ -108,7 +110,7 @@ def _js2py(o, references, version): return list(_js2py(item, references, version) for item in o) elif version == 2: return tuple(_js2py(item, references, version) for item in o) - assert False + raise ValueError(f'Unexpected version {version}') elif isinstance(o, dict): result = {} if "$" in o: @@ -132,4 +134,4 @@ def _js2py(o, references, version): else: result[refid] = _js2py(v, references, version) return result - raise TypeError(f"Object of type {type(o)} is not allowed {o}") + raise TypeError(f'Object of type "{type(o).__name__}" is not allowed {o}') diff --git a/ropetest/serializer_test.py b/ropetest/serializer_test.py index 0cbfcce6a..981e3564e 100644 --- a/ropetest/serializer_test.py +++ b/ropetest/serializer_test.py @@ -2,7 +2,7 @@ import pytest -from rope.base.serializer import python_to_json, json_to_python +from rope.base.serializer import python_to_json, json_to_python, _js2py @pytest.mark.parametrize("version", [1, 2]) @@ -132,3 +132,36 @@ def test_expected_encoded_with_references(original_data, expected_encoded, expec def test_rejects_unrecognized_object(original_data, exctype, version): with pytest.raises(exctype): python_to_json(original_data, version) + + +def test_unexpected_version_python_to_json(): + with pytest.raises(ValueError, match="Unexpected version"): + python_to_json({"hello": ["world"]}, version=-123456) + + +def test_unexpected_version_json_to_python(): + modified = python_to_json({"hello": ["world"]}) + modified["v"] = -123456 + assert isinstance(modified["data"]["hello"], list) + + with pytest.raises(ValueError, match="Unexpected version"): + json_to_python(modified) + + with pytest.raises(ValueError, match="Unexpected version"): + _js2py(modified["data"], {}, modified["v"]) + + +def test_unexpected_dollar_object_type(): + modified = python_to_json({"hello": ["world"]}) + modified["data"]["$"] = "unexpected" + + with pytest.raises(TypeError, match="Unrecognized object of type"): + json_to_python(modified) + + +def test_unexpected_object_type(): + modified = python_to_json({"hello": ["world"]}) + modified["data"]["hello"] = () + + with pytest.raises(TypeError, match='Object of type "tuple" is not allowed'): + json_to_python(modified) From ffa4851551fb38f08d8d2c11a0322bae43acf42f Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Sat, 17 Dec 2022 01:51:10 +1100 Subject: [PATCH 28/31] Write both the pickle and .json version of _DataFile --- rope/base/oi/memorydb.py | 8 -------- rope/base/project.py | 5 ++++- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/rope/base/oi/memorydb.py b/rope/base/oi/memorydb.py index e915f731a..fe5920428 100644 --- a/rope/base/oi/memorydb.py +++ b/rope/base/oi/memorydb.py @@ -117,16 +117,8 @@ def add_call(self, parameters, returned): self.call_info[parameters] = returned def __getstate__(self): - import json original_data = (self.call_info, self.per_name) encoded = python_to_json(original_data, version=2) - serialized = json.dumps(encoded) - decoded = json.loads(serialized) - rehydrated_data = json_to_python(decoded) - - assert encoded == decoded - assert rehydrated_data == original_data - encoded["$"] = "ScopeInfo" return encoded diff --git a/rope/base/project.py b/rope/base/project.py index 602ce9fb7..d730ab949 100644 --- a/rope/base/project.py +++ b/rope/base/project.py @@ -1,4 +1,5 @@ import os +import json import shutil import sys import warnings @@ -399,8 +400,10 @@ def read_data(self, name): def write_data(self, name, data): if self.project.ropefolder is not None: file = self._get_file(name) - with open(file.real_path, "wb") as output_file: + with open(file.real_path, "wb") as output_file, \ + open(file.real_path + ".json", "w") as output_file2: pickle.dump(data, output_file, 2) + json.dump(data, output_file2, default=lambda o: o.__getstate__()) def add_write_hook(self, hook): self.hooks.append(hook) From a91948a0c8979483b6b00663bc52f27949b2ae1f Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Wed, 28 Dec 2022 15:02:28 +1100 Subject: [PATCH 29/31] Blacken --- rope/base/project.py | 6 ++++-- rope/base/serializer.py | 42 ++++++++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/rope/base/project.py b/rope/base/project.py index d730ab949..757929b35 100644 --- a/rope/base/project.py +++ b/rope/base/project.py @@ -400,8 +400,10 @@ def read_data(self, name): def write_data(self, name, data): if self.project.ropefolder is not None: file = self._get_file(name) - with open(file.real_path, "wb") as output_file, \ - open(file.real_path + ".json", "w") as output_file2: + with ( + open(file.real_path, "wb") as output_file, + open(file.real_path + ".json", "w") as output_file2, + ): pickle.dump(data, output_file, 2) json.dump(data, output_file2, default=lambda o: o.__getstate__()) diff --git a/rope/base/serializer.py b/rope/base/serializer.py index d9bdcda63..eee5165f7 100644 --- a/rope/base/serializer.py +++ b/rope/base/serializer.py @@ -51,7 +51,7 @@ def python_to_json(o, version=1): if version not in (1, 2): - raise ValueError(f'Unexpected version {version}') + raise ValueError(f"Unexpected version {version}") references = [] result = { "v": version, @@ -66,7 +66,7 @@ def python_to_json(o, version=1): def json_to_python(o): version = o["v"] if version not in (1, 2): - raise ValueError(f'Unexpected version {version}') + raise ValueError(f"Unexpected version {version}") references = o.get("references", {}) data = _js2py(o["data"], references, version) return data @@ -77,27 +77,33 @@ def _py2js(o, references, version): return o elif isinstance(o, tuple): if version == 1: - return {"$": "t", "items": [_py2js(item, references, version) for item in o]} + return { + "$": "t", + "items": [_py2js(item, references, version) for item in o], + } else: return [_py2js(item, references, version) for item in o] elif isinstance(o, list): if version == 2: - return {"$": "l", "items": [_py2js(item, references, version) for item in o]} + return { + "$": "l", + "items": [_py2js(item, references, version) for item in o], + } else: return [_py2js(item, references, version) for item in o] elif isinstance(o, dict): result = {} - for k, v in o.items(): - if k == "$": + for pykey, pyvalue in o.items(): + if pykey == "$": raise ValueError('dict cannot contain reserved key "$"') - if isinstance(k, str) and not k.isdigit(): - result[k] = _py2js(v, references, version) + if isinstance(pykey, str) and not pykey.isdigit(): + result[pykey] = _py2js(pyvalue, references, version) else: - assert isinstance(k, (str, int, tuple)) or k is None - assert not isinstance(k, list) + assert isinstance(pykey, (str, int, tuple)) or pykey is None + assert not isinstance(pykey, list) refid = len(references) - references.append(_py2js(k, references, version)) - result[str(refid)] = _py2js(v, references, version) + references.append(_py2js(pykey, references, version)) + result[str(refid)] = _py2js(pyvalue, references, version) return result raise TypeError(f"Object of type {type(o)} is not allowed {o}") @@ -110,7 +116,7 @@ def _js2py(o, references, version): return list(_js2py(item, references, version) for item in o) elif version == 2: return tuple(_js2py(item, references, version) for item in o) - raise ValueError(f'Unexpected version {version}') + raise ValueError(f"Unexpected version {version}") elif isinstance(o, dict): result = {} if "$" in o: @@ -124,14 +130,16 @@ def _js2py(o, references, version): return list(_js2py(item, references, version) for item in data) raise TypeError(f'Unrecognized object of type: {o["$"]} {o}') else: - for refid, v in o.items(): + for refid, jsvalue in o.items(): assert isinstance(refid, str) if refid.isdigit(): refid = int(refid) assert 0 <= refid < len(references) - k = references[refid] - result[_js2py(k, references, version)] = _js2py(v, references, version) + jskey = references[refid] + pyvalue = _js2py(jsvalue, references, version) + pykey = _js2py(jskey, references, version) + result[pykey] = pyvalue else: - result[refid] = _js2py(v, references, version) + result[refid] = _js2py(jsvalue, references, version) return result raise TypeError(f'Object of type "{type(o).__name__}" is not allowed {o}') From 77dcdc28c25f36e9b15d287aca1160398f6b2a6c Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Wed, 28 Dec 2022 15:06:16 +1100 Subject: [PATCH 30/31] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 673a2a230..a1cd675fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - #626 Install pre-commit hooks on rope repository (@lieryan) - #548 Implement MoveGlobal using string as destination module names (@lieryan) - #627 Fix parsing of octal literal (@lieryan) +- #611 Implement JSON DataFile serialization (@lieryan) # Release 1.6.0 From 89ee77fbe9e43ceaf028ab31208c4018657262ef Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Wed, 28 Dec 2022 15:14:06 +1100 Subject: [PATCH 31/31] Use ExitStack() Parenthesised context manager isn't supported until Python 3.10 and up (and unofficially, since Python 3.9). --- rope/base/project.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rope/base/project.py b/rope/base/project.py index e4fe7643c..28e3528c0 100644 --- a/rope/base/project.py +++ b/rope/base/project.py @@ -2,6 +2,7 @@ import os import sys import warnings +from contextlib import ExitStack from typing import Optional import rope.base.fscommands # Use full qualification for clarity. @@ -394,10 +395,9 @@ def read_data(self, name): def write_data(self, name, data): if self.project.ropefolder is not None: file = self._get_file(name) - with ( - open(file.real_path, "wb") as output_file, - open(file.real_path + ".json", "w") as output_file2, - ): + with ExitStack() as cm: + output_file = cm.enter_context(open(file.real_path, "wb")) + output_file2 = cm.enter_context(open(file.real_path + ".json", "w")) pickle.dump(data, output_file, 2) json.dump(data, output_file2, default=lambda o: o.__getstate__())