From 4c88a67b124f703b9238d4900d292de371a25cf7 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Fri, 9 Feb 2024 18:29:11 +0200 Subject: [PATCH 1/7] Add tests to increase json coverage --- Lib/test/test_json/test_decode.py | 17 +++++- Lib/test/test_json/test_detect_encoding.py | 38 +++++++++++++ Lib/test/test_json/test_encode_basestring.py | 56 ++++++++++++++++++++ 3 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 Lib/test/test_json/test_detect_encoding.py create mode 100644 Lib/test/test_json/test_encode_basestring.py diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 124045b13184b3..150811e18789ea 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -8,14 +8,27 @@ class TestDecode: def test_decimal(self): rval = self.loads('1.1', parse_float=decimal.Decimal) - self.assertTrue(isinstance(rval, decimal.Decimal)) + self.assertIsInstance(rval, decimal.Decimal) self.assertEqual(rval, decimal.Decimal('1.1')) def test_float(self): rval = self.loads('1', parse_int=float) - self.assertTrue(isinstance(rval, float)) + self.assertIsInstance(rval, float) self.assertEqual(rval, 1.0) + def test_bytes(self): + self.assertEqual(self.loads(b"1"), 1) + + def test_constant(self): + for constant, expected in [ + ("Infinity", "INFINITY"), + ("-Infinity", "-INFINITY"), + ("NaN", "NAN"), + ]: + self.assertEqual( + self.loads(constant, parse_constant=str.upper), expected + ) + def test_empty_objects(self): self.assertEqual(self.loads('{}'), {}) self.assertEqual(self.loads('[]'), []) diff --git a/Lib/test/test_json/test_detect_encoding.py b/Lib/test/test_json/test_detect_encoding.py new file mode 100644 index 00000000000000..b92e4a335ab29c --- /dev/null +++ b/Lib/test/test_json/test_detect_encoding.py @@ -0,0 +1,38 @@ +import codecs + +from test.test_json import PyTest, CTest + + +class TestDetectEncoding: + def test_utf32(self): + # Arrange + for test_input, expected_encoding in [ + (codecs.BOM_UTF32_BE + "abc".encode("utf-32-be"), "utf-32"), + (codecs.BOM_UTF32_LE + "abc".encode("utf-32-le"), "utf-32"), + (codecs.BOM_UTF16_BE + "abc".encode("utf-16-be"), "utf-16"), + (codecs.BOM_UTF16_LE + "abc".encode("utf-16-le"), "utf-16"), + (codecs.BOM_UTF8 + "abc".encode("utf-8-sig"), "utf-8-sig"), + (b"\x00\x00\x00a\x00\x00\x00b\x00\x00\x00c", "utf-32-be"), + (b"\x00a\x00b\x00c", "utf-16-be"), + (b"a\x00\x00\x00b\x00\x00\x00c\x00\x00\x00", "utf-32-le"), + (b"a\x00\x00b\x00c\x00", "utf-16-le"), + (b"a\x00b\x00c\x00", "utf-16-le"), + (b"\x00a", "utf-16-be"), + (b"a\x00", "utf-16-le"), + (b"abcd", "utf-8"), + (b"abc", "utf-8"), + (b"ab", "utf-8"), + ]: + # Act + result = self.json.detect_encoding(test_input) + + # Assert + self.assertEqual(result, expected_encoding) + + +class TestPyTestDetectEncoding(TestDetectEncoding, PyTest): + pass + + +class TestCTestDetectEncoding(TestDetectEncoding, CTest): + pass diff --git a/Lib/test/test_json/test_encode_basestring.py b/Lib/test/test_json/test_encode_basestring.py new file mode 100644 index 00000000000000..2ae1ce64707fe8 --- /dev/null +++ b/Lib/test/test_json/test_encode_basestring.py @@ -0,0 +1,56 @@ +from test.test_json import PyTest, CTest +from test.support import bigaddrspacetest + + +CASES = [ + ( + '/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', + '"/\\\\\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"', + ), + ( + '\u0123\u4567\u89ab\ucdef\uabcd\uef4a', + '"\u0123\u4567\u89ab\ucdef\uabcd\uef4a"', + ), + ('controls', '"controls"'), + ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + ( + '{"object with 1 member":["array with 1 element"]}', + '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"', + ), + (' s p a c e d ', '" s p a c e d "'), + ('\U0001d120', '"\U0001d120"'), + ('\u03b1\u03a9', '"\u03b1\u03a9"'), + ("`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'), + ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + ( + '\u0123\u4567\u89ab\ucdef\uabcd\uef4a', + '"\u0123\u4567\u89ab\ucdef\uabcd\uef4a"', + ), +] + + +class TestEncodeBasestring: + def test_encode_basestring(self): + filename = self.json.encoder.encode_basestring.__name__ + for input_string, expect in CASES: + result = self.json.encoder.encode_basestring(input_string) + self.assertEqual( + result, + expect, + '{0!r} != {1!r} for {2}({3!r})'.format( + result, expect, filename, input_string + ), + ) + + +class TestPyEncodeBasestring(TestEncodeBasestring, PyTest): + pass + + +class TestCEncodeBasestring(TestEncodeBasestring, CTest): + @bigaddrspacetest + def test_overflow(self): + size = (2**32) // 6 + 1 + s = "\x00" * size + with self.assertRaises(OverflowError): + self.json.encoder.encode_basestring(s) From 5ac05a9b407912481af6b3b143907af77fef45d3 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 26 Mar 2024 17:00:45 +0200 Subject: [PATCH 2/7] Use f-strings --- Lib/test/test_json/test_decode.py | 2 +- Lib/test/test_json/test_encode_basestring.py | 4 +--- Lib/test/test_json/test_encode_basestring_ascii.py | 3 +-- Lib/test/test_json/test_fail.py | 2 +- Lib/test/test_json/test_unicode.py | 8 ++++---- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 150811e18789ea..8b29a0985954b8 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -101,7 +101,7 @@ def test_string_with_utf8_bom(self): self.json.load(StringIO(bom_json)) self.assertIn('BOM', str(cm.exception)) # make sure that the BOM is not detected in the middle of a string - bom_in_str = '"{}"'.format(''.encode('utf-8-sig').decode('utf-8')) + bom_in_str = f"\"{''.encode('utf-8-sig').decode('utf-8')}\"" self.assertEqual(self.loads(bom_in_str), '\ufeff') self.assertEqual(self.json.load(StringIO(bom_in_str)), '\ufeff') diff --git a/Lib/test/test_json/test_encode_basestring.py b/Lib/test/test_json/test_encode_basestring.py index 2ae1ce64707fe8..d305821d46f6e3 100644 --- a/Lib/test/test_json/test_encode_basestring.py +++ b/Lib/test/test_json/test_encode_basestring.py @@ -37,9 +37,7 @@ def test_encode_basestring(self): self.assertEqual( result, expect, - '{0!r} != {1!r} for {2}({3!r})'.format( - result, expect, filename, input_string - ), + f'{result!r} != {expect!r} for {filename}({input_string!r})', ) diff --git a/Lib/test/test_json/test_encode_basestring_ascii.py b/Lib/test/test_json/test_encode_basestring_ascii.py index 4bbc6c71489a83..6a39b72a09df35 100644 --- a/Lib/test/test_json/test_encode_basestring_ascii.py +++ b/Lib/test/test_json/test_encode_basestring_ascii.py @@ -23,8 +23,7 @@ def test_encode_basestring_ascii(self): for input_string, expect in CASES: result = self.json.encoder.encode_basestring_ascii(input_string) self.assertEqual(result, expect, - '{0!r} != {1!r} for {2}({3!r})'.format( - result, expect, fname, input_string)) + f'{result!r} != {expect!r} for {fname}({input_string!r})') def test_ordered_dict(self): # See issue 6105 diff --git a/Lib/test/test_json/test_fail.py b/Lib/test/test_json/test_fail.py index d6bce605e21463..a74240f1107de3 100644 --- a/Lib/test/test_json/test_fail.py +++ b/Lib/test/test_json/test_fail.py @@ -89,7 +89,7 @@ def test_failures(self): except self.JSONDecodeError: pass else: - self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc)) + self.fail(f"Expected failure for fail{idx}.json: {doc!r}") def test_non_string_keys_dict(self): data = {'a' : 1, (1, 2) : 2} diff --git a/Lib/test/test_json/test_unicode.py b/Lib/test/test_json/test_unicode.py index 2e8bba2775256a..533894e094f7b7 100644 --- a/Lib/test/test_json/test_unicode.py +++ b/Lib/test/test_json/test_unicode.py @@ -20,12 +20,12 @@ def test_encoding4(self): def test_encoding5(self): u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' j = self.dumps(u, ensure_ascii=False) - self.assertEqual(j, '"{0}"'.format(u)) + self.assertEqual(j, f'"{u}"') def test_encoding6(self): u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' j = self.dumps([u], ensure_ascii=False) - self.assertEqual(j, '["{0}"]'.format(u)) + self.assertEqual(j, f'["{u}"]') def test_big_unicode_encode(self): u = '\U0001d120' @@ -34,13 +34,13 @@ def test_big_unicode_encode(self): def test_big_unicode_decode(self): u = 'z\U0001d120x' - self.assertEqual(self.loads('"' + u + '"'), u) + self.assertEqual(self.loads(f'"{u}"'), u) self.assertEqual(self.loads('"z\\ud834\\udd20x"'), u) def test_unicode_decode(self): for i in range(0, 0xd7ff): u = chr(i) - s = '"\\u{0:04x}"'.format(i) + s = f'"\\u{i:04x}"' self.assertEqual(self.loads(s), u) def test_unicode_preservation(self): From b305f13cfacfb3512538dc34f3bd58a96d4bd197 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 26 Mar 2024 17:02:58 +0200 Subject: [PATCH 3/7] Test incorrect 'NaN' and 'Infinity' cases fail --- Lib/test/test_json/test_decode.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 8b29a0985954b8..0a5d23b2e18f86 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -29,6 +29,13 @@ def test_constant(self): self.loads(constant, parse_constant=str.upper), expected ) + def test_constant_invalid_case(self): + for constant in [ + "nan", "NAN", "naN", "infinity", "INFINITY", "inFiniTy" + ]: + with self.assertRaises(self.JSONDecodeError): + self.loads(constant) + def test_empty_objects(self): self.assertEqual(self.loads('{}'), {}) self.assertEqual(self.loads('[]'), []) From d8815e142921c72f74b6319a11f332bfaaba163b Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 27 Mar 2024 17:36:30 +0200 Subject: [PATCH 4/7] Rename test --- Lib/test/test_json/test_decode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 0a5d23b2e18f86..3700d9acfcb8e8 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -19,7 +19,7 @@ def test_float(self): def test_bytes(self): self.assertEqual(self.loads(b"1"), 1) - def test_constant(self): + def test_parse_constant(self): for constant, expected in [ ("Infinity", "INFINITY"), ("-Infinity", "-INFINITY"), From 5aa4e963d83b082ff47fcf14b009528d151aad37 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 27 Mar 2024 17:37:56 +0200 Subject: [PATCH 5/7] Remove, detect_encoding not part of public API, already covered by test_bytes_decode --- Lib/test/test_json/test_detect_encoding.py | 38 ---------------------- 1 file changed, 38 deletions(-) delete mode 100644 Lib/test/test_json/test_detect_encoding.py diff --git a/Lib/test/test_json/test_detect_encoding.py b/Lib/test/test_json/test_detect_encoding.py deleted file mode 100644 index b92e4a335ab29c..00000000000000 --- a/Lib/test/test_json/test_detect_encoding.py +++ /dev/null @@ -1,38 +0,0 @@ -import codecs - -from test.test_json import PyTest, CTest - - -class TestDetectEncoding: - def test_utf32(self): - # Arrange - for test_input, expected_encoding in [ - (codecs.BOM_UTF32_BE + "abc".encode("utf-32-be"), "utf-32"), - (codecs.BOM_UTF32_LE + "abc".encode("utf-32-le"), "utf-32"), - (codecs.BOM_UTF16_BE + "abc".encode("utf-16-be"), "utf-16"), - (codecs.BOM_UTF16_LE + "abc".encode("utf-16-le"), "utf-16"), - (codecs.BOM_UTF8 + "abc".encode("utf-8-sig"), "utf-8-sig"), - (b"\x00\x00\x00a\x00\x00\x00b\x00\x00\x00c", "utf-32-be"), - (b"\x00a\x00b\x00c", "utf-16-be"), - (b"a\x00\x00\x00b\x00\x00\x00c\x00\x00\x00", "utf-32-le"), - (b"a\x00\x00b\x00c\x00", "utf-16-le"), - (b"a\x00b\x00c\x00", "utf-16-le"), - (b"\x00a", "utf-16-be"), - (b"a\x00", "utf-16-le"), - (b"abcd", "utf-8"), - (b"abc", "utf-8"), - (b"ab", "utf-8"), - ]: - # Act - result = self.json.detect_encoding(test_input) - - # Assert - self.assertEqual(result, expected_encoding) - - -class TestPyTestDetectEncoding(TestDetectEncoding, PyTest): - pass - - -class TestCTestDetectEncoding(TestDetectEncoding, CTest): - pass From 05a220cb3c57a8ed2ddab7728aaa667675dae7a7 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 27 Mar 2024 17:39:23 +0200 Subject: [PATCH 6/7] Replace tests for encode_basestring (not part of public API) with test for dumps --- Lib/test/test_json/test_encode_basestring.py | 54 -------------------- Lib/test/test_json/test_unicode.py | 5 ++ 2 files changed, 5 insertions(+), 54 deletions(-) delete mode 100644 Lib/test/test_json/test_encode_basestring.py diff --git a/Lib/test/test_json/test_encode_basestring.py b/Lib/test/test_json/test_encode_basestring.py deleted file mode 100644 index d305821d46f6e3..00000000000000 --- a/Lib/test/test_json/test_encode_basestring.py +++ /dev/null @@ -1,54 +0,0 @@ -from test.test_json import PyTest, CTest -from test.support import bigaddrspacetest - - -CASES = [ - ( - '/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', - '"/\\\\\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"', - ), - ( - '\u0123\u4567\u89ab\ucdef\uabcd\uef4a', - '"\u0123\u4567\u89ab\ucdef\uabcd\uef4a"', - ), - ('controls', '"controls"'), - ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), - ( - '{"object with 1 member":["array with 1 element"]}', - '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"', - ), - (' s p a c e d ', '" s p a c e d "'), - ('\U0001d120', '"\U0001d120"'), - ('\u03b1\u03a9', '"\u03b1\u03a9"'), - ("`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'), - ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), - ( - '\u0123\u4567\u89ab\ucdef\uabcd\uef4a', - '"\u0123\u4567\u89ab\ucdef\uabcd\uef4a"', - ), -] - - -class TestEncodeBasestring: - def test_encode_basestring(self): - filename = self.json.encoder.encode_basestring.__name__ - for input_string, expect in CASES: - result = self.json.encoder.encode_basestring(input_string) - self.assertEqual( - result, - expect, - f'{result!r} != {expect!r} for {filename}({input_string!r})', - ) - - -class TestPyEncodeBasestring(TestEncodeBasestring, PyTest): - pass - - -class TestCEncodeBasestring(TestEncodeBasestring, CTest): - @bigaddrspacetest - def test_overflow(self): - size = (2**32) // 6 + 1 - s = "\x00" * size - with self.assertRaises(OverflowError): - self.json.encoder.encode_basestring(s) diff --git a/Lib/test/test_json/test_unicode.py b/Lib/test/test_json/test_unicode.py index 533894e094f7b7..68629cceeb9be9 100644 --- a/Lib/test/test_json/test_unicode.py +++ b/Lib/test/test_json/test_unicode.py @@ -27,6 +27,11 @@ def test_encoding6(self): j = self.dumps([u], ensure_ascii=False) self.assertEqual(j, f'["{u}"]') + def test_encoding7(self): + u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = self.dumps(u + "\n", ensure_ascii=False) + self.assertEqual(j, f'"{u}\\n"') + def test_big_unicode_encode(self): u = '\U0001d120' self.assertEqual(self.dumps(u), '"\\ud834\\udd20"') From b9ca3c254c0618fcacb1f2fc8d17e18db3d3b3ad Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 8 Apr 2024 17:40:26 +0300 Subject: [PATCH 7/7] Apply suggestions from code review Co-authored-by: Petr Viktorin --- Lib/test/test_json/test_decode.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 3700d9acfcb8e8..79fb239b35d3f2 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -108,7 +108,8 @@ def test_string_with_utf8_bom(self): self.json.load(StringIO(bom_json)) self.assertIn('BOM', str(cm.exception)) # make sure that the BOM is not detected in the middle of a string - bom_in_str = f"\"{''.encode('utf-8-sig').decode('utf-8')}\"" + bom = ''.encode('utf-8-sig').decode('utf-8') + bom_in_str = f'"{bom}"' self.assertEqual(self.loads(bom_in_str), '\ufeff') self.assertEqual(self.json.load(StringIO(bom_in_str)), '\ufeff')