Coverage for Lib/json/decoder.py: 96%
212 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-18 20:15 +0200
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-18 20:15 +0200
1"""Implementation of JSONDecoder
2"""
3import re
5from json import scanner
6try:
7 from _json import scanstring as c_scanstring
8except ImportError:
9 c_scanstring = None
11__all__ = ['JSONDecoder', 'JSONDecodeError']
# Flags shared by the regular expressions compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE

# Float values used for the non-standard constants NaN, Infinity, -Infinity.
NaN, PosInf, NegInf = (float(text) for text in ('nan', 'inf', '-inf'))
class JSONDecodeError(ValueError):
    """ValueError subclass describing where decoding of a document failed.

    Instances expose the following attributes:

    msg: the error message without any position information
    doc: the JSON document that was being parsed
    pos: the index into doc at which parsing failed
    lineno: the 1-based line number that contains pos
    colno: the 1-based column of pos within that line

    """
    # Note that _json uses this exception as well.
    def __init__(self, msg, doc, pos):
        # The line number is one more than the newlines found before pos.
        lineno = 1 + doc.count('\n', 0, pos)
        # rfind() gives -1 when no newline precedes pos, so the column is
        # 1-based on the first line without a special case.
        colno = pos - doc.rfind('\n', 0, pos)
        super().__init__(
            '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos))
        self.msg, self.doc, self.pos = msg, doc, pos
        self.lineno, self.colno = lineno, colno

    def __reduce__(self):
        """Rebuild from the original constructor arguments when unpickled."""
        constructor_args = (self.msg, self.doc, self.pos)
        return self.__class__, constructor_args
# Spellings of the non-standard numeric constants and their float values.
_CONSTANTS = {'-Infinity': NegInf, 'Infinity': PosInf, 'NaN': NaN}

# Group 1 lazily captures a run of ordinary characters; group 2 captures the
# character that ends the run: a double quote, a backslash, or a control
# character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes: the character after the backslash mapped to the
# character it stands for.
BACKSLASH = {
    '"': '"',
    '\\': '\\',
    '/': '/',
    'b': '\b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
}
def _decode_uXXXX(s, pos):
    """Return the code unit named by the four hex digits that follow s[pos].

    s is the document being scanned and pos is the index immediately before
    the digits (py_scanstring passes the index of the ``u`` in ``\\uXXXX``).

    Raises JSONDecodeError at pos unless exactly four ASCII hexadecimal
    digits are present.
    """
    esc = s[pos + 1:pos + 5]
    # int(esc, 16) on its own is too lenient: it also accepts underscores
    # ('1_23'), surrounding whitespace (' 123'), a sign ('+123', '-123'), a
    # '0x' prefix and non-ASCII Unicode digits.  JSON allows none of these,
    # so every character is checked explicitly.
    hexdigits = '0123456789abcdefABCDEF'
    if len(esc) == 4 and all(ch in hexdigits for ch in esc):
        return int(esc, 16)
    msg = "Invalid \\uXXXX escape"
    raise JSONDecodeError(msg, s, pos)
def py_scanstring(s, end, strict=True,
                  _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Decode the JSON string in s whose opening quote sits at end - 1.

    end is the index of the first character after the opening quote.  All
    valid JSON escape sequences are unescaped; a JSONDecodeError (a
    ValueError) is raised for an invalid or unterminated string.  When
    strict is false, literal control characters are accepted inside the
    string instead of being rejected.

    Returns a (decoded_string, index) tuple where index is the position in
    s just after the closing quote."""
    pieces = []
    add_piece = pieces.append
    # Position of the opening quote, reported for unterminated strings.
    start = end - 1
    while True:
        match = _m(s, end)
        if match is None:
            raise JSONDecodeError("Unterminated string starting at", s, start)
        end = match.end()
        # literal holds zero or more unescaped characters; stop is the
        # character that ended the run.
        literal, stop = match.groups()
        if literal:
            add_piece(literal)
        if stop == '"':
            # Closing quote: the string is complete.
            break
        if stop != '\\':
            # A literal control character; only tolerated when not strict.
            if strict:
                msg = "Invalid control character {0!r} at".format(stop)
                raise JSONDecodeError(msg, s, end)
            add_piece(stop)
            continue
        # stop is a backslash, so an escape sequence has to follow.
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError("Unterminated string starting at",
                                  s, start) from None
        if esc == 'u':
            code = _decode_uXXXX(s, end)
            end += 5
            # A high surrogate directly followed by another \u escape may
            # form a surrogate pair encoding one non-BMP character.
            if 0xd800 <= code <= 0xdbff and s[end:end + 2] == '\\u':
                low = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= low <= 0xdfff:
                    code = 0x10000 + (((code - 0xd800) << 10) | (low - 0xdc00))
                    end += 6
            decoded = chr(code)
        else:
            # Anything other than \u must be in the lookup table.
            try:
                decoded = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        add_piece(decoded)
    return ''.join(pieces), end
# Prefer the C implementation when it was importable, else use pure Python.
scanstring = c_scanstring if c_scanstring else py_scanstring

# The whitespace characters skipped between tokens, as a plain string for
# membership tests and as a compiled pattern for skipping runs of them.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Decode the members of a JSON object.

    s_and_end is a (document, index) pair; index is where scanning starts
    (presumably just past the opening brace, which this function never
    inspects).  strict is forwarded to scanstring for the keys, scan_once
    decodes each value, and memo (optional) is a dict used to share equal
    key strings.

    Returns (result, index) where index is just past the closing brace.
    result is object_pairs_hook(list_of_pairs) when that hook is given,
    otherwise a dict, passed through object_hook when that hook is given.

    Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    pairs = []
    add_pair = pairs.append
    # Backwards compatibility: memo is optional.
    if memo is None:
        memo = {}
    intern_key = memo.setdefault
    # A slice never raises IndexError; an empty result falls through to the
    # more specific error raised below.
    ch = s[end:end + 1]
    # The common case is that a key's opening quote comes next.
    if ch != '"':
        if ch in _ws:
            end = _w(s, end).end()
            ch = s[end:end + 1]
        if ch == '}':
            # Empty object: nothing to scan.
            if object_pairs_hook is not None:
                return object_pairs_hook(pairs), end + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, end + 1
        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, strict)
        key = intern_key(key, key)
        # Fast path: the separator is usually ':' right after the key, so
        # the whitespace regex only runs when it is not.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace after the colon; a single character is handled
        # without calling the regex.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        add_pair((key, value))
        # Fetch the character after the value, skipping whitespace; running
        # off the end of the document leaves an empty string.
        try:
            ch = s[end]
            if ch in _ws:
                end = _w(s, end + 1).end()
                ch = s[end]
        except IndexError:
            ch = ''
        end += 1

        if ch == '}':
            break
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        # Remember the comma so a trailing one can be reported precisely.
        comma_at = end - 1
        end = _w(s, end).end()
        ch = s[end:end + 1]
        end += 1
        if ch == '"':
            continue
        if ch == '}':
            raise JSONDecodeError("Illegal trailing comma before end of object", s, comma_at)
        raise JSONDecodeError(
            "Expecting property name enclosed in double quotes", s, end - 1)
    if object_pairs_hook is not None:
        return object_pairs_hook(pairs), end
    obj = dict(pairs)
    if object_hook is not None:
        obj = object_hook(obj)
    return obj, end
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Decode the elements of a JSON array.

    s_and_end is a (document, index) pair; index is where scanning starts
    (presumably just past the opening bracket, which this function never
    inspects).  scan_once decodes each element.

    Returns (list_of_values, index) where index is just past the closing
    bracket.  Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    values = []
    ch = s[end:end + 1]
    if ch in _ws:
        end = _w(s, end + 1).end()
        ch = s[end:end + 1]
    # Look ahead for the trivial empty array.
    if ch == ']':
        return values, end + 1
    add_value = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        add_value(value)
        ch = s[end:end + 1]
        if ch in _ws:
            end = _w(s, end + 1).end()
            ch = s[end:end + 1]
        end += 1
        if ch == ']':
            break
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        # Remember the comma so a trailing one can be reported precisely.
        comma_at = end - 1
        # Skip whitespace after the comma; a single character is handled
        # without calling the regex.  If the document ends here, ch keeps
        # its previous value and the next scan_once call reports the error.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
            ch = s[end:end + 1]
        except IndexError:
            pass
        if ch == ']':
            raise JSONDecodeError("Illegal trailing comma before end of array", s, comma_at)

    return values, end
class JSONDecoder:
    """Simple JSON <https://json.org> decoder

    By default, decoding translates JSON values to Python as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also understood and decoded
    to the corresponding ``float`` values, although they are outside the
    JSON spec.

    """

    def __init__(self, *, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None):
        """Configure the decoder; every argument is keyword-only.

        ``object_hook``, if given, is called with each decoded JSON object
        and its return value is used in place of the ``dict``.  This can be
        used for custom deserializations (e.g. JSON-RPC class hinting).

        ``object_pairs_hook``, if given, is called with each decoded JSON
        object as an ordered list of pairs, and its return value is used
        instead of the ``dict``.  It takes priority over ``object_hook``
        when both are supplied.

        ``parse_float``, if given, is called with the string of every JSON
        float to be decoded; the default is equivalent to float(num_str).
        This allows another datatype or parser (e.g. decimal.Decimal).

        ``parse_int``, if given, is called with the string of every JSON
        int to be decoded; the default is equivalent to int(num_str).
        This allows another datatype or parser (e.g. float).

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN.  It can be used to raise an exception
        when such invalid JSON numbers are encountered.

        If ``strict`` is false (true is the default), control characters
        are allowed inside strings.  Control characters here are those with
        character codes in the 0-31 range, including ``'\\t'`` (tab),
        ``'\\n'``, ``'\\r'`` and ``'\\0'``.
        """
        # Hooks and options, stored as given.
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.strict = strict
        # Parsers fall back to the built-in behaviour when not supplied.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        # Building blocks and the key memo handed to the scanner.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last: make_scanner receives this fully configured instance.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Whitespace around the document is ignored; any other trailing
        content raises JSONDecodeError("Extra data").

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise JSONDecodeError("Extra data", s, end)

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` beginning with
        a JSON document), starting at index ``idx``.

        Returns a 2-tuple of the Python representation and the index in
        ``s`` where the document ended, which makes it usable on strings
        that have extraneous data after the document.

        """
        try:
            return self.scan_once(s, idx)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None