Coverage for Lib/json/decoder.py: 96%

212 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-19 23:21 +0200

1"""Implementation of JSONDecoder 

2""" 

3import re 

4 

5from json import scanner 

6try: 

7 from _json import scanstring as c_scanstring 

8except ImportError: 

9 c_scanstring = None 

10 

11__all__ = ['JSONDecoder', 'JSONDecodeError'] 

12 

# Flag set shared by every regular expression compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE

# Float values used for the non-standard NaN / Infinity / -Infinity literals.
NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')

18 

19 

class JSONDecodeError(ValueError):
    """ValueError raised when a JSON document cannot be decoded.

    Besides the formatted message, instances expose:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    """
    # Note that this exception is used from _json
    def __init__(self, msg, doc, pos):
        # Lines are 1-based: one more than the newlines found before pos.
        line = doc.count('\n', 0, pos) + 1
        # Columns are 1-based too: rfind() yields -1 when no newline
        # precedes pos, which turns the first character into column 1.
        column = pos - doc.rfind('\n', 0, pos)
        ValueError.__init__(
            self,
            '%s: line %d column %d (char %d)' % (msg, line, column, pos))
        self.msg, self.doc, self.pos = msg, doc, pos
        self.lineno, self.colno = line, column

    def __reduce__(self):
        # Rebuild from the three constructor arguments so pickling works;
        # lineno and colno are recomputed by __init__.
        ctor_args = (self.msg, self.doc, self.pos)
        return self.__class__, ctor_args

44 

45 

# Non-standard constant literals mapped to the float each one decodes to.
_CONSTANTS = {'-Infinity': NegInf, 'Infinity': PosInf, 'NaN': NaN}


# Group 1 lazily captures a run of ordinary characters; group 2 is the
# character that ended the run: a quote, a backslash or a control character.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes (the character after the backslash) and the
# character each one stands for.
BACKSLASH = {
    '"': '"',
    '\\': '\\',
    '/': '/',
    'b': '\b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
}

58 

59def _decode_uXXXX(s, pos): 

60 esc = s[pos + 1:pos + 5] 

61 if len(esc) == 4 and esc[1] not in 'xX': 

62 try: 

63 return int(esc, 16) 

64 except ValueError: 

65 pass 

66 msg = "Invalid \\uXXXX escape" 

67 raise JSONDecodeError(msg, s, pos) 

68 

def py_scanstring(s, end, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Decode one JSON string literal found in s.

    end is the index of the first character after the opening quote.
    Every valid JSON escape sequence is replaced by the character it
    denotes; an invalid or unterminated string raises JSONDecodeError
    (a ValueError).  With strict set to a false value, literal control
    characters inside the string are kept instead of being rejected.

    Returns a (decoded string, index just past the closing quote) tuple."""
    pieces = []
    # Index of the opening quote, used when reporting an unterminated string
    opening = end - 1
    while True:
        match = _m(s, end)
        if match is None:
            raise JSONDecodeError("Unterminated string starting at", s, opening)
        end = match.end()
        literal, stop = match.groups()
        # literal holds zero or more characters that needed no unescaping
        if literal:
            pieces.append(literal)
        # stop is the closing quote, a literal control character, or a
        # backslash announcing an escape sequence
        if stop == '"':
            break
        if stop != '\\':
            if not strict:
                pieces.append(stop)
                continue
            msg = "Invalid control character {0!r} at".format(stop)
            raise JSONDecodeError(msg, s, end)
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError("Unterminated string starting at",
                                  s, opening) from None
        if esc == 'u':
            code = _decode_uXXXX(s, end)
            end += 5
            # A high surrogate directly followed by another \u escape may
            # form a pair; combine them only if the second is a low surrogate
            if 0xd800 <= code <= 0xdbff and s[end:end + 2] == '\\u':
                low = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= low <= 0xdfff:
                    code = 0x10000 + (((code - 0xd800) << 10) | (low - 0xdc00))
                    end += 6
            decoded = chr(code)
        else:
            # Anything other than a unicode escape must be in the lookup table
            try:
                decoded = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        pieces.append(decoded)
    return ''.join(pieces), end

127 

128 

# Prefer the C implementation whenever the import at the top of the module
# managed to load it; otherwise fall back to the pure-Python scanner.
scanstring = c_scanstring if c_scanstring else py_scanstring

# The four whitespace characters skipped between JSON tokens, as a plain
# string for membership tests and as a compiled pattern for skipping runs.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)

134 

135 

def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object and build its Python value.

    s_and_end is a (s, end) pair: the document and the index at which the
    object's content starts (presumably just past the opening brace; the
    brace itself is never inspected here).  strict is forwarded to
    scanstring for the keys, and scan_once(s, idx) is called to decode each
    member value.

    The key/value pairs are collected in document order.  If
    object_pairs_hook is not None it is called with the list of pairs and
    its result is returned; otherwise the pairs are turned into a dict,
    which is passed through object_hook when that is not None.

    memo, when given, is a dict used to share equal key strings.

    Returns a (value, index just past the closing brace) tuple.  Raises
    JSONDecodeError on a missing key, ':' or ',' delimiter, missing value,
    or trailing comma.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Backwards compatibility
    if memo is None:
        memo = {}
    # setdefault(key, key) hands back the key stored earlier when an equal
    # one was already seen, so repeated keys reuse a single string object
    memo_get = memo.setdefault
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    # Step over the opening quote of the first key
    end += 1
    while True:
        key, end = scanstring(s, end, strict)
        key = memo_get(key, key)
        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace after the colon: a single whitespace character is
        # handled without a regex call.  Running off the end of s is left
        # for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            # err.value is used as the position of the missing value
            raise JSONDecodeError("Expecting value", s, err.value) from None
        pairs_append((key, value))
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            # End of input: the empty string fails both delimiter checks below
            nextchar = ''
        # end now points just past nextchar
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        # Remember where the comma was so a trailing comma is reported there
        comma_idx = end - 1
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        # Step over what should be the opening quote of the next key
        end += 1
        if nextchar != '"':
            if nextchar == '}':
                raise JSONDecodeError("Illegal trailing comma before end of object", s, comma_idx)
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end - 1)
    # object_pairs_hook is consulted first, so it wins over object_hook
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end

219 

def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array into a list.

    s_and_end is a (s, end) pair giving the document and the index at which
    the array's content starts.  scan_once(s, idx) decodes each element.

    Returns a (list, index just past the closing bracket) tuple.  Raises
    JSONDecodeError on a missing value, a missing ',' delimiter or a
    trailing comma.
    """
    s, end = s_and_end
    items = []
    ch = s[end:end + 1]
    if ch in _ws:
        end = _w(s, end + 1).end()
        ch = s[end:end + 1]
    # Look-ahead for trivial empty array
    if ch == ']':
        return items, end + 1
    add_item = items.append
    while True:
        try:
            item, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        add_item(item)
        ch = s[end:end + 1]
        if ch in _ws:
            end = _w(s, end + 1).end()
            ch = s[end:end + 1]
        # Position of the character expected to be ',' or ']'
        delim_idx = end
        end += 1
        if ch == ']':
            return items, end
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, delim_idx)
        # Skip whitespace after the comma; a single whitespace character is
        # handled without a regex call.  If s ends here, ch keeps its ','
        # value and the next scan_once call reports the missing value.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
            ch = s[end:end + 1]
        except IndexError:
            pass
        if ch == ']':
            raise JSONDecodeError("Illegal trailing comma before end of array", s, delim_idx)

259 

260 

class JSONDecoder:
    """Simple JSON <https://json.org> decoder

    By default JSON values are translated to Python as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are accepted as well and become
    the matching ``float`` values, although the JSON spec does not define
    them.

    """

    def __init__(self, *, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure the decoder; every argument is keyword-only.

        ``object_hook``, if given, is called with each decoded JSON object
        (a ``dict``) and its return value is used in place of that
        ``dict``.  Useful for custom deserialization such as JSON-RPC
        class hinting.

        ``object_pairs_hook``, if given, is called with each decoded JSON
        object as an ordered list of ``(key, value)`` pairs, and its return
        value is used instead of the ``dict``.  When ``object_hook`` is
        given too, ``object_pairs_hook`` takes priority.

        ``parse_float``, if given, is called with the string of every JSON
        float to decode.  The default is ``float``; another type or parser
        (e.g. ``decimal.Decimal``) may be substituted.

        ``parse_int``, if given, is called with the string of every JSON
        int to decode.  The default is ``int``; another type or parser
        (e.g. ``float``) may be substituted.

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN.  It can be used to raise an exception
        when such invalid JSON numbers are encountered.

        ``strict`` (true by default) controls whether control characters,
        i.e. those with character codes in the 0-31 range such as tab,
        newline, carriage return and NUL, are rejected inside strings.
        Pass a false value to allow them.
        """
        # Hooks and options supplied by the caller
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Number parsers fall back to the builtin types when not supplied
        self.parse_float = parse_float if parse_float else float
        self.parse_int = parse_int if parse_int else int
        self.parse_constant = (
            parse_constant if parse_constant else _CONSTANTS.__getitem__)
        # Parsers for the composite and string token types
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last: make_scanner receives the fully configured decoder
        self.scan_once = scanner.make_scanner(self)


    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Whitespace around the document is ignored; anything else after it
        raises JSONDecodeError("Extra data").

        """
        start = _w(s, 0).end()
        obj, stop = self.raw_decode(s, idx=start)
        stop = _w(s, stop).end()
        if stop == len(s):
            return obj
        raise JSONDecodeError("Extra data", s, stop)

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that starts at index ``idx`` of ``s``
        (a ``str``) and return a 2-tuple of its Python representation and
        the index in ``s`` where the document ended.

        Unlike decode(), this accepts a string that has extraneous data
        after the document.

        """
        try:
            obj, stop = self.scan_once(s, idx)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        else:
            return obj, stop