Coverage for Lib/json/encoder.py: 96%

233 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-18 20:15 +0200

1"""Implementation of JSONEncoder 

2""" 

3import re 

4 

5try: 

6 from _json import encode_basestring_ascii as c_encode_basestring_ascii 

7except ImportError: 

8 c_encode_basestring_ascii = None 

9try: 

10 from _json import encode_basestring as c_encode_basestring 

11except ImportError: 

12 c_encode_basestring = None 

13try: 

14 from _json import make_encoder as c_make_encoder 

15except ImportError: 

16 c_make_encoder = None 

17 

# Characters that always need escaping inside a JSON string: the C0 control
# range, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Same idea for ASCII-only output: backslash, double quote, and anything
# outside the printable ASCII range (space through tilde).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any single byte in the range 0x80-0xff.
HAS_UTF8 = re.compile(b'[\x80-\xff]')

# Maps a character to its JSON escape sequence.  The literal holds the
# characters that have a short two-character escape.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every other control character below 0x20 gets the generic \uXXXX form;
# entries that already have a short escape are left untouched.
ESCAPE_DCT.update(
    (chr(code), f'\\u{code:04x}')
    for code in range(0x20)
    if chr(code) not in ESCAPE_DCT
)

INFINITY = float('inf')

36 

def py_encode_basestring(s):
    """Return *s* as a double-quoted JSON string literal.

    Each character matched by ``ESCAPE`` is replaced with its entry in
    ``ESCAPE_DCT``; every other character, non-ASCII ones included, is
    copied through unchanged.

    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'

44 

45 

# Use the C implementation when its import succeeded (it is None otherwise),
# falling back to the pure-Python version.
encode_basestring = c_encode_basestring or py_encode_basestring

47 

48 

def py_encode_basestring_ascii(s):
    """Return *s* as a double-quoted JSON string literal using ASCII only.

    Characters with an entry in ``ESCAPE_DCT`` use that escape.  Any other
    character matched by ``ESCAPE_ASCII`` is written as ``\\uXXXX``; code
    points of 0x10000 and above are written as a UTF-16 surrogate pair.

    """
    def escape_char(found):
        char = found.group(0)
        known = ESCAPE_DCT.get(char)
        if known is not None:
            return known
        code = ord(char)
        if code < 0x10000:
            return f'\\u{code:04x}'
        # Outside the Basic Multilingual Plane: split into a surrogate pair.
        code -= 0x10000
        high = 0xd800 | ((code >> 10) & 0x3ff)
        low = 0xdc00 | (code & 0x3ff)
        return f'\\u{high:04x}\\u{low:04x}'

    return '"' + ESCAPE_ASCII.sub(escape_char, s) + '"'

69 

70 

# Use the C implementation when its import succeeded (it is None otherwise),
# falling back to the pure-Python version.
encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii

73 

class JSONEncoder(object):
    """Extensible JSON <https://json.org> encoder for Python data structures.

    The following objects and types are handled out of the box:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To teach the encoder about other objects, subclass it and override
    ``.default()`` so that it returns a serializable object for ``o``
    when possible and otherwise calls the superclass implementation
    (which raises ``TypeError``).

    """
    item_separator = ', '
    key_separator = ': '

    def __init__(self, *, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, encoding a dict key that is not a str, int,
        float, bool or None raises TypeError.  If skipkeys is true, such
        items are silently skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming non-ASCII characters escaped.  If
        ensure_ascii is false, the output can contain non-ASCII characters.

        If check_circular is true, lists, dicts, and custom encoded
        objects are checked for circular references during encoding to
        prevent an infinite recursion (which would cause a RecursionError).
        Otherwise, no such check takes place.

        If allow_nan is true, NaN, Infinity, and -Infinity are encoded
        as such.  This behavior is not JSON specification compliant, but
        is consistent with most JavaScript based encoders and decoders.
        Otherwise, encoding such floats raises ValueError.

        If sort_keys is true, dictionaries are output sorted by key;
        this is useful for regression tests to ensure that JSON
        serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, JSON array elements and
        object members are pretty-printed with that many spaces per
        level; a string is used as the per-level indent as is.  An indent
        level of 0 only inserts newlines.  None is the most compact
        representation.

        If specified, separators should be an (item_separator,
        key_separator) tuple.  The default is (', ', ': ') if *indent* is
        ``None`` and (',', ': ') otherwise.  To get the most compact JSON
        representation, specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON
        encodable version of the object or raise a ``TypeError``.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is None:
            # Pretty-printed output puts a newline after each item, so the
            # trailing space of the class-level item separator is dropped.
            if indent is not None:
                self.item_separator = ','
        else:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Stored on the instance, so it takes precedence over the
            # ``default`` method defined on the class.
            self.default = default

    def default(self, o):
        """Return a serializable stand-in for ``o``; the base version
        always raises ``TypeError``.

        Override this in a subclass so that it returns a serializable
        object for ``o``, or calls the base implementation (to raise a
        ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        type_name = o.__class__.__name__
        raise TypeError(f'Object of type {type_name} is not JSON serializable')

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from json.encoder import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # Shortcut for the simplest case (and for benchmarks): a bare string.
        if isinstance(o, str):
            string_encoder = (encode_basestring_ascii if self.ensure_ascii
                              else encode_basestring)
            return string_encoder(o)
        # The iterator is not handed straight to ''.join() because the
        # exceptions would be less detailed.  Materializing it with list()
        # is roughly what ''.join() would do internally anyway.
        pieces = self.iterencode(o, _one_shot=True)
        if not isinstance(pieces, (list, tuple)):
            pieces = list(pieces)
        return ''.join(pieces)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        # A dict of markers enables circular-reference detection.
        markers = {} if self.check_circular else None
        _encoder = (encode_basestring_ascii if self.ensure_ascii
                    else encode_basestring)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
            # Detect the special values with plain comparisons only, so the
            # check does not depend on processor- or platform-specific
            # internals.
            if o != o:
                special = 'NaN'
            elif o == _inf:
                special = 'Infinity'
            elif o == _neginf:
                special = '-Infinity'
            else:
                special = None

            if special is None:
                return _repr(o)
            if allow_nan:
                return special
            raise ValueError(
                "Out of range float values are not JSON compliant: " +
                repr(o))

        # The C encoder is only used for one-shot, non-indented encoding.
        use_c_encoder = (_one_shot and c_make_encoder is not None
                         and self.indent is None)
        if not use_c_encoder:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        else:
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        return _iterencode(o, 0)

259 

260def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, 

261 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, 

262 ## HACK: hand-optimized bytecode; turn globals into locals 

263 ValueError=ValueError, 

264 dict=dict, 

265 float=float, 

266 id=id, 

267 int=int, 

268 isinstance=isinstance, 

269 list=list, 

270 str=str, 

271 tuple=tuple, 

272 _intstr=int.__repr__, 

273 ): 

274 

275 if _indent is not None and not isinstance(_indent, str): 

276 _indent = ' ' * _indent 

277 

278 def _iterencode_list(lst, _current_indent_level): 

279 if not lst: 

280 yield '[]' 

281 return 

282 if markers is not None: 

283 markerid = id(lst) 

284 if markerid in markers: 

285 raise ValueError("Circular reference detected") 

286 markers[markerid] = lst 

287 buf = '[' 

288 if _indent is not None: 

289 _current_indent_level += 1 

290 newline_indent = '\n' + _indent * _current_indent_level 

291 separator = _item_separator + newline_indent 

292 buf += newline_indent 

293 else: 

294 newline_indent = None 

295 separator = _item_separator 

296 first = True 

297 for value in lst: 

298 if first: 

299 first = False 

300 else: 

301 buf = separator 

302 if isinstance(value, str): 

303 yield buf + _encoder(value) 

304 elif value is None: 

305 yield buf + 'null' 

306 elif value is True: 

307 yield buf + 'true' 

308 elif value is False: 

309 yield buf + 'false' 

310 elif isinstance(value, int): 

311 # Subclasses of int/float may override __repr__, but we still 

312 # want to encode them as integers/floats in JSON. One example 

313 # within the standard library is IntEnum. 

314 yield buf + _intstr(value) 

315 elif isinstance(value, float): 

316 # see comment above for int 

317 yield buf + _floatstr(value) 

318 else: 

319 yield buf 

320 if isinstance(value, (list, tuple)): 

321 chunks = _iterencode_list(value, _current_indent_level) 

322 elif isinstance(value, dict): 

323 chunks = _iterencode_dict(value, _current_indent_level) 

324 else: 

325 chunks = _iterencode(value, _current_indent_level) 

326 yield from chunks 

327 if newline_indent is not None: 

328 _current_indent_level -= 1 

329 yield '\n' + _indent * _current_indent_level 

330 yield ']' 

331 if markers is not None: 331 ↛ exitline 331 didn't return from function '_iterencode_list', because the condition on line 331 was never false

332 del markers[markerid] 

333 

334 def _iterencode_dict(dct, _current_indent_level): 

335 if not dct: 

336 yield '{}' 

337 return 

338 if markers is not None: 338 ↛ 343line 338 didn't jump to line 343, because the condition on line 338 was never false

339 markerid = id(dct) 

340 if markerid in markers: 

341 raise ValueError("Circular reference detected") 

342 markers[markerid] = dct 

343 yield '{' 

344 if _indent is not None: 

345 _current_indent_level += 1 

346 newline_indent = '\n' + _indent * _current_indent_level 

347 item_separator = _item_separator + newline_indent 

348 yield newline_indent 

349 else: 

350 newline_indent = None 

351 item_separator = _item_separator 

352 first = True 

353 if _sort_keys: 

354 items = sorted(dct.items()) 

355 else: 

356 items = dct.items() 

357 for key, value in items: 

358 if isinstance(key, str): 

359 pass 

360 # JavaScript is weakly typed for these, so it makes sense to 

361 # also allow them. Many encoders seem to do something like this. 

362 elif isinstance(key, float): 

363 # see comment for int/float in _make_iterencode 

364 key = _floatstr(key) 

365 elif key is True: 

366 key = 'true' 

367 elif key is False: 

368 key = 'false' 

369 elif key is None: 369 ↛ 370line 369 didn't jump to line 370, because the condition on line 369 was never true

370 key = 'null' 

371 elif isinstance(key, int): 

372 # see comment for int/float in _make_iterencode 

373 key = _intstr(key) 

374 elif _skipkeys: 

375 continue 

376 else: 

377 raise TypeError(f'keys must be str, int, float, bool or None, ' 

378 f'not {key.__class__.__name__}') 

379 if first: 

380 first = False 

381 else: 

382 yield item_separator 

383 yield _encoder(key) 

384 yield _key_separator 

385 if isinstance(value, str): 

386 yield _encoder(value) 

387 elif value is None: 

388 yield 'null' 

389 elif value is True: 

390 yield 'true' 

391 elif value is False: 

392 yield 'false' 

393 elif isinstance(value, int): 

394 # see comment for int/float in _make_iterencode 

395 yield _intstr(value) 

396 elif isinstance(value, float): 

397 # see comment for int/float in _make_iterencode 

398 yield _floatstr(value) 

399 else: 

400 if isinstance(value, (list, tuple)): 

401 chunks = _iterencode_list(value, _current_indent_level) 

402 elif isinstance(value, dict): 402 ↛ 405line 402 didn't jump to line 405, because the condition on line 402 was never false

403 chunks = _iterencode_dict(value, _current_indent_level) 

404 else: 

405 chunks = _iterencode(value, _current_indent_level) 

406 yield from chunks 

407 if newline_indent is not None: 

408 _current_indent_level -= 1 

409 yield '\n' + _indent * _current_indent_level 

410 yield '}' 

411 if markers is not None: 411 ↛ exitline 411 didn't return from function '_iterencode_dict', because the condition on line 411 was never false

412 del markers[markerid] 

413 

414 def _iterencode(o, _current_indent_level): 

415 if isinstance(o, str): 

416 yield _encoder(o) 

417 elif o is None: 

418 yield 'null' 

419 elif o is True: 419 ↛ 420line 419 didn't jump to line 420, because the condition on line 419 was never true

420 yield 'true' 

421 elif o is False: 421 ↛ 422line 421 didn't jump to line 422, because the condition on line 421 was never true

422 yield 'false' 

423 elif isinstance(o, int): 

424 # see comment for int/float in _make_iterencode 

425 yield _intstr(o) 

426 elif isinstance(o, float): 

427 # see comment for int/float in _make_iterencode 

428 yield _floatstr(o) 

429 elif isinstance(o, (list, tuple)): 

430 yield from _iterencode_list(o, _current_indent_level) 

431 elif isinstance(o, dict): 

432 yield from _iterencode_dict(o, _current_indent_level) 

433 else: 

434 if markers is not None: 

435 markerid = id(o) 

436 if markerid in markers: 

437 raise ValueError("Circular reference detected") 

438 markers[markerid] = o 

439 o = _default(o) 

440 yield from _iterencode(o, _current_indent_level) 

441 if markers is not None: 441 ↛ exitline 441 didn't return from function '_iterencode', because the condition on line 441 was never false

442 del markers[markerid] 

443 return _iterencode