Skip to content

Commit ee687ae

Browse files
committed
Replace quadratic algo in LineDecoder
This leads to enormous speedups for operations such as Response(...).iter_lines(), as described in issue #2422.
1 parent 9e97d7d commit ee687ae

File tree

2 files changed

+24
-57
lines changed

2 files changed

+24
-57
lines changed

httpx/_decoders.py

Lines changed: 14 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -266,57 +266,24 @@ def __init__(self) -> None:
266266
self.buffer = ""
267267

268268
def decode(self, text: str) -> typing.List[str]:
269-
lines = []
270-
271-
if text and self.buffer and self.buffer[-1] == "\r":
272-
if text.startswith("\n"):
273-
# Handle the case where we have an "\r\n" split across
274-
# our previous input, and our new chunk.
275-
lines.append(self.buffer[:-1] + "\n")
276-
self.buffer = ""
277-
text = text[1:]
278-
else:
279-
# Handle the case where we have "\r" at the end of our
280-
# previous input.
281-
lines.append(self.buffer[:-1] + "\n")
282-
self.buffer = ""
283-
284-
while text:
285-
num_chars = len(text)
286-
for idx in range(num_chars):
287-
char = text[idx]
288-
next_char = None if idx + 1 == num_chars else text[idx + 1]
289-
if char == "\n":
290-
lines.append(self.buffer + text[: idx + 1])
291-
self.buffer = ""
292-
text = text[idx + 1 :]
293-
break
294-
elif char == "\r" and next_char == "\n":
295-
lines.append(self.buffer + text[:idx] + "\n")
296-
self.buffer = ""
297-
text = text[idx + 2 :]
298-
break
299-
elif char == "\r" and next_char is not None:
300-
lines.append(self.buffer + text[:idx] + "\n")
301-
self.buffer = ""
302-
text = text[idx + 1 :]
303-
break
304-
elif next_char is None:
305-
self.buffer += text
306-
text = ""
307-
break
269+
if self.buffer:
270+
text = self.buffer + text
271+
272+
if not text:
273+
return []
274+
275+
lines = text.splitlines(True)
276+
if text.endswith("\n"):
277+
self.buffer = ""
278+
else:
279+
remainder = lines.pop()
280+
self.buffer = remainder
308281

309282
return lines
310283

311284
def flush(self) -> typing.List[str]:
312-
if self.buffer.endswith("\r"):
313-
# Handle the case where we had a trailing '\r', which could have
314-
# been a '\r\n' pair.
315-
lines = [self.buffer[:-1] + "\n"]
316-
elif self.buffer:
317-
lines = [self.buffer]
318-
else:
319-
lines = []
285+
# splitlines(True) keeps line endings: a buffered trailing "\r" is returned intact, and an empty buffer yields []
286+
lines = self.buffer.splitlines(True)
320287
self.buffer = ""
321288
return lines
322289

tests/test_decoders.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -257,48 +257,48 @@ def test_line_decoder_nl():
257257
def test_line_decoder_cr():
258258
decoder = LineDecoder()
259259
assert decoder.decode("") == []
260-
assert decoder.decode("a\r\rb\rc") == ["a\n", "\n", "b\n"]
260+
assert decoder.decode("a\r\rb\rc") == ["a\r", "\r", "b\r"]
261261
assert decoder.flush() == ["c"]
262262

263263
decoder = LineDecoder()
264264
assert decoder.decode("") == []
265-
assert decoder.decode("a\r\rb\rc\r") == ["a\n", "\n", "b\n"]
266-
assert decoder.flush() == ["c\n"]
265+
assert decoder.decode("a\r\rb\rc\r") == ["a\r", "\r", "b\r"]
266+
assert decoder.flush() == ["c\r"]
267267

268268
# Issue #1033
269269
decoder = LineDecoder()
270270
assert decoder.decode("") == []
271271
assert decoder.decode("12345\r") == []
272-
assert decoder.decode("foo ") == ["12345\n"]
272+
assert decoder.decode("foo ") == ["12345\r"]
273273
assert decoder.decode("bar ") == []
274274
assert decoder.decode("baz\r") == []
275-
assert decoder.flush() == ["foo bar baz\n"]
275+
assert decoder.flush() == ["foo bar baz\r"]
276276

277277

278278
def test_line_decoder_crnl():
279279
decoder = LineDecoder()
280280
assert decoder.decode("") == []
281-
assert decoder.decode("a\r\n\r\nb\r\nc") == ["a\n", "\n", "b\n"]
281+
assert decoder.decode("a\r\n\r\nb\r\nc") == ["a\r\n", "\r\n", "b\r\n"]
282282
assert decoder.flush() == ["c"]
283283

284284
decoder = LineDecoder()
285285
assert decoder.decode("") == []
286-
assert decoder.decode("a\r\n\r\nb\r\nc\r\n") == ["a\n", "\n", "b\n", "c\n"]
286+
assert decoder.decode("a\r\n\r\nb\r\nc\r\n") == ["a\r\n", "\r\n", "b\r\n", "c\r\n"]
287287
assert decoder.flush() == []
288288

289289
decoder = LineDecoder()
290290
assert decoder.decode("") == []
291291
assert decoder.decode("a\r") == []
292-
assert decoder.decode("\n\r\nb\r\nc") == ["a\n", "\n", "b\n"]
292+
assert decoder.decode("\n\r\nb\r\nc") == ["a\r\n", "\r\n", "b\r\n"]
293293
assert decoder.flush() == ["c"]
294294

295295
# Issue #1033
296296
decoder = LineDecoder()
297297
assert decoder.decode("") == []
298-
assert decoder.decode("12345\r\n") == ["12345\n"]
298+
assert decoder.decode("12345\r\n") == ["12345\r\n"]
299299
assert decoder.decode("foo ") == []
300300
assert decoder.decode("bar ") == []
301-
assert decoder.decode("baz\r\n") == ["foo bar baz\n"]
301+
assert decoder.decode("baz\r\n") == ["foo bar baz\r\n"]
302302
assert decoder.flush() == []
303303

304304

0 commit comments

Comments
 (0)