|
5 | 5 | """ |
6 | 6 | import codecs |
7 | 7 | import io |
| 8 | +import re |
8 | 9 | import typing |
9 | 10 | import zlib |
10 | 11 |
|
@@ -262,49 +263,24 @@ class LineDecoder: |
262 | 263 | as line endings, normalizing to `\n`. |
263 | 264 | """ |
264 | 265 |
|
| 266 | + _re = re.compile(r"(.*\n?)") |
| 267 | + |
265 | 268 | def __init__(self) -> None: |
266 | 269 | self.buffer = "" |
267 | 270 |
|
def decode(self, text: str) -> typing.List[str]:
    """
    Feed one chunk of text and return the lines that it completes.

    A line feed, a carriage return, or a carriage return immediately
    followed by a line feed each count as one line ending. Every returned
    line is terminated by a single line feed, whichever ending was used
    in the input.

    Text after the last line ending is kept in `self.buffer` and is
    prepended to the next chunk. A carriage return that is the very last
    character seen so far is kept in the buffer too, because the next
    chunk may begin with the line feed that completes the pair.

    Parameters:
        text: The next chunk of decoded text. May be empty.

    Returns:
        The complete lines available so far, in order. Empty when the
        chunk did not complete any line.
    """
    pending = self.buffer + text

    # A trailing carriage return is ambiguous: it is either a bare line
    # ending or the first half of a pair split across two chunks. Hold it
    # back until more text arrives.
    held_cr = ""
    if pending.endswith("\r"):
        pending, held_cr = pending[:-1], "\r"

    # Collapse the two-character ending first so that it is not counted
    # as two separate line endings.
    pending = pending.replace("\r\n", "\n").replace("\r", "\n")

    # Everything before the last line feed is complete; the rest is a
    # partial line that has to wait for more input.
    complete, newline, remainder = pending.rpartition("\n")
    if newline:
        lines = [line + "\n" for line in complete.split("\n")]
    else:
        lines = []

    self.buffer = remainder + held_cr

    return lines
310 | 286 |
|
|
0 commit comments