Skip to content

Commit f062485

Browse files
committed
Merge branch 'fix-fast-binlog' into 'master'
fix(binlog): Improve error handling and streamline binary log processing Closes IDFGH-17270 See merge request espressif/esp-idf-monitor!110
2 parents a22d03f + ac5f499 commit f062485

File tree

3 files changed

+234
-90
lines changed

3 files changed

+234
-90
lines changed

esp_idf_monitor/base/binlog.py

Lines changed: 125 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -129,63 +129,70 @@ def retrieve_arguments(self, format_str: str, raw_args: bytes) -> List[Union[int
129129
length = match.group('length') or ''
130130
specifier = match.group('specifier')
131131

132-
# **Handling String (%s, %S)**
133-
if specifier in 'sS':
134-
if self.buffer_hex_log or self.buffer_char_log or self.buffer_hexdump_log:
135-
buffer_len = (
136-
args[-1] if isinstance(args[-1], int) else None
137-
) # Previous argument is the buffer length
138-
data, data_len = self.retrieve_data(raw_args[i_arg:], buffer_len)
139-
args.append(data)
132+
try:
133+
# Handling String (%s, %S)
134+
if specifier in 'sS':
135+
if self.buffer_hex_log or self.buffer_char_log or self.buffer_hexdump_log:
136+
buffer_len = (
137+
args[-1] if isinstance(args[-1], int) else None
138+
) # Previous argument is the buffer length
139+
data, data_len = self.retrieve_data(raw_args[i_arg:], buffer_len)
140+
args.append(data)
141+
else:
142+
text, data_len = self.retrieve_string(raw_args[i_arg:])
143+
args.append(text)
144+
i_arg += data_len
145+
146+
# Handling Floating Point (%f, %e, %g, etc.)
147+
elif specifier in 'fFeEgGaA':
148+
arg_format = '>d'
149+
args.append(struct.unpack_from(arg_format, raw_args, i_arg)[0])
150+
i_arg += struct.calcsize(arg_format)
151+
152+
# Handling Length-Prefixed Integers
153+
elif specifier in 'diouxX':
154+
# ESP32 sends only 32-bit or 64-bit values
155+
if length == 'll': # 64-bit integers
156+
fmt = 'q'
157+
else: # 32-bit integers
158+
if length == 'h': # short
159+
fmt = 'h'
160+
i_arg += 2 # padding to 32-bit
161+
elif length == 'hh': # char
162+
fmt = 'b'
163+
i_arg += 3 # padding to 32-bit
164+
else: # int
165+
fmt = 'i'
166+
fmt = fmt.lower() if specifier in 'di' else fmt.upper() # Signed or unsigned
167+
value = struct.unpack_from(f'>{fmt}', raw_args, i_arg)[0]
168+
i_arg += struct.calcsize(fmt)
169+
args.append(value)
170+
171+
# Handling Pointer (%p)
172+
elif specifier == 'p':
173+
arg_format = '>I'
174+
args.append(struct.unpack_from(arg_format, raw_args, i_arg)[0])
175+
i_arg += struct.calcsize(arg_format)
176+
177+
# Handling Characters (%c)
178+
elif specifier == 'c':
179+
i_arg += 3 # padding to 32-bit
180+
arg_format = '>c'
181+
args.append(struct.unpack_from(arg_format, raw_args, i_arg)[0].decode(errors='replace'))
182+
i_arg += struct.calcsize(arg_format)
183+
184+
# Default: Treat as integer (%d, %i)
140185
else:
141-
text, data_len = self.retrieve_string(raw_args[i_arg:])
142-
args.append(text)
143-
i_arg += data_len
144-
145-
# **Handling Floating Point (%f, %e, %g, etc.)**
146-
elif specifier in 'fFeEgGaA':
147-
arg_format = '>d'
148-
args.append(struct.unpack_from(arg_format, raw_args, i_arg)[0])
149-
i_arg += struct.calcsize(arg_format)
150-
151-
# **Handling Length-Prefixed Integers**
152-
elif specifier in 'diouxX':
153-
# **ESP32 sends only 32-bit or 64-bit values**
154-
if length == 'll': # 64-bit integers
155-
fmt = 'q'
156-
else: # 32-bit integers
157-
if length == 'h': # short
158-
fmt = 'h'
159-
i_arg += 2
160-
elif length == 'hh': # char
161-
fmt = 'b'
162-
i_arg += 3
163-
else: # int
164-
fmt = 'i'
165-
fmt = fmt.lower() if specifier in 'di' else fmt.upper() # Signed or unsigned
166-
value = struct.unpack_from(f'>{fmt}', raw_args, i_arg)[0]
167-
i_arg += struct.calcsize(fmt)
168-
args.append(value)
169-
170-
# **Handling Pointer (%p)**
171-
elif specifier == 'p':
172-
arg_format = '>I'
173-
args.append(struct.unpack_from(arg_format, raw_args, i_arg)[0])
174-
i_arg += struct.calcsize(arg_format)
175-
176-
# **Handling Characters (%c)**
177-
elif specifier == 'c':
178-
i_arg += 3
179-
arg_format = '>c'
180-
args.append(struct.unpack_from(arg_format, raw_args, i_arg)[0].decode(errors='replace'))
181-
i_arg += struct.calcsize(arg_format)
182-
183-
# **Default: Treat as integer (%d, %i)**
184-
else:
185-
arg_format = '>i'
186-
val = struct.unpack_from(arg_format, raw_args, i_arg)[0]
187-
args.append(val)
188-
i_arg += struct.calcsize(arg_format)
186+
arg_format = '>i'
187+
val = struct.unpack_from(arg_format, raw_args, i_arg)[0]
188+
args.append(val)
189+
i_arg += struct.calcsize(arg_format)
190+
191+
except struct.error:
192+
# Argument data is truncated (e.g. frame cut off mid-transmission).
193+
# Return whatever args we managed to unpack rather than crashing.
194+
break
195+
189196
return args
190197

191198

@@ -224,10 +231,20 @@ def crc8(data: bytes) -> int:
224231
crc &= 0xFF # Ensure it's within 8 bits
225232
return crc
226233

227-
def find_frames(self, data: bytes) -> Tuple[List[bytes], bytes]:
234+
@staticmethod
235+
def _plausible_frame_start(control: 'Control') -> bool:
236+
"""Return True if the control word looks like it could be the start of a real
237+
binary log frame rather than noise. Valid ESP log levels are 1-5 and the only
238+
defined version is 0; noise bytes (e.g. 0xFF 0xFF) produce level=7 / version=3
239+
which we reject so that scan-ahead continues instead of prematurely breaking."""
240+
return 1 <= control.level <= 5 and control.version == 0 and control.pkg_len >= 15
241+
242+
def find_frames(self, data: bytes) -> Tuple[List[bytes], bytes, bytes]:
228243
frames: List[bytes] = []
244+
leaked_chunks: List[bytes] = []
229245
i = 0
230-
idx_of_last_found_pkg = 0
246+
idx_after_last_frame = 0
247+
idx_partial_frame = None # start index of a plausible but incomplete frame
231248

232249
while i < len(data):
233250
if len(data[i:]) < 15: # Minimal frame len
@@ -237,40 +254,76 @@ def find_frames(self, data: bytes) -> Tuple[List[bytes], bytes]:
237254
try:
238255
control = Control(data[start_idx + 1 :])
239256
if control.pkg_len > len(data[i:]):
240-
break
257+
# Frame extends beyond available data. Only treat it as a
258+
# genuine partial frame (and stop scanning) if the control
259+
# word looks plausible. Noise bytes that happen to start
260+
# with 0x01/0x02 typically have nonsensical level/version
261+
# values, so those are skipped and scanning continues.
262+
if self._plausible_frame_start(control):
263+
idx_partial_frame = start_idx
264+
break
265+
i += 1
266+
continue
241267
frame = data[start_idx : start_idx + control.pkg_len]
242268
if control.pkg_len != 0 and self.crc8(frame) == 0:
269+
# Collect non-frame bytes before this frame
270+
if start_idx > idx_after_last_frame:
271+
leaked_chunks.append(data[idx_after_last_frame:start_idx])
243272
frames.append(frame)
244-
idx_of_last_found_pkg = start_idx + control.pkg_len
273+
idx_after_last_frame = start_idx + control.pkg_len
245274
i += control.pkg_len - 1
246275
else:
247-
raise ValueError('Invalid binary log; invalid CRC')
276+
# CRC mismatch – skip this byte and try to re-sync
277+
i += 1
278+
continue
248279
except (struct.error, IndexError):
249-
# Invalid control structure
250-
raise ValueError('Invalid binary log; invalid control structure')
251-
else:
252-
raise ValueError('Invalid binary log; invalid start of frame')
280+
# Malformed control structure – skip this byte and try to re-sync
281+
i += 1
282+
continue
283+
# Not a recognised start byte – advance and keep scanning
253284

254285
i += 1
255-
# Return recognized frames and any remaining unprocessed data
256-
return frames, data[idx_of_last_found_pkg:]
257286

258-
def convert_to_text(self, data: bytes) -> Tuple[List[bytes], bytes]:
287+
# Trailing non-frame data (e.g. boot log text after last valid frame)
288+
if idx_partial_frame is not None:
289+
# Stopped on partial frame: leaked text is from last frame to partial start
290+
if idx_partial_frame > idx_after_last_frame:
291+
leaked_chunks.append(data[idx_after_last_frame:idx_partial_frame])
292+
remaining = data[idx_partial_frame:]
293+
elif i < len(data) and self.detected(data[i]):
294+
# Remaining bytes start with binary marker; include up to there as leaked
295+
if i > idx_after_last_frame:
296+
leaked_chunks.append(data[idx_after_last_frame:i])
297+
remaining = data[i:]
298+
else:
299+
# No partial frame; everything after last frame is non-binary
300+
if len(data) > idx_after_last_frame:
301+
leaked_chunks.append(data[idx_after_last_frame:])
302+
remaining = b''
303+
304+
leaked_text = b''.join(leaked_chunks)
305+
return frames, remaining, leaked_text
306+
307+
def convert_to_text(self, data: bytes) -> Tuple[List[bytes], bytes, bytes]:
259308
messages: List[bytes] = []
260-
frames, incomplete_fragment = self.find_frames(data)
309+
frames, incomplete_fragment, leaked_text = self.find_frames(data)
261310
for pkg_msg in frames:
262311
elf_path = self.source_of_message(pkg_msg[0])
263312
msg = Message(self.debug, elf_path, pkg_msg)
264313
if msg.buffer_hex_log or msg.buffer_char_log or msg.buffer_hexdump_log:
265314
messages += self.format_buffer_message(msg)
266315
else:
267316
messages.append(self.format_message(msg))
268-
return messages, incomplete_fragment
317+
return messages, incomplete_fragment, leaked_text
269318

270319
def format_message(self, message: Message) -> bytes:
271-
text_msg = ArgFormatter().c_format(message.format, message.args)
320+
try:
321+
text_msg = ArgFormatter().c_format(message.format, message.args)
322+
except (IndexError, KeyError):
323+
# Fewer args were decoded than the format string expects (truncated frame).
324+
text_msg = message.format
272325
level_name = {1: 'E', 2: 'W', 3: 'I', 4: 'D', 5: 'V'}[message.control.level]
273-
return f'{level_name} ({message.timestamp}) {message.tag}: {text_msg}\n'.encode('ascii')
326+
return f'{level_name} ({message.timestamp}) {message.tag}: {text_msg}\n'.encode()
274327

275328
def format_buffer_message(self, message) -> List[bytes]:
276329
text_msg: List[bytes] = []

esp_idf_monitor/base/serial_handler.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -204,22 +204,22 @@ def handle_serial_input(
204204

205205
sp = self.splitdata(data)
206206
if self.binary_log_detected:
207-
try:
208-
text_lines, self._last_line_part = self.binlog.convert_to_text(sp[0])
209-
for line in text_lines:
207+
text_lines, self._last_line_part, leaked_text = self.binlog.convert_to_text(sp[0])
208+
for line in text_lines:
209+
self.print_colored(line)
210+
self.logger.handle_possible_pc_address_in_line(line)
211+
self.monitor_cmd_executor.execute_from_log_line(line)
212+
if leaked_text:
213+
leaked_lines = leaked_text.splitlines(keepends=True) or [b'']
214+
if leaked_lines and not (leaked_lines[-1].endswith(b'\n') or leaked_lines[-1].endswith(b'\r')):
215+
incomplete = leaked_lines.pop()
216+
if not self._last_line_part:
217+
self._last_line_part = incomplete
218+
for line in leaked_lines:
210219
self.print_colored(line)
211220
self.logger.handle_possible_pc_address_in_line(line)
212221
self.monitor_cmd_executor.execute_from_log_line(line)
213-
return
214-
except ValueError:
215-
# If no valid binary log frames were found, or if we have too much accumulated data
216-
# without valid frames, exit binary log mode
217-
self.binary_log_detected = False
218-
# Process the accumulated data as regular text instead
219-
accumulated_data = sp[0] if sp else b''
220-
if accumulated_data:
221-
self._last_line_part = accumulated_data
222-
sp = self.splitdata(b'') # Re-split the data normally
222+
return
223223

224224
for line in sp:
225225
line_strip = line.strip()

0 commit comments

Comments
 (0)