Skip to content

Commit b01000e

Browse files
committed
[3.9] bpo-44885: Correct the ast locations of f-strings with format specs and repeated expressions (GH-27729).
(cherry picked from commit 8e832fb) Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
1 parent f7635f0 commit b01000e

4 files changed

Lines changed: 62 additions & 55 deletions

File tree

Lib/test/test_fstring.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -212,11 +212,6 @@ def test_ast_line_numbers_nested(self):
212212
self.assertEqual(call.col_offset, 11)
213213

214214
def test_ast_line_numbers_duplicate_expression(self):
215-
"""Duplicate expression
216-
217-
NOTE: this is currently broken, always sets location of the first
218-
expression.
219-
"""
220215
expr = """
221216
a = 10
222217
f'{a * x()} {a * x()} {a * x()}'
@@ -266,9 +261,9 @@ def test_ast_line_numbers_duplicate_expression(self):
266261
self.assertEqual(binop.lineno, 3)
267262
self.assertEqual(binop.left.lineno, 3)
268263
self.assertEqual(binop.right.lineno, 3)
269-
self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong
270-
self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong
271-
self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong
264+
self.assertEqual(binop.col_offset, 13)
265+
self.assertEqual(binop.left.col_offset, 13)
266+
self.assertEqual(binop.right.col_offset, 17)
272267
# check the third binop location
273268
binop = t.body[1].value.values[4].value
274269
self.assertEqual(type(binop), ast.BinOp)
@@ -278,9 +273,32 @@ def test_ast_line_numbers_duplicate_expression(self):
278273
self.assertEqual(binop.lineno, 3)
279274
self.assertEqual(binop.left.lineno, 3)
280275
self.assertEqual(binop.right.lineno, 3)
281-
self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong
282-
self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong
283-
self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong
276+
self.assertEqual(binop.col_offset, 23)
277+
self.assertEqual(binop.left.col_offset, 23)
278+
self.assertEqual(binop.right.col_offset, 27)
279+
280+
def test_ast_numbers_fstring_with_formatting(self):
281+
282+
t = ast.parse('f"Here is that pesky {xxx:.3f} again"')
283+
self.assertEqual(len(t.body), 1)
284+
self.assertEqual(t.body[0].lineno, 1)
285+
286+
self.assertEqual(type(t.body[0]), ast.Expr)
287+
self.assertEqual(type(t.body[0].value), ast.JoinedStr)
288+
self.assertEqual(len(t.body[0].value.values), 3)
289+
290+
self.assertEqual(type(t.body[0].value.values[0]), ast.Constant)
291+
self.assertEqual(type(t.body[0].value.values[1]), ast.FormattedValue)
292+
self.assertEqual(type(t.body[0].value.values[2]), ast.Constant)
293+
294+
_, expr, _ = t.body[0].value.values
295+
296+
name = expr.value
297+
self.assertEqual(type(name), ast.Name)
298+
self.assertEqual(name.lineno, 1)
299+
self.assertEqual(name.end_lineno, 1)
300+
self.assertEqual(name.col_offset, 22)
301+
self.assertEqual(name.end_col_offset, 25)
284302

285303
def test_ast_line_numbers_multiline_fstring(self):
286304
# See bpo-30465 for details.

Lib/test/test_peg_parser.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -231,11 +231,6 @@ def f() -> Any:
231231
('f-string_doublestarred', "f'{ {**x} }'"),
232232
('f-string_escape_brace', "f'{{Escape'"),
233233
('f-string_escape_closing_brace', "f'Escape}}'"),
234-
('f-string_repr', "f'{a!r}'"),
235-
('f-string_str', "f'{a!s}'"),
236-
('f-string_ascii', "f'{a!a}'"),
237-
('f-string_debug', "f'{a=}'"),
238-
('f-string_padding', "f'{a:03d}'"),
239234
('f-string_multiline',
240235
"""
241236
f'''
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Correct the ast locations of f-strings with format specs and repeated
2+
expressions. Patch by Pablo Galindo.

Parser/pegen/parse_string.c

Lines changed: 31 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -284,49 +284,48 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
284284
/* Fix locations for the given node and its children.
285285
286286
`parent` is the enclosing node.
287+
`expr_start` is the starting position of the expression (pointing to the open brace).
287288
`n` is the node whose locations are going to be fixed relative to parent.
288289
`expr_str` is the child node's string representation, including braces.
289290
*/
290291
static bool
291-
fstring_find_expr_location(Token *parent, char *expr_str, int *p_lines, int *p_cols)
292+
fstring_find_expr_location(Token *parent, const char* expr_start, char *expr_str, int *p_lines, int *p_cols)
292293
{
293294
*p_lines = 0;
294295
*p_cols = 0;
296+
assert(expr_start != NULL && *expr_start == '{');
295297
if (parent && parent->bytes) {
296298
char *parent_str = PyBytes_AsString(parent->bytes);
297299
if (!parent_str) {
298300
return false;
299301
}
300-
char *substr = strstr(parent_str, expr_str);
301-
if (substr) {
302-
// The following is needed, in order to correctly shift the column
303-
// offset, in the case that (disregarding any whitespace) a newline
304-
// immediately follows the opening curly brace of the fstring expression.
305-
bool newline_after_brace = 1;
306-
char *start = substr + 1;
307-
while (start && *start != '}' && *start != '\n') {
308-
if (*start != ' ' && *start != '\t' && *start != '\f') {
309-
newline_after_brace = 0;
310-
break;
311-
}
312-
start++;
302+
// The following is needed, in order to correctly shift the column
303+
// offset, in the case that (disregarding any whitespace) a newline
304+
// immediately follows the opening curly brace of the fstring expression.
305+
bool newline_after_brace = 1;
306+
const char *start = expr_start + 1;
307+
while (start && *start != '}' && *start != '\n') {
308+
if (*start != ' ' && *start != '\t' && *start != '\f') {
309+
newline_after_brace = 0;
310+
break;
313311
}
312+
start++;
313+
}
314314

315-
// Account for the characters from the last newline character to our
316-
// left until the beginning of substr.
317-
if (!newline_after_brace) {
318-
start = substr;
319-
while (start > parent_str && *start != '\n') {
320-
start--;
321-
}
322-
*p_cols += (int)(substr - start);
315+
// Account for the characters from the last newline character to our
316+
// left until the beginning of expr_start.
317+
if (!newline_after_brace) {
318+
start = expr_start;
319+
while (start > parent_str && *start != '\n') {
320+
start--;
323321
}
324-
/* adjust the start based on the number of newlines encountered
325-
before the f-string expression */
326-
for (char* p = parent_str; p < substr; p++) {
327-
if (*p == '\n') {
328-
(*p_lines)++;
329-
}
322+
*p_cols += (int)(expr_start - start);
323+
}
324+
/* adjust the start based on the number of newlines encountered
325+
before the f-string expression */
326+
for (const char *p = parent_str; p < expr_start; p++) {
327+
if (*p == '\n') {
328+
(*p_lines)++;
330329
}
331330
}
332331
}
@@ -370,26 +369,19 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
370369

371370
len = expr_end - expr_start;
372371
/* Allocate 3 extra bytes: open paren, close paren, null byte. */
373-
str = PyMem_Malloc(len + 3);
372+
str = PyMem_Calloc(len + 3, sizeof(char));
374373
if (str == NULL) {
375374
PyErr_NoMemory();
376375
return NULL;
377376
}
378377

379378
// The call to fstring_find_expr_location is responsible for finding the column offset
380379
// the generated AST nodes need to be shifted to the right, which is equal to the number
381-
// of the f-string characters before the expression starts. In order to correctly compute
382-
// this offset, strstr gets called in fstring_find_expr_location which only succeeds
383-
// if curly braces appear before and after the f-string expression (exactly like they do
384-
// in the f-string itself), hence the following lines.
385-
str[0] = '{';
380+
// of the f-string characters before the expression starts.
386381
memcpy(str+1, expr_start, len);
387-
str[len+1] = '}';
388-
str[len+2] = 0;
389-
390382
int lines, cols;
391-
if (!fstring_find_expr_location(t, str, &lines, &cols)) {
392-
PyMem_FREE(str);
383+
if (!fstring_find_expr_location(t, expr_start-1, str+1, &lines, &cols)) {
384+
PyMem_Free(str);
393385
return NULL;
394386
}
395387

0 commit comments

Comments
 (0)