Skip to content

Commit dfb1696

Browse files
committed
Test that static strings (those in _PyRuntime.static_objects.singletons.strings, not strings allocated by deepfreeze) are reused.
Ref: python/cpython#103876
1 parent a163765 commit dfb1696

File tree

4 files changed

+70
-4
lines changed

4 files changed

+70
-4
lines changed

src/_cds/_cdsmodule.c

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,20 @@ PyCDS_InitMoveIn()
442442
PyCDS_Table_New();
443443
cds_status.move_in_ctx->in_heap_str_to_string_ref_list_map =
444444
PyCDS_Table_New();
445+
cds_status.move_in_ctx->static_strings = PyDict_New();
446+
447+
#define HANDLE_LITERAL(lit) \
448+
PyDict_SetDefault(cds_status.move_in_ctx->static_strings, (lit), (lit));
449+
#define HANDLE_ASCII(c) \
450+
PyDict_SetDefault(cds_status.move_in_ctx->static_strings, (c), (c));
451+
#define HANDLE_LATIN1(c) \
452+
PyDict_SetDefault(cds_status.move_in_ctx->static_strings, (c), (c));
453+
454+
#include "string_singletons.h"
455+
456+
#undef HANDLE_LITERAL
457+
#undef HANDLE_ASCII
458+
#undef HANDLE_LATIN1
445459
}
446460

447461
void
@@ -453,6 +467,9 @@ PyCDS_FinalizeMoveIn()
453467
cds_status.move_in_ctx->orig_pyobject_to_in_heap_pyobject_map);
454468
PyCDS_Table_Destroy(
455469
cds_status.move_in_ctx->in_heap_str_to_string_ref_list_map);
470+
471+
Py_DecRef(cds_status.move_in_ctx->static_strings);
472+
456473
free(cds_status.move_in_ctx);
457474
}
458475

@@ -578,6 +595,16 @@ _Py_COMP_DIAG_POP
578595
assert(PyCDS_STR_INTERNED(*target) == SSTATE_INTERNED_IMMORTAL);
579596
}
580597
else { /* String is not in archive yet. */
598+
if ((*target = PyDict_GetItem(
599+
cds_status.move_in_ctx->static_strings, op)) != NULL) {
600+
// `_Py_ID` strings are static and interned,
601+
// but `_Py_STR` and `_Py_SINGLETON(strings).{ascii,latin1}` are
602+
// not interned.
603+
// So we manually build the `static_strings` dict so that the
604+
// archive refers to those static strings.
605+
return;
606+
}
607+
581608
PyUnicode_InternInPlace(&op);
582609

583610
*target = _PyCDS_PyUnicode_Copy(op);
@@ -857,8 +884,7 @@ _PyCDS_PyUnicode_Copy(PyObject *op)
857884
data = unicode + 1;
858885
(((PyASCIIObject *)(unicode))->length) = size;
859886
(((PyASCIIObject *)(unicode))->hash) = -1;
860-
// immortal bit will be set later by interning
861-
PyCDS_STR_INTERNED(unicode) = SSTATE_NOT_INTERNED;
887+
PyCDS_STR_INTERNED(unicode) = SSTATE_INTERNED_IMMORTAL;
862888
(((PyASCIIObject *)(unicode))->state).kind = kind;
863889
(((PyASCIIObject *)(unicode))->state).compact = 1;
864890
(((PyASCIIObject *)(unicode))->state).ascii = is_ascii;

src/_cds/_cdsmodule.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ struct MoveInContext {
7676

7777
table *orig_pyobject_to_in_heap_pyobject_map;
7878
table *in_heap_str_to_string_ref_list_map;
79+
80+
PyObject *static_strings;
7981
};
8082

8183
/*

src/_cds/string_singletons.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#ifndef PYCDS_STRING_SINGLETONS_H
2+
#define PYCDS_STRING_SINGLETONS_H
3+
4+
#define LIT(lit_name) \
5+
(PyObject *)(&_PyRuntime.static_objects.singletons.strings.literals \
6+
._py_##lit_name)
7+
8+
HANDLE_LITERAL(LIT(anon_dictcomp))
9+
HANDLE_LITERAL(LIT(anon_genexpr))
10+
HANDLE_LITERAL(LIT(anon_lambda))
11+
HANDLE_LITERAL(LIT(anon_listcomp))
12+
HANDLE_LITERAL(LIT(anon_module))
13+
HANDLE_LITERAL(LIT(anon_setcomp))
14+
HANDLE_LITERAL(LIT(anon_string))
15+
HANDLE_LITERAL(LIT(anon_unknown))
16+
HANDLE_LITERAL(LIT(close_br))
17+
HANDLE_LITERAL(LIT(dbl_close_br))
18+
HANDLE_LITERAL(LIT(dbl_open_br))
19+
HANDLE_LITERAL(LIT(dbl_percent))
20+
HANDLE_LITERAL(LIT(dot))
21+
HANDLE_LITERAL(LIT(dot_locals))
22+
HANDLE_LITERAL(LIT(empty))
23+
HANDLE_LITERAL(LIT(json_decoder))
24+
HANDLE_LITERAL(LIT(list_err))
25+
HANDLE_LITERAL(LIT(newline))
26+
HANDLE_LITERAL(LIT(open_br))
27+
HANDLE_LITERAL(LIT(percent))
28+
HANDLE_LITERAL(LIT(shim_name))
29+
HANDLE_LITERAL(LIT(utf_8))
30+
31+
for (size_t i = 0; i < 128; ++i) {
32+
HANDLE_ASCII(
33+
(PyObject *)&_PyRuntime.static_objects.singletons.strings.ascii[i]);
34+
HANDLE_LATIN1(
35+
(PyObject *)&_PyRuntime.static_objects.singletons.strings.latin1[i]);
36+
}
37+
38+
#endif

tests/test_cds_extension/test_share_object.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,8 @@ def test_share_dynamic(self):
268268
self.run_check(s + random.choice(string.punctuation.replace('_', '')), False, True, True)
269269

270270
@assert_archive_created
271-
def test_share_ascii(self):
272-
for i in random.sample(range(0, 128 + 1), 10):
271+
def test_share_static_ascii_latin1(self):
272+
for i in random.sample(range(0, 128), 10):
273273
c = chr(i)
274274
l = chr(128 + i)
275275
self.run_check(c, True, False, False)

0 commit comments

Comments
 (0)