@@ -766,7 +766,7 @@ config_set_bytes_string(PyConfig *config, wchar_t **config_str,
766766 configured. */
767767PyStatus
768768PyConfig_SetBytesString (PyConfig * config , wchar_t * * config_str ,
/* NOTE(review): the removed/added pair for line 769 reads identically in this
   rendering; presumably a whitespace-only change -- confirm against the
   real patch. */
769- const char * str )
769+ const char * str )
770770{
/* Forward config, config_str and str to CONFIG_SET_BYTES_STR (defined outside
   this excerpt) with the literal "string" as fourth argument, and return its
   PyStatus result unchanged. */
771771 return CONFIG_SET_BYTES_STR (config , config_str , str , "string" );
772772}
@@ -1466,8 +1466,13 @@ config_read_complex_options(PyConfig *config)
14661466
14671467
14681468static const wchar_t *
1469- config_get_stdio_errors (void )
1469+ config_get_stdio_errors (const PyPreConfig * preconfig )
14701470{
1471+ if (preconfig -> utf8_mode ) {
1472+ /* UTF-8 Mode uses UTF-8/surrogateescape */
1473+ return L"surrogateescape" ;
1474+ }
1475+
14711476#ifndef MS_WINDOWS
14721477 const char * loc = setlocale (LC_CTYPE , NULL );
14731478 if (loc != NULL ) {
@@ -1492,26 +1497,41 @@ config_get_stdio_errors(void)
14921497}
14931498
14941499
1500+ // See also _Py_GetLocaleEncoding() and config_get_fs_encoding()
// Store the name of the locale encoding into *locale_encoding and return the
// resulting PyStatus.
//
// Selection order in the added ('+') version of the function:
//   1. _Py_FORCE_UTF8_LOCALE defined at build time: always "utf-8".
//   2. preconfig->utf8_mode is non-zero: "utf-8".
//   3. MS_WINDOWS: "cp" followed by the value returned by GetACP().
//   4. Otherwise: the string returned by nl_langinfo(CODESET).
//
// The patch adds the preconfig parameter; the removed ('-') signature took
// only config and locale_encoding.
14951501static PyStatus
1496- config_get_locale_encoding (PyConfig * config , wchar_t * * locale_encoding )
1502+ config_get_locale_encoding (PyConfig * config , const PyPreConfig * preconfig ,
1503+ wchar_t * * locale_encoding )
14971504{
// The forced-UTF-8 build check now comes first, so it is tested before
// MS_WINDOWS (the removed code tested it in an #elif after MS_WINDOWS).
1505+ #ifdef _Py_FORCE_UTF8_LOCALE
1506+ return PyConfig_SetString (config , locale_encoding , L"utf-8" );
1507+ #else
1508+ if (preconfig -> utf8_mode ) {
1509+ return PyConfig_SetString (config , locale_encoding , L"utf-8" );
1510+ }
1511+
14981512#ifdef MS_WINDOWS
// Format the code page number as "cp<number>" in a 20-byte buffer; the result
// is a bytes string, hence PyConfig_SetBytesString() rather than
// PyConfig_SetString().
14991513 char encoding [20 ];
15001514 PyOS_snprintf (encoding , sizeof (encoding ), "cp%u" , GetACP ());
15011515 return PyConfig_SetBytesString (config , locale_encoding , encoding );
1502- #elif defined(_Py_FORCE_UTF8_LOCALE )
1503- return PyConfig_SetString (config , locale_encoding , L"utf-8" );
15041516#else
15051517 const char * encoding = nl_langinfo (CODESET );
// Both a NULL result and an empty string are treated as "no encoding".
15061518 if (!encoding || encoding [0 ] == '\0' ) {
// With _Py_FORCE_UTF8_FS_ENCODING the missing encoding is replaced by "UTF-8"
// and execution continues below; without it the function returns an error.
1519+ #ifdef _Py_FORCE_UTF8_FS_ENCODING
1520+ // nl_langinfo() can return an empty string when the LC_CTYPE locale is
1521+ // not supported. Default to UTF-8 in that case, because UTF-8 is the
1522+ // default charset on macOS.
1523+ encoding = "UTF-8" ;
1524+ #else
// NOTE(review): the new message mentions only an empty string, but the
// condition above also covers a NULL return value.
15071525 return _PyStatus_ERR ("failed to get the locale encoding: "
1508- "nl_langinfo(CODESET) failed" );
1526+ "nl_langinfo(CODESET) returns an empty string" );
1527+ #endif
15091528 }
15101529 /* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
15111530 return CONFIG_SET_BYTES_STR (config ,
15121531 locale_encoding , encoding ,
15131532 "nl_langinfo(CODESET)" );
// The single removed #endif is replaced by two annotated ones, closing the
// MS_WINDOWS conditional and the outer _Py_FORCE_UTF8_LOCALE conditional.
1514- #endif
1533+ #endif // !MS_WINDOWS
1534+ #endif // !_Py_FORCE_UTF8_LOCALE
15151535}
15161536
15171537
@@ -1596,33 +1616,16 @@ config_init_stdio_encoding(PyConfig *config,
15961616 PyMem_RawFree (pythonioencoding );
15971617 }
15981618
1599- /* UTF-8 Mode uses UTF-8/surrogateescape */
1600- if (preconfig -> utf8_mode ) {
1601- if (config -> stdio_encoding == NULL ) {
1602- status = PyConfig_SetString (config , & config -> stdio_encoding ,
1603- L"utf-8" );
1604- if (_PyStatus_EXCEPTION (status )) {
1605- return status ;
1606- }
1607- }
1608- if (config -> stdio_errors == NULL ) {
1609- status = PyConfig_SetString (config , & config -> stdio_errors ,
1610- L"surrogateescape" );
1611- if (_PyStatus_EXCEPTION (status )) {
1612- return status ;
1613- }
1614- }
1615- }
1616-
16171619 /* Choose the default error handler based on the current locale. */
16181620 if (config -> stdio_encoding == NULL ) {
1619- status = config_get_locale_encoding (config , & config -> stdio_encoding );
1621+ status = config_get_locale_encoding (config , preconfig ,
1622+ & config -> stdio_encoding );
16201623 if (_PyStatus_EXCEPTION (status )) {
16211624 return status ;
16221625 }
16231626 }
16241627 if (config -> stdio_errors == NULL ) {
1625- const wchar_t * errors = config_get_stdio_errors ();
1628+ const wchar_t * errors = config_get_stdio_errors (preconfig );
16261629 assert (errors != NULL );
16271630
16281631 status = PyConfig_SetString (config , & config -> stdio_errors , errors );
@@ -1635,46 +1638,46 @@ config_init_stdio_encoding(PyConfig *config,
16351638}
16361639
16371640
1641+ // See also config_get_locale_encoding()
// Store the name of the filesystem encoding into *fs_encoding and return the
// resulting PyStatus. Exactly one of three build-time branches is compiled:
//   - _Py_FORCE_UTF8_FS_ENCODING defined: always "utf-8".
//   - MS_WINDOWS: "mbcs" when preconfig->legacy_windows_fs_encoding is
//     non-zero, otherwise "utf-8"; preconfig->utf8_mode is not consulted.
//   - otherwise: "utf-8" in UTF-8 Mode, "ascii" when _Py_GetForceASCII()
//     returns non-zero, else the result of config_get_locale_encoding().
// Only the encoding name is set here; the error handlers named in the Windows
// comments below ("replace", "surrogatepass") are not set by this function.
1642+ static PyStatus
1643+ config_get_fs_encoding (PyConfig * config , const PyPreConfig * preconfig ,
1644+ wchar_t * * fs_encoding )
1645+ {
1646+ #ifdef _Py_FORCE_UTF8_FS_ENCODING
1647+ return PyConfig_SetString (config , fs_encoding , L"utf-8" );
1648+ #elif defined(MS_WINDOWS )
1649+ const wchar_t * encoding ;
1650+ if (preconfig -> legacy_windows_fs_encoding ) {
1651+ // Legacy Windows filesystem encoding: mbcs/replace
1652+ encoding = L"mbcs" ;
1653+ }
1654+ else {
1655+ // Windows defaults to utf-8/surrogatepass (PEP 529)
1656+ encoding = L"utf-8" ;
1657+ }
1658+ return PyConfig_SetString (config , fs_encoding , encoding );
1659+ #else // !MS_WINDOWS
// The checks run in priority order: UTF-8 Mode wins over the forced-ASCII
// check, which wins over the locale encoding lookup.
1660+ if (preconfig -> utf8_mode ) {
1661+ return PyConfig_SetString (config , fs_encoding , L"utf-8" );
1662+ }
1663+ else if (_Py_GetForceASCII ()) {
1664+ return PyConfig_SetString (config , fs_encoding , L"ascii" );
1665+ }
1666+ else {
// Passing preconfig through lets the callee apply its own utf8_mode check,
// although utf8_mode is already known to be zero on this path.
1667+ return config_get_locale_encoding (config , preconfig , fs_encoding );
1668+ }
1669+ #endif // !MS_WINDOWS
1670+ }
1671+
1672+
16381673static PyStatus
16391674config_init_fs_encoding (PyConfig * config , const PyPreConfig * preconfig )
16401675{
16411676 PyStatus status ;
16421677
16431678 if (config -> filesystem_encoding == NULL ) {
1644- #ifdef _Py_FORCE_UTF8_FS_ENCODING
1645- status = PyConfig_SetString (config , & config -> filesystem_encoding , L"utf-8" );
1646- #else
1647-
1648- #ifdef MS_WINDOWS
1649- if (preconfig -> legacy_windows_fs_encoding ) {
1650- /* Legacy Windows filesystem encoding: mbcs/replace */
1651- status = PyConfig_SetString (config , & config -> filesystem_encoding ,
1652- L"mbcs" );
1653- }
1654- else
1655- #endif
1656- if (preconfig -> utf8_mode ) {
1657- status = PyConfig_SetString (config , & config -> filesystem_encoding ,
1658- L"utf-8" );
1659- }
1660- #ifndef MS_WINDOWS
1661- else if (_Py_GetForceASCII ()) {
1662- status = PyConfig_SetString (config , & config -> filesystem_encoding ,
1663- L"ascii" );
1664- }
1665- #endif
1666- else {
1667- #ifdef MS_WINDOWS
1668- /* Windows defaults to utf-8/surrogatepass (PEP 529). */
1669- status = PyConfig_SetString (config , & config -> filesystem_encoding ,
1670- L"utf-8" );
1671- #else
1672- status = config_get_locale_encoding (config ,
1673- & config -> filesystem_encoding );
1674- #endif
1675- }
1676- #endif /* !_Py_FORCE_UTF8_FS_ENCODING */
1677-
1679+ status = config_get_fs_encoding (config , preconfig ,
1680+ & config -> filesystem_encoding );
16781681 if (_PyStatus_EXCEPTION (status )) {
16791682 return status ;
16801683 }
0 commit comments