Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
c84f314
gh-119609: Add PyUnicode_Export() function
vstinner May 27, 2024
d0cdbd1
Address reviews
vstinner Sep 5, 2024
9b33dca
Exclude from limited C API 3.13 and older
vstinner Sep 5, 2024
cf1f74a
Replace PyErr_Format() with PyErr_SetString()
vstinner Sep 5, 2024
93d4470
Fix test_collections: implement UserString.__release_buffer__()
vstinner Sep 5, 2024
17ad7b9
Add format parameter to PyUnicode_Export()
vstinner Sep 9, 2024
d683d0a
format must not be NULL
vstinner Sep 9, 2024
78a70fa
Fix memory leak in unicode_releasebuffer()
vstinner Sep 10, 2024
79207f5
Remove PyUnicode_GetBufferFormat() documentation
vstinner Sep 10, 2024
bc0fb69
Apply suggestions from code review
vstinner Sep 10, 2024
2cdbc27
Set format to 0 on error
vstinner Sep 10, 2024
b5be22d
Remove trailing space
vstinner Sep 10, 2024
2960b25
Change constant values
vstinner Sep 10, 2024
bcb41f3
Update constants value in the doc
vstinner Sep 11, 2024
44cb702
Remove unicode_releasebuffer(); use bytes instead
vstinner Sep 12, 2024
1809d8d
PyUnicode_Export() returns the format
vstinner Sep 12, 2024
6707ef4
Fix PyUnicode_Export() signature in doc
vstinner Sep 12, 2024
abf5c58
Use _PyUnicode_EncodeUTF16() and _PyUnicode_EncodeUTF32()
vstinner Sep 12, 2024
033fc07
Use signed int in C tests
vstinner Sep 12, 2024
078dfcf
Update stable_abi: remove PyUnicode_GetBufferFormat()
vstinner Sep 12, 2024
79c6d01
Revert "Use _PyUnicode_EncodeUTF16() and _PyUnicode_EncodeUTF32()"
vstinner Sep 12, 2024
5479ab2
Allow surrogate characters in UTF-8
vstinner Sep 12, 2024
ab2f9b0
Merge branch 'main' into unicode_view
vstinner Sep 13, 2024
f71f230
Avoid a second copy in the UTF-8 export
vstinner Sep 13, 2024
492f10a
UCS-4 export: remove one memory copy
vstinner Sep 13, 2024
b031163
Update Py_buffer format
vstinner Sep 16, 2024
21e6012
Add PyUnicode_EXPORT_COPY flag
vstinner Sep 23, 2024
3267ce6
doc
vstinner Sep 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Update Py_buffer format
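Use the "=H" and "=I" formats (standard-size, native-byte-order struct codes) for the UCS2 and UCS4 exports, instead of the platform-dependent "H" and "I"/"L".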
  • Loading branch information
vstinner committed Sep 16, 2024
commit b031163710e9e16cca0390b9816b7438a4a45e96
9 changes: 2 additions & 7 deletions Lib/test/test_capi/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -1746,13 +1746,8 @@ def test_unicode_export(self):
| PyUnicode_FORMAT_UCS2
| PyUnicode_FORMAT_UCS4)
BUFFER_UCS1 = 'B'
BUFFER_UCS2 = 'H'
if struct.calcsize('I') == 4:
BUFFER_UCS4 = 'I'
elif struct.calcsize('L') == 4:
BUFFER_UCS4 = 'L'
else:
self.fail("unable to get BUFFER_UCS4 ")
BUFFER_UCS2 = '=H'
BUFFER_UCS4 = '=I'

def check_ucs1(text, formats):
if formats == PyUnicode_FORMAT_UCS1:
Expand Down
18 changes: 4 additions & 14 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2374,14 +2374,6 @@ int32_t
PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
Py_buffer *view)
{
#if SIZEOF_INT == 4
# define BUFFER_UCS4 "I"
#elif SIZEOF_LONG == 4
# define BUFFER_UCS4 "L"
#else
# error "unable to find BUFFER_UCS4"
#endif

if (!PyUnicode_Check(unicode)) {
PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
return -1;
Expand Down Expand Up @@ -2413,7 +2405,7 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
{
return unicode_export(unicode, view,
len, PyUnicode_2BYTE_DATA(unicode),
2, "H", PyUnicode_FORMAT_UCS2);
2, "=H", PyUnicode_FORMAT_UCS2);
}

// Convert ASCII or UCS1 to UCS2
Expand All @@ -2433,7 +2425,7 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
ucs2[len] = 0;

return unicode_export_bytes(bytes, view, len,
2, "H", PyUnicode_FORMAT_UCS2);
2, "=H", PyUnicode_FORMAT_UCS2);
}

// Native UCS4
Expand All @@ -2442,7 +2434,7 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
{
return unicode_export(unicode, view,
len, PyUnicode_4BYTE_DATA(unicode),
4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
4, "=I", PyUnicode_FORMAT_UCS4);
}

// Convert ASCII, UCS1 or UCS2 to UCS4
Expand All @@ -2456,7 +2448,7 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
(void)as_ucs4(unicode, ucs4, len + 1, 1);

return unicode_export_bytes(bytes, view, len,
4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
4, "=I", PyUnicode_FORMAT_UCS4);
}

// Encode UCS1, UCS2 or UCS4 to UTF-8
Expand Down Expand Up @@ -2485,8 +2477,6 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
PyErr_SetString(PyExc_ValueError,
"unable to find a matching export format");
return -1;

#undef BUFFER_UCS4
}


Expand Down