Skip to content

Commit 324e0bb

Browse files
committed
Fix for bug #59454
1 parent a5f4b88 commit 324e0bb

File tree

2 files changed

+23
-8
lines changed

2 files changed

+23
-8
lines changed

src/Data/String/converter.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -877,20 +877,36 @@ utf8_to_hex_entities (string s) {
877877
return result;
878878
}
879879

880+
880881
string
881-
utf8_to_hex_string (string s) {
882-
string result;
882+
utf8_to_utf16be_string (string s) {
883+
string result, hex;
883884
int i, n= N(s);
884885
for (i=0; i<n; ) {
885886
unsigned int code= decode_from_utf8 (s, i);
886-
string hex= as_hexadecimal (code);
887-
while (N(hex) < 4) hex = "0" * hex;
888-
result << hex;
887+
// see e.g. https://en.wikipedia.org/wiki/UTF-16
888+
if (code >= 0x10000) {
889+
// supplementary planes
890+
unsigned int code2= code - 0x10000;
891+
unsigned int w1= 0xD800 + (code2 >> 10);
892+
unsigned int w2= 0xDC00 + (code2 & 0x3FF);
893+
hex= as_hexadecimal (w1);
894+
while (N(hex) < 4) hex = "0" * hex;
895+
result << hex;
896+
hex= as_hexadecimal (w2);
897+
while (N(hex) < 4) hex = "0" * hex;
898+
result << hex;
899+
} else {
900+
// Basic Multilingual Plane (BMP): a single 16-bit code unit
901+
string hex= as_hexadecimal (code);
902+
while (N(hex) < 4) hex = "0" * hex;
903+
result << hex;
904+
}
889905
}
890906
return result;
891907
}
892908

893909
string
894910
utf8_to_pdf_hex_string (string s) {
895-
return "<FEFF" * utf8_to_hex_string (cork_to_utf8 (s)) * ">";
896-
}
911+
return "<FEFF" * utf8_to_utf16be_string (cork_to_utf8 (s)) * ">";
912+
}

src/Data/String/converter.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ string convert_escapes (string in, bool utf8);
121121
string convert_char_entities (string s);
122122
string convert_char_entity (string s, int& start, bool& success);
123123
string utf8_to_hex_entities (string s);
124-
string utf8_to_hex_string (string s);
125124
string utf8_to_pdf_hex_string (string s);
126125

127126
#endif // CONVERTER_H

0 commit comments

Comments
 (0)