2 files changed: +23 -8 lines changed
Columns: original file line number, diff line number, diff line change
@@ -877,20 +877,36 @@ utf8_to_hex_entities (string s) {
877877 return result;
878878}
879879
880+
880881string
881- utf8_to_hex_string (string s) {
882- string result;
882+ utf8_to_utf16be_string (string s) {
883+ string result, hex ;
883884 int i, n= N (s);
884885 for (i=0 ; i<n; ) {
885886 unsigned int code= decode_from_utf8 (s, i);
886- string hex= as_hexadecimal (code);
887- while (N (hex) < 4 ) hex = " 0" * hex;
888- result << hex;
887+ // see e.g. https://en.wikipedia.org/wiki/UTF-16
888+ if (code >= 0x10000 ) {
889+ // supplementary planes
890+ unsigned int code2= code - 0x10000 ;
891+ unsigned int w1= 0xD800 + (code2 >> 10 );
892+ unsigned int w2= 0xDC00 + (code2 & 0x3FF );
893+ hex= as_hexadecimal (w1);
894+ while (N (hex) < 4 ) hex = " 0" * hex;
895+ result << hex;
896+ hex= as_hexadecimal (w2);
897+ while (N (hex) < 4 ) hex = " 0" * hex;
898+ result << hex;
899+ } else {
900+ // basic planes
901+ string hex= as_hexadecimal (code);
902+ while (N (hex) < 4 ) hex = " 0" * hex;
903+ result << hex;
904+ }
889905 }
890906 return result;
891907}
892908
893909string
894910utf8_to_pdf_hex_string (string s) {
895- return " <FEFF" * utf8_to_hex_string (cork_to_utf8 (s)) * " >" ;
896- }
911+ return " <FEFF" * utf8_to_utf16be_string (cork_to_utf8 (s)) * " >" ;
912+ }
Columns: original file line number, diff line number, diff line change
@@ -121,7 +121,6 @@ string convert_escapes (string in, bool utf8);
121121string convert_char_entities (string s);
122122string convert_char_entity (string s, int & start, bool & success);
123123string utf8_to_hex_entities (string s);
124- string utf8_to_hex_string (string s);
125124string utf8_to_pdf_hex_string (string s);
126125
127126#endif // CONVERTER_H
You can’t perform that action at this time.
0 commit comments