Skip to content

Commit 691f1d4

Browse files
seisman, yvonnefroehlich, weiji14
authored
Support non-ASCII characters in function/method arguments (#2584)
* Support all ISOlatin1 characters
* Support more ISOLatin1+ characters
* Support Symbols charset
* Support ZapfDingbats charset
* Add a test for non-ascii support

Co-authored-by: Yvonne Fröhlich <94163266+yvonnefroehlich@users.noreply.github.com>
Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com>
1 parent 5b05b6a commit 691f1d4

File tree

5 files changed

+141
-5
lines changed

5 files changed

+141
-5
lines changed

examples/gallery/embellishments/colorbar.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
# with a length/width (+w) of 4 cm by 0.5 cm, and plotted horizontally (+h)
4141
position="g0.3/8.7+w4c/0.5c+h",
4242
box=True,
43-
frame=["x+lTemperature", r"y+l\260C"],
43+
frame=["x+lTemperature", "y+l°C"],
4444
scale=100,
4545
)
4646

examples/gallery/symbols/text_symbols.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,8 @@
3636
# plot a lowercase "s" of size 3.5c and use the "Times-Italic" font,
3737
# color fill is set to "gold"
3838
fig.plot(x=5.5, y=1.5, style="l3.5c+ts+fTimes-Italic", fill="gold", pen=pen)
39-
# plot the pi symbol (\160 is octal code for pi) of size 3.5c, for this use
40-
# the "Symbol" font, the outline color of the symbol is set to
39+
# plot the pi symbol of size 3.5c, the outline color of the symbol is set to
4140
# "darkorange", the color fill is set to "magenta4"
42-
fig.plot(x=7, y=1.5, style="l3.5c+t\160+fSymbol,darkorange", fill="magenta4", pen=pen)
41+
fig.plot(x=7, y=1.5, style="l3.5c+tπ+fdarkorange", fill="magenta4", pen=pen)
4342

4443
fig.show()

pygmt/helpers/utils.py

Lines changed: 115 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import pathlib
66
import shutil
7+
import string
78
import subprocess
89
import sys
910
import time
@@ -196,6 +197,119 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data
196197
return kind
197198

198199

200+
def non_ascii_to_octal(argstr):
201+
r"""
202+
Translate non-ASCII characters to their corresponding octal codes.
203+
204+
Currently, only characters in the ISOLatin1+ charset and
205+
Symbol/ZapfDingbats fonts are supported.
206+
207+
Parameters
208+
----------
209+
argstr : str
210+
The string to be translated.
211+
212+
Returns
213+
-------
214+
translated_argstr : str
215+
The translated string.
216+
217+
Examples
218+
--------
219+
>>> non_ascii_to_octal("•‰“”±°ÿ")
220+
'\\31\\214\\216\\217\\261\\260\\377'
221+
>>> non_ascii_to_octal("αζΔΩ∑π∇")
222+
'@~\\141@~@~\\172@~@~\\104@~@~\\127@~@~\\345@~@~\\160@~@~\\321@~'
223+
>>> non_ascii_to_octal("✁❞❡➾")
224+
'@%34%\\41@%%@%34%\\176@%%@%34%\\241@%%@%34%\\376@%%'
225+
>>> non_ascii_to_octal("ABC ±120° DEF α ♥")
226+
'ABC \\261120\\260 DEF @~\\141@~ @%34%\\252@%%'
227+
"""
228+
# Dictionary mapping non-ASCII characters to octal codes
229+
mapping = {}
230+
231+
# Adobe Symbol charset
232+
# References:
233+
# 1. https://en.wikipedia.org/wiki/Symbol_(typeface)
234+
# 2. https://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/symbol.txt
235+
# Notes:
236+
# 1. \322 and \342 are "REGISTERED SIGN SERIF" and
237+
# "REGISTERED SIGN SANS SERIF" respectively, but only "REGISTERED SIGN"
238+
# is available in the unicode table. So both are mapped to
239+
# "REGISTERED SIGN". \323, \343, \324 and \344 also have the same
240+
# problem.
241+
# 2. Characters for \140, \275, \276 are incorrect.
242+
mapping.update(
243+
{
244+
c: "@~\\" + format(i, "o") + "@~"
245+
for c, i in zip(
246+
" !∀#∃%&∋()∗+,−./" # \04x-05x
247+
+ "0123456789:;<=>?" # \06x-07x
248+
+ "≅ΑΒΧΔΕΦΓΗΙϑΚΛΜΝΟ" # \10x-11x
249+
+ "ΠΘΡΣΤΥςΩΞΨΖ[∴]⊥_" # \12x-13x
250+
+ "αβχδεφγηιϕκλμνο" # \14x-15x
251+
+ "πθρστυϖωξψζ{|}∼" # \16x-17x. \177 is undefined
252+
+ "€ϒ′≤⁄∞ƒ♣♦♥♠↔←↑→↓" # \24x-\25x
253+
+ "°±″≥×∝∂•÷≠≡≈…↵" # \26x-27x
254+
+ "ℵℑℜ℘⊗⊕∅∩∪⊃⊇⊄⊂⊆∈∉" # \30x-31x
255+
+ "∠∇®©™∏√⋅¬∧∨⇔⇐⇑⇒⇓" # \32x-33x
256+
+ "◊〈®©™∑" # \34x-35x
257+
+ "〉∫⌠⌡", # \36x-37x. \360 and \377 are undefined
258+
[*range(32, 127), *range(160, 240), *range(241, 255)],
259+
)
260+
}
261+
)
262+
263+
# Adobe ZapfDingbats charset
264+
# References:
265+
# 1. https://en.wikipedia.org/wiki/Zapf_Dingbats
266+
# 2. https://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
267+
mapping.update(
268+
{
269+
c: "@%34%\\" + format(i, "o") + "@%%"
270+
for c, i in zip(
271+
" ✁✂✃✄☎✆✇✈✉☛☞✌✍✎✏" # \04x-\05x
272+
+ "✐✑✒✓✔✕✖✗✘✙✚✛✜✝✞✟" # \06x-\07x
273+
+ "✠✡✢✣✤✥✦✧★✩✪✫✬✭✮✯" # \10x-\11x
274+
+ "✰✱✲✳✴✵✶✷✸✹✺✻✼✽✾✿" # \12x-\13x
275+
+ "❀❁❂❃❄❅❆❇❈❉❊❋●❍■❏" # \14x-\15x
276+
+ "❐❑❒▲▼◆❖◗❘❙❚❛❜❝❞" # \16x-\17x. \177 is undefined
277+
+ "❡❢❣❤❥❦❧♣♦♥♠①②③④" # \24x-\25x. \240 is undefined
278+
+ "⑤⑥⑦⑧⑨⑩❶❷❸❹❺❻❼❽❾❿" # \26x-\27x
279+
+ "➀➁➂➃➄➅➆➇➈➉➊➋➌➍➎➏" # \30x-\31x
280+
+ "➐➑➒➓➔→↔↕➘➙➚➛➜➝➞➟" # \32x-\33x
281+
+ "➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯" # \34x-\35x
282+
+ "➱➲➳➴➵➶➷➸➹➺➻➼➽➾", # \36x-\37x. \360 and \377 are undefined
283+
[*range(32, 127), *range(161, 240), *range(241, 255)],
284+
)
285+
}
286+
)
287+
288+
# Adobe ISOLatin1+ charset (i.e., ISO-8859-1 with extensions)
289+
# References:
290+
# 1. https://en.wikipedia.org/wiki/ISO/IEC_8859-1
291+
# 2. https://docs.generic-mapping-tools.org/dev/cookbook/octal-codes.html
292+
# 3. https://www.adobe.com/jp/print/postscript/pdfs/PLRM.pdf
293+
mapping.update(
294+
{
295+
c: "\\" + format(i, "o")
296+
for c, i in zip(
297+
"•…™—–fiž" # \03x. \030 is undefined
298+
+ "š" # \177
299+
+ "Œ†‡Ł⁄‹Š›œŸŽł‰„“”" # \20x-\21x
300+
+ "ı`´ˆ˜¯˘˙¨‚˚¸'˝˛ˇ", # \22x-\23x
301+
[*range(25, 32), *range(127, 160)],
302+
)
303+
}
304+
)
305+
# \240-\377
306+
mapping.update({chr(i): "\\" + format(i, "o") for i in range(160, 256)})
307+
308+
# Drop printable ASCII characters from the mapping so they are never translated
309+
mapping = {k: v for k, v in mapping.items() if k not in string.printable}
310+
return argstr.translate(str.maketrans(mapping))
311+
312+
199313
def build_arg_string(kwdict, confdict=None, infile=None, outfile=None):
200314
r"""
201315
Convert keyword dictionaries and input/output files into a GMT argument
@@ -318,7 +432,7 @@ def build_arg_string(kwdict, confdict=None, infile=None, outfile=None):
318432
gmt_args = [str(infile)] + gmt_args
319433
if outfile:
320434
gmt_args.append("->" + str(outfile))
321-
return " ".join(gmt_args)
435+
return non_ascii_to_octal(" ".join(gmt_args))
322436

323437

324438
def is_nonstr_iter(value):
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
outs:
2+
- md5: d93bed7495d77cd2ef7cc1b64edb9b3a
3+
size: 19563
4+
path: test_non_ascii_to_octal.png

pygmt/tests/test_helpers.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import numpy as np
77
import pytest
88
import xarray as xr
9+
from pygmt import Figure
910
from pygmt.exceptions import GMTInvalidInput
1011
from pygmt.helpers import (
1112
GMTTempFile,
@@ -57,6 +58,24 @@ def test_unique_name():
5758
assert len(names) == len(set(names))
5859

5960

61+
@pytest.mark.mpl_image_compare
62+
def test_non_ascii_to_octal():
63+
"""
64+
Test support of non-ASCII characters.
65+
"""
66+
fig = Figure()
67+
fig.basemap(
68+
region=[0, 10, 0, 5],
69+
projection="X10c/5c",
70+
frame=[
71+
"xaf+lISOLatin1: fi‰“”¥",
72+
"yaf+lSymbol: αβ∇∋∈",
73+
"WSen+tZapfDingbats: ①❷➂➍✦❝❞",
74+
],
75+
)
76+
return fig
77+
78+
6079
def test_kwargs_to_strings_fails():
6180
"""
6281
Make sure it fails for invalid conversion types.

0 commit comments

Comments
 (0)