Skip to content

Commit c880781

Browse files
committed
inline translation in book
1 parent e77dff8 commit c880781

File tree

7 files changed

+160
-89
lines changed

7 files changed

+160
-89
lines changed

README.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ and [extended markup](https://python-markdown.github.io/extensions/).
5959
Sound changes are defined in `changes.lsc`,
6060
and applied using Lexurgy.
6161

62+
### Lexicon
6263
Pyconlang's lexicon is defined in `lexicon.txt`.
6364
You define basic entries using the `entry` declaration:
6465
```
@@ -129,7 +130,7 @@ entry &noun <apple> *saka (n.) apple, any kind of tree-fruit
129130
You can see the diagrams for the lexicon syntax
130131
[here](https://htmlpreview.github.io/?https://github.com/neta-elad/pyconlang/blob/main/diagrams.html).
131132

132-
### Complete Example
133+
#### Example
133134
Given the sound changes `changes.lsc`:
134135
```
135136
Class vowel {a, e, i, o, u}
@@ -181,6 +182,15 @@ The following entries will appear in the book:
181182
>
182183
> ...
183184
185+
### Markdown Extensions
186+
Inline translations (using the lexicon)
187+
can be inserted between two hash signs:
188+
```
189+
**An example: #*aki@after-palatalization <stone>.PL#.**
190+
```
191+
will turn out as
192+
> **An example: agi abagigim.**
193+
184194

185195

186196
## TODO
@@ -202,4 +212,4 @@ The following entries will appear in the book:
202212
- [ ] Affixes list
203213
- [ ] Phonology tables
204214
- [ ] Conjugation tables
205-
- [ ] Inline translation
215+
- [x] Inline translation

pyconlang/book/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
from .. import PYCONLANG_PATH
1111
from .block import Boxed
1212
from .inline import InlineDelete, InlineInsert
13-
from .preprocess import LexiconInserter, SkipLine
13+
from .lexicon_inserter import LexiconInserter
14+
from .preprocess import SkipLine
1415

1516

1617
class Compiler:

pyconlang/book/lexicon_inserter.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import string
2+
from itertools import chain
3+
from typing import Any, Dict, List, Match, Tuple, Union
4+
from xml.etree.ElementTree import Element
5+
6+
from markdown import Extension, Markdown
7+
from markdown.inlinepatterns import InlineProcessor
8+
from markdown.preprocessors import Preprocessor
9+
10+
from ..lexicon import Lexicon
11+
from ..lexicon.parser import parse_lexicon_file, parse_sentence
12+
from ..lexurgy import evolve
13+
from ..types import AffixType, Entry, Form, ResolvedForm
14+
15+
16+
class LexiconPreprocessor(Preprocessor):
17+
lexicon: Lexicon
18+
19+
def __init__(self, md: Markdown, lexicon: Lexicon) -> None:
20+
super().__init__(md)
21+
self.lexicon = lexicon
22+
23+
def run(self, lines: List[str]) -> List[str]:
24+
new_lines = []
25+
for line in lines:
26+
if line.strip() == "!lexicon":
27+
lexicon: Dict[str, List[Tuple[List[str], Entry]]] = {}
28+
for entry in self.lexicon.entries:
29+
evolved = self.evolve_all(entry)
30+
letter = evolved[0][0]
31+
lexicon.setdefault(letter, [])
32+
lexicon[letter].append((evolved, entry))
33+
for letter in string.ascii_lowercase:
34+
new_lines.append(f"## {letter.upper()}")
35+
36+
if letter not in lexicon:
37+
continue
38+
39+
lexicon[letter].sort()
40+
for evolved, entry in lexicon[letter]:
41+
protos = " + ".join(
42+
f"_\\*{proto}_" for proto in self.form_to_protos(entry.form)
43+
)
44+
all_evolved = ", ".join(f"**{each}**" for each in evolved)
45+
new_lines.append(
46+
f"""
47+
{all_evolved} {protos} ({entry.part_of_speech.name}.) {entry.definition}
48+
""".strip()
49+
)
50+
new_lines.append("")
51+
else:
52+
new_lines.append(line)
53+
return new_lines
54+
55+
def form_to_protos(self, form: Form) -> List[str]:
56+
return self.resolved_form_to_protos(self.lexicon.resolve(form))
57+
58+
def resolved_form_to_protos(self, form: ResolvedForm) -> List[str]:
59+
protos = [[form.stem.form]]
60+
for affix in form.affixes:
61+
affix_protos = self.resolved_form_to_protos(affix.form)
62+
if affix.affix.type is AffixType.PREFIX:
63+
protos.insert(0, affix_protos)
64+
else:
65+
protos.append(affix_protos)
66+
67+
return list(chain(*protos))
68+
69+
def evolve_all(self, entry: Entry) -> List[str]:
70+
return [
71+
evolve(self.lexicon.substitute(var, entry.form))
72+
for var in self.lexicon.get_vars(entry.template)
73+
]
74+
75+
76+
class LexiconInlineProcessor(InlineProcessor):
77+
lexicon: Lexicon
78+
79+
def __init__(self, lexicon: Lexicon) -> None:
80+
super().__init__(r"#(.*?)#")
81+
self.lexicon = lexicon
82+
83+
# InlineProcessor and its parent Pattern
84+
# have contradictory type annotations,
85+
# so we have to ignore type.
86+
def handleMatch( # type: ignore
87+
self, m: Match[str], data: Any
88+
) -> Union[Tuple[Element, int, int], Tuple[None, None, None]]:
89+
element = Element("span")
90+
element.text = self.evolve(m.group(1))
91+
return element, m.start(), m.end()
92+
93+
def evolve(self, raw: str) -> str:
94+
return " ".join(
95+
evolve(self.lexicon.resolve(form)) for form in parse_sentence(raw)
96+
)
97+
98+
99+
class LexiconInserter(Extension):
100+
lexicon: Lexicon
101+
102+
def __init__(self) -> None:
103+
super().__init__()
104+
105+
self.lexicon = parse_lexicon_file()
106+
107+
def extendMarkdown(self, md: Markdown) -> None:
108+
md.registerExtension(self)
109+
md.preprocessors.register(LexiconPreprocessor(md, self.lexicon), "lexicon", 0)
110+
md.inlinePatterns.register(
111+
LexiconInlineProcessor(self.lexicon), "inline-lexicon", 200
112+
)

pyconlang/book/preprocess.py

Lines changed: 1 addition & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,9 @@
1-
import string
2-
from itertools import chain
3-
from typing import Dict, List, Tuple
1+
from typing import List
42

53
from markdown import Markdown
64
from markdown.extensions import Extension
75
from markdown.preprocessors import Preprocessor
86

9-
from ..lexicon import Lexicon
10-
from ..lexicon.parser import parse_lexicon_file
11-
from ..lexurgy import evolve
12-
from ..types import AffixType, Entry, Form, ResolvedForm
13-
147

158
class SkipLinePreprocessor(Preprocessor):
169
state: "SkipLine"
@@ -44,71 +37,3 @@ def extendMarkdown(self, md: Markdown) -> None:
4437

4538
def reset(self) -> None:
4639
self.skipped = []
47-
48-
49-
class LexiconPreprocessor(Preprocessor):
50-
lexicon: Lexicon
51-
52-
def __init__(self, md: Markdown, lexicon: Lexicon) -> None:
53-
super().__init__(md)
54-
self.lexicon = lexicon
55-
56-
def run(self, lines: List[str]) -> List[str]:
57-
new_lines = []
58-
for line in lines:
59-
if line.strip() == "!lexicon":
60-
lexicon: Dict[str, List[Tuple[List[str], Entry]]] = {}
61-
for entry in self.lexicon.entries:
62-
evolved = self.evolve_all(entry)
63-
letter = evolved[0][0]
64-
lexicon.setdefault(letter, [])
65-
lexicon[letter].append((evolved, entry))
66-
for letter in string.ascii_lowercase:
67-
new_lines.append(f"## {letter.upper()}")
68-
69-
if letter not in lexicon:
70-
continue
71-
72-
lexicon[letter].sort()
73-
for evolved, entry in lexicon[letter]:
74-
protos = " + ".join(
75-
f"_\\*{proto}_" for proto in self.form_to_protos(entry.form)
76-
)
77-
all_evolved = ", ".join(f"**{each}**" for each in evolved)
78-
new_lines.append(
79-
f"""
80-
{all_evolved} {protos} ({entry.part_of_speech.name}.) {entry.definition}
81-
""".strip()
82-
)
83-
new_lines.append("")
84-
else:
85-
new_lines.append(line)
86-
return new_lines
87-
88-
def form_to_protos(self, form: Form) -> List[str]:
89-
return self.resolved_form_to_protos(self.lexicon.resolve(form))
90-
91-
def resolved_form_to_protos(self, form: ResolvedForm) -> List[str]:
92-
protos = [[form.stem.form]]
93-
for affix in form.affixes:
94-
affix_protos = self.resolved_form_to_protos(affix.form)
95-
if affix.affix.type is AffixType.PREFIX:
96-
protos.insert(0, affix_protos)
97-
else:
98-
protos.append(affix_protos)
99-
100-
return list(chain(*protos))
101-
102-
def evolve_all(self, entry: Entry) -> List[str]:
103-
return [
104-
evolve(self.lexicon.substitute(var, entry.form))
105-
for var in self.lexicon.get_vars(entry.template)
106-
]
107-
108-
109-
class LexiconInserter(Extension):
110-
def extendMarkdown(self, md: Markdown) -> None:
111-
md.registerExtension(self)
112-
md.preprocessors.register(
113-
LexiconPreprocessor(md, parse_lexicon_file()), "lexicon", 0
114-
)

pyconlang/lexicon/parser.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
from pathlib import Path
2-
from typing import Any, Callable, TypeVar, Union
2+
from typing import Any, Callable, List, TypeVar, Union, cast
33

44
from pyparsing import (
55
Group,
6-
OneOrMore,
76
Opt,
87
ParserElement,
98
ParseResults,
109
Suppress,
1110
Word,
12-
ZeroOrMore,
1311
alphanums,
1412
alphas,
1513
pyparsing_unicode,
@@ -23,6 +21,7 @@
2321
AffixType,
2422
Canonical,
2523
Entry,
24+
Form,
2625
Fusion,
2726
PartOfSpeech,
2827
Proto,
@@ -60,7 +59,7 @@ def parse_lexicon(string: str) -> Lexicon:
6059
result = lexicon.parse_string(string, parse_all=True)[0]
6160

6261
if not isinstance(result, Lexicon):
63-
raise RuntimeError("Bad parsing")
62+
raise RuntimeError(f"Could not parse {string}")
6463

6564
return result
6665

@@ -69,6 +68,10 @@ def parse_lexicon_file(filename: Path = Path("lexicon.txt")) -> Lexicon:
6968
return parse_lexicon(filename.read_text())
7069

7170

71+
def parse_sentence(string: str) -> List[Form]:
72+
return cast(List[Form], list(sentence.parse_string(string, parse_all=True)))
73+
74+
7275
ident = Word(alphanums + "-").set_name("ident")
7376
rule = (Suppress("@") + ident).set_parse_action(token_map(Rule)).set_name("rule")
7477
canonical = (
@@ -107,23 +110,23 @@ def parse_lexicon_file(filename: Path = Path("lexicon.txt")) -> Lexicon:
107110
)
108111
affix = (prefix | suffix).set_name("affix")
109112
var = (
110-
(ZeroOrMore(prefix) + Suppress("$") + ZeroOrMore(suffix))
113+
(prefix[...] + Suppress("$") + suffix[...])
111114
.set_parse_action(Var.from_iterable)
112115
.set_name("var")
113116
)
114117
template = (
115-
(Suppress("template") + template_name + OneOrMore(var))
118+
(Suppress("template") + template_name + var[1, ...])
116119
.set_parse_action(tokens_map(Template.from_args))
117120
.set_name("template")
118121
)
119122
fusion = (
120-
(Group(ZeroOrMore(prefix), True) + canonical + Group(ZeroOrMore(suffix), True))
123+
(Group(prefix[...], True) + canonical + Group(suffix[...], True))
121124
.set_parse_action(tokens_map(Fusion.from_prefixes_and_suffixes))
122125
.set_name("fusion")
123126
)
124127
form = (proto | fusion).set_name("form")
125128
lexical_sources = (
126-
Suppress("(") + OneOrMore(canonical).set_parse_action(tuple) + Suppress(")")
129+
Suppress("(") + canonical[1, ...].set_parse_action(tuple) + Suppress(")")
127130
)
128131
affix_definition = (
129132
(
@@ -153,11 +156,12 @@ def parse_lexicon_file(filename: Path = Path("lexicon.txt")) -> Lexicon:
153156
.set_name("entry")
154157
)
155158
lexicon = (
156-
ZeroOrMore(entry | affix_definition | template)
159+
(entry | affix_definition | template)[...]
157160
.set_parse_action(Lexicon.from_iterable)
158161
.set_name("lexicon")
159162
)
160163

164+
sentence = form[...]
161165

162166
if __name__ == "__main__":
163167
make_diagrams()

tests/lexicon/test_parser.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
form,
88
fusion,
99
lexical_sources,
10+
parse_sentence,
1011
part_of_speech,
1112
proto,
1213
rule,
@@ -181,5 +182,14 @@ def test_var(sample_lexicon):
181182
)
182183

183184

185+
def test_sentence():
186+
assert tuple(parse_sentence("*aka <strong> COL.<with space> *taka@start")) == (
187+
Proto("aka", None),
188+
Fusion(Canonical("strong"), ()),
189+
Fusion(Canonical("with space"), (Affix("COL", AffixType.PREFIX),)),
190+
Proto("taka", Rule("start")),
191+
)
192+
193+
184194
def parse(parser, string):
185-
return parser.parse_string(string)[0]
195+
return parser.parse_string(string, parse_all=True)[0]

tests/test_book.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,21 @@
33

44

55
def test_book(simple_pyconlang):
6+
(simple_pyconlang / "grammar.md").write_text(
7+
"**This is an example: #*kika@era1 <stone>.PL#**"
8+
)
9+
610
compile_book()
711

812
html = (PYCONLANG_PATH / "output.html").read_text()
913

1014
assert "By Mr. Tester" in html
1115
assert "TestLang" in html
16+
17+
assert (
18+
"<p><strong>This is an example: <span>kiga abagigi</span></strong></p>" in html
19+
)
20+
1221
assert (
1322
"<p><strong>abagigi</strong> <em>*apak</em> + <em>*iki</em> (n.) gravel</p>"
1423
in html

0 commit comments

Comments (0)