Skip to content

Commit cd61f3f

Browse files
committed
repl improvements
1 parent c880781 commit cd61f3f

File tree

15 files changed

+237
-135
lines changed

15 files changed

+237
-135
lines changed

README.md

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,16 +194,27 @@ will turn out as
194194

195195

196196
## TODO
197+
- [ ] ? Combined, general repl
198+
- [ ] Working with proto-forms
199+
- [ ] Working with lexicon
200+
- [ ] Defining words, affixes and templates in session
201+
- [ ] Saving session
202+
- [ ] Search for a word
203+
- [ ] Working with book
204+
- [ ] Compile
197205
- [ ] `repl`: Interactive session with Lexurgy
198206
- [x] Enter proto form, get romanized modern form
199-
- [ ] Get phonetic modern form
200-
- [ ] Get simplified (no accents) romanized modern form
201-
- [ ] `lexurgy`: Evolving forms and auto-glomination
207+
- [x] Get phonetic modern form
208+
- [x] Get simplified (no accents) romanized modern form
209+
- [ ] Traces
210+
- [ ] `evolve`: Evolving forms and auto-glomination
202211
- [x] Evolve and auto-glominate forms at the correct point of time
203212
- [ ] Handle stress
213+
- [ ] Traces
204214
- [ ] `lexicon`: Interactive lexicon
205215
- [ ] Define word by canonical
206216
- [ ] Find by canonical, proto, affix, or definition
217+
- [ ] ? Protos to take affixes as well
207218
- [ ] `translator`: Automatic translation according to gloss
208219
- [ ] Evolve each form (with affixes)
209220
- [ ] Match-up forms and glosses

diagrams.html

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
font-family: monospace;
88
}
99
</style>
10+
11+
</head>
1012
<body>
1113

1214

@@ -344,7 +346,7 @@ <h1 class="railroad-heading">unicode_word</h1>
344346
</style><g transform="translate(.5 .5)">
345347
<g>
346348
<path d="M20 21v20m10 -20v20m-10 -10h20" /></g><path d="M40 31h10" /><g class="terminal ">
347-
<path d="M50 31h0.0" /><path d="M240.0 31h0.0" /><rect height="22" rx="10" ry="10" width="190" x="50" y="20"></rect><text x="145" y="35">W:(A-Za-zªµºÀ-ÖØ...)</text></g><path d="M240.0 31h10" /><path d="M 250.0 31 h 20 m -10 -10 v 20 m 10 -20 v 20"></path></g></svg>
349+
<path d="M50 31h0.0" /><path d="M240.0 31h0.0" /><rect height="22" rx="10" ry="10" width="190" x="50" y="20"></rect><text x="145" y="35">W:(A-Za-z-„†-Ÿ¡...)</text></g><path d="M240.0 31h10" /><path d="M 250.0 31 h 20 m -10 -10 v 20 m 10 -20 v 20"></path></g></svg>
348350
</div>
349351
</div>
350352

pyconlang/book/lexicon_inserter.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,8 @@
77
from markdown.inlinepatterns import InlineProcessor
88
from markdown.preprocessors import Preprocessor
99

10-
from ..lexicon import Lexicon
11-
from ..lexicon.parser import parse_lexicon_file, parse_sentence
12-
from ..lexurgy import evolve
10+
from ..evolve import evolve
11+
from ..lexicon import Lexicon, parse_lexicon_file, parse_sentence
1312
from ..types import AffixType, Entry, Form, ResolvedForm
1413

1514

@@ -68,7 +67,7 @@ def resolved_form_to_protos(self, form: ResolvedForm) -> List[str]:
6867

6968
def evolve_all(self, entry: Entry) -> List[str]:
7069
return [
71-
evolve(self.lexicon.substitute(var, entry.form))
70+
evolve(self.lexicon.substitute(var, entry.form)).modern # todo change
7271
for var in self.lexicon.get_vars(entry.template)
7372
]
7473

@@ -92,7 +91,8 @@ def handleMatch( # type: ignore
9291

9392
def evolve(self, raw: str) -> str:
9493
return " ".join(
95-
evolve(self.lexicon.resolve(form)) for form in parse_sentence(raw)
94+
evolve(self.lexicon.resolve(form)).modern
95+
for form in parse_sentence(raw) # todo change
9696
)
9797

9898

pyconlang/data/book/changes.lsc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ Symbol j [voiced palatal approximant]
104104
Symbol ʎ [voiced palatal +lateral approximant]
105105

106106
# Velar
107-
Symbol ŋ [voiced velar nasal]]
107+
Symbol ŋ [voiced velar nasal]
108108
Symbol k [voiceless velar stop]
109109
Symbol g [voiced velar stop]
110110
Symbol x [voiceless velar fricative]
@@ -145,15 +145,15 @@ Symbol e [mid front -round vowel]
145145
Symbol ø [mid front +round vowel]
146146
Symbol ə [mid central -round vowel]
147147
Symbol ɤ [mid back -round vowel]
148-
Symbol o [mid back +round vowel]]
148+
Symbol o [mid back +round vowel]
149149

150150
# Low
151151
Symbol ɛ [low front -round vowel]
152152
Symbol œ [low front +round vowel]
153153
Symbol a [low central -round vowel]
154154
Symbol ɶ [low central +round vowel]
155155
Symbol ɑ [low back -round vowel]
156-
Symbol ɒ [low back +round vowel]]
156+
Symbol ɒ [low back +round vowel]
157157

158158
# +-------------+
159159
# | ~ Classes ~ |
Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from dataclasses import dataclass, field
22
from pathlib import Path
3-
from subprocess import check_call
3+
from subprocess import run
44
from tempfile import TemporaryDirectory
55
from typing import List, Optional
6+
from unicodedata import normalize
67

78
from . import PYCONLANG_PATH
89
from .data import LEXURGY_VERSION
@@ -12,6 +13,13 @@
1213
LEXURGY_PATH = PYCONLANG_PATH / f"lexurgy-{LEXURGY_VERSION}" / "bin" / "lexurgy"
1314

1415

16+
@dataclass(eq=True, frozen=True)
17+
class Evolved:
18+
proto: str
19+
modern: str
20+
phonetic: str
21+
22+
1523
@dataclass
1624
class LexurgySession:
1725
args: List[str] = field(default_factory=list)
@@ -23,35 +31,45 @@ def clear(self) -> None:
2331
def add_word(self, word: str) -> None:
2432
self.words.append(word)
2533

26-
def evolve(self) -> List[str]:
34+
def evolve(self) -> List[Evolved]:
2735
return self.evolve_words(self.words)
2836

29-
def evolve_word(self, word: str) -> str:
37+
def evolve_word(self, word: str) -> Evolved:
3038
return self.evolve_words([word])[0]
3139

32-
def evolve_words(self, words: List[str]) -> List[str]:
40+
def evolve_words(self, words: List[str]) -> List[Evolved]:
3341
with TemporaryDirectory() as tmpdir:
3442
dir_path = Path(tmpdir)
3543
input_words = dir_path / "words.wli"
3644
input_words.write_text("\n".join(words))
37-
check_call(
38-
[
39-
"sh",
40-
str(LEXURGY_PATH),
41-
"sc",
42-
"changes.lsc",
43-
str(input_words),
44-
]
45-
+ self.args
45+
run(
46+
["sh", str(LEXURGY_PATH), "sc", "changes.lsc", str(input_words), "-m"]
47+
+ self.args,
48+
check=True,
49+
capture_output=True,
4650
)
4751

4852
output_words = dir_path / "words_ev.wli"
49-
return output_words.read_text().strip().split("\n")
53+
phonetic_words = dir_path / "words_phonetic.wli"
54+
55+
moderns = normalize("NFD", output_words.read_text().strip()).split("\n")
56+
57+
if phonetic_words.exists():
58+
phonetics = (
59+
normalize("NFD", phonetic_words.read_text()).strip().split("\n")
60+
)
61+
else:
62+
phonetics = moderns
63+
64+
return [
65+
Evolved(proto, modern, phonetic)
66+
for proto, modern, phonetic in zip(words, moderns, phonetics)
67+
]
5068

5169

5270
def evolve_word(
5371
word: str, *, start: Optional[Rule] = None, end: Optional[Rule] = None
54-
) -> str:
72+
) -> Evolved:
5573
args = []
5674
if start is not None:
5775
args.extend(["-a", start.name])
@@ -60,16 +78,16 @@ def evolve_word(
6078
return LexurgySession(args).evolve_word(word)
6179

6280

63-
def evolve_proto(proto: Proto, end: Optional[Rule] = None) -> str:
81+
def evolve_proto(proto: Proto, end: Optional[Rule] = None) -> Evolved:
6482
return evolve_word(proto.form, start=proto.era, end=end)
6583

6684

67-
def evolve(fusion: ResolvedForm) -> str:
85+
def evolve(fusion: ResolvedForm) -> Evolved:
6886
form = fusion.stem
6987
for affix in fusion.affixes:
7088
if affix.era is not None and form.era != affix.era:
71-
evolved = evolve_proto(form, affix.era)
72-
evolved_affix = evolve(affix.form)
89+
evolved = evolve_proto(form, affix.era).phonetic
90+
evolved_affix = evolve(affix.form).phonetic
7391

7492
form = Proto(fuse(evolved, evolved_affix, affix.affix.type), affix.era)
7593
else:
@@ -87,9 +105,11 @@ def fuse(stem: str, affix: str, affix_type: AffixType) -> str:
87105
return stem + affix
88106

89107

90-
def glom_at(prefix: str, suffix: str, rule: Rule, stressed_suffix: bool = False) -> str:
91-
evolved_prefix = evolve_word(prefix, end=rule)
92-
evolved_suffix = evolve_word(suffix, end=rule)
108+
def glom_at(
109+
prefix: str, suffix: str, rule: Rule, stressed_suffix: bool = False
110+
) -> Evolved:
111+
evolved_prefix = evolve_word(prefix, end=rule).phonetic
112+
evolved_suffix = evolve_word(suffix, end=rule).phonetic
93113

94114
if stressed_suffix:
95115
evolved_prefix = remove_primary_stress(evolved_prefix)

pyconlang/lexicon/__init__.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from dataclasses import dataclass
2-
from typing import Iterable, Optional, Set, Tuple, Union
2+
from pathlib import Path
3+
from typing import Iterable, List, Optional, Set, Tuple, Union, cast
34

5+
from ..evolve import Evolved, evolve
46
from ..types import (
57
Affix,
68
AffixDefinition,
@@ -14,6 +16,7 @@
1416
TemplateName,
1517
Var,
1618
)
19+
from .parser import lexicon, sentence
1720

1821

1922
@dataclass
@@ -76,6 +79,12 @@ def resolve(self, form: Form) -> ResolvedForm:
7679
resolved = self.resolve(self.get_entry(form.stem).form)
7780
return ResolvedForm(resolved.stem, resolved.affixes + affixes)
7881

82+
def evolve(self, form: Form) -> Evolved:
83+
return evolve(self.resolve(form))
84+
85+
def evolve_string(self, string: str) -> List[Evolved]:
86+
return [self.evolve(form) for form in parse_sentence(string)]
87+
7988
def resolve_with_affixes(
8089
self, form: Form, affixes: Tuple[Affix, ...]
8190
) -> ResolvedForm:
@@ -97,3 +106,15 @@ def get_vars(self, name: Optional[TemplateName]) -> Tuple[Var, ...]:
97106
return template.vars
98107

99108
raise KeyError(name)
109+
110+
111+
def parse_sentence(string: str) -> List[Form]:
112+
return cast(List[Form], list(sentence.parse_string(string, parse_all=True)))
113+
114+
115+
def parse_lexicon(string: str) -> Lexicon:
116+
return Lexicon.from_iterable(lexicon.parse_string(string, parse_all=True))
117+
118+
119+
def parse_lexicon_file(filename: Path = Path("lexicon.txt")) -> Lexicon:
120+
return parse_lexicon(filename.read_text())

pyconlang/lexicon/parser.py

Lines changed: 7 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from pathlib import Path
2-
from typing import Any, Callable, List, TypeVar, Union, cast
1+
from string import digits, punctuation, whitespace
2+
from typing import Any, Callable, TypeVar, Union
33

44
from pyparsing import (
55
Group,
@@ -21,7 +21,6 @@
2121
AffixType,
2222
Canonical,
2323
Entry,
24-
Form,
2524
Fusion,
2625
PartOfSpeech,
2726
Proto,
@@ -30,7 +29,6 @@
3029
TemplateName,
3130
Var,
3231
)
33-
from . import Lexicon
3432

3533
T = TypeVar("T")
3634

@@ -55,33 +53,17 @@ def make_diagrams() -> None:
5553
lexicon.create_diagram("diagrams.html", show_results_names=True)
5654

5755

58-
def parse_lexicon(string: str) -> Lexicon:
59-
result = lexicon.parse_string(string, parse_all=True)[0]
60-
61-
if not isinstance(result, Lexicon):
62-
raise RuntimeError(f"Could not parse {string}")
63-
64-
return result
65-
66-
67-
def parse_lexicon_file(filename: Path = Path("lexicon.txt")) -> Lexicon:
68-
return parse_lexicon(filename.read_text())
69-
70-
71-
def parse_sentence(string: str) -> List[Form]:
72-
return cast(List[Form], list(sentence.parse_string(string, parse_all=True)))
73-
74-
7556
ident = Word(alphanums + "-").set_name("ident")
7657
rule = (Suppress("@") + ident).set_parse_action(token_map(Rule)).set_name("rule")
7758
canonical = (
7859
(Suppress("<") + Word(alphanums + "-" + " ") + Suppress(">"))
7960
.set_parse_action(token_map(Canonical))
8061
.set_name("canonical")
8162
)
82-
unicode_word = Word(pyparsing_unicode.BasicMultilingualPlane.alphas).set_name(
83-
"unicode_word"
84-
)
63+
unicode_word = Word(
64+
pyparsing_unicode.BasicMultilingualPlane.printables,
65+
exclude_chars=whitespace + digits + punctuation,
66+
).set_name("unicode_word")
8567
proto = (
8668
(Suppress("*") + unicode_word + explicit_opt(rule))
8769
.set_parse_action(tokens_map(Proto))
@@ -155,11 +137,7 @@ def parse_sentence(string: str) -> List[Form]:
155137
.set_parse_action(tokens_map(Entry))
156138
.set_name("entry")
157139
)
158-
lexicon = (
159-
(entry | affix_definition | template)[...]
160-
.set_parse_action(Lexicon.from_iterable)
161-
.set_name("lexicon")
162-
)
140+
lexicon = (entry | affix_definition | template)[...].set_name("lexicon")
163141

164142
sentence = form[...]
165143

0 commit comments

Comments (0)