Skip to content

Commit b0220fd

Browse files
committed
initial Lexurgy client
1 parent 81f047f commit b0220fd

File tree

8 files changed

+144
-63
lines changed

8 files changed

+144
-63
lines changed

pyconlang/assets/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
LEXURGY_VERSION = "1.2.1"
1+
LEXURGY_VERSION = "1.2.2-server"
5.97 MB
Binary file not shown.

pyconlang/evolve/__init__.py

Lines changed: 39 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,21 @@
55
from collections.abc import Mapping, Sequence
66
from dataclasses import dataclass, field
77
from functools import cached_property
8-
from itertools import chain
98
from pathlib import Path
10-
from subprocess import run
11-
from time import time
129
from unicodedata import normalize
1310

1411
from .. import PYCONLANG_PATH
1512
from ..assets import LEXURGY_VERSION
1613
from ..checksum import checksum
1714
from ..domain import Component, Morpheme, ResolvedForm
18-
from ..lexurgy.domain import TraceLine
15+
from ..lexurgy import client
16+
from ..lexurgy.client import LexurgyClient
17+
from ..lexurgy.domain import (
18+
LexurgyErrorResponse,
19+
LexurgyRequest,
20+
LexurgyResponse,
21+
TraceLine,
22+
)
1923
from ..lexurgy.tracer import parse_trace_lines
2024
from .arrange import AffixArranger
2125
from .batch import Batcher, Cache, ComponentQuery, CompoundQuery, Query
@@ -59,6 +63,10 @@ class Evolver:
5963
def arranger(self) -> AffixArranger:
6064
return AffixArranger.from_path(CHANGES_PATH)
6165

66+
@cached_property
def lexurgy(self) -> LexurgyClient:
    """Return the Lexurgy client used by this evolver.

    The client is created by ``client()`` on first access; because this is
    a ``cached_property`` the same client object is returned on every
    later access for this instance.
    """
    lexurgy_client = client()
    return lexurgy_client
69+
6270
@classmethod
6371
def load(cls) -> "Evolver":
6472
if not all(
@@ -188,67 +196,41 @@ def evolve_words(
188196
if not words:
189197
return [], {}
190198

191-
base_name = f"words-{time():.0f}"
192-
193-
self.evolve_directory.mkdir(parents=True, exist_ok=True)
194-
195-
input_words = self.evolve_directory / f"{base_name}.wli"
196-
input_words.write_text("\n".join(words))
197-
198-
output_words = self.evolve_directory / f"{base_name}_ev.wli"
199-
output_words.unlink(missing_ok=True)
200-
201-
phonetic_words = self.evolve_directory / f"{base_name}_phonetic.wli"
202-
phonetic_words.unlink(missing_ok=True)
203-
204-
trace_file = self.evolve_directory / f"{base_name}_trace.wli"
205-
trace_file.unlink(missing_ok=True)
206-
207-
args = [
208-
"sh",
209-
str(LEXURGY_PATH),
210-
"sc",
211-
str(CHANGES_PATH),
212-
str(input_words),
213-
"-m",
214-
]
215-
216-
if start is not None:
217-
args.append("-a")
218-
args.append(start)
199+
debug_words = []
200+
if trace:
201+
debug_words = words
219202

220-
if end is not None:
221-
args.append("-b")
222-
args.append(end)
203+
request = LexurgyRequest(words, start, end, debug_words)
223204

224-
if trace:
225-
args.extend(chain(*zip(["-t"] * len(words), words)))
205+
response = self.lexurgy.roundtrip(request)
226206

227-
result = run(args, capture_output=True, text=True)
207+
match response:
208+
case LexurgyErrorResponse():
209+
raise LexurgyError(response.message)
228210

229-
if result.returncode != 0:
230-
# todo too heuristic?
231-
stdout = result.stdout.strip().splitlines()
232-
if len(stdout) > 0:
233-
raise LexurgyError(result.stdout.strip().splitlines()[-1])
234-
else:
235-
raise LexurgyError(result.stderr)
211+
case LexurgyResponse():
212+
moderns = [normalize("NFD", word) for word in response.words]
236213

237-
moderns = normalize("NFD", output_words.read_text().strip()).split("\n")
214+
if "phonetic" in response.intermediates:
215+
phonetics = [
216+
normalize("NFD", word)
217+
for word in response.intermediates["phonetic"]
218+
]
219+
else:
220+
phonetics = moderns
238221

239-
if phonetic_words.exists():
240-
phonetics = normalize("NFD", phonetic_words.read_text()).strip().split("\n")
241-
else:
242-
phonetics = moderns
222+
assert len(phonetics) == len(moderns)
243223

244-
trace_lines: Mapping[str, list[TraceLine]] = {}
245-
if trace:
246-
trace_lines = parse_trace_lines(trace_file.read_text(), words[0])
224+
trace_lines: Mapping[str, list[TraceLine]] = {}
225+
if trace:
226+
trace_lines = parse_trace_lines(
227+
"\n".join(response.trace_lines), words[0]
228+
)
247229

248-
return [
249-
Evolved(proto, modern, phonetic)
250-
for proto, modern, phonetic in zip(words, moderns, phonetics)
251-
], trace_lines
230+
return [
231+
Evolved(proto, modern, phonetic)
232+
for proto, modern, phonetic in zip(words, moderns, phonetics)
233+
], trace_lines
252234

253235
def cleanup(self) -> None:
254236
if self.evolve_directory.exists():

pyconlang/lexurgy/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from pathlib import Path
2+
from subprocess import PIPE, Popen
3+
4+
from .. import PYCONLANG_PATH
5+
from ..assets import LEXURGY_VERSION
6+
from .client import LexurgyClient
7+
8+
LEXURGY_PATH = PYCONLANG_PATH / f"lexurgy-{LEXURGY_VERSION}" / "bin" / "lexurgy"
9+
CHANGES_PATH = Path("changes.lsc")
10+
11+
12+
def client() -> LexurgyClient:
    """Launch the Lexurgy script in ``server`` mode and wrap it in a client.

    The script at ``LEXURGY_PATH`` is run through ``sh`` with
    ``CHANGES_PATH`` as its argument. stdin and stdout are text-mode pipes,
    line-buffered (``bufsize=1``).
    """
    command = ["sh", str(LEXURGY_PATH), "server", str(CHANGES_PATH)]
    process = Popen(command, stdin=PIPE, stdout=PIPE, text=True, bufsize=1)
    return LexurgyClient(process)

pyconlang/lexurgy/client.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from dataclasses import dataclass
2+
from functools import cached_property
3+
from subprocess import Popen
4+
from typing import IO
5+
6+
from .domain import (
7+
AnyLexurgyResponse,
8+
LexurgyErrorResponse,
9+
LexurgyRequest,
10+
LexurgyResponse,
11+
)
12+
13+
14+
@dataclass
15+
class LexurgyClient:
16+
popen: Popen[str]
17+
18+
@cached_property
19+
def stdin(self) -> IO[str]:
20+
assert self.popen.stdin is not None
21+
return self.popen.stdin
22+
23+
@cached_property
24+
def stdout(self) -> IO[str]:
25+
assert self.popen.stdout is not None
26+
return self.popen.stdout
27+
28+
def write_line(self, line: str) -> None:
29+
self.stdin.write(f"{line}\n")
30+
31+
def read_line(self) -> str:
32+
return self.stdout.readline()
33+
34+
def send(self, request: LexurgyRequest) -> None:
35+
self.write_line(request.to_json())
36+
37+
def receive(self) -> AnyLexurgyResponse:
38+
line = self.read_line()
39+
try:
40+
return LexurgyResponse.from_json(line)
41+
except:
42+
return LexurgyErrorResponse.from_json(line)
43+
44+
def roundtrip(self, request: LexurgyRequest) -> AnyLexurgyResponse:
45+
self.send(request)
46+
return self.receive()

pyconlang/lexurgy/domain.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from dataclasses import dataclass
1+
from dataclasses import dataclass, field
2+
from typing import cast
3+
4+
from dataclasses_json import DataClassJsonMixin, LetterCase, config
25

36

47
@dataclass(eq=True, frozen=True)
@@ -10,3 +13,35 @@ class TraceLine:
1013

1114
def set_word(self, new_word: str) -> "TraceLine":
    """Return a new ``TraceLine`` equal to this one but with ``new_word``.

    The rule, ``before`` and ``after`` values are carried over unchanged.
    """
    updated = TraceLine(self.rule, new_word, self.before, self.after)
    return updated
16+
17+
18+
class CamelCaseJsonMixin(DataClassJsonMixin):
    """``DataClassJsonMixin`` variant configured for camelCase letter case."""

    # ``cast`` is a no-op at runtime; it only keeps the type checker quiet
    # about the declared type of ``dataclass_json_config``.
    dataclass_json_config = cast(
        None,
        config(letter_case=LetterCase.CAMEL)["dataclasses_json"],
    )
22+
23+
24+
# The camelCase letter-case configuration comes from CamelCaseJsonMixin
# rather than from the ``dataclass_json`` decorator.
@dataclass
class LexurgyRequest(CamelCaseJsonMixin):
    """Request payload sent to the Lexurgy server."""

    words: list[str]
    start_at: str | None = None
    stop_before: str | None = None
    debug_words: list[str] = field(default_factory=list)
    romanize: bool = True
32+
33+
34+
@dataclass
class LexurgyResponse(CamelCaseJsonMixin):
    """Successful response payload received from the Lexurgy server."""

    words: list[str]
    # Word lists keyed by name; the evolver looks up the "phonetic" key.
    intermediates: dict[str, list[str]]
    trace_lines: list[str]
39+
40+
41+
@dataclass
class LexurgyErrorResponse(CamelCaseJsonMixin):
    """Error response payload received from the Lexurgy server."""

    # The evolver raises LexurgyError with this text.
    message: str
    stack_trace: list[str]
45+
46+
47+
AnyLexurgyResponse = LexurgyResponse | LexurgyErrorResponse

pyconlang/unicode.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from typing import Optional
2-
31
from wcwidth import wcswidth
42

53
from .metadata import Metadata
@@ -23,7 +21,7 @@ def center(string: str, width: int, fill_char: str = " ") -> str:
2321
return fill_char * add_left + string + fill_char * add_right
2422

2523

26-
def combine(head: str, tail: str, syllable_break: Optional[str] = None) -> str:
24+
def combine(head: str, tail: str, syllable_break: str | None = None) -> str:
2725
if syllable_break is None:
2826
syllable_break = default_syllable_break()
2927

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ dependencies = [
1919
"toml==0.10.2",
2020
"pyparsing==3.0.9",
2121
"unidecode==1.3.4",
22-
"wcwidth==0.2.5 "
22+
"wcwidth==0.2.5",
23+
"dataclasses-json==0.5.8"
2324
]
2425
dynamic = ["version"]
2526

0 commit comments

Comments
 (0)