|
| 1 | +import string |
| 2 | +from itertools import chain |
| 3 | +from typing import Any, Dict, List, Match, Tuple, Union |
| 4 | +from xml.etree.ElementTree import Element |
| 5 | + |
| 6 | +from markdown import Extension, Markdown |
| 7 | +from markdown.inlinepatterns import InlineProcessor |
| 8 | +from markdown.preprocessors import Preprocessor |
| 9 | + |
| 10 | +from ..lexicon import Lexicon |
| 11 | +from ..lexicon.parser import parse_lexicon_file, parse_sentence |
| 12 | +from ..lexurgy import evolve |
| 13 | +from ..types import AffixType, Entry, Form, ResolvedForm |
| 14 | + |
| 15 | + |
class LexiconPreprocessor(Preprocessor):
    """Markdown preprocessor that expands ``!lexicon`` marker lines.

    Every input line whose stripped content is exactly ``!lexicon`` is
    replaced by a generated dictionary: one ``## X`` heading per ASCII
    letter, each followed by the entries whose first evolved form starts
    with that (lowercase) letter. All other lines pass through unchanged.
    """

    lexicon: Lexicon

    def __init__(self, md: Markdown, lexicon: Lexicon) -> None:
        """Store the lexicon that ``!lexicon`` markers are expanded from."""
        super().__init__(md)
        self.lexicon = lexicon

    def run(self, lines: List[str]) -> List[str]:
        """Return ``lines`` with each ``!lexicon`` marker line expanded."""
        new_lines: List[str] = []
        for line in lines:
            if line.strip() == "!lexicon":
                new_lines.extend(self._render_lexicon())
            else:
                new_lines.append(line)
        return new_lines

    def _render_lexicon(self) -> List[str]:
        """Build the Markdown lines for the whole dictionary."""
        grouped = self._group_by_initial()
        rendered: List[str] = []
        for letter in string.ascii_lowercase:
            # The heading is emitted for every letter, even one with no entries.
            rendered.append(f"## {letter.upper()}")

            # Sort on the evolved forms only. Sorting the raw tuples would fall
            # back to comparing Entry objects whenever two entries share the
            # same evolved forms, which fails if Entry defines no ordering.
            entries = sorted(grouped.get(letter, []), key=lambda pair: pair[0])
            for evolved, entry in entries:
                rendered.append(self._format_entry(evolved, entry))
                rendered.append("")
        return rendered

    def _group_by_initial(self) -> Dict[str, List[Tuple[List[str], Entry]]]:
        """Bucket entries by the first character of their first evolved form."""
        grouped: Dict[str, List[Tuple[List[str], Entry]]] = {}
        for entry in self.lexicon.entries:
            evolved = self.evolve_all(entry)
            # An entry with no evolved forms (or an empty first form) has no
            # initial to file it under; skip it rather than raise IndexError.
            if not evolved or not evolved[0]:
                continue
            grouped.setdefault(evolved[0][0], []).append((evolved, entry))
        return grouped

    def _format_entry(self, evolved: List[str], entry: Entry) -> str:
        """Render one dictionary line: evolved forms, proto-forms, POS, gloss."""
        protos = " + ".join(
            f"_\\*{proto}_" for proto in self.form_to_protos(entry.form)
        )
        all_evolved = ", ".join(f"**{each}**" for each in evolved)
        return (
            f"{all_evolved} {protos} "
            f"({entry.part_of_speech.name}.) {entry.definition}"
        ).strip()

    def form_to_protos(self, form: Form) -> List[str]:
        """Resolve ``form`` through the lexicon and list its proto-forms."""
        return self.resolved_form_to_protos(self.lexicon.resolve(form))

    def resolved_form_to_protos(self, form: ResolvedForm) -> List[str]:
        """Flatten a resolved form into its proto-forms, affixes included.

        The stem comes first; each affix is expanded recursively. A prefix is
        inserted at the front (so a later prefix ends up before an earlier
        one) and any other affix type is appended at the end.
        """
        protos: List[List[str]] = [[form.stem.form]]
        for affix in form.affixes:
            affix_protos = self.resolved_form_to_protos(affix.form)
            if affix.affix.type is AffixType.PREFIX:
                protos.insert(0, affix_protos)
            else:
                protos.append(affix_protos)

        return list(chain.from_iterable(protos))

    def evolve_all(self, entry: Entry) -> List[str]:
        """Evolve ``entry.form`` once per variable of the entry's template."""
        return [
            evolve(self.lexicon.substitute(var, entry.form))
            for var in self.lexicon.get_vars(entry.template)
        ]
| 74 | + |
| 75 | + |
class LexiconInlineProcessor(InlineProcessor):
    """Inline processor that replaces ``#...#`` spans with evolved text.

    The text between the two ``#`` characters is parsed as a sentence; each
    parsed form is resolved against the lexicon and evolved, and the results
    are joined with single spaces inside a ``<span>`` element.
    """

    lexicon: Lexicon

    def __init__(self, lexicon: Lexicon) -> None:
        """Register the non-greedy ``#...#`` pattern and keep the lexicon."""
        pattern = r"#(.*?)#"
        super().__init__(pattern)
        self.lexicon = lexicon

    # The type annotations of InlineProcessor and of its parent Pattern
    # contradict each other, hence the type-ignore below.
    def handleMatch(  # type: ignore
        self, m: Match[str], data: Any
    ) -> Union[Tuple[Element, int, int], Tuple[None, None, None]]:
        """Return a ``<span>`` holding the evolved text plus the match bounds."""
        evolved_text = self.evolve(m.group(1))
        span = Element("span")
        span.text = evolved_text
        return span, m.start(), m.end()

    def evolve(self, raw: str) -> str:
        """Evolve every form parsed from ``raw`` and join them with spaces."""
        words = []
        for form in parse_sentence(raw):
            resolved = self.lexicon.resolve(form)
            words.append(evolve(resolved))
        return " ".join(words)
| 97 | + |
| 98 | + |
class LexiconInserter(Extension):
    """Markdown extension that wires the lexicon processors into a parser.

    The lexicon is loaded once, when the extension is constructed, and is
    shared by the preprocessor and the inline processor.
    """

    lexicon: Lexicon

    def __init__(self) -> None:
        """Initialise the extension and load the lexicon file."""
        super().__init__()
        self.lexicon = parse_lexicon_file()

    def extendMarkdown(self, md: Markdown) -> None:
        """Register this extension and both lexicon processors on ``md``."""
        md.registerExtension(self)

        # Block-level expansion of ``!lexicon`` lines, registered at priority 0.
        block_processor = LexiconPreprocessor(md, self.lexicon)
        md.preprocessors.register(block_processor, "lexicon", 0)

        # Inline expansion of ``#...#`` spans, registered at priority 200.
        inline_processor = LexiconInlineProcessor(self.lexicon)
        md.inlinePatterns.register(inline_processor, "inline-lexicon", 200)
0 commit comments