Skip to content

Commit 30dc156

Browse files
developer0hye and claude committed
fix: resolve Typst codegen failures for math subscripts, radicals, and text escaping
Fix four Typst compilation errors caused by codegen issues:

- OMML subscript/superscript with empty base: emit a `""` placeholder to avoid a bare `_` or `^`, which is invalid in Typst math mode
- OMML math run text with literal parentheses: quote `(` and `)` from `<m:t>` elements as `"("` / `")"` to prevent them from breaking function call syntax (e.g., `sqrt()` with unbalanced parens)
- Unstyled text after styled runs: wrap in a `#[...]` content block when the text starts with `(` (the guard also covers a leading `.` or `[`) directly after an unescaped `]`, to prevent Typst from interpreting it as function arguments (fixes both DOCX BiDi and PPTX text)

Files now converting successfully:

- math-subscripts.docx (was: unexpected underscore)
- tdf158023_import.docx (was: missing argument: radicand)
- tdf87533_bidi.docx (was: 15 expected comma errors)
- at.ecodesign...elektronik.pptx (was: invalid number suffix, etc.)

Related: #95

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Yonghye Kwon <developer.0hye@gmail.com>
1 parent 52ac387 commit 30dc156

File tree

2 files changed

+140
-5
lines changed

2 files changed

+140
-5
lines changed

crates/office2pdf/src/parser/omml.rs

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,12 @@ fn parse_superscript(reader: &mut Reader<&[u8]>, out: &mut String) {
170170
}
171171
}
172172

173-
out.push_str(&base);
173+
// Empty base needs a placeholder to avoid bare `^` in Typst math
174+
if base.is_empty() {
175+
out.push_str("\"\"");
176+
} else {
177+
out.push_str(&base);
178+
}
174179
let _ = std::fmt::Write::write_fmt(out, format_args!("^{}", wrap_if_needed(&sup)));
175180
}
176181

@@ -192,7 +197,12 @@ fn parse_subscript(reader: &mut Reader<&[u8]>, out: &mut String) {
192197
}
193198
}
194199

195-
out.push_str(&base);
200+
// Empty base needs a placeholder to avoid bare `_` in Typst math
201+
if base.is_empty() {
202+
out.push_str("\"\"");
203+
} else {
204+
out.push_str(&base);
205+
}
196206
let _ = std::fmt::Write::write_fmt(out, format_args!("_{}", wrap_if_needed(&sub)));
197207
}
198208

@@ -216,7 +226,12 @@ fn parse_sub_superscript(reader: &mut Reader<&[u8]>, out: &mut String) {
216226
}
217227
}
218228

219-
out.push_str(&base);
229+
// Empty base needs a placeholder to avoid bare `_` in Typst math
230+
if base.is_empty() {
231+
out.push_str("\"\"");
232+
} else {
233+
out.push_str(&base);
234+
}
220235
let _ = std::fmt::Write::write_fmt(
221236
out,
222237
format_args!("_{}^{}", wrap_if_needed(&sub), wrap_if_needed(&sup)),
@@ -459,7 +474,16 @@ fn map_math_text(input: &str) -> String {
459474
flush_non_ascii_text(&mut result, &non_ascii_buf, &mut last_was_name);
460475
non_ascii_buf.clear();
461476
}
462-
result.push(ch);
477+
// Parentheses from <m:t> are literal characters, not Typst math
478+
// grouping. Quote them to prevent breaking function call syntax
479+
// (e.g., `sqrt()` when radicand contains `)` from OMML text).
480+
if ch == '(' || ch == ')' {
481+
result.push('"');
482+
result.push(ch);
483+
result.push('"');
484+
} else {
485+
result.push(ch);
486+
}
463487
last_was_name = false;
464488
}
465489
}
@@ -1535,4 +1559,61 @@ mod tests {
15351559
let xml = r#"<m:r><m:t>α</m:t></m:r>"#;
15361560
assert_eq!(omml_to_typst(xml), "alpha");
15371561
}
1562+
1563+
// --- US-380: subscript/superscript with empty base ---
1564+
1565+
#[test]
1566+
fn test_subscript_empty_base() {
1567+
// When base is empty (e.g., <m:e/> or <m:e></m:e>), the output must
1568+
// not start with bare `_` which is invalid in Typst math.
1569+
let xml = r#"<m:sSub><m:e></m:e><m:sub><m:r><m:t>2</m:t></m:r></m:sub></m:sSub>"#;
1570+
let result = omml_to_typst(xml);
1571+
assert!(
1572+
!result.starts_with('_'),
1573+
"Empty base subscript must not start with bare '_': got '{result}'"
1574+
);
1575+
assert!(
1576+
result.contains("_2"),
1577+
"Should still contain subscript: got '{result}'"
1578+
);
1579+
}
1580+
1581+
#[test]
1582+
fn test_superscript_empty_base() {
1583+
let xml = r#"<m:sSup><m:e></m:e><m:sup><m:r><m:t>1</m:t></m:r></m:sup></m:sSup>"#;
1584+
let result = omml_to_typst(xml);
1585+
assert!(
1586+
!result.starts_with('^'),
1587+
"Empty base superscript must not start with bare '^': got '{result}'"
1588+
);
1589+
assert!(
1590+
result.contains("^1"),
1591+
"Should still contain superscript: got '{result}'"
1592+
);
1593+
}
1594+
1595+
#[test]
1596+
fn test_sub_superscript_empty_base() {
1597+
let xml = r#"<m:sSubSup><m:e></m:e><m:sub><m:r><m:t>2</m:t></m:r></m:sub><m:sup><m:r><m:t>1</m:t></m:r></m:sup></m:sSubSup>"#;
1598+
let result = omml_to_typst(xml);
1599+
assert!(
1600+
!result.starts_with('_'),
1601+
"Empty base sub-superscript must not start with bare '_': got '{result}'"
1602+
);
1603+
}
1604+
1605+
// --- US-381: literal parens in math run text ---
1606+
1607+
#[test]
1608+
fn test_math_text_literal_parens() {
1609+
// Literal ( and ) in <m:t> should not break Typst math function calls
1610+
let xml = r#"<m:rad><m:radPr><m:degHide m:val="on"/></m:radPr><m:deg/><m:e><m:r><m:t>)2(</m:t></m:r></m:e></m:rad>"#;
1611+
let result = omml_to_typst(xml);
1612+
// Must produce valid Typst: sqrt() must have its radicand argument
1613+
// The result must not be "sqrt()2()" which would fail compilation
1614+
assert!(
1615+
!result.contains("sqrt()"),
1616+
"Literal parens in radicand must not produce empty sqrt(): got '{result}'"
1617+
);
1618+
}
15381619
}

crates/office2pdf/src/render/typst_gen.rs

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1730,7 +1730,21 @@ fn generate_run(out: &mut String, run: &Run) {
17301730
out.push_str(&escaped);
17311731
out.push(']');
17321732
} else {
1733-
out.push_str(&escaped);
1733+
// Prevent `](` pattern: when previous output ends with an
1734+
// unescaped `]` and this text starts with `(`, `.`, or `[`,
1735+
// Typst would interpret it as function arguments / method call /
1736+
// trailing content. Wrap in `#[...]` to keep it in content mode.
1737+
let needs_wrap = !escaped.is_empty()
1738+
&& out.ends_with(']')
1739+
&& !out.ends_with("\\]")
1740+
&& matches!(escaped.as_bytes()[0], b'(' | b'.' | b'[');
1741+
if needs_wrap {
1742+
out.push_str("#[");
1743+
out.push_str(&escaped);
1744+
out.push(']');
1745+
} else {
1746+
out.push_str(&escaped);
1747+
}
17341748
}
17351749

17361750
if needs_underline {
@@ -6781,4 +6795,44 @@ mod tests {
67816795
output.source,
67826796
);
67836797
}
6798+
6799+
// --- US-382/383: unstyled run after styled run must not create `](` pattern ---
6800+
6801+
#[test]
6802+
fn test_unstyled_run_with_parens_after_styled_run() {
6803+
// When a styled run is followed by an unstyled run starting with `(`,
6804+
// the `](` pattern must not be interpreted as Typst function arguments.
6805+
let doc = make_doc(vec![make_flow_page(vec![Block::Paragraph(Paragraph {
6806+
style: ParagraphStyle::default(),
6807+
runs: vec![
6808+
Run {
6809+
text: "bold text".to_string(),
6810+
style: TextStyle {
6811+
bold: Some(true),
6812+
..TextStyle::default()
6813+
},
6814+
href: None,
6815+
footnote: None,
6816+
},
6817+
Run {
6818+
text: "(parenthetical note)".to_string(),
6819+
style: TextStyle::default(),
6820+
href: None,
6821+
footnote: None,
6822+
},
6823+
],
6824+
})])]);
6825+
let result = generate_typst(&doc).unwrap().source;
6826+
// The result must not contain `](` directly — it would be interpreted
6827+
// as function arguments in Typst
6828+
assert!(
6829+
!result.contains("](\\(") || !result.contains("]("),
6830+
"Unstyled text with parens after styled run must be wrapped safely. Got: {result}"
6831+
);
6832+
// Verify the output uses #[...] wrapper or other safe pattern
6833+
assert!(
6834+
result.contains("#[") || result.contains("\\("),
6835+
"Unstyled text should be wrapped in #[...] to prevent syntax issues. Got: {result}"
6836+
);
6837+
}
67846838
}

0 commit comments

Comments
 (0)