Skip to content

Commit 954885e

Browse files
Merge pull request #99 from developer0hye/ralph/phase22-fix-typst-codegen
fix: resolve Typst codegen failures for math, BiDi, and PPTX text
2 parents 4bbf754 + 30dc156 commit 954885e

File tree

2 files changed

+140
-5
lines changed

2 files changed

+140
-5
lines changed

crates/office2pdf/src/parser/omml.rs

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,12 @@ fn parse_superscript(reader: &mut Reader<&[u8]>, out: &mut String) {
170170
}
171171
}
172172

173-
out.push_str(&base);
173+
// Empty base needs a placeholder to avoid bare `^` in Typst math
174+
if base.is_empty() {
175+
out.push_str("\"\"");
176+
} else {
177+
out.push_str(&base);
178+
}
174179
let _ = std::fmt::Write::write_fmt(out, format_args!("^{}", wrap_if_needed(&sup)));
175180
}
176181

@@ -192,7 +197,12 @@ fn parse_subscript(reader: &mut Reader<&[u8]>, out: &mut String) {
192197
}
193198
}
194199

195-
out.push_str(&base);
200+
// Empty base needs a placeholder to avoid bare `_` in Typst math
201+
if base.is_empty() {
202+
out.push_str("\"\"");
203+
} else {
204+
out.push_str(&base);
205+
}
196206
let _ = std::fmt::Write::write_fmt(out, format_args!("_{}", wrap_if_needed(&sub)));
197207
}
198208

@@ -216,7 +226,12 @@ fn parse_sub_superscript(reader: &mut Reader<&[u8]>, out: &mut String) {
216226
}
217227
}
218228

219-
out.push_str(&base);
229+
// Empty base needs a placeholder to avoid bare `_` in Typst math
230+
if base.is_empty() {
231+
out.push_str("\"\"");
232+
} else {
233+
out.push_str(&base);
234+
}
220235
let _ = std::fmt::Write::write_fmt(
221236
out,
222237
format_args!("_{}^{}", wrap_if_needed(&sub), wrap_if_needed(&sup)),
@@ -459,7 +474,16 @@ fn map_math_text(input: &str) -> String {
459474
flush_non_ascii_text(&mut result, &non_ascii_buf, &mut last_was_name);
460475
non_ascii_buf.clear();
461476
}
462-
result.push(ch);
477+
// Parentheses from <m:t> are literal characters, not Typst math
478+
// grouping. Quote them to prevent breaking function call syntax
479+
// (e.g., `sqrt()` when radicand contains `)` from OMML text).
480+
if ch == '(' || ch == ')' {
481+
result.push('"');
482+
result.push(ch);
483+
result.push('"');
484+
} else {
485+
result.push(ch);
486+
}
463487
last_was_name = false;
464488
}
465489
}
@@ -1535,4 +1559,61 @@ mod tests {
15351559
let xml = r#"<m:r><m:t>α</m:t></m:r>"#;
15361560
assert_eq!(omml_to_typst(xml), "alpha");
15371561
}
1562+
1563+
// --- US-380: subscript/superscript with empty base ---
1564+
1565+
/// US-380: an `<m:sSub>` whose base (`<m:e/>` or `<m:e></m:e>`) is empty must
/// not produce output that opens with a bare `_`, which is invalid in Typst
/// math; the subscript itself must still be emitted.
#[test]
fn test_subscript_empty_base() {
    let result = omml_to_typst(
        r#"<m:sSub><m:e></m:e><m:sub><m:r><m:t>2</m:t></m:r></m:sub></m:sSub>"#,
    );

    // Negative check: nothing may precede-lessly start with the attachment marker.
    let opens_with_marker = result.starts_with('_');
    assert!(
        !opens_with_marker,
        "Empty base subscript must not start with bare '_': got '{result}'"
    );

    // Positive check: the subscript content is preserved.
    let keeps_subscript = result.contains("_2");
    assert!(
        keeps_subscript,
        "Should still contain subscript: got '{result}'"
    );
}
1580+
1581+
/// US-380: an `<m:sSup>` with an empty `<m:e>` base must not yield output that
/// opens with a bare `^`, while the superscript itself is still emitted.
#[test]
fn test_superscript_empty_base() {
    let result = omml_to_typst(
        r#"<m:sSup><m:e></m:e><m:sup><m:r><m:t>1</m:t></m:r></m:sup></m:sSup>"#,
    );

    // Negative check: the output may not begin with the attachment marker.
    let opens_with_marker = result.starts_with('^');
    assert!(
        !opens_with_marker,
        "Empty base superscript must not start with bare '^': got '{result}'"
    );

    // Positive check: the superscript content is preserved.
    let keeps_superscript = result.contains("^1");
    assert!(
        keeps_superscript,
        "Should still contain superscript: got '{result}'"
    );
}
1594+
1595+
/// US-380: an `<m:sSubSup>` with an empty `<m:e>` base must not yield output
/// that opens with a bare `_`, and both attachments must still be emitted.
///
/// The positive checks mirror `test_subscript_empty_base` and
/// `test_superscript_empty_base`; without them the test would also pass if
/// the whole expression were silently dropped.
#[test]
fn test_sub_superscript_empty_base() {
    let xml = r#"<m:sSubSup><m:e></m:e><m:sub><m:r><m:t>2</m:t></m:r></m:sub><m:sup><m:r><m:t>1</m:t></m:r></m:sup></m:sSubSup>"#;
    let result = omml_to_typst(xml);
    assert!(
        !result.starts_with('_'),
        "Empty base sub-superscript must not start with bare '_': got '{result}'"
    );
    // The subscript and superscript parts must survive the empty-base handling.
    assert!(
        result.contains("_2"),
        "Should still contain subscript: got '{result}'"
    );
    assert!(
        result.contains("^1"),
        "Should still contain superscript: got '{result}'"
    );
}
1604+
1605+
// --- US-381: literal parens in math run text ---
1606+
1607+
/// US-381: literal `(` and `)` coming from `<m:t>` text must not break a Typst
/// math function call that wraps them.
#[test]
fn test_math_text_literal_parens() {
    // Radical with a hidden degree whose radicand text is the literal `)2(`.
    let result = omml_to_typst(
        r#"<m:rad><m:radPr><m:degHide m:val="on"/></m:radPr><m:deg/><m:e><m:r><m:t>)2(</m:t></m:r></m:e></m:rad>"#,
    );

    // If the leading `)` were emitted raw it would close the call right away,
    // giving something like "sqrt()2()" — an empty sqrt() that fails to compile.
    let has_empty_sqrt = result.contains("sqrt()");
    assert!(
        !has_empty_sqrt,
        "Literal parens in radicand must not produce empty sqrt(): got '{result}'"
    );
}
15381619
}

crates/office2pdf/src/render/typst_gen.rs

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1751,7 +1751,21 @@ fn generate_run(out: &mut String, run: &Run) {
17511751
out.push_str(&escaped);
17521752
out.push(']');
17531753
} else {
1754-
out.push_str(&escaped);
1754+
// Prevent `](` pattern: when previous output ends with an
1755+
// unescaped `]` and this text starts with `(`, `.`, or `[`,
1756+
// Typst would interpret it as function arguments / method call /
1757+
// trailing content. Wrap in `#[...]` to keep it in content mode.
1758+
let needs_wrap = !escaped.is_empty()
1759+
&& out.ends_with(']')
1760+
&& !out.ends_with("\\]")
1761+
&& matches!(escaped.as_bytes()[0], b'(' | b'.' | b'[');
1762+
if needs_wrap {
1763+
out.push_str("#[");
1764+
out.push_str(&escaped);
1765+
out.push(']');
1766+
} else {
1767+
out.push_str(&escaped);
1768+
}
17551769
}
17561770

17571771
if needs_underline {
@@ -6802,4 +6816,44 @@ mod tests {
68026816
output.source,
68036817
);
68046818
}
6819+
6820+
// --- US-382/383: unstyled run after styled run must not create `](` pattern ---
6821+
6822+
/// US-382/383: a styled run followed by an unstyled run that starts with `(`
/// must not leave a `](` sequence in the generated Typst source, because Typst
/// would read the parenthesis as function arguments.
#[test]
fn test_unstyled_run_with_parens_after_styled_run() {
    let styled_run = Run {
        text: "bold text".to_string(),
        style: TextStyle {
            bold: Some(true),
            ..TextStyle::default()
        },
        href: None,
        footnote: None,
    };
    let unstyled_run = Run {
        text: "(parenthetical note)".to_string(),
        style: TextStyle::default(),
        href: None,
        footnote: None,
    };
    let doc = make_doc(vec![make_flow_page(vec![Block::Paragraph(Paragraph {
        style: ParagraphStyle::default(),
        runs: vec![styled_run, unstyled_run],
    })])]);
    let result = generate_typst(&doc).unwrap().source;

    // The result must not contain `](` directly — it would be interpreted as
    // function arguments in Typst. The previous form of this check,
    // `!contains("](\\(") || !contains("](")`, collapsed to
    // `!contains("](\\(")` (the longer needle implies the shorter one) and so
    // never looked for the bare `](` pattern at all.
    assert!(
        !result.contains("]("),
        "Unstyled text with parens after styled run must be wrapped safely. Got: {result}"
    );
    // Verify the output uses the #[...] wrapper or another safe pattern
    // (an escaped parenthesis).
    assert!(
        result.contains("#[") || result.contains("\\("),
        "Unstyled text should be wrapped in #[...] to prevent syntax issues. Got: {result}"
    );
}
68056859
}

0 commit comments

Comments
 (0)