@@ -94,6 +94,10 @@ fn parse_omml_children(reader: &mut Reader<&[u8]>, out: &mut String, end_tag: &[
9494 ensure_math_separator ( out) ;
9595 parse_sub_superscript ( reader, out) ;
9696 }
97+ b"groupChr" => {
98+ ensure_math_separator ( out) ;
99+ parse_group_chr ( reader, out) ;
100+ }
97101 b"d" => parse_delimiter ( reader, out) ,
98102 b"r" => parse_math_run ( reader, out) ,
99103 b"oMath" => parse_omml_children ( reader, out, b"oMath" ) ,
@@ -238,10 +242,12 @@ fn parse_radical(reader: &mut Reader<&[u8]>, out: &mut String) {
238242 }
239243 }
240244
245+ // Use placeholder for empty radicand to avoid Typst "missing argument" error
246+ let radicand = if content. is_empty ( ) { "\" \" " } else { & content } ;
241247 if deg_hide || deg. is_empty ( ) {
242- let _ = std:: fmt:: Write :: write_fmt ( out, format_args ! ( "sqrt({content })" ) ) ;
248+ let _ = std:: fmt:: Write :: write_fmt ( out, format_args ! ( "sqrt({radicand })" ) ) ;
243249 } else {
244- let _ = std:: fmt:: Write :: write_fmt ( out, format_args ! ( "root({deg}, {content })" ) ) ;
250+ let _ = std:: fmt:: Write :: write_fmt ( out, format_args ! ( "root({deg}, {radicand })" ) ) ;
245251 }
246252}
247253
@@ -289,7 +295,15 @@ fn parse_delimiter(reader: &mut Reader<&[u8]>, out: &mut String) {
289295 let beg = map_delimiter ( & beg_chr) ;
290296 let end = map_delimiter ( & end_chr) ;
291297 let content = elements. join ( ", " ) ;
292- let _ = std:: fmt:: Write :: write_fmt ( out, format_args ! ( "{beg}{content}{end}" ) ) ;
298+ // If either delimiter is empty, omit both to avoid unbalanced delimiters in Typst
299+ if beg. is_empty ( ) && end. is_empty ( ) {
300+ out. push_str ( & content) ;
301+ } else if beg. is_empty ( ) || end. is_empty ( ) {
302+ // One-sided invisible delimiter: emit content without delimiters
303+ out. push_str ( & content) ;
304+ } else {
305+ let _ = std:: fmt:: Write :: write_fmt ( out, format_args ! ( "{beg}{content}{end}" ) ) ;
306+ }
293307}
294308
295309fn map_delimiter ( chr : & str ) -> & str {
@@ -391,19 +405,35 @@ fn map_math_text(input: &str) -> String {
391405 let mut word_buf = String :: new ( ) ;
392406 let mut last_was_name = false ;
393407
408+ let mut non_ascii_buf = String :: new ( ) ;
409+
394410 for ch in input. chars ( ) {
395411 if ch. is_ascii_alphabetic ( ) {
412+ // Flush non-ASCII buffer first
413+ if !non_ascii_buf. is_empty ( ) {
414+ flush_non_ascii_text ( & mut result, & non_ascii_buf, & mut last_was_name) ;
415+ non_ascii_buf. clear ( ) ;
416+ }
396417 word_buf. push ( ch) ;
397418 continue ;
398419 }
399420
400421 // Flush accumulated word before processing this character
401422 if !word_buf. is_empty ( ) {
423+ // Flush non-ASCII buffer first
424+ if !non_ascii_buf. is_empty ( ) {
425+ flush_non_ascii_text ( & mut result, & non_ascii_buf, & mut last_was_name) ;
426+ non_ascii_buf. clear ( ) ;
427+ }
402428 flush_math_word ( & mut result, & word_buf, & mut last_was_name) ;
403429 word_buf. clear ( ) ;
404430 }
405431
406432 if let Some ( name) = unicode_to_typst ( ch) {
433+ if !non_ascii_buf. is_empty ( ) {
434+ flush_non_ascii_text ( & mut result, & non_ascii_buf, & mut last_was_name) ;
435+ non_ascii_buf. clear ( ) ;
436+ }
407437 if !result. is_empty ( )
408438 && ( last_was_name || result. chars ( ) . last ( ) . is_some_and ( |c| c. is_alphanumeric ( ) ) )
409439 {
@@ -412,25 +442,52 @@ fn map_math_text(input: &str) -> String {
412442 result. push_str ( name) ;
413443 last_was_name = true ;
414444 } else if ch. is_ascii_digit ( ) {
445+ if !non_ascii_buf. is_empty ( ) {
446+ flush_non_ascii_text ( & mut result, & non_ascii_buf, & mut last_was_name) ;
447+ non_ascii_buf. clear ( ) ;
448+ }
415449 if last_was_name {
416450 result. push ( ' ' ) ;
417451 }
418452 result. push ( ch) ;
419453 last_was_name = false ;
454+ } else if !ch. is_ascii ( ) && ch. is_alphabetic ( ) {
455+ // Non-ASCII alphabetic (Cyrillic, CJK, etc.) — accumulate for upright() wrapping
456+ non_ascii_buf. push ( ch) ;
420457 } else {
458+ if !non_ascii_buf. is_empty ( ) {
459+ flush_non_ascii_text ( & mut result, & non_ascii_buf, & mut last_was_name) ;
460+ non_ascii_buf. clear ( ) ;
461+ }
421462 result. push ( ch) ;
422463 last_was_name = false ;
423464 }
424465 }
425466
426- // Flush remaining word
467+ // Flush remaining buffers
427468 if !word_buf. is_empty ( ) {
428469 flush_math_word ( & mut result, & word_buf, & mut last_was_name) ;
429470 }
471+ if !non_ascii_buf. is_empty ( ) {
472+ flush_non_ascii_text ( & mut result, & non_ascii_buf, & mut last_was_name) ;
473+ }
430474
431475 result
432476}
433477
/// Appends `text` to `result` wrapped as `upright("…")` for Typst math mode.
///
/// A single separating space is written first when `result` is non-empty and
/// either `*last_was_name` is set or `result` currently ends in an
/// alphanumeric character. `*last_was_name` is always `true` on return.
fn flush_non_ascii_text(result: &mut String, text: &str, last_was_name: &mut bool) {
    // `next_back()` yields `Some` only for a non-empty string, which covers
    // the emptiness check and the trailing-character test in one step.
    match result.chars().next_back() {
        Some(tail) if *last_was_name || tail.is_alphanumeric() => result.push(' '),
        _ => {}
    }
    result.extend(["upright(\"", text, "\")"]);
    *last_was_name = true;
}
490+
434491/// Flush an accumulated word of ASCII letters to the result.
435492///
436493/// Known math function names (cos, sin, etc.) are kept intact.
@@ -811,6 +868,52 @@ fn parse_bar_props(reader: &mut Reader<&[u8]>, pos: &mut String) {
811868 }
812869}
813870
871+ fn parse_group_chr ( reader : & mut Reader < & [ u8 ] > , out : & mut String ) {
872+ let mut chr = "\u{23DF} " . to_string ( ) ; // default: underbrace ⏟
873+ let mut content = String :: new ( ) ;
874+
875+ loop {
876+ match reader. read_event ( ) {
877+ Ok ( Event :: Start ( ref e) ) => match e. local_name ( ) . as_ref ( ) {
878+ b"groupChrPr" => parse_group_chr_props ( reader, & mut chr) ,
879+ b"e" => content = parse_sub_element ( reader, b"e" ) ,
880+ other => skip_element ( reader, other) ,
881+ } ,
882+ Ok ( Event :: End ( ref e) ) if e. local_name ( ) . as_ref ( ) == b"groupChr" => break ,
883+ Ok ( Event :: Eof ) | Err ( _) => break ,
884+ _ => { }
885+ }
886+ }
887+
888+ let func = match chr. as_str ( ) {
889+ "\u{23DE} " => "overbrace" , // ⏞
890+ "\u{23DF} " => "underbrace" , // ⏟
891+ _ => "underbrace" ,
892+ } ;
893+ let _ = std:: fmt:: Write :: write_fmt ( out, format_args ! ( "{func}({content})" ) ) ;
894+ }
895+
896+ fn parse_group_chr_props ( reader : & mut Reader < & [ u8 ] > , chr : & mut String ) {
897+ loop {
898+ match reader. read_event ( ) {
899+ Ok ( Event :: Start ( ref e) ) | Ok ( Event :: Empty ( ref e) ) => {
900+ if e. local_name ( ) . as_ref ( ) == b"chr" {
901+ for attr in e. attributes ( ) . flatten ( ) {
902+ if attr. key . local_name ( ) . as_ref ( ) == b"val"
903+ && let Ok ( v) = attr. unescape_value ( )
904+ {
905+ * chr = v. to_string ( ) ;
906+ }
907+ }
908+ }
909+ }
910+ Ok ( Event :: End ( ref e) ) if e. local_name ( ) . as_ref ( ) == b"groupChrPr" => break ,
911+ Ok ( Event :: Eof ) | Err ( _) => break ,
912+ _ => { }
913+ }
914+ }
915+ }
916+
814917fn parse_matrix ( reader : & mut Reader < & [ u8 ] > , out : & mut String ) {
815918 let mut rows: Vec < Vec < String > > = Vec :: new ( ) ;
816919
@@ -1316,4 +1419,120 @@ mod tests {
13161419 let xml = "<m:r><m:t>α+β</m:t></m:r>" ;
13171420 assert_eq ! ( omml_to_typst( xml) , "alpha+beta" ) ;
13181421 }
1422+
1423+ // --- US-310: groupChr (overbrace/underbrace) tests ---
1424+
#[test]
fn test_group_chr_overbrace() {
    // A `groupChr` whose character is the top curly bracket becomes `overbrace`.
    let rendered = omml_to_typst(
        r#"<m:groupChr><m:groupChrPr><m:chr m:val="⏞"/><m:pos m:val="top"/></m:groupChrPr><m:e><m:r><m:t>a+b</m:t></m:r></m:e></m:groupChr>"#,
    );
    assert_eq!(rendered, "overbrace(a+b)");
}
1430+
#[test]
fn test_group_chr_underbrace() {
    // A `groupChr` whose character is the bottom curly bracket becomes `underbrace`.
    let rendered = omml_to_typst(
        r#"<m:groupChr><m:groupChrPr><m:chr m:val="⏟"/><m:pos m:val="bot"/></m:groupChrPr><m:e><m:r><m:t>x+y</m:t></m:r></m:e></m:groupChr>"#,
    );
    assert_eq!(rendered, "underbrace(x+y)");
}
1436+
#[test]
fn test_group_chr_default_underbrace() {
    // With no `m:chr` child in the properties, the converter's default
    // character applies and the result is an underbrace.
    let omml = r#"<m:groupChr><m:groupChrPr><m:pos m:val="bot"/></m:groupChrPr><m:e><m:r><m:t>z</m:t></m:r></m:e></m:groupChr>"#;
    let rendered = omml_to_typst(omml);
    assert_eq!(rendered, "underbrace(z)");
}
1443+
1444+ // --- US-311: subscript/superscript parentheses tests ---
1445+
#[test]
fn test_superscript_multi_token_parens() {
    // A superscript made of several tokens must be parenthesised after `^`.
    let rendered = omml_to_typst(
        "<m:sSup><m:e><m:r><m:t>x</m:t></m:r></m:e><m:sup><m:r><m:t>n+1</m:t></m:r></m:sup></m:sSup>",
    );
    assert_eq!(rendered, "x^(n+1)");
}
1451+
#[test]
fn test_subscript_multi_token_parens() {
    // A subscript made of several tokens must be parenthesised after `_`.
    let rendered = omml_to_typst(
        "<m:sSub><m:e><m:r><m:t>a</m:t></m:r></m:e><m:sub><m:r><m:t>i+1</m:t></m:r></m:sub></m:sSub>",
    );
    assert_eq!(rendered, "a_(i+1)");
}
1457+
1458+ // --- US-312: empty radicand tests ---
1459+
#[test]
fn test_radical_empty_radicand() {
    // A hidden degree and an empty `<m:e>` must yield `sqrt` with the empty
    // string placeholder as its argument. The exact output is pinned so that
    // variants such as `sqrt( )` cannot pass.
    let xml = r#"<m:rad><m:radPr><m:degHide m:val="1"/></m:radPr><m:deg/><m:e></m:e></m:rad>"#;
    let result = omml_to_typst(xml);
    assert_eq!(
        result, "sqrt(\"\")",
        "Empty radicand should be replaced by an empty-string placeholder: got '{result}'"
    );
}
1471+
#[test]
fn test_root_empty_radicand_with_degree() {
    // A visible degree with an empty `<m:e>` must still pass a radicand to
    // `root`. Checking only the prefix and the closing parenthesis would also
    // accept `root(3, )`, so the placeholder is asserted explicitly.
    let xml = r#"<m:rad><m:radPr><m:degHide m:val="0"/></m:radPr><m:deg><m:r><m:t>3</m:t></m:r></m:deg><m:e></m:e></m:rad>"#;
    let result = omml_to_typst(xml);
    assert!(
        result.starts_with("root(3,"),
        "Empty radicand with degree should produce root() with the degree first: got '{result}'"
    );
    assert!(
        result.ends_with("\"\")"),
        "Empty radicand with degree should end with an empty-string placeholder: got '{result}'"
    );
}
1481+
1482+ // --- US-313: delimiter balancing tests ---
1483+
#[test]
fn test_delimiter_empty_begin_chr() {
    // With an empty begChr the converter drops both delimiters and emits only
    // the content. Asserting the exact output also catches the case where the
    // content itself is lost, which a balance-only check would accept.
    let xml = r#"<m:d><m:dPr><m:begChr m:val=""/><m:endChr m:val=")"/></m:dPr><m:e><m:r><m:t>x</m:t></m:r></m:e></m:d>"#;
    let result = omml_to_typst(xml);
    assert_eq!(
        result, "x",
        "Empty begChr should emit bare content without an unmatched ')': got '{result}'"
    );
}
1495+
#[test]
fn test_delimiter_empty_end_chr() {
    // With an empty endChr the converter drops both delimiters and emits only
    // the content. Asserting the exact output also catches the case where the
    // content itself is lost, which a balance-only check would accept.
    let xml = r#"<m:d><m:dPr><m:begChr m:val="("/><m:endChr m:val=""/></m:dPr><m:e><m:r><m:t>x</m:t></m:r></m:e></m:d>"#;
    let result = omml_to_typst(xml);
    assert_eq!(
        result, "x",
        "Empty endChr should emit bare content without an unmatched '(': got '{result}'"
    );
}
1507+
#[test]
fn test_delimiter_both_empty() {
    // Two empty delimiter characters: only the enclosed content is expected.
    let result = omml_to_typst(
        r#"<m:d><m:dPr><m:begChr m:val=""/><m:endChr m:val=""/></m:dPr><m:e><m:r><m:t>x</m:t></m:r></m:e></m:d>"#,
    );
    assert_eq!(
        result, "x",
        "Both empty delimiters should emit bare content: got '{result}'"
    );
}
1518+
1519+ // --- US-314: non-ASCII text in math context ---
1520+
#[test]
fn test_non_ascii_cyrillic_in_math() {
    // Cyrillic text in math must be wrapped in upright() to avoid Typst's
    // "unknown variable" error. The whole word is expected inside a single
    // wrapper; checking only for "upright(" would also accept output that
    // drops the text or wraps each letter separately.
    let xml = r#"<m:r><m:t>если</m:t></m:r>"#;
    let result = omml_to_typst(xml);
    assert!(
        result.contains("upright(\"если\")"),
        "Cyrillic text in math should be wrapped as one upright() string: got '{result}'"
    );
}
1531+
#[test]
fn test_non_ascii_single_char_passthrough() {
    // A non-ASCII character that has a Typst symbol name is emitted as that
    // name rather than being wrapped as text.
    let rendered = omml_to_typst(r#"<m:r><m:t>α</m:t></m:r>"#);
    assert_eq!(rendered, "alpha");
}
13191538}
0 commit comments