Skip to content

Commit 8767cd9

Browse files
developer0hye and claude committed
fix: add depth limits to prevent stack overflow on deeply nested tables
Add MAX_TABLE_DEPTH (64) constant to both the DOCX parser and Typst renderer. Deeply nested tables beyond this limit are silently truncated instead of causing a stack overflow.

Parser (docx.rs):
- convert_table(), extract_raw_rows(), extract_cell_content() now accept a depth parameter; top-level callers pass 0
- extract_cell_content() skips nested tables when depth >= MAX_TABLE_DEPTH

Renderer (typst_gen.rs):
- GenCtx gains a table_depth field (default 0)
- generate_table() increments/decrements depth via a wrapper
- generate_cell_content() skips Block::Table when depth >= MAX_TABLE_DEPTH

Fixes #94

Signed-off-by: Yonghye Kwon <developer.0hye@gmail.com>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Yonghye Kwon <developer.0hye@gmail.com>
1 parent 52ac387 commit 8767cd9

File tree

2 files changed

+53
-14
lines changed

2 files changed

+53
-14
lines changed

crates/office2pdf/src/parser/docx.rs

Lines changed: 31 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,10 @@ use std::io::Read;
44

55
use crate::config::ConvertOptions;
66
use crate::error::{ConvertError, ConvertWarning};
7+
8+
/// Maximum nesting depth for tables-within-tables. Deeper nesting is silently
9+
/// truncated to prevent stack overflow on pathological documents.
10+
const MAX_TABLE_DEPTH: usize = 64;
711
use crate::ir::{
812
Alignment, Block, BorderLineStyle, BorderSide, CellBorder, Chart, Color, ColumnLayout,
913
Document, FloatingImage, FlowPage, HFInline, HeaderFooter, HeaderFooterParagraph, ImageData,
@@ -1013,6 +1017,7 @@ impl Parser for DocxParser {
10131017
&notes,
10141018
&wraps,
10151019
&bidi,
1020+
0,
10161021
))])]
10171022
}
10181023
docx_rs::DocumentChild::StructuredDataTag(sdt) => convert_sdt_children(
@@ -1268,7 +1273,7 @@ fn convert_sdt_children(
12681273
}
12691274
docx_rs::StructuredDataTagChild::Table(table) => {
12701275
result.push(TaggedElement::Plain(vec![Block::Table(convert_table(
1271-
table, images, hyperlinks, style_map, notes, wraps, bidi,
1276+
table, images, hyperlinks, style_map, notes, wraps, bidi, 0,
12721277
))]));
12731278
}
12741279
docx_rs::StructuredDataTagChild::StructuredDataTag(nested) => {
@@ -1631,6 +1636,7 @@ fn extract_line_spacing(
16311636
(line_spacing, space_before, space_after)
16321637
}
16331638

1639+
#[allow(clippy::too_many_arguments)]
16341640
/// Convert a docx-rs Table to an IR Table.
16351641
///
16361642
/// Handles:
@@ -1648,11 +1654,14 @@ fn convert_table(
16481654
notes: &NoteContext,
16491655
wraps: &WrapContext,
16501656
bidi: &BidiContext,
1657+
depth: usize,
16511658
) -> Table {
16521659
let column_widths: Vec<f64> = table.grid.iter().map(|&w| w as f64 / 20.0).collect();
16531660

16541661
// First pass: extract raw rows with vmerge info for rowspan calculation
1655-
let raw_rows = extract_raw_rows(table, images, hyperlinks, style_map, notes, wraps, bidi);
1662+
let raw_rows = extract_raw_rows(
1663+
table, images, hyperlinks, style_map, notes, wraps, bidi, depth,
1664+
);
16561665

16571666
// Second pass: resolve vertical merges into rowspan values and build IR rows
16581667
let rows = resolve_vmerge_and_build_rows(&raw_rows);
@@ -1673,6 +1682,7 @@ struct RawCell {
16731682
background: Option<Color>,
16741683
}
16751684

1685+
#[allow(clippy::too_many_arguments)]
16761686
/// Extract raw rows from a docx-rs Table, tracking column indices and vmerge state.
16771687
fn extract_raw_rows(
16781688
table: &docx_rs::Table,
@@ -1682,6 +1692,7 @@ fn extract_raw_rows(
16821692
notes: &NoteContext,
16831693
wraps: &WrapContext,
16841694
bidi: &BidiContext,
1695+
depth: usize,
16851696
) -> Vec<Vec<RawCell>> {
16861697
let mut raw_rows = Vec::new();
16871698

@@ -1706,8 +1717,9 @@ fn extract_raw_rows(
17061717
.and_then(|v| v.as_str())
17071718
.map(String::from);
17081719

1709-
let content =
1710-
extract_cell_content(cell, images, hyperlinks, style_map, notes, wraps, bidi);
1720+
let content = extract_cell_content(
1721+
cell, images, hyperlinks, style_map, notes, wraps, bidi, depth,
1722+
);
17111723
let border = prop_json
17121724
.as_ref()
17131725
.and_then(|j| j.get("borders"))
@@ -1802,6 +1814,7 @@ fn count_vmerge_span(raw_rows: &[Vec<RawCell>], start_row: usize, col_index: usi
18021814
span
18031815
}
18041816

1817+
#[allow(clippy::too_many_arguments)]
18051818
/// Extract cell content (paragraphs) from a docx-rs TableCell.
18061819
fn extract_cell_content(
18071820
cell: &docx_rs::TableCell,
@@ -1811,6 +1824,7 @@ fn extract_cell_content(
18111824
notes: &NoteContext,
18121825
wraps: &WrapContext,
18131826
bidi: &BidiContext,
1827+
depth: usize,
18141828
) -> Vec<Block> {
18151829
let mut blocks = Vec::new();
18161830
for content in &cell.children {
@@ -1828,15 +1842,19 @@ fn extract_cell_content(
18281842
);
18291843
}
18301844
docx_rs::TableCellContent::Table(nested_table) => {
1831-
blocks.push(Block::Table(convert_table(
1832-
nested_table,
1833-
images,
1834-
hyperlinks,
1835-
style_map,
1836-
notes,
1837-
wraps,
1838-
bidi,
1839-
)));
1845+
if depth < MAX_TABLE_DEPTH {
1846+
blocks.push(Block::Table(convert_table(
1847+
nested_table,
1848+
images,
1849+
hyperlinks,
1850+
style_map,
1851+
notes,
1852+
wraps,
1853+
bidi,
1854+
depth + 1,
1855+
)));
1856+
}
1857+
// Silently skip nested tables beyond MAX_TABLE_DEPTH
18401858
}
18411859
_ => {}
18421860
}

crates/office2pdf/src/render/typst_gen.rs

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,17 +30,22 @@ pub struct TypstOutput {
3030
pub images: Vec<ImageAsset>,
3131
}
3232

33+
/// Maximum nesting depth for tables-within-tables, matching the parser limit.
34+
const MAX_TABLE_DEPTH: usize = 64;
35+
3336
/// Internal context for tracking image assets during code generation.
3437
struct GenCtx {
3538
images: Vec<ImageAsset>,
3639
next_image_id: usize,
40+
table_depth: usize,
3741
}
3842

3943
impl GenCtx {
4044
fn new() -> Self {
4145
Self {
4246
images: Vec::new(),
4347
next_image_id: 0,
48+
table_depth: 0,
4449
}
4550
}
4651

@@ -1270,6 +1275,17 @@ fn generate_list_items(
12701275
}
12711276

12721277
fn generate_table(out: &mut String, table: &Table, ctx: &mut GenCtx) -> Result<(), ConvertError> {
1278+
ctx.table_depth += 1;
1279+
let result = generate_table_inner(out, table, ctx);
1280+
ctx.table_depth -= 1;
1281+
result
1282+
}
1283+
1284+
fn generate_table_inner(
1285+
out: &mut String,
1286+
table: &Table,
1287+
ctx: &mut GenCtx,
1288+
) -> Result<(), ConvertError> {
12731289
out.push_str("#table(\n");
12741290

12751291
// Determine number of columns
@@ -1487,7 +1503,12 @@ fn generate_cell_content(
14871503
}
14881504
match block {
14891505
Block::Paragraph(para) => generate_cell_paragraph(out, para),
1490-
Block::Table(table) => generate_table(out, table, ctx)?,
1506+
Block::Table(table) => {
1507+
if ctx.table_depth < MAX_TABLE_DEPTH {
1508+
generate_table(out, table, ctx)?;
1509+
}
1510+
// Silently skip nested tables beyond MAX_TABLE_DEPTH
1511+
}
14911512
Block::Image(img) => generate_image(out, img, ctx),
14921513
Block::FloatingImage(fi) => generate_floating_image(out, fi, ctx),
14931514
Block::List(list) => generate_list(out, list)?,

0 commit comments

Comments
 (0)