Skip to content

Commit 391d5fa

Browse files
Merge pull request #88 from developer0hye/ralph/phase16-encrypted-file-detection
feat: detect encrypted/password-protected OOXML files
2 parents 23de658 + 7e70dc9 commit 391d5fa

File tree

5 files changed

+333
-38
lines changed

5 files changed

+333
-38
lines changed

crates/office2pdf/src/error.rs

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,9 @@ pub enum ConvertError {
1414

1515
#[error("render error: {0}")]
1616
Render(String),
17+
18+
#[error("file is encrypted/password-protected and cannot be converted")]
19+
UnsupportedEncryption,
1720
}
1821

1922
/// A non-fatal warning emitted when an element cannot be fully processed.
@@ -289,6 +292,26 @@ mod tests {
289292
assert!(dbg.contains("UnsupportedFormat"));
290293
}
291294

295+
#[test]
296+
fn test_unsupported_encryption_display() {
297+
let e = ConvertError::UnsupportedEncryption;
298+
let msg = e.to_string();
299+
assert!(
300+
msg.contains("encrypted") || msg.contains("password"),
301+
"UnsupportedEncryption display should mention encryption or password: {msg}"
302+
);
303+
}
304+
305+
#[test]
306+
fn test_unsupported_encryption_debug() {
307+
let e = ConvertError::UnsupportedEncryption;
308+
let dbg = format!("{e:?}");
309+
assert!(
310+
dbg.contains("UnsupportedEncryption"),
311+
"Debug format should contain variant name: {dbg}"
312+
);
313+
}
314+
292315
#[test]
293316
fn test_all_variants_carry_format() {
294317
let variants = [

crates/office2pdf/src/lib.rs

Lines changed: 82 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,19 @@ fn extract_panic_message(payload: &Box<dyn std::any::Any + Send>) -> String {
6464
}
6565
}
6666

67+
/// OLE2 Compound Binary File magic bytes.
68+
///
69+
/// Encrypted OOXML files are wrapped in an OLE2 container instead of being
70+
/// ZIP archives. Detecting this signature lets us return a clear
71+
/// [`ConvertError::UnsupportedEncryption`] before the ZIP reader sees
72+
/// invalid data.
73+
const OLE2_MAGIC: [u8; 8] = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];
74+
75+
/// Returns `true` if `data` starts with the OLE2 compound-file magic bytes.
76+
fn is_ole2(data: &[u8]) -> bool {
77+
data.len() >= OLE2_MAGIC.len() && data[..OLE2_MAGIC.len()] == OLE2_MAGIC
78+
}
79+
6780
/// Convert a file at the given path to PDF bytes with warnings.
6881
///
6982
/// Detects the format from the file extension (`.docx`, `.pptx`, `.xlsx`).
@@ -126,6 +139,11 @@ pub fn convert_bytes(
126139
format: Format,
127140
options: &ConvertOptions,
128141
) -> Result<ConvertResult, ConvertError> {
142+
// Encrypted OOXML files are wrapped in OLE2 containers — reject early.
143+
if is_ole2(data) {
144+
return Err(ConvertError::UnsupportedEncryption);
145+
}
146+
129147
// Use streaming path for XLSX when requested and pdf-ops is available
130148
#[cfg(feature = "pdf-ops")]
131149
if options.streaming && format == Format::Xlsx {
@@ -2035,6 +2053,70 @@ mod tests {
20352053
"Tagged PDF with headings should contain structure tags"
20362054
);
20372055
}
2056+
2057+
#[test]
2058+
fn test_is_ole2_with_magic_bytes() {
2059+
let ole2_magic: [u8; 8] = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];
2060+
let mut data = ole2_magic.to_vec();
2061+
data.extend_from_slice(&[0x00; 100]);
2062+
assert!(is_ole2(&data));
2063+
}
2064+
2065+
#[test]
2066+
fn test_is_ole2_with_zip_bytes() {
2067+
let zip_data = [0x50, 0x4B, 0x03, 0x04, 0x00, 0x00, 0x00, 0x00];
2068+
assert!(!is_ole2(&zip_data));
2069+
}
2070+
2071+
#[test]
2072+
fn test_is_ole2_with_short_data() {
2073+
let short = [0xD0, 0xCF, 0x11];
2074+
assert!(!is_ole2(&short));
2075+
}
2076+
2077+
#[test]
2078+
fn test_is_ole2_with_empty_data() {
2079+
assert!(!is_ole2(&[]));
2080+
}
2081+
2082+
#[test]
2083+
fn test_ole2_bytes_return_unsupported_encryption() {
2084+
let ole2_magic: [u8; 8] = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];
2085+
let mut data = ole2_magic.to_vec();
2086+
data.extend_from_slice(&[0x00; 100]);
2087+
2088+
let err = convert_bytes(&data, Format::Docx, &ConvertOptions::default()).unwrap_err();
2089+
assert!(
2090+
matches!(err, ConvertError::UnsupportedEncryption),
2091+
"Expected UnsupportedEncryption, got: {err:?}"
2092+
);
2093+
}
2094+
2095+
#[test]
2096+
fn test_ole2_bytes_return_unsupported_encryption_xlsx() {
2097+
let ole2_magic: [u8; 8] = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];
2098+
let mut data = ole2_magic.to_vec();
2099+
data.extend_from_slice(&[0x00; 100]);
2100+
2101+
let err = convert_bytes(&data, Format::Xlsx, &ConvertOptions::default()).unwrap_err();
2102+
assert!(
2103+
matches!(err, ConvertError::UnsupportedEncryption),
2104+
"Expected UnsupportedEncryption, got: {err:?}"
2105+
);
2106+
}
2107+
2108+
#[test]
2109+
fn test_ole2_bytes_return_unsupported_encryption_pptx() {
2110+
let ole2_magic: [u8; 8] = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];
2111+
let mut data = ole2_magic.to_vec();
2112+
data.extend_from_slice(&[0x00; 100]);
2113+
2114+
let err = convert_bytes(&data, Format::Pptx, &ConvertOptions::default()).unwrap_err();
2115+
assert!(
2116+
matches!(err, ConvertError::UnsupportedEncryption),
2117+
"Expected UnsupportedEncryption, got: {err:?}"
2118+
);
2119+
}
20382120
}
20392121

20402122
#[cfg(all(test, feature = "typescript"))]

0 commit comments

Comments
 (0)