From 0c26bff330bc11f956a062596d4dd16c487c1ab4 Mon Sep 17 00:00:00 2001 From: Minh Vu <38443830+fallintoplace@users.noreply.github.com> Date: Wed, 20 May 2026 20:27:40 +0200 Subject: [PATCH 1/2] fix transform parameter parsing --- crates/iceberg/src/spec/transform.rs | 99 ++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 28 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 97ab638e79..7f54ff81b5 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -1013,37 +1013,11 @@ impl FromStr for Transform { "void" => Transform::Void, "unknown" => Transform::Unknown, v if v.starts_with("bucket") => { - let length = v - .strip_prefix("bucket") - .expect("transform must starts with `bucket`") - .trim_start_matches('[') - .trim_end_matches(']') - .parse() - .map_err(|err| { - Error::new( - ErrorKind::DataInvalid, - format!("transform bucket type {v:?} is invalid"), - ) - .with_source(err) - })?; - + let length = parse_parameterized_transform(v, "bucket")?; Transform::Bucket(length) } v if v.starts_with("truncate") => { - let width = v - .strip_prefix("truncate") - .expect("transform must starts with `truncate`") - .trim_start_matches('[') - .trim_end_matches(']') - .parse() - .map_err(|err| { - Error::new( - ErrorKind::DataInvalid, - format!("transform truncate type {v:?} is invalid"), - ) - .with_source(err) - })?; - + let width = parse_parameterized_transform(v, "truncate")?; Transform::Truncate(width) } v => { @@ -1058,6 +1032,34 @@ impl FromStr for Transform { } } +fn parse_parameterized_transform(value: &str, transform: &str) -> Result { + let parameter = value + .strip_prefix(transform) + .and_then(|v| v.strip_prefix('[')) + .and_then(|v| v.strip_suffix(']')) + .ok_or_else(|| invalid_parameterized_transform(value, transform))?; + + let parsed = parameter + .parse::() + .map_err(|err| invalid_parameterized_transform(value, transform).with_source(err))?; + + if parsed == 0 { + return Err(Error::new( + ErrorKind::DataInvalid, + format!("transform {transform} type {value:?} must be greater than 0"), + )); + } + + Ok(parsed) +} + +fn invalid_parameterized_transform(value: &str, transform: &str) -> Error { + Error::new( + ErrorKind::DataInvalid, + format!("transform {transform} type {value:?} is invalid"), + ) +} + impl Serialize for Transform { fn serialize(&self, serializer: S) -> std::result::Result where S: Serializer { @@ -1090,6 +1092,47 @@ mod tests { assert_eq!(result, expected); } + #[test] + fn test_should_parse_parameterized_transform_if_syntax_is_valid() { + assert_eq!( + "bucket[16]".parse::().unwrap(), + Transform::Bucket(16) + ); + assert_eq!( + "truncate[4]".parse::().unwrap(), + Transform::Truncate(4) + ); + } + + #[test] + fn test_should_reject_parameterized_transform_if_value_is_zero() { + assert!("bucket[0]".parse::().is_err()); + assert!("truncate[0]".parse::().is_err()); + } + + #[test] + fn test_should_reject_parameterized_transform_if_syntax_is_malformed() { + for transform in [ + "bucket10", + "bucket[]", + "bucket[10", + "bucket10]", + "bucket[[10]]", + "bucket[10]extra", + "truncate10", + "truncate[]", + "truncate[10", + "truncate10]", + "truncate[[10]]", + "truncate[10]extra", + ] { + assert!( + transform.parse::().is_err(), + "transform {transform:?} should be rejected" + ); + } + } + #[test] fn test_adjust_boundary_timestamp_types() { for (datum, dec, inc) in [ From 2899378748833f91f428e29db06f2256fc5f79ed Mon Sep 17 00:00:00 2001 From: Minh Vu Date: Sat, 23 May 2026 19:33:20 +0200 Subject: [PATCH 2/2] fix: reject leading plus in transform args --- crates/iceberg/src/spec/transform.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 7f54ff81b5..20ef02e041 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -1039,6 +1039,10 @@ fn parse_parameterized_transform(value: &str, transform: &str) -> Result { .and_then(|v| v.strip_suffix(']')) .ok_or_else(|| invalid_parameterized_transform(value, transform))?; + if parameter.is_empty() || !parameter.chars().all(|c| c.is_ascii_digit()) { + return Err(invalid_parameterized_transform(value, transform)); + } + let parsed = parameter .parse::() .map_err(|err| invalid_parameterized_transform(value, transform).with_source(err))?; @@ -1115,12 +1119,14 @@ mod tests { for transform in [ "bucket10", "bucket[]", + "bucket[+1]", "bucket[10", "bucket10]", "bucket[[10]]", "bucket[10]extra", "truncate10", "truncate[]", + "truncate[+1]", "truncate[10", "truncate10]", "truncate[[10]]",