diff --git a/doc/user/content/release-notes.md b/doc/user/content/release-notes.md index 3f1de46af38ef..812ede8f6681d 100644 --- a/doc/user/content/release-notes.md +++ b/doc/user/content/release-notes.md @@ -58,6 +58,8 @@ Wrap your release notes at the 80 character mark. `mz_kafka_consumer_partitions` has an additional column `initial_high_offset` containing the first reported `hi_offset` from the broker for each partition. +- Add `left` to the [string function](/sql/functions#string-func) suite. + {{% version-header v0.8.1 %}} - Add [timelines](/sql/timelines) to all sources to prevent diff --git a/doc/user/data/sql_funcs.yml b/doc/user/data/sql_funcs.yml index d817a39452ea4..58d218dd3553d 100644 --- a/doc/user/data/sql_funcs.yml +++ b/doc/user/data/sql_funcs.yml @@ -224,6 +224,9 @@ description: Encode `b` using the specified textual representation. url: encode + - signature: 'left(s: str, n: int) -> str' + description: The first `n` characters of `s`. If `n` is negative, all but the last `|n|` characters of `s`. 
+ - signature: 'length(s: str) -> int' description: Number of code points in `s` url: length diff --git a/src/expr/src/scalar/func.rs b/src/expr/src/scalar/func.rs index e9446e9b81d13..1cd34d4a347b7 100644 --- a/src/expr/src/scalar/func.rs +++ b/src/expr/src/scalar/func.rs @@ -2814,6 +2814,7 @@ pub enum BinaryFunc { MapContainsAnyKeys, MapContainsMap, ConvertFrom, + Left, Position, Right, RepeatString, @@ -2987,6 +2988,7 @@ impl BinaryFunc { BinaryFunc::ConvertFrom => eager!(convert_from), BinaryFunc::Encode => eager!(encode, temp_storage), BinaryFunc::Decode => eager!(decode, temp_storage), + BinaryFunc::Left => eager!(left), BinaryFunc::Position => eager!(position), BinaryFunc::Right => eager!(right), BinaryFunc::Trim => Ok(eager!(trim)), @@ -3043,8 +3045,8 @@ impl BinaryFunc { ScalarType::Bool.nullable(true) } - ToCharTimestamp | ToCharTimestampTz | ConvertFrom | Right | Trim | TrimLeading - | TrimTrailing => ScalarType::String.nullable(in_nullable), + ToCharTimestamp | ToCharTimestampTz | ConvertFrom | Left | Right | Trim + | TrimLeading | TrimTrailing => ScalarType::String.nullable(in_nullable), AddInt32 | SubInt32 @@ -3370,6 +3372,7 @@ impl BinaryFunc { | RoundDecimal(_) | RoundAPD | ConvertFrom + | Left | Position | Right | Trim @@ -3506,6 +3509,7 @@ impl fmt::Display for BinaryFunc { BinaryFunc::RoundDecimal(_) => f.write_str("round"), BinaryFunc::RoundAPD => f.write_str("round"), BinaryFunc::ConvertFrom => f.write_str("convert_from"), + BinaryFunc::Left => f.write_str("left"), BinaryFunc::Position => f.write_str("position"), BinaryFunc::Right => f.write_str("right"), BinaryFunc::Trim => f.write_str("btrim"), @@ -5170,6 +5174,32 @@ fn position<'a>(a: Datum<'a>, b: Datum<'a>) -> Result<Datum<'a>, EvalError> { } } +fn left<'a>(a: Datum<'a>, b: Datum<'a>) -> Result<Datum<'a>, EvalError> { + let string: &'a str = a.unwrap_str(); + let n = b.unwrap_int32(); + + let mut byte_indices = string.char_indices().map(|(i, _)| i); + + let end_in_bytes = match n.cmp(&0) { + Ordering::Equal 
=> 0, + Ordering::Greater => { + let n = usize::try_from(n).map_err(|_| { + EvalError::InvalidParameterValue(format!("invalid parameter n: {:?}", n)) + })?; + // byte offset just past the first n characters + byte_indices.nth(n).unwrap_or_else(|| string.len()) + } + Ordering::Less => { + let n = usize::try_from(-(n + 1)).map_err(|_| { + EvalError::InvalidParameterValue(format!("invalid parameter n: {:?}", n)) + })?; + byte_indices.rev().nth(n).unwrap_or(0) + } + }; + + Ok(Datum::String(&string[..end_in_bytes])) +} + fn right<'a>(a: Datum<'a>, b: Datum<'a>) -> Result<Datum<'a>, EvalError> { let string: &'a str = a.unwrap_str(); let n = b.unwrap_int32(); diff --git a/src/sql/src/func.rs b/src/sql/src/func.rs index 774387c09bce8..ac8596306e43a 100644 --- a/src/sql/src/func.rs +++ b/src/sql/src/func.rs @@ -1473,6 +1473,9 @@ lazy_static! { "jsonb_typeof" => Scalar { params!(Jsonb) => UnaryFunc::JsonbTypeof, 3210; }, + "left" => Scalar { + params!(String, Int32) => BinaryFunc::Left, 3060; + }, "length" => Scalar { params!(Bytes) => UnaryFunc::ByteLengthBytes, 2010; params!(String) => UnaryFunc::CharLength, 1317; diff --git a/test/sqllogictest/string.slt b/test/sqllogictest/string.slt index e0deb2c81615d..4c8352baa91c4 100644 --- a/test/sqllogictest/string.slt +++ b/test/sqllogictest/string.slt @@ -939,6 +939,128 @@ SELECT position(e'\u0903\u0951' IN e'\u0930\u0903\u0951') ---- 2 +### left ### +statement ok +CREATE TABLE lefttest (strcol char(15), vccol varchar(15), smicol smallint, intcol int) + +statement ok +INSERT INTO lefttest VALUES ('Mg', 'Mn', 1, -1), ('magnesium', 'manganese', 3, NULL), + (NULL, NULL, 0, 0), ('24.31', '54.94', 3, -3), ('长久不见', '爱不释手', NULL, 3), + ('', '', -1, 2) + +# invalid input +statement error +SELECT left(42) + +statement error +SELECT left('str') + +statement error +SELECT left(42, 'str') + +statement error +SELECT left('str', 42, 17) + +# standard tests + +#TODO: materialize#589 select left(strcol, foo) from lefttest + +# edge case +query T rowsort +SELECT left(vccol, 0) 
FROM lefttest +---- +(empty) +(empty) +NULL +(empty) +(empty) +(empty) + +# from the front +query T rowsort +SELECT left(vccol, 2) FROM lefttest +---- +Mn +ma +NULL +54 +爱不 +(empty) + +# from the back +query T rowsort +SELECT left(vccol, -2) FROM lefttest +---- +(empty) +mangane +NULL +54. +爱不 +(empty) + +query TT rowsort +SELECT left(vccol, smicol), left(vccol, intcol) FROM lefttest +---- +M M +man NULL +NULL NULL +54. 54 +NULL 爱不释 +(empty) (empty) + + +# NULL inputs +query T +SELECT left(NULL, 99) +---- +NULL + +query T +SELECT left('str', NULL) +---- +NULL + +# combining characters + +query T +SELECT left('रः॑', 0) +---- +(empty) + +query T +SELECT left('रः॑', 1) +---- + र + +query T +SELECT left('रः॑', -1) +---- +रः + +# this is exactly the same as above, but using unicode escapes +query T +SELECT left(e'\u0930\u0903\u0951', -1) +---- +रः + +# integer edge cases + +# i32:MIN +query T +SELECT left('hello', -2147483648) +---- +(empty) + +# i32:MIN + 1 +query T +SELECT left('hello', -2147483647) +---- +(empty) + +# i64 +query error arguments cannot be implicitly cast to any implementation's parameters +SELECT left('hello', 2147483648) + ### right ### statement ok CREATE TABLE righttest (strcol char(15), vccol varchar(15), smicol smallint, intcol int)