diff --git a/doc/user/content/transform-data/idiomatic-materialize-sql/any.md b/doc/user/content/transform-data/idiomatic-materialize-sql/any.md index 1c3ba249e5c6d..71a2b153c3065 100644 --- a/doc/user/content/transform-data/idiomatic-materialize-sql/any.md +++ b/doc/user/content/transform-data/idiomatic-materialize-sql/any.md @@ -51,49 +51,7 @@ array/list/map contains duplicates, include [`DISTINCT`](/sql/select/#select-dis Materialize SQL -**If no duplicates exist in the unnested field:** Use a Common Table -Expression (CTE) to [`UNNEST()`](/sql/functions/#unnest) the array of values and -perform the equi-join on the unnested values. - -
-
- -```mzsql --- array_field contains no duplicates.-- - -WITH my_expanded_values AS -(SELECT UNNEST(array_field) AS fieldZ FROM tableB) -SELECT a.fieldA, ... -FROM tableA a -JOIN my_expanded_values t ON a.fieldZ = t.fieldZ -; -``` - - - - -Materialize SQL - - -**Duplicates may exist in the unnested field:** Use a Common Table -Expression (CTE) to [`DISTINCT`](/sql/select/#select-distinct) -[`UNNEST()`](/sql/functions/#unnest) the array of values and perform the -equi-join on the unnested values. - -
-
- - -```mzsql --- array_field may contain duplicates.-- - -WITH my_expanded_values AS -(SELECT DISTINCT UNNEST(array_field) AS fieldZ FROM tableB) -SELECT a.fieldA, ... -FROM tableA a -JOIN my_expanded_values t ON a.fieldZ = t.fieldZ -; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_general" name="any-equi-join" field="syntax_idiomatic" %}} @@ -101,22 +59,8 @@ JOIN my_expanded_values t ON a.fieldZ = t.fieldZ Anti-pattern ❌ -Avoid the use of [`ANY(...)` function](/sql/functions/#expression-bool_op-any) for equi-join -conditions. - -
-
- -```nofmt --- Anti-pattern. Avoid. -- -SELECT a.fieldA, ... -FROM tableA a, tableB b -WHERE a.fieldZ = ANY(b.array_field) -- Anti-pattern. Avoid. -; +{{% include-from-yaml data="idiomatic_mzsql/patterns_general" name="any-equi-join" field="syntax_anti_pattern" %}} -``` - -
@@ -155,35 +99,7 @@ with the `orders` table on the unnested values. Materialize SQL ✅ -***If no duplicates in the unnested field*** - -```mzsql --- sales_items.items contains no duplicates. -- - -WITH individual_sales_items AS -(SELECT unnest(items) as item, week_of FROM sales_items) -SELECT s.week_of, o.order_id, o.item, o.quantity -FROM orders o -JOIN individual_sales_items s ON o.item = s.item -WHERE date_trunc('week', o.order_date) = s.week_of -ORDER BY s.week_of, o.order_id, o.item, o.quantity -; -``` - -***To omit duplicates that may exist in the unnested field*** - -```mzsql --- sales_items.items may contains duplicates -- - -WITH individual_sales_items AS -(SELECT DISTINCT unnest(items) as item, week_of FROM sales_items) -SELECT s.week_of, o.order_id, o.item, o.quantity -FROM orders o -JOIN individual_sales_items s ON o.item = s.item -WHERE date_trunc('week', o.order_date) = s.week_of -ORDER BY s.week_of, o.order_id, o.item, o.quantity -; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_general" name="any-equi-join" field="example_idiomatic" %}} @@ -192,22 +108,7 @@ ORDER BY s.week_of, o.order_id, o.item, o.quantity Anti-pattern ❌ -Avoid the use of [`ANY()`](/sql/functions/#expression-bool_op-any) for the equi-join condition. - -
-
- -```nofmt --- Anti-pattern. Avoid. -- -SELECT s.week_of, o.order_id, o.item, o.quantity -FROM orders o -JOIN sales_items s ON o.item = ANY(s.items) -WHERE date_trunc('week', o.order_date) = s.week_of -ORDER BY s.week_of, o.order_id, o.item, o.quantity -; -``` - -
+{{% include-from-yaml data="idiomatic_mzsql/patterns_general" name="any-equi-join" field="example_anti_pattern" %}} diff --git a/doc/user/content/transform-data/idiomatic-materialize-sql/appendix/idiomatic-sql-chart.md b/doc/user/content/transform-data/idiomatic-materialize-sql/appendix/idiomatic-sql-chart.md index 996ce44af9003..961ee023e79fa 100644 --- a/doc/user/content/transform-data/idiomatic-materialize-sql/appendix/idiomatic-sql-chart.md +++ b/doc/user/content/transform-data/idiomatic-materialize-sql/appendix/idiomatic-sql-chart.md @@ -17,11 +17,13 @@ performance. ### Query Patterns -{{% idiomatic-sql/general-syntax-table %}} +{{< yaml-table data="idiomatic_mzsql/patterns_general" + columns="pattern_title,syntax_idiomatic" >}} ### Examples -{{% idiomatic-sql/general-example-table %}} +{{< yaml-table data="idiomatic_mzsql/patterns_general" + columns="pattern_title,example_idiomatic" >}} ## Window Functions {{< callout >}} @@ -34,11 +36,13 @@ performance. ### Query Patterns -{{% idiomatic-sql/window-functions-syntax-table %}} +{{< yaml-table data="idiomatic_mzsql/patterns_window_functions" + columns="pattern_title,syntax_idiomatic" >}} ### Examples -{{% idiomatic-sql/window-functions-example-table %}} +{{< yaml-table data="idiomatic_mzsql/patterns_window_functions" + columns="pattern_title,example_idiomatic" >}} ## See also diff --git a/doc/user/content/transform-data/idiomatic-materialize-sql/first-value.md b/doc/user/content/transform-data/idiomatic-materialize-sql/first-value.md index 689b89ccc6f7c..16c96fef2f0ca 100644 --- a/doc/user/content/transform-data/idiomatic-materialize-sql/first-value.md +++ b/doc/user/content/transform-data/idiomatic-materialize-sql/first-value.md @@ -39,23 +39,7 @@ in a subquery. Materialize SQL -Use a subquery that uses the [MIN()](/sql/functions/#min) or -[MAX()](/sql/functions/#max) aggregate function. - -
-
- -```mzsql -SELECT tableA.fieldA, tableA.fieldB, minmax.Z - FROM tableA, - (SELECT fieldA, - MIN(fieldZ), - MAX(fieldZ) - FROM tableA - GROUP BY fieldA) minmax -WHERE tableA.fieldA = minmax.fieldA -ORDER BY fieldA ... ; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="first-value" field="syntax_idiomatic" %}} @@ -64,23 +48,8 @@ ORDER BY fieldA ... ; Anti-pattern ❌ -Avoid the use of [`FIRST_VALUE() OVER (PARTITION BY ... ORDER BY ...)` -window function](/sql/functions/#first_value) for first value within groups -queries. +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="first-value" field="syntax_anti_pattern" %}} -
-
- -```nofmt --- Anti-pattern. Avoid. -- -SELECT fieldA, fieldB, - FIRST_VALUE(fieldZ) OVER (PARTITION BY fieldA ORDER BY ...), - FIRST_VALUE(fieldZ) OVER (PARTITION BY fieldA ORDER BY ... DESC) -FROM tableA -ORDER BY fieldA, ...; -``` - -
@@ -139,17 +108,7 @@ value if ordered by ascending price values). Materialize SQL ✅ -```mzsql -SELECT o.order_id, minmax.lowest_price, o.item, o.price, - o.price - minmax.lowest_price AS diff_lowest_price -FROM orders_view o, - (SELECT order_id, - MIN(price) AS lowest_price - FROM orders_view - GROUP BY order_id) minmax -WHERE o.order_id = minmax.order_id -ORDER BY o.order_id, o.item; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="first-value" field="extra_example_idiomatic_min" %}} @@ -157,27 +116,8 @@ ORDER BY o.order_id, o.item; Anti-pattern ❌ -Avoid the use of [`FIRST_VALUE() OVER (PARTITION BY ... ORDER BY ...)` -window function](/sql/functions/#first_value) for first value within groups queries. - -
-
- - -```nofmt --- Anti-pattern -- -SELECT order_id, - FIRST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price) AS lowest_price, - item, - price, - price - FIRST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price) AS diff_lowest_price -FROM orders_view -ORDER BY order_id, item; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="first-value" field="extra_example_anti_pattern_min" %}} -
@@ -203,17 +143,7 @@ value if ordered by descending price values). Materialize SQL ✅ -```mzsql -SELECT o.order_id, minmax.highest_price, o.item, o.price, - o.price - minmax.highest_price AS diff_highest_price -FROM orders_view o, - (SELECT order_id, - MAX(price) AS highest_price - FROM orders_view - GROUP BY order_id) minmax -WHERE o.order_id = minmax.order_id -ORDER BY o.order_id, o.item; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="first-value" field="extra_example_idiomatic_max" %}} @@ -221,31 +151,10 @@ ORDER BY o.order_id, o.item; Anti-pattern ❌ -Avoid the use of [`FIRST_VALUE() OVER (PARTITION BY ... ORDER BY ...)` -window function](/sql/functions/#first_value) for first value within groups -queries. - -
-
- - -```nofmt --- Anti-pattern -- -SELECT order_id, - FIRST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price DESC) AS highest_price, - item, - price, - price - FIRST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price DESC) AS diff_highest_price -FROM orders_view -ORDER BY order_id, item; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="first-value" field="extra_example_anti_pattern_max" %}} -
- @@ -257,7 +166,7 @@ in the order and these prices. The example uses a subquery that groups by the `order_id` and selects `MIN(price)` as the lowest price (i.e., first value if ordered by price values) and `MAX(price)` as the highest price (i.e., first -value if ordered by descending price values) +value if ordered by descending price values). @@ -271,20 +180,7 @@ value if ordered by descending price values) @@ -292,31 +188,8 @@ ORDER BY o.order_id, o.item; diff --git a/doc/user/content/transform-data/idiomatic-materialize-sql/lag.md b/doc/user/content/transform-data/idiomatic-materialize-sql/lag.md index b1f7aad0269a0..b79f2f24714f2 100644 --- a/doc/user/content/transform-data/idiomatic-materialize-sql/lag.md +++ b/doc/user/content/transform-data/idiomatic-materialize-sql/lag.md @@ -56,29 +56,7 @@ row. @@ -87,25 +65,7 @@ ORDER BY fieldA; @@ -135,31 +95,7 @@ lag value. @@ -168,25 +104,8 @@ ORDER BY fieldA; @@ -226,21 +145,7 @@ previous row. @@ -249,19 +154,7 @@ ordering can be represented by some **equality condition**. @@ -291,22 +184,7 @@ query includes the first row in the results, using `null` as the previous value. @@ -315,19 +193,7 @@ ordering can be represented by some **equality condition**. diff --git a/doc/user/content/transform-data/idiomatic-materialize-sql/last-value.md b/doc/user/content/transform-data/idiomatic-materialize-sql/last-value.md index 40d8d6885f51a..bc6ae88e77f6c 100644 --- a/doc/user/content/transform-data/idiomatic-materialize-sql/last-value.md +++ b/doc/user/content/transform-data/idiomatic-materialize-sql/last-value.md @@ -39,23 +39,7 @@ in a subquery. @@ -64,38 +48,8 @@ ORDER BY fieldA ... ; @@ -154,18 +108,7 @@ highest price (i.e., the last price if ordered by ascending price values): @@ -174,38 +117,8 @@ ORDER BY o.order_id, o.item; @@ -231,18 +144,7 @@ in the order and the lowest price. 
That is, use a subquery that groups by the @@ -251,38 +153,8 @@ ORDER BY o.order_id, o.item; @@ -310,67 +182,16 @@ ordered by ascending price values). + diff --git a/doc/user/content/transform-data/idiomatic-materialize-sql/lead.md b/doc/user/content/transform-data/idiomatic-materialize-sql/lead.md index d8cc5a69dd148..b543d45463a59 100644 --- a/doc/user/content/transform-data/idiomatic-materialize-sql/lead.md +++ b/doc/user/content/transform-data/idiomatic-materialize-sql/lead.md @@ -56,28 +56,7 @@ have a next row. @@ -86,25 +65,7 @@ ORDER BY fieldA; @@ -134,29 +95,7 @@ lead value. @@ -165,25 +104,8 @@ ORDER BY fieldA; @@ -222,22 +144,7 @@ next row. @@ -246,19 +153,7 @@ ordering can be represented by some **equality condition**. @@ -289,23 +184,7 @@ value. @@ -314,19 +193,7 @@ ordering can be represented by some **equality condition**. diff --git a/doc/user/content/transform-data/idiomatic-materialize-sql/not-in.md b/doc/user/content/transform-data/idiomatic-materialize-sql/not-in.md new file mode 100644 index 0000000000000..7b5c601709e77 --- /dev/null +++ b/doc/user/content/transform-data/idiomatic-materialize-sql/not-in.md @@ -0,0 +1,142 @@ +--- +title: "`NOT IN` subquery" +description: "Use idiomatic Materialize SQL for `NOT IN (subquery)` predicates to avoid a cross join in the dataflow plan." +menu: + main: + parent: idiomatic-materialize-sql + identifier: idiomatic-materialize-not-in + weight: 7 +--- + +## Overview + +The `fieldX NOT IN (<subquery>)` predicate returns `true` if `fieldX` does not +equal any value returned by the subquery. For predicates where `fieldX` or the +`<subquery>` can contain `NULL` values, Materialize provides idiomatic SQL +alternatives. + +### Materialize and `NOT IN (<subquery>)` + +When evaluating a `WHERE fieldX NOT IN (<subquery>)` predicate involving +possible `NULL` values for `fieldX` or `<subquery>`, Materialize performs a +cross join between the outer relation and the subquery to preserve SQL `NULL` +semantics, which can significantly increase memory usage. 
If possible, rewrite +the query to avoid the cross join. + +## Idiomatic Materialize SQL + +For `fieldX NOT IN (<subquery>)` predicates involving possible `NULL` values, +the following rewrites are available: + +{{< note >}} + +Neither rewrite is strictly equivalent to `NOT IN (<subquery>)`. + +Both rewrites avoid the `NULL` propagation semantics of `NOT IN`; that is, they +treat subquery `NULL` values as non-matches rather than allowing them to +invalidate the comparison. In addition, the `NOT EXISTS` rewrite retains outer +rows whose value is `NULL`, whereas both `NOT IN` and the filter-`NULL`s rewrite +exclude them. + +{{</ note >}} + +- Rewrite to [`NOT EXISTS`](/sql/functions/#not-exists) with a correlated + subquery. +- Retain `NOT IN`, but filter out `NULL` values from both the outer field and + the subquery. + +
Materialize SQL - -```mzsql -SELECT o.order_id, minmax.lowest_price, minmax.highest_price, o.item, o.price, - o.price - minmax.lowest_price AS diff_lowest_price, - o.price - minmax.highest_price AS diff_highest_price -FROM orders_view o, - (SELECT order_id, - MIN(price) AS lowest_price, - MAX(price) AS highest_price - FROM orders_view - GROUP BY order_id) minmax -WHERE o.order_id = minmax.order_id -ORDER BY o.order_id, o.item; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="first-value" field="example_idiomatic" %}}
Anti-pattern -Avoid the use of [`FIRST_VALUE() OVER (PARTITION BY ... ORDER BY ...)` -window function](/sql/functions/#first_value) for first value within groups -queries. - -
-
- - -```nofmt --- Anti-pattern -- -SELECT order_id, - FIRST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price) AS lowest_price, - FIRST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price DESC) AS highest_price, - item, - price, - price - FIRST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price) AS diff_lowest_price, - price - FIRST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price DESC) AS diff_highest_price -FROM orders_view -ORDER BY order_id, item; -``` -
+{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="first-value" field="example_anti_pattern" %}} +
Idiomatic Materialize SQL -Use a self join that specifies an **equality match** on the lag's order by field -(e.g., `fieldA`). The order by field must increment in a regular pattern in -order to be represented by an equality condition (e.g., `WHERE t1.fieldA = -t2.fieldA + ...`). The -query *excludes* the first row in the results since it does not have a previous -row. - -{{< important >}} - -The idiomatic Materialize SQL applies only to those "lag over" queries whose -ordering can be represented by some **equality condition**. - -{{}} - -
- -```mzsql --- Excludes the first row in the results -- -SELECT t1.fieldA, t2.fieldB as previous_row_value -FROM tableA t1, tableA t2 -WHERE t1.fieldA = t2.fieldA + ... -- or some other operand -ORDER BY fieldA; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lag" field="extra_syntax_idiomatic_exclude" %}}
Anti-pattern - - -Avoid the use of [`LAG(fieldZ) OVER (ORDER BY ...)`](/sql/functions/#lag) window -function when the order by field increases in a regular pattern. - - - -
- -
- -```nofmt --- Anti-pattern. Avoid. -- -SELECT fieldA, ... - LAG(fieldZ) OVER (ORDER BY fieldA) as previous_row_value -FROM tableA; -``` - -
+{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lag" field="syntax_anti_pattern" %}}
Idiomatic Materialize SQL -Use a self [`LEFT JOIN/LEFT OUTER JOIN`](/sql/select/join/#left-outer-join) -(e.g., `FROM tableA t1 LEFT JOIN tableA t2`) that specifies an **equality -match** on the lag's order by field (e.g., `fieldA`). The order by field must -increment in a regular pattern in order to be represented by an equality -condition (e.g., `ON t1.fieldA = t2.fieldA + ...`). The -query *includes* the first row, returning `null` as its lag value. - -{{< important >}} - -The idiomatic Materialize SQL applies only to those "lag over" queries whose -ordering can be represented by some **equality condition**. - -{{}} - - -
- -```mzsql --- Includes the first row in the results -- -SELECT t1.fieldA, t2.fieldB as previous_row_value -FROM tableA t1 -LEFT JOIN tableA t2 -ON t1.fieldA = t2.fieldA + ... -- or some other operand -ORDER BY fieldA; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lag" field="extra_syntax_idiomatic_include" %}}
Anti-pattern - - -Avoid the use of [`LAG(fieldZ) OVER (ORDER BY ...) window -function`](/sql/functions/#lag) when the order by field increases in a regular -pattern. - - - -
- -
+{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lag" field="syntax_anti_pattern" %}} -```nofmt -SELECT fieldA, ... - LAG(fieldZ) OVER (ORDER BY fieldA) as previous_row_value -FROM tableA; -``` - -
Materialize SQL -```mzsql --- Excludes the first row in results -- -SELECT o1.order_date, o1.daily_total, - o2.daily_total as previous_daily_total -FROM orders_daily_totals o1, orders_daily_totals o2 -WHERE o1.order_date = o2.order_date + INTERVAL '1' DAY -ORDER BY order_date; -``` - -{{< important >}} - -The idiomatic Materialize SQL applies only to those "lag over" queries whose -ordering can be represented by some **equality condition**. - -{{}} +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lag" field="extra_example_idiomatic_exclude" %}}
Anti-pattern -Avoid the use of [`LAG() OVER (ORDER BY ...)` window -function](/sql/functions/#lag) to access previous row's value if the order by -field increases in a regular pattern. - -
-
- -```nofmt --- Anti-pattern. Includes the first row's value. -- -SELECT order_date, daily_total, - LAG(daily_total) OVER (ORDER BY order_date) as previous_daily_total -FROM orders_daily_totals; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lag" field="example_anti_pattern" %}}
Materialize SQL -```mzsql --- Include the first row in results -- -SELECT o1.order_date, o1.daily_total, - o2.daily_total as previous_daily_total -FROM orders_daily_totals o1 -LEFT JOIN orders_daily_totals o2 -ON o1.order_date = o2.order_date + INTERVAL '1' DAY -ORDER BY order_date; -``` - -{{< important >}} - -The idiomatic Materialize SQL applies only to those "lag over" queries whose -ordering can be represented by some **equality condition**. - -{{}} +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lag" field="extra_example_idiomatic_include" %}}
Anti-pattern -Avoid the use of [`LAG() OVER (ORDER BY ...)` -window function](/sql/functions/#lag) to access previous row's value if the -order by field increases in a regular pattern. - -
-
- -```nofmt --- Anti-pattern. Includes the first row's value. -- -SELECT order_date, daily_total, - LAG(daily_total) OVER (ORDER BY order_date) as previous_daily_total -FROM orders_daily_totals; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lag" field="example_anti_pattern" %}}
Idiomatic Materialize SQL -Use a subquery that uses the [MIN()](/sql/functions/#min) or -[MAX()](/sql/functions/#max) aggregate function. - -
-
- -```mzsql -SELECT tableA.fieldA, tableA.fieldB, minmax.Z - FROM tableA, - (SELECT fieldA, - MAX(fieldZ), - MIN(fieldZ) - FROM tableA - GROUP BY fieldA) minmax -WHERE tableA.fieldA = minmax.fieldA -ORDER BY fieldA ... ; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="last-value" field="syntax_idiomatic" %}}
Anti-pattern -Do not use [`LAST_VALUE() OVER (PARTITION BY ... ORDER BY ... RANGE -...)` window function](/sql/functions/#last_value) for last value in each group -queries. +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="last-value" field="syntax_anti_pattern" %}} -{{< note >}} - -Materialize does not support `RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED -FOLLOWING`. - -{{}} - -
-
- -```nofmt --- Unsupported -- -SELECT fieldA, fieldB, - LAST_VALUE(fieldZ) - OVER (PARTITION BY fieldA ORDER BY fieldZ - RANGE BETWEEN - UNBOUNDED PRECEDING AND - UNBOUNDED FOLLOWING), - LAST_VALUE(fieldZ) - OVER (PARTITION BY fieldA ORDER BY fieldZ DESC - RANGE BETWEEN - UNBOUNDED PRECEDING AND - UNBOUNDED FOLLOWING) -FROM tableA -ORDER BY fieldA, ...; -``` - -
Idiomatic Materialize SQL - -```mzsql -SELECT o.order_id, minmax.highest_price, o.item, o.price, - o.price - minmax.highest_price AS diff_highest_price -FROM orders_view o, - (SELECT order_id, - MAX(price) AS highest_price - FROM orders_view - GROUP BY order_id) minmax -WHERE o.order_id = minmax.order_id -ORDER BY o.order_id, o.item; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="last-value" field="extra_example_idiomatic_max" %}}
Anti-pattern -Do not use of `LAST_VALUE() OVER (PARTITION BY ... ORDER BY ... RANGE ...)` -for last value in each group queries. - -{{< note >}} - -Materialize does not support `RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED -FOLLOWING`. +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="last-value" field="extra_example_anti_pattern_max" %}} -{{}} - -
- -```nofmt --- Unsupported -- -SELECT order_id, - LAST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price - RANGE BETWEEN - UNBOUNDED PRECEDING AND - UNBOUNDED FOLLOWING) AS highest_price, - item, - price, - price - LAST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price - RANGE BETWEEN - UNBOUNDED PRECEDING AND - UNBOUNDED FOLLOWING) AS diff_highest_price -FROM orders_view -ORDER BY order_id, item; -``` - -
Idiomatic Materialize SQL - -```mzsql -SELECT o.order_id, minmax.lowest_price, o.item, o.price, - o.price - minmax.lowest_price AS diff_lowest_price -FROM orders_view o, - (SELECT order_id, - MIN(price) AS lowest_price - FROM orders_view - GROUP BY order_id) minmax -WHERE o.order_id = minmax.order_id -ORDER BY o.order_id, o.item; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="last-value" field="extra_example_idiomatic_min" %}}
Anti-pattern -Do not use `LAST_VALUE() OVER (PARTITION BY ... ORDER BY ... RANGE ... )` -for last value in each group queries. - -{{< note >}} - -Materialize does not support `RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED -FOLLOWING`. - -{{}} - -
- -```nofmt --- Unsupported -- -SELECT order_id, - LAST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price DESC - RANGE BETWEEN - UNBOUNDED PRECEDING AND - UNBOUNDED FOLLOWING) AS lowest_price, - item, - price, - price - LAST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price DESC - RANGE BETWEEN - UNBOUNDED PRECEDING AND - UNBOUNDED FOLLOWING) AS diff_lowest_price -FROM orders_view -ORDER BY order_id, item; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="last-value" field="extra_example_anti_pattern_min" %}} -
Idiomatic Materialize SQL -```mzsql -SELECT o.order_id, minmax.lowest_price, minmax.highest_price, o.item, o.price, - o.price - minmax.lowest_price AS diff_lowest_price, - o.price - minmax.highest_price AS diff_highest_price -FROM orders_view o, - (SELECT order_id, - MIN(price) AS lowest_price, - MAX(price) AS highest_price - FROM orders_view - GROUP BY order_id) minmax -WHERE o.order_id = minmax.order_id -ORDER BY o.order_id, o.item; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="last-value" field="example_idiomatic" %}}
Anti-pattern -Do not use `LAST_VALUE() OVER (PARTITION BY ... ORDER BY -)` for last value within groups queries. - -{{< note >}} - -Materialize does not support `RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED -FOLLOWING`. - -{{}} - -
- -```nofmt --- Unsupported -- -SELECT order_id, - LAST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price DESC - RANGE BETWEEN - UNBOUNDED PRECEDING AND - UNBOUNDED FOLLOWING) AS lowest_price, - LAST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price - RANGE BETWEEN - UNBOUNDED PRECEDING AND - UNBOUNDED FOLLOWING) AS highest_price, - item, - price, - price - LAST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price DESC - RANGE BETWEEN - UNBOUNDED PRECEDING AND - UNBOUNDED FOLLOWING) AS diff_lowest_price, - price - LAST_VALUE(price) - OVER (PARTITION BY order_id ORDER BY price - RANGE BETWEEN - UNBOUNDED PRECEDING AND - UNBOUNDED FOLLOWING) AS diff_highest_price -FROM orders_view -ORDER BY order_id, item; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="last-value" field="example_anti_pattern" %}} -
Idiomatic Materialize SQL -Use a self join that specifies an **equality match** on the lead's order by -field (e.g., `fieldA`). The order by field must increment in a regular pattern -in order to be represented by an equality condition (e.g., `WHERE t1.fieldA = -t2.fieldA - ...`). The query *excludes* the last row in the results since it -does not have a next row. - -{{< important >}} - -The idiomatic Materialize SQL applies only to those "lead over" queries whose -ordering can be represented by some **equality condition**. - -{{}} - -
- -```mzsql --- Excludes the last row in the results -- -SELECT t1.fieldA, t2.fieldB as next_row_value -FROM tableA t1, tableA t2 -WHERE t1.fieldA = t2.fieldA - ... -- or some other operand -ORDER BY fieldA; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lead" field="extra_syntax_idiomatic_exclude" %}}
Anti-pattern - - -Avoid the use of [`LEAD(fieldZ) OVER (ORDER BY ...) window -function`](/sql/functions/#lead) when the order by field increases in a regular pattern. - - - -
- -
- -```nofmt --- Anti-pattern. Avoid. -- -SELECT fieldA, ... - LEAD(fieldZ) OVER (ORDER BY fieldA) as next_row_value -FROM tableA; -``` - -
+{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lead" field="syntax_anti_pattern" %}}
Idiomatic Materialize SQL -Use a self [`LEFT JOIN/LEFT OUTER JOIN`](/sql/select/join/#left-outer-join) -(e.g., `FROM tableA t1 LEFT JOIN tableA t2`) that specifies an **equality -match** on the lag's order by field (e.g., `fieldA`). The order by field must -increment in a regular pattern in order to be represented by an equality -condition (e.g., `ON t1.fieldA = t2.fieldA - ...`). The query *includes* the -last row, returning `null` as its lead value. - -{{< important >}} - -The idiomatic Materialize SQL applies only to those "lead over" queries whose -ordering can be represented by some **equality condition**. - -{{}} - - -```mzsql --- Includes the last row in the response -- -SELECT t1.fieldA, t2.fieldB as next_row_value -FROM tableA t1 -LEFT JOIN tableA t2 -ON t1.fieldA = t2.fieldA - ... -- or some other operand -ORDER BY fieldA; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lead" field="extra_syntax_idiomatic_include" %}}
Anti-pattern - - -Avoid the use of [`LEAD(fieldZ) OVER (ORDER BY ...) window -function`](/sql/functions/#lead) when the order by field increases in regular -intervals. +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lead" field="syntax_anti_pattern" %}} - - -
- -
- -```nofmt -SELECT fieldA, ... - LEAD(fieldZ) OVER (ORDER BY fieldA) as next_row_value -FROM tableA; -``` - -
Materialize SQL -```mzsql --- Excludes the last row in results -- -SELECT o1.order_date, o1.daily_total, - o2.daily_total as next_daily_total -FROM orders_daily_totals o1, orders_daily_totals o2 -WHERE o1.order_date = o2.order_date - INTERVAL '1' DAY -ORDER BY order_date; -``` - -{{< important >}} - -The idiomatic Materialize SQL applies only to those "lead over" queries whose -ordering can be represented by some **equality condition**. - -{{}} - +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lead" field="extra_example_idiomatic_exclude" %}}
Anti-pattern -Avoid the use of [`LEAD() OVER (ORDER BY ...)` -window function](/sql/functions/#lead) to access next row's value if the -order by field increases in regular intervals. - -
-
- -```nofmt --- Anti-pattern. Includes the last row's value. -- -SELECT order_date, daily_total, - LEAD(daily_total) OVER (ORDER BY order_date) as next_daily_total -FROM orders_daily_totals; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lead" field="example_anti_pattern" %}}
Materialize SQL -```mzsql --- Include the last row in the results -- -SELECT o1.order_date, o1.daily_total, - o2.daily_total as next_daily_total -FROM orders_daily_totals o1 -LEFT JOIN orders_daily_totals o2 -ON o1.order_date = o2.order_date - INTERVAL '1' DAY -ORDER BY order_date; -``` - -{{< important >}} - -The idiomatic Materialize SQL applies only to those "lead over" queries whose -ordering can be represented by some **equality condition**. - -{{}} - +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lead" field="extra_example_idiomatic_include" %}}
Anti-pattern -Avoid the use of [`LEAD() OVER (ORDER BY ...)` -window function](/sql/functions/#lead) to access next row's value if the -order by field increases in a regular pattern. - -
-
- -```nofmt --- Anti-pattern. Includes the last row in results. -- -SELECT order_date, daily_total, - LEAD(daily_total) OVER (ORDER BY order_date) as next_daily_total -FROM orders_daily_totals; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="lead" field="example_anti_pattern" %}}
+ + + + + + + + + + + + + + + + + +
Materialize SQL + +{{% include-from-yaml data="idiomatic_mzsql/patterns_general" name="not-in-subquery" field="syntax_idiomatic" %}} + +
Anti-pattern + +{{% include-from-yaml data="idiomatic_mzsql/patterns_general" name="not-in-subquery" field="syntax_anti_pattern" %}} + +
+ +If the subquery uses [`UNNEST()`](/sql/functions/#unnest) on a column whose +value depends on the outer row: +- Factor the `UNNEST()` into an uncorrelated [Common Table Expression + (CTE)](/sql/select/#common-table-expressions-ctes) first. +- Then apply the rewrite against the CTE. See the [example + below](#find-items-not-currently-on-sale). + +## Examples + +{{< note >}} + +The example data can be found in the +[Appendix](/transform-data/idiomatic-materialize-sql/appendix/example-orders). + +{{</ note >}} + +### Find items not currently on sale + +Using idiomatic Materialize SQL, the following examples find items in the +`items` table whose `item` value (declared `NOT NULL`) does not appear in any of +this week's sales arrays in `sales_items.items`, a nullable `text[]` column. The subquery +uses [`UNNEST()`](/sql/functions/#unnest) to expand each week's `items` array +into individual values for comparison. + +If the subquery uses [`UNNEST()`](/sql/functions/#unnest) on a column whose +value depends on the outer row: + +- First, factor the `UNNEST()` into an uncorrelated [Common Table Expression + (CTE)](/sql/select/#common-table-expressions-ctes). +- Then, apply the rewrite against the CTE. + + + + + + + + + + + + + + + + + + + + + +
Materialize SQL + +{{% include-from-yaml data="idiomatic_mzsql/patterns_general" name="not-in-subquery" field="example_idiomatic" %}} + +
Anti-pattern + +{{% include-from-yaml data="idiomatic_mzsql/patterns_general" name="not-in-subquery" field="example_anti_pattern" %}} + +
+ +## See also + +- [`NOT EXISTS`](/sql/functions/#not-exists) + +- [Idiomatic Materialize SQL + Chart](/transform-data/idiomatic-materialize-sql/appendix/idiomatic-sql-chart/) diff --git a/doc/user/content/transform-data/idiomatic-materialize-sql/top-k.md b/doc/user/content/transform-data/idiomatic-materialize-sql/top-k.md index 6b080b1163789..8ea1f3dad561e 100644 --- a/doc/user/content/transform-data/idiomatic-materialize-sql/top-k.md +++ b/doc/user/content/transform-data/idiomatic-materialize-sql/top-k.md @@ -47,20 +47,7 @@ with another subquery that specifies the ordering and the limit K. Idiomatic Materialize SQL -Use a subquery to -[SELECT DISTINCT](/sql/select/#select-distinct) on the grouping key (e.g., -`fieldA`), and perform a [LATERAL](/sql/select/join/#lateral-subqueries) join -(by the grouping key `fieldA`) with another subquery that specifies the ordering -(e.g., `fieldZ [ASC|DESC]`) and the limit K. - -```mzsql -SELECT fieldA, fieldB, ... -FROM (SELECT DISTINCT fieldA FROM tableA) grp, - LATERAL (SELECT fieldB, ... , fieldZ FROM tableA - WHERE fieldA = grp.fieldA - ORDER BY fieldZ ... LIMIT K) -- K is a number >= 1 -ORDER BY fieldA, fieldZ ... ; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="top-k-k-ge-1" field="syntax_idiomatic" %}} @@ -68,24 +55,8 @@ ORDER BY fieldA, fieldZ ... ; Anti-pattern -Avoid the use of `ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)` for Top-K queries. - -
-
- -```nofmt --- Anti-pattern. Avoid. -- -SELECT fieldA, fieldB, ... -FROM ( - SELECT fieldA, fieldB, ... , fieldZ, - ROW_NUMBER() OVER (PARTITION BY fieldA - ORDER BY fieldZ ... ) as rn - FROM tableA) -WHERE rn <= K -- K is a number >= 1 -ORDER BY fieldA, fieldZ ...; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="top-k-k-ge-1" field="syntax_anti_pattern" %}} -
@@ -132,13 +103,8 @@ pattern, specifying 1 as the limit. Idiomatic Materialize SQL -```mzsql -SELECT DISTINCT ON(fieldA) fieldA, fieldB, ... -FROM tableA -ORDER BY fieldA, fieldZ ... ; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="top-k-k-eq-1" field="syntax_idiomatic" %}} -
@@ -146,24 +112,8 @@ ORDER BY fieldA, fieldZ ... ; Anti-pattern -Avoid the use of `ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)` for Top-K queries. - -
-
- -```nofmt --- Anti-pattern. Avoid. -- -SELECT fieldA, fieldB, ... -FROM ( - SELECT fieldA, fieldB, ... , fieldZ, - ROW_NUMBER() OVER (PARTITION BY fieldA - ORDER BY fieldZ ... ) as rn - FROM tableA) -WHERE rn = 1 -ORDER BY fieldA, fieldZ ...; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="top-k-k-eq-1" field="syntax_anti_pattern" %}} -
@@ -215,14 +165,7 @@ DESC`) and limits its results to 3 (`LIMIT 3`). Idiomatic Materialize SQL -```mzsql -SELECT order_id, item, subtotal -FROM (SELECT DISTINCT order_id FROM orders_view) grp, - LATERAL (SELECT item, subtotal FROM orders_view - WHERE order_id = grp.order_id - ORDER BY subtotal DESC LIMIT 3) -ORDER BY order_id, subtotal DESC; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="top-k-k-ge-1" field="example_idiomatic" %}} @@ -231,23 +174,8 @@ ORDER BY order_id, subtotal DESC; Anti-pattern ❌ -Avoid the use of `ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)` for Top-K queries. +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="top-k-k-ge-1" field="example_anti_pattern" %}} -
-
- -```nofmt --- Anti-pattern -- -SELECT order_id, item, subtotal -FROM ( - SELECT order_id, item, subtotal, - ROW_NUMBER() OVER (PARTITION BY order_id ORDER BY subtotal DESC) as rn - FROM orders_view) -WHERE rn <= 3 -ORDER BY order_id, subtotal DESC; -``` - -
@@ -274,11 +202,7 @@ ON`/grouping key, then the descending subtotal). [^1] Idiomatic Materialize SQL -```mzsql -SELECT DISTINCT ON(order_id) order_id, item, subtotal -FROM orders_view -ORDER BY order_id, subtotal DESC; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="top-k-k-eq-1" field="example_idiomatic" %}} @@ -286,23 +210,8 @@ ORDER BY order_id, subtotal DESC; Anti-pattern ❌ -Avoid the use of `ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)` for Top-K queries. - -
-
- -```nofmt --- Anti-pattern -- -SELECT order_id, item, subtotal -FROM ( - SELECT order_id, item, subtotal, - ROW_NUMBER() OVER (PARTITION BY order_id ORDER BY subtotal DESC) as rn - FROM orders_view) -WHERE rn = 1 -ORDER BY order_id, subtotal DESC; -``` +{{% include-from-yaml data="idiomatic_mzsql/patterns_window_functions" name="top-k-k-eq-1" field="example_anti_pattern" %}} -
diff --git a/doc/user/data/idiomatic_mzsql/patterns_general.yml b/doc/user/data/idiomatic_mzsql/patterns_general.yml new file mode 100644 index 0000000000000..c0feda6c0b570 --- /dev/null +++ b/doc/user/data/idiomatic_mzsql/patterns_general.yml @@ -0,0 +1,261 @@ +columns: + - column: pattern_title + header: " " + - column: syntax_idiomatic + header: "Idiomatic Materialize SQL Pattern" + - column: example_idiomatic + header: "Example" + +rows: + + - name: any-equi-join + pattern_title: | + [`ANY()` Equi-join condition](/transform-data/idiomatic-materialize-sql/any/) + + syntax_idiomatic: | + **If no duplicates exist in the unnested field:** Use a Common Table + Expression (CTE) to [`UNNEST()`](/sql/functions/#unnest) the array of + values and perform the equi-join on the unnested values. + + ```mzsql + -- array_field contains no duplicates.-- + + WITH my_expanded_values AS + (SELECT UNNEST(array_field) AS fieldZ FROM tableB) + SELECT a.fieldA, ... + FROM tableA a + JOIN my_expanded_values t ON a.fieldZ = t.fieldZ + ; + ``` + + **Duplicates may exist in the unnested field:** Use a Common Table + Expression (CTE) to [`DISTINCT`](/sql/select/#select-distinct) + [`UNNEST()`](/sql/functions/#unnest) the array of values and perform the + equi-join on the unnested values. + + ```mzsql + -- array_field may contain duplicates.-- + + WITH my_expanded_values AS + (SELECT DISTINCT UNNEST(array_field) AS fieldZ FROM tableB) + SELECT a.fieldA, ... + FROM tableA a + JOIN my_expanded_values t ON a.fieldZ = t.fieldZ + ; + ``` + + syntax_anti_pattern: | + Avoid the use of [`ANY(...)` function](/sql/functions/#expression-bool_op-any) for equi-join + conditions. + + ```nofmt + -- Anti-pattern. Avoid. -- + SELECT a.fieldA, ... + FROM tableA a, tableB b + WHERE a.fieldZ = ANY(b.array_field) -- Anti-pattern. Avoid. + ; + ``` + + example_idiomatic: | + ***If no duplicates in the unnested field*** + + ```mzsql + -- sales_items.items contains no duplicates. 
-- + + WITH individual_sales_items AS + (SELECT unnest(items) as item, week_of FROM sales_items) + SELECT s.week_of, o.order_id, o.item, o.quantity + FROM orders o + JOIN individual_sales_items s ON o.item = s.item + WHERE date_trunc('week', o.order_date) = s.week_of + ORDER BY s.week_of, o.order_id, o.item, o.quantity + ; + ``` + + ***To omit duplicates that may exist in the unnested field*** + + ```mzsql + -- sales_items.items may contain duplicates -- + + WITH individual_sales_items AS + (SELECT DISTINCT unnest(items) as item, week_of FROM sales_items) + SELECT s.week_of, o.order_id, o.item, o.quantity + FROM orders o + JOIN individual_sales_items s ON o.item = s.item + WHERE date_trunc('week', o.order_date) = s.week_of + ORDER BY s.week_of, o.order_id, o.item, o.quantity + ; + ``` + + example_anti_pattern: | + Avoid the use of [`ANY()`](/sql/functions/#expression-bool_op-any) for the equi-join condition. + + ```nofmt + -- Anti-pattern. Avoid. -- + SELECT s.week_of, o.order_id, o.item, o.quantity + FROM orders o + JOIN sales_items s ON o.item = ANY(s.items) + WHERE date_trunc('week', o.order_date) = s.week_of + ORDER BY s.week_of, o.order_id, o.item, o.quantity + ; + ``` + + - name: not-in-subquery + pattern_title: | + [`NOT IN ()` predicate](/transform-data/idiomatic-materialize-sql/not-in/) + + syntax_idiomatic: | + **Rewrite to `NOT EXISTS` with a correlated subquery.** + + ```mzsql + SELECT t1.* + FROM t1 + WHERE NOT EXISTS (SELECT 1 FROM t2 WHERE t2.a = t1.a) + ; + ``` + + **Filter out `NULL`s on both sides of the `NOT IN`.** + + ```mzsql + SELECT t1.* + FROM t1 + WHERE t1.a IS NOT NULL + AND t1.a NOT IN (SELECT t2.a FROM t2 WHERE t2.a IS NOT NULL) + ; + ``` + + syntax_anti_pattern: | + Avoid `NOT IN ()` predicates, which force a cross join + between the outer relation and the subquery. + + ```nofmt + -- Anti-pattern. Avoid. -- + SELECT t1.* + FROM t1 + WHERE t1.a NOT IN (SELECT t2.a FROM t2) -- Anti-pattern. Avoid. 
+ ; + ``` + + example_idiomatic: | + Because the subquery uses [`UNNEST()`](/sql/functions/#unnest) on a column + of the outer-correlated row, factor the `UNNEST()` into an uncorrelated + [Common Table Expression + (CTE)](/sql/select/#common-table-expressions-ctes) first. + + ***Rewrite to `NOT EXISTS` with a CTE for the `UNNEST()`*** + + ```mzsql + WITH this_weeks_sales AS ( + SELECT unnest(items) AS sale_item + FROM sales_items + WHERE week_of = date_trunc('week', current_timestamp) + ) + SELECT i.item, i.price + FROM items i + WHERE NOT EXISTS ( + SELECT 1 FROM this_weeks_sales s WHERE s.sale_item = i.item + ) + ORDER BY i.item + ; + ``` + + ***Filter out `NULL`s with a CTE for the `UNNEST()`*** + + ```mzsql + WITH this_weeks_sales AS ( + SELECT unnest(items) AS sale_item + FROM sales_items + WHERE week_of = date_trunc('week', current_timestamp) + ) + SELECT i.item, i.price + FROM items i + WHERE i.item IS NOT NULL + AND i.item NOT IN ( + SELECT sale_item FROM this_weeks_sales WHERE sale_item IS NOT NULL + ) + ORDER BY i.item + ; + ``` + + example_anti_pattern: | + Avoid `NOT IN ()`, which forces a cross join. + + ```nofmt + -- Anti-pattern. Avoid. -- + SELECT i.item, i.price + FROM items i + WHERE i.item NOT IN ( + SELECT unnest(items) FROM sales_items + WHERE week_of = date_trunc('week', current_timestamp) + ) + ORDER BY i.item + ; + ``` + + - name: mz_now-date-time + pattern_title: | + [`mz_now()` with date/time operators](/transform-data/idiomatic-materialize-sql/mz_now/#mz_now-expressions-to-calculate-past-or-future-timestamp) + + syntax_idiomatic: | + Rewrite the query expression; specifically, move the operation to the + other side of the comparison. 
+ + example_idiomatic: | + ```mzsql + SELECT * from orders + WHERE mz_now() > order_date + INTERVAL '5min' + ; + ``` + + - name: mz_now-disjunctions + pattern_title: | + [`mz_now()` with disjunctions (`OR`) in materialized/indexed view + definitions and `SUBSCRIBE` statements](/transform-data/idiomatic-materialize-sql/mz_now/#disjunctions-or) + + syntax_idiomatic: | + Rewrite as `UNION ALL` or `UNION`, deduplicating as necessary: + + - In some cases, you may need to modify the conditions to deduplicate + results when using `UNION ALL`. For example, you might add the + negation of one input's condition to the other as a conjunction. + + - In some cases, using `UNION` instead of `UNION ALL` may suffice if + the inputs do not contain other duplicates that need to be retained. + + example_idiomatic: | + **Rewrite as `UNION ALL` with possible duplicates** + + ```mzsql + CREATE MATERIALIZED VIEW forecast_completed_orders_duplicates_possible AS + SELECT item, quantity, status from orders + WHERE status = 'Shipped' + UNION ALL + SELECT item, quantity, status from orders + WHERE order_date + interval '30' minutes >= mz_now() + ; + ``` + + **Rewrite as `UNION ALL` that avoids duplicates across queries** + + ```mzsql + CREATE MATERIALIZED VIEW forecast_completed_orders_deduplicated_union_all AS + SELECT item, quantity, status from orders + WHERE status = 'Shipped' + UNION ALL + SELECT item, quantity, status from orders + WHERE order_date + interval '30' minutes >= mz_now() + AND status != 'Shipped' -- Deduplicate by excluding those with status 'Shipped' + ; + ``` + + **Rewrite as `UNION` to deduplicate any and all duplicated results** + + ```mzsql + CREATE MATERIALIZED VIEW forecast_completed_orders_deduplicated_results AS + SELECT item, quantity, status from orders + WHERE status = 'Shipped' + UNION + SELECT item, quantity, status from orders + WHERE order_date + interval '30' minutes >= mz_now() + ; + ``` diff --git 
a/doc/user/data/idiomatic_mzsql/patterns_window_functions.yml b/doc/user/data/idiomatic_mzsql/patterns_window_functions.yml new file mode 100644 index 0000000000000..bfb1003a401dd --- /dev/null +++ b/doc/user/data/idiomatic_mzsql/patterns_window_functions.yml @@ -0,0 +1,748 @@ +columns: + - column: pattern_title + header: " " + - column: syntax_idiomatic + header: "Idiomatic Materialize SQL Pattern" + - column: example_idiomatic + header: "Example" + +rows: + + - name: top-k-k-ge-1 + pattern_title: | + [Top-K over partition
(K >= 1)](/transform-data/idiomatic-materialize-sql/top-k/#for-k--1) + + syntax_idiomatic: | + Use a subquery to + [SELECT DISTINCT](/sql/select/#select-distinct) on the grouping key (e.g., + `fieldA`), and perform a [LATERAL](/sql/select/join/#lateral-subqueries) join + (by the grouping key `fieldA`) with another subquery that specifies the ordering + (e.g., `fieldZ [ASC|DESC]`) and the limit K. + + ```mzsql + SELECT fieldA, fieldB, ... + FROM (SELECT DISTINCT fieldA FROM tableA) grp, + LATERAL (SELECT fieldB, ... , fieldZ FROM tableA + WHERE fieldA = grp.fieldA + ORDER BY fieldZ ... LIMIT K) -- K is a number >= 1 + ORDER BY fieldA, fieldZ ... ; + ``` + + syntax_anti_pattern: | + Avoid the use of `ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)` for Top-K queries. + + ```nofmt + -- Anti-pattern. Avoid. -- + SELECT fieldA, fieldB, ... + FROM ( + SELECT fieldA, fieldB, ... , fieldZ, + ROW_NUMBER() OVER (PARTITION BY fieldA + ORDER BY fieldZ ... ) as rn + FROM tableA) + WHERE rn <= K -- K is a number >= 1 + ORDER BY fieldA, fieldZ ...; + ``` + + example_idiomatic: | + ```mzsql + SELECT order_id, item, subtotal + FROM (SELECT DISTINCT order_id FROM orders_view) grp, + LATERAL (SELECT item, subtotal FROM orders_view + WHERE order_id = grp.order_id + ORDER BY subtotal DESC LIMIT 3) + ORDER BY order_id, subtotal DESC; + ``` + + example_anti_pattern: | + Avoid the use of `ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)` for Top-K queries. + + ```nofmt + -- Anti-pattern -- + SELECT order_id, item, subtotal + FROM ( + SELECT order_id, item, subtotal, + ROW_NUMBER() OVER (PARTITION BY order_id ORDER BY subtotal DESC) as rn + FROM orders_view) + WHERE rn <= 3 + ORDER BY order_id, subtotal DESC; + ``` + + - name: top-k-k-eq-1 + pattern_title: | + [Top-K over partition
(K = 1)](/transform-data/idiomatic-materialize-sql/top-k/#for-k--1-1) + + syntax_idiomatic: | + ```mzsql + SELECT DISTINCT ON(fieldA) fieldA, fieldB, ... + FROM tableA + ORDER BY fieldA, fieldZ ... ; + ``` + + syntax_anti_pattern: | + Avoid the use of `ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)` for Top-K queries. + + ```nofmt + -- Anti-pattern. Avoid. -- + SELECT fieldA, fieldB, ... + FROM ( + SELECT fieldA, fieldB, ... , fieldZ, + ROW_NUMBER() OVER (PARTITION BY fieldA + ORDER BY fieldZ ... ) as rn + FROM tableA) + WHERE rn = 1 + ORDER BY fieldA, fieldZ ...; + ``` + + example_idiomatic: | + ```mzsql + SELECT DISTINCT ON(order_id) order_id, item, subtotal + FROM orders_view + ORDER BY order_id, subtotal DESC; + ``` + + example_anti_pattern: | + Avoid the use of `ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)` for Top-K queries. + + ```nofmt + -- Anti-pattern -- + SELECT order_id, item, subtotal + FROM ( + SELECT order_id, item, subtotal, + ROW_NUMBER() OVER (PARTITION BY order_id ORDER BY subtotal DESC) as rn + FROM orders_view) + WHERE rn = 1 + ORDER BY order_id, subtotal DESC; + ``` + + - name: first-value + pattern_title: | + [First value over partition
order by ...](/transform-data/idiomatic-materialize-sql/first-value/) + + syntax_idiomatic: | + Use a subquery that uses the [MIN()](/sql/functions/#min) or + [MAX()](/sql/functions/#max) aggregate function. + + ```mzsql + SELECT tableA.fieldA, tableA.fieldB, minmax.Z + FROM tableA, + (SELECT fieldA, + MIN(fieldZ), + MAX(fieldZ) + FROM tableA + GROUP BY fieldA) minmax + WHERE tableA.fieldA = minmax.fieldA + ORDER BY fieldA ... ; + ``` + + syntax_anti_pattern: | + Avoid the use of [`FIRST_VALUE() OVER (PARTITION BY ... ORDER BY ...)` + window function](/sql/functions/#first_value) for first value within groups + queries. + + ```nofmt + -- Anti-pattern. Avoid. -- + SELECT fieldA, fieldB, + FIRST_VALUE(fieldZ) OVER (PARTITION BY fieldA ORDER BY ...), + FIRST_VALUE(fieldZ) OVER (PARTITION BY fieldA ORDER BY ... DESC) + FROM tableA + ORDER BY fieldA, ...; + ``` + + example_idiomatic: | + ```mzsql + SELECT o.order_id, minmax.lowest_price, minmax.highest_price, o.item, o.price, + o.price - minmax.lowest_price AS diff_lowest_price, + o.price - minmax.highest_price AS diff_highest_price + FROM orders_view o, + (SELECT order_id, + MIN(price) AS lowest_price, + MAX(price) AS highest_price + FROM orders_view + GROUP BY order_id) minmax + WHERE o.order_id = minmax.order_id + ORDER BY o.order_id, o.item; + ``` + + example_anti_pattern: | + Avoid the use of [`FIRST_VALUE() OVER (PARTITION BY ... ORDER BY ...)` + window function](/sql/functions/#first_value) for first value within groups + queries. 
+ + ```nofmt + -- Anti-pattern -- + SELECT order_id, + FIRST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price) AS lowest_price, + FIRST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price DESC) AS highest_price, + item, + price, + price - FIRST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price) AS diff_lowest_price, + price - FIRST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price DESC) AS diff_highest_price + FROM orders_view + ORDER BY order_id, item; + ``` + + # Extra fields used by the per-topic page (first-value.md) but not the chart. + extra_example_idiomatic_min: | + ```mzsql + SELECT o.order_id, minmax.lowest_price, o.item, o.price, + o.price - minmax.lowest_price AS diff_lowest_price + FROM orders_view o, + (SELECT order_id, + MIN(price) AS lowest_price + FROM orders_view + GROUP BY order_id) minmax + WHERE o.order_id = minmax.order_id + ORDER BY o.order_id, o.item; + ``` + + extra_example_anti_pattern_min: | + Avoid the use of [`FIRST_VALUE() OVER (PARTITION BY ... ORDER BY ...)` + window function](/sql/functions/#first_value) for first value within groups queries. + + ```nofmt + -- Anti-pattern -- + SELECT order_id, + FIRST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price) AS lowest_price, + item, + price, + price - FIRST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price) AS diff_lowest_price + FROM orders_view + ORDER BY order_id, item; + ``` + + extra_example_idiomatic_max: | + ```mzsql + SELECT o.order_id, minmax.highest_price, o.item, o.price, + o.price - minmax.highest_price AS diff_highest_price + FROM orders_view o, + (SELECT order_id, + MAX(price) AS highest_price + FROM orders_view + GROUP BY order_id) minmax + WHERE o.order_id = minmax.order_id + ORDER BY o.order_id, o.item; + ``` + + extra_example_anti_pattern_max: | + Avoid the use of [`FIRST_VALUE() OVER (PARTITION BY ... ORDER BY ...)` + window function](/sql/functions/#first_value) for first value within groups + queries. 
+ + ```nofmt + -- Anti-pattern -- + SELECT order_id, + FIRST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price DESC) AS highest_price, + item, + price, + price - FIRST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price DESC) AS diff_highest_price + FROM orders_view + ORDER BY order_id, item; + ``` + + - name: last-value + pattern_title: | + [Last value over partition
order by ...
range between unbounded preceding
and unbounded following](/transform-data/idiomatic-materialize-sql/last-value/) + + syntax_idiomatic: | + Use a subquery that uses the [MIN()](/sql/functions/#min) or + [MAX()](/sql/functions/#max) aggregate function. + + ```mzsql + SELECT tableA.fieldA, tableA.fieldB, minmax.Z + FROM tableA, + (SELECT fieldA, + MAX(fieldZ), + MIN(fieldZ) + FROM tableA + GROUP BY fieldA) minmax + WHERE tableA.fieldA = minmax.fieldA + ORDER BY fieldA ... ; + ``` + + syntax_anti_pattern: | + Do not use [`LAST_VALUE() OVER (PARTITION BY ... ORDER BY ... RANGE + ...)` window function](/sql/functions/#last_value) for last value in each group + queries. + + {{< note >}} + + Materialize does not support `RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED + FOLLOWING`. + + {{}} + + ```nofmt + -- Unsupported -- + SELECT fieldA, fieldB, + LAST_VALUE(fieldZ) + OVER (PARTITION BY fieldA ORDER BY fieldZ + RANGE BETWEEN + UNBOUNDED PRECEDING AND + UNBOUNDED FOLLOWING), + LAST_VALUE(fieldZ) + OVER (PARTITION BY fieldA ORDER BY fieldZ DESC + RANGE BETWEEN + UNBOUNDED PRECEDING AND + UNBOUNDED FOLLOWING) + FROM tableA + ORDER BY fieldA, ...; + ``` + + example_idiomatic: | + ```mzsql + SELECT o.order_id, minmax.lowest_price, minmax.highest_price, o.item, o.price, + o.price - minmax.lowest_price AS diff_lowest_price, + o.price - minmax.highest_price AS diff_highest_price + FROM orders_view o, + (SELECT order_id, + MIN(price) AS lowest_price, + MAX(price) AS highest_price + FROM orders_view + GROUP BY order_id) minmax + WHERE o.order_id = minmax.order_id + ORDER BY o.order_id, o.item; + ``` + + example_anti_pattern: | + Do not use `LAST_VALUE() OVER (PARTITION BY ... ORDER BY ...)` for last + value within groups queries. + + {{< note >}} + + Materialize does not support `RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED + FOLLOWING`. 
+ + {{</ note >}} + + ```nofmt + -- Unsupported -- + SELECT order_id, + LAST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price DESC + RANGE BETWEEN + UNBOUNDED PRECEDING AND + UNBOUNDED FOLLOWING) AS lowest_price, + LAST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price + RANGE BETWEEN + UNBOUNDED PRECEDING AND + UNBOUNDED FOLLOWING) AS highest_price, + item, + price, + price - LAST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price DESC + RANGE BETWEEN + UNBOUNDED PRECEDING AND + UNBOUNDED FOLLOWING) AS diff_lowest_price, + price - LAST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price + RANGE BETWEEN + UNBOUNDED PRECEDING AND + UNBOUNDED FOLLOWING) AS diff_highest_price + FROM orders_view + ORDER BY order_id, item; + ``` + + # Extra fields used by the per-topic page (last-value.md) but not the chart. + extra_example_idiomatic_max: | + ```mzsql + SELECT o.order_id, minmax.highest_price, o.item, o.price, + o.price - minmax.highest_price AS diff_highest_price + FROM orders_view o, + (SELECT order_id, + MAX(price) AS highest_price + FROM orders_view + GROUP BY order_id) minmax + WHERE o.order_id = minmax.order_id + ORDER BY o.order_id, o.item; + ``` + + extra_example_anti_pattern_max: | + Do not use `LAST_VALUE() OVER (PARTITION BY ... ORDER BY ... RANGE ...)` + for last value in each group queries. + + {{< note >}} + + Materialize does not support `RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED + FOLLOWING`. 
+ + {{</ note >}} + + ```nofmt + -- Unsupported -- + SELECT order_id, + LAST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price + RANGE BETWEEN + UNBOUNDED PRECEDING AND + UNBOUNDED FOLLOWING) AS highest_price, + item, + price, + price - LAST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price + RANGE BETWEEN + UNBOUNDED PRECEDING AND + UNBOUNDED FOLLOWING) AS diff_highest_price + FROM orders_view + ORDER BY order_id, item; + ``` + + extra_example_idiomatic_min: | + ```mzsql + SELECT o.order_id, minmax.lowest_price, o.item, o.price, + o.price - minmax.lowest_price AS diff_lowest_price + FROM orders_view o, + (SELECT order_id, + MIN(price) AS lowest_price + FROM orders_view + GROUP BY order_id) minmax + WHERE o.order_id = minmax.order_id + ORDER BY o.order_id, o.item; + ``` + + extra_example_anti_pattern_min: | + Do not use `LAST_VALUE() OVER (PARTITION BY ... ORDER BY ... RANGE ... )` + for last value in each group queries. + + {{< note >}} + + Materialize does not support `RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED + FOLLOWING`. + + {{</ note >}} + + ```nofmt + -- Unsupported -- + SELECT order_id, + LAST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price DESC + RANGE BETWEEN + UNBOUNDED PRECEDING AND + UNBOUNDED FOLLOWING) AS lowest_price, + item, + price, + price - LAST_VALUE(price) + OVER (PARTITION BY order_id ORDER BY price DESC + RANGE BETWEEN + UNBOUNDED PRECEDING AND + UNBOUNDED FOLLOWING) AS diff_lowest_price + FROM orders_view + ORDER BY order_id, item; + ``` + + - name: lag + pattern_title: | + [Lag over (order by) whose ordering can be represented by some equality + condition.](/transform-data/idiomatic-materialize-sql/lag/) + + syntax_idiomatic: | + ***To exclude the first row since it has no previous row*** + + ```mzsql + -- Excludes the first row in the results -- + SELECT t1.fieldA, t2.fieldB as previous_row_value + FROM tableA t1, tableA t2 + WHERE t1.fieldA = t2.fieldA + ... 
-- or some other operand + ORDER BY fieldA; + ``` + + ***To include the first row*** + + ```mzsql + -- Includes the first row in the results -- + SELECT t1.fieldA, t2.fieldB as previous_row_value + FROM tableA t1 + LEFT JOIN tableA t2 + ON t1.fieldA = t2.fieldA + ... -- or some other operand + ORDER BY fieldA; + ``` + + syntax_anti_pattern: | + Avoid the use of [`LAG(fieldZ) OVER (ORDER BY ...)`](/sql/functions/#lag) + window function when the order by field increases in a regular pattern. + + ```nofmt + -- Anti-pattern. Avoid. -- + SELECT fieldA, ... + LAG(fieldZ) OVER (ORDER BY fieldA) as previous_row_value + FROM tableA; + ``` + + example_idiomatic: | + ***To exclude the first row since it has no previous row*** + + ```mzsql + SELECT o1.order_date, o1.daily_total, + o2.daily_total as previous_daily_total + FROM orders_daily_totals o1, orders_daily_totals o2 + WHERE o1.order_date = o2.order_date + INTERVAL '1' DAY + ORDER BY order_date; + ``` + + ***To include the first row*** + + ```mzsql + SELECT o1.order_date, o1.daily_total, + o2.daily_total as previous_daily_total + FROM orders_daily_totals o1 + LEFT JOIN orders_daily_totals o2 + ON o1.order_date = o2.order_date + INTERVAL '1' DAY + ORDER BY order_date; + ``` + + example_anti_pattern: | + Avoid the use of [`LAG() OVER (ORDER BY ...)` window + function](/sql/functions/#lag) to access previous row's value if the order by + field increases in a regular pattern. + + ```nofmt + -- Anti-pattern. Includes the first row's value. -- + SELECT order_date, daily_total, + LAG(daily_total) OVER (ORDER BY order_date) as previous_daily_total + FROM orders_daily_totals; + ``` + + # Extra fields used by the per-topic page (lag.md) but not the chart. + extra_syntax_idiomatic_exclude: | + Use a self join that specifies an **equality match** on the lag's order by field + (e.g., `fieldA`). 
The order by field must increment in a regular pattern in + order to be represented by an equality condition (e.g., `WHERE t1.fieldA = + t2.fieldA + ...`). The + query *excludes* the first row in the results since it does not have a previous + row. + + {{< important >}} + + The idiomatic Materialize SQL applies only to those "lag over" queries whose + ordering can be represented by some **equality condition**. + + {{</ important >}} + + ```mzsql + -- Excludes the first row in the results -- + SELECT t1.fieldA, t2.fieldB as previous_row_value + FROM tableA t1, tableA t2 + WHERE t1.fieldA = t2.fieldA + ... -- or some other operand + ORDER BY fieldA; + ``` + + extra_syntax_idiomatic_include: | + Use a self [`LEFT JOIN/LEFT OUTER JOIN`](/sql/select/join/#left-outer-join) + (e.g., `FROM tableA t1 LEFT JOIN tableA t2`) that specifies an **equality + match** on the lag's order by field (e.g., `fieldA`). The order by field must + increment in a regular pattern in order to be represented by an equality + condition (e.g., `ON t1.fieldA = t2.fieldA + ...`). The + query *includes* the first row, returning `null` as its lag value. + + {{< important >}} + + The idiomatic Materialize SQL applies only to those "lag over" queries whose + ordering can be represented by some **equality condition**. + + {{</ important >}} + + ```mzsql + -- Includes the first row in the results -- + SELECT t1.fieldA, t2.fieldB as previous_row_value + FROM tableA t1 + LEFT JOIN tableA t2 + ON t1.fieldA = t2.fieldA + ... 
-- or some other operand + ORDER BY fieldA; + ``` + + extra_example_idiomatic_exclude: | + ```mzsql + -- Excludes the first row in results -- + SELECT o1.order_date, o1.daily_total, + o2.daily_total as previous_daily_total + FROM orders_daily_totals o1, orders_daily_totals o2 + WHERE o1.order_date = o2.order_date + INTERVAL '1' DAY + ORDER BY order_date; + ``` + + {{< important >}} + + The idiomatic Materialize SQL applies only to those "lag over" queries whose + ordering can be represented by some **equality condition**. + + {{</ important >}} + + extra_example_idiomatic_include: | + ```mzsql + -- Include the first row in results -- + SELECT o1.order_date, o1.daily_total, + o2.daily_total as previous_daily_total + FROM orders_daily_totals o1 + LEFT JOIN orders_daily_totals o2 + ON o1.order_date = o2.order_date + INTERVAL '1' DAY + ORDER BY order_date; + ``` + + {{< important >}} + + The idiomatic Materialize SQL applies only to those "lag over" queries whose + ordering can be represented by some **equality condition**. + + {{</ important >}} + + - name: lead + pattern_title: | + [Lead over (order by) whose ordering can be represented by some equality + condition.](/transform-data/idiomatic-materialize-sql/lead/) + + syntax_idiomatic: | + ***To exclude the last row since it has no next row*** + + ```mzsql + -- Excludes the last row in the results -- + SELECT t1.fieldA, t2.fieldB as next_row_value + FROM tableA t1, tableA t2 + WHERE t1.fieldA = t2.fieldA - ... -- or some other operand + ORDER BY fieldA; + ``` + + ***To include the last row*** + + ```mzsql + -- Includes the last row in the results -- + SELECT t1.fieldA, t2.fieldB as next_row_value + FROM tableA t1 + LEFT JOIN tableA t2 + ON t1.fieldA = t2.fieldA - ... -- or some other operand + ORDER BY fieldA; + ``` + + syntax_anti_pattern: | + Avoid the use of [`LEAD(fieldZ) OVER (ORDER BY ...)`](/sql/functions/#lead) + window function when the order by field increases in a regular pattern. + + ```nofmt + -- Anti-pattern. Avoid. 
-- + SELECT fieldA, ... + LEAD(fieldZ) OVER (ORDER BY fieldA) as next_row_value + FROM tableA; + ``` + + example_idiomatic: | + ***To exclude the last row since it has no next row*** + + ```mzsql + SELECT o1.order_date, o1.daily_total, + o2.daily_total as next_daily_total + FROM orders_daily_totals o1, orders_daily_totals o2 + WHERE o1.order_date = o2.order_date - INTERVAL '1' DAY + ORDER BY order_date; + ``` + + ***To include the last row*** + + ```mzsql + SELECT o1.order_date, o1.daily_total, + o2.daily_total as next_daily_total + FROM orders_daily_totals o1 + LEFT JOIN orders_daily_totals o2 + ON o1.order_date = o2.order_date - INTERVAL '1' DAY + ORDER BY order_date; + ``` + + example_anti_pattern: | + Avoid the use of [`LEAD() OVER (ORDER BY ...)` window + function](/sql/functions/#lead) to access next row's value if the order by + field increases in a regular pattern. + + ```nofmt + -- Anti-pattern. Includes the last row's value. -- + SELECT order_date, daily_total, + LEAD(daily_total) OVER (ORDER BY order_date) as next_daily_total + FROM orders_daily_totals; + ``` + + # Extra fields used by the per-topic page (lead.md) but not the chart. + extra_syntax_idiomatic_exclude: | + Use a self join that specifies an **equality match** on the lead's order by + field (e.g., `fieldA`). The order by field must increment in a regular pattern + in order to be represented by an equality condition (e.g., `WHERE t1.fieldA = + t2.fieldA - ...`). The query *excludes* the last row in the results since it + does not have a next row. + + {{< important >}} + + The idiomatic Materialize SQL applies only to those "lead over" queries whose + ordering can be represented by some **equality condition**. + + {{</ important >}} + + ```mzsql + -- Excludes the last row in the results -- + SELECT t1.fieldA, t2.fieldB as next_row_value + FROM tableA t1, tableA t2 + WHERE t1.fieldA = t2.fieldA - ... 
-- or some other operand + ORDER BY fieldA; + ``` + + extra_syntax_idiomatic_include: | + Use a self [`LEFT JOIN/LEFT OUTER JOIN`](/sql/select/join/#left-outer-join) + (e.g., `FROM tableA t1 LEFT JOIN tableA t2`) that specifies an **equality + match** on the lead's order by field (e.g., `fieldA`). The order by field must + increment in a regular pattern in order to be represented by an equality + condition (e.g., `ON t1.fieldA = t2.fieldA - ...`). The query *includes* the + last row, returning `null` as its lead value. + + {{< important >}} + + The idiomatic Materialize SQL applies only to those "lead over" queries whose + ordering can be represented by some **equality condition**. + + {{</ important >}} + + ```mzsql + -- Includes the last row in the results -- + SELECT t1.fieldA, t2.fieldB as next_row_value + FROM tableA t1 + LEFT JOIN tableA t2 + ON t1.fieldA = t2.fieldA - ... -- or some other operand + ORDER BY fieldA; + ``` + + extra_example_idiomatic_exclude: | + ```mzsql + -- Excludes the last row in results -- + SELECT o1.order_date, o1.daily_total, + o2.daily_total as next_daily_total + FROM orders_daily_totals o1, orders_daily_totals o2 + WHERE o1.order_date = o2.order_date - INTERVAL '1' DAY + ORDER BY order_date; + ``` + + {{< important >}} + + The idiomatic Materialize SQL applies only to those "lead over" queries whose + ordering can be represented by some **equality condition**. + + {{</ important >}} + + extra_example_idiomatic_include: | + ```mzsql + -- Include the last row in the results -- + SELECT o1.order_date, o1.daily_total, + o2.daily_total as next_daily_total + FROM orders_daily_totals o1 + LEFT JOIN orders_daily_totals o2 + ON o1.order_date = o2.order_date - INTERVAL '1' DAY + ORDER BY order_date; + ``` + + {{< important >}} + + The idiomatic Materialize SQL applies only to those "lead over" queries whose + ordering can be represented by some **equality condition**. 
+ + {{< /important >}} diff --git a/doc/user/data/idiomatic_mzsql/toc_query_patterns.yml b/doc/user/data/idiomatic_mzsql/toc_query_patterns.yml index 22914fbf7b658..dac0012918a04 100644 --- a/doc/user/data/idiomatic_mzsql/toc_query_patterns.yml +++ b/doc/user/data/idiomatic_mzsql/toc_query_patterns.yml @@ -8,6 +8,11 @@ rows: Idiomatic Materialize: | [Use `UNNEST()` or `DISTINCT UNNEST()` to expand the values and join](/transform-data/idiomatic-materialize-sql/any/). + - Query Pattern: | + [`NOT IN (<subquery>)` predicate](/transform-data/idiomatic-materialize-sql/not-in/) + Idiomatic Materialize: | + [Rewrite to `NOT EXISTS`, or filter out `NULL`s on both sides of the `NOT IN`](/transform-data/idiomatic-materialize-sql/not-in/). + - Query Pattern: | [`mz_now()` with date/time operators](/transform-data/idiomatic-materialize-sql/mz_now/#mz_now-expressions-to-calculate-past-or-future-timestamp) Idiomatic Materialize: | diff --git a/doc/user/data/sql_funcs.yml b/doc/user/data/sql_funcs.yml index 0628df329a635..c4b9de19ec2dd 100644 --- a/doc/user/data/sql_funcs.yml +++ b/doc/user/data/sql_funcs.yml @@ -559,8 +559,20 @@ description: "`true` if `s` returns zero rows." - signature: 'expression NOT IN(s: Query) -> bool' - description: "`s` must return exactly one column; `true` for each value in `expression` - if it does not match any elements of `s`." + description: | + `s` must return exactly one column; `true` for each value in `expression` + if it does not match any elements of `s`. + + {{< note >}} + + When evaluating a `WHERE fieldX NOT IN (<subquery>)` predicate involving + possible `NULL` values for `fieldX` or `<subquery>`, Materialize performs a + cross join between the outer relation and the subquery to preserve SQL + `NULL` semantics, which can significantly increase memory usage. If + possible, rewrite using [idiomatic Materialize + SQL](/transform-data/idiomatic-materialize-sql/not-in/). 
+ + {{< /note >}} - signature: 'expression bool_op SOME(s: Query) -> bool' description: "`s` must return exactly one column; `true` if applying [bool_op](#boolean-operators) diff --git a/doc/user/layouts/shortcodes/idiomatic-sql/general-example-table.html b/doc/user/layouts/shortcodes/idiomatic-sql/general-example-table.html deleted file mode 100644 index 128320bb9db2d..0000000000000 --- a/doc/user/layouts/shortcodes/idiomatic-sql/general-example-table.html +++ /dev/null @@ -1,109 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - -
Idiomatic Materialize SQL
ANY() Equi-join condition - -***If no duplicates in the unnested field*** - -```mzsql --- sales_items.items contains no duplicates. -- - -WITH individual_sales_items AS -(SELECT unnest(items) as item, week_of FROM sales_items) -SELECT s.week_of, o.order_id, o.item, o.quantity -FROM orders o -JOIN individual_sales_items s ON o.item = s.item -WHERE date_trunc('week', o.order_date) = s.week_of; -``` - -***If duplicates exist in the unnested field*** - -```mzsql --- sales_items.items may contains duplicates -- - -WITH individual_sales_items AS -(SELECT DISTINCT unnest(items) as item, week_of FROM sales_items) -SELECT s.week_of, o.order_id, o.item, o.quantity -FROM orders o -JOIN individual_sales_items s ON o.item = s.item -WHERE date_trunc('week', o.order_date) = s.week_of -ORDER BY s.week_of, o.order_id, o.item, o.quantity -; -``` - -
mz_now() cannot be used with date/time operators - -```mzsql -SELECT * from orders -WHERE mz_now() > order_date + INTERVAL '5min' -; -``` - -
mz_now() cannot be used with ORs in materialized/indexed view definitions and SUBSCRIBE statements - -**Rewrite as `UNION ALL` with possible duplicates** - -```mzsql -CREATE MATERIALIZED VIEW forecast_completed_orders_duplicates_possible AS -SELECT item, quantity, status from orders -WHERE status = 'Shipped' -UNION ALL -SELECT item, quantity, status from orders -WHERE order_date + interval '30' minutes >= mz_now() -; -``` - -**Rewrite as UNION ALL that avoids duplicates across queries** - -```mzsql -CREATE MATERIALIZED VIEW forecast_completed_orders_deduplicated_union_all AS -SELECT item, quantity, status from orders -WHERE status = 'Shipped' -UNION ALL -SELECT item, quantity, status from orders -WHERE order_date + interval '30' minutes >= mz_now() -AND status != 'Shipped' -- Deduplicate by excluding those with status 'Shipped' -; -``` - -**Rewrite as UNION to deduplicate any and all duplicated results** - -```mzsql -CREATE MATERIALIZED VIEW forecast_completed_orders_deduplicated_results AS -SELECT item, quantity, status from orders -WHERE status = 'Shipped' -UNION -SELECT item, quantity, status from orders -WHERE order_date + interval '30' minutes >= mz_now() -; -``` - -
- - diff --git a/doc/user/layouts/shortcodes/idiomatic-sql/general-syntax-table.html b/doc/user/layouts/shortcodes/idiomatic-sql/general-syntax-table.html deleted file mode 100644 index 3f367a9c3348c..0000000000000 --- a/doc/user/layouts/shortcodes/idiomatic-sql/general-syntax-table.html +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - -
Idiomatic Materialize SQL Pattern
ANY() Equi-join condition - -***If no duplicates in the unnested field*** - -```mzsql -WITH my_expanded_values AS -(SELECT UNNEST(array|list|map) AS fieldZ FROM tableB) -SELECT a.fieldA, ... -FROM tableA a -JOIN my_expanded_values t ON a.fieldZ = t.fieldZ -; -``` - -***If duplicates exist in the unnested field*** -```mzsql -WITH my_expanded_values AS -(SELECT DISTINCT UNNEST(array|list|map) AS fieldZ FROM tableB) -SELECT a.fieldA, ... -FROM tableA a -JOIN my_expanded_values t ON a.fieldZ = t.fieldZ -; -``` - -
mz_now() cannot be used with date/time operators -Rewrite the query expression; specifically, move the operation to the other side of the comparison. -
mz_now() cannot be used with ORs in materialized/indexed view definitions and SUBSCRIBE statements -Rewrite as UNION ALL or UNION, deduplicating as -necessary: - -
    -
  • In some cases, you may need to modify the conditions to deduplicate results -when using UNION ALL. For example, you might add the negation of -one input's condition to the other as a conjunction.
  • - -
  • In some cases, using UNION instead of UNION ALL -may suffice if the inputs do not contain other duplicates that need to be -retained.
  • - -
- -
diff --git a/doc/user/layouts/shortcodes/idiomatic-sql/window-functions-example-table.html b/doc/user/layouts/shortcodes/idiomatic-sql/window-functions-example-table.html deleted file mode 100644 index cbcb30a48bb92..0000000000000 --- a/doc/user/layouts/shortcodes/idiomatic-sql/window-functions-example-table.html +++ /dev/null @@ -1,159 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Idiomatic Materialize SQL
Top-K over partition
(K >= 1)
- -```mzsql -SELECT order_id, item, subtotal -FROM (SELECT DISTINCT order_id FROM orders_view) grp, - LATERAL (SELECT item, subtotal FROM orders_view - WHERE order_id = grp.order_id - ORDER BY subtotal DESC LIMIT 3) -- For Top 3 -ORDER BY order_id, subtotal DESC; -``` - -
Top-K over partition
(K = 1)
- -```mzsql -SELECT DISTINCT ON(order_id) order_id, item, subtotal -FROM orders_view -ORDER BY order_id, subtotal DESC; -- For Top 1 -``` - -
First value over partition
order by ...
- -```mzsql -SELECT o.order_id, minmax.lowest_price, minmax.highest_price, - o.item, - o.price, - o.price - minmax.lowest_price AS diff_lowest_price, - o.price - minmax.highest_price AS diff_highest_price -FROM orders_view o, - (SELECT order_id, - MIN(price) AS lowest_price, - MAX(price) AS highest_price - FROM orders_view - GROUP BY order_id) minmax -WHERE o.order_id = minmax.order_id -ORDER BY o.order_id, o.item; -``` - -
Last value over partition
order by...
range between unbounded preceding
and unbounded following
- -```mzsql -SELECT o.order_id, minmax.lowest_price, minmax.highest_price, - o.item, - o.price, - o.price - minmax.lowest_price AS diff_lowest_price, - o.price - minmax.highest_price AS diff_highest_price -FROM orders_view o, - (SELECT order_id, - MIN(price) AS lowest_price, - MAX(price) AS highest_price - FROM orders_view - GROUP BY order_id) minmax -WHERE o.order_id = minmax.order_id -ORDER BY o.order_id, o.item; -``` - -
- -Lag over (order by) whose ordering can be represented by some equality -condition. - - - -***If suppressing the first row since it has no previous row*** - -```mzsql -SELECT o1.order_date, o1.daily_total, - o2.daily_total as previous_daily_total -FROM orders_daily_totals o1, orders_daily_totals o2 -WHERE o1.order_date = o2.order_date + INTERVAL '1' DAY -ORDER BY order_date; -``` - -***To include the first row*** - -```mzsql -SELECT o1.order_date, o1.daily_total, - o2.daily_total as previous_daily_total -FROM orders_daily_totals o1 -LEFT JOIN orders_daily_totals o2 -ON o1.order_date = o2.order_date + INTERVAL '1' DAY -ORDER BY order_date; -``` - -
- -Lead over (order by) whose ordering can be represented by some equality -condition. - - - -***To suppress the last row since it has no next row*** - - ```mzsql - SELECT o1.order_date, o1.daily_total, - o2.daily_total as previous_daily_total - FROM orders_daily_totals o1, orders_daily_totals o2 - WHERE o1.order_date = o2.order_date - INTERVAL '1' DAY - ORDER BY order_date; - ``` - - ***To include the last row*** - - ```mzsql - SELECT o1.order_date, o1.daily_total, - o2.daily_total as previous_daily_total - FROM orders_daily_totals o1 - LEFT JOIN orders_daily_totals o2 - ON o1.order_date = o2.order_date - INTERVAL '1' DAY - ORDER BY order_date; - ``` - -
- - diff --git a/doc/user/layouts/shortcodes/idiomatic-sql/window-functions-syntax-table.html b/doc/user/layouts/shortcodes/idiomatic-sql/window-functions-syntax-table.html deleted file mode 100644 index b55a8dbe0bdf6..0000000000000 --- a/doc/user/layouts/shortcodes/idiomatic-sql/window-functions-syntax-table.html +++ /dev/null @@ -1,141 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Idiomatic Materialize SQL Pattern
Top-K over partition
(K >= 1)
- -```mzsql -SELECT fieldA, fieldB, ... -FROM (SELECT DISTINCT fieldA FROM tableA) grp, - LATERAL (SELECT fieldB, ... , fieldZ FROM tableA - WHERE fieldA = grp.fieldA - ORDER BY fieldZ ... LIMIT K) -- K is a number >= 1 -ORDER BY fieldA, fieldZ ... ; -``` - -
Top-K over partition
(K = 1)
- -```mzsql -SELECT DISTINCT ON(fieldA) fieldA, fieldB, ... -FROM tableA -ORDER BY fieldA, fieldZ ... -- Top-K where K is 1; -``` - -
First value over partition
order by ...
- -```mzsql -SELECT tableA.fieldA, tableA.fieldB, minmax.Z - FROM tableA, - (SELECT fieldA, - MIN(fieldZ) -- Or MAX() - FROM tableA - GROUP BY fieldA) minmax -WHERE tableA.fieldA = minmax.fieldA -ORDER BY fieldA ... ; -``` - -
Last value over partition
order by ...
range between unbounded preceding
and unbounded following
- -```mzsql -SELECT tableA.fieldA, tableA.fieldB, minmax.Z - FROM tableA, - (SELECT fieldA, - MAX(fieldZ) -- Or MIN() - FROM tableA - GROUP BY fieldA) minmax -WHERE tableA.fieldA = minmax.fieldA -ORDER BY fieldA ... ; -``` - -
- -Lag over (order by) whose ordering can be represented by some equality -condition. - - - -***To exclude the first row since it has no previous row*** - -```mzsql -SELECT t1.fieldA, t2.fieldB -FROM tableA t1, tableA t2 -WHERE t1.fieldA = t2.fieldA + ... -ORDER BY fieldA; -``` - -***To include the first row*** - -```mzsql -SELECT t1.fieldA, t2.fieldB -FROM tableA t1 -LEFT JOIN tableA t2 -ON t1.fieldA = t2.fieldA + ... -ORDER BY fieldA; -``` - -
- -Lead over (order by) whose ordering can be represented by some equality -condition. - - - -***To exclude the last row since it has no next row*** - -```mzsql -SELECT t1.fieldA, t2.fieldB -FROM tableA t1, tableA t2 -WHERE t1.fieldA = t2.fieldA - ... -ORDER BY fieldA; -``` - -***To include the last row*** - -```mzsql -SELECT t1.fieldA, t2.fieldB -FROM tableA t1 -LEFT JOIN tableA t2 -ON t1.fieldA = t2.fieldA - ... -ORDER BY fieldA; -``` - -
diff --git a/doc/user/layouts/shortcodes/include-from-yaml.html b/doc/user/layouts/shortcodes/include-from-yaml.html index 019353cffbe11..e6867b680c7ca 100644 --- a/doc/user/layouts/shortcodes/include-from-yaml.html +++ b/doc/user/layouts/shortcodes/include-from-yaml.html @@ -4,9 +4,15 @@ {{ $data = index $data . }} {{ end }} {{- $name := .Get "name" -}} +{{- $field := .Get "field" | default "content" -}} -{{- range $data -}} +{{- $rows := $data -}} +{{- if reflect.IsMap $data -}} + {{- $rows = $data.rows -}} +{{- end -}} + +{{- range $rows -}} {{- if eq .name $name -}} -{{ .content | $.Page.RenderString }} +{{ index . $field | $.Page.RenderString }} {{- end -}} {{- end -}} diff --git a/doc/user/layouts/shortcodes/include-from-yaml.skill.md b/doc/user/layouts/shortcodes/include-from-yaml.skill.md index 34dbafc6a6f10..5fbdaa3a19e10 100644 --- a/doc/user/layouts/shortcodes/include-from-yaml.skill.md +++ b/doc/user/layouts/shortcodes/include-from-yaml.skill.md @@ -5,8 +5,15 @@ {{- $data = index $data . -}} {{- end }} {{- $name := .Get "name" -}} -{{- range $data -}} +{{- $field := .Get "field" | default "content" -}} + +{{- $rows := $data -}} +{{- if reflect.IsMap $data -}} + {{- $rows = $data.rows -}} +{{- end -}} + +{{- range $rows -}} {{- if eq .name $name -}} -{{- .content | $.Page.RenderString -}} +{{- index . $field | $.Page.RenderString -}} {{- end -}} {{- end -}} diff --git a/doc/user/layouts/shortcodes/yaml-table.html b/doc/user/layouts/shortcodes/yaml-table.html index 8bcfaf77b6c6d..85758c3651eb9 100644 --- a/doc/user/layouts/shortcodes/yaml-table.html +++ b/doc/user/layouts/shortcodes/yaml-table.html @@ -1,15 +1,31 @@ -{{ $pathArray := split (lower (.Get "data")) "/" }} -{{ $noHeader := .Get "noHeader" }} -{{ $data := $.Site.Data }} +{{- $pathArray := split (lower (.Get "data")) "/" -}} +{{- $noHeader := .Get "noHeader" -}} +{{- $columnsParam := .Get "columns" -}} +{{- $data := $.Site.Data -}} -{{ range $pathArray }} - {{ $data = index $data . 
}} -{{ end }} +{{- range $pathArray -}} + {{- $data = index $data . -}} +{{- end -}} + +{{- $columns := $data.columns -}} +{{- if $columnsParam -}} + {{- $wanted := split $columnsParam "," -}} + {{- $filtered := slice -}} + {{- range $wanted -}} + {{- $name := strings.TrimSpace . -}} + {{- range $data.columns -}} + {{- if eq .column $name -}} + {{- $filtered = $filtered | append . -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- $columns = $filtered -}} +{{- end -}} {{ if $noHeader }} {{ partial "yaml-tables/generic-table-no-header.html" (dict "rows" $data.rows "columns" - $data.columns) }} + $columns) }} {{ else }} {{ partial "yaml-tables/generic-table.html" (dict "rows" $data.rows "columns" - $data.columns) }} + $columns) }} {{ end }} diff --git a/doc/user/layouts/shortcodes/yaml-table.skill.md b/doc/user/layouts/shortcodes/yaml-table.skill.md index 346fc56867d0c..ec5a0c76d1097 100644 --- a/doc/user/layouts/shortcodes/yaml-table.skill.md +++ b/doc/user/layouts/shortcodes/yaml-table.skill.md @@ -1,14 +1,31 @@ {{- /* Skill output: yaml-table renders as markdown table with shortcode processing */ -}} {{- $pathArray := split (lower (.Get "data")) "/" -}} {{- $noHeader := .Get "noHeader" -}} +{{- $columnsParam := .Get "columns" -}} {{- $data := $.Site.Data -}} {{- range $pathArray }} {{- $data = index $data . -}} {{- end }} + +{{- $columns := $data.columns -}} +{{- if $columnsParam -}} + {{- $wanted := split $columnsParam "," -}} + {{- $filtered := slice -}} + {{- range $wanted -}} + {{- $name := strings.TrimSpace . -}} + {{- range $data.columns -}} + {{- if eq .column $name -}} + {{- $filtered = $filtered | append . 
-}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- $columns = $filtered -}} +{{- end -}} + {{- $fields := slice -}} {{- $headers := slice -}} {{- $separators := slice -}} -{{- range $data.columns -}} +{{- range $columns -}} {{- $headers = $headers | append (.header | default .column) -}} {{- $fields = $fields | append (dict "field" .column) -}} {{- $separators = $separators | append "---" -}}