Skip to content
16 changes: 14 additions & 2 deletions datafusion/functions-window/src/cume_dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,20 @@ define_udwf_and_expr!(
/// CumeDist calculates the cume_dist in the window function with order by
#[user_doc(
doc_section(label = "Ranking Functions"),
description = "Relative rank of the current row: (number of rows preceding or peer with current row) / (total rows).",
syntax_example = "cume_dist()"
description = "Relative rank of the current row: (number of rows preceding or peer with the current row) / (total rows).",
syntax_example = "cume_dist()",
sql_example = r#"```sql
-- Example usage of the cume_dist window function:
SELECT salary,
cume_dist() OVER (ORDER BY salary) AS cume_dist
FROM employees;
```

```text
+--------+-----------+
| salary | cume_dist |
+--------+-----------+
| 30000 | 0.33 |
| 50000 | 0.67 |
| 70000 | 1.00 |
+--------+-----------+
```"#
)]
#[derive(Debug)]
pub struct CumeDist {
Expand Down
43 changes: 38 additions & 5 deletions datafusion/functions-window/src/nth_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,16 +160,49 @@ fn get_last_value_doc() -> &'static Documentation {
static NTH_VALUE_DOCUMENTATION: LazyLock<Documentation> = LazyLock::new(|| {
Documentation::builder(
DOC_SECTION_ANALYTICAL,
"Returns value evaluated at the row that is the nth row of the window \
frame (counting from 1); null if no such row.",
"Returns the value evaluated at the nth row of the window frame \
(counting from 1). Returns NULL if no such row exists.",
"nth_value(expression, n)",
)
.with_argument(
"expression",
"The name the column of which nth \
value to retrieve",
"The column from which to retrieve the nth value.",
)
.with_argument(
"n",
"Integer. Specifies the row number (starting from 1) in the window frame.",
)
.with_sql_example(
r#"```sql
-- Sample employees table:
CREATE TABLE employees (id INT, salary INT);
INSERT INTO employees (id, salary) VALUES
(1, 30000),
(2, 40000),
(3, 50000),
(4, 60000),
(5, 70000);

-- Example usage of nth_value:
SELECT nth_value(salary, 2) OVER (
ORDER BY salary
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS nth_value
FROM employees;
```

```text
+-----------+
| nth_value |
+-----------+
| NULL |
| 40000 |
| 40000 |
| 40000 |
| 40000 |
+-----------+
```"#,
)
.with_argument("n", "Integer. Specifies the n in nth")
.build()
});

Expand Down
54 changes: 50 additions & 4 deletions docs/source/user-guide/sql/window_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,26 @@ All [aggregate functions](aggregate_functions.md) can be used as window function

### `cume_dist`

Relative rank of the current row: (number of rows preceding or peer with current row) / (total rows).
Relative rank of the current row: (number of rows preceding or peer with the current row) / (total rows).

```sql
cume_dist()
```

#### Example

```sql
-- Example usage of the cume_dist window function:
SELECT salary,
cume_dist() OVER (ORDER BY salary) AS cume_dist
FROM employees;
```

```text
+--------+-----------+
| salary | cume_dist |
+--------+-----------+
| 30000 | 0.33 |
| 50000 | 0.67 |
| 70000 | 1.00 |
+--------+-----------+
```

### `dense_rank`

Returns the rank of the current row without gaps. This function ranks rows in a dense manner, meaning consecutive ranks are assigned even for identical values.
Expand Down Expand Up @@ -272,13 +286,45 @@ lead(expression, offset, default)

### `nth_value`

Returns value evaluated at the row that is the nth row of the window frame (counting from 1); null if no such row.
Returns the value evaluated at the nth row of the window frame (counting from 1). Returns NULL if no such row exists.

```sql
nth_value(expression, n)
```

#### Arguments

- **expression**: The name the column of which nth value to retrieve
- **n**: Integer. Specifies the n in nth
- **expression**: The column from which to retrieve the nth value.
- **n**: Integer. Specifies the row number (starting from 1) in the window frame.

#### Example

```sql
-- Sample employees table:
CREATE TABLE employees (id INT, salary INT);
INSERT INTO employees (id, salary) VALUES
(1, 30000),
(2, 40000),
(3, 50000),
(4, 60000),
(5, 70000);

-- Example usage of nth_value:
SELECT nth_value(salary, 2) OVER (
ORDER BY salary
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS nth_value
FROM employees;
```

```text
+-----------+
| nth_value |
+-----------+
| NULL |
| 40000 |
| 40000 |
| 40000 |
| 40000 |
+-----------+
```