Skip to content

Commit d7a31aa

Browse files
authored
feat(ir): more flexible dereferencing support for join right hand side (ibis-project#8992)
Enables using fields from parent tables of the join's right-hand side instead of requiring the exact same table:

```py
t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"})
t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"})
t3 = t2.mutate(e=t2.c + 1)
joined = t1.join(t3, [t1.a == t2.c])  # here we use t2.c instead of t3.c
```

Ambiguous cases are identified and raise an error, for example:

```py
t.join(t, [t.a == t.a])
```

Depends on:
- ibis-project#9043
- ibis-project#9041

Fixes ibis-project#8581
1 parent e04c3e5 commit d7a31aa

42 files changed

Lines changed: 563 additions & 424 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

docs/_code/setup_penguins.qmd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
```{python}
22
import ibis # <1>
33
import ibis.selectors as s # <1>
4+
from ibis import _
45
56
ibis.options.interactive = True # <2>
67

docs/how-to/analytics/basics.qmd

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,20 +58,20 @@ t.mutate(bill_length_cm=t["bill_length_mm"] / 10).relocate(
5858
Use the `.join()` method to join data:
5959

6060
```{python}
61-
t.join(t, t["species"] == t["species"], how="left_semi")
61+
t.join(t, ["species"], how="left_semi")
6262
```
6363

6464
## Combining it all together
6565

6666
We can use [the underscore to chain expressions together](./chain_expressions.qmd).
6767

6868
```{python}
69-
t.join(t, t["species"] == t["species"], how="left_semi").filter(
70-
ibis._["species"] != "Adelie"
69+
t.join(t, ["species"], how="left_semi").filter(
70+
_.species != "Adelie"
7171
).group_by(["species", "island"]).aggregate(
72-
avg_bill_length=ibis._["bill_length_mm"].mean()
72+
avg_bill_length=_.bill_length_mm.mean()
7373
).order_by(
74-
ibis._["avg_bill_length"].desc()
74+
_.avg_bill_length.desc()
7575
)
7676
```
7777

ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_projection_fusion_only_peeks_at_immediate_parent/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ SELECT
1515
`t3`.`val`,
1616
`t3`.`XYZ`
1717
FROM `t1` AS `t3`
18-
INNER JOIN `t1` AS `t5`
18+
INNER JOIN `t1` AS `t4`
1919
ON TRUE

ibis/backends/bigquery/tests/unit/test_compiler.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,8 @@ class MockBackend(ibis.backends.bigquery.Backend):
274274
table = ops.SQLQueryResult("select * from t", schema, ibis_client).to_expr()
275275
for _ in range(num_joins): # noqa: F402
276276
table = table.mutate(dummy=ibis.literal(""))
277-
table = table.left_join(table, ["dummy"])[[table]]
277+
table_ = table.view()
278+
table = table.left_join(table_, ["dummy"])[[table_]]
278279

279280
start = time.time()
280281
table.compile()

ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,5 @@ SELECT
1313
"t1"."year",
1414
"t1"."month"
1515
FROM "functional_alltypes" AS "t1"
16-
INNER JOIN "functional_alltypes" AS "t3"
17-
ON "t1"."id" = "t3"."id"
16+
INNER JOIN "functional_alltypes" AS "t2"
17+
ON "t1"."id" = "t2"."id"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
WITH `t9` AS (SELECT EXTRACT(year FROM `t8`.`odate`) AS `year`, COUNT(*) AS `CountStar()` FROM (SELECT `t6`.`c_custkey`, `t6`.`c_name`, `t6`.`c_address`, `t6`.`c_nationkey`, `t6`.`c_phone`, `t6`.`c_acctbal`, `t6`.`c_mktsegment`, `t6`.`c_comment`, `t4`.`r_name` AS `region`, `t7`.`o_totalprice`, CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` FROM `tpch_region` AS `t4` INNER JOIN `tpch_nation` AS `t5` ON `t4`.`r_regionkey` = `t5`.`n_regionkey` INNER JOIN `tpch_customer` AS `t6` ON `t6`.`c_nationkey` = `t5`.`n_nationkey` INNER JOIN `tpch_orders` AS `t7` ON `t7`.`o_custkey` = `t6`.`c_custkey`) AS `t8` GROUP BY 1) SELECT `t11`.`year`, `t11`.`CountStar()` AS `pre_count`, `t13`.`CountStar()` AS `post_count` FROM `t9` AS `t11` INNER JOIN `t9` AS `t13` ON `t11`.`year` = `t13`.`year`
1+
WITH `t9` AS (SELECT EXTRACT(year FROM `t8`.`odate`) AS `year`, COUNT(*) AS `CountStar()` FROM (SELECT `t6`.`c_custkey`, `t6`.`c_name`, `t6`.`c_address`, `t6`.`c_nationkey`, `t6`.`c_phone`, `t6`.`c_acctbal`, `t6`.`c_mktsegment`, `t6`.`c_comment`, `t4`.`r_name` AS `region`, `t7`.`o_totalprice`, CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` FROM `tpch_region` AS `t4` INNER JOIN `tpch_nation` AS `t5` ON `t4`.`r_regionkey` = `t5`.`n_regionkey` INNER JOIN `tpch_customer` AS `t6` ON `t6`.`c_nationkey` = `t5`.`n_nationkey` INNER JOIN `tpch_orders` AS `t7` ON `t7`.`o_custkey` = `t6`.`c_custkey`) AS `t8` GROUP BY 1) SELECT `t11`.`year`, `t11`.`CountStar()` AS `pre_count`, `t12`.`CountStar()` AS `post_count` FROM `t9` AS `t11` INNER JOIN `t9` AS `t12` ON `t11`.`year` = `t12`.`year`

ibis/backends/impala/tests/snapshots/test_sql/test_join_with_nested_or_condition/out.sql

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@ SELECT
22
`t1`.`a`,
33
`t1`.`b`
44
FROM `t` AS `t1`
5-
INNER JOIN `t` AS `t3`
6-
ON `t1`.`a` = `t3`.`a`
5+
INNER JOIN `t` AS `t2`
6+
ON `t1`.`a` = `t2`.`a`
77
AND (
88
(
9-
`t1`.`a` <> `t3`.`b`
9+
`t1`.`a` <> `t2`.`b`
1010
) OR (
11-
`t1`.`b` <> `t3`.`a`
11+
`t1`.`b` <> `t2`.`a`
1212
)
1313
)

ibis/backends/impala/tests/snapshots/test_sql/test_join_with_nested_xor_condition/out.sql

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@ SELECT
22
`t1`.`a`,
33
`t1`.`b`
44
FROM `t` AS `t1`
5-
INNER JOIN `t` AS `t3`
6-
ON `t1`.`a` = `t3`.`a`
5+
INNER JOIN `t` AS `t2`
6+
ON `t1`.`a` = `t2`.`a`
77
AND (
88
(
9-
`t1`.`a` <> `t3`.`b` OR `t1`.`b` <> `t3`.`a`
9+
`t1`.`a` <> `t2`.`b` OR `t1`.`b` <> `t2`.`a`
1010
)
1111
AND NOT (
12-
`t1`.`a` <> `t3`.`b` AND `t1`.`b` <> `t3`.`a`
12+
`t1`.`a` <> `t2`.`b` AND `t1`.`b` <> `t2`.`a`
1313
)
1414
)

ibis/backends/impala/tests/snapshots/test_sql/test_limit_cte_extract/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,5 @@ SELECT
1919
`t3`.`year`,
2020
`t3`.`month`
2121
FROM `t1` AS `t3`
22-
INNER JOIN `t1` AS `t5`
22+
INNER JOIN `t1` AS `t4`
2323
ON TRUE

ibis/backends/impala/tests/snapshots/test_sql/test_nested_join_base/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ WITH `t1` AS (
77
1
88
)
99
SELECT
10-
`t5`.`uuid`,
10+
`t3`.`uuid`,
1111
`t3`.`CountStar(t)`
1212
FROM (
1313
SELECT

0 commit comments

Comments
 (0)