Skip to content

Commit f926995

Browse files
kszucs and cpcloud authored
feat(common): introduce FrozenOrderedDict (ibis-project#9081)
Continuation of ibis-project#9068: adds `FrozenOrderedDict`, which calculates its hash from `tuple(self.items())` rather than `frozenset(self.items())` and also checks item order during equality checks. Closes ibis-project#9063. --------- Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
1 parent f5d9084 commit f926995

9 files changed

Lines changed: 136 additions & 20 deletions

File tree

ibis/backends/tests/test_client.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,3 +1608,29 @@ def test_json_to_pyarrow(con):
16081608
if val is not None
16091609
}
16101610
assert result == expected
1611+
1612+
1613+
@pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError)
@pytest.mark.notyet(
    ["risingwave", "exasol"],
    raises=com.UnsupportedOperationError,
    reason="no temp table support",
)
@pytest.mark.notyet(
    ["impala", "trino"], raises=NotImplementedError, reason="no temp table support"
)
@pytest.mark.notyet(
    ["druid"], raises=NotImplementedError, reason="doesn't support create_table"
)
@pytest.mark.notyet(
    ["flink"], raises=com.IbisError, reason="no persistent temp table support"
)
def test_schema_with_caching(alltypes):
    """Check that a cached table reports the same schema as its source.

    Two projections of the same two columns, differing only in column
    order, are both cached before any schema is compared.
    """
    column_orders = (
        ("bigint_col", "string_col"),
        ("string_col", "bigint_col"),
    )

    # Build every expression first, then cache them all, so both cached
    # tables exist at the same time when the schemas are compared.
    sources = [alltypes.limit(5).select(*columns) for columns in column_orders]
    cached_tables = [source.cache() for source in sources]

    for source, cached in zip(sources, cached_tables):
        assert cached.schema() == source.schema()

ibis/common/collections.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,9 @@ def __hash__(self) -> int:
288288
return self.__precomputed_hash__
289289

290290
def __setitem__(self, key: K, value: V) -> None:
291-
raise TypeError("'FrozenDict' object does not support item assignment")
291+
raise TypeError(
292+
f"'{self.__class__.__name__}' object does not support item assignment"
293+
)
292294

293295
def __setattr__(self, name: str, _: Any) -> None:
294296
raise TypeError(f"Attribute {name!r} cannot be assigned to frozendict")
@@ -297,6 +299,25 @@ def __reduce__(self) -> tuple:
297299
return (self.__class__, (dict(self),))
298300

299301

302+
@public
class FrozenOrderedDict(FrozenDict[K, V]):
    """Immutable mapping whose hash and equality depend on item order.

    The hash is computed once, at construction time, from
    ``tuple(self.items())``. Equality against another mapping compares the
    two item sequences as tuples, so the same items in a different order
    compare unequal.
    """

    def __init__(self, *args, **kwargs):
        # ``super(FrozenDict, self)`` starts the MRO lookup *after* FrozenDict,
        # so FrozenDict.__init__ itself is skipped.
        # NOTE(review): presumably this avoids computing FrozenDict's own
        # hash first -- confirm against FrozenDict.__init__.
        super(FrozenDict, self).__init__(*args, **kwargs)
        # FrozenDict.__setattr__ raises TypeError, so the cached hash has to
        # be stored through object.__setattr__.
        object.__setattr__(
            self, "__precomputed_hash__", hash(tuple(self.items()))
        )

    def __hash__(self) -> int:
        # Restated explicitly: a class that defines __eq__ without __hash__
        # gets __hash__ set to None and would become unhashable.
        return self.__precomputed_hash__

    def __eq__(self, other: Any) -> bool:
        """Return whether `other` is a mapping with the same items in the same order.

        Returns ``NotImplemented`` when `other` is not a mapping.
        """
        if isinstance(other, collections.abc.Mapping):
            return tuple(self.items()) == tuple(other.items())
        return NotImplemented

    def __ne__(self, other: Any) -> bool:
        # Defined explicitly so that an inherited __ne__ cannot disagree with
        # the order-sensitive __eq__ above.
        return not (self == other)
319+
320+
300321
class RewindableIterator(Iterator[V]):
301322
"""Iterator that can be rewound to a checkpoint.
302323

ibis/common/tests/test_collections.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
Collection,
99
Container,
1010
FrozenDict,
11+
FrozenOrderedDict,
1112
Iterable,
1213
Iterator,
1314
Mapping,
@@ -429,6 +430,41 @@ def test_frozendict():
429430
assert_pickle_roundtrip(d)
430431

431432

433+
def test_frozenordereddict():
    """Cover FrozenOrderedDict's mapping API, ordered equality, immutability, hashing and pickling."""
    abc = FrozenOrderedDict({"a": 1, "b": 2, "c": 3})
    abc_from_kwargs = FrozenOrderedDict(a=1, b=2, c=3)
    abcd = FrozenOrderedDict(a=1, b=2, c=3, d=4)
    acb = FrozenOrderedDict(a=1, c=3, b=2)
    plain_abc = FrozenDict(a=1, b=2, c=3)

    for mapping_type in (Mapping, collections.abc.Mapping):
        assert isinstance(abc, mapping_type)

    assert abc == abc_from_kwargs
    assert abc != abcd

    # Same items in the same order: equal to a FrozenDict in both directions.
    assert abc_from_kwargs == plain_abc
    assert plain_abc == abc_from_kwargs

    # Same items in a different order: unequal in both directions.
    assert abc_from_kwargs != acb
    assert acb != abc_from_kwargs
    assert acb != plain_abc
    assert plain_abc != acb

    assert abc["a"] == 1
    assert abc["b"] == 2

    msg = "'FrozenOrderedDict' object does not support item assignment"
    # Both overwriting an existing key and adding a new key must be rejected.
    for key, value in (("a", 2), ("d", 4)):
        with pytest.raises(TypeError, match=msg):
            abc[key] = value

    assert hash(FrozenOrderedDict(a=1, b=2)) == hash(FrozenOrderedDict(a=1, b=2))
    assert hash(FrozenOrderedDict(a=1, b=2)) != hash(FrozenOrderedDict(b=2, a=1))
    assert hash(FrozenOrderedDict(a=1, b=2)) != hash(abc)

    assert_pickle_roundtrip(abc)
466+
467+
432468
def test_rewindable_iterator():
433469
it = RewindableIterator(range(10))
434470
assert next(it) == 0

ibis/expr/datatypes/core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from typing_extensions import Self, get_args, get_origin
1515

1616
from ibis.common.annotations import attribute
17-
from ibis.common.collections import FrozenDict, MapSet
17+
from ibis.common.collections import FrozenOrderedDict, MapSet
1818
from ibis.common.dispatch import lazy_singledispatch
1919
from ibis.common.grounds import Concrete, Singleton
2020
from ibis.common.patterns import Coercible, CoercionError
@@ -823,7 +823,7 @@ def _pretty_piece(self) -> str:
823823
class Struct(Parametric, MapSet):
824824
"""Structured values."""
825825

826-
fields: FrozenDict[str, DataType]
826+
fields: FrozenOrderedDict[str, DataType]
827827

828828
scalar = "StructScalar"
829829
column = "StructColumn"

ibis/expr/datatypes/tests/test_core.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,18 @@ def test_struct_set_operations():
421421
assert d > c
422422

423423

424+
def test_struct_equality():
    """Structs with identical fields are equal; reordering or adding a field makes them unequal."""
    base = dt.Struct({"a": dt.int64, "b": dt.string})
    identical = dt.Struct({"a": dt.int64, "b": dt.string})
    reordered = dt.Struct({"b": dt.string, "a": dt.int64})
    extended = dt.Struct({"a": dt.int64, "b": dt.string, "c": dt.float64})

    assert base == identical
    assert base != reordered
    assert base != extended
    assert reordered != identical
434+
435+
424436
def test_singleton_null():
425437
assert dt.null is dt.Null()
426438

ibis/expr/operations/relations.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import ibis.expr.datashape as ds
1111
import ibis.expr.datatypes as dt
1212
from ibis.common.annotations import attribute
13-
from ibis.common.collections import FrozenDict
13+
from ibis.common.collections import FrozenDict, FrozenOrderedDict
1414
from ibis.common.exceptions import IbisTypeError, IntegrityError, RelationError
1515
from ibis.common.grounds import Concrete
1616
from ibis.common.patterns import Between, InstanceOf
@@ -41,7 +41,7 @@ def __coerce__(cls, value):
4141

4242
@property
4343
@abstractmethod
44-
def values(self) -> FrozenDict[str, Value]:
44+
def values(self) -> FrozenOrderedDict[str, Value]:
4545
"""A mapping of column names to expressions which build up the relation.
4646
4747
This attribute is heavily used in rewrites as well as during field
@@ -59,13 +59,13 @@ def schema(self) -> Schema:
5959
...
6060

6161
@property
62-
def fields(self) -> FrozenDict[str, Column]:
62+
def fields(self) -> FrozenOrderedDict[str, Column]:
6363
"""A mapping of column names to fields of the relation.
6464
6565
This calculated property shouldn't be overridden in subclasses since it
6666
is mostly used for convenience.
6767
"""
68-
return FrozenDict({k: Field(self, k) for k in self.schema})
68+
return FrozenOrderedDict({k: Field(self, k) for k in self.schema})
6969

7070
def to_expr(self):
7171
from ibis.expr.types import Table
@@ -110,7 +110,7 @@ def _check_integrity(values, allowed_parents):
110110
@public
111111
class Project(Relation):
112112
parent: Relation
113-
values: FrozenDict[str, NonSortKey[Unaliased[Value]]]
113+
values: FrozenOrderedDict[str, NonSortKey[Unaliased[Value]]]
114114

115115
def __init__(self, parent, values):
116116
_check_integrity(values.values(), {parent})
@@ -152,7 +152,7 @@ def schema(self):
152152
# TODO(kszucs): remove in favor of View
153153
@public
154154
class SelfReference(Reference):
155-
values = FrozenDict()
155+
values = FrozenOrderedDict()
156156

157157

158158
@public
@@ -187,7 +187,7 @@ class JoinLink(Node):
187187
class JoinChain(Relation):
188188
first: Reference
189189
rest: VarTuple[JoinLink]
190-
values: FrozenDict[str, Unaliased[Value]]
190+
values: FrozenOrderedDict[str, Unaliased[Value]]
191191

192192
def __init__(self, first, rest, values):
193193
allowed_parents = {first}
@@ -259,8 +259,8 @@ class Limit(Simple):
259259
@public
260260
class Aggregate(Relation):
261261
parent: Relation
262-
groups: FrozenDict[str, Unaliased[Column]]
263-
metrics: FrozenDict[str, Unaliased[Scalar]]
262+
groups: FrozenOrderedDict[str, Unaliased[Column]]
263+
metrics: FrozenOrderedDict[str, Unaliased[Scalar]]
264264

265265
def __init__(self, parent, groups, metrics):
266266
_check_integrity(groups.values(), {parent})
@@ -273,7 +273,7 @@ def __init__(self, parent, groups, metrics):
273273

274274
@attribute
275275
def values(self):
276-
return FrozenDict({**self.groups, **self.metrics})
276+
return FrozenOrderedDict({**self.groups, **self.metrics})
277277

278278
@attribute
279279
def schema(self):
@@ -285,7 +285,7 @@ class Set(Relation):
285285
left: Relation
286286
right: Relation
287287
distinct: bool = False
288-
values = FrozenDict()
288+
values = FrozenOrderedDict()
289289

290290
def __init__(self, left, right, **kwargs):
291291
# convert to dictionary first, to get key-unordered comparison semantics
@@ -321,7 +321,7 @@ class Difference(Set):
321321
@public
322322
class PhysicalTable(Relation):
323323
name: str
324-
values = FrozenDict()
324+
values = FrozenOrderedDict()
325325

326326

327327
@public
@@ -356,7 +356,7 @@ class SQLQueryResult(Relation):
356356
query: str
357357
schema: Schema
358358
source: Any
359-
values = FrozenDict()
359+
values = FrozenOrderedDict()
360360

361361

362362
@public
@@ -378,12 +378,12 @@ class SQLStringView(Relation):
378378
child: Relation
379379
query: str
380380
schema: Schema
381-
values = FrozenDict()
381+
values = FrozenOrderedDict()
382382

383383

384384
@public
385385
class DummyTable(Relation):
386-
values: FrozenDict[str, Value]
386+
values: FrozenOrderedDict[str, Value]
387387

388388
@attribute
389389
def schema(self):

ibis/expr/schema.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import ibis.expr.datatypes as dt
77
from ibis.common.annotations import attribute
8-
from ibis.common.collections import FrozenDict, MapSet
8+
from ibis.common.collections import FrozenOrderedDict, MapSet
99
from ibis.common.dispatch import lazy_singledispatch
1010
from ibis.common.exceptions import InputTypeError, IntegrityError
1111
from ibis.common.grounds import Concrete
@@ -19,7 +19,7 @@
1919
class Schema(Concrete, Coercible, MapSet):
2020
"""An ordered mapping of str -> [datatype](./datatypes.qmd), used to hold a [Table](./expression-tables.qmd#ibis.expr.tables.Table)'s schema."""
2121

22-
fields: FrozenDict[str, dt.DataType]
22+
fields: FrozenOrderedDict[str, dt.DataType]
2323
"""A mapping of [](`str`) to
2424
[`DataType`](./datatypes.qmd#ibis.expr.datatypes.DataType)
2525
objects representing the type of each column."""

ibis/expr/tests/test_newrels.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1711,3 +1711,12 @@ def test_mutate_ambiguty_check_not_too_strict():
17111711
values={"id": first.id, "v": first.v, "v2": first.id},
17121712
)
17131713
assert second.op() == expected
1714+
1715+
1716+
def test_projections_with_different_field_order_are_unequal():
    """Two selections of the same named values in a different order must not be equal."""
    table = ibis.table({"a": "int64", "b": "string"}, name="t")

    a_then_b = table.select(a=1, b=2)
    b_then_a = table.select(b=2, a=1)

    assert not a_then_b.equals(b_then_a)

ibis/expr/tests/test_schema.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,18 @@ def test_schema_mapping_api():
203203
assert tuple(s.items()) == tuple(zip(s.names, s.types))
204204

205205

206+
def test_schema_equality():
    """Schemas with identical fields are equal; a different order or different fields make them unequal."""
    base = sch.schema({"a": "int64", "b": "string"})
    identical = sch.schema({"a": "int64", "b": "string"})
    reordered = sch.schema({"b": "string", "a": "int64"})
    different = sch.schema({"a": "int64", "b": "int64", "c": "string"})

    assert base == identical
    assert base != reordered
    assert base != different
    assert reordered != identical
216+
217+
206218
class BarSchema:
207219
a: int
208220
b: str

0 commit comments

Comments
 (0)