Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 27 additions & 8 deletions deepdiff/deephash.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
convert_item_or_items_into_compiled_regexes_else_none,
get_id, type_is_subclass_of_type_group, type_in_type_group,
number_to_string, datetime_normalize, KEY_TO_VAL_STR,
get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel)
get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel,
separate_wildcard_and_exact_paths)

from deepdiff.base import Base

Expand Down Expand Up @@ -189,6 +190,7 @@ def __init__(self,
custom_operators: Optional[List[Any]] = None,
default_timezone: Union[datetime.timezone, "BaseTzInfo"] = datetime.timezone.utc,
encodings: Optional[List[str]] = None,
exclude_glob_paths: Optional[List[Any]] = None,
exclude_obj_callback: Optional[Callable[[Any, str], bool]] = None,
exclude_paths: Optional[PathType] = None,
exclude_regex_paths: Optional[RegexType] = None,
Expand All @@ -205,6 +207,7 @@ def __init__(self,
ignore_type_in_groups: Any = None,
ignore_type_subclasses: bool = False,
ignore_uuid_types: bool = False,
include_glob_paths: Optional[List[Any]] = None,
include_paths: Optional[PathType] = None,
number_format_notation: str = "f",
number_to_string_func: Optional[NumberToStringFunc] = None,
Expand All @@ -231,8 +234,14 @@ def __init__(self,
exclude_types = set() if exclude_types is None else set(exclude_types)
self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance
self.ignore_repetition = ignore_repetition
self.exclude_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(exclude_paths))
self.include_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(include_paths))
_exclude_set = convert_item_or_items_into_set_else_none(exclude_paths)
_exclude_exact, _exclude_globs = separate_wildcard_and_exact_paths(_exclude_set)
self.exclude_paths = add_root_to_paths(_exclude_exact)
self.exclude_glob_paths = exclude_glob_paths or _exclude_globs
_include_set = convert_item_or_items_into_set_else_none(include_paths)
_include_exact, _include_globs = separate_wildcard_and_exact_paths(_include_set)
self.include_paths = add_root_to_paths(_include_exact)
self.include_glob_paths = include_glob_paths or _include_globs
self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
self.hasher = default_hasher if hasher is None else hasher
self.hashes[UNPROCESSED_KEY] = [] # type: ignore
Expand Down Expand Up @@ -461,11 +470,21 @@ def _skip_this(self, obj: Any, parent: str) -> bool:
skip = False
if self.exclude_paths and parent in self.exclude_paths:
skip = True
if self.include_paths and parent != 'root':
if parent not in self.include_paths:
skip = True
for prefix in self.include_paths:
if parent.startswith(prefix):
elif self.exclude_glob_paths and any(gp.match(parent) for gp in self.exclude_glob_paths):
skip = True
if (self.include_paths or self.include_glob_paths) and parent != 'root':
skip = True
if self.include_paths:
if parent in self.include_paths:
skip = False
else:
for prefix in self.include_paths:
if parent.startswith(prefix):
skip = False
break
if skip and self.include_glob_paths:
for gp in self.include_glob_paths:
if gp.match_or_is_ancestor(parent):
skip = False
break
elif self.exclude_regex_paths and any(
Expand Down
116 changes: 86 additions & 30 deletions deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
TEXT_VIEW, TREE_VIEW, DELTA_VIEW, COLORED_VIEW, COLORED_COMPACT_VIEW,
detailed__dict__, add_root_to_paths,
np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS,
PydanticBaseModel, Opcode, SetOrdered, ipranges)
PydanticBaseModel, Opcode, SetOrdered, ipranges,
separate_wildcard_and_exact_paths)
from deepdiff.serialization import SerializationMixin
from deepdiff.distance import DistanceMixin, logarithmic_similarity
from deepdiff.model import (
Expand Down Expand Up @@ -102,7 +103,9 @@ def _report_progress(_stats: Dict[str, Any], progress_logger: Callable[[str], No
DEEPHASH_PARAM_KEYS = (
'exclude_types',
'exclude_paths',
'exclude_glob_paths',
'include_paths',
'include_glob_paths',
'exclude_regex_paths',
'hasher',
'significant_digits',
Expand Down Expand Up @@ -198,6 +201,10 @@ def __init__(self,
_shared_parameters: Optional[Dict[str, Any]]=None,
**kwargs):
super().__init__()
# Defaults for glob path attributes — needed for non-root instances
# that may receive _parameters without these keys.
self.exclude_glob_paths = None
self.include_glob_paths = None
if kwargs:
raise ValueError((
"The following parameter(s) are not valid: %s\n"
Expand Down Expand Up @@ -245,8 +252,12 @@ def __init__(self,
ignore_type_subclasses=ignore_type_subclasses,
ignore_uuid_types=ignore_uuid_types)
self.report_repetition = report_repetition
self.exclude_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(exclude_paths))
self.include_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(include_paths))
_exclude_set = convert_item_or_items_into_set_else_none(exclude_paths)
_exclude_exact, self.exclude_glob_paths = separate_wildcard_and_exact_paths(_exclude_set)
self.exclude_paths = add_root_to_paths(_exclude_exact)
_include_set = convert_item_or_items_into_set_else_none(include_paths)
_include_exact, self.include_glob_paths = separate_wildcard_and_exact_paths(_include_set)
self.include_paths = add_root_to_paths(_include_exact)
self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
self.exclude_types = set(exclude_types) if exclude_types else None
self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None # we need tuple for checking isinstance
Expand Down Expand Up @@ -402,7 +413,7 @@ def _group_by_sort_key(x):
self.__dict__.clear()

def _get_deephash_params(self):
    """Collect the hashing-related settings of this instance into one dict.

    Every name listed in ``DEEPHASH_PARAM_KEYS`` is copied from
    ``self._parameters``; a name that is absent maps to ``None``.
    Two entries are then derived from instance state:

    * ``ignore_repetition`` is the negation of ``self.report_repetition``.
    * ``number_to_string_func`` is ``self.number_to_string``.

    Returns:
        dict: keyword-style mapping of parameter name to value
        (presumably passed on to DeepHash -- confirm against the callers).
    """
    # Use .get() instead of indexing: _parameters may not carry every key
    # (e.g. the glob-path keys), and a strict lookup would raise KeyError
    # before the tolerant lookup ever ran.
    result = {key: self._parameters.get(key) for key in DEEPHASH_PARAM_KEYS}
    result['ignore_repetition'] = not self.report_repetition
    result['number_to_string_func'] = self.number_to_string
    return result
Expand All @@ -421,6 +432,8 @@ def _report_result(self, report_type, change_level, local_tree=None):
"""

if not self._skip_this(change_level):
if self._skip_report_for_include_glob(change_level):
return
change_level.report_type = report_type
tree = self.tree if local_tree is None else local_tree
tree[report_type].add(change_level)
Expand All @@ -440,10 +453,33 @@ def custom_report_result(self, report_type, level, extra_info=None):
"""

if not self._skip_this(level):
if self._skip_report_for_include_glob(level):
return
level.report_type = report_type
level.additional[CUSTOM_FIELD] = extra_info
self.tree[report_type].add(level)

def _skip_report_for_include_glob(self, level):
"""When include_glob_paths is set, _skip_this allows ancestors through for traversal.
This method does a stricter check at report time: only report if the path
actually matches a glob pattern or is a descendant of a matching path,
or if it already matches an exact include_path."""
if not self.include_glob_paths:
return False
level_path = level.path()
# If exact include_paths already matched, don't skip
if self.include_paths:
if level_path in self.include_paths:
return False
for prefix in self.include_paths:
if prefix in level_path:
return False
# Check glob patterns: match or descendant
for gp in self.include_glob_paths:
if gp.match_or_is_descendant(level_path):
return False
return True

@staticmethod
def _dict_from_slots(object: Any) -> Dict[str, Any]:
def unmangle(attribute: str) -> str:
Expand Down Expand Up @@ -531,11 +567,21 @@ def _skip_this(self, level: Any) -> bool:
skip = False
if self.exclude_paths and level_path in self.exclude_paths:
skip = True
if self.include_paths and level_path != 'root':
if level_path not in self.include_paths:
skip = True
for prefix in self.include_paths:
if prefix in level_path or level_path in prefix:
elif self.exclude_glob_paths and any(gp.match(level_path) for gp in self.exclude_glob_paths):
skip = True
if not skip and (self.include_paths or self.include_glob_paths) and level_path != 'root':
skip = True
if self.include_paths:
if level_path in self.include_paths:
skip = False
else:
for prefix in self.include_paths:
if prefix in level_path or level_path in prefix:
skip = False
break
if skip and self.include_glob_paths:
for gp in self.include_glob_paths:
if gp.match_or_is_ancestor(level_path):
skip = False
break
elif self.exclude_regex_paths and any(
Expand Down Expand Up @@ -565,28 +611,34 @@ def _skip_this(self, level: Any) -> bool:

def _skip_this_key(self, level: Any, key: Any) -> bool:
# if include_paths is not set, then treat every path as included
if self.include_paths is None:
return False
if "{}['{}']".format(level.path(), key) in self.include_paths:
return False
if level.path() in self.include_paths:
# matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']"]
if self.include_paths is None and self.include_glob_paths is None:
return False
for prefix in self.include_paths:
if "{}['{}']".format(level.path(), key) in prefix:
# matches as long the prefix is longer than this object key
# eg.: level+key root['foo']['bar'] matches prefix root['foo']['bar'] from include paths
# level+key root['foo'] matches prefix root['foo']['bar'] from include_paths
# level+key root['foo']['bar'] DOES NOT match root['foo'] from include_paths This needs to be handled afterwards
key_path = "{}['{}']".format(level.path(), key)
if self.include_paths:
if key_path in self.include_paths:
return False
# check if a higher level is included as a whole (=without any sublevels specified)
# matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']"]
# but does not match, if it is level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']['fruits']"]
up = level.up
while up is not None:
if up.path() in self.include_paths:
if level.path() in self.include_paths:
# matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']"]
return False
up = up.up
for prefix in self.include_paths:
if key_path in prefix:
# matches as long the prefix is longer than this object key
# eg.: level+key root['foo']['bar'] matches prefix root['foo']['bar'] from include paths
# level+key root['foo'] matches prefix root['foo']['bar'] from include_paths
# level+key root['foo']['bar'] DOES NOT match root['foo'] from include_paths This needs to be handled afterwards
return False
# check if a higher level is included as a whole (=without any sublevels specified)
# matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']"]
# but does not match, if it is level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']['fruits']"]
up = level.up
while up is not None:
if up.path() in self.include_paths:
return False
up = up.up
if self.include_glob_paths:
for gp in self.include_glob_paths:
if gp.match_or_is_ancestor(key_path):
return False
return True

def _get_clean_to_keys_mapping(self, keys: Any, level: Any) -> Dict[Any, Any]:
Expand Down Expand Up @@ -680,9 +732,13 @@ def _diff_dict(
t_keys_removed = t1_keys - t_keys_intersect

if self.threshold_to_diff_deeper:
if self.exclude_paths:
if self.exclude_paths or self.exclude_glob_paths:
t_keys_union = {f"{level.path()}[{repr(key)}]" for key in (t2_keys | t1_keys)}
t_keys_union -= self.exclude_paths
if self.exclude_paths:
t_keys_union -= self.exclude_paths
if self.exclude_glob_paths:
t_keys_union = {k for k in t_keys_union
if not any(gp.match(k) for gp in self.exclude_glob_paths)}
t_keys_union_len = len(t_keys_union)
else:
t_keys_union_len = len(t2_keys | t1_keys)
Expand Down
24 changes: 24 additions & 0 deletions deepdiff/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,30 @@ def add_root_to_paths(paths: Optional[Iterable[str]]) -> Optional[SetOrdered]:
return result


def separate_wildcard_and_exact_paths(paths):
    """Partition ``paths`` into literal paths and compiled wildcard patterns.

    Args:
        paths: iterable of path strings, or a falsy value (``None``, empty).

    Returns:
        tuple: ``(exact, globs)``. ``exact`` is a ``set`` of the paths for
        which ``path_has_wildcard`` is false, or ``None`` if there are none.
        ``globs`` is the result of ``compile_glob_paths`` applied to the
        wildcard paths, or ``None`` if there are none. A falsy ``paths``
        yields ``(None, None)``.

    Raises:
        ValueError: if a wildcard path does not start with ``root``.
    """
    if not paths:
        return None, None

    # Imported inside the function, as in the original
    # (presumably to avoid a circular import -- confirm).
    from deepdiff.path import path_has_wildcard, compile_glob_paths

    literal_paths = set()
    wildcard_sources = []
    for candidate in paths:
        if not path_has_wildcard(candidate):
            literal_paths.add(candidate)
            continue
        if not candidate.startswith('root'):
            raise ValueError(
                "Wildcard paths must start with 'root'. Got: {}".format(candidate))
        wildcard_sources.append(candidate)

    exact_result = literal_paths or None
    glob_result = compile_glob_paths(wildcard_sources) if wildcard_sources else None
    return exact_result, glob_result


RE_COMPILED_TYPE = type(re.compile(''))


Expand Down
Loading
Loading