-
Notifications
You must be signed in to change notification settings - Fork 154
Checks in readers/writers to enforce correct types for event_name and event_id container and elements
#951
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
b2fdbd6
update Impact reader(writer) to warn(fail) if event_name not all str
spjuhel 09a0ee0
Renames dfr to imp_df for consistency with from_csv()
spjuhel cafeb7c
implements a checker/caster methods
spjuhel 87ad969
updates test to comply with new checks
spjuhel 82f6421
correct column access style
spjuhel bc86758
Merge branch 'develop' into feature/event_name_handling
spjuhel 5ecb3c0
Merge remote-tracking branch 'origin/develop-white' into feature/even…
ca71118
applied black to branch
77113f1
Merge branch 'develop-black' into feature/event_name_handling
d080fef
merge develop into branch
bb5f855
Merge branch 'develop' into feature/event_name_handling
emanuel-schmid a7bb5f2
fix mergin mistake and re-lint
emanuel-schmid File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,8 @@ | |
| import itertools | ||
| import logging | ||
| import pathlib | ||
| import warnings | ||
| from collections.abc import Collection | ||
| from typing import Any, Callable, Dict, Optional, Union | ||
|
|
||
| import h5py | ||
|
|
@@ -177,6 +179,8 @@ def from_raster( | |
| files_fraction = [files_fraction] | ||
| if not attrs: | ||
| attrs = {} | ||
| else: | ||
| attrs = cls._check_and_cast_attrs(attrs) | ||
| if not band: | ||
| band = [1] | ||
| if files_fraction is not None and len(files_intensity) != len(files_fraction): | ||
|
|
@@ -889,10 +893,135 @@ def vshape(array): | |
| **ident | ||
| ) | ||
|
|
||
| hazard_kwargs = cls._check_and_cast_attrs(hazard_kwargs) | ||
|
|
||
| # Done! | ||
| LOGGER.debug("Hazard successfully loaded. Number of events: %i", num_events) | ||
| return cls(centroids=centroids, intensity=intensity_matrix, **hazard_kwargs) | ||
|
|
||
| @staticmethod | ||
| def _check_and_cast_attrs(attrs: Dict[str, Any]) -> Dict[str, Any]: | ||
| """Check the validity of the hazard attributes given and cast to correct type if required and possible. | ||
|
|
||
| The current purpose is to check that event_name is a list of string | ||
| (and convert to string otherwise), although other checks and casting could be included here in the future. | ||
|
|
||
| Parameters | ||
| ---------- | ||
|
|
||
| attrs : dict | ||
| Attributes for a new Hazard object | ||
|
|
||
| Returns | ||
| ------- | ||
|
|
||
| attrs : dict | ||
| Attributes checked for type validity and casted otherwise (only event_name at the moment). | ||
|
|
||
| Warns | ||
| ----- | ||
|
|
||
| UserWarning | ||
| Warns the user if any value casting happens. | ||
| """ | ||
|
|
||
| def _check_and_cast_container( | ||
| attr_value: Any, expected_container: Collection | ||
| ) -> Any: | ||
| """Check if the attribute is of the expected container type and cast if necessary. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| attr_value : any | ||
| The current value of the attribute. | ||
|
|
||
| expected_container : type | ||
| The expected type of the container (e.g., list, np.ndarray). | ||
|
|
||
| Returns | ||
| ------- | ||
| attr_value : any | ||
| The value cast to the expected container type, if needed. | ||
| """ | ||
| if not isinstance(attr_value, expected_container): | ||
| warnings.warn( | ||
| f"Value should be of type {expected_container}. Casting it.", | ||
| UserWarning, | ||
| ) | ||
| # Attempt to cast to the expected container type | ||
| if expected_container is list: | ||
| return list(attr_value) | ||
| elif expected_container is np.ndarray: | ||
| return np.array(attr_value) | ||
| else: | ||
| raise TypeError(f"Unsupported container type: {expected_container}") | ||
| return attr_value | ||
|
|
||
| def _check_and_cast_elements( | ||
| attr_value: Any, expected_dtype: Union[Any, None] | ||
| ) -> Any: | ||
| """Check if the elements of the container are of the expected dtype and cast if necessary, | ||
| while preserving the original container type. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| attr_value : any | ||
| The current value of the attribute (a container). | ||
|
|
||
| expected_dtype : type or None | ||
| The expected type of the elements within the container. If None, no casting is done. | ||
|
|
||
| Returns | ||
| ------- | ||
| attr_value : any | ||
| The value with elements cast to the expected type, preserving the original container type. | ||
| """ | ||
| if expected_dtype is None: | ||
| # No dtype enforcement required | ||
| return attr_value | ||
|
|
||
| container_type = type(attr_value) # Preserve the original container type | ||
|
|
||
| # Perform type checking and casting of elements | ||
| if isinstance(attr_value, (list, np.ndarray)): | ||
| if not all(isinstance(val, expected_dtype) for val in attr_value): | ||
| warnings.warn( | ||
| f"Not all values are of type {expected_dtype}. Casting values.", | ||
| UserWarning, | ||
| ) | ||
| casted_values = [expected_dtype(val) for val in attr_value] | ||
| # Return the casted values in the same container type | ||
| if container_type is list: | ||
| return casted_values | ||
| elif container_type is np.ndarray: | ||
| return np.array(casted_values) | ||
| else: | ||
| raise TypeError(f"Unsupported container type: {container_type}") | ||
| else: | ||
| raise TypeError( | ||
| f"Expected a container (e.g., list or ndarray), got {type(attr_value)} instead." | ||
| ) | ||
|
|
||
| return attr_value | ||
|
|
||
| ## This should probably be defined as a CONSTANT? | ||
| attrs_to_check = {"event_name": (list, str), "event_id": (np.ndarray, None)} | ||
|
Comment on lines
+1007
to
+1008
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 agreed. CONSTANT dict seems sensible. |
||
|
|
||
| for attr_name, (expected_container, expected_dtype) in attrs_to_check.items(): | ||
| attr_value = attrs.get(attr_name) | ||
|
|
||
| if attr_value is not None: | ||
| # Check and cast the container type | ||
| attr_value = _check_and_cast_container(attr_value, expected_container) | ||
|
|
||
| # Check and cast the element types (if applicable) | ||
| attr_value = _check_and_cast_elements(attr_value, expected_dtype) | ||
|
|
||
| # Update the attrs dictionary with the modified value | ||
| attrs[attr_name] = attr_value | ||
|
|
||
| return attrs | ||
|
|
||
| @staticmethod | ||
| def _attrs_to_kwargs(attrs: Dict[str, Any], num_events: int) -> Dict[str, Any]: | ||
| """Transform attributes to init kwargs or use default values | ||
|
|
@@ -986,7 +1115,9 @@ def from_excel(cls, file_name, var_names=None, haz_type=None): | |
| centroids = Centroids._legacy_from_excel( | ||
| file_name, var_names=var_names["col_centroids"] | ||
| ) | ||
| hazard_kwargs.update(cls._read_att_excel(file_name, var_names, centroids)) | ||
| attrs = cls._read_att_excel(file_name, var_names, centroids) | ||
| attrs = cls._check_and_cast_attrs(attrs) | ||
| hazard_kwargs.update(attrs) | ||
| except KeyError as var_err: | ||
| raise KeyError("Variable not in Excel file: " + str(var_err)) from var_err | ||
|
|
||
|
|
@@ -1071,6 +1202,9 @@ def write_hdf5(self, file_name, todense=False): | |
| with h5py.File(file_name, "w") as hf_data: | ||
| str_dt = h5py.special_dtype(vlen=str) | ||
| for var_name, var_val in self.__dict__.items(): | ||
| if var_name == "event_name": | ||
| if not all((isinstance(val, str) for val in var_val)): | ||
| raise TypeError("'event_name' must be a list of strings") | ||
| if var_name == "centroids": | ||
| # Centroids have their own write_hdf5 method, | ||
| # which is invoked at the end of this method (s.b.) | ||
|
|
@@ -1172,6 +1306,7 @@ def from_hdf5(cls, file_name): | |
| else: | ||
| hazard_kwargs[var_name] = hf_data.get(var_name) | ||
| hazard_kwargs["centroids"] = Centroids.from_hdf5(file_name) | ||
| hazard_kwargs = cls._check_and_cast_attrs(hazard_kwargs) | ||
| # Now create the actual object we want to return! | ||
| return cls(**hazard_kwargs) | ||
|
|
||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍 good choice. even though in case of
attr_valuebeing and.array,np.issubtye(attr_value, expected_type)would be infinitely faster but make the code more complex with nothing to be gained but a couple of seconds if the array is huge.