
Commit 93b4d47

Merge pull request #88 from TUDelftGeodesy/add_api_doc
Add api doc
2 parents 0bb6953 + adea1b8 commit 93b4d47

5 files changed: +131 additions, −83 deletions


docs/api_reference.md

Lines changed: 25 additions & 0 deletions

@@ -0,0 +1,25 @@
+---
+hide:
+  - navigation
+---
+#
+
+Here is the API reference for the `stmtools` package.
+
+## **Space Time Matrix module**
+
+::: stmtools.stm.SpaceTimeMatrix
+
+## **I/O module**
+
+::: stmtools._io.from_csv
+
+## **Metadata schema module**
+
+::: stmtools.metadata.STMMetaData
+
+## **Utility**
+
+::: stmtools.utils.crop
+
+::: stmtools.utils.monotonic_coords

mkdocs.yml

Lines changed: 23 additions & 7 deletions

@@ -3,10 +3,10 @@ repo_url: https://github.com/tudelftgeodesy/stmtools/
 repo_name: STM Tools
 
 nav:
-  - Getting Started:
+  - Getting Started:
    - About STM Tools: index.md
    - Installation: setup.md
-  - Usage:
+  - Usage:
    - Initiate an STM: stm_init.md
    - Operations on STM: operations.md
    - Ordering an STM: order.md
@@ -17,6 +17,7 @@ nav:
   - Contributing Guidelines: CONTRIBUTING.md
   - Code of Conduct: CODE_OF_CONDUCT.md
   - Change Log: CHANGELOG.md
+  - API Reference: api_reference.md
 
 
 theme:
@@ -44,7 +45,7 @@ theme:
   - navigation.tabs
   - navigation.tabs.sticky
   - content.code.copy
-
+
 plugins:
   - mkdocs-jupyter:
      include_source: True
@@ -53,16 +54,31 @@ plugins:
   handlers:
     python:
       options:
-        docstring_style: google
+        docstring_style: numpy
        docstring_options:
-          ignore_init_summary: no
-        merge_init_into_class: yes
-        show_submodules: no
+          ignore_init_summary: true
+        merge_init_into_class: true
+        docstring_section_style: list
+        show_submodules: true
+        show_root_heading: true
+        show_source: true
+        heading_level: 3
+        relative_crossrefs: true
+        parameter_headings: false
+        separate_signature: true
+        show_bases: true
+        show_signature_annotations: true
+        show_symbol_type_heading: true
+        signature_crossrefs: true
+        summary: true
+        backlinks: tree
+        scoped_crossrefs: true
 
 markdown_extensions:
   - pymdownx.highlight:
      anchor_linenums: true
  - pymdownx.superfences
+  - pymdownx.highlight
 
 extra:
   generator: false
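The switch from `docstring_style: google` to `docstring_style: numpy` means mkdocstrings will now parse NumPy-section docstrings throughout the package. As a minimal sketch of the style the new config expects (the function and its parameters here are hypothetical, for illustration only):

```python
def scale(value, factor=1.0):
    """Scale a value by a factor.

    Parameters
    ----------
    value : float
        The value to scale.
    factor : float, optional
        Multiplier applied to ``value``, by default 1.0.

    Returns
    -------
    float
        The scaled value.
    """
    return value * factor


# NumPy-style docstrings use underlined section headers ("Parameters",
# "Returns"), which is what the updated mkdocstrings config now parses.
print("Parameters" in scale.__doc__)  # True
print(scale(2.0, factor=3.0))         # 6.0
```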

pyproject.toml

Lines changed: 3 additions & 0 deletions

@@ -152,3 +152,6 @@ line-ending = "auto"
 
 [tool.ruff.per-file-ignores]
 "tests/**" = ["D"]
+
+[tool.ruff.pydocstyle]
+convention = "numpy"

stmtools/_io.py

Lines changed: 37 additions & 37 deletions

@@ -25,47 +25,47 @@ def from_csv(
     """Initiate an STM instance from a csv file.
 
     The specified csv file will be loaded using `dask.dataframe.read_csv` with a fixed blocksize.
-
     The columns of the csv file will be classified into coordinates, and data variables.
-
     This classification is performed by Regular Expression (RE) pattern matching according to
-    three variables: `space_pattern`, `spacetime_pattern` and `coords_cols`.
-
+    three variables: `space_pattern`, `spacetime_pattern` and `coords_cols`.
     The following assumptions are made to the column names of the csv file:
-    1. All columns with space-only attributes share the same RE pattern in the column names.
-       E.g. Latitude, Longitude and height columns are named as "pnt_lat", "pnt_lon" and
-       "pnt_height", sharing the same RE pattern "^pnt_";
-    2. Per space-time attribute, a common RE pattern is shared by all columns. E.g. for the
-       time-series of amplitude data, the column names are "a_20100101", "a_20100110",
-       "a_20100119" ..., where "^a_" is the common RE pattern;
-    3. There is no temporal-only (i.e. 1-row attribute) attribute present in the csv file.
-
-    `from_csv` does not retrieve time stamps based on column names. The `time` coordinate of
-    the output STM will be a monotonic integer series starting from 0.
-
-    Args:
-    ----
-        file (str | Path): Path to the csv file.
-        space_pattern (str, optional): RE pattern to match space attribute columns.
-            Defaults to "^pnt_".
-        spacetime_pattern (dict | None, optional): A dictionary mapping RE patterns of each
-            space-time attribute to corresponding variable names. Defaults to None, which means
-            the following map will be applied:
-            {"^d_": "deformation", "^a_": "amplitude", "^h2ph_": "h2ph"}.
-        coords_cols (list | dict, optional): List of columns to be used as space coordinates.
-            When `coords_cols` is a dictionary, a renaming will be performed per coordinate.
-            Defaults to None, then the following renaming will be performed:
-            {"pnt_lat": "lat", "pnt_lon": "lon"}
-        output_chunksize (dict | None, optional): Chunksize of the output. Defaults to None,
-            then the size of the first chunk in the DaskDataFrame will be used, rounded up to
-            the next 5000.
-        blocksize (int | str | None, optional): Blocksize to load the csv.
-            Defaults to 200e6 (in bytes). See the documentation of
-            [dask.dataframe.read_csv](https://docs.dask.org/en/stable/generated/dask.dataframe.read_csv.html)
-
-    Returns:
+
+    1. All columns with space-only attributes share the same RE pattern in the column names.
+       E.g. Latitude, Longitude and height columns are named as "pnt_lat", "pnt_lon" and
+       "pnt_height", sharing the same RE pattern "^pnt_";
+    2. Per space-time attribute, a common RE pattern is shared by all columns. E.g. for the
+       time-series of amplitude data, the column names are "a_20100101", "a_20100110",
+       "a_20100119" ..., where "^a_" is the common RE pattern;
+    3. There is no temporal-only (i.e. 1-row attribute) attribute present in the csv file.
+
+    Parameters
+    ----------
+    file: str | Path
+        Path to the csv file.
+    space_pattern: str, optional
+        RE pattern to match space attribute columns. Defaults to "^pnt_".
+    spacetime_pattern: dict | None, optional
+        A dictionary mapping RE patterns of each space-time attribute to
+        corresponding variable names. Defaults to None, which means the
+        following map will be applied: {"^d_": "deformation", "^a_":
+        "amplitude", "^h2ph_": "h2ph"}.
+    coords_cols: list | dict, optional
+        List of columns to be used as space coordinates. When `coords_cols` is a
+        dictionary, a renaming will be performed per coordinate. Defaults to
+        None, then the following renaming will be performed: {"pnt_lat": "lat",
+        "pnt_lon": "lon"}
+    output_chunksize: dict | None, optional
+        Chunksize of the output. Defaults to None, then the size of the first
+        chunk in the DaskDataFrame will be used, rounded up to the next 5000.
+    blocksize: int | str | None, optional
+        Blocksize to load the csv. Defaults to 200e6 (in bytes). See the
+        documentation of
+        [dask.dataframe.read_csv](https://docs.dask.org/en/stable/generated/dask.dataframe.read_csv.html)
+
+    Returns
     -------
-        xr.Dataset: Output STM instance
+    xr.Dataset
+        Output STM instance
 
     """
     # Load csv as Dask DataFrame
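The column-classification behaviour described in the `from_csv` docstring can be illustrated with plain `re` matching. The column names below are hypothetical, but the patterns are the defaults stated in the docstring:

```python
import re

# Hypothetical column names as they might appear in an STM csv export.
columns = ["pnt_lat", "pnt_lon", "pnt_height",
           "a_20100101", "a_20100110", "d_20100101"]

# Default patterns from the from_csv docstring.
space_pattern = "^pnt_"
spacetime_pattern = {"^d_": "deformation", "^a_": "amplitude", "^h2ph_": "h2ph"}

# Space-only attributes: all columns matching the single space pattern.
space_cols = [c for c in columns if re.match(space_pattern, c)]

# Space-time attributes: one list of columns per pattern, keyed by the
# variable name the pattern maps to.
spacetime_cols = {
    name: [c for c in columns if re.match(pattern, c)]
    for pattern, name in spacetime_pattern.items()
}

print(space_cols)                    # ['pnt_lat', 'pnt_lon', 'pnt_height']
print(spacetime_cols["amplitude"])   # ['a_20100101', 'a_20100110']
print(spacetime_cols["deformation"]) # ['d_20100101']
```

This is only a sketch of the classification step; the actual function additionally builds the xarray/Dask structures around these groups.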

stmtools/stm.py

Lines changed: 43 additions & 39 deletions

@@ -48,18 +48,15 @@ def add_metadata(self, metadata):
     def regulate_dims(self, space_label=None, time_label=None):
         """Regulate the dimension of a Space-Time Matrix instance.
 
-        An STM should have two dimensions: "space" and "time".
-
-        If the input argument `space_label` or `time_label` is specified,
-        and that dimension exists, the function will rename that dimension to "space" or "time".
-
-        If either `space_label` or `time_label` is None, a "space" or "time" dimension with
-        size 1 will be created.
-
-        If both `space_label` and `time_label` are None, data variables will also be regulated.
-
-        Data variables with a name starting with "pnt_" are regarded as
-        point-only attributes and will not be affected by "time" dimension expansion.
+        An STM should have two dimensions: `"space"` and `"time"`. If the input
+        argument `space_label` or `time_label` is specified, and that dimension
+        exists, the function will rename that dimension to "space" or "time". If
+        either `space_label` or `time_label` is None, a "space" or "time"
+        dimension with size 1 will be created. If both `space_label` and
+        `time_label` are None, data variables will also be regulated. Data
+        variables with a name starting with "pnt_" are regarded as
+        point-only attributes and will not be affected by "time" dimension
+        expansion.
 
         Parameters
         ----------
@@ -113,16 +110,23 @@ def subset(self, method: str, **kwargs):
         ----------
         method : str
             Method of subsetting. Choose from "threshold", "density" and "polygon".
+
             - threshold: select all space entries with a threshold criterion, e.g.
+              ```python
               data_xr.stm.subset(method="threshold", var="thres", threshold='>1')
+              ```
             - density: select one point in every [dx, dy] cell, e.g.
+              ```python
               data_xr.stm.subset(method="density", dx=0.1, dy=0.1)
+              ```
             - polygon: select all space entries inside a given polygon, e.g.
+              ```python
               data_xr.stm.subset(method='polygon', polygon=path_polygon_file)
-              or
+              # or
               import geopandas as gpd
               polygon = gpd.read_file(path_polygon_file)
               data_xr.stm.subset(method='polygon', polygon=polygon)
+              ```
         **kwargs:
             - when method="threshold": data variable "var" and threshold "threshold"
             - when method="density": x and y density size: "dx" and "dy"
@@ -187,11 +191,9 @@ def enrich_from_polygon(self, polygon, fields, xlabel="lon", ylabel="lat"):
         """Enrich the SpaceTimeMatrix from one or more attribute fields of a (multi-)polygon.
 
         Each attribute in fields will be assigned as a data variable to the STM.
-        If a point of the STM falls into the given polygon, the value of the specified field will
-        be added.
-
-        For space entries outside the (multi-)polygon, the value will be None.
+        If a point of the STM falls into the given polygon, the value of the
+        specified field will be added. For space entries outside the
+        (multi-)polygon, the value will be None.
 
         Parameters
         ----------
@@ -200,9 +202,9 @@ def enrich_from_polygon(self, polygon, fields, xlabel="lon", ylabel="lat"):
         fields : str or list of str
             Field name(s) in the (multi-)polygon for enrichment
         xlabel : str, optional
-            Name of the x-coordinates of the STM, by default "lon"
+            Name of the x-coordinates of the STM, by default `"lon"`
         ylabel : str, optional
-            Name of the y-coordinates of the STM, by default "lat"
+            Name of the y-coordinates of the STM, by default `"lat"`
 
         Returns
         -------
@@ -332,7 +334,7 @@ def register_datatype(self, keys: str | Iterable, datatype: DataVarTypes):
         keys : Union[str, Iterable]
             Keys of the data variables to register
         datatype : str in DataVarTypes
-            String of the datatype. Choose from ["obsData", "auxData", "pntAttrib", "epochAttrib"].
+            String of the datatype. Choose from `["obsData", "auxData", "pntAttrib", "epochAttrib"]`.
 
         Returns
         -------
@@ -353,12 +355,12 @@ def register_datatype(self, keys: str | Iterable, datatype: DataVarTypes):
     def get_order(self, xlabel="azimuth", ylabel="range", xscale=1.0, yscale=1.0):
         """Compute an ordering on the points based on coordinates with xlabel and ylabel.
 
-        This order is stored in a (new) point attribute "order".
-
-        Note that this ordering is most intuitive for integer coordinates (e.g. pixel coordinates).
-        For float coordinates (e.g. lat-lon), the coordinates should be scaled to determine the
-        resolution of the ordering: only the whole-number part influences the order.
-        While coordinates could also be offset, this has limited effect on the relative order.
+        This order is stored in a (new) point attribute "order". Note that this
+        ordering is most intuitive for integer coordinates (e.g. pixel
+        coordinates). For float coordinates (e.g. lat-lon), the coordinates
+        should be scaled to determine the resolution of the ordering: only the
+        whole-number part influences the order. While coordinates could also be
+        offset, this has limited effect on the relative order.
 
         Parameters
         ----------
@@ -388,13 +390,14 @@ def get_order(self, xlabel="azimuth", ylabel="range", xscale=1.0, yscale=1.0):
     def reorder(self, xlabel="azimuth", ylabel="range", xscale=1.0, yscale=1.0):
         """Compute and apply an ordering on the points based on coordinates with xlabel and ylabel.
 
-        Note that this ordering is most intuitive for integer coordinates (e.g. pixel coordinates).
-        For float coordinates (e.g. lat-lon), the coordinates should be scaled to determine the
-        resolution of the ordering: only the whole-number part influences the order.
-        While coordinates could also be offset, this has limited effect on the relative order.
-
-        Also note that reordering a dataset may be an expensive operation. Because it is applied
-        lazily, this performance cost will only manifest once the elements are evaluated.
+        Note that this ordering is most intuitive for integer coordinates (e.g.
+        pixel coordinates). For float coordinates (e.g. lat-lon), the
+        coordinates should be scaled to determine the resolution of the
+        ordering: only the whole-number part influences the order. While
+        coordinates could also be offset, this has limited effect on the
+        relative order. Also note that reordering a dataset may be an expensive
+        operation. Because it is applied lazily, this performance cost will only
+        manifest once the elements are evaluated.
 
         Parameters
         ----------
@@ -411,7 +414,7 @@ def reorder(self, xlabel="azimuth", ylabel="range", xscale=1.0, yscale=1.0):
 
         """
         self._obj = self.get_order(xlabel, ylabel, xscale, yscale)
-
+
         # Sorting may split up the chunks, which may interfere with later operations
         # so we immediately restore the chunk sizes.
         chunks = {"space": self._obj.chunksizes["space"][0]}
@@ -440,8 +443,8 @@ def enrich_from_dataset(self,
         """Enrich the SpaceTimeMatrix from one or more fields of a dataset.
 
         scipy is required. If the dataset is raster, it uses
-        _enrich_from_raster_block to do interpolation using method. If the dataset
-        is point, it uses _enrich_from_points_block to find the nearest points
+        `_enrich_from_raster_block` to do interpolation using `method`. If the
+        dataset is point, it uses `_enrich_from_points_block` to find the nearest points
         in space and time using Euclidean distance.
 
         Parameters
@@ -451,8 +454,9 @@ def enrich_from_dataset(self,
         fields : str or list of str
             Field name(s) in the dataset for enrichment
         method : str, optional
-            Method of interpolation, by default "nearest", see
-            https://docs.xarray.dev/en/stable/generated/xarray.Dataset.interp.html
+            Method of interpolation, by default `"nearest"`, see [xarray
+            interpolation
+            methods](https://docs.xarray.dev/en/stable/generated/xarray.Dataset.interp.html)
 
         Returns
         -------
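The scaling rule in the `get_order`/`reorder` docstrings ("only the whole-number part influences the order") can be illustrated with a toy key function. This is not the package's actual ordering algorithm, only a sketch of why float coordinates need scaling; the coordinate values are made up:

```python
import math

def order_key(x, y, xscale=1.0, yscale=1.0):
    # Only the whole-number part of each scaled coordinate contributes
    # to the key, mirroring the rule described in the docstring.
    return (math.floor(x * xscale), math.floor(y * yscale))

# Two lat-lon points that differ only in the fractional part collapse to
# the same key at scale 1.0, but are distinguished at a finer scale.
print(order_key(52.01, 4.35) == order_key(52.49, 4.38))                       # True
print(order_key(52.01, 4.35, 100, 100) == order_key(52.49, 4.38, 100, 100))  # False
```

This shows why `xscale`/`yscale` set the effective resolution of the ordering, while a constant offset would shift all keys together and leave the relative order largely unchanged.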
