Add dtype and index_col parameters to virtualfile_to_dataset and to_dataframe

seisman · seisman · commit 9f678446569b · 2024-03-26T14:51:30.000+08:00
diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py
@@ -1747,6 +1747,8 @@ def virtualfile_to_dataset(
         vfname: str,
         output_type: Literal["pandas", "numpy", "file"] = "pandas",
         column_names: list[str] | None = None,
+        dtype: type | dict[str, type] | None = None,
+        index_col: str | int | None = None,
     ) -> pd.DataFrame | np.ndarray | None:
         """
         Output a tabular dataset stored in a virtual file to a different format.
@@ -1766,6 +1768,11 @@ def virtualfile_to_dataset(
             - ``"file"`` means the result was saved to a file and will return ``None``.
         column_names
             The column names for the :class:`pandas.DataFrame` output.
+        dtype
+            Data type for the columns of the :class:`pandas.DataFrame` output. Can be a
+            single type for all columns or a dictionary mapping column names to types.
+        index_col
+            Column to set as the index of the :class:`pandas.DataFrame` output.
 
         Returns
         -------
@@ -1855,7 +1862,9 @@ def virtualfile_to_dataset(
 
         # Read the virtual file as a GMT dataset and convert to pandas.DataFrame
         result = self.read_virtualfile(vfname, kind="dataset").contents.to_dataframe(
-            names=column_names
+            names=column_names,
+            dtype=dtype,
+            index_col=index_col,
         )
         if output_type == "numpy":  # numpy.ndarray output
             return result.to_numpy()
diff --git a/pygmt/datatypes/dataset.py b/pygmt/datatypes/dataset.py
@@ -143,14 +143,29 @@ class _GMT_DATASEGMENT(ctp.Structure):  # noqa: N801
         ("hidden", ctp.c_void_p),
     ]
 
-    def to_dataframe(self, names: list[str] | None = None) -> pd.DataFrame:
+    def to_dataframe(
+        self,
+        names: list[str] | None = None,
+        dtype: type | dict[str, type] | None = None,
+        index_col: str | int | None = None,
+    ) -> pd.DataFrame:
         """
         Convert a _GMT_DATASET object to a :class:`pandas.DataFrame` object.
 
         Currently, the number of columns in all segments of all tables are assumed to be
         the same. The same column in all segments of all tables are concatenated. The
         trailing text column is also concatenated as a single string column.
 
+        Parameters
+        ----------
+        names
+            A list of column names.
+        dtype
+            Data type for the columns. Can be a single type for all columns or a
+            dictionary mapping column names to types.
+        index_col
+            Column to set as the index of the returned DataFrame.
+
         Returns
         -------
         df
@@ -214,4 +229,8 @@ def to_dataframe(self, names: list[str] | None = None) -> pd.DataFrame:
         df = pd.concat(objs=vectors, axis="columns")
         if names is not None:  # Assigne column names
             df.columns = names
+        if dtype is not None:
+            df = df.astype(dtype)
+        if index_col is not None:
+            df = df.set_index(index_col)
         return df
diff --git a/pygmt/src/grdhisteq.py b/pygmt/src/grdhisteq.py
@@ -242,14 +242,11 @@ def compute_bins(
                 vfname=vouttbl,
                 output_type=output_type,
                 column_names=["start", "stop", "bin_id"],
+                dtype={
+                    "start": np.float32,
+                    "stop": np.float32,
+                    "bin_id": np.uint32,
+                },
+                index_col="bin_id" if output_type == "pandas" else None,
             )
-            if output_type == "pandas":
-                result = result.astype(
-                    {
-                        "start": np.float32,
-                        "stop": np.float32,
-                        "bin_id": np.uint32,
-                    }
-                )
-                return result.set_index("bin_id")
             return result