File tree Expand file tree Collapse file tree 3 files changed +36
-11
lines changed
Expand file tree Collapse file tree 3 files changed +36
-11
lines changed Original file line number Diff line number Diff line change @@ -1747,6 +1747,8 @@ def virtualfile_to_dataset(
17471747 vfname : str ,
17481748 output_type : Literal ["pandas" , "numpy" , "file" ] = "pandas" ,
17491749 column_names : list [str ] | None = None ,
1750+ dtype : type | dict [str , type ] | None = None ,
1751+ index_col : str | int | None = None ,
17501752 ) -> pd .DataFrame | np .ndarray | None :
17511753 """
17521754 Output a tabular dataset stored in a virtual file to a different format.
@@ -1766,6 +1768,11 @@ def virtualfile_to_dataset(
17661768 - ``"file"`` means the result was saved to a file and will return ``None``.
17671769 column_names
17681770 The column names for the :class:`pandas.DataFrame` output.
1771+ dtype
1772+ Data type for the columns of the :class:`pandas.DataFrame` output. Can be a
1773+ single type for all columns or a dictionary mapping column names to types.
1774+ index_col
1775+ Column to set as the index of the :class:`pandas.DataFrame` output.
17691776
17701777 Returns
17711778 -------
@@ -1855,7 +1862,9 @@ def virtualfile_to_dataset(
18551862
18561863 # Read the virtual file as a GMT dataset and convert to pandas.DataFrame
18571864 result = self .read_virtualfile (vfname , kind = "dataset" ).contents .to_dataframe (
1858- names = column_names
1865+ names = column_names ,
1866+ dtype = dtype ,
1867+ index_col = index_col ,
18591868 )
18601869 if output_type == "numpy" : # numpy.ndarray output
18611870 return result .to_numpy ()
Original file line number Diff line number Diff line change @@ -143,14 +143,29 @@ class _GMT_DATASEGMENT(ctp.Structure): # noqa: N801
143143 ("hidden" , ctp .c_void_p ),
144144 ]
145145
146- def to_dataframe (self , names : list [str ] | None = None ) -> pd .DataFrame :
146+ def to_dataframe (
147+ self ,
148+ names : list [str ] | None = None ,
149+ dtype : type | dict [str , type ] | None = None ,
150+ index_col : str | int | None = None ,
151+ ) -> pd .DataFrame :
147152 """
148153 Convert a _GMT_DATASET object to a :class:`pandas.DataFrame` object.
149154
150155 Currently, the number of columns in all segments of all tables is assumed to be
151156 the same. The same column in all segments of all tables is concatenated. The
152157 trailing text column is also concatenated as a single string column.
153158
159+ Parameters
160+ ----------
161+ names
162+ A list of column names.
163+ dtype
164+ Data type. Can be a single type for all columns or a dictionary mapping
165+ column names to types.
166+ index_col
167+ Column to set as index.
168+
154169 Returns
155170 -------
156171 df
@@ -214,4 +229,8 @@ def to_dataframe(self, names: list[str] | None = None) -> pd.DataFrame:
214229 df = pd .concat (objs = vectors , axis = "columns" )
215230 if names is not None : # Assign column names
216231 df .columns = names
232+ if dtype is not None :
233+ df = df .astype (dtype )
234+ if index_col is not None :
235+ df = df .set_index (index_col )
217236 return df
Original file line number Diff line number Diff line change @@ -242,14 +242,11 @@ def compute_bins(
242242 vfname = vouttbl ,
243243 output_type = output_type ,
244244 column_names = ["start" , "stop" , "bin_id" ],
245+ dtype = {
246+ "start" : np .float32 ,
247+ "stop" : np .float32 ,
248+ "bin_id" : np .uint32 ,
249+ },
250+ index_col = "bin_id" if output_type == "pandas" else None ,
245251 )
246- if output_type == "pandas" :
247- result = result .astype (
248- {
249- "start" : np .float32 ,
250- "stop" : np .float32 ,
251- "bin_id" : np .uint32 ,
252- }
253- )
254- return result .set_index ("bin_id" )
255252 return result
You can’t perform that action at this time.
0 commit comments