add rolling eval func but not finished yet

OuyangWenyu · OuyangWenyu · commit 08d5c9043e7a · 2025-04-27T18:39:10.000+08:00
diff --git a/torchhydro/configs/config.py b/torchhydro/configs/config.py
@@ -237,11 +237,11 @@ def default_config_file():
             # for each batch, we fix length of hindcast and forecast length.
             # data from different lead time with a number representing the lead time,
             # for example, now is 2020-09-30, our min_time_interval is 1 day, hindcast length is 30 and forecast length is 1,
-            # lead_time = 3 means 2020-09-01 to 2020-09-30, and the forecast data is 2020-10-01 from 2020-09-28
+            # lead_time = 3 means the observations span 2020-09-01 to 2020-09-30, and the forecast data for 2020-10-01 is the forecast issued on 2020-09-28
             # for forecast data, we have two different configurations:
-            # 1st, we can set a same lead time for all forecast time
+            # 1st, "fixed": we can set the same lead time for all forecast times
             # 2020-09-30now, 30hindcast, 2forecast, 3leadtime means 2020-09-01 to 2020-09-30 obs concatenate with 2020-10-01 forecast data from 2020-09-28 and 2020-10-02 forecast data from 2020-09-29
-            # 2nd, we can set a increasing lead time for each forecast time
+            # 2nd, "increasing": we can set an increasing lead time for each forecast time
             # 2020-09-30now, 30hindcast, 2forecast, [1, 2]leadtime means 2020-09-01 to 2020-09-30 obs concatenate with 2020-10-01 to 2010-10-02 forecast data from 2020-09-30
             "lead_time_type": "fixed",  # must be fixed or increasing
             "lead_time_start": 1,
diff --git a/torchhydro/datasets/data_sets.py b/torchhydro/datasets/data_sets.py
@@ -1,10 +1,10 @@
 """
 Author: Wenyu Ouyang
 Date: 2024-04-08 18:16:53
-LastEditTime: 2025-04-19 17:35:29
+LastEditTime: 2025-04-27 14:28:24
 LastEditors: Wenyu Ouyang
 Description: A pytorch dataset class; references to https://github.com/neuralhydrology/neuralhydrology
-FilePath: /torchhydro/torchhydro/datasets/data_sets.py
+FilePath: /HydroForecastEval/mnt/disk1/owen/code/torchhydro/torchhydro/datasets/data_sets.py
 Copyright (c) 2024-2024 Wenyu Ouyang. All rights reserved.
 """
 
@@ -806,7 +806,7 @@ def _concat_xf(self, x, f):
         for x_idx, f_idx in self.xf_var_indices.items():
             # Replace the variables in the forecast period of x with the forecast variables in f
             # The forecast period of x starts from the rho position
-            x_combined[self.rho :, x_idx] = f[:, f_idx]
+            x_combined[self.warmup_length + self.rho :, x_idx] = f[:, f_idx]
 
         return x_combined
 
diff --git a/torchhydro/trainers/train_utils.py b/torchhydro/trainers/train_utils.py
@@ -1,7 +1,7 @@
 """
 Author: Wenyu Ouyang
 Date: 2024-04-08 18:16:26
-LastEditTime: 2025-04-18 08:46:07
+LastEditTime: 2025-04-27 18:36:57
 LastEditors: Wenyu Ouyang
 Description: Some basic functions for training
 FilePath: /HydroForecastEval/mnt/disk1/owen/code/torchhydro/torchhydro/trainers/train_utils.py
@@ -308,15 +308,60 @@ def get_evaluation(
         preds_xr = valte_dataset.denormalize(pred)
         obss_xr = valte_dataset.denormalize(obs)
     elif evaluator["eval_way"] == "rolling":
-        # TODO: to be implemented
-        raise NotImplementedError(
-            "we will implement this function in the future, please choose 1pace or once now"
-        )
+        # TODO: to be tested
+        pred = _recover_samples_to_4d(output, valorte_data_loader, evaluator["stride"])
+        obs = _recover_samples_to_4d(labels, valorte_data_loader, evaluator["stride"])
+        valte_dataset = valorte_data_loader.dataset
+        preds_xr = valte_dataset.denormalize(pred)
+        obss_xr = valte_dataset.denormalize(obs)
     else:
         raise ValueError("eval_way should be rolling or 1pace")
     return obss_xr, preds_xr
 
 
+def _recover_samples_to_4d(arr_3d, valorte_data_loader, stride):
+    """Reorganize the 3D prediction results to 4D
+    TODO: to be finished
+
+    Parameters
+    ----------
+    arr_3d : np.ndarray
+        A 3D prediction array with the shape (total number of samples, number of time steps, number of features).
+    valorte_data_loader: DataLoader
+        The corresponding data loader used to obtain the basin-time index mapping.
+    stride: int
+        The stride of the rolling.
+
+    Returns
+    -------
+    np.ndarray
+        The reorganized 4D array with the shape (number of basins, length of time, forecast steps, number of features).
+    """
+    dataset = valorte_data_loader.dataset
+    batch_size = valorte_data_loader.batch_size
+    basin_num = len(dataset.t_s_dict["sites_id"])
+    nt = dataset.nt
+    rho = dataset.rho
+    warmup_len = dataset.warmup_length
+    horizon = dataset.horizon
+    nf = dataset.noutputvar
+
+    # Initialize the 4D array with NaN values
+    basin_array = np.full((basin_num, nt - warmup_len - rho, horizon, nf), np.nan)
+
+    for sample_idx in range(arr_3d.shape[0]):
+        # Get the basin and start time index corresponding to this sample
+        basin, start_time = dataset.lookup_table[sample_idx]
+        # Take all time steps from index warmup_len + rho onward (the forecast period that follows the warmup and rho steps), not only the last step
+        value = arr_3d[sample_idx, warmup_len + rho :, :]
+        # Calculate the time position in the result array
+        result_time_idx = start_time + warmup_len + stride * (sample_idx % batch_size)
+        # Fill in the corresponding position
+        basin_array[basin, result_time_idx, :, :] = value
+
+    return basin_array
+
+
 def _recover_samples_to_basin(arr_3d, valorte_data_loader, pace_idx):
     """Reorganize the 3D prediction results by basin