Skip to content

Commit ed99d55

Browse files
committed
create single row selection function
1 parent 8a59926 commit ed99d55

File tree

4 files changed

+36
-26
lines changed

4 files changed

+36
-26
lines changed

scripts/derived/reanalysis-cerra-land_accumulation.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import sys
1111
from datetime import datetime
1212
sys.path.append('../utilities')
13-
from utils import load_path_from_df, load_output_path_from_row
13+
from utils import load_output_path_from_row, require_single_row
1414

1515
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
1616

@@ -136,11 +136,13 @@ def accumulation(ds,var):
136136

137137
for var in derived_variables_list:
138138
logging.info(f"Calculating {var}")
139-
input_row = df_parameters[(df_parameters['filename_variable'] == var) & (df_parameters['product_type'] == 'raw')]
139+
mask_input = (df_parameters['filename_variable'] == var) & (df_parameters['product_type'] == 'raw')
140+
input_row = require_single_row(df_parameters, mask_input, f"{var}/raw")
140141

141-
var_row = df_parameters[(df_parameters['filename_variable'] == var) & (df_parameters['product_type'] == 'derived')]
142+
mask_var = (df_parameters['filename_variable'] == var) & (df_parameters['product_type'] == 'derived')
143+
var_row = require_single_row(df_parameters, mask_var, f"{var}/derived")
142144
# Use utility function to load input path
143-
var_download_path = load_output_path_from_row(input_row.iloc[0], dataset)
145+
var_download_path = load_output_path_from_row(input_row, dataset)
144146
var_files = np.sort(glob.glob(f"{var_download_path}/*.nc"))
145147
print(f"{var_download_path}/*.nc")
146148
logging.info(f"List of file variables: {var_files}")
@@ -151,11 +153,11 @@ def accumulation(ds,var):
151153
date_str = basename.split('_')[-1].replace(".nc","")
152154
date_obj = datetime.strptime(date_str, "%Y%m")
153155
year = date_obj.year
154-
logging.info(f"Processing year: {year} and end year: {var_row.cds_years_end.iloc[0]}")
155-
if year> var_row.cds_years_end.iloc[0]:
156+
logging.info(f"Processing year: {year} and end year: {var_row.cds_years_end}")
157+
if year > var_row.cds_years_end:
156158
logging.info("Skipping file as it is after the end year")
157159
continue
158-
dest_dir = load_output_path_from_row(var_row.iloc[0], dataset)
160+
dest_dir = load_output_path_from_row(var_row, dataset)
159161
var_file = os.path.basename(file).replace(".nc", "_daily_accumulated.nc")
160162
output_file=Path(f"{dest_dir}/{var_file}")
161163
logging.info(f"Saving calculated {var} to {dest_dir}")

scripts/derived/reanalysis-era5-single-levels.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pathlib import Path
88
import sys
99
sys.path.append('../utilities')
10-
from utils import load_path_from_df, load_output_path_from_row
10+
from utils import load_output_path_from_row, require_single_row
1111

1212
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
1313

@@ -19,27 +19,26 @@ def main():
1919
derived_variables_list = derived_variables.tolist()
2020
for var in derived_variables_list:
2121
logging.info(f"Calculating {var}")
22-
input_row = df_parameters[(df_parameters['filename_variable'] == var) & (df_parameters['product_type'] == 'raw')]
23-
var_row = df_parameters[(df_parameters['filename_variable'] == var) & (df_parameters['product_type'] == 'derived')]
22+
mask_input = (df_parameters['filename_variable'] == var) & (df_parameters['product_type'] == 'raw')
23+
input_row = require_single_row(df_parameters, mask_input, f"{var}/raw")
24+
25+
mask_var = (df_parameters['filename_variable'] == var) & (df_parameters['product_type'] == 'derived')
26+
var_row = require_single_row(df_parameters, mask_var, f"{var}/derived")
2427

2528
# Create a list of years from start to end
2629
year_list = list(range(var_row["cds_years_start"].squeeze() , var_row["cds_years_end"].squeeze() + 1))
2730
for year in year_list:
2831

29-
if var == "sfcwind":
30-
31-
32-
33-
input_row_u10 = df_parameters[(df_parameters['filename_variable'] == "u10") & (df_parameters['product_type'] == 'raw')]
34-
input_row_v10 = df_parameters[(df_parameters['filename_variable'] == "v10") & (df_parameters['product_type'] == 'raw')]
32+
if var == "sfcwind":
33+
input_row_u10 = require_single_row(df_parameters, (df_parameters['filename_variable'] == "u10") & (df_parameters['product_type'] == 'raw'), "u10/raw")
34+
input_row_v10 = require_single_row(df_parameters, (df_parameters['filename_variable'] == "v10") & (df_parameters['product_type'] == 'raw'), "v10/raw")
3535
# Use utility function to load input paths
36-
u10_download_path = load_output_path_from_row(input_row_u10.iloc[0], dataset)
36+
u10_download_path = load_output_path_from_row(input_row_u10, dataset)
3737
u_10_file = glob.glob(f"{u10_download_path}/*{year}*.nc")[0]
38-
v10_download_path = load_output_path_from_row(input_row_v10.iloc[0], dataset)
38+
v10_download_path = load_output_path_from_row(input_row_v10, dataset)
3939
v_10_file = glob.glob(f"{v10_download_path}/*{year}*.nc")[0]
40-
4140
# Use utility function to build output path
42-
dest_dir = load_output_path_from_row(var_row.iloc[0], dataset)
41+
dest_dir = load_output_path_from_row(var_row, dataset)
4342
os.makedirs(dest_dir, exist_ok=True)
4443
sfcwind_file = os.path.basename(u_10_file).replace("u10", "sfcwind")
4544
output_file=Path(f"{dest_dir}/{sfcwind_file}")
@@ -54,7 +53,6 @@ def main():
5453
sfcwind = operations.sfcwind_from_u_v(ds_merge)
5554
sfcwind_daily = operations.resample_to_daily(sfcwind,"valid_time")
5655

57-
5856
logging.info(f"Saving calculated sfcwind to {dest_dir}")
5957
sfcwind_daily.to_netcdf(output_file)
6058

scripts/interpolation/reanalysis-cerra-single-levels.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pathlib import Path
88
import sys
99
sys.path.append('../utilities')
10-
from utils import load_input_path_from_row, load_output_path_from_row
10+
from utils import load_output_path_from_row, require_single_row
1111

1212
def write_to_netcdf(dataset: xr.Dataset, path: str, var: str):
1313
"""
@@ -36,8 +36,9 @@ def main():
3636
variables_file_path = f"../../requests/{dataset}.csv"
3737
df_parameters = pd.read_csv(variables_file_path)
3838

39-
# Load the reference grid file from the first interpolated row
40-
interpolated_row = df_parameters[(df_parameters['interpolation'] != 'native') & (df_parameters['product_type'] == 'derived')].iloc[0]
39+
mask_ref = (df_parameters['interpolation'] != 'native') & (df_parameters['product_type'] == 'derived')
40+
interpolated_row = require_single_row(df_parameters, mask_ref, "interpolated/derived reference row")
41+
4142
interpolation_file = interpolated_row.get('interpolation_file', 'land_sea_mask_0.0625degree.nc4')
4243
ds_ref=xr.open_dataset(f"/lustre/gmeteo/WORK/chantreuxa/cica/data/resources/reference-grids/{interpolation_file}")
4344

@@ -50,7 +51,8 @@ def main():
5051
ds_variable=row["filename_variable"]
5152

5253
# Use utility function to load input path (from raw data)
53-
raw_row = df_parameters[(df_parameters['filename_variable'] == ds_variable) & (df_parameters['product_type'] == 'raw')].iloc[0]
54+
mask_raw = (df_parameters['filename_variable'] == ds_variable) & (df_parameters['product_type'] == 'raw')
55+
raw_row = require_single_row(df_parameters, mask_raw, f"{ds_variable}/raw")
5456
orig_dir = load_output_path_from_row(raw_row, dataset)
5557

5658
# Use utility function to load output path

scripts/utilities/utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,4 +279,12 @@ def download_files(dataset, variables_file_path, create_request_func, get_output
279279
try:
280280
future.result()
281281
except Exception as e:
282-
logging.error(f"Failed to download file: {e}")
282+
logging.error(f"Failed to download file: {e}")
283+
284+
def require_single_row(df, mask, desc=None):
    """Return the single row of *df* selected by the boolean *mask*.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to select from (e.g. the parsed requests CSV).
    mask : boolean Series aligned with ``df``'s index
        Row filter; exactly one row must match.
    desc : str, optional
        Human-readable label (e.g. ``"t2m/raw"``) appended to error
        messages to identify which lookup failed.

    Returns
    -------
    pandas.Series
        The single matching row.

    Raises
    ------
    KeyError
        If no row matches ``mask``.
    ValueError
        If more than one row matches ``mask``.
    """
    matches = df[mask]
    # Build the optional ": <desc>" suffix once; used by both error paths.
    suffix = f": {desc}" if desc else ""
    n_matches = matches.shape[0]
    if n_matches == 0:
        raise KeyError(f"No row found{suffix}")
    if n_matches > 1:
        raise ValueError(f"{n_matches} rows found{suffix} — expected exactly 1")
    return matches.iloc[0]

0 commit comments

Comments
 (0)