From 1e1366300bd6691ecda4356fc1efdf13721ef177 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 12 May 2026 11:38:11 +0100 Subject: [PATCH 1/6] Add generic range-parsing code --- src/input.rs | 7 +++ src/input/range.rs | 148 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 src/input/range.rs diff --git a/src/input.rs b/src/input.rs index 8ef1e1f04..d6d15b6ea 100644 --- a/src/input.rs +++ b/src/input.rs @@ -29,6 +29,13 @@ mod region; use region::read_regions; mod time_slice; use time_slice::read_time_slice_info; +mod range; +#[allow(unused_imports)] +use range::parse_range; +#[allow(unused_imports)] +use range::parse_range_parts; +#[allow(unused_imports)] +use range::partition; /// A trait which provides a method to insert a key and value into a map pub trait Insert { diff --git a/src/input/range.rs b/src/input/range.rs new file mode 100644 index 000000000..e470b21c5 --- /dev/null +++ b/src/input/range.rs @@ -0,0 +1,148 @@ +//! Provides a helper for parsing range-type parameters from input files. +use anyhow::{Context, Result, ensure}; +use std::error::Error; +use std::fmt::Display; +use std::ops::RangeInclusive; +use std::str::FromStr; + +/// Try to divide a string into two parts at the specified delimiter. +/// +/// # Returns +/// +/// - `None` if `delimiter` is not present +/// - `Some` tuple of the two strings if it is +pub fn partition<'a>(s: &'a str, delimiter: &str) -> Option<(&'a str, &'a str)> { + let idx = s.find(delimiter)?; + + #[allow(clippy::string_slice)] + Some((&s[..idx], &s[idx + delimiter.len()..])) +} + +/// Parse a range from an input string, using values in `limits` as defaults. +/// +/// Start and end values must be a type that is parseable from a string. Ranges are inclusive. +/// Whitespace is trimmed from start and end values before parsing. +/// +/// Valid ranges: +/// +/// - Range of values (e.g. 1990..2000) +/// - Range with no upper limit (e.g. 1990..) 
+/// - Range with no lower limit (e.g. ..2000) +#[allow(dead_code)] +pub fn parse_range(s: &str, limits: RangeInclusive) -> Result> +where + T: FromStr + Copy + PartialOrd + Display, + ::Err: Error + Sync + Send + 'static, +{ + let (start, end) = partition(s, "..").context( + "Range must be in the form [start]..[end] (where [start] and [end] can be empty)", + )?; + parse_range_parts(start, end, limits.clone(), *limits.start(), *limits.end()) +} + +/// Parse parts of a range from input strings. +/// +/// Start and end values must be a type that is parseable from a string. Ranges are inclusive. +/// Whitespace is trimmed from start and end values before parsing. +/// +/// If the start or end value is empty, `default_lower` or `default_upper` is used, respectively. +/// +/// # Panics +/// +/// Panics if `limits` has a start after its end or `default_lower` is greater than +/// `default_upper`. +pub fn parse_range_parts( + start: &str, + end: &str, + limits: RangeInclusive, + default_lower: T, + default_upper: T, +) -> Result> +where + T: FromStr + Copy + PartialOrd + Display, + ::Err: Error + Sync + Send + 'static, +{ + assert!( + limits.start() <= limits.end(), + "Start of limits must be before end" + ); + assert!( + default_lower <= default_upper, + "default_lower must be less than default_upper" + ); + + let start = start.trim(); + let end = end.trim(); + ensure!( + !start.is_empty() || !end.is_empty(), + "Start and end of range cannot both be omitted" + ); + + let value1 = if start.is_empty() { + default_lower + } else { + start.parse()? + }; + let value2 = if end.is_empty() { + default_upper + } else { + end.parse()? 
+ }; + + ensure!( + value1 <= value2, + "Start value must be less than or equal to end value" + ); + ensure!( + value1 >= *limits.start(), + "Start value must be >= {}", + limits.start() + ); + ensure!( + value2 <= *limits.end(), + "End value must be <= {}", + limits.end() + ); + + Ok(value1..=value2) +} + +#[cfg(test)] +mod tests { + use super::*; + use rstest::rstest; + + #[rstest] + #[case("1,2", ",", Some(("1","2")))] + #[case("hello world", " ", Some(("hello", "world")))] + #[case("a..b", "..", Some(("a","b")))] + #[case("a", "", Some(("", "a")))] + #[case("", "", Some(("", "")))] + #[case("a..b", "c", None)] + #[case("🙂😐😞", "😐", Some(("🙂", "😞")))] + fn partition_works( + #[case] input: &str, + #[case] delim: &str, + #[case] expected: Option<(&str, &str)>, + ) { + assert_eq!(partition(input, delim), expected); + } + + #[rstest] + #[case("1..2", 1..=2)] + #[case("1..1", 1..=1)] + #[case("..2", 0..=2)] + #[case("1..", 1..=100)] + fn parse_range_ok(#[case] input: &str, #[case] expected: RangeInclusive) { + assert_eq!(parse_range(input, 0..=100).unwrap(), expected); + } + + #[rstest] + #[case("..")] // can't omit start and end + #[case("-1..10")] // start out of range + #[case("0..101")] // end out of range + #[case("2..1")] // start greater than end + fn parse_range_error(#[case] input: &str) { + parse_range(input, 0..=100).unwrap_err(); + } +} From 63eb1b11039ffb994afef46a40d227ab6f0bbd6a Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 12 May 2026 12:42:05 +0100 Subject: [PATCH 2/6] Implement `FromStr` for unit types --- Cargo.toml | 2 +- src/units.rs | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1e71909c0..6da9c1d81 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ indexmap = "2.14.0" human-panic = "2.0.8" clap-markdown = "0.1.5" platform-info = "2.1.0" -derive_more = {version = "2.1", features = ["add", "display"]} +derive_more = {version = "2.1", features = ["add", "display", "from_str"]} 
petgraph = "0.8.3" strum = {version = "0.28.0", features = ["derive"]} documented = "0.9.2" diff --git a/src/units.rs b/src/units.rs index 2c40554f6..dc9b7ec4a 100644 --- a/src/units.rs +++ b/src/units.rs @@ -1,15 +1,16 @@ //! This module defines various unit types and their conversions. - use float_cmp::{ApproxEq, F64Margin}; use serde::{Deserialize, Serialize}; use std::fmt; use std::iter::Sum; use std::ops::{Add, AddAssign, Div, Mul, Sub, SubAssign}; +use std::str::FromStr; /// A trait encompassing most of the functionality of unit types pub trait UnitType: fmt::Debug + Copy + + FromStr + PartialEq + PartialOrd + Serialize @@ -53,6 +54,7 @@ macro_rules! base_unit_struct { Serialize, derive_more::Add, derive_more::Sub, + derive_more::FromStr, )] pub struct $name(pub f64); From 12506964d1d9bba606f411438873549b23af169a Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 12 May 2026 12:46:40 +0100 Subject: [PATCH 3/6] Use generic range-parsing code for process availabilities --- src/input.rs | 1 - src/input/process/availability.rs | 100 ++---------------------------- src/input/range.rs | 1 - 3 files changed, 4 insertions(+), 98 deletions(-) diff --git a/src/input.rs b/src/input.rs index d6d15b6ea..6bae3e55b 100644 --- a/src/input.rs +++ b/src/input.rs @@ -30,7 +30,6 @@ use region::read_regions; mod time_slice; use time_slice::read_time_slice_info; mod range; -#[allow(unused_imports)] use range::parse_range; #[allow(unused_imports)] use range::parse_range_parts; diff --git a/src/input/process/availability.rs b/src/input/process/availability.rs index 0d408a81c..7bd1bb230 100644 --- a/src/input/process/availability.rs +++ b/src/input/process/availability.rs @@ -1,11 +1,11 @@ //! Code for reading process availabilities from a CSV file. 
-use super::super::{input_err_msg, read_csv_optional, try_insert}; +use super::super::{input_err_msg, parse_range, read_csv_optional, try_insert}; use crate::process::{ActivityLimits, ProcessActivityLimitsMap, ProcessID, ProcessMap}; use crate::region::parse_region_str; use crate::time_slice::TimeSliceInfo; use crate::units::{Dimensionless, Year}; use crate::year::parse_year_str; -use anyhow::{Context, Result, ensure}; +use anyhow::{Context, Result}; use itertools::iproduct; use serde::Deserialize; use std::collections::HashMap; @@ -33,7 +33,8 @@ impl ProcessAvailabilityRaw { /// capacity. fn to_bounds(&self, length: Year) -> Result> { // Parse availability_range string - let availability_range = parse_availabilities_string(&self.limits)?; + let availability_range = parse_range(&self.limits, Dimensionless(0.0)..=Dimensionless(1.0)) + .with_context(|| format!("Could not parse availabilities range: {}", &self.limits))?; // Convert to bounds based on fraction of the year covered let ts_frac = length / Year(1.0); @@ -43,61 +44,6 @@ impl ProcessAvailabilityRaw { } } -/// Parse a string representing availability limits into a range. -fn parse_availabilities_string(s: &str) -> Result> { - // Disallow empty string - ensure!(!s.trim().is_empty(), "Availability range cannot be empty"); - - // Require exactly one ".." separator so only forms lower..upper, lower.. or ..upper are allowed. - let parts: Vec<&str> = s.split("..").collect(); - ensure!( - parts.len() == 2, - "Availability range must be of the form 'lower..upper', 'lower..' or '..upper'. 
Invalid: {s}" - ); - let left = parts[0].trim(); - let right = parts[1].trim(); - - // Parse lower limit - let lower = if left.is_empty() { - Dimensionless(0.0) - } else { - Dimensionless( - left.parse::() - .ok() - .with_context(|| format!("Invalid lower availability limit: {left}"))?, - ) - }; - - // Parse upper limit - let upper = if right.is_empty() { - Dimensionless(1.0) - } else { - Dimensionless( - right - .parse::() - .ok() - .with_context(|| format!("Invalid upper availability limit: {right}"))?, - ) - }; - - // Validation checks - ensure!( - upper >= lower, - "Upper availability limit must be greater than or equal to lower limit. Invalid: {s}" - ); - ensure!( - lower >= Dimensionless(0.0), - "Lower availability limit must be >= 0. Invalid: {s}" - ); - ensure!( - upper <= Dimensionless(1.0), - "Upper availability limit must be <= 1. Invalid: {s}" - ); - - // Return range - Ok(lower..=upper) -} - /// Read the process availabilities CSV file. /// /// This file contains information about the availability of processes over the course of a year as @@ -216,7 +162,6 @@ where #[cfg(test)] mod tests { use super::*; - use crate::fixture::assert_error; use float_cmp::assert_approx_eq; use rstest::rstest; @@ -230,43 +175,6 @@ mod tests { } } - #[rstest] - #[case("0.1..0.9", Dimensionless(0.1)..=Dimensionless(0.9))] - #[case("..0.9", Dimensionless(0.0)..=Dimensionless(0.9))] // Empty lower - #[case("0.1..", Dimensionless(0.1)..=Dimensionless(1.0))] // Empty upper - #[case("0.5..0.5", Dimensionless(0.5)..=Dimensionless(0.5))] // Equality - fn parse_availabilities_string_valid( - #[case] input: &str, - #[case] expected: RangeInclusive, - ) { - assert_eq!(parse_availabilities_string(input).unwrap(), expected); - } - - #[rstest] - #[case("", "Availability range cannot be empty")] - #[case( - "0.6..0.5", - "Upper availability limit must be greater than or equal to lower limit. 
Invalid: 0.6..0.5" - )] - #[case( - "..0.1..0.9", - "Availability range must be of the form 'lower..upper', 'lower..' or '..upper'. Invalid: ..0.1..0.9" - )] - #[case("0.1...0.9", "Invalid upper availability limit: .0.9")] - #[case( - "-0.1..0.5", - "Lower availability limit must be >= 0. Invalid: -0.1..0.5" - )] - #[case("0.1..1.5", "Upper availability limit must be <= 1. Invalid: 0.1..1.5")] - #[case("abc..0.5", "Invalid lower availability limit: abc")] - #[case( - "0.5", - "Availability range must be of the form 'lower..upper', 'lower..' or '..upper'. Invalid: 0.5" - )] - fn parse_availabilities_string_invalid(#[case] input: &str, #[case] error_msg: &str) { - assert_error!(parse_availabilities_string(input), error_msg); - } - #[rstest] #[case("0.1..", Year(0.1), Dimensionless(0.01)..=Dimensionless(0.1))] // Lower bound #[case("..0.5", Year(0.1), Dimensionless(0.0)..=Dimensionless(0.05))] // Upper bound diff --git a/src/input/range.rs b/src/input/range.rs index e470b21c5..c80c9b382 100644 --- a/src/input/range.rs +++ b/src/input/range.rs @@ -28,7 +28,6 @@ pub fn partition<'a>(s: &'a str, delimiter: &str) -> Option<(&'a str, &'a str)> /// - Range of values (e.g. 1990..2000) /// - Range with no upper limit (e.g. 1990..) /// - Range with no lower limit (e.g. ..2000) -#[allow(dead_code)] pub fn parse_range(s: &str, limits: RangeInclusive) -> Result> where T: FromStr + Copy + PartialOrd + Display, From fec8841b70515b374d6dd0997324e6021a8f450f Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Wed, 13 May 2026 16:16:22 +0100 Subject: [PATCH 4/6] Move `year` module to be submodule of `input` It's not used outside of `input`, so let's keep it in here along with all the other input-related code. 
--- src/input.rs | 2 ++ src/input/agent/commodity_portion.rs | 2 +- src/input/agent/objective.rs | 2 +- src/input/agent/search_space.rs | 2 +- src/input/commodity/levy.rs | 2 +- src/input/process/availability.rs | 2 +- src/input/process/flow.rs | 2 +- src/input/process/investment_constraints.rs | 2 +- src/input/process/parameter.rs | 2 +- src/{ => input}/year.rs | 2 +- src/lib.rs | 1 - 11 files changed, 11 insertions(+), 10 deletions(-) rename src/{ => input}/year.rs (99%) diff --git a/src/input.rs b/src/input.rs index 6bae3e55b..c4ac07312 100644 --- a/src/input.rs +++ b/src/input.rs @@ -35,6 +35,8 @@ use range::parse_range; use range::parse_range_parts; #[allow(unused_imports)] use range::partition; +mod year; +use year::parse_year_str; /// A trait which provides a method to insert a key and value into a map pub trait Insert { diff --git a/src/input/agent/commodity_portion.rs b/src/input/agent/commodity_portion.rs index 4fd971d80..667c282d0 100644 --- a/src/input/agent/commodity_portion.rs +++ b/src/input/agent/commodity_portion.rs @@ -3,9 +3,9 @@ use super::super::{deserialise_proportion_nonzero, input_err_msg, read_csv, try_ use crate::agent::{AgentCommodityPortionsMap, AgentID, AgentMap}; use crate::commodity::{CommodityMap, CommodityType}; use crate::id::IDCollection; +use crate::input::parse_year_str; use crate::region::RegionID; use crate::units::Dimensionless; -use crate::year::parse_year_str; use anyhow::{Context, Result, ensure}; use float_cmp::approx_eq; use indexmap::IndexSet; diff --git a/src/input/agent/objective.rs b/src/input/agent/objective.rs index 7c036910b..70dc97be3 100644 --- a/src/input/agent/objective.rs +++ b/src/input/agent/objective.rs @@ -1,8 +1,8 @@ //! Code for reading agent objectives from a CSV file. 
use super::super::{input_err_msg, read_csv, try_insert}; use crate::agent::{AgentID, AgentMap, AgentObjectiveMap, DecisionRule, ObjectiveType}; +use crate::input::parse_year_str; use crate::units::Dimensionless; -use crate::year::parse_year_str; use anyhow::{Context, Result, ensure}; use itertools::Itertools; use serde::Deserialize; diff --git a/src/input/agent/search_space.rs b/src/input/agent/search_space.rs index 7630f745e..10df36c96 100644 --- a/src/input/agent/search_space.rs +++ b/src/input/agent/search_space.rs @@ -3,8 +3,8 @@ use super::super::{input_err_msg, read_csv_optional, try_insert}; use crate::agent::{Agent, AgentID, AgentMap, AgentSearchSpaceMap}; use crate::commodity::CommodityID; use crate::id::IDCollection; +use crate::input::parse_year_str; use crate::process::{Process, ProcessMap}; -use crate::year::parse_year_str; use anyhow::{Context, Result}; use itertools::Itertools; use serde::Deserialize; diff --git a/src/input/commodity/levy.rs b/src/input/commodity/levy.rs index 3ad5b5e94..378d4b59d 100644 --- a/src/input/commodity/levy.rs +++ b/src/input/commodity/levy.rs @@ -2,10 +2,10 @@ use super::super::{input_err_msg, read_csv_optional, try_insert}; use crate::commodity::{BalanceType, CommodityID, CommodityLevyMap}; use crate::id::IDCollection; +use crate::input::parse_year_str; use crate::region::{RegionID, parse_region_str}; use crate::time_slice::TimeSliceInfo; use crate::units::MoneyPerFlow; -use crate::year::parse_year_str; use anyhow::{Context, Result, ensure}; use indexmap::IndexSet; use log::warn; diff --git a/src/input/process/availability.rs b/src/input/process/availability.rs index 7bd1bb230..d9d86fdca 100644 --- a/src/input/process/availability.rs +++ b/src/input/process/availability.rs @@ -1,10 +1,10 @@ //! Code for reading process availabilities from a CSV file. 
use super::super::{input_err_msg, parse_range, read_csv_optional, try_insert}; +use crate::input::parse_year_str; use crate::process::{ActivityLimits, ProcessActivityLimitsMap, ProcessID, ProcessMap}; use crate::region::parse_region_str; use crate::time_slice::TimeSliceInfo; use crate::units::{Dimensionless, Year}; -use crate::year::parse_year_str; use anyhow::{Context, Result}; use itertools::iproduct; use serde::Deserialize; diff --git a/src/input/process/flow.rs b/src/input/process/flow.rs index ab4900b8c..004ee10c6 100644 --- a/src/input/process/flow.rs +++ b/src/input/process/flow.rs @@ -1,12 +1,12 @@ //! Code for reading process flows from a CSV file. use super::super::{input_err_msg, read_csv}; use crate::commodity::{CommodityID, CommodityMap, CommodityType}; +use crate::input::parse_year_str; use crate::process::{ FlowDirection, FlowType, ProcessFlow, ProcessFlowsMap, ProcessID, ProcessMap, }; use crate::region::{RegionID, parse_region_str}; use crate::units::{FlowPerActivity, MoneyPerFlow}; -use crate::year::parse_year_str; use anyhow::{Context, Result, bail, ensure}; use indexmap::{IndexMap, IndexSet}; use itertools::iproduct; diff --git a/src/input/process/investment_constraints.rs b/src/input/process/investment_constraints.rs index 0854a3db1..c4c3f8cfa 100644 --- a/src/input/process/investment_constraints.rs +++ b/src/input/process/investment_constraints.rs @@ -1,12 +1,12 @@ //! Code for reading process investment constraints from a CSV file. 
use super::super::input_err_msg; +use crate::input::parse_year_str; use crate::input::{read_csv_optional, try_insert}; use crate::process::{ ProcessID, ProcessInvestmentConstraint, ProcessInvestmentConstraintsMap, ProcessMap, }; use crate::region::parse_region_str; use crate::units::{CapacityPerYear, Year}; -use crate::year::parse_year_str; use anyhow::{Context, Result, ensure}; use itertools::iproduct; use serde::Deserialize; diff --git a/src/input/process/parameter.rs b/src/input/process/parameter.rs index b8b9b302e..fc70d948d 100644 --- a/src/input/process/parameter.rs +++ b/src/input/process/parameter.rs @@ -1,9 +1,9 @@ //! Code for reading process parameters from a CSV file use super::super::{format_items_with_cap, input_err_msg, read_csv, try_insert}; +use crate::input::parse_year_str; use crate::process::{ProcessID, ProcessMap, ProcessParameter, ProcessParameterMap}; use crate::region::parse_region_str; use crate::units::{Dimensionless, MoneyPerActivity, MoneyPerCapacity, MoneyPerCapacityPerYear}; -use crate::year::parse_year_str; use ::log::warn; use anyhow::{Context, Result, ensure}; use serde::Deserialize; diff --git a/src/year.rs b/src/input/year.rs similarity index 99% rename from src/year.rs rename to src/input/year.rs index 0800a9af4..ed4109260 100644 --- a/src/year.rs +++ b/src/input/year.rs @@ -1,5 +1,5 @@ //! Code for working with years. 
-use crate::input::is_sorted_and_unique; +use super::is_sorted_and_unique; use anyhow::{Context, Result, ensure}; use itertools::Itertools; diff --git a/src/lib.rs b/src/lib.rs index 55a8676f2..47bf143a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,7 +26,6 @@ pub mod settings; pub mod simulation; pub mod time_slice; pub mod units; -pub mod year; #[cfg(test)] mod fixture; From 9a994d59bae583ff4b6ce92e4842e12582e00023 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 14 May 2026 09:55:01 +0100 Subject: [PATCH 5/6] Add `is_sorted_and_unique_with` helper function --- src/input.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/input.rs b/src/input.rs index c4ac07312..240fe91c1 100644 --- a/src/input.rs +++ b/src/input.rs @@ -182,7 +182,20 @@ where T: PartialOrd + Clone, I: IntoIterator, { - iter.into_iter().tuple_windows().all(|(a, b)| a < b) + is_sorted_and_unique_with(iter, |a, b| a < b) +} + +/// Check whether an iterator contains values that are sorted and unique, comparing with a custom +/// function +pub fn is_sorted_and_unique_with(iter: I, mut less_than: F) -> bool +where + T: Clone, + I: IntoIterator, + F: FnMut(&T, &T) -> bool, +{ + iter.into_iter() + .tuple_windows() + .all(|(a, b)| less_than(&a, &b)) } /// Insert a key-value pair into a map implementing the `Insert` trait if the key does not From 64f8f11b1439544ba55c9ea198651dede7ff1948 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 19 May 2026 16:53:31 +0100 Subject: [PATCH 6/6] Use generic range-parsing code for years --- src/input.rs | 6 +- src/input/year.rs | 159 ++++++++++++++++++++-------------------------- 2 files changed, 71 insertions(+), 94 deletions(-) diff --git a/src/input.rs b/src/input.rs index 240fe91c1..db48c13aa 100644 --- a/src/input.rs +++ b/src/input.rs @@ -30,11 +30,7 @@ use region::read_regions; mod time_slice; use time_slice::read_time_slice_info; mod range; -use range::parse_range; -#[allow(unused_imports)] -use 
range::parse_range_parts; -#[allow(unused_imports)] -use range::partition; +use range::{parse_range, parse_range_parts, partition}; mod year; use year::parse_year_str; diff --git a/src/input/year.rs b/src/input/year.rs index ed4109260..8dd09c621 100644 --- a/src/input/year.rs +++ b/src/input/year.rs @@ -1,18 +1,25 @@ //! Code for working with years. -use super::is_sorted_and_unique; -use anyhow::{Context, Result, ensure}; +use super::{is_sorted_and_unique, is_sorted_and_unique_with, parse_range_parts, partition}; +use anyhow::{Context, Result, bail, ensure}; use itertools::Itertools; +use std::ops::RangeInclusive; -/// Parse a single year from a string and check it is in `valid_years` -fn parse_and_validate_year(s: &str, valid_years: &[u32]) -> Option { - let year = s.trim().parse::().ok()?; - valid_years.binary_search(&year).is_ok().then_some(year) +/// Return any valid years in the specified range +fn get_valid_years_in_range( + range: &RangeInclusive, + valid_years: &[u32], +) -> impl Iterator { + valid_years + .iter() + .copied() + .filter(move |year| range.contains(year)) } /// Parse a string of years separated by semicolons into a vector of u32 years. /// -/// The string can be either "all" (case-insensitive), a single year, or a semicolon-separated list -/// of years (e.g. "2020;2021;2022" or "2020; 2021; 2022") +/// The string can be either "all" (case-insensitive) or year ranges (optionally) separated with +/// semicolons. A year range can be a single year (e.g. 2020) or a range with a start year and/or +/// end year (e.g. 2020.., ..2020, 2020..2025). /// /// # Arguments /// @@ -25,9 +32,9 @@ fn parse_and_validate_year(s: &str, valid_years: &[u32]) -> Option { /// /// # Panics /// -/// If `valid_years` is unsorted or non-unique. +/// If `valid_years` is empty, unsorted or contains duplicates. 
pub fn parse_year_str(s: &str, valid_years: &[u32]) -> Result> { - // We depend on this in `parse_and_validate_year` + assert!(!valid_years.is_empty(), "`valid_years` cannot be empty"); assert!( is_sorted_and_unique(valid_years), "`valid_years` must be sorted and unique" @@ -40,74 +47,48 @@ pub fn parse_year_str(s: &str, valid_years: &[u32]) -> Result> { return Ok(Vec::from_iter(valid_years.iter().copied())); } - ensure!( - !(s.contains(';') && s.contains("..")), - "Both ';' and '..' found in year string {s}. Discrete years and ranges cannot be mixed." - ); - - // We first process ranges - let years: Vec<_> = if s.contains("..") { - parse_years_range(s, valid_years)? - } else { - s.split(';') - .map(|y| { - parse_and_validate_year(y, valid_years) - .with_context(|| format!("Invalid year: {y}")) - }) - .try_collect()? - }; + // Get ranges of years, separated by semicolons. Note that a range can be a single year. + let ranges: Vec<_> = s + .split(';') + .map(|s| { + let (start, end) = partition(s, "..").unwrap_or((s, s)); + parse_range_parts( + start, + end, + u32::MIN..=u32::MAX, + *valid_years.first().unwrap(), + *valid_years.last().unwrap(), + ) + .with_context(|| format!("Invalid year range: {s}")) + }) + .try_collect()?; ensure!( - is_sorted_and_unique(&years), - "Years must be in order and unique" + is_sorted_and_unique_with(ranges.iter(), |a, b| { + a.start() < b.start() && a.end() < b.start() + }), + "Year ranges must be sorted and non-overlapping" ); - Ok(years) -} + let mut years = Vec::new(); + for range in ranges { + let old_len = years.len(); + years.extend(get_valid_years_in_range(&range, valid_years)); -/// Parse a year string that is defined as a range, selecting the valid years within that range. -/// -/// It should be of the form start..end. If either of the limits are omitted, they will default to -/// the first and last years of the `valid_years`. If both limits are missing, this is equivalent to -/// passing all. 
-fn parse_years_range(s: &str, valid_years: &[u32]) -> Result> { - // Require exactly one ".." separator so only forms start..end, start.. or ..end are allowed. - let parts: Vec<&str> = s.split("..").collect(); - ensure!( - parts.len() == 2, - "Year range must be of the form 'start..end', 'start..' or '..end'. Invalid: {s}" - ); - let left = parts[0].trim(); - let right = parts[1].trim(); - - // If the range start is open, we assign the first valid year - let start = if left.is_empty() { - valid_years[0] - } else { - left.parse::() - .ok() - .with_context(|| format!("Invalid start year in range: {left}"))? - }; - - // If the range end is open, we assign the last valid year - let end = if right.is_empty() { - *valid_years.last().unwrap() - } else { - right - .parse::() - .ok() - .with_context(|| format!("Invalid end year in range: {right}"))? - }; + // No valid years in range + if years.len() == old_len { + // For readability, provide different error messages for single year vs range + if range.start() == range.end() { + bail!("Invalid year: {}", range.start()); + } + bail!( + "No valid years in year range: {}..{}", + range.start(), + range.end() + ); + } + } - ensure!( - end > start, - "End year must be bigger than start year in range {s}" - ); - let years: Vec<_> = (start..=end).filter(|y| valid_years.contains(y)).collect(); - ensure!( - !years.is_empty(), - "No valid years found in year range string {s}" - ); Ok(years) } @@ -119,15 +100,15 @@ mod tests { #[rstest] #[case("2020", &[2020, 2021], &[2020])] - #[case("all", &[2020, 2021], &[2020,2021])] - #[case("ALL", &[2020, 2021], &[2020,2021])] - #[case(" ALL ", &[2020, 2021], &[2020,2021])] - #[case("2020;2021", &[2020, 2021], &[2020,2021])] - #[case(" 2020; 2021", &[2020, 2021], &[2020,2021])] // whitespace should be stripped - #[case("2019..2026", &[2020,2025], &[2020,2025])] - #[case("..2023", &[2020,2025], &[2020])] // Empty start - #[case("2021..", &[2020,2025], &[2025])] // Empty end - #[case("..", 
&[2020,2025], &[2020,2025])] + #[case("all", &[2020, 2021], &[2020, 2021])] + #[case("ALL", &[2020, 2021], &[2020, 2021])] + #[case(" ALL ", &[2020, 2021], &[2020, 2021])] + #[case("2020;2021", &[2020, 2021], &[2020, 2021])] + #[case(" 2020; 2021", &[2020, 2021], &[2020, 2021])] // whitespace should be stripped + #[case("2019..2026", &[2020, 2025], &[2020, 2025])] + #[case("..2023", &[2020, 2025], &[2020])] // Empty start + #[case("2021..", &[2020, 2025], &[2025])] // Empty end + #[case("2020;2021..2022", &[2020, 2021, 2022], &[2020, 2021, 2022])] // Can have multiple ranges fn parse_year_str_valid( #[case] input: &str, #[case] milestone_years: &[u32], @@ -139,14 +120,14 @@ mod tests { #[rstest] #[case("", &[2020], "No years provided")] #[case("2021", &[2020], "Invalid year: 2021")] - #[case("a;2020", &[2020], "Invalid year: a")] - #[case("2021;2020", &[2020, 2021],"Years must be in order and unique")] // out of order - #[case("2021;2020;2021", &[2020, 2021],"Years must be in order and unique")] // duplicate - #[case("2021;2020..2021", &[2020, 2021],"Both ';' and '..' found in year string 2021;2020..2021. Discrete years and ranges cannot be mixed.")] - #[case("2021..2020", &[2020, 2021],"End year must be bigger than start year in range 2021..2020")] // out of order - #[case("2021..2024", &[2020,2025], "No valid years found in year range string 2021..2024")] - #[case("..2020..2025", &[2020,2025], "Year range must be of the form 'start..end', 'start..' or '..end'. 
Invalid: ..2020..2025")] - #[case("2020...2025", &[2020,2025], "Invalid end year in range: .2025")] + #[case("a;2020", &[2020], "Invalid year range: a")] + #[case("2021;2020", &[2020, 2021], "Year ranges must be sorted and non-overlapping")] // out of order + #[case("2021;2020;2021", &[2020, 2021], "Year ranges must be sorted and non-overlapping")] // duplicate + #[case("2021..2020", &[2020, 2021], "Invalid year range: 2021..2020")] // out of order + #[case("2021..2024", &[2020, 2025], "No valid years in year range: 2021..2024")] + #[case("..2020..2025", &[2020, 2025], "Invalid year range: ..2020..2025")] + #[case("2020...2025", &[2020, 2025], "Invalid year range: 2020...2025")] + #[case("..", &[2020, 2025], "Invalid year range: ..")] fn parse_year_str_invalid( #[case] input: &str, #[case] milestone_years: &[u32],