diff --git a/src/uucore/src/lib/features/i18n/datetime.rs b/src/uucore/src/lib/features/i18n/datetime.rs index 1e94f299abd..74c93f55b4f 100644 --- a/src/uucore/src/lib/features/i18n/datetime.rs +++ b/src/uucore/src/lib/features/i18n/datetime.rs @@ -9,13 +9,33 @@ use icu_calendar::Date; use icu_calendar::cal::{Buddhist, Ethiopian, Iso, Persian}; -use icu_datetime::DateTimeFormatter; -use icu_datetime::fieldsets; use icu_locale::Locale; use jiff::civil::Date as JiffDate; use jiff_icu::ConvertFrom; use std::sync::OnceLock; +#[cfg(any( + not(unix), + target_os = "android", + target_os = "cygwin", + target_os = "redox" +))] +use icu_datetime::DateTimeFormatter; +#[cfg(any( + not(unix), + target_os = "android", + target_os = "cygwin", + target_os = "redox" +))] +use icu_datetime::fieldsets; +#[cfg(all( + unix, + not(target_os = "android"), + not(target_os = "cygwin"), + not(target_os = "redox") +))] +use nix::libc; + use crate::i18n::get_locale_from_env; /// Get the locale for time/date formatting from LC_TIME environment variable @@ -67,6 +87,177 @@ pub enum CalendarType { Ethiopian, } +/// Locale-specific month name for the current `LC_TIME` locale. +#[cfg(all( + unix, + not(target_os = "android"), + not(target_os = "cygwin"), + not(target_os = "redox") +))] +fn locale_month_name(date: &Date, long: bool) -> Option { + use std::ffi::CStr; + + let month_items: [libc::nl_item; 12] = if long { + [ + libc::MON_1, + libc::MON_2, + libc::MON_3, + libc::MON_4, + libc::MON_5, + libc::MON_6, + libc::MON_7, + libc::MON_8, + libc::MON_9, + libc::MON_10, + libc::MON_11, + libc::MON_12, + ] + } else { + [ + libc::ABMON_1, + libc::ABMON_2, + libc::ABMON_3, + libc::ABMON_4, + libc::ABMON_5, + libc::ABMON_6, + libc::ABMON_7, + libc::ABMON_8, + libc::ABMON_9, + libc::ABMON_10, + libc::ABMON_11, + libc::ABMON_12, + ] + }; + + unsafe { + libc::setlocale(libc::LC_TIME, c"".as_ptr()); + } + + let ordinal = usize::from(date.month().ordinal).checked_sub(1)?; + let ptr = unsafe { libc::nl_langinfo(month_items[ordinal]) }; + if ptr.is_null() { + return None; + } + + let name = unsafe { CStr::from_ptr(ptr) }.to_string_lossy(); + if name.is_empty() { + None + } else { + Some(name.into_owned()) + } +} + +/// Locale-specific weekday name for the current `LC_TIME` locale. +#[cfg(all( + unix, + not(target_os = "android"), + not(target_os = "cygwin"), + not(target_os = "redox") +))] +fn locale_weekday_name(date: &Date, long: bool) -> Option { + use std::ffi::CStr; + + let weekday_items: [libc::nl_item; 7] = if long { + [ + libc::DAY_1, + libc::DAY_2, + libc::DAY_3, + libc::DAY_4, + libc::DAY_5, + libc::DAY_6, + libc::DAY_7, + ] + } else { + [ + libc::ABDAY_1, + libc::ABDAY_2, + libc::ABDAY_3, + libc::ABDAY_4, + libc::ABDAY_5, + libc::ABDAY_6, + libc::ABDAY_7, + ] + }; + + unsafe { + libc::setlocale(libc::LC_TIME, c"".as_ptr()); + } + + let weekday = usize::from((date.weekday() as u8) % 7); + let ptr = unsafe { libc::nl_langinfo(weekday_items[weekday]) }; + if ptr.is_null() { + return None; + } + + let name = unsafe { CStr::from_ptr(ptr) }.to_string_lossy(); + if name.is_empty() { + None + } else { + Some(name.into_owned()) + } +} + +/// Locale-specific month name for the current `LC_TIME` locale. +#[cfg(any( + not(unix), + target_os = "android", + target_os = "cygwin", + target_os = "redox" +))] +fn locale_month_name(date: &Date, long: bool) -> Option { + let (locale, _) = get_time_locale(); + let locale = if locale.to_string().starts_with("th") { + icu_locale::locale!("en-US") + } else { + locale.clone() + }; + let locale_prefs = locale.into(); + let formatter = DateTimeFormatter::try_new( + locale_prefs, + if long { + fieldsets::M::long() + } else { + fieldsets::M::medium() + }, + ) + .ok()?; + + let name = formatter.format(date).to_string(); + Some(if long { + name + } else { + name.trim_end_matches('.').to_string() + }) +} + +/// Locale-specific weekday name for the current `LC_TIME` locale. +#[cfg(any( + not(unix), + target_os = "android", + target_os = "cygwin", + target_os = "redox" +))] +fn locale_weekday_name(date: &Date, long: bool) -> Option { + let (locale, _) = get_time_locale(); + let locale = if locale.to_string().starts_with("th") { + icu_locale::locale!("en-US") + } else { + locale.clone() + }; + let locale_prefs = locale.into(); + let formatter = DateTimeFormatter::try_new( + locale_prefs, + if long { + fieldsets::E::long() + } else { + fieldsets::E::short() + }, + ) + .ok()?; + + Some(formatter.format(date).to_string()) +} + /// Transform a strftime format string to use locale-specific calendar values pub fn localize_format_string(format: &str, date: JiffDate) -> String { const PERCENT_PLACEHOLDER: &str = "\x00\x00"; @@ -75,74 +266,61 @@ pub fn localize_format_string(format: &str, date: JiffDate) -> String { let iso_date = Date::::convert_from(date); let mut fmt = format.replace("%%", PERCENT_PLACEHOLDER); - - // For non-Gregorian calendars, replace date components with converted values + // Leave `%EY` untouched so GNU-compatible alternate year formatting can be + // handled by the underlying strftime implementation. let calendar_type = get_locale_calendar_type(locale); - if calendar_type != CalendarType::Gregorian { - let (cal_year, cal_month, cal_day) = match calendar_type { - CalendarType::Buddhist => { - let d = iso_date.to_calendar(Buddhist); - ( - d.year().extended_year(), - d.month().ordinal, - d.day_of_month().0, - ) - } - CalendarType::Persian => { - let d = iso_date.to_calendar(Persian); - ( - d.year().extended_year(), - d.month().ordinal, - d.day_of_month().0, - ) - } - CalendarType::Ethiopian => { - let d = iso_date.to_calendar(Ethiopian::new()); - ( - d.year().extended_year(), - d.month().ordinal, - d.day_of_month().0, - ) - } - CalendarType::Gregorian => unreachable!(), - }; - fmt = fmt - .replace("%Y", &cal_year.to_string()) - .replace("%m", &format!("{cal_month:02}")) - .replace("%d", &format!("{cal_day:02}")) - .replace("%e", &format!("{cal_day:2}")); + match calendar_type { + CalendarType::Buddhist => { + let d = iso_date.to_calendar(Buddhist); + let buddhist_year = d.year().era_year_or_related_iso(); + fmt = fmt + .replace("%EY", &format!("พ.ศ. {buddhist_year}")) + .replace("%EC", "พ.ศ.") + .replace("%Ey", &buddhist_year.to_string()); + } + CalendarType::Persian => { + let d = iso_date.to_calendar(Persian); + let cal_year = d.year().extended_year(); + let cal_month = d.month().ordinal; + let cal_day = d.day_of_month().0; + fmt = fmt + .replace("%Y", &cal_year.to_string()) + .replace("%m", &format!("{cal_month:02}")) + .replace("%d", &format!("{cal_day:02}")) + .replace("%e", &format!("{cal_day:2}")); + } + CalendarType::Ethiopian => { + let d = iso_date.to_calendar(Ethiopian::new()); + let cal_year = d.year().extended_year(); + let cal_month = d.month().ordinal; + let cal_day = d.day_of_month().0; + fmt = fmt + .replace("%Y", &cal_year.to_string()) + .replace("%m", &format!("{cal_month:02}")) + .replace("%d", &format!("{cal_day:02}")) + .replace("%e", &format!("{cal_day:2}")); + } + CalendarType::Gregorian => {} } - // Format localized names using ICU DateTimeFormatter - let locale_prefs = locale.clone().into(); - if fmt.contains("%B") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::long()) { - fmt = fmt.replace("%B", &f.format(&iso_date).to_string()); + if let Some(month_name) = locale_month_name(&iso_date, true) { + fmt = fmt.replace("%B", &month_name); } } if fmt.contains("%b") || fmt.contains("%h") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::medium()) { - // ICU's medium format may include trailing periods (e.g., "febr." for Hungarian), - // which when combined with locale format strings that also add periods after - // %b (e.g., "%Y. %b. %d") results in double periods ("febr.."). - // The standard C/POSIX locale via nl_langinfo returns abbreviations - // WITHOUT trailing periods, so we strip them here for consistency. - let month_abbrev = f.format(&iso_date).to_string(); - let month_abbrev = month_abbrev.trim_end_matches('.').to_string(); - fmt = fmt - .replace("%b", &month_abbrev) - .replace("%h", &month_abbrev); + if let Some(month_name) = locale_month_name(&iso_date, false) { + fmt = fmt.replace("%b", &month_name).replace("%h", &month_name); } } if fmt.contains("%A") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::long()) { - fmt = fmt.replace("%A", &f.format(&iso_date).to_string()); + if let Some(weekday_name) = locale_weekday_name(&iso_date, true) { + fmt = fmt.replace("%A", &weekday_name); } } if fmt.contains("%a") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::short()) { - fmt = fmt.replace("%a", &f.format(&iso_date).to_string()); + if let Some(weekday_name) = locale_weekday_name(&iso_date, false) { + fmt = fmt.replace("%a", &weekday_name); } } @@ -176,7 +354,6 @@ pub fn get_locale_months() -> Option<&'static [Vec; 12]> { not(target_os = "redox") ))] fn get_locale_months_inner() -> Option<[Vec; 12]> { - use nix::libc; use std::ffi::CStr; let abmon_items: [libc::nl_item; 12] = [ diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index d99a9b4baa3..d55d498bfed 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -2210,7 +2210,7 @@ fn test_date_thai_locale_solar_calendar() { .parse() .unwrap(); - // Since 1941, the year in the Thai solar calendar is the Gregorian year plus 543 + // GNU date keeps %Y Gregorian in the Thai locale. let thai_year: i32 = new_ucmd!() .env("LC_ALL", "th_TH.UTF-8") .arg("+%Y") @@ -2220,25 +2220,46 @@ fn test_date_thai_locale_solar_calendar() { .parse() .unwrap(); - assert_eq!(thai_year, current_year + 543); + assert_eq!(thai_year, current_year); - // All months that have 31 days have names that end with "คม" (Thai characters) - let days_31_suffix = "\u{0E04}\u{0E21}"; // "คม" in Unicode + // GNU date uses the locale's alternate year for %EY in the Thai locale. + let thai_ey_year = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("+%EY") + .succeeds() + .stdout_str() + .trim() + .to_string(); - for month in ["01", "03", "05", "07", "08", "10", "12"] { - let month_result = new_ucmd!() - .env("LC_ALL", "th_TH.UTF-8") - .arg("--date") - .arg(format!("{current_year}-{month}-01")) - .arg("+%B") - .succeeds(); - let month_name = month_result.stdout_str(); + assert_eq!(thai_ey_year, "พ.ศ. 2569"); - assert!( - month_name.trim().ends_with(days_31_suffix), - "Month {month} should end with 'คม', got: {month_name}" - ); - } + let thai_ec = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("+%EC") + .succeeds() + .stdout_str() + .trim() + .to_string(); + + assert_eq!(thai_ec, "พ.ศ."); + + let thai_ey = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("+%Ey") + .succeeds() + .stdout_str() + .trim() + .to_string(); + + assert_eq!(thai_ey, "2569"); + + // GNU date keeps the locale month/day names from LC_TIME here as well. + check_date( + "th_TH.UTF-8", + "2026-06-14", + "+%Y %EY %B %A", + "2026 พ.ศ. 2569 มิถุนายน อาทิตย์", + ); // Check that --iso-8601 and --rfc-3339 use the Gregorian calendar let iso_result = new_ucmd!() @@ -2291,18 +2312,19 @@ fn test_locale_calendar_conversions() { check_date("fa_IR.UTF-8", d, "+%Y-%m-%d", e); } - // Thai Buddhist (year + 543, same month/day) + // Thai locale keeps Gregorian %Y, but %EY uses the locale's alternate year. for (d, e) in [ - ("2026-01-01", "2569-01-01"), - ("2026-01-26", "2569-01-26"), - ("2026-06-15", "2569-06-15"), - ("2026-12-31", "2569-12-31"), - ("2025-01-01", "2568-01-01"), - ("2024-02-29", "2567-02-29"), - ("2000-01-01", "2543-01-01"), - ("1970-01-01", "2513-01-01"), + ("2026-01-01", "พ.ศ. 2569-01-01"), + ("2026-01-26", "พ.ศ. 2569-01-26"), + ("2026-06-15", "พ.ศ. 2569-06-15"), + ("2026-12-31", "พ.ศ. 2569-12-31"), + ("2025-01-01", "พ.ศ. 2568-01-01"), + ("2024-02-29", "พ.ศ. 2567-02-29"), + ("2000-01-01", "พ.ศ. 2543-01-01"), + ("1970-01-01", "พ.ศ. 2513-01-01"), ] { - check_date("th_TH.UTF-8", d, "+%Y-%m-%d", e); + check_date("th_TH.UTF-8", d, "+%Y-%m-%d", d); + check_date("th_TH.UTF-8", d, "+%EY-%m-%d", e); } // Ethiopian (13 months, New Year on Sept 11) @@ -2335,6 +2357,10 @@ fn test_locale_month_names() { ("ja_JP.UTF-8", "1月", "6月", "12月"), ("zh_CN.UTF-8", "一月", "六月", "十二月"), ] { + if !is_locale_available(loc) { + println!("Skipping locale month test for {loc} - locale not available"); + continue; + } check_date(loc, "2026-01-15", "+%B", jan); check_date(loc, "2026-06-15", "+%B", jun); check_date(loc, "2026-12-15", "+%B", dec); @@ -2358,6 +2384,10 @@ fn test_locale_abbreviated_month_names() { // Hungarian locale - the fix ensures no double periods ("hu_HU.UTF-8", "febr", "jún", "dec"), ] { + if !is_locale_available(loc) { + println!("Skipping abbreviated month test for {loc} - locale not available"); + continue; + } check_date(loc, "2026-02-12", "+%b", feb); check_date(loc, "2026-06-14", "+%b", jun); check_date(loc, "2026-12-09", "+%b", dec); @@ -2375,6 +2405,10 @@ fn test_locale_day_names() { ("ja_JP.UTF-8", "月曜日", "日曜日", "土曜日"), ("zh_CN.UTF-8", "星期一", "星期日", "星期六"), ] { + if !is_locale_available(loc) { + println!("Skipping day-name test for {loc} - locale not available"); + continue; + } check_date(loc, "2026-01-26", "+%A", mon); check_date(loc, "2026-01-25", "+%A", sun); check_date(loc, "2026-01-24", "+%A", sat);