diff --git a/fluent-langneg/src/locale.js b/fluent-langneg/src/locale.js index 4a5185462..346c37b97 100644 --- a/fluent-langneg/src/locale.js +++ b/fluent-langneg/src/locale.js @@ -34,53 +34,60 @@ export default class Locale { * It also allows skipping the script section of the id, so `en-US` is * properly parsed as `en-*-US-*`. */ - constructor(locale, range = false) { + constructor(locale) { const result = localeRe.exec(locale.replace(/_/g, "-")); if (!result) { + this.isWellFormed = false; return; } - const missing = range ? "*" : undefined; + let [, language, script, region, variant] = result; - const language = result[1] || missing; - const script = result[2] || missing; - const region = result[3] || missing; - const variant = result[4] || missing; - - this.language = language; - this.script = script; - this.region = region; + if (language) { + this.language = language.toLowerCase(); + } + if (script) { + this.script = script[0].toUpperCase() + script.slice(1); + } + if (region) { + this.region = region.toUpperCase(); + } this.variant = variant; - this.string = locale; + this.isWellFormed = true; } isEqual(locale) { return localeParts.every(part => this[part] === locale[part]); } - matches(locale) { + matches(locale, thisRange = false, otherRange = false) { return localeParts.every(part => { - return this[part] === "*" || locale[part] === "*" || - (this[part] === undefined && locale[part] === undefined) || - (this[part] !== undefined && locale[part] !== undefined && - this[part].toLowerCase() === locale[part].toLowerCase()); + return ((thisRange && this[part] === undefined) || + (otherRange && locale[part] === undefined) || + this[part] === locale[part]); }); } - setVariantRange() { - this.variant = "*"; + toString() { + return localeParts + .map(part => this[part]) + .filter(part => part !== undefined) + .join("-"); + } + + clearVariants() { + this.variant = undefined; } - setRegionRange() { - this.region = "*"; + clearRegion() { + this.region = undefined; } addLikelySubtags() { - const newLocale = getLikelySubtagsMin(this.string.toLowerCase()); + const newLocale = getLikelySubtagsMin(this.toString().toLowerCase()); if (newLocale) { localeParts.forEach(part => this[part] = newLocale[part]); - this.string = newLocale.string; return true; } return false; diff --git a/fluent-langneg/src/matches.js b/fluent-langneg/src/matches.js index 071694168..3f1c07ff9 100644 --- a/fluent-langneg/src/matches.js +++ b/fluent-langneg/src/matches.js @@ -76,10 +76,17 @@ import Locale from "./locale"; export default function filterMatches( requestedLocales, availableLocales, strategy ) { + /* eslint complexity: ["error", 31]*/ const supportedLocales = new Set(); - const availLocales = - new Set(availableLocales.map(locale => new Locale(locale, true))); + const availableLocalesMap = new Map(); + + for (let locale of availableLocales) { + let newLocale = new Locale(locale); + if (newLocale.isWellFormed) { + availableLocalesMap.set(locale, new Locale(locale)); + } + } outer: for (const reqLocStr of requestedLocales) { @@ -90,17 +97,12 @@ export default function filterMatches( continue; } - // Attempt to make an exact match + // 1) Attempt to make an exact match // Example: `en-US` === `en-US` - for (const availableLocale of availableLocales) { - if (reqLocStrLC === availableLocale.toLowerCase()) { - supportedLocales.add(availableLocale); - for (const loc of availLocales) { - if (loc.isEqual(requestedLocale)) { - availLocales.delete(loc); - break; - } - } + for (const key of availableLocalesMap.keys()) { + if (reqLocStrLC === key.toLowerCase()) { + supportedLocales.add(key); + availableLocalesMap.delete(key); if (strategy === "lookup") { return Array.from(supportedLocales); } else if (strategy === "filtering") { @@ -112,13 +114,13 @@ export default function filterMatches( } - // Attempt to match against the available range + // 2) Attempt to match against the available range // This turns `en` into `en-*-*-*` and `en-US` into `en-*-US-*` // Example: ['en-US'] * ['en'] = ['en'] - for (const availableLocale of availLocales) { - if (requestedLocale.matches(availableLocale)) { - supportedLocales.add(availableLocale.string); - availLocales.delete(availableLocale); + for (const [key, availableLocale] of availableLocalesMap.entries()) { + if (availableLocale.matches(requestedLocale, true, false)) { + supportedLocales.add(key); + availableLocalesMap.delete(key); if (strategy === "lookup") { return Array.from(supportedLocales); } else if (strategy === "filtering") { @@ -129,15 +131,15 @@ export default function filterMatches( } } - // Attempt to retrieve a maximal version of the requested locale ID + // 3) Attempt to retrieve a maximal version of the requested locale ID // If data is available, it'll expand `en` into `en-Latn-US` and // `zh` into `zh-Hans-CN`. // Example: ['en'] * ['en-GB', 'en-US'] = ['en-US'] if (requestedLocale.addLikelySubtags()) { - for (const availableLocale of availLocales) { - if (requestedLocale.matches(availableLocale)) { - supportedLocales.add(availableLocale.string); - availLocales.delete(availableLocale); + for (const [key, availableLocale] of availableLocalesMap.entries()) { + if (availableLocale.matches(requestedLocale, true, false)) { + supportedLocales.add(key); + availableLocalesMap.delete(key); if (strategy === "lookup") { return Array.from(supportedLocales); } else if (strategy === "filtering") { @@ -149,14 +151,14 @@ export default function filterMatches( } } - // Attempt to look up for a different variant for the same locale ID + // 4) Attempt to look up for a different variant for the same locale ID // Example: ['en-US-mac'] * ['en-US-win'] = ['en-US-win'] - requestedLocale.setVariantRange(); + requestedLocale.clearVariants(); - for (const availableLocale of availLocales) { - if (requestedLocale.matches(availableLocale)) { - supportedLocales.add(availableLocale.string); - availLocales.delete(availableLocale); + for (const [key, availableLocale] of availableLocalesMap.entries()) { + if (availableLocale.matches(requestedLocale, true, true)) { + supportedLocales.add(key); + availableLocalesMap.delete(key); if (strategy === "lookup") { return Array.from(supportedLocales); } else if (strategy === "filtering") { @@ -167,14 +169,38 @@ export default function filterMatches( } } - // Attempt to look up for a different region for the same locale ID + // 5) Attempt to match against the likely subtag without region + // In the example below, addLikelySubtags will turn + // `zh-Hant` into `zh-Hant-TW` giving `zh-TW` priority match + // over `zh-CN`. + // + // Example: ['zh-Hant-HK'] * ['zh-TW', 'zh-CN'] = ['zh-TW'] + requestedLocale.clearRegion(); + + if (requestedLocale.addLikelySubtags()) { + for (const [key, availableLocale] of availableLocalesMap.entries()) { + if (availableLocale.matches(requestedLocale, true, false)) { + supportedLocales.add(key); + availableLocalesMap.delete(key); + if (strategy === "lookup") { + return Array.from(supportedLocales); + } else if (strategy === "filtering") { + continue; + } else { + continue outer; + } + } + } + } + + // 6) Attempt to look up for a different region for the same locale ID // Example: ['en-US'] * ['en-AU'] = ['en-AU'] - requestedLocale.setRegionRange(); + requestedLocale.clearRegion(); - for (const availableLocale of availLocales) { - if (requestedLocale.matches(availableLocale)) { - supportedLocales.add(availableLocale.string); - availLocales.delete(availableLocale); + for (const [key, availableLocale] of availableLocalesMap.entries()) { + if (availableLocale.matches(requestedLocale, true, true)) { + supportedLocales.add(key); + availableLocalesMap.delete(key); if (strategy === "lookup") { return Array.from(supportedLocales); } else if (strategy === "filtering") { diff --git a/fluent-langneg/src/subtags.js b/fluent-langneg/src/subtags.js index 21dd09c12..7b726aeaf 100644 --- a/fluent-langneg/src/subtags.js +++ b/fluent-langneg/src/subtags.js @@ -27,6 +27,8 @@ const likelySubtagsMin = { "ta": "ta-taml-in", "uk": "uk-cyrl-ua", "zh": "zh-hans-cn", + "zh-hant": "zh-hant-tw", + "zh-hk": "zh-hant-hk", "zh-gb": "zh-hant-gb", "zh-us": "zh-hant-us", }; @@ -55,8 +57,7 @@ export function getLikelySubtagsMin(loc) { } const locale = new Locale(loc); if (regionMatchingLangs.includes(locale.language)) { - locale.region = locale.language; - locale.string = `${locale.language}-${locale.region}`; + locale.region = locale.language.toUpperCase(); return locale; } return null; diff --git a/fluent-langneg/test/langneg_test.js b/fluent-langneg/test/langneg_test.js index 9004ad600..bed13c317 100644 --- a/fluent-langneg/test/langneg_test.js +++ b/fluent-langneg/test/langneg_test.js @@ -30,19 +30,10 @@ const data = { [["sr", "ru"], ["sr-Latn", "ru"], ["ru"]], [["sr-RU"], ["sr-Latn-RO", "sr-Cyrl"], ["sr-Latn-RO"]], ], - "should match on a requested locale as a range": [ - [["en-*-US"], ["en-US"], ["en-US"]], - [["en-Latn-US-*"], ["en-Latn-US"], ["en-Latn-US"]], - [["en-*-US-*"], ["en-US"], ["en-US"]], - [["*"], ["de", "pl", "it", "fr", "ru"], ["de", "pl", "it", "fr", "ru"]] - ], "should match cross-region": [ [["en"], ["en-US"], ["en-US"]], [["en-US"], ["en-GB"], ["en-GB"]], [["en-Latn-US"], ["en-Latn-GB"], ["en-Latn-GB"]], - // This is a cross-region check, because the requested Locale - // is really lang: en, script: *, region: undefined - [["en-*"], ["en-US"], ["en-US"]], ], "should match cross-variant": [ [["en-US-mac"], ["en-US-win"], ["en-US-win"]], @@ -61,6 +52,7 @@ const data = { ], "should prioritize properly (extra tests)": [ [["en-US"], ["en-GB", "en"], ["en", "en-GB"]], + [["zh-HK"], ["zh-CN", "zh-TW"], ["zh-TW", "zh-CN"]], ], "should handle default locale properly": [ [["fr"], ["de", "it"], []], @@ -101,11 +93,6 @@ const data = { ["en-US", "fr-FR", "en", "fr"], undefined, "matching", ["fr", "en"] ], - [ - ["*"], - ["fr", "de", "it", "ru", "pl"], undefined, - "matching", ["fr"] - ], ], }, "lookup": {