Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 29 additions & 22 deletions fluent-langneg/src/locale.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,53 +34,60 @@ export default class Locale {
* It also allows skipping the script section of the id, so `en-US` is
* properly parsed as `en-*-US-*`.
*/
constructor(locale, range = false) {
constructor(locale) {
const result = localeRe.exec(locale.replace(/_/g, "-"));
if (!result) {
this.isWellFormed = false;
return;
}

const missing = range ? "*" : undefined;
let [, language, script, region, variant] = result;

const language = result[1] || missing;
const script = result[2] || missing;
const region = result[3] || missing;
const variant = result[4] || missing;

this.language = language;
this.script = script;
this.region = region;
if (language) {
this.language = language.toLowerCase();
}
if (script) {
this.script = script[0].toUpperCase() + script.slice(1);
}
if (region) {
this.region = region.toUpperCase();
}
this.variant = variant;
this.string = locale;
this.isWellFormed = true;
}

isEqual(locale) {
return localeParts.every(part => this[part] === locale[part]);
}

matches(locale) {
matches(locale, thisRange = false, otherRange = false) {
return localeParts.every(part => {
return this[part] === "*" || locale[part] === "*" ||
(this[part] === undefined && locale[part] === undefined) ||
(this[part] !== undefined && locale[part] !== undefined &&
this[part].toLowerCase() === locale[part].toLowerCase());
return ((thisRange && this[part] === undefined) ||
(otherRange && locale[part] === undefined) ||
this[part] === locale[part]);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Understanding this method is important for understanding the whole algorithm and I'm not fond of adding two boolean arguments to it. It's hard to follow what they mean when the method is called. In fact, I think I largely preferred the old code which used an explicit * to represent ranges.

A few suggestions on how to improve this code. I'm not saying all of them need to be applied at the same time, but I'd like to see this method improved somehow.

  • Remove the thisRange param as it is always true across all callsites, or start using it for exact matching.
  • Use an options dictionary: locale.matches(other, {thisRange: true, otherRange: true}).
  • Separate this code into multiple methods: exactMatches, matchesThisRange, matchesRanges.
  • Add a flag to Locale called undefinedAsRange and a method called toRange() which returns a new Locale instance based on the one the method was called on. match would inspect the value of the flag for both this and other and act accordingly.

Now, I understand that this code is based on the existing C++/Rust code. With that in mind, I think this can be moved to a follow-up. Please file it if you agree.

});
}

setVariantRange() {
this.variant = "*";
toString() {
return localeParts
.map(part => this[part])
.filter(part => part !== undefined)
.join("-");
}

clearVariants() {
this.variant = undefined;
}

setRegionRange() {
this.region = "*";
clearRegion() {
this.region = undefined;
}

addLikelySubtags() {
const newLocale = getLikelySubtagsMin(this.string.toLowerCase());
const newLocale = getLikelySubtagsMin(this.toString().toLowerCase());

if (newLocale) {
localeParts.forEach(part => this[part] = newLocale[part]);
this.string = newLocale.string;
return true;
}
return false;
Expand Down
94 changes: 60 additions & 34 deletions fluent-langneg/src/matches.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,17 @@ import Locale from "./locale";
export default function filterMatches(
requestedLocales, availableLocales, strategy
) {
/* eslint complexity: ["error", 31]*/
const supportedLocales = new Set();

const availLocales =
new Set(availableLocales.map(locale => new Locale(locale, true)));
const availableLocalesMap = new Map();

for (let locale of availableLocales) {
let newLocale = new Locale(locale);
if (newLocale.isWellFormed) {
availableLocalesMap.set(locale, new Locale(locale));
}
}

outer:
for (const reqLocStr of requestedLocales) {
Expand All @@ -90,17 +97,12 @@ export default function filterMatches(
continue;
}

// Attempt to make an exact match
// 1) Attempt to make an exact match
// Example: `en-US` === `en-US`
for (const availableLocale of availableLocales) {
if (reqLocStrLC === availableLocale.toLowerCase()) {
supportedLocales.add(availableLocale);
for (const loc of availLocales) {
if (loc.isEqual(requestedLocale)) {
availLocales.delete(loc);
break;
}
}
for (const key of availableLocalesMap.keys()) {
if (reqLocStrLC === key.toLowerCase()) {
supportedLocales.add(key);
availableLocalesMap.delete(key);
if (strategy === "lookup") {
return Array.from(supportedLocales);
} else if (strategy === "filtering") {
Expand All @@ -112,13 +114,13 @@ export default function filterMatches(
}


// Attempt to match against the available range
// 2) Attempt to match against the available range
// This turns `en` into `en-*-*-*` and `en-US` into `en-*-US-*`
// Example: ['en-US'] * ['en'] = ['en']
for (const availableLocale of availLocales) {
if (requestedLocale.matches(availableLocale)) {
supportedLocales.add(availableLocale.string);
availLocales.delete(availableLocale);
for (const [key, availableLocale] of availableLocalesMap.entries()) {
if (availableLocale.matches(requestedLocale, true, false)) {
supportedLocales.add(key);
availableLocalesMap.delete(key);
if (strategy === "lookup") {
return Array.from(supportedLocales);
} else if (strategy === "filtering") {
Expand All @@ -129,15 +131,15 @@ export default function filterMatches(
}
}

// Attempt to retrieve a maximal version of the requested locale ID
// 3) Attempt to retrieve a maximal version of the requested locale ID
// If data is available, it'll expand `en` into `en-Latn-US` and
// `zh` into `zh-Hans-CN`.
// Example: ['en'] * ['en-GB', 'en-US'] = ['en-US']
if (requestedLocale.addLikelySubtags()) {
for (const availableLocale of availLocales) {
if (requestedLocale.matches(availableLocale)) {
supportedLocales.add(availableLocale.string);
availLocales.delete(availableLocale);
for (const [key, availableLocale] of availableLocalesMap.entries()) {
if (availableLocale.matches(requestedLocale, true, false)) {
supportedLocales.add(key);
availableLocalesMap.delete(key);
if (strategy === "lookup") {
return Array.from(supportedLocales);
} else if (strategy === "filtering") {
Expand All @@ -149,14 +151,14 @@ export default function filterMatches(
}
}

// Attempt to look up for a different variant for the same locale ID
// 4) Attempt to look up for a different variant for the same locale ID
// Example: ['en-US-mac'] * ['en-US-win'] = ['en-US-win']
requestedLocale.setVariantRange();
requestedLocale.clearVariants();

for (const availableLocale of availLocales) {
if (requestedLocale.matches(availableLocale)) {
supportedLocales.add(availableLocale.string);
availLocales.delete(availableLocale);
for (const [key, availableLocale] of availableLocalesMap.entries()) {
if (availableLocale.matches(requestedLocale, true, true)) {
supportedLocales.add(key);
availableLocalesMap.delete(key);
if (strategy === "lookup") {
return Array.from(supportedLocales);
} else if (strategy === "filtering") {
Expand All @@ -167,14 +169,38 @@ export default function filterMatches(
}
}

// Attempt to look up for a different region for the same locale ID
// 5) Attempt to match against the likely subtag without region
// In the example below, addLikelySubtags will turn
// `zh-Hant` into `zh-Hant-TW` giving `zh-TW` priority match
// over `zh-CN`.
//
// Example: ['zh-Hant-HK'] * ['zh-TW', 'zh-CN'] = ['zh-TW']
requestedLocale.clearRegion();

if (requestedLocale.addLikelySubtags()) {
for (const [key, availableLocale] of availableLocalesMap.entries()) {
if (availableLocale.matches(requestedLocale, true, false)) {
supportedLocales.add(key);
availableLocalesMap.delete(key);
if (strategy === "lookup") {
return Array.from(supportedLocales);
} else if (strategy === "filtering") {
continue;
} else {
continue outer;
}
}
}
}

// 6) Attempt to look up for a different region for the same locale ID
// Example: ['en-US'] * ['en-AU'] = ['en-AU']
requestedLocale.setRegionRange();
requestedLocale.clearRegion();

for (const availableLocale of availLocales) {
if (requestedLocale.matches(availableLocale)) {
supportedLocales.add(availableLocale.string);
availLocales.delete(availableLocale);
for (const [key, availableLocale] of availableLocalesMap.entries()) {
if (availableLocale.matches(requestedLocale, true, true)) {
supportedLocales.add(key);
availableLocalesMap.delete(key);
if (strategy === "lookup") {
return Array.from(supportedLocales);
} else if (strategy === "filtering") {
Expand Down
5 changes: 3 additions & 2 deletions fluent-langneg/src/subtags.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ const likelySubtagsMin = {
"ta": "ta-taml-in",
"uk": "uk-cyrl-ua",
"zh": "zh-hans-cn",
"zh-hant": "zh-hant-tw",
"zh-hk": "zh-hant-hk",
"zh-gb": "zh-hant-gb",
"zh-us": "zh-hant-us",
};
Expand Down Expand Up @@ -55,8 +57,7 @@ export function getLikelySubtagsMin(loc) {
}
const locale = new Locale(loc);
if (regionMatchingLangs.includes(locale.language)) {
locale.region = locale.language;
locale.string = `${locale.language}-${locale.region}`;
locale.region = locale.language.toUpperCase();
return locale;
}
return null;
Expand Down
15 changes: 1 addition & 14 deletions fluent-langneg/test/langneg_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,10 @@ const data = {
[["sr", "ru"], ["sr-Latn", "ru"], ["ru"]],
[["sr-RU"], ["sr-Latn-RO", "sr-Cyrl"], ["sr-Latn-RO"]],
],
"should match on a requested locale as a range": [
[["en-*-US"], ["en-US"], ["en-US"]],
[["en-Latn-US-*"], ["en-Latn-US"], ["en-Latn-US"]],
[["en-*-US-*"], ["en-US"], ["en-US"]],
[["*"], ["de", "pl", "it", "fr", "ru"], ["de", "pl", "it", "fr", "ru"]]
],
"should match cross-region": [
[["en"], ["en-US"], ["en-US"]],
[["en-US"], ["en-GB"], ["en-GB"]],
[["en-Latn-US"], ["en-Latn-GB"], ["en-Latn-GB"]],
// This is a cross-region check, because the requested Locale
// is really lang: en, script: *, region: undefined
[["en-*"], ["en-US"], ["en-US"]],
],
"should match cross-variant": [
[["en-US-mac"], ["en-US-win"], ["en-US-win"]],
Expand All @@ -61,6 +52,7 @@ const data = {
],
"should prioritize properly (extra tests)": [
[["en-US"], ["en-GB", "en"], ["en", "en-GB"]],
[["zh-HK"], ["zh-CN", "zh-TW"], ["zh-TW", "zh-CN"]],
],
"should handle default locale properly": [
[["fr"], ["de", "it"], []],
Expand Down Expand Up @@ -101,11 +93,6 @@ const data = {
["en-US", "fr-FR", "en", "fr"], undefined,
"matching", ["fr", "en"]
],
[
["*"],
["fr", "de", "it", "ru", "pl"], undefined,
"matching", ["fr"]
],
],
},
"lookup": {
Expand Down