From 2bdfc0882c8da795b314e7a8dfd8db4f63b68287 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Fri, 1 Dec 2023 14:16:44 +0000 Subject: [PATCH 01/20] Re-implement the grapheme segmenter from Intl. --- THIRD-PARTY-NOTICES.TXT | 14 +++ .../hybrid-globalization/collations.ts | 68 +++++----- .../grapheme-segmenter.ts | 117 ++++++++++++++++++ .../segmentation-rules.ts | 87 +++++++++++++ 4 files changed, 248 insertions(+), 38 deletions(-) create mode 100644 src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts create mode 100644 src/mono/wasm/runtime/hybrid-globalization/segmentation-rules.ts diff --git a/THIRD-PARTY-NOTICES.TXT b/THIRD-PARTY-NOTICES.TXT index 07a2b94ba5fc63..ff5aaacd21b76a 100644 --- a/THIRD-PARTY-NOTICES.TXT +++ b/THIRD-PARTY-NOTICES.TXT @@ -1331,3 +1331,17 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Aspects of base64 encoding / decoding are based on algorithm described in "Base64 encoding and decoding at almost the speed of a memory copy", Wojciech Muła and Daniel Lemire. https://arxiv.org/pdf/1910.05109.pdf + +License for FormatJS Intl.Segmenter grapheme segmentation algorithm +-------------------------------------------------------------------------- +Available at https://github.com/formatjs/formatjs/blob/58d6a7b398d776ca3d2726d72ae1573b65cc3bef/packages/intl-segmenter/LICENSE.md + +MIT License + +Copyright (c) 2022 FormatJS + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/mono/wasm/runtime/hybrid-globalization/collations.ts b/src/mono/wasm/runtime/hybrid-globalization/collations.ts index 9ec14b2a31dde9..6843e96acc977c 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/collations.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/collations.ts @@ -6,6 +6,7 @@ import { monoStringToString, utf16ToString } from "../strings"; import { MonoObject, MonoObjectRef, MonoString, MonoStringRef } from "../types/internal"; import { Int32Ptr } from "../types/emscripten"; import { wrap_error_root, wrap_no_error_root } from "../invoke-js"; +import { GraphemeSegmenter } from "./grapheme-segmenter"; const COMPARISON_ERROR = -2; const INDEXING_ERROR = -1; @@ -114,54 +115,45 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne const cultureName = monoStringToString(cultureRoot); const locale = cultureName ? 
cultureName : undefined; const casePicker = (options & 0x1f); - - const segmenter = new Intl.Segmenter(locale, { granularity: "grapheme" }); - const needleSegments = Array.from(segmenter.segment(needle)).map(s => s.segment); - let i = 0; - let stop = false; let result = -1; - let segmentWidth = 0; - let index = 0; - let nextIndex = 0; - while (!stop) { - // we need to restart the iterator in this outer loop because we have shifted it in the inner loop - const iteratorSrc = segmenter.segment(source.slice(i, source.length))[Symbol.iterator](); - let srcNext = iteratorSrc.next(); - if (srcNext.done) - break; + const graphemeBreaker = new GraphemeSegmenter(); + const needleSegments = []; + let needleIdx = 0; + + // Grapheme segmentation of needle string + while (needleIdx < needle.length) { + const breakIdx = graphemeBreaker.next_grapheme_break(needle, needleIdx); + needleSegments.push(needle.slice(needleIdx, breakIdx)); + needleIdx = breakIdx; + } + + let srcIdx = 0; + while (srcIdx < source.length) { + const breakIdx = graphemeBreaker.next_grapheme_break(source, srcIdx); + const srcGrapheme = source.slice(srcIdx, breakIdx); + srcIdx = breakIdx; - let matchFound = check_match_found(srcNext.value.segment, needleSegments[0], locale, casePicker); - index = nextIndex; - srcNext = iteratorSrc.next(); - if (srcNext.done) { - result = matchFound ? 
index : result; - break; + if (!check_match_found(srcGrapheme, needleSegments[0], locale, casePicker)) { + continue; } - segmentWidth = srcNext.value.index; - nextIndex = index + segmentWidth; - if (matchFound) { - for (let j = 1; j < needleSegments.length; j++) { - if (srcNext.done) { - stop = true; - break; - } - matchFound = check_match_found(srcNext.value.segment, needleSegments[j], locale, casePicker); - if (!matchFound) - break; - srcNext = iteratorSrc.next(); - } - if (stop) + let j; + let srcNextIdx = srcIdx; + for (j = 1; j < needleSegments.length; j++) { + const breakIdx = graphemeBreaker.next_grapheme_break(source, srcNextIdx); + const srcGrapheme = source.slice(srcNextIdx, breakIdx); + + if (!check_match_found(srcGrapheme, needleSegments[j], locale, casePicker)) { break; + } + srcNextIdx = breakIdx; } - - if (matchFound) { - result = index; + if (j == needleSegments.length) { + result = srcIdx - srcGrapheme.length; if (fromBeginning) break; } - i = nextIndex; } wrap_no_error_root(is_exception, exceptionRoot); return result; diff --git a/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts b/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts new file mode 100644 index 00000000000000..457cee00655b5a --- /dev/null +++ b/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts @@ -0,0 +1,117 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +/** + * This file is partially using code from FormatJS Intl.Segmenter implementation, reference: + * https://github.com/formatjs/formatjs/blob/58d6a7b398d776ca3d2726d72ae1573b65cc3bef/packages/intl-segmenter/src/segmenter.ts + * https://github.com/formatjs/formatjs/blob/58d6a7b398d776ca3d2726d72ae1573b65cc3bef/packages/intl-segmenter/src/segmentation-utils.ts + */ + +import { SegmentationRules } from "./segmentation-rules"; + +type SegmentationRule = { + breaks: boolean + before?: RegExp + after?: RegExp +} + +type SegmentationRuleRaw = { + breaks: boolean + before?: string + after?: string +} + +type SegmentationTypeTypeRaw = { + variables: Record + rules: Record +} + +function replace_variables(variables: Record, input: string):string { + const findVarRegex = /\$[A-Za-z0-9_]+/gm; + return input.replaceAll(findVarRegex, match => { + if (!(match in variables)) { + throw new Error(`No such variable ${match}`); + } + return variables[match]; + }); +} + +function generate_rule_regex (rule: string, variables: Record, after: boolean): RegExp { + return new RegExp(`${after ? "^" : ""}${replace_variables(variables, rule)}${after ? 
"" : "$"}`); +} + +function prepare_segmanation_rules(segmentationTypeValue: SegmentationTypeTypeRaw): Record { + const preparedRules: Record = {}; + + for (const ruleNr of Object.keys(segmentationTypeValue.rules)) { + const ruleValue = segmentationTypeValue.rules[ruleNr]; + const preparedRule: SegmentationRule = {breaks: ruleValue.breaks,}; + + if ("before" in ruleValue && ruleValue.before) { + preparedRule.before = generate_rule_regex(ruleValue.before, segmentationTypeValue.variables, false); + } + if ("after" in ruleValue && ruleValue.after) { + preparedRule.after = generate_rule_regex(ruleValue.after, segmentationTypeValue.variables, true); + } + + preparedRules[ruleNr] = preparedRule; + } + return preparedRules; +} + +export class GraphemeSegmenter { + private readonly rules; + private readonly ruleSortedKeys; + + public constructor() { + // Process segmentation rules + this.rules = prepare_segmanation_rules(SegmentationRules); + this.ruleSortedKeys = Object.keys(this.rules).sort((a, b) => Number(a) - Number(b)); + } + + + public next_grapheme_break(str: string, startIndex: number): number { + if (startIndex < 0) + return 0; + + if (startIndex >= str.length - 1) + return str.length; + + let prev = String.fromCodePoint(str.codePointAt(startIndex)!); + for (let i = startIndex + 1; i < str.length; i++) { + // check if we are in the middle of surrogate pair + let high, low; + if ((0xD800 <= (high = str.charCodeAt(i - 1)) && high <= 0xDBFF) && + (0xDC00 <= (low = str.charCodeAt(i)) && low <= 0xDFFF)) { + continue; + } + + const next = String.fromCodePoint(str.codePointAt(i)!); + + if (this.is_grapheme_break(prev, next)) + return i; + + prev = next; + } + + return str.length; + } + + private is_grapheme_break(prev: string, next: string): boolean { + for (const key of this.ruleSortedKeys) { + const {before, after, breaks} = this.rules[key]; + // match before and after rules + if (before && !before.test(prev)) { + continue; + } + if (after && !after.test(next)) { + 
continue; + } + + return breaks; + } + + // GB999: Any ÷ Any + return true; + } +} diff --git a/src/mono/wasm/runtime/hybrid-globalization/segmentation-rules.ts b/src/mono/wasm/runtime/hybrid-globalization/segmentation-rules.ts new file mode 100644 index 00000000000000..76245c5f86c2d9 --- /dev/null +++ b/src/mono/wasm/runtime/hybrid-globalization/segmentation-rules.ts @@ -0,0 +1,87 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Reference for SegmentationRules: https://github.com/formatjs/formatjs/blob/58d6a7b398d776ca3d2726d72ae1573b65cc3bef/packages/intl-segmenter/src/cldr-segmentation-rules.generated.ts#L953-L1037 +export const SegmentationRules = { + "rules": { + "11": { + "after": "$ExtPict", + "before": "$ExtPict$Extend*$ZWJ", + "breaks": false + }, + "12": { + "after": "$RI", + "before": "^($RI$RI)*$RI", + "breaks": false + }, + "13": { + "after": "$RI", + "before": "[^\\uDDE6-\\uDDFF]($RI$RI)*$RI", + "breaks": false + }, + "3": { + "after": "$LF", + "before": "$CR", + "breaks": false + }, + "4": { + "before": "($Control|$CR|$LF)", + "breaks": true + }, + "5": { + "after": "($Control|$CR|$LF)", + "breaks": true + }, + "6": { + "after": "($L|$V|$LV|$LVT)", + "before": "$L", + "breaks": false + }, + "7": { + "after": "($V|$T)", + "before": "($LV|$V)", + "breaks": false + }, + "8": { + "after": "$T", + "before": "($LVT|$T)", + "breaks": false + }, + "9": { + "after": "($Extend|$ZWJ)", + "breaks": false + }, + "9.1": { + "after": "$SpacingMark", + "breaks": false + }, + "9.2": { + "before": "$Prepend", + "breaks": false + }, + "9.3": { + "after": "$LinkingConsonant", + "before": "$LinkingConsonant$ExtCccZwj*$Virama$ExtCccZwj*", + "breaks": false + } + }, + "variables": { + "$CR": "\\r", + "$Control": 
"(?:[\\0-\\t\\x0B\\f\\x0E-\\x1F\\x7F-\\x9F\\xAD\\u061C\\u180E\\u200B\\u200E\\u200F\\u2028-\\u202E\\u2060-\\u206F\\uFEFF\\uFFF0-\\uFFFB]|\\uD80D[\\uDC30-\\uDC3F]|\\uD82F[\\uDCA0-\\uDCA3]|\\uD834[\\uDD73-\\uDD7A]|\\uDB40[\\uDC00-\\uDC1F\\uDC80-\\uDCFF\\uDDF0-\\uDFFF]|[\\uDB41-\\uDB43][\\uDC00-\\uDFFF])", + "$ExtCccZwj": "(?:[\\u0300-\\u034E\\u0350-\\u036F\\u0483-\\u0487\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u064B-\\u065F\\u0670\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-\\u07F3\\u07FD\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0859-\\u085B\\u0898-\\u089F\\u08CA-\\u08E1\\u08E3-\\u08FF\\u093C\\u094D\\u0951-\\u0954\\u09BC\\u09CD\\u09FE\\u0A3C\\u0A4D\\u0ABC\\u0ACD\\u0B3C\\u0B4D\\u0BCD\\u0C3C\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CCD\\u0D3B\\u0D3C\\u0D4D\\u0DCA\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8-\\u0EBA\\u0EC8-\\u0ECB\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F7D\\u0F80\\u0F82-\\u0F84\\u0F86\\u0F87\\u0FC6\\u1037\\u1039\\u103A\\u108D\\u135D-\\u135F\\u1714\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75-\\u1A7C\\u1A7F\\u1AB0-\\u1ABD\\u1ABF-\\u1ACE\\u1B34\\u1B6B-\\u1B73\\u1BAB\\u1BE6\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8\\u1CED\\u1CF4\\u1CF8\\u1CF9\\u1DC0-\\u1DFF\\u200D\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2CEF-\\u2CF1\\u2D7F\\u2DE0-\\u2DFF\\u302A-\\u302F\\u3099\\u309A\\uA66F\\uA674-\\uA67D\\uA69E\\uA69F\\uA6F0\\uA6F1\\uA806\\uA82C\\uA8C4\\uA8E0-\\uA8F1\\uA92B-\\uA92D\\uA9B3\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF\\uAAC1\\uAAF6\\uABED\\uFB1E\\uFE20-\\uFE2F]|\\uD800[\\uDDFD\\uDEE0\\uDF76-\\uDF7A]|\\uD802[\\uDE0D\\uDE0F\\uDE38-\\uDE3A\\uDE3F\\uDEE5\\uDEE6]|\\uD803[\\uDD24-\\uDD27\\uDEAB\\uDEAC\\uDEFD-\\uDEFF\\uDF46-\\uDF50\\uDF82-\\uDF85]|\\uD804[\\uDC46\\uDC70\\uDC7F\\uDCB9\\uDCBA\\uDD00-\\uDD02\\uDD33\\uDD34\\uDD73\\uDDCA\\uDE36\\uDEE9\\uDEEA\\uDF3B\\uDF3C\\uDF66-\\uDF6C\\uDF70-\\uDF74]|\\uD805[\\uDC42\\uDC46\\uDC5E\\
uDCC2\\uDCC3\\uDDBF\\uDDC0\\uDE3F\\uDEB7\\uDF2B]|\\uD806[\\uDC39\\uDC3A\\uDD3E\\uDD43\\uDDE0\\uDE34\\uDE47\\uDE99]|\\uD807[\\uDC3F\\uDD42\\uDD44\\uDD45\\uDD97\\uDF42]|\\uD81A[\\uDEF0-\\uDEF4\\uDF30-\\uDF36]|\\uD82F\\uDC9E|\\uD834[\\uDD65\\uDD67-\\uDD69\\uDD6E-\\uDD72\\uDD7B-\\uDD82\\uDD85-\\uDD8B\\uDDAA-\\uDDAD\\uDE42-\\uDE44]|\\uD838[\\uDC00-\\uDC06\\uDC08-\\uDC18\\uDC1B-\\uDC21\\uDC23\\uDC24\\uDC26-\\uDC2A\\uDC8F\\uDD30-\\uDD36\\uDEAE\\uDEEC-\\uDEEF]|\\uD839[\\uDCEC-\\uDCEF]|\\uD83A[\\uDCD0-\\uDCD6\\uDD44-\\uDD4A])", + "$ExtPict": "(?:[\\xA9\\xAE\\u203C\\u2049\\u2122\\u2139\\u2194-\\u2199\\u21A9\\u21AA\\u231A\\u231B\\u2328\\u2388\\u23CF\\u23E9-\\u23F3\\u23F8-\\u23FA\\u24C2\\u25AA\\u25AB\\u25B6\\u25C0\\u25FB-\\u25FE\\u2600-\\u2605\\u2607-\\u2612\\u2614-\\u2685\\u2690-\\u2705\\u2708-\\u2712\\u2714\\u2716\\u271D\\u2721\\u2728\\u2733\\u2734\\u2744\\u2747\\u274C\\u274E\\u2753-\\u2755\\u2757\\u2763-\\u2767\\u2795-\\u2797\\u27A1\\u27B0\\u27BF\\u2934\\u2935\\u2B05-\\u2B07\\u2B1B\\u2B1C\\u2B50\\u2B55\\u3030\\u303D\\u3297\\u3299]|\\uD83C[\\uDC00-\\uDCFF\\uDD0D-\\uDD0F\\uDD2F\\uDD6C-\\uDD71\\uDD7E\\uDD7F\\uDD8E\\uDD91-\\uDD9A\\uDDAD-\\uDDE5\\uDE01-\\uDE0F\\uDE1A\\uDE2F\\uDE32-\\uDE3A\\uDE3C-\\uDE3F\\uDE49-\\uDFFA]|\\uD83D[\\uDC00-\\uDD3D\\uDD46-\\uDE4F\\uDE80-\\uDEFF\\uDF74-\\uDF7F\\uDFD5-\\uDFFF]|\\uD83E[\\uDC0C-\\uDC0F\\uDC48-\\uDC4F\\uDC5A-\\uDC5F\\uDC88-\\uDC8F\\uDCAE-\\uDCFF\\uDD0C-\\uDD3A\\uDD3C-\\uDD45\\uDD47-\\uDEFF]|\\uD83F[\\uDC00-\\uDFFD])", + "$Extend": 
"(?:[\\u0300-\\u036F\\u0483-\\u0489\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u064B-\\u065F\\u0670\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07A6-\\u07B0\\u07EB-\\u07F3\\u07FD\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0859-\\u085B\\u0898-\\u089F\\u08CA-\\u08E1\\u08E3-\\u0902\\u093A\\u093C\\u0941-\\u0948\\u094D\\u0951-\\u0957\\u0962\\u0963\\u0981\\u09BC\\u09BE\\u09C1-\\u09C4\\u09CD\\u09D7\\u09E2\\u09E3\\u09FE\\u0A01\\u0A02\\u0A3C\\u0A41\\u0A42\\u0A47\\u0A48\\u0A4B-\\u0A4D\\u0A51\\u0A70\\u0A71\\u0A75\\u0A81\\u0A82\\u0ABC\\u0AC1-\\u0AC5\\u0AC7\\u0AC8\\u0ACD\\u0AE2\\u0AE3\\u0AFA-\\u0AFF\\u0B01\\u0B3C\\u0B3E\\u0B3F\\u0B41-\\u0B44\\u0B4D\\u0B55-\\u0B57\\u0B62\\u0B63\\u0B82\\u0BBE\\u0BC0\\u0BCD\\u0BD7\\u0C00\\u0C04\\u0C3C\\u0C3E-\\u0C40\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C62\\u0C63\\u0C81\\u0CBC\\u0CBF\\u0CC2\\u0CC6\\u0CCC\\u0CCD\\u0CD5\\u0CD6\\u0CE2\\u0CE3\\u0D00\\u0D01\\u0D3B\\u0D3C\\u0D3E\\u0D41-\\u0D44\\u0D4D\\u0D57\\u0D62\\u0D63\\u0D81\\u0DCA\\u0DCF\\u0DD2-\\u0DD4\\u0DD6\\u0DDF\\u0E31\\u0E34-\\u0E3A\\u0E47-\\u0E4E\\u0EB1\\u0EB4-\\u0EBC\\u0EC8-\\u0ECE\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F71-\\u0F7E\\u0F80-\\u0F84\\u0F86\\u0F87\\u0F8D-\\u0F97\\u0F99-\\u0FBC\\u0FC6\\u102D-\\u1030\\u1032-\\u1037\\u1039\\u103A\\u103D\\u103E\\u1058\\u1059\\u105E-\\u1060\\u1071-\\u1074\\u1082\\u1085\\u1086\\u108D\\u109D\\u135D-\\u135F\\u1712-\\u1714\\u1732\\u1733\\u1752\\u1753\\u1772\\u1773\\u17B4\\u17B5\\u17B7-\\u17BD\\u17C6\\u17C9-\\u17D3\\u17DD\\u180B-\\u180D\\u180F\\u1885\\u1886\\u18A9\\u1920-\\u1922\\u1927\\u1928\\u1932\\u1939-\\u193B\\u1A17\\u1A18\\u1A1B\\u1A56\\u1A58-\\u1A5E\\u1A60\\u1A62\\u1A65-\\u1A6C\\u1A73-\\u1A7C\\u1A7F\\u1AB0-\\u1ACE\\u1B00-\\u1B03\\u1B34-\\u1B3A\\u1B3C\\u1B42\\u1B6B-\\u1B73\\u1B80\\u1B81\\u1BA2-\\u1BA5\\u1BA8\\u1BA9\\u1BAB-\\u1BAD\\u1BE6\\u1BE8\\u1BE9\\u1BED\\u1BEF-\\u1BF1\\u1C2C-\\u1C33\\u1C36\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8\\u1CED\\u1C
F4\\u1CF8\\u1CF9\\u1DC0-\\u1DFF\\u200C\\u20D0-\\u20F0\\u2CEF-\\u2CF1\\u2D7F\\u2DE0-\\u2DFF\\u302A-\\u302F\\u3099\\u309A\\uA66F-\\uA672\\uA674-\\uA67D\\uA69E\\uA69F\\uA6F0\\uA6F1\\uA802\\uA806\\uA80B\\uA825\\uA826\\uA82C\\uA8C4\\uA8C5\\uA8E0-\\uA8F1\\uA8FF\\uA926-\\uA92D\\uA947-\\uA951\\uA980-\\uA982\\uA9B3\\uA9B6-\\uA9B9\\uA9BC\\uA9BD\\uA9E5\\uAA29-\\uAA2E\\uAA31\\uAA32\\uAA35\\uAA36\\uAA43\\uAA4C\\uAA7C\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF\\uAAC1\\uAAEC\\uAAED\\uAAF6\\uABE5\\uABE8\\uABED\\uFB1E\\uFE00-\\uFE0F\\uFE20-\\uFE2F\\uFF9E\\uFF9F]|\\uD800[\\uDDFD\\uDEE0\\uDF76-\\uDF7A]|\\uD802[\\uDE01-\\uDE03\\uDE05\\uDE06\\uDE0C-\\uDE0F\\uDE38-\\uDE3A\\uDE3F\\uDEE5\\uDEE6]|\\uD803[\\uDD24-\\uDD27\\uDEAB\\uDEAC\\uDEFD-\\uDEFF\\uDF46-\\uDF50\\uDF82-\\uDF85]|\\uD804[\\uDC01\\uDC38-\\uDC46\\uDC70\\uDC73\\uDC74\\uDC7F-\\uDC81\\uDCB3-\\uDCB6\\uDCB9\\uDCBA\\uDCC2\\uDD00-\\uDD02\\uDD27-\\uDD2B\\uDD2D-\\uDD34\\uDD73\\uDD80\\uDD81\\uDDB6-\\uDDBE\\uDDC9-\\uDDCC\\uDDCF\\uDE2F-\\uDE31\\uDE34\\uDE36\\uDE37\\uDE3E\\uDE41\\uDEDF\\uDEE3-\\uDEEA\\uDF00\\uDF01\\uDF3B\\uDF3C\\uDF3E\\uDF40\\uDF57\\uDF66-\\uDF6C\\uDF70-\\uDF74]|\\uD805[\\uDC38-\\uDC3F\\uDC42-\\uDC44\\uDC46\\uDC5E\\uDCB0\\uDCB3-\\uDCB8\\uDCBA\\uDCBD\\uDCBF\\uDCC0\\uDCC2\\uDCC3\\uDDAF\\uDDB2-\\uDDB5\\uDDBC\\uDDBD\\uDDBF\\uDDC0\\uDDDC\\uDDDD\\uDE33-\\uDE3A\\uDE3D\\uDE3F\\uDE40\\uDEAB\\uDEAD\\uDEB0-\\uDEB5\\uDEB7\\uDF1D-\\uDF1F\\uDF22-\\uDF25\\uDF27-\\uDF2B]|\\uD806[\\uDC2F-\\uDC37\\uDC39\\uDC3A\\uDD30\\uDD3B\\uDD3C\\uDD3E\\uDD43\\uDDD4-\\uDDD7\\uDDDA\\uDDDB\\uDDE0\\uDE01-\\uDE0A\\uDE33-\\uDE38\\uDE3B-\\uDE3E\\uDE47\\uDE51-\\uDE56\\uDE59-\\uDE5B\\uDE8A-\\uDE96\\uDE98\\uDE99]|\\uD807[\\uDC30-\\uDC36\\uDC38-\\uDC3D\\uDC3F\\uDC92-\\uDCA7\\uDCAA-\\uDCB0\\uDCB2\\uDCB3\\uDCB5\\uDCB6\\uDD31-\\uDD36\\uDD3A\\uDD3C\\uDD3D\\uDD3F-\\uDD45\\uDD47\\uDD90\\uDD91\\uDD95\\uDD97\\uDEF3\\uDEF4\\uDF00\\uDF01\\uDF36-\\uDF3A\\uDF40\\uDF42]|\\uD80D[\\uDC40\\uDC47-\\uDC55]|\\uD81A[\\uDEF0-\\uDEF4\\uDF30-\\uDF36]|\\uD81B[\\uDF4F\\uDF8F-\\uDF92
\\uDFE4]|\\uD82F[\\uDC9D\\uDC9E]|\\uD833[\\uDF00-\\uDF2D\\uDF30-\\uDF46]|\\uD834[\\uDD65\\uDD67-\\uDD69\\uDD6E-\\uDD72\\uDD7B-\\uDD82\\uDD85-\\uDD8B\\uDDAA-\\uDDAD\\uDE42-\\uDE44]|\\uD836[\\uDE00-\\uDE36\\uDE3B-\\uDE6C\\uDE75\\uDE84\\uDE9B-\\uDE9F\\uDEA1-\\uDEAF]|\\uD838[\\uDC00-\\uDC06\\uDC08-\\uDC18\\uDC1B-\\uDC21\\uDC23\\uDC24\\uDC26-\\uDC2A\\uDC8F\\uDD30-\\uDD36\\uDEAE\\uDEEC-\\uDEEF]|\\uD839[\\uDCEC-\\uDCEF]|\\uD83A[\\uDCD0-\\uDCD6\\uDD44-\\uDD4A]|\\uD83C[\\uDFFB-\\uDFFF]|\\uDB40[\\uDC20-\\uDC7F\\uDD00-\\uDDEF])", + "$L": "[\\u1100-\\u115F\\uA960-\\uA97C]", + "$LF": "\\n", + "$LV": "[\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C\\uC058\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4\\uC
1E0\\uC1FC\\uC218\\uC234\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C\\uC448\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528\\uC544\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624\\uC640\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720\\uC73C\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C\\uC838\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918\\uC934\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC\\uD018\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8\\uD114\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4\\uD210\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0\\uD30C\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC\\uD408\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8\\uD504\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4\\uD600\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0\\uD6FC\\uD718\\uD734\\uD750\\uD76C\\uD788]", + "$LVT": 
"[\\uAC01-\\uAC1B\\uAC1D-\\uAC37\\uAC39-\\uAC53\\uAC55-\\uAC6F\\uAC71-\\uAC8B\\uAC8D-\\uACA7\\uACA9-\\uACC3\\uACC5-\\uACDF\\uACE1-\\uACFB\\uACFD-\\uAD17\\uAD19-\\uAD33\\uAD35-\\uAD4F\\uAD51-\\uAD6B\\uAD6D-\\uAD87\\uAD89-\\uADA3\\uADA5-\\uADBF\\uADC1-\\uADDB\\uADDD-\\uADF7\\uADF9-\\uAE13\\uAE15-\\uAE2F\\uAE31-\\uAE4B\\uAE4D-\\uAE67\\uAE69-\\uAE83\\uAE85-\\uAE9F\\uAEA1-\\uAEBB\\uAEBD-\\uAED7\\uAED9-\\uAEF3\\uAEF5-\\uAF0F\\uAF11-\\uAF2B\\uAF2D-\\uAF47\\uAF49-\\uAF63\\uAF65-\\uAF7F\\uAF81-\\uAF9B\\uAF9D-\\uAFB7\\uAFB9-\\uAFD3\\uAFD5-\\uAFEF\\uAFF1-\\uB00B\\uB00D-\\uB027\\uB029-\\uB043\\uB045-\\uB05F\\uB061-\\uB07B\\uB07D-\\uB097\\uB099-\\uB0B3\\uB0B5-\\uB0CF\\uB0D1-\\uB0EB\\uB0ED-\\uB107\\uB109-\\uB123\\uB125-\\uB13F\\uB141-\\uB15B\\uB15D-\\uB177\\uB179-\\uB193\\uB195-\\uB1AF\\uB1B1-\\uB1CB\\uB1CD-\\uB1E7\\uB1E9-\\uB203\\uB205-\\uB21F\\uB221-\\uB23B\\uB23D-\\uB257\\uB259-\\uB273\\uB275-\\uB28F\\uB291-\\uB2AB\\uB2AD-\\uB2C7\\uB2C9-\\uB2E3\\uB2E5-\\uB2FF\\uB301-\\uB31B\\uB31D-\\uB337\\uB339-\\uB353\\uB355-\\uB36F\\uB371-\\uB38B\\uB38D-\\uB3A7\\uB3A9-\\uB3C3\\uB3C5-\\uB3DF\\uB3E1-\\uB3FB\\uB3FD-\\uB417\\uB419-\\uB433\\uB435-\\uB44F\\uB451-\\uB46B\\uB46D-\\uB487\\uB489-\\uB4A3\\uB4A5-\\uB4BF\\uB4C1-\\uB4DB\\uB4DD-\\uB4F7\\uB4F9-\\uB513\\uB515-\\uB52F\\uB531-\\uB54B\\uB54D-\\uB567\\uB569-\\uB583\\uB585-\\uB59F\\uB5A1-\\uB5BB\\uB5BD-\\uB5D7\\uB5D9-\\uB5F3\\uB5F5-\\uB60F\\uB611-\\uB62B\\uB62D-\\uB647\\uB649-\\uB663\\uB665-\\uB67F\\uB681-\\uB69B\\uB69D-\\uB6B7\\uB6B9-\\uB6D3\\uB6D5-\\uB6EF\\uB6F1-\\uB70B\\uB70D-\\uB727\\uB729-\\uB743\\uB745-\\uB75F\\uB761-\\uB77B\\uB77D-\\uB797\\uB799-\\uB7B3\\uB7B5-\\uB7CF\\uB7D1-\\uB7EB\\uB7ED-\\uB807\\uB809-\\uB823\\uB825-\\uB83F\\uB841-\\uB85B\\uB85D-\\uB877\\uB879-\\uB893\\uB895-\\uB8AF\\uB8B1-\\uB8CB\\uB8CD-\\uB8E7\\uB8E9-\\uB903\\uB905-\\uB91F\\uB921-\\uB93B\\uB93D-\\uB957\\uB959-\\uB973\\uB975-\\uB98F\\uB991-\\uB9AB\\uB9AD-\\uB9C7\\uB9C9-\\uB9E3\\uB9E5-\\uB9FF\\uBA01-\\uBA1B\\uBA1D-\\uBA37\\uBA39-\\uBA53\\uBA55-\\uBA6F\\uBA71-\\uBA8B\\u
BA8D-\\uBAA7\\uBAA9-\\uBAC3\\uBAC5-\\uBADF\\uBAE1-\\uBAFB\\uBAFD-\\uBB17\\uBB19-\\uBB33\\uBB35-\\uBB4F\\uBB51-\\uBB6B\\uBB6D-\\uBB87\\uBB89-\\uBBA3\\uBBA5-\\uBBBF\\uBBC1-\\uBBDB\\uBBDD-\\uBBF7\\uBBF9-\\uBC13\\uBC15-\\uBC2F\\uBC31-\\uBC4B\\uBC4D-\\uBC67\\uBC69-\\uBC83\\uBC85-\\uBC9F\\uBCA1-\\uBCBB\\uBCBD-\\uBCD7\\uBCD9-\\uBCF3\\uBCF5-\\uBD0F\\uBD11-\\uBD2B\\uBD2D-\\uBD47\\uBD49-\\uBD63\\uBD65-\\uBD7F\\uBD81-\\uBD9B\\uBD9D-\\uBDB7\\uBDB9-\\uBDD3\\uBDD5-\\uBDEF\\uBDF1-\\uBE0B\\uBE0D-\\uBE27\\uBE29-\\uBE43\\uBE45-\\uBE5F\\uBE61-\\uBE7B\\uBE7D-\\uBE97\\uBE99-\\uBEB3\\uBEB5-\\uBECF\\uBED1-\\uBEEB\\uBEED-\\uBF07\\uBF09-\\uBF23\\uBF25-\\uBF3F\\uBF41-\\uBF5B\\uBF5D-\\uBF77\\uBF79-\\uBF93\\uBF95-\\uBFAF\\uBFB1-\\uBFCB\\uBFCD-\\uBFE7\\uBFE9-\\uC003\\uC005-\\uC01F\\uC021-\\uC03B\\uC03D-\\uC057\\uC059-\\uC073\\uC075-\\uC08F\\uC091-\\uC0AB\\uC0AD-\\uC0C7\\uC0C9-\\uC0E3\\uC0E5-\\uC0FF\\uC101-\\uC11B\\uC11D-\\uC137\\uC139-\\uC153\\uC155-\\uC16F\\uC171-\\uC18B\\uC18D-\\uC1A7\\uC1A9-\\uC1C3\\uC1C5-\\uC1DF\\uC1E1-\\uC1FB\\uC1FD-\\uC217\\uC219-\\uC233\\uC235-\\uC24F\\uC251-\\uC26B\\uC26D-\\uC287\\uC289-\\uC2A3\\uC2A5-\\uC2BF\\uC2C1-\\uC2DB\\uC2DD-\\uC2F7\\uC2F9-\\uC313\\uC315-\\uC32F\\uC331-\\uC34B\\uC34D-\\uC367\\uC369-\\uC383\\uC385-\\uC39F\\uC3A1-\\uC3BB\\uC3BD-\\uC3D7\\uC3D9-\\uC3F3\\uC3F5-\\uC40F\\uC411-\\uC42B\\uC42D-\\uC447\\uC449-\\uC463\\uC465-\\uC47F\\uC481-\\uC49B\\uC49D-\\uC4B7\\uC4B9-\\uC4D3\\uC4D5-\\uC4EF\\uC4F1-\\uC50B\\uC50D-\\uC527\\uC529-\\uC543\\uC545-\\uC55F\\uC561-\\uC57B\\uC57D-\\uC597\\uC599-\\uC5B3\\uC5B5-\\uC5CF\\uC5D1-\\uC5EB\\uC5ED-\\uC607\\uC609-\\uC623\\uC625-\\uC63F\\uC641-\\uC65B\\uC65D-\\uC677\\uC679-\\uC693\\uC695-\\uC6AF\\uC6B1-\\uC6CB\\uC6CD-\\uC6E7\\uC6E9-\\uC703\\uC705-\\uC71F\\uC721-\\uC73B\\uC73D-\\uC757\\uC759-\\uC773\\uC775-\\uC78F\\uC791-\\uC7AB\\uC7AD-\\uC7C7\\uC7C9-\\uC7E3\\uC7E5-\\uC7FF\\uC801-\\uC81B\\uC81D-\\uC837\\uC839-\\uC853\\uC855-\\uC86F\\uC871-\\uC88B\\uC88D-\\uC8A7\\uC8A9-\\uC8C3\\uC8C5-\\uC8DF\\uC8E1-\\uC8FB\\uC8FD-\\uC917\\uC919-
\\uC933\\uC935-\\uC94F\\uC951-\\uC96B\\uC96D-\\uC987\\uC989-\\uC9A3\\uC9A5-\\uC9BF\\uC9C1-\\uC9DB\\uC9DD-\\uC9F7\\uC9F9-\\uCA13\\uCA15-\\uCA2F\\uCA31-\\uCA4B\\uCA4D-\\uCA67\\uCA69-\\uCA83\\uCA85-\\uCA9F\\uCAA1-\\uCABB\\uCABD-\\uCAD7\\uCAD9-\\uCAF3\\uCAF5-\\uCB0F\\uCB11-\\uCB2B\\uCB2D-\\uCB47\\uCB49-\\uCB63\\uCB65-\\uCB7F\\uCB81-\\uCB9B\\uCB9D-\\uCBB7\\uCBB9-\\uCBD3\\uCBD5-\\uCBEF\\uCBF1-\\uCC0B\\uCC0D-\\uCC27\\uCC29-\\uCC43\\uCC45-\\uCC5F\\uCC61-\\uCC7B\\uCC7D-\\uCC97\\uCC99-\\uCCB3\\uCCB5-\\uCCCF\\uCCD1-\\uCCEB\\uCCED-\\uCD07\\uCD09-\\uCD23\\uCD25-\\uCD3F\\uCD41-\\uCD5B\\uCD5D-\\uCD77\\uCD79-\\uCD93\\uCD95-\\uCDAF\\uCDB1-\\uCDCB\\uCDCD-\\uCDE7\\uCDE9-\\uCE03\\uCE05-\\uCE1F\\uCE21-\\uCE3B\\uCE3D-\\uCE57\\uCE59-\\uCE73\\uCE75-\\uCE8F\\uCE91-\\uCEAB\\uCEAD-\\uCEC7\\uCEC9-\\uCEE3\\uCEE5-\\uCEFF\\uCF01-\\uCF1B\\uCF1D-\\uCF37\\uCF39-\\uCF53\\uCF55-\\uCF6F\\uCF71-\\uCF8B\\uCF8D-\\uCFA7\\uCFA9-\\uCFC3\\uCFC5-\\uCFDF\\uCFE1-\\uCFFB\\uCFFD-\\uD017\\uD019-\\uD033\\uD035-\\uD04F\\uD051-\\uD06B\\uD06D-\\uD087\\uD089-\\uD0A3\\uD0A5-\\uD0BF\\uD0C1-\\uD0DB\\uD0DD-\\uD0F7\\uD0F9-\\uD113\\uD115-\\uD12F\\uD131-\\uD14B\\uD14D-\\uD167\\uD169-\\uD183\\uD185-\\uD19F\\uD1A1-\\uD1BB\\uD1BD-\\uD1D7\\uD1D9-\\uD1F3\\uD1F5-\\uD20F\\uD211-\\uD22B\\uD22D-\\uD247\\uD249-\\uD263\\uD265-\\uD27F\\uD281-\\uD29B\\uD29D-\\uD2B7\\uD2B9-\\uD2D3\\uD2D5-\\uD2EF\\uD2F1-\\uD30B\\uD30D-\\uD327\\uD329-\\uD343\\uD345-\\uD35F\\uD361-\\uD37B\\uD37D-\\uD397\\uD399-\\uD3B3\\uD3B5-\\uD3CF\\uD3D1-\\uD3EB\\uD3ED-\\uD407\\uD409-\\uD423\\uD425-\\uD43F\\uD441-\\uD45B\\uD45D-\\uD477\\uD479-\\uD493\\uD495-\\uD4AF\\uD4B1-\\uD4CB\\uD4CD-\\uD4E7\\uD4E9-\\uD503\\uD505-\\uD51F\\uD521-\\uD53B\\uD53D-\\uD557\\uD559-\\uD573\\uD575-\\uD58F\\uD591-\\uD5AB\\uD5AD-\\uD5C7\\uD5C9-\\uD5E3\\uD5E5-\\uD5FF\\uD601-\\uD61B\\uD61D-\\uD637\\uD639-\\uD653\\uD655-\\uD66F\\uD671-\\uD68B\\uD68D-\\uD6A7\\uD6A9-\\uD6C3\\uD6C5-\\uD6DF\\uD6E1-\\uD6FB\\uD6FD-\\uD717\\uD719-\\uD733\\uD735-\\uD74F\\uD751-\\uD76B\\uD76D-\\uD787\\uD789-\\uD7A3]", + 
"$LinkingConsonant": "[\\u0915-\\u0939\\u0958-\\u095F\\u0978-\\u097F\\u0995-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09DC\\u09DD\\u09DF\\u09F0\\u09F1\\u0A95-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0AF9\\u0B15-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B35-\\u0B39\\u0B5C\\u0B5D\\u0B5F\\u0B71\\u0C15-\\u0C28\\u0C2A-\\u0C39\\u0C58-\\u0C5A\\u0D15-\\u0D3A]", + "$Prepend": "(?:[\\u0600-\\u0605\\u06DD\\u070F\\u0890\\u0891\\u08E2\\u0D4E]|\\uD804[\\uDCBD\\uDCCD\\uDDC2\\uDDC3]|\\uD806[\\uDD3F\\uDD41\\uDE3A\\uDE84-\\uDE89]|\\uD807[\\uDD46\\uDF02])", + "$RI": "(?:\\uD83C[\\uDDE6-\\uDDFF])", + "$SpacingMark": "(?:[\\u0903\\u093B\\u093E-\\u0940\\u0949-\\u094C\\u094E\\u094F\\u0982\\u0983\\u09BF\\u09C0\\u09C7\\u09C8\\u09CB\\u09CC\\u0A03\\u0A3E-\\u0A40\\u0A83\\u0ABE-\\u0AC0\\u0AC9\\u0ACB\\u0ACC\\u0B02\\u0B03\\u0B40\\u0B47\\u0B48\\u0B4B\\u0B4C\\u0BBF\\u0BC1\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCC\\u0C01-\\u0C03\\u0C41-\\u0C44\\u0C82\\u0C83\\u0CBE\\u0CC0\\u0CC1\\u0CC3\\u0CC4\\u0CC7\\u0CC8\\u0CCA\\u0CCB\\u0CF3\\u0D02\\u0D03\\u0D3F\\u0D40\\u0D46-\\u0D48\\u0D4A-\\u0D4C\\u0D82\\u0D83\\u0DD0\\u0DD1\\u0DD8-\\u0DDE\\u0DF2\\u0DF3\\u0E33\\u0EB3\\u0F3E\\u0F3F\\u0F7F\\u1031\\u103B\\u103C\\u1056\\u1057\\u1084\\u1715\\u1734\\u17B6\\u17BE-\\u17C5\\u17C7\\u17C8\\u1923-\\u1926\\u1929-\\u192B\\u1930\\u1931\\u1933-\\u1938\\u1A19\\u1A1A\\u1A55\\u1A57\\u1A6D-\\u1A72\\u1B04\\u1B3B\\u1B3D-\\u1B41\\u1B43\\u1B44\\u1B82\\u1BA1\\u1BA6\\u1BA7\\u1BAA\\u1BE7\\u1BEA-\\u1BEC\\u1BEE\\u1BF2\\u1BF3\\u1C24-\\u1C2B\\u1C34\\u1C35\\u1CE1\\u1CF7\\uA823\\uA824\\uA827\\uA880\\uA881\\uA8B4-\\uA8C3\\uA952\\uA953\\uA983\\uA9B4\\uA9B5\\uA9BA\\uA9BB\\uA9BE-\\uA9C0\\uAA2F\\uAA30\\uAA33\\uAA34\\uAA4D\\uAAEB\\uAAEE\\uAAEF\\uAAF5\\uABE3\\uABE4\\uABE6\\uABE7\\uABE9\\uABEA\\uABEC]|\\uD804[\\uDC00\\uDC02\\uDC82\\uDCB0-\\uDCB2\\uDCB7\\uDCB8\\uDD2C\\uDD45\\uDD46\\uDD82\\uDDB3-\\uDDB5\\uDDBF\\uDDC0\\uDDCE\\uDE2C-\\uDE2E\\uDE32\\uDE33\\uDE35\\uDEE0-\\uDEE2\\uDF02\\uDF03\\uDF3F\\uDF41-\\uDF44\\uDF47\\uDF48\\uDF4B-\\uDF4D\\uDF62\
\uDF63]|\\uD805[\\uDC35-\\uDC37\\uDC40\\uDC41\\uDC45\\uDCB1\\uDCB2\\uDCB9\\uDCBB\\uDCBC\\uDCBE\\uDCC1\\uDDB0\\uDDB1\\uDDB8-\\uDDBB\\uDDBE\\uDE30-\\uDE32\\uDE3B\\uDE3C\\uDE3E\\uDEAC\\uDEAE\\uDEAF\\uDEB6\\uDF26]|\\uD806[\\uDC2C-\\uDC2E\\uDC38\\uDD31-\\uDD35\\uDD37\\uDD38\\uDD3D\\uDD40\\uDD42\\uDDD1-\\uDDD3\\uDDDC-\\uDDDF\\uDDE4\\uDE39\\uDE57\\uDE58\\uDE97]|\\uD807[\\uDC2F\\uDC3E\\uDCA9\\uDCB1\\uDCB4\\uDD8A-\\uDD8E\\uDD93\\uDD94\\uDD96\\uDEF5\\uDEF6\\uDF03\\uDF34\\uDF35\\uDF3E\\uDF3F\\uDF41]|\\uD81B[\\uDF51-\\uDF87\\uDFF0\\uDFF1]|\\uD834[\\uDD66\\uDD6D])", + "$T": "[\\u11A8-\\u11FF\\uD7CB-\\uD7FB]", + "$V": "[\\u1160-\\u11A7\\uD7B0-\\uD7C6]", + "$Virama": "[\\u094D\\u09CD\\u0ACD\\u0B4D\\u0C4D\\u0D4D]", + "$ZWJ": "\\u200D" + } +}; From b4186f6224c7a02376f5a533e336822866c0383d Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Fri, 1 Dec 2023 15:03:18 +0000 Subject: [PATCH 02/20] refactor grapheme segmentation --- .../hybrid-globalization/collations.ts | 16 ++--- .../grapheme-segmenter.ts | 66 ++++++++++++------- 2 files changed, 49 insertions(+), 33 deletions(-) diff --git a/src/mono/wasm/runtime/hybrid-globalization/collations.ts b/src/mono/wasm/runtime/hybrid-globalization/collations.ts index 6843e96acc977c..ede081672e4ead 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/collations.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/collations.ts @@ -123,16 +123,15 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne // Grapheme segmentation of needle string while (needleIdx < needle.length) { - const breakIdx = graphemeBreaker.next_grapheme_break(needle, needleIdx); - needleSegments.push(needle.slice(needleIdx, breakIdx)); - needleIdx = breakIdx; + const needleGrapheme = graphemeBreaker.next_grapheme(needle, needleIdx); + needleSegments.push(needleGrapheme); + needleIdx += needleGrapheme.length; } let srcIdx = 0; while (srcIdx < source.length) { - const breakIdx = graphemeBreaker.next_grapheme_break(source, srcIdx); - const 
srcGrapheme = source.slice(srcIdx, breakIdx); - srcIdx = breakIdx; + const srcGrapheme = graphemeBreaker.next_grapheme(source, srcIdx); + srcIdx += srcGrapheme.length; if (!check_match_found(srcGrapheme, needleSegments[0], locale, casePicker)) { continue; @@ -141,13 +140,12 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne let j; let srcNextIdx = srcIdx; for (j = 1; j < needleSegments.length; j++) { - const breakIdx = graphemeBreaker.next_grapheme_break(source, srcNextIdx); - const srcGrapheme = source.slice(srcNextIdx, breakIdx); + const srcGrapheme = graphemeBreaker.next_grapheme(source, srcNextIdx); if (!check_match_found(srcGrapheme, needleSegments[j], locale, casePicker)) { break; } - srcNextIdx = breakIdx; + srcNextIdx += srcGrapheme.length; } if (j == needleSegments.length) { result = srcIdx - srcGrapheme.length; diff --git a/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts b/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts index 457cee00655b5a..c0061cebf25573 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts @@ -21,12 +21,12 @@ type SegmentationRuleRaw = { after?: string } -type SegmentationTypeTypeRaw = { +type SegmentationTypeRaw = { variables: Record rules: Record } -function replace_variables(variables: Record, input: string):string { +function replace_variables(variables: Record, input: string): string { const findVarRegex = /\$[A-Za-z0-9_]+/gm; return input.replaceAll(findVarRegex, match => { if (!(match in variables)) { @@ -40,36 +40,36 @@ function generate_rule_regex (rule: string, variables: Record, a return new RegExp(`${after ? "^" : ""}${replace_variables(variables, rule)}${after ? 
"" : "$"}`); } -function prepare_segmanation_rules(segmentationTypeValue: SegmentationTypeTypeRaw): Record { - const preparedRules: Record = {}; - - for (const ruleNr of Object.keys(segmentationTypeValue.rules)) { - const ruleValue = segmentationTypeValue.rules[ruleNr]; - const preparedRule: SegmentationRule = {breaks: ruleValue.breaks,}; - - if ("before" in ruleValue && ruleValue.before) { - preparedRule.before = generate_rule_regex(ruleValue.before, segmentationTypeValue.variables, false); - } - if ("after" in ruleValue && ruleValue.after) { - preparedRule.after = generate_rule_regex(ruleValue.after, segmentationTypeValue.variables, true); - } - - preparedRules[ruleNr] = preparedRule; - } - return preparedRules; -} - export class GraphemeSegmenter { private readonly rules; private readonly ruleSortedKeys; public constructor() { // Process segmentation rules - this.rules = prepare_segmanation_rules(SegmentationRules); + this.rules = GraphemeSegmenter.prepare_segmanation_rules(SegmentationRules); this.ruleSortedKeys = Object.keys(this.rules).sort((a, b) => Number(a) - Number(b)); } + /** + * Returns the next grapheme in the given string starting from the specified index. + * @param str - The input string. + * @param startIndex - The starting index. + * @returns The next grapheme. + */ + public next_grapheme(str: string, startIndex: number): string { + const breakIdx = this.next_grapheme_break(str, startIndex); + return str.substring(startIndex, breakIdx); + } + + + /** + * Finds the index of the next grapheme break in a given string starting from a specified index. + * + * @param str - The input string. + * @param startIndex - The index to start searching from. + * @returns The index of the next grapheme break. 
+ */ public next_grapheme_break(str: string, startIndex: number): number { if (startIndex < 0) return 0; @@ -85,9 +85,8 @@ export class GraphemeSegmenter { (0xDC00 <= (low = str.charCodeAt(i)) && low <= 0xDFFF)) { continue; } - - const next = String.fromCodePoint(str.codePointAt(i)!); + const next = String.fromCodePoint(str.codePointAt(i)!); if (this.is_grapheme_break(prev, next)) return i; @@ -114,4 +113,23 @@ export class GraphemeSegmenter { // GB999: Any ÷ Any return true; } + + private static prepare_segmanation_rules(segmentationRules: SegmentationTypeRaw): Record { + const preparedRules: Record = {}; + + for (const key of Object.keys(segmentationRules.rules)) { + const ruleValue = segmentationRules.rules[key]; + const preparedRule: SegmentationRule = { breaks: ruleValue.breaks, }; + + if ("before" in ruleValue && ruleValue.before) { + preparedRule.before = generate_rule_regex(ruleValue.before, segmentationRules.variables, false); + } + if ("after" in ruleValue && ruleValue.after) { + preparedRule.after = generate_rule_regex(ruleValue.after, segmentationRules.variables, true); + } + + preparedRules[key] = preparedRule; + } + return preparedRules; + } } From cc22cf249d634083264f4c750a9858aaf92fa041 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Mon, 4 Dec 2023 14:21:40 +0100 Subject: [PATCH 03/20] polish grapheme-segmenter.ts 2 --- .../grapheme-segmenter.ts | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts b/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts index c0061cebf25573..5ceeeb2cbd5073 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts @@ -26,6 +26,13 @@ type SegmentationTypeRaw = { rules: Record } +function is_surrogate_pair(str: string, index: number): boolean { + const high = str.charCodeAt(index - 1); + const low = 
str.charCodeAt(index); + + return 0xD800 <= high && high <= 0xDBFF && 0xDC00 <= low && low <= 0xDFFF; +} + function replace_variables(variables: Record, input: string): string { const findVarRegex = /\$[A-Za-z0-9_]+/gm; return input.replaceAll(findVarRegex, match => { @@ -50,7 +57,6 @@ export class GraphemeSegmenter { this.ruleSortedKeys = Object.keys(this.rules).sort((a, b) => Number(a) - Number(b)); } - /** * Returns the next grapheme in the given string starting from the specified index. * @param str - The input string. @@ -62,7 +68,6 @@ export class GraphemeSegmenter { return str.substring(startIndex, breakIdx); } - /** * Finds the index of the next grapheme break in a given string starting from a specified index. * @@ -79,31 +84,29 @@ export class GraphemeSegmenter { let prev = String.fromCodePoint(str.codePointAt(startIndex)!); for (let i = startIndex + 1; i < str.length; i++) { - // check if we are in the middle of surrogate pair - let high, low; - if ((0xD800 <= (high = str.charCodeAt(i - 1)) && high <= 0xDBFF) && - (0xDC00 <= (low = str.charCodeAt(i)) && low <= 0xDFFF)) { + // Don't break surrogate pairs + if (is_surrogate_pair(str, i)) { continue; } - const next = String.fromCodePoint(str.codePointAt(i)!); - if (this.is_grapheme_break(prev, next)) + const curr = String.fromCodePoint(str.codePointAt(i)!); + if (this.is_grapheme_break(prev, curr)) return i; - prev = next; + prev = curr; } return str.length; } - private is_grapheme_break(prev: string, next: string): boolean { + private is_grapheme_break(previous: string, current: string): boolean { for (const key of this.ruleSortedKeys) { const {before, after, breaks} = this.rules[key]; // match before and after rules - if (before && !before.test(prev)) { + if (before && !before.test(previous)) { continue; } - if (after && !after.test(next)) { + if (after && !after.test(current)) { continue; } From 0b54cbc8c794a914193ddbb52cf47a3252f88e63 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Fri, 8 Dec 2023 
10:20:53 +0100 Subject: [PATCH 04/20] fix equals --- src/mono/wasm/runtime/hybrid-globalization/collations.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mono/wasm/runtime/hybrid-globalization/collations.ts b/src/mono/wasm/runtime/hybrid-globalization/collations.ts index ede081672e4ead..81bcf47201eec1 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/collations.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/collations.ts @@ -147,7 +147,7 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne } srcNextIdx += srcGrapheme.length; } - if (j == needleSegments.length) { + if (j === needleSegments.length) { result = srcIdx - srcGrapheme.length; if (fromBeginning) break; From 0ea511854ba5ca19bec62e711264cacfb45ef60d Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Fri, 8 Dec 2023 14:37:11 +0100 Subject: [PATCH 05/20] cache GraphemeSegmenter --- src/mono/wasm/runtime/hybrid-globalization/collations.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/mono/wasm/runtime/hybrid-globalization/collations.ts b/src/mono/wasm/runtime/hybrid-globalization/collations.ts index 81bcf47201eec1..4a87271251f3bc 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/collations.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/collations.ts @@ -10,6 +10,7 @@ import { GraphemeSegmenter } from "./grapheme-segmenter"; const COMPARISON_ERROR = -2; const INDEXING_ERROR = -1; +let graphemeSegmenterCached: GraphemeSegmenter | null; export function mono_wasm_compare_string(culture: MonoStringRef, str1: number, str1Length: number, str2: number, str2Length: number, options: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): number { const cultureRoot = mono_wasm_new_external_root(culture), @@ -117,20 +118,20 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne const casePicker = (options & 0x1f); let result = -1; - const graphemeBreaker = new GraphemeSegmenter(); 
+ const graphemeSegmenter = graphemeSegmenterCached || (graphemeSegmenterCached = new GraphemeSegmenter()); const needleSegments = []; let needleIdx = 0; // Grapheme segmentation of needle string while (needleIdx < needle.length) { - const needleGrapheme = graphemeBreaker.next_grapheme(needle, needleIdx); + const needleGrapheme = graphemeSegmenter.next_grapheme(needle, needleIdx); needleSegments.push(needleGrapheme); needleIdx += needleGrapheme.length; } let srcIdx = 0; while (srcIdx < source.length) { - const srcGrapheme = graphemeBreaker.next_grapheme(source, srcIdx); + const srcGrapheme = graphemeSegmenter.next_grapheme(source, srcIdx); srcIdx += srcGrapheme.length; if (!check_match_found(srcGrapheme, needleSegments[0], locale, casePicker)) { @@ -140,7 +141,7 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne let j; let srcNextIdx = srcIdx; for (j = 1; j < needleSegments.length; j++) { - const srcGrapheme = graphemeBreaker.next_grapheme(source, srcNextIdx); + const srcGrapheme = graphemeSegmenter.next_grapheme(source, srcNextIdx); if (!check_match_found(srcGrapheme, needleSegments[j], locale, casePicker)) { break; From 95c7ddf1b311c95a9174a4457e67461e33a56eac Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Fri, 8 Dec 2023 15:24:06 +0100 Subject: [PATCH 06/20] re-use isSurrogate from change-case.ts --- .../wasm/runtime/hybrid-globalization/change-case.ts | 2 +- .../runtime/hybrid-globalization/grapheme-segmenter.ts | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/mono/wasm/runtime/hybrid-globalization/change-case.ts b/src/mono/wasm/runtime/hybrid-globalization/change-case.ts index 6249a9599561ea..7c8b7fe1da65f8 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/change-case.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/change-case.ts @@ -160,7 +160,7 @@ export function mono_wasm_change_case(culture: MonoStringRef, src: number, srcLe } } -function isSurrogate(str: string, startIdx: 
number) : boolean +export function isSurrogate(str: string, startIdx: number) : boolean { return SURROGATE_HIGHER_START <= str[startIdx] && str[startIdx] <= SURROGATE_HIGHER_END && diff --git a/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts b/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts index 5ceeeb2cbd5073..b42ce81ba5f0ca 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts @@ -8,6 +8,7 @@ */ import { SegmentationRules } from "./segmentation-rules"; +import { isSurrogate } from "./change-case"; type SegmentationRule = { breaks: boolean @@ -26,13 +27,6 @@ type SegmentationTypeRaw = { rules: Record } -function is_surrogate_pair(str: string, index: number): boolean { - const high = str.charCodeAt(index - 1); - const low = str.charCodeAt(index); - - return 0xD800 <= high && high <= 0xDBFF && 0xDC00 <= low && low <= 0xDFFF; -} - function replace_variables(variables: Record, input: string): string { const findVarRegex = /\$[A-Za-z0-9_]+/gm; return input.replaceAll(findVarRegex, match => { @@ -85,7 +79,7 @@ export class GraphemeSegmenter { let prev = String.fromCodePoint(str.codePointAt(startIndex)!); for (let i = startIndex + 1; i < str.length; i++) { // Don't break surrogate pairs - if (is_surrogate_pair(str, i)) { + if (isSurrogate(str, i)) { continue; } From 5a7335d826c9a977baea48feeed72bd14d5c0ed6 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Mon, 11 Dec 2023 18:55:14 +0100 Subject: [PATCH 07/20] move isSurrogate to helpers.ts --- .../runtime/hybrid-globalization/change-case.ts | 15 +-------------- .../hybrid-globalization/grapheme-segmenter.ts | 2 +- .../wasm/runtime/hybrid-globalization/helpers.ts | 16 +++++++++++++++- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/mono/wasm/runtime/hybrid-globalization/change-case.ts b/src/mono/wasm/runtime/hybrid-globalization/change-case.ts index 
7c8b7fe1da65f8..f9d227517b819b 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/change-case.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/change-case.ts @@ -7,11 +7,7 @@ import { MonoObject, MonoObjectRef, MonoString, MonoStringRef } from "../types/i import { Int32Ptr } from "../types/emscripten"; import { wrap_error_root, wrap_no_error_root } from "../invoke-js"; import { localHeapViewU16, setU16_local } from "../memory"; - -const SURROGATE_HIGHER_START = "\uD800"; -const SURROGATE_HIGHER_END = "\uDBFF"; -const SURROGATE_LOWER_START = "\uDC00"; -const SURROGATE_LOWER_END = "\uDFFF"; +import { isSurrogate } from "./helpers"; export function mono_wasm_change_case_invariant(src: number, srcLength: number, dst: number, dstLength: number, toUpper: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): void { const exceptionRoot = mono_wasm_new_external_root(ex_address); @@ -160,15 +156,6 @@ export function mono_wasm_change_case(culture: MonoStringRef, src: number, srcLe } } -export function isSurrogate(str: string, startIdx: number) : boolean -{ - return SURROGATE_HIGHER_START <= str[startIdx] && - str[startIdx] <= SURROGATE_HIGHER_END && - startIdx+1 < str.length && - SURROGATE_LOWER_START <= str[startIdx+1] && - str[startIdx+1] <= SURROGATE_LOWER_END; -} - function appendSurrogateToMemory(heapI16: Uint16Array, dst: number, surrogate: string, idx: number) { setU16_local(heapI16, dst + idx*2, surrogate.charCodeAt(0)); diff --git a/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts b/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts index b42ce81ba5f0ca..47b5fefa504e19 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts @@ -8,7 +8,7 @@ */ import { SegmentationRules } from "./segmentation-rules"; -import { isSurrogate } from "./change-case"; +import { isSurrogate } from "./helpers"; type SegmentationRule = { breaks: boolean diff 
--git a/src/mono/wasm/runtime/hybrid-globalization/helpers.ts b/src/mono/wasm/runtime/hybrid-globalization/helpers.ts index d5b5da45e7daed..3c51b2a756d2b3 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/helpers.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/helpers.ts @@ -1,6 +1,11 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +const SURROGATE_HIGHER_START = "\uD800"; +const SURROGATE_HIGHER_END = "\uDBFF"; +const SURROGATE_LOWER_START = "\uDC00"; +const SURROGATE_LOWER_END = "\uDFFF"; + export const OUTER_SEPARATOR = "##"; export const INNER_SEPARATOR = "||"; @@ -24,4 +29,13 @@ export function normalizeLocale(locale: string | null) { throw new Error(`Get culture info failed for culture = ${locale} with error: ${ex}`); } -} \ No newline at end of file +} + +export function isSurrogate(str: string, startIdx: number) : boolean +{ + return SURROGATE_HIGHER_START <= str[startIdx] && + str[startIdx] <= SURROGATE_HIGHER_END && + startIdx+1 < str.length && + SURROGATE_LOWER_START <= str[startIdx+1] && + str[startIdx+1] <= SURROGATE_LOWER_END; +} From 61e06b22bd5d39cc615badd72b9d79df7d6b7584 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Mon, 11 Dec 2023 19:10:31 +0100 Subject: [PATCH 08/20] change GraphemeSegmenter functions to camelCase --- .../hybrid-globalization/collations.ts | 6 ++--- .../grapheme-segmenter.ts | 24 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/mono/wasm/runtime/hybrid-globalization/collations.ts b/src/mono/wasm/runtime/hybrid-globalization/collations.ts index 4a87271251f3bc..807a2dbbffa00e 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/collations.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/collations.ts @@ -124,14 +124,14 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne // Grapheme segmentation of needle string while (needleIdx < needle.length) { -
const needleGrapheme = graphemeSegmenter.next_grapheme(needle, needleIdx); + const needleGrapheme = graphemeSegmenter.nextGrapheme(needle, needleIdx); needleSegments.push(needleGrapheme); needleIdx += needleGrapheme.length; } let srcIdx = 0; while (srcIdx < source.length) { - const srcGrapheme = graphemeSegmenter.next_grapheme(source, srcIdx); + const srcGrapheme = graphemeSegmenter.nextGrapheme(source, srcIdx); srcIdx += srcGrapheme.length; if (!check_match_found(srcGrapheme, needleSegments[0], locale, casePicker)) { @@ -141,7 +141,7 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne let j; let srcNextIdx = srcIdx; for (j = 1; j < needleSegments.length; j++) { - const srcGrapheme = graphemeSegmenter.next_grapheme(source, srcNextIdx); + const srcGrapheme = graphemeSegmenter.nextGrapheme(source, srcNextIdx); if (!check_match_found(srcGrapheme, needleSegments[j], locale, casePicker)) { break; diff --git a/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts b/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts index 47b5fefa504e19..7736151b634583 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/grapheme-segmenter.ts @@ -27,7 +27,7 @@ type SegmentationTypeRaw = { rules: Record } -function replace_variables(variables: Record, input: string): string { +function replaceVariables(variables: Record, input: string): string { const findVarRegex = /\$[A-Za-z0-9_]+/gm; return input.replaceAll(findVarRegex, match => { if (!(match in variables)) { @@ -37,8 +37,8 @@ function replace_variables(variables: Record, input: string): st }); } -function generate_rule_regex (rule: string, variables: Record, after: boolean): RegExp { - return new RegExp(`${after ? "^" : ""}${replace_variables(variables, rule)}${after ? "" : "$"}`); +function generateRegexRule (rule: string, variables: Record, after: boolean): RegExp { + return new RegExp(`${after ? 
"^" : ""}${replaceVariables(variables, rule)}${after ? "" : "$"}`); } export class GraphemeSegmenter { @@ -47,7 +47,7 @@ export class GraphemeSegmenter { public constructor() { // Process segmentation rules - this.rules = GraphemeSegmenter.prepare_segmanation_rules(SegmentationRules); + this.rules = GraphemeSegmenter.prepareSegmentationRules(SegmentationRules); this.ruleSortedKeys = Object.keys(this.rules).sort((a, b) => Number(a) - Number(b)); } @@ -57,8 +57,8 @@ export class GraphemeSegmenter { * @param startIndex - The starting index. * @returns The next grapheme. */ - public next_grapheme(str: string, startIndex: number): string { - const breakIdx = this.next_grapheme_break(str, startIndex); + public nextGrapheme(str: string, startIndex: number): string { + const breakIdx = this.nextGraphemeBreak(str, startIndex); return str.substring(startIndex, breakIdx); } @@ -69,7 +69,7 @@ export class GraphemeSegmenter { * @param startIndex - The index to start searching from. * @returns The index of the next grapheme break. 
*/ - public next_grapheme_break(str: string, startIndex: number): number { + public nextGraphemeBreak(str: string, startIndex: number): number { if (startIndex < 0) return 0; @@ -84,7 +84,7 @@ export class GraphemeSegmenter { } const curr = String.fromCodePoint(str.codePointAt(i)!); - if (this.is_grapheme_break(prev, curr)) + if (this.isGraphemeBreak(prev, curr)) return i; prev = curr; @@ -93,7 +93,7 @@ export class GraphemeSegmenter { return str.length; } - private is_grapheme_break(previous: string, current: string): boolean { + private isGraphemeBreak(previous: string, current: string): boolean { for (const key of this.ruleSortedKeys) { const {before, after, breaks} = this.rules[key]; // match before and after rules @@ -111,7 +111,7 @@ export class GraphemeSegmenter { return true; } - private static prepare_segmanation_rules(segmentationRules: SegmentationTypeRaw): Record { + private static prepareSegmentationRules(segmentationRules: SegmentationTypeRaw): Record { const preparedRules: Record = {}; for (const key of Object.keys(segmentationRules.rules)) { @@ -119,10 +119,10 @@ export class GraphemeSegmenter { const preparedRule: SegmentationRule = { breaks: ruleValue.breaks, }; if ("before" in ruleValue && ruleValue.before) { - preparedRule.before = generate_rule_regex(ruleValue.before, segmentationRules.variables, false); + preparedRule.before = generateRegexRule(ruleValue.before, segmentationRules.variables, false); } if ("after" in ruleValue && ruleValue.after) { - preparedRule.after = generate_rule_regex(ruleValue.after, segmentationRules.variables, true); + preparedRule.after = generateRegexRule(ruleValue.after, segmentationRules.variables, true); } preparedRules[key] = preparedRule; From 2240a294ef200d5d830e13010c3b412595821624 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Tue, 12 Dec 2023 10:46:08 +0100 Subject: [PATCH 09/20] Change collations.ts functions to camelCase --- .../hybrid-globalization/collations.ts | 34 +++++++++---------- 1 file changed,
17 insertions(+), 17 deletions(-) diff --git a/src/mono/wasm/runtime/hybrid-globalization/collations.ts b/src/mono/wasm/runtime/hybrid-globalization/collations.ts index 807a2dbbffa00e..7c1bb5a03da03c 100644 --- a/src/mono/wasm/runtime/hybrid-globalization/collations.ts +++ b/src/mono/wasm/runtime/hybrid-globalization/collations.ts @@ -22,7 +22,7 @@ export function mono_wasm_compare_string(culture: MonoStringRef, str1: number, s const casePicker = (options & 0x1f); const locale = cultureName ? cultureName : undefined; wrap_no_error_root(is_exception, exceptionRoot); - return compare_strings(string1, string2, locale, casePicker); + return compareStrings(string1, string2, locale, casePicker); } catch (ex: any) { wrap_error_root(is_exception, ex, exceptionRoot); @@ -39,19 +39,19 @@ export function mono_wasm_starts_with(culture: MonoStringRef, str1: number, str1 exceptionRoot = mono_wasm_new_external_root(ex_address); try { const cultureName = monoStringToString(cultureRoot); - const prefix = decode_to_clean_string(str2, str2Length); + const prefix = decodeToCleanString(str2, str2Length); // no need to look for an empty string if (prefix.length == 0) return 1; // true - const source = decode_to_clean_string(str1, str1Length); + const source = decodeToCleanString(str1, str1Length); if (source.length < prefix.length) return 0; //false const sourceOfPrefixLength = source.slice(0, prefix.length); const casePicker = (options & 0x1f); const locale = cultureName ? cultureName : undefined; - const result = compare_strings(sourceOfPrefixLength, prefix, locale, casePicker); + const result = compareStrings(sourceOfPrefixLength, prefix, locale, casePicker); wrap_no_error_root(is_exception, exceptionRoot); return result === 0 ? 1 : 0; // equals ? 
true : false } @@ -70,11 +70,11 @@ export function mono_wasm_ends_with(culture: MonoStringRef, str1: number, str1Le exceptionRoot = mono_wasm_new_external_root(ex_address); try { const cultureName = monoStringToString(cultureRoot); - const suffix = decode_to_clean_string(str2, str2Length); + const suffix = decodeToCleanString(str2, str2Length); if (suffix.length == 0) return 1; // true - const source = decode_to_clean_string(str1, str1Length); + const source = decodeToCleanString(str1, str1Length); const diff = source.length - suffix.length; if (diff < 0) return 0; //false @@ -82,7 +82,7 @@ export function mono_wasm_ends_with(culture: MonoStringRef, str1: number, str1Le const casePicker = (options & 0x1f); const locale = cultureName ? cultureName : undefined; - const result = compare_strings(sourceOfSuffixLength, suffix, locale, casePicker); + const result = compareStrings(sourceOfSuffixLength, suffix, locale, casePicker); wrap_no_error_root(is_exception, exceptionRoot); return result === 0 ? 1 : 0; // equals ? true : false } @@ -102,14 +102,14 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne try { const needle = utf16ToString(needlePtr, (needlePtr + 2 * needleLength)); // no need to look for an empty string - if (clean_string(needle).length == 0) { + if (cleanString(needle).length == 0) { wrap_no_error_root(is_exception, exceptionRoot); return fromBeginning ? 0 : srcLength; } const source = utf16ToString(srcPtr, (srcPtr + 2 * srcLength)); // no need to look in an empty string - if (clean_string(source).length == 0) { + if (cleanString(source).length == 0) { wrap_no_error_root(is_exception, exceptionRoot); return fromBeginning ? 
0 : srcLength; } @@ -134,7 +134,7 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne const srcGrapheme = graphemeSegmenter.nextGrapheme(source, srcIdx); srcIdx += srcGrapheme.length; - if (!check_match_found(srcGrapheme, needleSegments[0], locale, casePicker)) { + if (!checkMatchFound(srcGrapheme, needleSegments[0], locale, casePicker)) { continue; } @@ -143,7 +143,7 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne for (j = 1; j < needleSegments.length; j++) { const srcGrapheme = graphemeSegmenter.nextGrapheme(source, srcNextIdx); - if (!check_match_found(srcGrapheme, needleSegments[j], locale, casePicker)) { + if (!checkMatchFound(srcGrapheme, needleSegments[j], locale, casePicker)) { break; } srcNextIdx += srcGrapheme.length; @@ -166,12 +166,12 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne exceptionRoot.release(); } - function check_match_found(str1: string, str2: string, locale: string | undefined, casePicker: number): boolean { - return compare_strings(str1, str2, locale, casePicker) === 0; + function checkMatchFound(str1: string, str2: string, locale: string | undefined, casePicker: number): boolean { + return compareStrings(str1, str2, locale, casePicker) === 0; } } -function compare_strings(string1: string, string2: string, locale: string | undefined, casePicker: number): number { +function compareStrings(string1: string, string2: string, locale: string | undefined, casePicker: number): number { switch (casePicker) { case 0: // 0: None - default algorithm for the platform OR @@ -263,12 +263,12 @@ function compare_strings(string1: string, string2: string, locale: string | unde } } -function decode_to_clean_string(strPtr: number, strLen: number) { +function decodeToCleanString(strPtr: number, strLen: number) { const str = utf16ToString(strPtr, (strPtr + 2 * strLen)); - return clean_string(str); + return cleanString(str); } -function clean_string(str: 
string) { +function cleanString(str: string) { const nStr = str.normalize(); return nStr.replace(/[\u200B-\u200D\uFEFF\0]/g, ""); } From 2b1034b98feb5786af0930bc25835c720047dc2c Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Fri, 5 Jan 2024 13:57:18 +0100 Subject: [PATCH 10/20] load segmentation rules as static json --- eng/liveBuilds.targets | 3 +- .../Directory.Build.props | 1 + src/mono/browser/browser.proj | 8 +++- src/mono/browser/build/BrowserWasmApp.targets | 1 + src/mono/browser/runtime/assets.ts | 19 ++++++++++ src/mono/browser/runtime/exports.ts | 3 +- .../grapheme-segmenter.ts | 22 ++++++++--- ...ation-rules.ts => segmentation-rules.json} | 38 +++++++++---------- src/mono/browser/runtime/loader/assets.ts | 15 ++++++++ src/mono/browser/runtime/types/index.ts | 5 +++ src/mono/browser/runtime/types/internal.ts | 1 + .../AssetsComputingHelper.cs | 1 + .../BootJsonBuilderHelper.cs | 2 + .../BootJsonData.cs | 6 +++ .../ComputeWasmBuildAssets.cs | 1 + 15 files changed, 96 insertions(+), 30 deletions(-) rename src/mono/browser/runtime/hybrid-globalization/{segmentation-rules.ts => segmentation-rules.json} (98%) diff --git a/eng/liveBuilds.targets b/eng/liveBuilds.targets index 48a7f853f60173..5f3bcc6c356f13 100644 --- a/eng/liveBuilds.targets +++ b/eng/liveBuilds.targets @@ -208,7 +208,8 @@ $(LibrariesNativeArtifactsPath)package.json; $(LibrariesNativeArtifactsPath)dotnet.native.wasm; $(LibrariesNativeArtifactsPath)dotnet.native.js.symbols; - $(LibrariesNativeArtifactsPath)*.dat;" + $(LibrariesNativeArtifactsPath)*.dat; + $(LibrariesNativeArtifactsPath)segmentation-rules.json;" IsNative="true" /> + diff --git a/src/mono/browser/browser.proj b/src/mono/browser/browser.proj index 7e7453a4326494..0b67ca6a31f102 100644 --- a/src/mono/browser/browser.proj +++ b/src/mono/browser/browser.proj @@ -456,6 +456,11 @@ DestinationFolder="$(NativeBinDir)" SkipUnchangedFiles="true" /> + + + diff --git a/src/mono/browser/build/BrowserWasmApp.targets 
b/src/mono/browser/build/BrowserWasmApp.targets index 7ca72154951a9c..32f2d7271ba49b 100644 --- a/src/mono/browser/build/BrowserWasmApp.targets +++ b/src/mono/browser/build/BrowserWasmApp.targets @@ -110,6 +110,7 @@ + diff --git a/src/mono/browser/runtime/assets.ts b/src/mono/browser/runtime/assets.ts index d7c5c82b4d8311..8a7460a24be71c 100644 --- a/src/mono/browser/runtime/assets.ts +++ b/src/mono/browser/runtime/assets.ts @@ -10,6 +10,7 @@ import { endMeasure, MeasuredBlock, startMeasure } from "./profiler"; import { AssetEntryInternal } from "./types/internal"; import { AssetEntry } from "./types"; import { VoidPtr } from "./types/emscripten"; +import { setSegmentationRulesFromJson } from "./hybrid-globalization/grapheme-segmenter"; // this need to be run only after onRuntimeInitialized event, when the memory is ready export function instantiate_asset(asset: AssetEntry, url: string, bytes: Uint8Array): void { @@ -25,6 +26,7 @@ export function instantiate_asset(asset: AssetEntry, url: string, bytes: Uint8Ar case "dotnetwasm": case "js-module-threads": case "symbols": + case "static-json": // do nothing break; case "resource": @@ -104,6 +106,23 @@ export async function instantiate_symbols_asset(pendingAsset: AssetEntryInternal } } +export async function instantiate_static_json_asset(pendingAsset: AssetEntryInternal): Promise { + try { + const response = await pendingAsset.pendingDownloadInternal!.response; + const json = await response.json(); + + switch (pendingAsset.name) { + case "segmentation-rules.json": + setSegmentationRulesFromJson(json); + break; + default: + throw new Error(`Unknown static json asset: ${pendingAsset.name}`); + } + } catch (error: any) { + mono_log_info(`Error loading static json asset ${pendingAsset.name}: ${JSON.stringify(error)}`); + } +} + export async function wait_for_all_assets() { // wait for all assets in memory await runtimeHelpers.allAssetsInMemory.promise; diff --git a/src/mono/browser/runtime/exports.ts 
b/src/mono/browser/runtime/exports.ts index 0e594c4cd0d8c0..9ea0794932d137 100644 --- a/src/mono/browser/runtime/exports.ts +++ b/src/mono/browser/runtime/exports.ts @@ -20,7 +20,7 @@ import { mono_bind_static_method } from "./net6-legacy/method-calls"; import { export_binding_api, export_internal_api, export_mono_api } from "./net6-legacy/exports-legacy"; import { initializeLegacyExports } from "./net6-legacy/globals"; import { mono_log_warn, mono_wasm_stringify_as_error_with_stack } from "./logging"; -import { instantiate_asset, instantiate_symbols_asset } from "./assets"; +import { instantiate_asset, instantiate_symbols_asset, instantiate_static_json_asset } from "./assets"; import { jiterpreter_dump_stats } from "./jiterpreter"; import { forceDisposeProxies } from "./gc-handles"; @@ -46,6 +46,7 @@ function initializeExports(globalObjects: GlobalObjects): RuntimeAPI { instantiate_asset, jiterpreter_dump_stats, forceDisposeProxies, + instantiate_static_json_asset, }); const API = export_api(); diff --git a/src/mono/browser/runtime/hybrid-globalization/grapheme-segmenter.ts b/src/mono/browser/runtime/hybrid-globalization/grapheme-segmenter.ts index 7736151b634583..7322443a86ccdb 100644 --- a/src/mono/browser/runtime/hybrid-globalization/grapheme-segmenter.ts +++ b/src/mono/browser/runtime/hybrid-globalization/grapheme-segmenter.ts @@ -7,7 +7,7 @@ * https://github.com/formatjs/formatjs/blob/58d6a7b398d776ca3d2726d72ae1573b65cc3bef/packages/intl-segmenter/src/segmentation-utils.ts */ -import { SegmentationRules } from "./segmentation-rules"; +import { mono_assert } from "../globals"; import { isSurrogate } from "./helpers"; type SegmentationRule = { @@ -27,6 +27,8 @@ type SegmentationTypeRaw = { rules: Record } +let segmentationRules: Record; + function replaceVariables(variables: Record, input: string): string { const findVarRegex = /\$[A-Za-z0-9_]+/gm; return input.replaceAll(findVarRegex, match => { @@ -41,13 +43,21 @@ function generateRegexRule (rule: string, 
variables: Record, aft return new RegExp(`${after ? "^" : ""}${replaceVariables(variables, rule)}${after ? "" : "$"}`); } +function isSegmentationTypeRaw(obj: any): obj is SegmentationTypeRaw { + return obj.variables != null && obj.rules != null; +} + +export function setSegmentationRulesFromJson(json: string) { + mono_assert(isSegmentationTypeRaw(json), "Provided grapheme segmentation rules are not valid"); + segmentationRules = GraphemeSegmenter.prepareSegmentationRules(json); +} + export class GraphemeSegmenter { - private readonly rules; - private readonly ruleSortedKeys; + private readonly rules: Record; + private readonly ruleSortedKeys: string[]; public constructor() { - // Process segmentation rules - this.rules = GraphemeSegmenter.prepareSegmentationRules(SegmentationRules); + this.rules = segmentationRules; this.ruleSortedKeys = Object.keys(this.rules).sort((a, b) => Number(a) - Number(b)); } @@ -111,7 +121,7 @@ export class GraphemeSegmenter { return true; } - private static prepareSegmentationRules(segmentationRules: SegmentationTypeRaw): Record { + public static prepareSegmentationRules(segmentationRules: SegmentationTypeRaw): Record { const preparedRules: Record = {}; for (const key of Object.keys(segmentationRules.rules)) { diff --git a/src/mono/browser/runtime/hybrid-globalization/segmentation-rules.ts b/src/mono/browser/runtime/hybrid-globalization/segmentation-rules.json similarity index 98% rename from src/mono/browser/runtime/hybrid-globalization/segmentation-rules.ts rename to src/mono/browser/runtime/hybrid-globalization/segmentation-rules.json index 76245c5f86c2d9..f99c632f843178 100644 --- a/src/mono/browser/runtime/hybrid-globalization/segmentation-rules.ts +++ b/src/mono/browser/runtime/hybrid-globalization/segmentation-rules.json @@ -1,24 +1,5 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -// Reference for SegmentationRules: https://github.com/formatjs/formatjs/blob/58d6a7b398d776ca3d2726d72ae1573b65cc3bef/packages/intl-segmenter/src/cldr-segmentation-rules.generated.ts#L953-L1037 -export const SegmentationRules = { +{ "rules": { - "11": { - "after": "$ExtPict", - "before": "$ExtPict$Extend*$ZWJ", - "breaks": false - }, - "12": { - "after": "$RI", - "before": "^($RI$RI)*$RI", - "breaks": false - }, - "13": { - "after": "$RI", - "before": "[^\\uDDE6-\\uDDFF]($RI$RI)*$RI", - "breaks": false - }, "3": { "after": "$LF", "before": "$CR", @@ -51,6 +32,21 @@ export const SegmentationRules = { "after": "($Extend|$ZWJ)", "breaks": false }, + "11": { + "after": "$ExtPict", + "before": "$ExtPict$Extend*$ZWJ", + "breaks": false + }, + "12": { + "after": "$RI", + "before": "^($RI$RI)*$RI", + "breaks": false + }, + "13": { + "after": "$RI", + "before": "[^\\uDDE6-\\uDDFF]($RI$RI)*$RI", + "breaks": false + }, "9.1": { "after": "$SpacingMark", "breaks": false @@ -84,4 +80,4 @@ export const SegmentationRules = { "$Virama": "[\\u094D\\u09CD\\u0ACD\\u0B4D\\u0C4D\\u0D4D]", "$ZWJ": "\\u200D" } -}; +} diff --git a/src/mono/browser/runtime/loader/assets.ts b/src/mono/browser/runtime/loader/assets.ts index 50ad4b4c93573c..b7ebc3cf93324d 100644 --- a/src/mono/browser/runtime/loader/assets.ts +++ b/src/mono/browser/runtime/loader/assets.ts @@ -68,6 +68,7 @@ const skipBufferByAssetTypes: { } = { "dotnetwasm": true, "symbols": true, + "static-json": true, }; const containedInSnapshotByAssetTypes: { @@ -87,6 +88,7 @@ const skipInstantiateByAssetTypes: { ...jsModulesAssetTypes, "dotnetwasm": true, "symbols": true, + "static-json": true, }; export function shouldLoadIcuAsset(asset: AssetEntryInternal): boolean { @@ -224,6 +226,9 @@ export async function mono_download_assets(): Promise { if (asset.behavior === "symbols") { await runtimeHelpers.instantiate_symbols_asset(asset); cleanupAsset(asset); + } else if (asset.behavior === "static-json") { + await 
runtimeHelpers.instantiate_static_json_asset(asset); + cleanupAsset(asset); } if (skipBufferByAssetTypes[asset.behavior]) { @@ -356,6 +361,16 @@ export function prepareAssets() { }); } } + + if (resources.staticJsonAssets) { + for (const name in resources.staticJsonAssets) { + alwaysLoadedAssets.push({ + name, + hash: resources.staticJsonAssets[name], + behavior: "static-json" + }); + } + } } // FIXME: should we also load Net7 backward compatible `config.configs` in a same way ? diff --git a/src/mono/browser/runtime/types/index.ts b/src/mono/browser/runtime/types/index.ts index 237d49d4971607..30fc022c7371e7 100644 --- a/src/mono/browser/runtime/types/index.ts +++ b/src/mono/browser/runtime/types/index.ts @@ -154,6 +154,7 @@ export interface ResourceGroups { wasmSymbols?: ResourceList; wasmNative: ResourceList; icu?: ResourceList; + staticJsonAssets?: ResourceList; satelliteResources?: { [cultureName: string]: ResourceList }; @@ -300,6 +301,10 @@ export type AssetBehaviors = SingleAssetBehaviors | * The javascript module for threads. */ | "symbols" + /** + * Load static JSON file. 
+ */ + | "static-json" export const enum GlobalizationMode { /** diff --git a/src/mono/browser/runtime/types/internal.ts b/src/mono/browser/runtime/types/internal.ts index 45fcafbc080f98..c6dc55d929cac5 100644 --- a/src/mono/browser/runtime/types/internal.ts +++ b/src/mono/browser/runtime/types/internal.ts @@ -215,6 +215,7 @@ export type RuntimeHelpers = { stringify_as_error_with_stack?: (error: any) => string, instantiate_asset: (asset: AssetEntry, url: string, bytes: Uint8Array) => void, instantiate_symbols_asset: (pendingAsset: AssetEntryInternal) => Promise, + instantiate_static_json_asset: (pendingAsset: AssetEntryInternal) => Promise, jiterpreter_dump_stats?: (x: boolean) => string, forceDisposeProxies: (disposeMethods: boolean, verbose: boolean) => void, } diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/AssetsComputingHelper.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/AssetsComputingHelper.cs index 6ee7d9a5a25ba4..d9a5eda5373ee3 100644 --- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/AssetsComputingHelper.cs +++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/AssetsComputingHelper.cs @@ -64,6 +64,7 @@ public static bool ShouldFilterCandidate( ".dat" when invariantGlobalization && fileName.StartsWith("icudt") => "invariant globalization is enabled", ".dat" when loadFullICUData && fileName != "icudt" => "full ICU data is enabled", ".dat" when hybridGlobalization && fileName != "icudt_hybrid" => "hybrid globalization is enabled", + ".json" when !hybridGlobalization && fileName == "segmentation-rules" => "segmentation-rules.json is only used when hybrid globalization is enabled", ".dat" when !string.IsNullOrEmpty(customIcuCandidateFilename) && fileName != customIcuCandidateFilename => "custom icu file either from absolute path or from runtime pack path will be used", ".dat" when IsDefaultIcuMode() && !(icuShardsFromRuntimePack.Any(f => f == fileName)) => "automatic icu shard selection, based on application culture, is 
enabled", ".json" when fromMonoPackage && (fileName == "wasm-props" || fileName == "package") => $"{fileName}{extension} is not used by Blazor", diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonBuilderHelper.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonBuilderHelper.cs index 07c05113ac843f..e45c8134372e2f 100644 --- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonBuilderHelper.cs +++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonBuilderHelper.cs @@ -68,6 +68,8 @@ static void AddDictionary(StringBuilder sb, Dictionary? res) return bootConfig.resources.wasmSymbols ??= new(); else if (resourceName.StartsWith("icudt", StringComparison.OrdinalIgnoreCase)) return bootConfig.resources.icu ??= new(); + else if (resourceName.StartsWith("segmentation-rules", StringComparison.OrdinalIgnoreCase) && string.Equals(resourceExtension, ".json", StringComparison.OrdinalIgnoreCase)) + return bootConfig.resources.staticJsonAssets ??= new(); else Log.LogError($"The resource '{resourceName}' is not recognized as any native asset"); diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonData.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonData.cs index 97bd05110bfbfc..5166b5eab7b22d 100644 --- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonData.cs +++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonData.cs @@ -201,6 +201,12 @@ public class ResourcesData [DataMember(EmitDefaultValue = false)] public List remoteSources { get; set; } + + /// + /// Static JSON assets + /// + [DataMember(EmitDefaultValue = false)] + public ResourceHashesByNameDictionary staticJsonAssets { get; set; } } public enum GlobalizationMode : int diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ComputeWasmBuildAssets.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ComputeWasmBuildAssets.cs index f2e4336da3630b..e5c10d67a4a306 100644 --- 
a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ComputeWasmBuildAssets.cs +++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ComputeWasmBuildAssets.cs @@ -265,6 +265,7 @@ private static void ApplyUniqueMetadataProperties(ITaskItem candidate) case ".wasm": case ".blat": case ".dat" when filename.StartsWith("icudt"): + case ".json" when filename.StartsWith("segmentation-rules"): candidate.SetMetadata("AssetTraitName", "WasmResource"); candidate.SetMetadata("AssetTraitValue", "native"); break; From b9a36c1dabc56a4d93a118065a93c0ff00e62548 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Mon, 8 Jan 2024 13:31:46 +0100 Subject: [PATCH 11/20] re-formulate ShouldFilterCandidate condition --- .../AssetsComputingHelper.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/AssetsComputingHelper.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/AssetsComputingHelper.cs index d9a5eda5373ee3..5ff583b90df6d5 100644 --- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/AssetsComputingHelper.cs +++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/AssetsComputingHelper.cs @@ -64,7 +64,7 @@ public static bool ShouldFilterCandidate( ".dat" when invariantGlobalization && fileName.StartsWith("icudt") => "invariant globalization is enabled", ".dat" when loadFullICUData && fileName != "icudt" => "full ICU data is enabled", ".dat" when hybridGlobalization && fileName != "icudt_hybrid" => "hybrid globalization is enabled", - ".json" when !hybridGlobalization && fileName == "segmentation-rules" => "segmentation-rules.json is only used when hybrid globalization is enabled", + ".json" when hybridGlobalization && fileName != "segmentation-rules" => "hybrid globalization is enabled, expecting segmentation-rules.json", ".dat" when !string.IsNullOrEmpty(customIcuCandidateFilename) && fileName != customIcuCandidateFilename => "custom icu file either from absolute path or from runtime pack path will be used", 
".dat" when IsDefaultIcuMode() && !(icuShardsFromRuntimePack.Any(f => f == fileName)) => "automatic icu shard selection, based on application culture, is enabled", ".json" when fromMonoPackage && (fileName == "wasm-props" || fileName == "package") => $"{fileName}{extension} is not used by Blazor", From 2b62eb2fca1d3a4fa85998a1740d4c9e0b49a1da Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Mon, 8 Jan 2024 13:46:42 +0100 Subject: [PATCH 12/20] segmentation-rules.json as ICULibNativeFiles --- src/mono/browser/browser.proj | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/mono/browser/browser.proj b/src/mono/browser/browser.proj index 0b67ca6a31f102..d10cb296d8bb03 100644 --- a/src/mono/browser/browser.proj +++ b/src/mono/browser/browser.proj @@ -357,7 +357,8 @@ + $(ICULibDir)/libicudata.a; + runtime/hybrid-globalization/segmentation-rules.json" /> @@ -455,11 +456,6 @@ @(_WasmArchivedTimezones);" DestinationFolder="$(NativeBinDir)" SkipUnchangedFiles="true" /> - - - From 9ac537f5775853bff554051400b8988edac4dd69 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Mon, 8 Jan 2024 15:03:47 +0100 Subject: [PATCH 13/20] use segmentation-rules instead of static-json --- src/mono/browser/runtime/assets.ts | 13 +++------- src/mono/browser/runtime/exports.ts | 4 ++-- src/mono/browser/runtime/loader/assets.ts | 24 ++++++++----------- src/mono/browser/runtime/types/index.ts | 5 ++-- src/mono/browser/runtime/types/internal.ts | 2 +- .../BootJsonBuilderHelper.cs | 2 +- .../BootJsonData.cs | 6 ----- 7 files changed, 19 insertions(+), 37 deletions(-) diff --git a/src/mono/browser/runtime/assets.ts b/src/mono/browser/runtime/assets.ts index 8a7460a24be71c..bd88949b6234c5 100644 --- a/src/mono/browser/runtime/assets.ts +++ b/src/mono/browser/runtime/assets.ts @@ -26,7 +26,7 @@ export function instantiate_asset(asset: AssetEntry, url: string, bytes: Uint8Ar case "dotnetwasm": case "js-module-threads": case "symbols": - case "static-json": + case 
"segmentation-rules": // do nothing break; case "resource": @@ -106,18 +106,11 @@ export async function instantiate_symbols_asset(pendingAsset: AssetEntryInternal } } -export async function instantiate_static_json_asset(pendingAsset: AssetEntryInternal): Promise { +export async function instantiate_segmentation_rules_asset(pendingAsset: AssetEntryInternal): Promise { try { const response = await pendingAsset.pendingDownloadInternal!.response; const json = await response.json(); - - switch (pendingAsset.name) { - case "segmentation-rules.json": - setSegmentationRulesFromJson(json); - break; - default: - throw new Error(`Unknown static json asset: ${pendingAsset.name}`); - } + setSegmentationRulesFromJson(json); } catch (error: any) { mono_log_info(`Error loading static json asset ${pendingAsset.name}: ${JSON.stringify(error)}`); } diff --git a/src/mono/browser/runtime/exports.ts b/src/mono/browser/runtime/exports.ts index 9ea0794932d137..35e03cf5db1a44 100644 --- a/src/mono/browser/runtime/exports.ts +++ b/src/mono/browser/runtime/exports.ts @@ -20,7 +20,7 @@ import { mono_bind_static_method } from "./net6-legacy/method-calls"; import { export_binding_api, export_internal_api, export_mono_api } from "./net6-legacy/exports-legacy"; import { initializeLegacyExports } from "./net6-legacy/globals"; import { mono_log_warn, mono_wasm_stringify_as_error_with_stack } from "./logging"; -import { instantiate_asset, instantiate_symbols_asset, instantiate_static_json_asset } from "./assets"; +import { instantiate_asset, instantiate_symbols_asset, instantiate_segmentation_rules_asset } from "./assets"; import { jiterpreter_dump_stats } from "./jiterpreter"; import { forceDisposeProxies } from "./gc-handles"; @@ -46,7 +46,7 @@ function initializeExports(globalObjects: GlobalObjects): RuntimeAPI { instantiate_asset, jiterpreter_dump_stats, forceDisposeProxies, - instantiate_static_json_asset, + instantiate_segmentation_rules_asset, }); const API = export_api(); diff --git 
a/src/mono/browser/runtime/loader/assets.ts b/src/mono/browser/runtime/loader/assets.ts index b7ebc3cf93324d..38eedf2777a3c9 100644 --- a/src/mono/browser/runtime/loader/assets.ts +++ b/src/mono/browser/runtime/loader/assets.ts @@ -68,7 +68,7 @@ const skipBufferByAssetTypes: { } = { "dotnetwasm": true, "symbols": true, - "static-json": true, + "segmentation-rules": true, }; const containedInSnapshotByAssetTypes: { @@ -88,7 +88,7 @@ const skipInstantiateByAssetTypes: { ...jsModulesAssetTypes, "dotnetwasm": true, "symbols": true, - "static-json": true, + "segmentation-rules": true, }; export function shouldLoadIcuAsset(asset: AssetEntryInternal): boolean { @@ -226,8 +226,8 @@ export async function mono_download_assets(): Promise { if (asset.behavior === "symbols") { await runtimeHelpers.instantiate_symbols_asset(asset); cleanupAsset(asset); - } else if (asset.behavior === "static-json") { - await runtimeHelpers.instantiate_static_json_asset(asset); + } else if (asset.behavior === "segmentation-rules") { + await runtimeHelpers.instantiate_segmentation_rules_asset(asset); cleanupAsset(asset); } @@ -348,6 +348,12 @@ export function prepareAssets() { behavior: "icu", loadRemote: true }); + } else if (name === "segmentation-rules.json") { + alwaysLoadedAssets.push({ + name, + hash: resources.icu[name], + behavior: "segmentation-rules", + }); } } } @@ -361,16 +367,6 @@ export function prepareAssets() { }); } } - - if (resources.staticJsonAssets) { - for (const name in resources.staticJsonAssets) { - alwaysLoadedAssets.push({ - name, - hash: resources.staticJsonAssets[name], - behavior: "static-json" - }); - } - } } // FIXME: should we also load Net7 backward compatible `config.configs` in a same way ? 
diff --git a/src/mono/browser/runtime/types/index.ts b/src/mono/browser/runtime/types/index.ts index 30fc022c7371e7..c47d4bd2bd72a0 100644 --- a/src/mono/browser/runtime/types/index.ts +++ b/src/mono/browser/runtime/types/index.ts @@ -154,7 +154,6 @@ export interface ResourceGroups { wasmSymbols?: ResourceList; wasmNative: ResourceList; icu?: ResourceList; - staticJsonAssets?: ResourceList; satelliteResources?: { [cultureName: string]: ResourceList }; @@ -302,9 +301,9 @@ export type AssetBehaviors = SingleAssetBehaviors | */ | "symbols" /** - * Load static JSON file. + * Load segmentation rules file for Hybrid Globalization. */ - | "static-json" + | "segmentation-rules" export const enum GlobalizationMode { /** diff --git a/src/mono/browser/runtime/types/internal.ts b/src/mono/browser/runtime/types/internal.ts index c6dc55d929cac5..c97d4c98f0211b 100644 --- a/src/mono/browser/runtime/types/internal.ts +++ b/src/mono/browser/runtime/types/internal.ts @@ -215,7 +215,7 @@ export type RuntimeHelpers = { stringify_as_error_with_stack?: (error: any) => string, instantiate_asset: (asset: AssetEntry, url: string, bytes: Uint8Array) => void, instantiate_symbols_asset: (pendingAsset: AssetEntryInternal) => Promise, - instantiate_static_json_asset: (pendingAsset: AssetEntryInternal) => Promise, + instantiate_segmentation_rules_asset: (pendingAsset: AssetEntryInternal) => Promise, jiterpreter_dump_stats?: (x: boolean) => string, forceDisposeProxies: (disposeMethods: boolean, verbose: boolean) => void, } diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonBuilderHelper.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonBuilderHelper.cs index e45c8134372e2f..7226f763565efd 100644 --- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonBuilderHelper.cs +++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonBuilderHelper.cs @@ -69,7 +69,7 @@ static void AddDictionary(StringBuilder sb, Dictionary? 
res) else if (resourceName.StartsWith("icudt", StringComparison.OrdinalIgnoreCase)) return bootConfig.resources.icu ??= new(); else if (resourceName.StartsWith("segmentation-rules", StringComparison.OrdinalIgnoreCase) && string.Equals(resourceExtension, ".json", StringComparison.OrdinalIgnoreCase)) - return bootConfig.resources.staticJsonAssets ??= new(); + return bootConfig.resources.icu ??= new(); else Log.LogError($"The resource '{resourceName}' is not recognized as any native asset"); diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonData.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonData.cs index 5166b5eab7b22d..97bd05110bfbfc 100644 --- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonData.cs +++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonData.cs @@ -201,12 +201,6 @@ public class ResourcesData [DataMember(EmitDefaultValue = false)] public List remoteSources { get; set; } - - /// - /// Static JSON assets - /// - [DataMember(EmitDefaultValue = false)] - public ResourceHashesByNameDictionary staticJsonAssets { get; set; } } public enum GlobalizationMode : int From 806c5667e49461ba3808ca0225cd548da41f4550 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Mon, 8 Jan 2024 16:45:17 +0100 Subject: [PATCH 14/20] use full-path for segmentation-rules asset --- src/mono/browser/browser.proj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mono/browser/browser.proj b/src/mono/browser/browser.proj index d10cb296d8bb03..6e782f7b913f94 100644 --- a/src/mono/browser/browser.proj +++ b/src/mono/browser/browser.proj @@ -358,7 +358,7 @@ + $(MSBuildThisFileDirectory)runtime/hybrid-globalization/segmentation-rules.json" /> From 6ec53cd2cf4f61d9cc29874cf577e9642bb052aa Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Wed, 10 Jan 2024 20:37:33 +0100 Subject: [PATCH 15/20] simplify segmentation-rules include --- src/mono/browser/browser.proj | 5 ++--- 
src/mono/browser/build/BrowserWasmApp.targets | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/mono/browser/browser.proj b/src/mono/browser/browser.proj index 6e782f7b913f94..8089f9ab10e128 100644 --- a/src/mono/browser/browser.proj +++ b/src/mono/browser/browser.proj @@ -358,7 +358,7 @@ + $(BrowserProjectRoot)runtime/hybrid-globalization/segmentation-rules.json" /> @@ -482,8 +482,7 @@ $(NativeBinDir)dotnet.d.ts; $(NativeBinDir)dotnet-legacy.d.ts; $(NativeBinDir)package.json; - $(NativeBinDir)dotnet.native.wasm; - $(NativeBinDir)segmentation-rules.json;" + $(NativeBinDir)dotnet.native.wasm;" DestinationFolder="$(MicrosoftNetCoreAppRuntimePackNativeDir)" SkipUnchangedFiles="true" /> diff --git a/src/mono/browser/build/BrowserWasmApp.targets b/src/mono/browser/build/BrowserWasmApp.targets index 32f2d7271ba49b..9dd40ec65cc642 100644 --- a/src/mono/browser/build/BrowserWasmApp.targets +++ b/src/mono/browser/build/BrowserWasmApp.targets @@ -104,13 +104,13 @@ <_HybridGlobalizationDataFiles Include="$(MicrosoftNetCoreAppRuntimePackRidNativeDir)icudt_hybrid.dat"/> + <_HybridGlobalizationDataFiles Include="$(MicrosoftNetCoreAppRuntimePackRidNativeDir)segmentation-rules.json"/> <_IcuAvailableDataFiles Include="$(MicrosoftNetCoreAppRuntimePackRidNativeDir)icudt_*" Exclude="@(_HybridGlobalizationDataFiles);$(_WasmIcuDataFileName)"/> - + - From 0199ab62f2a547cf5b139f0dfbd152d6519d6773 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Thu, 11 Jan 2024 12:11:26 +0100 Subject: [PATCH 16/20] reverse filter-out condition --- .../AssetsComputingHelper.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/AssetsComputingHelper.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/AssetsComputingHelper.cs index 5ff583b90df6d5..8e42fc982ce24a 100644 --- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/AssetsComputingHelper.cs +++ 
b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/AssetsComputingHelper.cs @@ -64,7 +64,7 @@ public static bool ShouldFilterCandidate( ".dat" when invariantGlobalization && fileName.StartsWith("icudt") => "invariant globalization is enabled", ".dat" when loadFullICUData && fileName != "icudt" => "full ICU data is enabled", ".dat" when hybridGlobalization && fileName != "icudt_hybrid" => "hybrid globalization is enabled", - ".json" when hybridGlobalization && fileName != "segmentation-rules" => "hybrid globalization is enabled, expecting segmentation-rules.json", + ".json" when !hybridGlobalization && fileName == "segmentation-rules" => "segmentation-rules.json file is only used when hybrid globalization is enabled", ".dat" when !string.IsNullOrEmpty(customIcuCandidateFilename) && fileName != customIcuCandidateFilename => "custom icu file either from absolute path or from runtime pack path will be used", ".dat" when IsDefaultIcuMode() && !(icuShardsFromRuntimePack.Any(f => f == fileName)) => "automatic icu shard selection, based on application culture, is enabled", ".json" when fromMonoPackage && (fileName == "wasm-props" || fileName == "package") => $"{fileName}{extension} is not used by Blazor", From 093cf63bf489a666e12c3592e580efe259b4e1a3 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Thu, 11 Jan 2024 15:22:19 +0100 Subject: [PATCH 17/20] fix code style --- src/mono/browser/browser.proj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mono/browser/browser.proj b/src/mono/browser/browser.proj index 8089f9ab10e128..9b962a7efabe4d 100644 --- a/src/mono/browser/browser.proj +++ b/src/mono/browser/browser.proj @@ -456,7 +456,7 @@ @(_WasmArchivedTimezones);" DestinationFolder="$(NativeBinDir)" SkipUnchangedFiles="true" /> - + From 914986f2dad036077dba8ba000ea257062346780 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Fri, 12 Jan 2024 08:42:22 +0100 Subject: [PATCH 18/20] refactor resource target condition --- 
.../BootJsonBuilderHelper.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonBuilderHelper.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonBuilderHelper.cs index 7226f763565efd..17760e84aad065 100644 --- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonBuilderHelper.cs +++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonBuilderHelper.cs @@ -68,7 +68,7 @@ static void AddDictionary(StringBuilder sb, Dictionary? res) return bootConfig.resources.wasmSymbols ??= new(); else if (resourceName.StartsWith("icudt", StringComparison.OrdinalIgnoreCase)) return bootConfig.resources.icu ??= new(); - else if (resourceName.StartsWith("segmentation-rules", StringComparison.OrdinalIgnoreCase) && string.Equals(resourceExtension, ".json", StringComparison.OrdinalIgnoreCase)) + else if (resourceName.Equals("segmentation-rules.json", StringComparison.OrdinalIgnoreCase)) return bootConfig.resources.icu ??= new(); else Log.LogError($"The resource '{resourceName}' is not recognized as any native asset"); From 8743bf407f3d6adca495a718e3dbd0b053435f9a Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Fri, 12 Jan 2024 08:42:57 +0100 Subject: [PATCH 19/20] add segmentation-rules.json to expected ICU assets --- src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs b/src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs index 50dac9c67a1e7a..b2c53298b12770 100644 --- a/src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs +++ b/src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs @@ -361,6 +361,7 @@ public void AssertIcuAssets(AssertBundleOptionsBase assertOptions) break; case GlobalizationMode.Hybrid: expected.Add("icudt_hybrid.dat"); + expected.Add("segmentation-rules.json"); break; case GlobalizationMode.PredefinedIcu: if 
(string.IsNullOrEmpty(assertOptions.PredefinedIcudt)) From e740ff37c635f761accff71dd0cb47e8b84c47da Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Fri, 12 Jan 2024 11:05:14 +0100 Subject: [PATCH 20/20] add segmentation-rules.json to expected assets fix --- src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs b/src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs index b2c53298b12770..7e1a7e0ed2e557 100644 --- a/src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs +++ b/src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs @@ -381,6 +381,8 @@ public void AssertIcuAssets(AssertBundleOptionsBase assertOptions) } IEnumerable actual = Directory.EnumerateFiles(assertOptions.BinFrameworkDir, "icudt*dat"); + if (assertOptions.GlobalizationMode == GlobalizationMode.Hybrid) + actual = actual.Union(Directory.EnumerateFiles(assertOptions.BinFrameworkDir, "segmentation-rules.json")); AssertFilesOnDisk(expected, actual); if (assertOptions.GlobalizationMode is GlobalizationMode.PredefinedIcu) TestUtils.AssertSameFile(assertOptions.PredefinedIcudt!, actual.Single());