Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
tweaks
  • Loading branch information
gfx committed Mar 3, 2023
commit d6c13193d9bf87eafb7a0c91aa417a85165aa8eb
4 changes: 2 additions & 2 deletions benchmark/decode-string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { utf8EncodeJs, utf8Count, utf8DecodeJs, utf8DecodeTD } from "../src/util
import Benchmark from "benchmark";

for (const baseStr of ["A", "あ", "🌏"]) {
const dataSet = [10, 100, 200, 1_000, 10_000, 100_000].map((n) => {
const dataSet = [10, 100, 500, 1_000].map((n) => {
return baseStr.repeat(n);
});

Expand All @@ -14,7 +14,7 @@ for (const baseStr of ["A", "あ", "🌏"]) {
const bytes = new Uint8Array(new ArrayBuffer(byteLength));
utf8EncodeJs(str, bytes, 0);

console.log(`\n## string "${baseStr}" x ${str.length} (byteLength=${byteLength})\n`);
console.log(`\n## string "${baseStr}" (strLength=${str.length}, byteLength=${byteLength})\n`);

const suite = new Benchmark.Suite();

Expand Down
4 changes: 2 additions & 2 deletions benchmark/encode-string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ import { utf8EncodeJs, utf8Count, utf8EncodeTE } from "../src/utils/utf8";
import Benchmark from "benchmark";

for (const baseStr of ["A", "あ", "🌏"]) {
const dataSet = [10, 100, 200, 1_000, 10_000, 100_000].map((n) => {
const dataSet = [10, 30, 50, 100].map((n) => {
return baseStr.repeat(n);
});

for (const str of dataSet) {
const byteLength = utf8Count(str);
const buffer = new Uint8Array(byteLength);

console.log(`\n## string "${baseStr}" x ${str.length} (byteLength=${byteLength})\n`);
console.log(`\n## string "${baseStr}" (strLength=${str.length}, byteLength=${byteLength})\n`);

const suite = new Benchmark.Suite();

Expand Down
12 changes: 9 additions & 3 deletions src/utils/utf8.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
/* eslint-disable @typescript-eslint/no-unnecessary-condition */
import { UINT32_MAX } from "./int";

export function utf8Count(str: string): number {
const strLength = str.length;
Expand Down Expand Up @@ -88,9 +86,14 @@ export function utf8EncodeJs(str: string, output: Uint8Array, outputOffset: numb
// https://encoding.spec.whatwg.org/
// and available in all the modern browsers:
// https://caniuse.com/textencoder
// They have also been available in Node.js since v12 LTS:
// https://nodejs.org/api/globals.html#textencoder

const sharedTextEncoder = new TextEncoder();
const TEXT_ENCODER_THRESHOLD = 200;

// This threshold should be determined by benchmarking; the optimal value may vary across engines and input data.
// Run `npx ts-node benchmark/encode-string.ts` for details.
const TEXT_ENCODER_THRESHOLD = 50;

export function utf8EncodeTE(str: string, output: Uint8Array, outputOffset: number): void {
sharedTextEncoder.encodeInto(str, output.subarray(outputOffset));
Expand Down Expand Up @@ -156,6 +159,9 @@ export function utf8DecodeJs(bytes: Uint8Array, inputOffset: number, byteLength:
}

const sharedTextDecoder = new TextDecoder();

// This threshold should be determined by benchmarking; the optimal value may vary across engines and input data.
// Run `npx ts-node benchmark/decode-string.ts` for details.
const TEXT_DECODER_THRESHOLD = 200;

export function utf8DecodeTD(bytes: Uint8Array, inputOffset: number, byteLength: number): string {
Expand Down