From 6ba86a112e55a16e2bc090d12bf9717185f19286 Mon Sep 17 00:00:00 2001 From: Kevin Van Cott Date: Wed, 10 Jun 2026 08:19:27 -0500 Subject: [PATCH] fix: alphanumeric sort fix and perf improvement --- .../row-sorting/rowSortingFeature.utils.ts | 14 ++- packages/table-core/src/fns/sortFns.ts | 43 +++++-- .../rowSortingFeature.utils.test.ts | 111 ++++++++++++++++++ .../table-core/tests/unit/fns/sortFns.test.ts | 104 ++++++++++++++++ perf.md | 65 +++++++++- 5 files changed, 316 insertions(+), 21 deletions(-) create mode 100644 packages/table-core/tests/unit/features/row-sorting/rowSortingFeature.utils.test.ts diff --git a/packages/table-core/src/features/row-sorting/rowSortingFeature.utils.ts b/packages/table-core/src/features/row-sorting/rowSortingFeature.utils.ts index 3ebf0763c0..b8a4c94afb 100644 --- a/packages/table-core/src/features/row-sorting/rowSortingFeature.utils.ts +++ b/packages/table-core/src/features/row-sorting/rowSortingFeature.utils.ts @@ -89,8 +89,6 @@ export function column_getAutoSortFn< const sortFns: Record> | undefined = column.table._rowModelFns.sortFns - let sortFn: SortFn | undefined - const firstRows = column.table.getFilteredRowModel().flatRows.slice(0, 10) let isString = false @@ -99,23 +97,27 @@ export function column_getAutoSortFn< const value = firstRows[i]!.getValue(column.id) if (Object.prototype.toString.call(value) === '[object Date]') { - sortFn = sortFns?.datetime + if (sortFns?.datetime) { + return sortFns.datetime + } } if (typeof value === 'string') { isString = true if (value.split(reSplitAlphaNumeric).length > 1) { - sortFn = sortFns?.alphanumeric + if (sortFns?.alphanumeric) { + return sortFns.alphanumeric + } } } } if (isString) { - sortFn = sortFns?.text + return sortFns?.text ?? sortFn_basic } - return sortFn ?? sortFn_basic + return sortFn_basic } /** diff --git a/packages/table-core/src/fns/sortFns.ts b/packages/table-core/src/fns/sortFns.ts index 84889cb934..80cad617dd 100644 --- a/packages/table-core/src/fns/sortFns.ts +++ b/packages/table-core/src/fns/sortFns.ts @@ -151,10 +151,8 @@ function toString(a: any) { // It handles numbers, mixed alphanumeric combinations, and even // null, undefined, and Infinity function compareAlphanumeric(aStr: string, bStr: string) { - // Split on number groups, but keep the delimiter - // Then remove falsey split values - const a = aStr.split(reSplitAlphaNumeric).filter(Boolean) - const b = bStr.split(reSplitAlphaNumeric).filter(Boolean) + const a = aStr.split(reSplitAlphaNumeric) + const b = bStr.split(reSplitAlphaNumeric) let ai = 0 let bi = 0 @@ -162,16 +160,29 @@ function compareAlphanumeric(aStr: string, bStr: string) { const bLen = b.length while (ai < aLen && bi < bLen) { + // Skip the empty boundary chunks that .filter(Boolean) used to remove + if (!a[ai]) { + ai++ + continue + } + if (!b[bi]) { + bi++ + continue + } + const aa = a[ai++]! const bb = b[bi++]! + // Chunks are either all-digit (parseInt always succeeds) or digit-free + // (parseInt is always NaN), so NaN-ness fully classifies each chunk const an = parseInt(aa, 10) const bn = parseInt(bb, 10) - const combo = [an, bn].sort() + const aIsNaN = isNaN(an) + const bIsNaN = isNaN(bn) // Both are string - if (isNaN(combo[0]!)) { + if (aIsNaN && bIsNaN) { if (aa > bb) { return 1 } @@ -181,9 +192,9 @@ function compareAlphanumeric(aStr: string, bStr: string) { continue } - // One is a string, one is a number - if (isNaN(combo[1]!)) { - return isNaN(an) ? -1 : 1 + // One is a string, one is a number — the string chunk sorts first + if (aIsNaN || bIsNaN) { + return aIsNaN ? -1 : 1 } // Both are numbers @@ -195,7 +206,19 @@ function compareAlphanumeric(aStr: string, bStr: string) { } } - return aLen - ai - (bLen - bi) + // One side is exhausted — compare the counts of remaining non-empty chunks + let remaining = 0 + for (; ai < aLen; ai++) { + if (a[ai]) { + remaining++ + } + } + for (; bi < bLen; bi++) { + if (b[bi]) { + remaining-- + } + } + return remaining } // Exports diff --git a/packages/table-core/tests/unit/features/row-sorting/rowSortingFeature.utils.test.ts b/packages/table-core/tests/unit/features/row-sorting/rowSortingFeature.utils.test.ts new file mode 100644 index 0000000000..d4bf3546dd --- /dev/null +++ b/packages/table-core/tests/unit/features/row-sorting/rowSortingFeature.utils.test.ts @@ -0,0 +1,111 @@ +import { describe, expect, it } from 'vitest' +import { + constructTable, + coreFeatures, + rowSortingFeature, + sortFn_alphanumeric, + sortFn_basic, + sortFn_datetime, + sortFn_text, + sortFns, + tableFeatures, +} from '../../../../src' +import { column_getAutoSortFn } from '../../../../src/static-functions' +import { storeReactivityBindings } from '../../../../src/store-reactivity-bindings' +import type { ColumnDef } from '../../../../src' + +type Sample = { + plainText: string + alphaNum: string + createdAt: Date + amount: number +} + +const features = tableFeatures({ ...coreFeatures, rowSortingFeature }) + +const sampleKeys: Array = [ + 'plainText', + 'alphaNum', + 'createdAt', + 'amount', +] + +function generateAutoSortTestTable(data: Array) { + const columns: Array> = + sampleKeys.map((key) => ({ accessorKey: key, id: key })) + + const table = constructTable({ + data, + columns, + features: { + ...features, + coreReativityFeature: storeReactivityBindings(), + }, + }) + + // Normally assigned by createSortedRowModel when the sorted row model is wired up + table._rowModelFns.sortFns = sortFns + + return table +} + +describe('column_getAutoSortFn', () => { + const data: Array = [ + { + plainText: 'apple', + alphaNum: 'item1', + createdAt: new Date('2024-01-01'), + amount: 1, + }, + { + plainText: 'banana', + alphaNum: 'item10', + createdAt: new Date('2024-02-01'), + amount: 2, + }, + ] + + it('selects datetime for Date values', () => { + const table = generateAutoSortTestTable(data) + const column = table.getColumn('createdAt')! + + expect(column_getAutoSortFn(column)).toBe(sortFn_datetime) + }) + + it('selects alphanumeric for strings mixing text and numbers', () => { + // Regression: the text fallback used to clobber the alphanumeric match + const table = generateAutoSortTestTable(data) + const column = table.getColumn('alphaNum')! + + expect(column_getAutoSortFn(column)).toBe(sortFn_alphanumeric) + }) + + it('selects text for plain strings', () => { + const table = generateAutoSortTestTable(data) + const column = table.getColumn('plainText')! + + expect(column_getAutoSortFn(column)).toBe(sortFn_text) + }) + + it('falls back to basic for non-string, non-date values', () => { + const table = generateAutoSortTestTable(data) + const column = table.getColumn('amount')! + + expect(column_getAutoSortFn(column)).toBe(sortFn_basic) + }) + + it('falls back to basic when no rows exist', () => { + const table = generateAutoSortTestTable([]) + const column = table.getColumn('plainText')! + + expect(column_getAutoSortFn(column)).toBe(sortFn_basic) + }) + + it('falls back to text when alphanumeric is not registered', () => { + const table = generateAutoSortTestTable(data) + table._rowModelFns.sortFns = { text: sortFn_text } + const column = table.getColumn('alphaNum')! + + expect(column_getAutoSortFn(column)).toBe(sortFn_text) + }) +}) diff --git a/packages/table-core/tests/unit/fns/sortFns.test.ts b/packages/table-core/tests/unit/fns/sortFns.test.ts index e4d62441c3..36906f9c71 100644 --- a/packages/table-core/tests/unit/fns/sortFns.test.ts +++ b/packages/table-core/tests/unit/fns/sortFns.test.ts @@ -140,6 +140,110 @@ describe('sortFn_basic', () => { }) }) +describe('compareAlphanumeric boundary chunks (filter-removal regression)', () => { + // The inline empty-chunk skipping must match the old `.filter(Boolean)` + // behavior; empty chunks occur where a string starts/ends with digits. + + it('handles leading digit groups', () => { + // Number chunk vs string chunk at position 0 — string sorts first + expect(cmp(sortFn_alphanumeric, '1abc', 'abc1')).toBe(1) + expect(cmp(sortFn_alphanumeric, 'abc1', '1abc')).toBe(-1) + expect(cmp(sortFn_alphanumeric, '2abc', '10abc')).toBe(-1) + }) + + it('handles pure digit strings (leading and trailing empties)', () => { + expect(cmp(sortFn_alphanumeric, '12', '12')).toBe(0) + expect(cmp(sortFn_alphanumeric, '2', '10')).toBe(-1) + expect(cmp(sortFn_alphanumeric, '10', '2')).toBe(1) + }) + + it('treats leading zeros as numerically equal', () => { + expect(cmp(sortFn_alphanumeric, 'item007', 'item7')).toBe(0) + }) + + it('counts only non-empty chunks in the prefix tail', () => { + // a exhausts past its trailing empty; b has one real chunk remaining + expect(cmp(sortFn_alphanumeric, '1', '1abc')).toBe(-1) + expect(cmp(sortFn_alphanumeric, '1abc', '1')).toBe(1) + expect(cmp(sortFn_alphanumeric, 'abc', 'abc123')).toBe(-1) + }) + + it('handles empty vs non-empty strings', () => { + expect(cmp(sortFn_alphanumeric, '', 'a')).toBe(-1) + expect(cmp(sortFn_alphanumeric, 'a', '')).toBe(1) + }) + + it('matches the previous filter-based implementation across all vocab pairs', () => { + // Reference: the implementation before the allocation refactor, verbatim + const reference = (aStr: string, bStr: string): number => { + const a = aStr.split(/([0-9]+)/gm).filter(Boolean) + const b = bStr.split(/([0-9]+)/gm).filter(Boolean) + let ai = 0 + let bi = 0 + const aLen = a.length + const bLen = b.length + while (ai < aLen && bi < bLen) { + const aa = a[ai++]! + const bb = b[bi++]! + const an = parseInt(aa, 10) + const bn = parseInt(bb, 10) + const combo = [an, bn].sort() + if (isNaN(combo[0]!)) { + if (aa > bb) return 1 + if (bb > aa) return -1 + continue + } + if (isNaN(combo[1]!)) { + return isNaN(an) ? -1 : 1 + } + if (an > bn) return 1 + if (bn > an) return -1 + } + return aLen - ai - (bLen - bi) + } + + const vocab = [ + '', + 'a', + 'ab', + '1', + '0', + '12', + '007', + '7', + '1a', + 'a1', + '1a1', + 'a1a', + '1a2b', + 'a1b2', + '12ab34', + 'ab12cd', + 'item2', + 'item10', + '2item', + '10item', + 'a007b', + 'a7b', + 'nan', + 'infinity', + '999999999999999999999999999999', + ] + + for (const a of vocab) { + for (const b of vocab) { + // Case-sensitive variant is a pure passthrough for string inputs + const actual = sortFn_alphanumericCaseSensitive( + makeRow(a), + makeRow(b), + 'col', + ) + expect(actual, `compare("${a}", "${b}")`).toBe(reference(a, b)) + } + } + }) +}) + describe('sortFn_datetime', () => { it('returns 0 for equal dates', () => { const d = new Date(2026, 1, 1) diff --git a/perf.md b/perf.md index dad016c99d..a7c96565e8 100644 --- a/perf.md +++ b/perf.md @@ -93,11 +93,11 @@ Typecheck verified clean after the sweep (`pnpm tsc --noEmit` passes). ## Progress -- **Total findings:** 60 -- **Done `[x]`:** 17 +- **Total findings:** 61 +- **Done `[x]`:** 19 - **Partial `[~]`:** 2 - **Skipped `[-]`:** 5 -- **Not started `[ ]`:** 36 +- **Not started `[ ]`:** 35 _(Update these counters as you go.)_ @@ -1858,8 +1858,8 @@ Both walk the `sorting` array; called for every visible sortable column on every ## 52. `compareAlphanumeric` allocates 2 arrays per comparison — Score: 6 -**Status:** `[ ]` not started -**Implementation note:** _(none)_ +**Status:** `[x]` done +**Implementation note:** Went further than proposed — the audit undercounted the allocations. Besides the two `.filter(Boolean)` arrays per comparison, every chunk-pair iteration allocated and **default-sorted** a fresh `[an, bn]` array (`const combo = [an, bn].sort()`), paying array allocation + sort dispatch + number→string coercion just to classify NaN-ness. Since chunks from `reSplitAlphaNumeric` are either all-digit (`parseInt` always succeeds) or digit-free (`parseInt` always `NaN`), two plain `isNaN` checks replace the combo entirely (`aIsNaN && bIsNaN` → both-string branch, `aIsNaN || bIsNaN` → mixed branch). The `.filter(Boolean)` drop required two semantic guards: (1) empty chunks (which only occur at split-array boundaries) are skipped inline at the top of the loop; (2) the prefix tail return counts only **non-empty** remaining chunks instead of raw `aLen - ai - (bLen - bi)`. Net per comparison: 3+k array allocations → 1 per side (the unavoidable `.split()`), where k = chunk pairs visited. Measured on a 10k-row sort of mixed `itemNNNN-revNN` strings (Node, median of 7 runs): **36.5ms → 20.7ms (~43% faster)**. Equivalence verified two ways: a vocab×vocab differential test against the verbatim old implementation (625 pairs covering boundary digits, leading zeros, pure digits, empties, 30-digit overflow) plus targeted boundary-chunk unit tests, all in `tests/unit/fns/sortFns.test.ts`. Unblocked by #61 — the auto path can now actually select `alphanumeric` again. **Location:** `src/fns/sortFns.ts:154–200` **Category:** `big-o`, `micro` @@ -2037,6 +2037,60 @@ Each file has a `getXyzPrototype(table)` function with identical shape — `if ( --- +## 61. `column_getAutoSortFn` never auto-selects `alphanumeric`/`datetime` — `isString` fallback clobbers the match (bug) — Score: 8 (bug) + +**Status:** `[x]` done +**Implementation note:** Restored v8's early-return precedence (datetime → alphanumeric → text → basic). Discovered during the adversarial verification of #52. The v9 refactor had converted v8's in-loop `return sortingFns.datetime` / `return sortingFns.alphanumeric` (v8 `RowSorting.ts` `getAutoSortingFn`) into assignments to a shared `sortFn` local — but any string value in the sample also sets `isString = true`, and the trailing `if (isString) sortFn = sortFns?.text` unconditionally overwrote the match. Net effect: the auto path never selected `alphanumeric` (dead code) and `datetime` was clobbered whenever any sampled value was a string; digit-bearing string columns silently got plain `text` sorting (`"item10"` before `"item2"`). Each restored early return is guarded (`if (sortFns?.datetime) return ...`) so a custom registry missing an entry falls through to the next preference instead of returning `undefined`. Added `tests/unit/features/row-sorting/rowSortingFeature.utils.test.ts` covering all four selections plus empty-rows and missing-registry-entry fallbacks (no row-sorting utils tests existed before). Related to #50, which fixed the row sampling in the same function. Note for #52: this fix makes `compareAlphanumeric` reachable via the auto path again, so #52's allocation refactor is now worth pursuing. + +**Location:** `src/features/row-sorting/rowSortingFeature.utils.ts:84–122` +**Category:** `bug` + +**Before** + +```ts +for (let i = 0; i < firstRows.length; i++) { + const value = firstRows[i]!.getValue(column.id) + if (Object.prototype.toString.call(value) === '[object Date]') { + sortFn = sortFns?.datetime + } + if (typeof value === 'string') { + isString = true + if (value.split(reSplitAlphaNumeric).length > 1) { + sortFn = sortFns?.alphanumeric + } + } +} +if (isString) { + sortFn = sortFns?.text // clobbers any datetime/alphanumeric match above +} +return sortFn ?? sortFn_basic +``` + +**After** + +```ts +for (let i = 0; i < firstRows.length; i++) { + const value = firstRows[i]!.getValue(column.id) + if (Object.prototype.toString.call(value) === '[object Date]') { + if (sortFns?.datetime) return sortFns.datetime + } + if (typeof value === 'string') { + isString = true + if (value.split(reSplitAlphaNumeric).length > 1) { + if (sortFns?.alphanumeric) return sortFns.alphanumeric + } + } +} +if (isString) { + return sortFns?.text ?? sortFn_basic +} +return sortFn_basic +``` + +**Risk:** Behavior changes for tables relying on the broken default — string columns containing digits now natural-sort (the documented v8 behavior) instead of lexicographic `text` sort. + +--- + # Suggested priority order Anything **≥ 7**: @@ -2055,6 +2109,7 @@ Anything **≥ 7**: | 34 | `orderColumns` `grouping.includes` → Set | 7 | big-o | | 49 | `createSortedRowModel` clones every row | 7 | big-o / micro | | 50 | `column_getAutoSortFn` `slice(10)` should be `slice(0, 10)` | 7 | bug | +| 61 | `column_getAutoSortFn` text fallback clobbers auto-match | 8 | bug | Anything **5–6**: a second wave of memoization gaps and partition-loop consolidations (#2, #9, #15, #21, #24, #27, #33, #36, #39, #40, #41, #52). All low-risk.