From 6ba86a112e55a16e2bc090d12bf9717185f19286 Mon Sep 17 00:00:00 2001
From: Kevin Van Cott <kevinvandy656@gmail.com>
Date: Wed, 10 Jun 2026 08:19:27 -0500
Subject: [PATCH] fix: alphanumeric sort fix and perf improvement

---
 .../row-sorting/rowSortingFeature.utils.ts    |  14 ++-
 packages/table-core/src/fns/sortFns.ts        |  43 +++++--
 .../rowSortingFeature.utils.test.ts           | 111 ++++++++++++++++++
 .../table-core/tests/unit/fns/sortFns.test.ts | 104 ++++++++++++++++
 perf.md                                       |  65 +++++++++-
 5 files changed, 316 insertions(+), 21 deletions(-)
 create mode 100644 packages/table-core/tests/unit/features/row-sorting/rowSortingFeature.utils.test.ts

diff --git a/packages/table-core/src/features/row-sorting/rowSortingFeature.utils.ts b/packages/table-core/src/features/row-sorting/rowSortingFeature.utils.ts
index 3ebf0763c0..b8a4c94afb 100644
--- a/packages/table-core/src/features/row-sorting/rowSortingFeature.utils.ts
+++ b/packages/table-core/src/features/row-sorting/rowSortingFeature.utils.ts
@@ -89,8 +89,6 @@ export function column_getAutoSortFn<
   const sortFns: Record<string, SortFn<TFeatures, TData>> | undefined =
     column.table._rowModelFns.sortFns
 
-  let sortFn: SortFn<TFeatures, TData> | undefined
-
   const firstRows = column.table.getFilteredRowModel().flatRows.slice(0, 10)
 
   let isString = false
@@ -99,23 +97,27 @@ export function column_getAutoSortFn<
     const value = firstRows[i]!.getValue(column.id)
 
     if (Object.prototype.toString.call(value) === '[object Date]') {
-      sortFn = sortFns?.datetime
+      if (sortFns?.datetime) {
+        return sortFns.datetime
+      }
     }
 
     if (typeof value === 'string') {
       isString = true
 
       if (value.split(reSplitAlphaNumeric).length > 1) {
-        sortFn = sortFns?.alphanumeric
+        if (sortFns?.alphanumeric) {
+          return sortFns.alphanumeric
+        }
       }
     }
   }
 
   if (isString) {
-    sortFn = sortFns?.text
+    return sortFns?.text ?? sortFn_basic
   }
 
-  return sortFn ?? sortFn_basic
+  return sortFn_basic
 }
 
 /**
diff --git a/packages/table-core/src/fns/sortFns.ts b/packages/table-core/src/fns/sortFns.ts
index 84889cb934..80cad617dd 100644
--- a/packages/table-core/src/fns/sortFns.ts
+++ b/packages/table-core/src/fns/sortFns.ts
@@ -151,10 +151,8 @@ function toString(a: any) {
 // It handles numbers, mixed alphanumeric combinations, and even
 // null, undefined, and Infinity
 function compareAlphanumeric(aStr: string, bStr: string) {
-  // Split on number groups, but keep the delimiter
-  // Then remove falsey split values
-  const a = aStr.split(reSplitAlphaNumeric).filter(Boolean)
-  const b = bStr.split(reSplitAlphaNumeric).filter(Boolean)
+  const a = aStr.split(reSplitAlphaNumeric)
+  const b = bStr.split(reSplitAlphaNumeric)
 
   let ai = 0
   let bi = 0
@@ -162,16 +160,29 @@ function compareAlphanumeric(aStr: string, bStr: string) {
   const bLen = b.length
 
   while (ai < aLen && bi < bLen) {
+    // Skip the empty boundary chunks that .filter(Boolean) used to remove
+    if (!a[ai]) {
+      ai++
+      continue
+    }
+    if (!b[bi]) {
+      bi++
+      continue
+    }
+
     const aa = a[ai++]!
     const bb = b[bi++]!
 
+    // Chunks are either all-digit (parseInt always succeeds) or digit-free
+    // (parseInt is always NaN), so NaN-ness fully classifies each chunk
     const an = parseInt(aa, 10)
     const bn = parseInt(bb, 10)
 
-    const combo = [an, bn].sort()
+    const aIsNaN = isNaN(an)
+    const bIsNaN = isNaN(bn)
 
     // Both are string
-    if (isNaN(combo[0]!)) {
+    if (aIsNaN && bIsNaN) {
       if (aa > bb) {
         return 1
       }
@@ -181,9 +192,9 @@ function compareAlphanumeric(aStr: string, bStr: string) {
       continue
     }
 
-    // One is a string, one is a number
-    if (isNaN(combo[1]!)) {
-      return isNaN(an) ? -1 : 1
+    // One is a string, one is a number — the string chunk sorts first
+    if (aIsNaN || bIsNaN) {
+      return aIsNaN ? -1 : 1
     }
 
     // Both are numbers
@@ -195,7 +206,19 @@ function compareAlphanumeric(aStr: string, bStr: string) {
     }
   }
 
-  return aLen - ai - (bLen - bi)
+  // One side is exhausted — compare the counts of remaining non-empty chunks
+  let remaining = 0
+  for (; ai < aLen; ai++) {
+    if (a[ai]) {
+      remaining++
+    }
+  }
+  for (; bi < bLen; bi++) {
+    if (b[bi]) {
+      remaining--
+    }
+  }
+  return remaining
 }
 
 // Exports
diff --git a/packages/table-core/tests/unit/features/row-sorting/rowSortingFeature.utils.test.ts b/packages/table-core/tests/unit/features/row-sorting/rowSortingFeature.utils.test.ts
new file mode 100644
index 0000000000..d4bf3546dd
--- /dev/null
+++ b/packages/table-core/tests/unit/features/row-sorting/rowSortingFeature.utils.test.ts
@@ -0,0 +1,111 @@
+import { describe, expect, it } from 'vitest'
+import {
+  constructTable,
+  coreFeatures,
+  rowSortingFeature,
+  sortFn_alphanumeric,
+  sortFn_basic,
+  sortFn_datetime,
+  sortFn_text,
+  sortFns,
+  tableFeatures,
+} from '../../../../src'
+import { column_getAutoSortFn } from '../../../../src/static-functions'
+import { storeReactivityBindings } from '../../../../src/store-reactivity-bindings'
+import type { ColumnDef } from '../../../../src'
+
+type Sample = {
+  plainText: string
+  alphaNum: string
+  createdAt: Date
+  amount: number
+}
+
+const features = tableFeatures({ ...coreFeatures, rowSortingFeature })
+
+const sampleKeys: Array<keyof Sample> = [
+  'plainText',
+  'alphaNum',
+  'createdAt',
+  'amount',
+]
+
+function generateAutoSortTestTable(data: Array<Sample>) {
+  const columns: Array<ColumnDef<typeof features, Sample, any>> =
+    sampleKeys.map((key) => ({ accessorKey: key, id: key }))
+
+  const table = constructTable<typeof features, Sample>({
+    data,
+    columns,
+    features: {
+      ...features,
+      coreReativityFeature: storeReactivityBindings(),
+    },
+  })
+
+  // Normally assigned by createSortedRowModel when the sorted row model is wired up
+  table._rowModelFns.sortFns = sortFns
+
+  return table
+}
+
+describe('column_getAutoSortFn', () => {
+  const data: Array<Sample> = [
+    {
+      plainText: 'apple',
+      alphaNum: 'item1',
+      createdAt: new Date('2024-01-01'),
+      amount: 1,
+    },
+    {
+      plainText: 'banana',
+      alphaNum: 'item10',
+      createdAt: new Date('2024-02-01'),
+      amount: 2,
+    },
+  ]
+
+  it('selects datetime for Date values', () => {
+    const table = generateAutoSortTestTable(data)
+    const column = table.getColumn('createdAt')!
+
+    expect(column_getAutoSortFn(column)).toBe(sortFn_datetime)
+  })
+
+  it('selects alphanumeric for strings mixing text and numbers', () => {
+    // Regression: the text fallback used to clobber the alphanumeric match
+    const table = generateAutoSortTestTable(data)
+    const column = table.getColumn('alphaNum')!
+
+    expect(column_getAutoSortFn(column)).toBe(sortFn_alphanumeric)
+  })
+
+  it('selects text for plain strings', () => {
+    const table = generateAutoSortTestTable(data)
+    const column = table.getColumn('plainText')!
+
+    expect(column_getAutoSortFn(column)).toBe(sortFn_text)
+  })
+
+  it('falls back to basic for non-string, non-date values', () => {
+    const table = generateAutoSortTestTable(data)
+    const column = table.getColumn('amount')!
+
+    expect(column_getAutoSortFn(column)).toBe(sortFn_basic)
+  })
+
+  it('falls back to basic when no rows exist', () => {
+    const table = generateAutoSortTestTable([])
+    const column = table.getColumn('plainText')!
+
+    expect(column_getAutoSortFn(column)).toBe(sortFn_basic)
+  })
+
+  it('falls back to text when alphanumeric is not registered', () => {
+    const table = generateAutoSortTestTable(data)
+    table._rowModelFns.sortFns = { text: sortFn_text }
+    const column = table.getColumn('alphaNum')!
+
+    expect(column_getAutoSortFn(column)).toBe(sortFn_text)
+  })
+})
diff --git a/packages/table-core/tests/unit/fns/sortFns.test.ts b/packages/table-core/tests/unit/fns/sortFns.test.ts
index e4d62441c3..36906f9c71 100644
--- a/packages/table-core/tests/unit/fns/sortFns.test.ts
+++ b/packages/table-core/tests/unit/fns/sortFns.test.ts
@@ -140,6 +140,110 @@ describe('sortFn_basic', () => {
   })
 })
 
+describe('compareAlphanumeric boundary chunks (filter-removal regression)', () => {
+  // The inline empty-chunk skipping must match the old `.filter(Boolean)`
+  // behavior; empty chunks occur where a string starts/ends with digits.
+
+  it('handles leading digit groups', () => {
+    // Number chunk vs string chunk at position 0 — string sorts first
+    expect(cmp(sortFn_alphanumeric, '1abc', 'abc1')).toBe(1)
+    expect(cmp(sortFn_alphanumeric, 'abc1', '1abc')).toBe(-1)
+    expect(cmp(sortFn_alphanumeric, '2abc', '10abc')).toBe(-1)
+  })
+
+  it('handles pure digit strings (leading and trailing empties)', () => {
+    expect(cmp(sortFn_alphanumeric, '12', '12')).toBe(0)
+    expect(cmp(sortFn_alphanumeric, '2', '10')).toBe(-1)
+    expect(cmp(sortFn_alphanumeric, '10', '2')).toBe(1)
+  })
+
+  it('treats leading zeros as numerically equal', () => {
+    expect(cmp(sortFn_alphanumeric, 'item007', 'item7')).toBe(0)
+  })
+
+  it('counts only non-empty chunks in the prefix tail', () => {
+    // a exhausts past its trailing empty; b has one real chunk remaining
+    expect(cmp(sortFn_alphanumeric, '1', '1abc')).toBe(-1)
+    expect(cmp(sortFn_alphanumeric, '1abc', '1')).toBe(1)
+    expect(cmp(sortFn_alphanumeric, 'abc', 'abc123')).toBe(-1)
+  })
+
+  it('handles empty vs non-empty strings', () => {
+    expect(cmp(sortFn_alphanumeric, '', 'a')).toBe(-1)
+    expect(cmp(sortFn_alphanumeric, 'a', '')).toBe(1)
+  })
+
+  it('matches the previous filter-based implementation across all vocab pairs', () => {
+    // Reference: the implementation before the allocation refactor, verbatim
+    const reference = (aStr: string, bStr: string): number => {
+      const a = aStr.split(/([0-9]+)/gm).filter(Boolean)
+      const b = bStr.split(/([0-9]+)/gm).filter(Boolean)
+      let ai = 0
+      let bi = 0
+      const aLen = a.length
+      const bLen = b.length
+      while (ai < aLen && bi < bLen) {
+        const aa = a[ai++]!
+        const bb = b[bi++]!
+        const an = parseInt(aa, 10)
+        const bn = parseInt(bb, 10)
+        const combo = [an, bn].sort()
+        if (isNaN(combo[0]!)) {
+          if (aa > bb) return 1
+          if (bb > aa) return -1
+          continue
+        }
+        if (isNaN(combo[1]!)) {
+          return isNaN(an) ? -1 : 1
+        }
+        if (an > bn) return 1
+        if (bn > an) return -1
+      }
+      return aLen - ai - (bLen - bi)
+    }
+
+    const vocab = [
+      '',
+      'a',
+      'ab',
+      '1',
+      '0',
+      '12',
+      '007',
+      '7',
+      '1a',
+      'a1',
+      '1a1',
+      'a1a',
+      '1a2b',
+      'a1b2',
+      '12ab34',
+      'ab12cd',
+      'item2',
+      'item10',
+      '2item',
+      '10item',
+      'a007b',
+      'a7b',
+      'nan',
+      'infinity',
+      '999999999999999999999999999999',
+    ]
+
+    for (const a of vocab) {
+      for (const b of vocab) {
+        // Case-sensitive variant is a pure passthrough for string inputs
+        const actual = sortFn_alphanumericCaseSensitive(
+          makeRow(a),
+          makeRow(b),
+          'col',
+        )
+        expect(actual, `compare("${a}", "${b}")`).toBe(reference(a, b))
+      }
+    }
+  })
+})
+
 describe('sortFn_datetime', () => {
   it('returns 0 for equal dates', () => {
     const d = new Date(2026, 1, 1)
diff --git a/perf.md b/perf.md
index dad016c99d..a7c96565e8 100644
--- a/perf.md
+++ b/perf.md
@@ -93,11 +93,11 @@ Typecheck verified clean after the sweep (`pnpm tsc --noEmit` passes).
 
 ## Progress
 
-- **Total findings:** 60
-- **Done `[x]`:** 17
+- **Total findings:** 61
+- **Done `[x]`:** 19
 - **Partial `[~]`:** 2
 - **Skipped `[-]`:** 5
-- **Not started `[ ]`:** 36
+- **Not started `[ ]`:** 35
 
 _(Update these counters as you go.)_
 
@@ -1858,8 +1858,8 @@ Both walk the `sorting` array; called for every visible sortable column on every
 
 ## 52. `compareAlphanumeric` allocates 2 arrays per comparison — Score: 6
 
-**Status:** `[ ]` not started
-**Implementation note:** _(none)_
+**Status:** `[x]` done
+**Implementation note:** Went further than proposed — the audit undercounted the allocations. Besides the two `.filter(Boolean)` arrays per comparison, every chunk-pair iteration allocated and **default-sorted** a fresh `[an, bn]` array (`const combo = [an, bn].sort()`), paying array allocation + sort dispatch + number→string coercion just to classify NaN-ness. Since chunks from `reSplitAlphaNumeric` are either all-digit (`parseInt` always succeeds) or digit-free (`parseInt` always `NaN`), two plain `isNaN` checks replace the combo entirely (`aIsNaN && bIsNaN` → both-string branch, `aIsNaN || bIsNaN` → mixed branch). The `.filter(Boolean)` drop required two semantic guards: (1) empty chunks (which only occur at split-array boundaries) are skipped inline at the top of the loop; (2) the prefix tail return counts only **non-empty** remaining chunks instead of raw `aLen - ai - (bLen - bi)`. Net per comparison: 3+k array allocations → 1 per side (the unavoidable `.split()`), where k = chunk pairs visited. Measured on a 10k-row sort of mixed `itemNNNN-revNN` strings (Node, median of 7 runs): **36.5ms → 20.7ms (~43% faster)**. Equivalence verified two ways: a vocab×vocab differential test against the verbatim old implementation (625 pairs covering boundary digits, leading zeros, pure digits, empties, 30-digit overflow) plus targeted boundary-chunk unit tests, all in `tests/unit/fns/sortFns.test.ts`. Unblocked by #61 — the auto path can now actually select `alphanumeric` again.
 
 **Location:** `src/fns/sortFns.ts:154–200`
 **Category:** `big-o`, `micro`
@@ -2037,6 +2037,60 @@ Each file has a `getXyzPrototype(table)` function with identical shape — `if (
 
 ---
 
+## 61. `column_getAutoSortFn` never auto-selects `alphanumeric`/`datetime` — `isString` fallback clobbers the match (bug) — Score: 8 (bug)
+
+**Status:** `[x]` done
+**Implementation note:** Restored v8's early-return precedence (datetime → alphanumeric → text → basic). Discovered during the adversarial verification of #52. The v9 refactor had converted v8's in-loop `return sortingFns.datetime` / `return sortingFns.alphanumeric` (v8 `RowSorting.ts` `getAutoSortingFn`) into assignments to a shared `sortFn` local — but any string value in the sample also sets `isString = true`, and the trailing `if (isString) sortFn = sortFns?.text` unconditionally overwrote the match. Net effect: the auto path never selected `alphanumeric` (dead code) and `datetime` was clobbered whenever any sampled value was a string; digit-bearing string columns silently got plain `text` sorting (`"item10"` before `"item2"`). Each restored early return is guarded (`if (sortFns?.datetime) return ...`) so a custom registry missing an entry falls through to the next preference instead of returning `undefined`. Added `tests/unit/features/row-sorting/rowSortingFeature.utils.test.ts` covering all four selections plus empty-rows and missing-registry-entry fallbacks (no row-sorting utils tests existed before). Related to #50, which fixed the row sampling in the same function. Note for #52: this fix makes `compareAlphanumeric` reachable via the auto path again, so #52's allocation refactor is now worth pursuing.
+
+**Location:** `src/features/row-sorting/rowSortingFeature.utils.ts:84–122`
+**Category:** `bug`
+
+**Before**
+
+```ts
+for (let i = 0; i < firstRows.length; i++) {
+  const value = firstRows[i]!.getValue(column.id)
+  if (Object.prototype.toString.call(value) === '[object Date]') {
+    sortFn = sortFns?.datetime
+  }
+  if (typeof value === 'string') {
+    isString = true
+    if (value.split(reSplitAlphaNumeric).length > 1) {
+      sortFn = sortFns?.alphanumeric
+    }
+  }
+}
+if (isString) {
+  sortFn = sortFns?.text // clobbers any datetime/alphanumeric match above
+}
+return sortFn ?? sortFn_basic
+```
+
+**After**
+
+```ts
+for (let i = 0; i < firstRows.length; i++) {
+  const value = firstRows[i]!.getValue(column.id)
+  if (Object.prototype.toString.call(value) === '[object Date]') {
+    if (sortFns?.datetime) return sortFns.datetime
+  }
+  if (typeof value === 'string') {
+    isString = true
+    if (value.split(reSplitAlphaNumeric).length > 1) {
+      if (sortFns?.alphanumeric) return sortFns.alphanumeric
+    }
+  }
+}
+if (isString) {
+  return sortFns?.text ?? sortFn_basic
+}
+return sortFn_basic
+```
+
+**Risk:** Behavior changes for tables relying on the broken default — string columns containing digits now natural-sort (the documented v8 behavior) instead of lexicographic `text` sort.
+
+---
+
 # Suggested priority order
 
 Anything **≥ 7**:
@@ -2055,6 +2109,7 @@ Anything **≥ 7**:
 | 34  | `orderColumns` `grouping.includes` → Set                    | 7     | big-o         |
 | 49  | `createSortedRowModel` clones every row                     | 7     | big-o / micro |
 | 50  | `column_getAutoSortFn` `slice(10)` should be `slice(0, 10)` | 7     | bug           |
+| 61  | `column_getAutoSortFn` text fallback clobbers auto-match    | 8     | bug           |
 
 Anything **5–6**: a second wave of memoization gaps and partition-loop consolidations (#2, #9, #15, #21, #24, #27, #33, #36, #39, #40, #41, #52). All low-risk.