Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/tame-times-create.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@tanstack/db-ivm": patch
---

Replace the `JSON.stringify`-based hash function with a structural hashing function.
2 changes: 0 additions & 2 deletions packages/db-ivm/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
"version": "0.1.2",
"dependencies": {
"fractional-indexing": "^3.2.0",
"murmurhash-js": "^1.0.0",
"sorted-btree": "^1.8.1"
},
"devDependencies": {
"@types/debug": "^4.1.12",
"@types/murmurhash-js": "^1.0.6",
"@vitest/coverage-istanbul": "^3.0.9"
},
"exports": {
Expand Down
11 changes: 6 additions & 5 deletions packages/db-ivm/src/hashIndex.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import { DefaultMap, hash } from "./utils.js"
import { DefaultMap } from "./utils.js"
import { hash } from "./hashing/index.js"
import type { Hash } from "./hashing/index.js"

/**
* A map from a difference collection trace's keys -> (value, multiplicities) that changed.
* Used in operations like join and reduce where the operation needs to
* exploit the key-value structure of the data to run efficiently.
*/
export class HashIndex<K, V> {
#inner: DefaultMap<K, DefaultMap<string, [V, number]>>
#inner: DefaultMap<K, DefaultMap<Hash, [V, number]>>

constructor() {
this.#inner = new DefaultMap<K, DefaultMap<string, [V, number]>>(
() =>
new DefaultMap<string, [V, number]>(() => [undefined as any as V, 0])
this.#inner = new DefaultMap<K, DefaultMap<Hash, [V, number]>>(
() => new DefaultMap<Hash, [V, number]>(() => [undefined as any as V, 0])
)
// #inner is as map of:
// {
Expand Down
157 changes: 157 additions & 0 deletions packages/db-ivm/src/hashing/hash.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import { MurmurHashStream, randomHash } from "./murmur.js"
import type { Hasher } from "./murmur.js"

/*
 * Implementation of structural hashing based on the Composites polyfill implementation:
 * https://github.com/tc39/proposal-composites
 */

// Random per-process markers mixed into the hash stream to disambiguate
// values that would otherwise produce identical byte sequences
// (e.g. `true` vs. `false`, or a Map vs. a plain object with the same entries).
const TRUE = randomHash()
const FALSE = randomHash()
const NULL = randomHash()
const UNDEFINED = randomHash()
// Written before each key of an object while hashing its entries.
const KEY = randomHash()
// XOR-ed with a unique id for values hashed by reference (see cachedReferenceHash).
const FUNCTIONS = randomHash()
// Type markers tagging the container kind before its contents are hashed.
const DATE_MARKER = randomHash()
const OBJECT_MARKER = randomHash()
const ARRAY_MARKER = randomHash()
const MAP_MARKER = randomHash()
const SET_MARKER = randomHash()

// Memoizes the hash of each object by reference.
// NOTE(review): an object mutated after being hashed keeps returning its
// stale cached hash — confirm callers never rehash mutated objects.
const hashCache = new WeakMap<object, number>()

/**
 * Computes a structural hash of an arbitrary value.
 * Structurally equal values hash identically within the current process;
 * seeds are random per run, so hashes are not stable across restarts.
 */
export function hash(input: any): number {
  const stream = new MurmurHashStream()
  updateHasher(stream, input)
  return stream.digest()
}

/**
 * Hashes a non-null object, memoizing the result per reference in `hashCache`.
 *
 * Dates hash by timestamp; arrays, maps and sets are tagged with a type
 * marker and hashed structurally via their entries; binary blobs
 * (Buffer / Uint8Array / File) are hashed by identity instead.
 *
 * NOTE(review): results are cached by reference, so an object mutated after
 * being hashed keeps returning its stale hash.
 */
function hashObject(input: object): number {
  const cachedHash = hashCache.get(input)
  if (cachedHash !== undefined) {
    return cachedHash
  }

  let valueHash: number | undefined
  if (input instanceof Date) {
    valueHash = hashDate(input)
  } else {
    let plainObjectInput = input
    let marker = OBJECT_MARKER

    // Array.isArray also recognizes arrays from other realms,
    // unlike `instanceof Array`.
    if (Array.isArray(input)) {
      marker = ARRAY_MARKER
    }

    if (input instanceof Map) {
      marker = MAP_MARKER
      plainObjectInput = [...input.entries()]
    }

    if (input instanceof Set) {
      marker = SET_MARKER
      plainObjectInput = [...input.entries()]
    }

    // Guard the globals before `instanceof`: `Buffer` does not exist in
    // browsers and `File` does not exist in Node < 20, so an unguarded
    // check would throw a ReferenceError for every object hashed there.
    if (
      (typeof Buffer !== `undefined` && input instanceof Buffer) ||
      input instanceof Uint8Array ||
      (typeof File !== `undefined` && input instanceof File)
    ) {
      // Deeply hashing these objects would be too costly
      // but we also don't want to ignore them
      // so we track them by reference and cache them in a weak map
      return cachedReferenceHash(input)
    }

    valueHash = hashPlainObject(plainObjectInput, marker)
  }

  hashCache.set(input, valueHash)
  return valueHash
}

/**
 * Hashes a Date by its millisecond timestamp, prefixed with DATE_MARKER to
 * distinguish it from the bare timestamp number.
 */
function hashDate(input: Date): number {
  const stream = new MurmurHashStream()
  stream.update(DATE_MARKER)
  stream.update(input.getTime())
  return stream.digest()
}

/**
 * Hashes an object's own enumerable string keys and their values, prefixed
 * with a container-type marker. Keys are visited in sorted order so that
 * insertion order does not affect the resulting hash.
 */
function hashPlainObject(input: object, marker: number): number {
  const hasher = new MurmurHashStream()

  // Tag the stream with the input's container type (object/array/map/set).
  hasher.update(marker)

  const sortedKeys = Object.keys(input).sort(keySort)
  for (const key of sortedKeys) {
    hasher.update(KEY)
    hasher.update(key)
    updateHasher(hasher, input[key as keyof typeof input])
  }

  return hasher.digest()
}

/**
 * Feeds one value of any type into `hasher`, dispatching on its runtime type.
 * Primitives are written directly (with type markers added by the hasher);
 * objects and functions are reduced to a cached numeric hash first.
 */
function updateHasher(hasher: Hasher, input: unknown): void {
  if (input === null) {
    hasher.update(NULL)
    return
  }

  if (typeof input === `undefined`) {
    hasher.update(UNDEFINED)
  } else if (typeof input === `boolean`) {
    hasher.update(input ? TRUE : FALSE)
  } else if (typeof input === `number`) {
    // Normalize NaNs and -0
    if (isNaN(input)) {
      hasher.update(NaN)
    } else if (input === 0) {
      hasher.update(0)
    } else {
      hasher.update(input)
    }
  } else if (
    typeof input === `bigint` ||
    typeof input === `string` ||
    typeof input === `symbol`
  ) {
    hasher.update(input)
  } else if (typeof input === `object`) {
    hasher.update(getCachedHash(input))
  } else if (typeof input === `function`) {
    // Functions are assigned a globally unique ID
    // and that ID is cached in the weak map
    hasher.update(cachedReferenceHash(input))
  } else {
    console.warn(
      `Ignored input during hashing because it is of type ${typeof input} which is not supported`
    )
  }
}

/**
 * Returns the memoized hash for `input`, computing (and caching) it on a miss.
 */
function getCachedHash(input: object): number {
  // `??` is safe here: hashes are numbers, and 0 is a valid non-nullish hash.
  return hashCache.get(input) ?? hashObject(input)
}

// Monotonic id handed out to values that are hashed by reference.
let nextRefId = 1

/**
 * Hashes a value by identity rather than by structure: on first sight the
 * value gets a fresh id XOR-ed with the FUNCTIONS marker, cached in the weak
 * map and returned unchanged on every later call.
 */
function cachedReferenceHash(fn: object): number {
  const existing = hashCache.get(fn)
  if (existing !== undefined) {
    return existing
  }

  const valueHash = nextRefId++ ^ FUNCTIONS
  hashCache.set(fn, valueHash)
  return valueHash
}

/**
 * Strings sorted lexicographically by UTF-16 code unit.
 *
 * Deliberately NOT `localeCompare`: locale-aware comparison can order the
 * same keys differently depending on the host locale/ICU data (making the
 * key order — and thus the hash — environment-dependent) and is far slower
 * in this hot path. For hashing we only need a stable total order.
 */
function keySort(a: string, b: string): number {
  if (a < b) return -1
  if (a > b) return 1
  return 0
}
2 changes: 2 additions & 0 deletions packages/db-ivm/src/hashing/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Public entry point of the hashing module: re-exports the structural
// `hash` function and the low-level `Hash` / `Hasher` types.
export { hash } from "./hash.js"
export type { Hash, Hasher } from "./murmur.js"
128 changes: 128 additions & 0 deletions packages/db-ivm/src/hashing/murmur.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*
 * Implementation of murmur hash based on the Composites polyfill implementation:
 * https://github.com/tc39/proposal-composites
 */

// Random per-process seed and type markers. A marker is mixed into the
// stream before a value so that values of different types (e.g. the string
// "1" vs. a symbol described "1") cannot produce the same byte sequence.
const RANDOM_SEED = randomHash()
const STRING_MARKER = randomHash()
const BIG_INT_MARKER = randomHash()
const NEG_BIG_INT_MARKER = randomHash()
const SYMBOL_MARKER = randomHash()

// Scratch view used to read the raw IEEE-754 bytes of a number.
const FLOAT64_SCRATCH = new DataView(new ArrayBuffer(8))

export type Hash = number

/**
 * Returns a random unsigned 31-bit integer.
 */
export function randomHash() {
  return (Math.random() * (2 ** 31 - 1)) >>> 0
}

export interface Hasher {
  update: (val: symbol | string | number | bigint) => void
  digest: () => number
}

/**
 * This implementation of Murmur hash uses a random initial seed and random markers.
 * This means that hashes aren't deterministic across app restarts.
 * This is intentional in the composites polyfill to be resistant to hash-flooding attacks
 * where malicious users would precompute lots of different objects whose hashes collide with each other.
 *
 * Currently, for ts/db-ivm this is fine because we don't persist client state.
 * However, when we will introduce persistence we will either need to store the seeds or remove the randomness
 * to ensure deterministic hashes across app restarts.
 */
export class MurmurHashStream implements Hasher {
  private hash: number = RANDOM_SEED
  // Total number of bytes written so far (folded in during finalization).
  private length = 0
  // Up to 3 pending bytes waiting to form a full 32-bit word.
  private carry = 0
  private carryBytes = 0

  // One murmur3 32-bit mixing round for a full little-endian word.
  private _mix(k1: number): void {
    k1 = Math.imul(k1, 0xcc9e2d51)
    k1 = (k1 << 15) | (k1 >>> 17)
    k1 = Math.imul(k1, 0x1b873593)
    this.hash ^= k1
    this.hash = (this.hash << 13) | (this.hash >>> 19)
    this.hash = Math.imul(this.hash, 5) + 0xe6546b64
  }

  // Buffers a single byte; mixes once 4 bytes have accumulated.
  private _writeByte(byte: number): void {
    this.carry |= (byte & 0xff) << (8 * this.carryBytes)
    this.carryBytes++
    this.length++

    if (this.carryBytes === 4) {
      this._mix(this.carry >>> 0)
      this.carry = 0
      this.carryBytes = 0
    }
  }

  /**
   * Feeds one primitive value into the hash stream.
   * Strings and symbol descriptions are written as little-endian UTF-16
   * code units, numbers as their 8-byte IEEE-754 representation, and
   * bigints as sign marker + little-endian magnitude bytes.
   */
  update(chunk: symbol | string | number | bigint): void {
    switch (typeof chunk) {
      case `symbol`: {
        this.update(SYMBOL_MARKER)
        const description = chunk.description
        if (!description) {
          return
        }

        for (let i = 0; i < description.length; i++) {
          const code = description.charCodeAt(i)
          this._writeByte(code & 0xff)
          this._writeByte((code >>> 8) & 0xff)
        }
        return
      }
      case `string`:
        this.update(STRING_MARKER)
        for (let i = 0; i < chunk.length; i++) {
          const code = chunk.charCodeAt(i)
          this._writeByte(code & 0xff)
          this._writeByte((code >>> 8) & 0xff)
        }
        return
      case `number`: {
        // Hash all 8 bytes of the IEEE-754 float64 representation.
        // Truncating to 32 bits via bitwise ops would make every number
        // collide with its ToInt32 image (e.g. 1 vs 1.5, or timestamps
        // 2^32 ms apart) — and these hashes are used as Map keys upstream,
        // so such collisions would silently merge distinct values.
        // -0 is folded into +0 and NaN into the canonical NaN so that
        // equal-valued numbers always produce identical bytes.
        const value = Number.isNaN(chunk) ? NaN : chunk === 0 ? 0 : chunk
        FLOAT64_SCRATCH.setFloat64(0, value)
        for (let i = 0; i < 8; i++) {
          this._writeByte(FLOAT64_SCRATCH.getUint8(i))
        }
        return
      }
      case `bigint`: {
        let value = chunk
        if (value < 0n) {
          value = -value
          this.update(NEG_BIG_INT_MARKER)
        } else {
          this.update(BIG_INT_MARKER)
        }
        // Little-endian bytes of the magnitude.
        while (value > 0n) {
          this._writeByte(Number(value & 0xffn))
          value >>= 8n
        }
        if (chunk === 0n) this._writeByte(0)
        return
      }
      default:
        throw new TypeError(`Unsupported input type: ${typeof chunk}`)
    }
  }

  /**
   * Finalizes and returns the 32-bit unsigned hash.
   * Finalization runs on locals so digest() is idempotent and does not
   * corrupt the stream state (the previous version mutated `this.hash`,
   * so a second digest() returned a different value).
   */
  digest(): number {
    let h = this.hash
    if (this.carryBytes > 0) {
      // Tail handling: mix pending bytes without the rotate/advance step,
      // mirroring murmur3's treatment of the final partial word.
      let k1 = this.carry >>> 0
      k1 = Math.imul(k1, 0xcc9e2d51)
      k1 = (k1 << 15) | (k1 >>> 17)
      k1 = Math.imul(k1, 0x1b873593)
      h ^= k1
    }

    // fmix32 avalanche.
    h ^= this.length
    h ^= h >>> 16
    h = Math.imul(h, 0x85ebca6b)
    h ^= h >>> 13
    h = Math.imul(h, 0xc2b2ae35)
    h ^= h >>> 16

    return h >>> 0
  }
}
2 changes: 1 addition & 1 deletion packages/db-ivm/src/multiset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ import {
DefaultMap,
chunkedArrayPush,
globalObjectIdGenerator,
hash,
} from "./utils.js"
import { hash } from "./hashing/index.js"

export type MultiSetArray<T> = Array<[T, number]>
export type KeyedData<T> = [key: string, value: T]
Expand Down
8 changes: 4 additions & 4 deletions packages/db-ivm/src/operators/distinct.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import { DifferenceStreamWriter, UnaryOperator } from "../graph.js"
import { StreamBuilder } from "../d2.js"
import { hash } from "../utils.js"
import { hash } from "../hashing/index.js"
import { MultiSet } from "../multiset.js"
import type { Hash } from "../hashing/index.js"
import type { DifferenceStreamReader } from "../graph.js"
import type { IStreamBuilder } from "../types.js"

type HashedValue = string
type Multiplicity = number

/**
* Operator that removes duplicates
*/
export class DistinctOperator<T> extends UnaryOperator<T> {
#by: (value: T) => any
#values: Map<HashedValue, Multiplicity> // keeps track of the number of times each value has been seen
#values: Map<Hash, Multiplicity> // keeps track of the number of times each value has been seen

constructor(
id: number,
Expand All @@ -27,7 +27,7 @@ export class DistinctOperator<T> extends UnaryOperator<T> {
}

run(): void {
const updatedValues = new Map<HashedValue, [Multiplicity, T]>()
const updatedValues = new Map<Hash, [Multiplicity, T]>()

// Compute the new multiplicity for each value
for (const message of this.inputMessages()) {
Expand Down
Loading
Loading