Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/five-singers-mate.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@tanstack/db-ivm": patch
---

Hybrid index implementation to track values and their multiplicities
93 changes: 93 additions & 0 deletions packages/db-ivm/src/hashIndex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import { DefaultMap, hash } from "./utils.js"

/**
 * An index from keys to the distinct values stored under each key, together
 * with the multiplicity of every value.
 * Values are told apart by `hash(value)`: two values with the same hash are
 * treated as the same value and their multiplicities are summed.
 * Used in operations like join and reduce where the operation needs to
 * exploit the key-value structure of the data to run efficiently.
 */
export class HashIndex<K, V> {
  // #inner is a map of:
  // {
  //   [key]: {
  //     [hash(value)]: [value, multiplicity]
  //   }
  // }
  #inner: DefaultMap<K, DefaultMap<string, [V, number]>>

  constructor() {
    this.#inner = new DefaultMap<K, DefaultMap<string, [V, number]>>(
      () =>
        // A value hash that is not stored reads as multiplicity 0.
        // The placeholder value of that default entry is never handed out.
        new DefaultMap<string, [V, number]>(() => [undefined as any as V, 0])
    )
  }

  /**
   * Serializes the index as `HashIndex(<json>)`.
   * @param indent - When true the JSON is pretty-printed with 2 spaces.
   */
  toString(indent = false): string {
    return `HashIndex(${JSON.stringify(
      [...this.#inner].map(([k, valueMap]) => [k, [...valueMap]]),
      undefined,
      indent ? 2 : undefined
    )})`
  }

  /**
   * @returns All `[value, multiplicity]` pairs stored under `key`.
   */
  get(key: K): Array<[V, number]> {
    const valueMap = this.#inner.get(key)
    return [...valueMap.values()]
  }

  /**
   * @returns The multiplicity stored for the value whose hash equals
   * `hash(value)` under `key`.
   */
  getMultiplicity(key: K, value: V): number {
    const valueMap = this.#inner.get(key)
    const valueHash = hash(value)
    const [, multiplicity] = valueMap.get(valueHash)
    return multiplicity
  }

  /**
   * @returns The entries of the underlying map: one `[key, valueMap]` pair per
   * key, where `valueMap` maps `hash(value)` to `[value, multiplicity]`.
   */
  entries() {
    return this.#inner.entries()
  }

  /**
   * Flattened iteration: yields one `[key, [value, multiplicity]]` tuple for
   * every value of every key.
   */
  *entriesIterator(): Generator<[K, [V, number]]> {
    for (const [key, valueMap] of this.#inner.entries()) {
      for (const [_valueHash, [value, multiplicity]] of valueMap.entries()) {
        yield [key, [value, multiplicity]]
      }
    }
  }

  /** @returns Whether `key` is stored in the index. */
  has(key: K): boolean {
    return this.#inner.has(key)
  }

  /** Removes `key` and everything stored under it. */
  delete(key: K): void {
    this.#inner.delete(key)
  }

  /** The number of keys stored in the index. */
  get size(): number {
    return this.#inner.size
  }

  /**
   * Adds `multiplicity` to the multiplicity of `val` under `key`.
   *
   * A multiplicity of 0 is a no-op. A value whose multiplicity drops to 0 is
   * removed, and a key whose last value is removed is dropped from the index,
   * so `has` and `size` never report keys that hold no values.
   *
   * @param key - The key to update.
   * @param value - A `[val, multiplicity]` tuple; `multiplicity` is the delta
   * to apply and may be negative.
   * @returns Nothing, except if the addition caused the value to be removed
   * and the key to be left with only a single value.
   * In that case, the single remaining `[value, multiplicity]` is returned.
   */
  addValue(key: K, value: [V, number]): [V, number] | void {
    const [val, multiplicity] = value
    if (multiplicity === 0) {
      // A zero delta cannot change anything: skip the hashing and, more
      // importantly, do not register the key with an empty value map.
      return
    }

    const valueMap = this.#inner.get(key)
    const valueHash = hash(val)
    const [, existingMultiplicity] = valueMap.get(valueHash)
    const newMultiplicity = existingMultiplicity + multiplicity

    if (newMultiplicity !== 0) {
      valueMap.set(valueHash, [val, newMultiplicity])
      // `valueMap` may be a fresh default that is not stored yet
      this.#inner.set(key, valueMap)
      return
    }

    valueMap.delete(valueHash)
    if (valueMap.size === 0) {
      // Last value of the key is gone: do not keep an empty entry around
      this.#inner.delete(key)
      return
    }
    if (valueMap.size === 1) {
      // Signal that the key only has a single remaining value
      return valueMap.entries().next().value![1]
    }
  }
}
153 changes: 106 additions & 47 deletions packages/db-ivm/src/indexes.ts
Original file line number Diff line number Diff line change
@@ -1,83 +1,142 @@
import { MultiSet } from "./multiset.js"
import { DefaultMap } from "./utils.js"
import { HashIndex } from "./hashIndex.js"
import { ValueIndex } from "./valueIndex.js"
import { concatIterable, mapIterable } from "./utils.js"

/**
* A map from a difference collection trace's keys -> (value, multiplicities) that changed.
* Used in operations like join and reduce where the operation needs to
* exploit the key-value structure of the data to run efficiently.
*/
export class Index<K, V> {
#inner: DefaultMap<K, Map<V, number>>
/*
* This is a hybrid Index that composes a ValueIndex and a HashIndex.
* Keys that have only one value are stored in the ValueIndex.
* Keys that have multiple values are stored in the HashIndex, the hash distinguishes between the values.
* This reduces the number of hashes we need to compute, since often only a small portion of the keys are updated,
* so we don't have to hash the values of keys that are never updated.
*
* Note: The `valueIndex` and `hashIndex` have disjoint keys.
* When a key that has only one value gets a new distinct value,
* it is added to the `hashIndex` and removed from the `valueIndex` and vice versa.
*/
#valueIndex: ValueIndex<K, V>
#hashIndex: HashIndex<K, V>

constructor() {
this.#inner = new DefaultMap<K, Map<V, number>>(() => new Map<V, number>())
// #inner is a map of:
// {
// [key]: Map<V, number> // Direct value-to-multiplicity mapping
// }
this.#valueIndex = new ValueIndex<K, V>()
this.#hashIndex = new HashIndex<K, V>()
}

toString(indent = false): string {
return `Index(${JSON.stringify(
[...this.#inner].map(([k, valueMap]) => [k, [...valueMap]]),
undefined,
indent ? ` ` : undefined
)})`
return `Index(\n ${this.#valueIndex.toString(indent)},\n ${this.#hashIndex.toString(indent)}\n)`
}

get(key: K): Array<[V, number]> {
const valueMap = this.#inner.get(key)
return [...valueMap.entries()]
if (this.#valueIndex.has(key)) {
return [this.#valueIndex.get(key)!]
}
return this.#hashIndex.get(key)
}

getMultiplicity(key: K, value: V): number {
const valueMap = this.#inner.get(key)
return valueMap.get(value) ?? 0
if (this.#valueIndex.has(key)) {
return this.#valueIndex.getMultiplicity(key)
}
return this.#hashIndex.getMultiplicity(key, value)
}

entries() {
return this.#inner.entries()
/**
* This returns an iterator that iterates over all key-value pairs.
* @returns An iterable of all key-value pairs (and their multiplicities) in the index.
*/
#entries(): Iterable<[K, [V, number]]> {
return concatIterable(
this.#valueIndex.entries(),
this.#hashIndex.entriesIterator()
)
}

keys() {
return this.#inner.keys()
/**
* This method only iterates over the keys and not over the values.
* Hence, it is more efficient than the `#entries` method.
* It returns an iterator that you can use if you need to iterate over the values for a given key.
* @returns An iterator of all *keys* in the index and their corresponding value iterator.
*/
*#entriesIterators(): Iterable<[K, Iterable<[V, number]>]> {
for (const [key, [value, multiplicity]] of this.#valueIndex.entries()) {
yield [key, new Map<V, number>([[value, multiplicity]])]
}
for (const [key, valueMap] of this.#hashIndex.entries()) {
yield [
key,
mapIterable(valueMap, ([_hash, [value, multiplicity]]) => [
value,
multiplicity,
]),
]
}
}

has(key: K): boolean {
return this.#inner.has(key)
return this.#valueIndex.has(key) || this.#hashIndex.has(key)
}

get size(): number {
return this.#inner.size
return this.#valueIndex.size + this.#hashIndex.size
}

addValue(key: K, value: [V, number]): void {
const [val, multiplicity] = value
const valueMap = this.#inner.get(key)
const existingMultiplicity = valueMap.get(val) ?? 0
const newMultiplicity = existingMultiplicity + multiplicity

if (multiplicity !== 0) {
if (newMultiplicity === 0) {
valueMap.delete(val)
} else {
valueMap.set(val, newMultiplicity)
const containedInValueIndex = this.#valueIndex.has(key)
const containedInHashIndex = this.#hashIndex.has(key)

if (containedInHashIndex && containedInValueIndex) {
throw new Error(
`Key ${key} is contained in both the value index and the hash index. This should never happen because they should have disjoint keysets.`
)
}

if (!containedInValueIndex && !containedInHashIndex) {
// This is the first time we see the key
// Add it to the value index
this.#valueIndex.addValue(key, value)
return
}

if (containedInValueIndex) {
// This key is already in the value index
// It could be that it's the same value or a different one
// If it's a different value we will need to remove the key from the value index
// and add the key and its two values to the hash index
try {
this.#valueIndex.addValue(key, value)
} catch {
// This is a different value, need to move the key to the hash index
const existingValue = this.#valueIndex.get(key)!
this.#valueIndex.delete(key)
this.#hashIndex.addValue(key, existingValue)
this.#hashIndex.addValue(key, value)
}
return
}

if (containedInHashIndex) {
// This key is already in the hash index so it already has two or more values.
// However, this new value and multiplicity could cause an existing value to be removed
// and lead to the key having only a single value in which case we need to move it back to the value index
const singleRemainingValue = this.#hashIndex.addValue(key, value)
if (singleRemainingValue) {
// The key only has a single remaining value so we need to move it back to the value index
this.#hashIndex.delete(key)
this.#valueIndex.addValue(key, singleRemainingValue)
}
return
}
}

append(other: Index<K, V>): void {
for (const [key, otherValueMap] of other.entries()) {
const thisValueMap = this.#inner.get(key)
for (const [value, multiplicity] of otherValueMap.entries()) {
const existingMultiplicity = thisValueMap.get(value) ?? 0
const newMultiplicity = existingMultiplicity + multiplicity
if (newMultiplicity === 0) {
thisValueMap.delete(value)
} else {
thisValueMap.set(value, newMultiplicity)
}
}
for (const [key, value] of other.#entries()) {
this.addValue(key, value)
}
}

Expand All @@ -87,10 +146,10 @@ export class Index<K, V> {
// We want to iterate over the smaller of the two indexes to reduce the
// number of operations we need to do.
if (this.size <= other.size) {
for (const [key, valueMap] of this.entries()) {
for (const [key, valueIt] of this.#entriesIterators()) {
if (!other.has(key)) continue
const otherValues = other.get(key)
for (const [val1, mul1] of valueMap.entries()) {
for (const [val1, mul1] of valueIt) {
for (const [val2, mul2] of otherValues) {
if (mul1 !== 0 && mul2 !== 0) {
result.push([[key, [val1, val2]], mul1 * mul2])
Expand All @@ -99,10 +158,10 @@ export class Index<K, V> {
}
}
} else {
for (const [key, otherValueMap] of other.entries()) {
for (const [key, otherValueIt] of other.#entriesIterators()) {
if (!this.has(key)) continue
const values = this.get(key)
for (const [val2, mul2] of otherValueMap.entries()) {
for (const [val2, mul2] of otherValueIt) {
for (const [val1, mul1] of values) {
if (mul1 !== 0 && mul2 !== 0) {
result.push([[key, [val1, val2]], mul1 * mul2])
Expand Down
20 changes: 19 additions & 1 deletion packages/db-ivm/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ export class DefaultMap<K, V> extends Map<K, V> {

get(key: K): V {
if (!this.has(key)) {
this.set(key, this.defaultValue())
// this.set(key, this.defaultValue())
return this.defaultValue()
}
return super.get(key)!
}
Expand Down Expand Up @@ -161,3 +162,20 @@ export class ObjectIdGenerator {
* Global instance for cases where a shared object ID space is needed.
*/
export const globalObjectIdGenerator = new ObjectIdGenerator()

/**
 * Lazily chains the given iterables into a single sequence: every item of the
 * first iterable is produced, then every item of the second, and so on.
 * An iterable is only started once all the previous ones are exhausted.
 * @param iterables - The iterables to walk, in order.
 * @returns An iterable over the items of all inputs.
 */
export function* concatIterable<T>(
  ...iterables: Array<Iterable<T>>
): Iterable<T> {
  for (const source of iterables) {
    for (const item of source) {
      yield item
    }
  }
}

/**
 * Lazily transforms an iterable: each item is passed through `fn` only when
 * the consumer asks for the next result.
 * @param it - The iterable to read items from.
 * @param fn - The function applied to each item.
 * @returns An iterable over the results of `fn`, in the order of `it`.
 */
export function* mapIterable<T, U>(
  it: Iterable<T>,
  fn: (t: T) => U
): Iterable<U> {
  for (const element of it) {
    const mapped = fn(element)
    yield mapped
  }
}
Loading
Loading