4545 * - **Ordering + Limits**: ORDER BY combined with LIMIT/OFFSET (would change result set)
4646 * - **Functional Operations**: fnSelect, fnWhere, fnHaving (potential side effects)
4747 *
48+ * ### Residual WHERE Clauses
49+ * For outer joins (LEFT, RIGHT, FULL), WHERE clauses are copied to subqueries for optimization
50+ * but also kept as "residual" clauses in the main query to preserve semantics. This ensures
51+ * that NULL values from outer joins are properly filtered according to SQL standards.
52+ *
4853 * The optimizer tracks which clauses were actually optimized and only removes those from the
4954 * main query. Subquery reuse is handled safely through immutable query copies.
5055 *
@@ -121,9 +126,12 @@ import {
121126 CollectionRef as CollectionRefClass ,
122127 Func ,
123128 QueryRef as QueryRefClass ,
129+ createResidualWhere ,
130+ getWhereExpression ,
131+ isResidualWhere ,
124132} from "./ir.js"
125133import { isConvertibleToCollectionFilter } from "./compiler/expressions.js"
126- import type { BasicExpression , From , QueryIR } from "./ir.js"
134+ import type { BasicExpression , From , QueryIR , Where } from "./ir.js"
127135
128136/**
129137 * Represents a WHERE clause after source analysis
@@ -325,8 +333,13 @@ function applySingleLevelOptimization(query: QueryIR): QueryIR {
325333 return query
326334 }
327335
336+ // Filter out residual WHERE clauses to prevent them from being optimized again
337+ const nonResidualWhereClauses = query . where . filter (
338+ ( where ) => ! isResidualWhere ( where )
339+ )
340+
328341 // Step 1: Split all AND clauses at the root level for granular optimization
329- const splitWhereClauses = splitAndClauses ( query . where )
342+ const splitWhereClauses = splitAndClauses ( nonResidualWhereClauses )
330343
331344 // Step 2: Analyze each WHERE clause to determine which sources it touches
332345 const analyzedClauses = splitWhereClauses . map ( ( clause ) =>
@@ -337,7 +350,20 @@ function applySingleLevelOptimization(query: QueryIR): QueryIR {
337350 const groupedClauses = groupWhereClauses ( analyzedClauses )
338351
339352 // Step 4: Apply optimizations by lifting single-source clauses into subqueries
340- return applyOptimizations ( query , groupedClauses )
353+ const optimizedQuery = applyOptimizations ( query , groupedClauses )
354+
355+ // Add back any residual WHERE clauses that were filtered out
356+ const residualWhereClauses = query . where . filter ( ( where ) =>
357+ isResidualWhere ( where )
358+ )
359+ if ( residualWhereClauses . length > 0 ) {
360+ optimizedQuery . where = [
361+ ...( optimizedQuery . where || [ ] ) ,
362+ ...residualWhereClauses ,
363+ ]
364+ }
365+
366+ return optimizedQuery
341367}
342368
343369/**
@@ -424,26 +450,35 @@ function isRedundantSubquery(query: QueryIR): boolean {
424450 * ```
425451 */
function splitAndClauses(
  whereClauses: Array<Where>
): Array<BasicExpression<boolean>> {
  // Flat list of conjuncts collected from every incoming WHERE entry,
  // in the same order the entries (and their AND operands) appear.
  const result: Array<BasicExpression<boolean>> = []

  for (const whereClause of whereClauses) {
    // A `Where` entry is not necessarily a bare expression, so obtain the
    // underlying boolean expression first.
    // NOTE(review): presumably this unwraps the "residual" wrapper produced
    // by createResidualWhere — confirm against ./ir.js.
    const clause = getWhereExpression(whereClause)

    // Break the expression apart at every (possibly nested) root-level AND;
    // anything that is not an AND (including OR) is kept as a single item.
    result.push(...splitAndClausesRecursive(clause))
  }

  return result
}
446464
/**
 * Recursively flattens a single boolean expression into its AND operands.
 *
 * If `clause` is a function expression named `and`, each of its arguments is
 * split in turn and the pieces are concatenated in argument order. Any other
 * expression (including an `or` function) is returned unchanged as the only
 * element of the result.
 *
 * @param clause - The boolean expression to split
 * @returns The flattened list of conjuncts, in left-to-right order
 */
function splitAndClausesRecursive(
  clause: BasicExpression<boolean>
): Array<BasicExpression<boolean>> {
  // Not an AND: nothing to split, preserve the expression as-is
  if (!(clause.type === `func` && clause.name === `and`)) {
    return [clause]
  }

  // AND: descend into every operand so nested ANDs are flattened as well
  const result: Array<BasicExpression<boolean>> = []
  for (const arg of clause.args as Array<BasicExpression<boolean>>) {
    result.push(...splitAndClausesRecursive(arg))
  }
  return result
}
481+
447482/**
448483 * Step 2: Analyze which table sources a WHERE clause touches.
449484 *
@@ -588,19 +623,32 @@ function applyOptimizations(
588623 } ) )
589624 : undefined
590625
591- // Build the remaining WHERE clauses: multi-source + any single-source that weren't optimized
592- const remainingWhereClauses : Array < BasicExpression < boolean > > = [ ]
626+ // Build the remaining WHERE clauses: multi-source + residual single-source clauses
627+ const remainingWhereClauses : Array < Where > = [ ]
593628
594629 // Add multi-source clauses
595630 if ( groupedClauses . multiSource ) {
596631 remainingWhereClauses . push ( groupedClauses . multiSource )
597632 }
598633
599- // Add single-source clauses that weren't actually optimized
634+ // Determine if we need residual clauses (when query has outer JOINs)
635+ const hasOuterJoins =
636+ query . join &&
637+ query . join . some (
638+ ( join ) =>
639+ join . type === `left` || join . type === `right` || join . type === `full`
640+ )
641+
642+ // Add single-source clauses
600643 for ( const [ source , clause ] of groupedClauses . singleSource ) {
601644 if ( ! actuallyOptimized . has ( source ) ) {
645+ // Wasn't optimized at all - keep as regular WHERE clause
602646 remainingWhereClauses . push ( clause )
647+ } else if ( hasOuterJoins ) {
648+ // Was optimized AND query has outer JOINs - keep as residual WHERE clause
649+ remainingWhereClauses . push ( createResidualWhere ( clause ) )
603650 }
651+ // If optimized and no outer JOINs - don't keep (original behavior)
604652 }
605653
606654 // Create a completely new query object to ensure immutability
0 commit comments