@@ -201,40 +201,53 @@ impl Database {
201201
202202 // Get one queued execution with SELECT FOR UPDATE SKIP LOCKED
203203 // This allows multiple orchestrators to work on different executions in parallel
204- // Uses CTE to efficiently check queue concurrency limits
204+ // Uses CTEs to efficiently check queue concurrency limits AND worker availability
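205+ // Only selects executions whose deployment_id has at least one available worker (push mode, online, below max_concurrent_executions and push_failure_threshold, heartbeat within the last 60 seconds)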
205206 let execution_row = sqlx:: query (
206207 "WITH running_counts AS (
207- SELECT
208+ SELECT
208209 queue_name,
209210 deployment_id,
210211 COALESCE(concurrency_key, '') as concurrency_key,
211212 COUNT(*) as running_count
212213 FROM workflow_executions
213214 WHERE status IN ('claimed', 'running')
214215 GROUP BY queue_name, deployment_id, COALESCE(concurrency_key, '')
216+ ),
217+ available_deployments AS (
218+ -- Get deployment_ids that have at least one available worker
219+ SELECT DISTINCT current_deployment_id
220+ FROM workers
221+ WHERE mode = 'push'
222+ AND status = 'online'
223+ AND current_execution_count < max_concurrent_executions
224+ AND push_failure_count < push_failure_threshold
225+ AND last_heartbeat > NOW() - INTERVAL '60 seconds'
215226 )
216- SELECT e.id, e.workflow_id, e.status, e.payload, e.result, e.error,
217- e.created_at, e.started_at, e.completed_at,
218- e.deployment_id, e.parent_execution_id, e.root_execution_id,
219- e.retry_count, e.step_key, e.queue_name, e.concurrency_key,
220- e.batch_id, e.session_id, e.user_id, e.output_schema_name,
221- e.otel_traceparent, e.otel_span_id, e.initial_state, e.final_state,
227+ SELECT e.id, e.workflow_id, e.status, e.payload, e.result, e.error,
228+ e.created_at, e.started_at, e.completed_at,
229+ e.deployment_id, e.parent_execution_id, e.root_execution_id,
230+ e.retry_count, e.step_key, e.queue_name, e.concurrency_key,
231+ e.batch_id, e.session_id, e.user_id, e.output_schema_name,
232+ e.otel_traceparent, e.otel_span_id, e.initial_state, e.final_state,
222233 e.claimed_at, e.queued_at, e.run_timeout_seconds, q.concurrency_limit
223234 FROM workflow_executions e
224- INNER JOIN queues q
225- ON q.name = e.queue_name
235+ INNER JOIN queues q
236+ ON q.name = e.queue_name
226237 AND q.deployment_id = e.deployment_id
227- LEFT JOIN running_counts rc
228- ON rc.queue_name = e.queue_name
238+ INNER JOIN available_deployments ad
239+ ON ad.current_deployment_id = e.deployment_id
240+ LEFT JOIN running_counts rc
241+ ON rc.queue_name = e.queue_name
229242 AND rc.deployment_id = e.deployment_id
230243 AND rc.concurrency_key = COALESCE(e.concurrency_key, '')
231244 WHERE e.status = 'queued'
232245 AND (
233246 q.concurrency_limit IS NULL
234247 OR COALESCE(rc.running_count, 0) < q.concurrency_limit
235248 )
236- ORDER BY COALESCE(e.queued_at, e.created_at) ASC
237- LIMIT 1
249+ ORDER BY COALESCE(e.queued_at, e.created_at) ASC
250+ LIMIT 1
238251 FOR UPDATE OF e SKIP LOCKED" ,
239252 )
240253 . fetch_optional ( & mut * tx)
0 commit comments