Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 150 additions & 29 deletions packages/factory-sdk/src/fleet/internal-fleet-client.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ describe('InternalFleetClient', () => {
expect(exits).toEqual([{ name: 'ar-1-impl', reason: 'crashed' }])
})

it('deduplicates distinct broker exit events for one agent death by name', () => {
it('latches one agent death by name across lagged exit callbacks', () => {
vi.useFakeTimers()
vi.setSystemTime(new Date('2026-06-11T00:00:00.000Z'))

Expand All @@ -319,41 +319,144 @@ describe('InternalFleetClient', () => {
reason: 'pty_closed',
event_id: 'exit-event-1',
} as BrokerEvent)
vi.advanceTimersByTime(10_000)
harness.emit({
kind: 'agent_exited',
name: 'ar-1-impl',
code: 1,
reason: 'pty_closed',
event_id: 'exit-event-2',
} as BrokerEvent)
harness.emit({
kind: 'agent_exited',
name: 'ar-1-review',
code: 1,
reason: 'pty_closed',
event_id: 'exit-event-3',
} as BrokerEvent)

vi.advanceTimersByTime(5_000)
harness.emit({
kind: 'agent_exited',
name: 'ar-1-impl',
code: 1,
reason: 'pty_closed',
event_id: 'exit-event-4',
} as BrokerEvent)

expect(exits).toEqual([
{ name: 'ar-1-impl', reason: 'pty_closed' },
{ name: 'ar-1-review', reason: 'pty_closed' },
{ name: 'ar-1-impl', reason: 'pty_closed' },
])
expect(exits).toEqual([{ name: 'ar-1-impl', reason: 'pty_closed' }])
} finally {
vi.useRealTimers()
}
})

it('suppresses legacy same-name exit callbacks inside the death window only', () => {
it('does not suppress exits for different agent names', () => {
const harness = new FakeHarnessDriverClient()
const fleet = new InternalFleetClient({ client: harness })
const exits: Array<{ name: string; reason?: string }> = []

fleet.onAgentExit((name, reason) => exits.push({ name, reason }))

harness.emit({
kind: 'agent_exited',
name: 'ar-1-impl',
code: 1,
reason: 'pty_closed',
event_id: 'exit-event-1',
} as BrokerEvent)
harness.emit({
kind: 'agent_exited',
name: 'ar-1-review',
code: 1,
reason: 'pty_closed',
event_id: 'exit-event-2',
} as BrokerEvent)

expect(exits).toEqual([
{ name: 'ar-1-impl', reason: 'pty_closed' },
{ name: 'ar-1-review', reason: 'pty_closed' },
])
})

it('clears the exit latch when the same agent name is spawned again', async () => {
const harness = new FakeHarnessDriverClient()
const fleet = new InternalFleetClient({ client: harness })
const exits: Array<{ name: string; reason?: string }> = []

fleet.onAgentExit((name, reason) => exits.push({ name, reason }))

await fleet.spawn({ name: 'ar-1-impl', capability: 'spawn:codex' })
harness.emit({
kind: 'agent_exited',
name: 'ar-1-impl',
code: 1,
reason: 'first-death',
event_id: 'exit-event-1',
} as BrokerEvent)
harness.emit({
kind: 'agent_exited',
name: 'ar-1-impl',
code: 1,
reason: 'lagged-duplicate',
event_id: 'exit-event-2',
} as BrokerEvent)

await fleet.spawn({ name: 'ar-1-impl', capability: 'spawn:codex' })
harness.emit({
kind: 'agent_exited',
name: 'ar-1-impl',
code: 1,
reason: 'second-death',
event_id: 'exit-event-3',
} as BrokerEvent)

expect(exits).toEqual([
{ name: 'ar-1-impl', reason: 'first-death' },
{ name: 'ar-1-impl', reason: 'second-death' },
])
})

it('clears the exit latch when the same agent name is resumed', async () => {
const harness = new FakeHarnessDriverClient()
const fleet = new InternalFleetClient({ client: harness })
const exits: Array<{ name: string; reason?: string }> = []

fleet.onAgentExit((name, reason) => exits.push({ name, reason }))

harness.emit({
kind: 'agent_exited',
name: 'ar-1-impl',
code: 1,
reason: 'first-death',
event_id: 'exit-event-1',
} as BrokerEvent)
harness.emit({
kind: 'agent_exited',
name: 'ar-1-impl',
code: 1,
reason: 'lagged-duplicate',
event_id: 'exit-event-2',
} as BrokerEvent)

await fleet.resume({ name: 'ar-1-impl', sessionRef: 'session-original' })
harness.emit({
kind: 'agent_exited',
name: 'ar-1-impl',
code: 1,
reason: 'second-death',
event_id: 'exit-event-3',
} as BrokerEvent)

expect(exits).toEqual([
{ name: 'ar-1-impl', reason: 'first-death' },
{ name: 'ar-1-impl', reason: 'second-death' },
])
})

it('suppresses typed duplicate agent-exit callbacks until the name is spawned again', async () => {
const harness = new FakeHarnessDriverClient()
const fleet = new InternalFleetClient({ client: harness })
const exits: Array<{ name: string; reason?: string }> = []

fleet.onAgentExit((name, reason) => exits.push({ name, reason }))

harness.emitAgentExited({ name: 'ar-1-impl', sessionId: 'session-1' })
harness.emitAgentExited({ name: 'ar-1-impl', sessionId: 'session-2' })

await fleet.spawn({ name: 'ar-1-impl', capability: 'spawn:codex' })
harness.emitAgentExited({ name: 'ar-1-impl', sessionId: 'session-3' })

expect(exits).toEqual([
{ name: 'ar-1-impl', reason: 'exited' },
{ name: 'ar-1-impl', reason: 'exited' },
])
})

it('surfaces same-name exits again only after a lifecycle restart, not after time passes', () => {
vi.useFakeTimers()
vi.setSystemTime(new Date('2026-06-11T00:00:00.000Z'))

Expand All @@ -364,14 +467,32 @@ describe('InternalFleetClient', () => {

fleet.onAgentExit((name, reason) => exits.push({ name, reason }))

harness.emitAgentExited({ name: 'ar-1-impl', sessionId: 'session-1' })
harness.emitAgentExited({ name: 'ar-1-impl', sessionId: 'session-2' })
vi.advanceTimersByTime(5_000)
harness.emitAgentExited({ name: 'ar-1-impl', sessionId: 'session-3' })
harness.emit({
kind: 'agent_exited',
name: 'ar-1-impl',
code: 1,
reason: 'pty_closed',
event_id: 'exit-event-1',
} as BrokerEvent)
vi.advanceTimersByTime(60_000)
harness.emit({
kind: 'agent_exited',
name: 'ar-1-impl',
code: 1,
reason: 'pty_closed',
event_id: 'exit-event-2',
} as BrokerEvent)
harness.emit({
kind: 'agent_exited',
name: 'ar-1-review',
code: 1,
reason: 'pty_closed',
event_id: 'exit-event-3',
} as BrokerEvent)

expect(exits).toEqual([
{ name: 'ar-1-impl', reason: 'exited' },
{ name: 'ar-1-impl', reason: 'exited' },
{ name: 'ar-1-impl', reason: 'pty_closed' },
{ name: 'ar-1-review', reason: 'pty_closed' },
])
} finally {
vi.useRealTimers()
Expand Down
26 changes: 9 additions & 17 deletions packages/factory-sdk/src/fleet/internal-fleet-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ const selfNode: RosterEntry['nodes'][number] = {
live: true,
}

const AGENT_EXIT_DEDUP_WINDOW_MS = 5_000

export class InternalFleetClient implements FleetClient {
readonly #client: HarnessDriverClientLike
readonly #cwd?: string
Expand All @@ -65,7 +63,7 @@ export class InternalFleetClient implements FleetClient {
readonly #injectedEventIdSet = new Set<string>()
readonly #failedDeliveries = new Map<string, Error>()
readonly #failedDeliveryIds: string[] = []
readonly #recentAgentExits = new Map<string, number>()
readonly #exitedAgentNames = new Set<string>()
#suppressedDuplicateEvents = 0
#suppressedDuplicateAgentExits = 0
#missingIdentityEvents = 0
Expand All @@ -92,6 +90,8 @@ export class InternalFleetClient implements FleetClient {
continueFrom: input.sessionRef,
})

this.#clearAgentExitLatch(handle.name)

return { name: handle.name, sessionRef: sessionRefFrom(handle) }
}

Expand All @@ -106,6 +106,8 @@ export class InternalFleetClient implements FleetClient {
continueFrom: input.sessionRef,
})

this.#clearAgentExitLatch(handle.name)

return { name: handle.name, sessionRef: sessionRefFrom(handle) ?? input.sessionRef }
}

Expand Down Expand Up @@ -291,33 +293,23 @@ export class InternalFleetClient implements FleetClient {
}

#rememberAgentExit(name: string): boolean {
const now = Date.now()
const prior = this.#recentAgentExits.get(name)
if (prior !== undefined && now - prior < AGENT_EXIT_DEDUP_WINDOW_MS) {
if (this.#exitedAgentNames.has(name)) {
this.#suppressedDuplicateAgentExits += 1
if (this.#suppressedDuplicateAgentExits <= 3 || this.#suppressedDuplicateAgentExits % 100 === 0) {
this.#logger?.debug?.('[factory-sdk] suppressed duplicate agent exit', {
count: this.#suppressedDuplicateAgentExits,
name,
windowMs: AGENT_EXIT_DEDUP_WINDOW_MS,
})
}
return true
}

this.#recentAgentExits.set(name, now)
if (this.#recentAgentExits.size > 500) {
this.#pruneRecentAgentExits(now)
}
this.#exitedAgentNames.add(name)
return false
}

#pruneRecentAgentExits(now: number): void {
for (const [name, seenAt] of this.#recentAgentExits) {
if (now - seenAt >= AGENT_EXIT_DEDUP_WINDOW_MS) {
this.#recentAgentExits.delete(name)
}
}
#clearAgentExitLatch(name: string): void {
this.#exitedAgentNames.delete(name)
}

#rememberEvent(identity: EventIdentity): boolean {
Expand Down
Loading