diff --git a/packages/core-backend/CHANGELOG.md b/packages/core-backend/CHANGELOG.md index 2938c577a3..daf352e0fc 100644 --- a/packages/core-backend/CHANGELOG.md +++ b/packages/core-backend/CHANGELOG.md @@ -7,9 +7,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add `forceReconnection()` method to `BackendWebSocketService` for controlled subscription state cleanup ([#6861](https://github.com/MetaMask/core/pull/6861)) + - Performs a controlled disconnect-then-reconnect sequence with exponential backoff + - Useful for recovering from subscription/unsubscription issues and cleaning up orphaned subscriptions + - Add `BackendWebSocketService:forceReconnection` messenger action +- Add stable connection timer to prevent rapid reconnection loops ([#6861](https://github.com/MetaMask/core/pull/6861)) + - Connection must stay stable for 10 seconds before resetting reconnect attempts + - Prevents issues when server accepts connection then immediately closes it + ### Changed - Bump `@metamask/base-controller` from `^8.4.1` to `^8.4.2` ([#6917](https://github.com/MetaMask/core/pull/6917)) +- Update `AccountActivityService` to use new `forceReconnection()` method instead of manually calling disconnect/connect ([#6861](https://github.com/MetaMask/core/pull/6861)) +- **BREAKING:** Update allowed actions for `AccountActivityService` messenger: remove `BackendWebSocketService:disconnect`, add `BackendWebSocketService:forceConnect` ([#6861](https://github.com/MetaMask/core/pull/6861)) +- Improve reconnection scheduling in `BackendWebSocketService` to be idempotent ([#6861](https://github.com/MetaMask/core/pull/6861)) + - Prevents duplicate reconnection timers and inflated attempt counters + - Scheduler checks if reconnect is already scheduled before creating new timer +- Improve error handling in `BackendWebSocketService.connect()` ([#6861](https://github.com/MetaMask/core/pull/6861)) + - Always schedule reconnect on connection failure (exponential backoff prevents aggressive retries) + - Remove redundant schedule calls from error paths +- Update `BackendWebSocketService.disconnect()` to reset reconnect attempts counter ([#6861](https://github.com/MetaMask/core/pull/6861)) +- Update `BackendWebSocketService.disconnect()` return type from `Promise` to `void` ([#6861](https://github.com/MetaMask/core/pull/6861)) +- Improve logging throughout `BackendWebSocketService` for better debugging ([#6861](https://github.com/MetaMask/core/pull/6861)) + +### Fixed + +- Fix potential race condition in `BackendWebSocketService.connect()` that could bypass exponential backoff when reconnect is already scheduled ([#6861](https://github.com/MetaMask/core/pull/6861)) +- Fix memory leak from orphaned timers when multiple reconnects are scheduled ([#6861](https://github.com/MetaMask/core/pull/6861)) +- Fix issue where reconnect attempts counter could grow unnecessarily with duplicate scheduled reconnects ([#6861](https://github.com/MetaMask/core/pull/6861)) ## [2.1.0] diff --git a/packages/core-backend/src/AccountActivityService.test.ts b/packages/core-backend/src/AccountActivityService.test.ts index 6ba242469d..c60fe7bdeb 100644 --- a/packages/core-backend/src/AccountActivityService.test.ts +++ b/packages/core-backend/src/AccountActivityService.test.ts @@ -71,7 +71,7 @@ const getMessenger = () => { // Create mock action handlers const mockGetSelectedAccount = jest.fn(); const mockConnect = jest.fn(); - const mockDisconnect = jest.fn(); + const mockForceReconnection = jest.fn(); const mockSubscribe = jest.fn(); const mockChannelHasSubscription = jest.fn(); const mockGetSubscriptionsByChannel = jest.fn(); @@ -89,8 +89,8 @@ const getMessenger = () => { mockConnect, ); rootMessenger.registerActionHandler( - 'BackendWebSocketService:disconnect', - mockDisconnect, + 'BackendWebSocketService:forceReconnection', + mockForceReconnection, ); rootMessenger.registerActionHandler( 'BackendWebSocketService:subscribe', @@ -123,7 +123,7 @@ const getMessenger = () => { mocks: { getSelectedAccount: mockGetSelectedAccount, connect: mockConnect, - disconnect: mockDisconnect, + forceReconnection: mockForceReconnection, subscribe: mockSubscribe, channelHasSubscription: mockChannelHasSubscription, getSubscriptionsByChannel: mockGetSubscriptionsByChannel, @@ -222,7 +222,7 @@ type WithServiceCallback = (payload: { mocks: { getSelectedAccount: jest.Mock; connect: jest.Mock; - disconnect: jest.Mock; + forceReconnection: jest.Mock; subscribe: jest.Mock; channelHasSubscription: jest.Mock; getSubscriptionsByChannel: jest.Mock; @@ -464,28 +464,22 @@ describe('AccountActivityService', () => { ); }); - it('should handle disconnect failures during force reconnection by logging error and continuing gracefully', async () => { + it('should handle subscription failure by calling forceReconnection', async () => { await withService(async ({ service, mocks }) => { - // Mock disconnect to fail - this prevents the reconnect step from executing - mocks.disconnect.mockRejectedValue( - new Error('Disconnect failed during force reconnection'), - ); - - // Trigger scenario that causes force reconnection by making subscribe fail + // Mock subscribe to fail mocks.subscribe.mockRejectedValue(new Error('Subscription failed')); - // Should handle both subscription failure and disconnect failure gracefully - should not throw + // Should handle subscription failure gracefully - should not throw const result = await service.subscribe({ address: '0x123abc' }); expect(result).toBeUndefined(); // Verify the subscription was attempted expect(mocks.subscribe).toHaveBeenCalledTimes(1); - // Verify disconnect was attempted (but failed, preventing reconnection) - expect(mocks.disconnect).toHaveBeenCalledTimes(1); + // Verify forceReconnection was called (lines 289-290) + expect(mocks.forceReconnection).toHaveBeenCalledTimes(1); - // Connect is only called once at the start because disconnect failed, - // so the reconnect step never executes (it's in the same try-catch block) + // Connect is only called once at the start expect(mocks.connect).toHaveBeenCalledTimes(1); }); }); @@ -536,14 +530,8 @@ describe('AccountActivityService', () => { // unsubscribe catches errors and forces reconnection instead of throwing await service.unsubscribe(mockSubscription); - // Should have attempted to force reconnection with exact sequence - expect(mocks.disconnect).toHaveBeenCalledTimes(1); - expect(mocks.connect).toHaveBeenCalledTimes(1); - - // Verify disconnect was called before connect - const disconnectOrder = mocks.disconnect.mock.invocationCallOrder[0]; - const connectOrder = mocks.connect.mock.invocationCallOrder[0]; - expect(disconnectOrder).toBeLessThan(connectOrder); + // Should have attempted to force reconnection + expect(mocks.forceReconnection).toHaveBeenCalledTimes(1); }, ); }); diff --git a/packages/core-backend/src/AccountActivityService.ts b/packages/core-backend/src/AccountActivityService.ts index c92ab1a2a2..30c8b95d98 100644 --- a/packages/core-backend/src/AccountActivityService.ts +++ b/packages/core-backend/src/AccountActivityService.ts @@ -80,7 +80,7 @@ export type AccountActivityServiceActions = AccountActivityServiceMethodActions; export const ACCOUNT_ACTIVITY_SERVICE_ALLOWED_ACTIONS = [ 'AccountsController:getSelectedAccount', 'BackendWebSocketService:connect', - 'BackendWebSocketService:disconnect', + 'BackendWebSocketService:forceReconnection', 'BackendWebSocketService:subscribe', 'BackendWebSocketService:getConnectionInfo', 'BackendWebSocketService:channelHasSubscription', @@ -559,16 +559,11 @@ export class AccountActivityService { * Force WebSocket reconnection to clean up subscription state */ async #forceReconnection(): Promise { - try { - log('Forcing WebSocket reconnection to clean up subscription state'); - - // All subscriptions will be cleaned up automatically on WebSocket disconnect + log('Forcing WebSocket reconnection to clean up subscription state'); - await this.#messenger.call('BackendWebSocketService:disconnect'); - await this.#messenger.call('BackendWebSocketService:connect'); - } catch (error) { - log('Failed to force WebSocket reconnection', { error }); - } + // Use the dedicated forceReconnection method which performs a controlled + // disconnect-then-connect sequence to clean up subscription state + await this.#messenger.call('BackendWebSocketService:forceReconnection'); } // ============================================================================= diff --git a/packages/core-backend/src/BackendWebSocketService-method-action-types.ts b/packages/core-backend/src/BackendWebSocketService-method-action-types.ts index 2410df1449..0d93fa38aa 100644 --- a/packages/core-backend/src/BackendWebSocketService-method-action-types.ts +++ b/packages/core-backend/src/BackendWebSocketService-method-action-types.ts @@ -25,6 +25,27 @@ export type BackendWebSocketServiceDisconnectAction = { handler: BackendWebSocketService['disconnect']; }; +/** + * Forces a WebSocket reconnection to clean up subscription state + * + * This method is useful when subscription state may be out of sync and needs to be reset. + * It performs a controlled disconnect-then-reconnect sequence: + * - Disconnects cleanly to trigger subscription cleanup + * - Schedules reconnection with exponential backoff to prevent rapid loops + * - All subscriptions will be cleaned up automatically on disconnect + * + * Use cases: + * - Recovering from subscription/unsubscription issues + * - Cleaning up orphaned subscriptions + * - Forcing a fresh subscription state + * + * @returns Promise that resolves when disconnection is complete (reconnection is scheduled) + */ +export type BackendWebSocketServiceForceReconnectionAction = { + type: `BackendWebSocketService:forceReconnection`; + handler: BackendWebSocketService['forceReconnection']; +}; + /** * Sends a message through the WebSocket * @@ -159,6 +180,7 @@ export type BackendWebSocketServiceSubscribeAction = { export type BackendWebSocketServiceMethodActions = | BackendWebSocketServiceConnectAction | BackendWebSocketServiceDisconnectAction + | BackendWebSocketServiceForceReconnectionAction | BackendWebSocketServiceSendMessageAction | BackendWebSocketServiceSendRequestAction | BackendWebSocketServiceGetConnectionInfoAction diff --git a/packages/core-backend/src/BackendWebSocketService.test.ts b/packages/core-backend/src/BackendWebSocketService.test.ts index d2281b72a5..eafbe8033c 100644 --- a/packages/core-backend/src/BackendWebSocketService.test.ts +++ b/packages/core-backend/src/BackendWebSocketService.test.ts @@ -349,7 +349,7 @@ const setupBackendWebSocketService = ({ cleanup: () => { service?.destroy(); jest.useRealTimers(); - jest.clearAllMocks(); + jest.restoreAllMocks(); }, }; }; @@ -609,26 +609,6 @@ describe('BackendWebSocketService', () => { ); }); - it('should handle connection timeout by rejecting with timeout error and setting state to ERROR', async () => { - await withService( - { - options: { timeout: TEST_CONSTANTS.TIMEOUT_MS }, - mockWebSocketOptions: { autoConnect: false }, - }, - async ({ service, completeAsyncOperations }) => { - const connectPromise = service.connect().catch((error) => error); - - await completeAsyncOperations(TEST_CONSTANTS.TIMEOUT_MS + 50); - - const error = await connectPromise; - expect(error.message).toBe( - `Failed to connect to WebSocket: Connection timeout after ${TEST_CONSTANTS.TIMEOUT_MS}ms`, - ); - expect(service.getConnectionInfo().state).toBe(WebSocketState.ERROR); - }, - ); - }); - it('should reject sendMessage and sendRequest operations when WebSocket is disconnected', async () => { await withService( { mockWebSocketOptions: { autoConnect: false } }, @@ -685,6 +665,9 @@ describe('BackendWebSocketService', () => { it('should handle abnormal WebSocket close by triggering reconnection', async () => { await withService( async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Mock Math.random to make Cockatiel's jitter deterministic + jest.spyOn(Math, 'random').mockReturnValue(0); + await service.connect(); expect(service.getConnectionInfo().state).toBe( WebSocketState.CONNECTED, @@ -709,7 +692,8 @@ describe('BackendWebSocketService', () => { expect(service.getConnectionInfo().state).toBe( WebSocketState.CONNECTED, ); - expect(service.getConnectionInfo().reconnectAttempts).toBe(0); // Reset on successful connection + // reconnectAttempts will be 1 until stable connection timer (10s) resets it + expect(service.getConnectionInfo().reconnectAttempts).toBe(1); }, ); }); @@ -730,7 +714,7 @@ describe('BackendWebSocketService', () => { }, ); - await service.disconnect(); + service.disconnect(); const connectionInfo = service.getConnectionInfo(); expect(connectionInfo.state).toBe(WebSocketState.DISCONNECTED); @@ -739,25 +723,6 @@ describe('BackendWebSocketService', () => { }); }); - it('should handle WebSocket error events during connection establishment by setting state to ERROR', async () => { - await withService( - { mockWebSocketOptions: { autoConnect: false } }, - async ({ service, getMockWebSocket, completeAsyncOperations }) => { - const connectPromise = service.connect(); - await completeAsyncOperations(10); - - // Trigger error event during connection phase - const mockWs = getMockWebSocket(); - mockWs.simulateError(); - - await expect(connectPromise).rejects.toThrow( - 'WebSocket connection error', - ); - expect(service.getConnectionInfo().state).toBe(WebSocketState.ERROR); - }, - ); - }); - it('should remain in CONNECTED state when trying to connect again', async () => { await withService(async ({ service }) => { await service.connect(); @@ -802,30 +767,13 @@ describe('BackendWebSocketService', () => { ); // Disconnect when already disconnected - await service.disconnect(); + service.disconnect(); expect(service.getConnectionInfo().state).toBe( WebSocketState.DISCONNECTED, ); }); }); - it('should handle WebSocket close during connection phase', async () => { - await withService( - { mockWebSocketOptions: { autoConnect: false } }, - async ({ service, getMockWebSocket, completeAsyncOperations }) => { - const connectPromise = service.connect(); - await completeAsyncOperations(10); - - const mockWs = getMockWebSocket(); - mockWs.simulateClose(1006, 'Connection failed'); - - await expect(connectPromise).rejects.toThrow( - 'WebSocket connection closed during connection', - ); - }, - ); - }); - it('should handle unexpected disconnect with empty reason by using default close reason', async () => { await withService( async ({ service, getMockWebSocket, completeAsyncOperations }) => { @@ -865,6 +813,232 @@ describe('BackendWebSocketService', () => { }, ); }); + + it('should skip connect when reconnect timer is already scheduled', async () => { + await withService( + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Connect successfully first + await service.connect(); + + const mockWs = getMockWebSocket(); + + // Simulate unexpected close to trigger scheduleReconnect + mockWs.simulateClose(1006, 'Abnormal closure'); + await completeAsyncOperations(10); + + // Verify reconnect timer is scheduled + const attemptsBefore = service.getConnectionInfo().reconnectAttempts; + expect(attemptsBefore).toBeGreaterThan(0); + + // Now try to connect again while reconnect timer is scheduled + // This should return early without doing anything + await service.connect(); + + // Attempts should be unchanged since connect returned early + expect(service.getConnectionInfo().reconnectAttempts).toBe( + attemptsBefore, + ); + }, + ); + }); + + it('should handle connection timeout', async () => { + await withService( + { + options: { timeout: 100 }, + mockWebSocketOptions: { autoConnect: false }, + }, + async ({ service, completeAsyncOperations }) => { + // eslint-disable-next-line @typescript-eslint/no-floating-promises + service.connect(); + + // Advance time past the timeout + await completeAsyncOperations(150); + + // Should have transitioned to ERROR state after timeout + expect(service.getConnectionInfo().state).toBe(WebSocketState.ERROR); + }, + ); + }); + + it('should reset reconnect attempts after stable connection', async () => { + await withService( + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Mock Math.random to make Cockatiel's jitter deterministic + jest.spyOn(Math, 'random').mockReturnValue(0); + + // Connect successfully + await service.connect(); + + // Close connection to trigger reconnect + const mockWs = getMockWebSocket(); + mockWs.simulateClose(1006, 'Test close'); + await completeAsyncOperations(10); + + // Reconnect (this increments attempts to 1) + // With Math.random() = 0, Cockatiel's jitter will give consistent delays + await completeAsyncOperations(700); + + expect(service.getConnectionInfo().reconnectAttempts).toBe(1); + + // Wait for stable connection timer (10 seconds + buffer) + await completeAsyncOperations(10050); + + // Attempts should now be reset to 0 + expect(service.getConnectionInfo().reconnectAttempts).toBe(0); + }, + ); + }); + + it('should handle WebSocket onclose during connection phase', async () => { + await withService( + { mockWebSocketOptions: { autoConnect: false } }, + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // eslint-disable-next-line @typescript-eslint/no-floating-promises + service.connect(); + await completeAsyncOperations(10); + + // Close during connection phase + const mockWs = getMockWebSocket(); + mockWs.simulateClose(1006, 'Connection failed'); + await completeAsyncOperations(10); + + // Should schedule reconnect and be in ERROR state + expect(service.getConnectionInfo().state).toBe(WebSocketState.ERROR); + }, + ); + }); + + it('should clear connection timeout in handleClose when timeout occurs then close fires', async () => { + await withService( + { + options: { timeout: 100 }, + mockWebSocketOptions: { autoConnect: false }, + }, + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Start connection (this sets connectionTimeout) + // eslint-disable-next-line @typescript-eslint/no-floating-promises + service.connect(); + await completeAsyncOperations(10); + + const mockWs = getMockWebSocket(); + expect(service.getConnectionInfo().state).toBe( + WebSocketState.CONNECTING, + ); + + // Let timeout fire (closes WebSocket and sets state to ERROR) + await completeAsyncOperations(150); + + // State should be ERROR or DISCONNECTED after timeout + const stateAfterTimeout = service.getConnectionInfo().state; + expect([WebSocketState.ERROR, WebSocketState.DISCONNECTED]).toContain( + stateAfterTimeout, + ); + + // Now manually trigger close event + // Since state is ERROR (not CONNECTING), onclose will call handleClose + // which will clear connectionTimeout + mockWs.simulateClose(1006, 'Close after timeout'); + await completeAsyncOperations(10); + + // State should still be ERROR or DISCONNECTED + expect([WebSocketState.ERROR, WebSocketState.DISCONNECTED]).toContain( + service.getConnectionInfo().state, + ); + }, + ); + }); + + it('should not schedule multiple reconnects when scheduleReconnect called multiple times', async () => { + await withService( + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + await service.connect(); + expect(service.getConnectionInfo().state).toBe( + WebSocketState.CONNECTED, + ); + + const mockWs = getMockWebSocket(); + + // First close to trigger scheduleReconnect + mockWs.simulateClose(1006, 'Connection lost'); + await completeAsyncOperations(10); + + const attemptsBefore = service.getConnectionInfo().reconnectAttempts; + expect(attemptsBefore).toBeGreaterThan(0); + + // Second close should trigger scheduleReconnect again, + // but it should return early since timer already exists + mockWs.simulateClose(1006, 'Connection lost again'); + await completeAsyncOperations(10); + + // Attempts should not have increased again due to idempotency + expect(service.getConnectionInfo().reconnectAttempts).toBe( + attemptsBefore, + ); + }, + ); + }); + }); + + // ===================================================== + // FORCE RECONNECTION TESTS + // ===================================================== + describe('forceReconnection', () => { + it('should force reconnection and schedule connect', async () => { + await withService( + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + await service.connect(); + expect(service.getConnectionInfo().state).toBe( + WebSocketState.CONNECTED, + ); + + const mockWs = getMockWebSocket(); + mockWs.close.mockImplementation( + (code = 1000, reason = 'Normal closure') => { + mockWs.simulateClose(code, reason); + }, + ); + + // Force reconnection + await service.forceReconnection(); + await completeAsyncOperations(10); + + // Should be disconnected after forceReconnection + expect(service.getConnectionInfo().state).toBe( + WebSocketState.DISCONNECTED, + ); + + // Should have scheduled a reconnect (attempts incremented) + expect(service.getConnectionInfo().reconnectAttempts).toBe(1); + }, + ); + }); + + it('should skip forceReconnection when reconnect timer is already scheduled', async () => { + await withService( + { mockWebSocketOptions: { autoConnect: false } }, + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Trigger a connection failure to schedule a reconnect + // eslint-disable-next-line @typescript-eslint/no-floating-promises + service.connect(); + await completeAsyncOperations(10); + + const mockWs = getMockWebSocket(); + mockWs.simulateError(); + await completeAsyncOperations(10); + + const attemptsBefore = service.getConnectionInfo().reconnectAttempts; + + // Try to force reconnection while timer is already scheduled + await service.forceReconnection(); + + // Should have returned early, attempts unchanged + expect(service.getConnectionInfo().reconnectAttempts).toBe( + attemptsBefore, + ); + }, + ); + }); }); // ===================================================== @@ -1213,7 +1387,7 @@ describe('BackendWebSocketService', () => { data: { test: true }, }); - await service.disconnect(); + service.disconnect(); await expect(requestPromise).rejects.toThrow('WebSocket disconnected'); }); @@ -1463,43 +1637,56 @@ describe('BackendWebSocketService', () => { }); }); - it('should handle authentication required but user not signed in by rejecting connection with error', async () => { + it('should handle getBearerToken error during connection by scheduling reconnect', async () => { await withService( { options: {}, mockWebSocketOptions: { autoConnect: false }, }, async ({ service, mocks }) => { - mocks.getBearerToken.mockResolvedValueOnce(null); + const authError = new Error('Auth error'); + mocks.getBearerToken.mockRejectedValueOnce(authError); - await expect(service.connect()).rejects.toThrow( - 'Authentication required: user not signed in', - ); + // connect() will catch the error and schedule reconnect (not throw) + await service.connect(); + // Initial state should be DISCONNECTED since connection failed expect(service.getConnectionInfo().state).toBe( WebSocketState.DISCONNECTED, ); expect(mocks.getBearerToken).toHaveBeenCalled(); + + // Verify reconnect was scheduled (attempts should be incremented) + expect(service.getConnectionInfo().reconnectAttempts).toBeGreaterThan( + 0, + ); }, ); }); - it('should handle getBearerToken error during connection by rejecting with authentication error', async () => { + it('should handle null bearer token by scheduling reconnect', async () => { await withService( { options: {}, mockWebSocketOptions: { autoConnect: false }, }, async ({ service, mocks }) => { - const authError = new Error('Auth error'); - mocks.getBearerToken.mockRejectedValueOnce(authError); + // Return null to simulate user not signed in + mocks.getBearerToken.mockResolvedValueOnce(null); - await expect(service.connect()).rejects.toThrow('Auth error'); + // connect() will catch the authentication error and schedule reconnect + await service.connect(); + // Should be in DISCONNECTED state expect(service.getConnectionInfo().state).toBe( WebSocketState.DISCONNECTED, ); expect(mocks.getBearerToken).toHaveBeenCalled(); + + // Verify reconnect was scheduled + expect(service.getConnectionInfo().reconnectAttempts).toBeGreaterThan( + 0, + ); }, ); }); @@ -1546,6 +1733,9 @@ describe('BackendWebSocketService', () => { }, }, async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Mock Math.random to make Cockatiel's jitter deterministic + jest.spyOn(Math, 'random').mockReturnValue(0); + await service.connect(); const mockWs = getMockWebSocket(); @@ -1561,7 +1751,7 @@ describe('BackendWebSocketService', () => { mockEnabledCallback.mockReturnValue(false); // Advance time to trigger reconnection check - await completeAsyncOperations(50); + await completeAsyncOperations(70); // Should have checked isEnabled and stopped reconnection expect(mockEnabledCallback).toHaveBeenCalled(); @@ -1572,34 +1762,6 @@ describe('BackendWebSocketService', () => { }, ); }); - - it('should continue reconnection attempts when reconnect fails', async () => { - await withService( - { - options: { - reconnectDelay: 50, - }, - }, - async ({ service, getMockWebSocket, completeAsyncOperations }) => { - await service.connect(); - const mockWs = getMockWebSocket(); - - // Simulate connection loss - mockWs.simulateClose(1006, 'Connection lost'); - await completeAsyncOperations(0); - - const connectSpy = jest - .spyOn(service, 'connect') - .mockRejectedValue(new Error('Reconnect failed')); - - // Advance time to trigger first reconnection attempt - await completeAsyncOperations(50); - - // Should have attempted to reconnect - expect(connectSpy).toHaveBeenCalled(); - }, - ); - }); }); // ===================================================== diff --git a/packages/core-backend/src/BackendWebSocketService.ts b/packages/core-backend/src/BackendWebSocketService.ts index 9a1a98297c..3bbfd48a0b 100644 --- a/packages/core-backend/src/BackendWebSocketService.ts +++ b/packages/core-backend/src/BackendWebSocketService.ts @@ -1,5 +1,6 @@ import type { RestrictedMessenger } from '@metamask/base-controller'; import type { TraceCallback } from '@metamask/controller-utils'; +import { ExponentialBackoff } from '@metamask/controller-utils'; import type { KeyringControllerLockEvent, KeyringControllerUnlockEvent, @@ -18,6 +19,7 @@ const log = createModuleLogger(projectLogger, SERVICE_NAME); const MESSENGER_EXPOSED_METHODS = [ 'connect', 'disconnect', + 'forceReconnection', 'sendMessage', 'sendRequest', 'subscribe', @@ -299,6 +301,8 @@ export class BackendWebSocketService { #connectionTimeout: NodeJS.Timeout | null = null; + #stableConnectionTimer: NodeJS.Timeout | null = null; + // Track the current connection promise to handle concurrent connection attempts #connectionPromise: Promise | null = null; @@ -326,6 +330,9 @@ export class BackendWebSocketService { // Value: ChannelCallback configuration readonly #channelCallbacks = new Map(); + // Backoff instance for reconnection delays (reset on stable connection) + #backoff!: ReturnType['next']>; + // ============================================================================= // 1. CONSTRUCTOR & INITIALIZATION // ============================================================================= @@ -352,6 +359,9 @@ export class BackendWebSocketService { requestTimeout: options.requestTimeout ?? 30000, }; + // Initialize backoff for reconnection delays + this.#newBackoff(); + // Subscribe to authentication and keyring controller events this.#subscribeEvents(); @@ -381,7 +391,6 @@ export class BackendWebSocketService { // eslint-disable-next-line @typescript-eslint/no-floating-promises this.connect(); } else { - // eslint-disable-next-line @typescript-eslint/no-floating-promises this.disconnect(); } }, @@ -396,7 +405,6 @@ export class BackendWebSocketService { // Subscribe to wallet lock event this.#messenger.subscribe('KeyringController:lock', () => { - // eslint-disable-next-line @typescript-eslint/no-floating-promises this.disconnect(); }); } @@ -442,6 +450,12 @@ export class BackendWebSocketService { return; } + // If a reconnect is already scheduled, defer to it to avoid bypassing exponential backoff + // This prevents rapid loops when server accepts then immediately closes connections + if (this.#reconnectTimer) { + return; + } + // Create and store the connection promise IMMEDIATELY (before any async operations) // This ensures subsequent connect() calls will wait for this promise instead of creating new connections this.#connectionPromise = (async () => { @@ -452,15 +466,11 @@ export class BackendWebSocketService { 'AuthenticationController:getBearerToken', ); if (!token) { - this.#scheduleReconnect(); throw new Error('Authentication required: user not signed in'); } bearerToken = token; } catch (error) { log('Failed to check authentication requirements', { error }); - - // Can't connect - schedule retry - this.#scheduleReconnect(); throw error; } @@ -473,14 +483,16 @@ export class BackendWebSocketService { const errorMessage = getErrorMessage(error); log('Connection attempt failed', { errorMessage, error }); this.#setState(WebSocketState.ERROR); - - // Rethrow to propagate error to caller throw error; } })(); try { await this.#connectionPromise; + } catch { + // Always schedule reconnect on any failure + // Exponential backoff will prevent aggressive retries + this.#scheduleReconnect(); } finally { // Clear the connection promise when done (success or failure) this.#connectionPromise = null; @@ -489,10 +501,8 @@ export class BackendWebSocketService { /** * Closes WebSocket connection - * - * @returns Promise that resolves when disconnection is complete */ - async disconnect(): Promise { + disconnect(): void { if ( this.#state === WebSocketState.DISCONNECTED || this.#state === WebSocketState.DISCONNECTING @@ -510,6 +520,9 @@ export class BackendWebSocketService { // Clear any pending connection promise this.#connectionPromise = null; + // Reset reconnect attempts on manual disconnect + this.#reconnectAttempts = 0; + if (this.#ws) { this.#ws.close(1000, 'Normal closure'); } @@ -517,6 +530,38 @@ export class BackendWebSocketService { log('WebSocket manually disconnected'); } + /** + * Forces a WebSocket reconnection to clean up subscription state + * + * This method is useful when subscription state may be out of sync and needs to be reset. + * It performs a controlled disconnect-then-reconnect sequence: + * - Disconnects cleanly to trigger subscription cleanup + * - Schedules reconnection with exponential backoff to prevent rapid loops + * - All subscriptions will be cleaned up automatically on disconnect + * + * Use cases: + * - Recovering from subscription/unsubscription issues + * - Cleaning up orphaned subscriptions + * - Forcing a fresh subscription state + * + * @returns Promise that resolves when disconnection is complete (reconnection is scheduled) + */ + async forceReconnection(): Promise { + // If a reconnect is already scheduled, don't force another one + if (this.#reconnectTimer) { + log('Reconnect already scheduled, skipping force reconnection'); + return; + } + + log('Forcing WebSocket reconnection to clean up subscription state'); + + // Perform controlled disconnect + this.disconnect(); + + // Schedule reconnection with exponential backoff + this.#scheduleReconnect(); + } + /** * Sends a message through the WebSocket (fire-and-forget, no response expected) * @@ -991,8 +1036,15 @@ export class BackendWebSocketService { this.#setState(WebSocketState.CONNECTED); this.#connectedAt = Date.now(); - // Reset reconnect attempts on successful connection - this.#reconnectAttempts = 0; + // Only reset after connection stays stable for a period (10 seconds) + // This prevents rapid reconnect loops when server accepts then immediately closes + this.#stableConnectionTimer = setTimeout(() => { + this.#stableConnectionTimer = null; + this.#reconnectAttempts = 0; + // Create new backoff sequence for fresh start on next disconnect + this.#newBackoff(); + log('Connection stable - reset reconnect attempts and backoff'); + }, 10000); resolve(); }, @@ -1242,7 +1294,15 @@ export class BackendWebSocketService { // Calculate connection duration before we clear state const connectionDuration = Date.now() - this.#connectedAt; - this.#clearTimers(); + if (this.#connectionTimeout) { + clearTimeout(this.#connectionTimeout); + this.#connectionTimeout = null; + } + if (this.#stableConnectionTimer) { + clearTimeout(this.#stableConnectionTimer); + this.#stableConnectionTimer = null; + } + this.#connectedAt = 0; // Clear any pending connection promise @@ -1281,8 +1341,6 @@ export class BackendWebSocketService { }, ); - // For any unexpected disconnects, attempt reconnection - // The manualDisconnect flag is the only gate - if it's false, we reconnect this.#scheduleReconnect(); } @@ -1300,14 +1358,45 @@ export class BackendWebSocketService { // ============================================================================= /** - * Schedules a reconnection attempt with exponential backoff + * Schedules a connection attempt with exponential backoff and jitter + * + * This method is used for automatic reconnection with Cockatiel's exponential backoff: + * - Prevents duplicate reconnection timers (idempotent) + * - Applies exponential backoff with jitter based on previous failures + * - Jitter uses decorrelated formula to prevent thundering herd problem + * - Used ONLY for automatic retries, not user-initiated actions + * + * Call this from: + * - connect() catch block (on connection failure) + * - #handleClose() (on unexpected disconnect) + * + * For user-initiated actions (sign in, unlock), call connect() directly instead. + * + * If a reconnect is already scheduled, this is a no-op to prevent: + * - Orphaned timers (memory leak) + * - Inflated reconnect attempts counter + * - Prematurely long delays */ #scheduleReconnect(): void { + // If a reconnect is already scheduled, don't schedule another one + if (this.#reconnectTimer) { + return; + } + + // Increment attempts BEFORE calculating delay so backoff grows properly this.#reconnectAttempts += 1; - const rawDelay = - this.#options.reconnectDelay * Math.pow(1.5, this.#reconnectAttempts - 1); - const delay = Math.min(rawDelay, this.#options.maxReconnectDelay); + // Use Cockatiel's exponential backoff to get delay with jitter + const delay = this.#backoff.duration; + + // Progress to next backoff state for future reconnect attempts + // Pass attempt number as context (though ExponentialBackoff doesn't use it) + this.#backoff = this.#backoff.next({ attempt: this.#reconnectAttempts }); + + log('Scheduling reconnect', { + attempt: this.#reconnectAttempts, + delay_ms: delay, + }); this.#reconnectTimer = setTimeout(() => { // Clear timer reference first @@ -1316,16 +1405,26 @@ export class BackendWebSocketService { // Check if connection is still enabled before reconnecting if (this.#isEnabled && !this.#isEnabled()) { this.#reconnectAttempts = 0; + // Create new backoff sequence when disabled + this.#newBackoff(); return; } - // Attempt to reconnect - if it fails, schedule another attempt - this.connect().catch(() => { - this.#scheduleReconnect(); - }); + // eslint-disable-next-line @typescript-eslint/no-floating-promises + this.connect(); }, delay); } + /** + * Creates a new exponential backoff sequence + */ + #newBackoff(): void { + this.#backoff = new ExponentialBackoff({ + initialDelay: this.#options.reconnectDelay, + maxDelay: this.#options.maxReconnectDelay, + }).next(); + } + /** * Clears all active timers */ @@ -1338,6 +1437,10 @@ export class BackendWebSocketService { clearTimeout(this.#connectionTimeout); this.#connectionTimeout = null; } + if (this.#stableConnectionTimer) { + clearTimeout(this.#stableConnectionTimer); + this.#stableConnectionTimer = null; + } } /**