@@ -22,6 +22,7 @@ import (
2222 "fmt"
2323 "net/http"
2424 "net/url"
25+ "os"
2526 "slices"
2627 "sync"
2728 "time"
@@ -318,33 +319,43 @@ func (c *Client) onConnectHandler(_ context.Context) {
318319 return
319320 }
320321
321- // See if we can retrieve the PackageStatuses where the collector package is in an installing state
322322 pkgStatuses , err := c .getVerifiedPackageStatuses ()
323323 if err != nil {
324324 c .logger .Error ("Problem with PackageStatuses" , zap .Error (err ))
325325 return
326326 }
327-
328327 collectorPkgStatus := pkgStatuses .Packages [packagestate .CollectorPackageName ]
329- // If we were not installing before the connection, nothing else to do
330- if collectorPkgStatus .Status != protobufs .PackageStatusEnum_PackageStatusEnum_Installing {
331- return
332- }
333328
334- // If in the middle of an install and we just connected, this is most likely becasue the collector was just spun up fresh by the Updater.
329+ // If in the middle of an install and we just connected, this is most likely because the collector was just spun up fresh by the Updater.
335330 // If the current version matches the server offered version, this implies a good install and so we should set the PackageStatuses and
336331 // send it to the OpAMP Server. If the version does not match, just change the PackageStatues JSON so that the Updater can start rollback.
332+ if collectorPkgStatus .Status == protobufs .PackageStatusEnum_PackageStatusEnum_Installing {
333+ if collectorPkgStatus .ServerOfferedVersion != version .Version () {
334+ errMsg := fmt .Sprintf ("Failed because of collector version mismatch: expected %s, actual %s" ,
335+ collectorPkgStatus .ServerOfferedVersion , version .Version ())
336+ c .failPackageInstall (pkgStatuses , errMsg , false )
337337
338- if collectorPkgStatus .ServerOfferedVersion != version .Version () {
339- errMsg := fmt .Sprintf ("Failed because of collector version mismatch: expected %s, actual %s" ,
340- collectorPkgStatus .ServerOfferedVersion , version .Version ())
341- c .failPackageInstall (pkgStatuses , errMsg , false )
338+ return
339+ }
342340
341+ // Installation of new collector was successful!
342+ c .finishPackageInstall (pkgStatuses )
343343 return
344344 }
345345
346- // Installation of new collector was successful!
347- c .finishPackageInstall (pkgStatuses )
346+ // If the package status is "failed", this most likely means an update was attempted and the new binary failed.
347+ // The Updater has removed the new binary, rolled back the old binary, and re-started it. The current process
348+ // is the old binary that was restored. At this point the OpAMP client has already initialized itself with the
349+ // status contained in the package state file as part of the client connection process. We can safely delete
350+ // the package state file, however we cannot update the stored state on the client (i.e. c.opampClient.SetPackageStatuses(pkgStatuses)).
351+ // Doing so would wipe the state before the client can report the failure. Deleting just the file though is safe. After the initial
352+ // state is reported as part of the connection process, future queries of this file for package state will return a default "installed" status.
353+ // This is good because after reporting the initial error, we don't want to persist an upgrade failure.
354+ if collectorPkgStatus .Status == protobufs .PackageStatusEnum_PackageStatusEnum_InstallFailed {
355+ if err := os .Remove (packagestate .DefaultFileName ); err != nil {
356+ c .logger .Warn ("Failed to remove package status artifact" , zap .Error (err ))
357+ }
358+ }
348359}
349360
350361func (c * Client ) onConnectFailedHandler (_ context.Context , err error ) {
0 commit comments