From a16a6004a899b04a4a244c70086e0459ee3c420e Mon Sep 17 00:00:00 2001 From: Andrew Hsu Date: Mon, 12 Jun 2017 22:53:37 +0000 Subject: [PATCH] revendor github.com/docker/swarmkit to 6083c76 To get the changes: * https://github.com/docker/swarmkit/pull/2234 * https://github.com/docker/swarmkit/pull/2237 * https://github.com/docker/swarmkit/pull/2238 Signed-off-by: Andrew Hsu --- components/engine/vendor.conf | 2 +- .../docker/swarmkit/ca/certificates.go | 16 ++ .../github.com/docker/swarmkit/ca/config.go | 86 +++++- .../github.com/docker/swarmkit/ca/renewer.go | 6 +- .../github.com/docker/swarmkit/ca/server.go | 7 +- .../swarmkit/manager/allocator/network.go | 249 +++++++++++------- .../networkallocator/drivers_darwin.go | 3 +- .../docker/swarmkit/manager/manager.go | 2 +- .../github.com/docker/swarmkit/node/node.go | 10 +- 9 files changed, 266 insertions(+), 115 deletions(-) diff --git a/components/engine/vendor.conf b/components/engine/vendor.conf index fc43a8bd9ba..2ab4254827d 100644 --- a/components/engine/vendor.conf +++ b/components/engine/vendor.conf @@ -107,7 +107,7 @@ github.com/containerd/containerd cfb82a876ecc11b5ca0977d1733adbe58599088a github.com/tonistiigi/fifo 1405643975692217d6720f8b54aeee1bf2cd5cf4 # cluster -github.com/docker/swarmkit 758b59114f7eef55faca7007af773d4097540ed2 +github.com/docker/swarmkit 6083c7689ed00a6f2d67941443603df69c2ff6ba github.com/gogo/protobuf v0.4 github.com/cloudflare/cfssl 7fb22c8cba7ecaf98e4082d22d65800cf45e042a github.com/google/certificate-transparency d90e65c3a07988180c5b1ece71791c0b6506826e diff --git a/components/engine/vendor/github.com/docker/swarmkit/ca/certificates.go b/components/engine/vendor/github.com/docker/swarmkit/ca/certificates.go index 37c8a172dea..8e004c1b5f2 100644 --- a/components/engine/vendor/github.com/docker/swarmkit/ca/certificates.go +++ b/components/engine/vendor/github.com/docker/swarmkit/ca/certificates.go @@ -126,6 +126,11 @@ type LocalSigner struct { cryptoSigner crypto.Signer } +type x509UnknownAuthError struct { + error + failedLeafCert *x509.Certificate +} + // RootCA is the representation of everything we need to sign certificates and/or to verify certificates // // RootCA.Cert: [CA cert1][CA cert2] @@ -275,6 +280,17 @@ func (rca *RootCA) RequestAndSaveNewCertificates(ctx context.Context, kw KeyWrit // Create an X509Cert so we can .Verify() // Check to see if this certificate was signed by our CA, and isn't expired parsedCerts, chains, err := ValidateCertChain(rca.Pool, signedCert, false) + // TODO(cyli): - right now we need the invalid certificate in order to determine whether or not we should + // download a new root, because we only want to do that in the case of workers. When we have a single + // codepath for updating the root CAs for both managers and workers, this snippet can go. + if _, ok := err.(x509.UnknownAuthorityError); ok { + if parsedCerts, parseErr := helpers.ParseCertificatesPEM(signedCert); parseErr == nil && len(parsedCerts) > 0 { + return nil, nil, x509UnknownAuthError{ + error: err, + failedLeafCert: parsedCerts[0], + } + } + } if err != nil { return nil, nil, err } diff --git a/components/engine/vendor/github.com/docker/swarmkit/ca/config.go b/components/engine/vendor/github.com/docker/swarmkit/ca/config.go index a853fa6c168..9dd84ecb360 100644 --- a/components/engine/vendor/github.com/docker/swarmkit/ca/config.go +++ b/components/engine/vendor/github.com/docker/swarmkit/ca/config.go @@ -89,6 +89,39 @@ type CertificateUpdate struct { Err error } +func validateRootCAAndTLSCert(rootCA *RootCA, externalCARootPool *x509.CertPool, tlsKeyPair *tls.Certificate) error { + var ( + leafCert *x509.Certificate + intermediatePool *x509.CertPool + ) + for i, derBytes := range tlsKeyPair.Certificate { + parsed, err := x509.ParseCertificate(derBytes) + if err != nil { + return errors.Wrap(err, "could not validate new root certificates due to parse error") + } + if i == 0 { + leafCert = parsed + } else { + if intermediatePool == nil { + intermediatePool = x509.NewCertPool() + } + intermediatePool.AddCert(parsed) + } + } + opts := x509.VerifyOptions{ + Roots: rootCA.Pool, + Intermediates: intermediatePool, + } + if _, err := leafCert.Verify(opts); err != nil { + return errors.Wrap(err, "new root CA does not match existing TLS credentials") + } + opts.Roots = externalCARootPool + if _, err := leafCert.Verify(opts); err != nil { + return errors.Wrap(err, "new external root pool does not match existing TLS credentials") + } + return nil +} + // NewSecurityConfig initializes and returns a new SecurityConfig. func NewSecurityConfig(rootCA *RootCA, krw *KeyReadWriter, tlsKeyPair *tls.Certificate, issuerInfo *IssuerInfo) (*SecurityConfig, error) { // Create the Server TLS Credentials for this node. These will not be used by workers. @@ -155,6 +188,11 @@ func (s *SecurityConfig) UpdateRootCA(rootCA *RootCA, externalCARootPool *x509.C s.mu.Lock() defer s.mu.Unlock() + // refuse to update the root CA if the current TLS credentials do not validate against it + if err := validateRootCAAndTLSCert(rootCA, externalCARootPool, s.certificate); err != nil { + return err + } + s.rootCA = rootCA s.externalCAClientRootPool = externalCARootPool s.externalCA.UpdateRootCA(rootCA) @@ -217,6 +255,13 @@ func (s *SecurityConfig) updateTLSCredentials(certificate *tls.Certificate, issu return nil } +// UpdateTLSCredentials updates the security config with an updated TLS certificate and issuer info +func (s *SecurityConfig) UpdateTLSCredentials(certificate *tls.Certificate, issuerInfo *IssuerInfo) error { + s.mu.Lock() + defer s.mu.Unlock() + return s.updateTLSCredentials(certificate, issuerInfo) +} + // SigningPolicy creates a policy used by the signer to ensure that the only fields // from the remote CSRs we trust are: PublicKey, PublicKeyAlgorithm and SignatureAlgorithm. // It receives the duration a certificate will be valid for @@ -447,9 +492,35 @@ func (rootCA RootCA) CreateSecurityConfig(ctx context.Context, krw *KeyReadWrite return secConfig, err } +// TODO(cyli): currently we have to only update if it's a worker role - if we have a single root CA update path for +// both managers and workers, we won't need to check any more. +func updateRootThenUpdateCert(ctx context.Context, s *SecurityConfig, connBroker *connectionbroker.Broker, rootPaths CertPaths, failedCert *x509.Certificate) (*tls.Certificate, *IssuerInfo, error) { + if len(failedCert.Subject.OrganizationalUnit) == 0 || failedCert.Subject.OrganizationalUnit[0] != WorkerRole { + return nil, nil, errors.New("cannot update root CA since this is not a worker") + } + // try downloading a new root CA if it's an unknown authority issue, in case there was a root rotation completion + // and we just didn't get the new root + rootCA, err := GetRemoteCA(ctx, "", connBroker) + if err != nil { + return nil, nil, err + } + // validate against the existing security config creds + if err := s.UpdateRootCA(&rootCA, rootCA.Pool); err != nil { + return nil, nil, err + } + if err := SaveRootCA(rootCA, rootPaths); err != nil { + return nil, nil, err + } + return rootCA.RequestAndSaveNewCertificates(ctx, s.KeyWriter(), + CertificateRequestConfig{ + ConnBroker: connBroker, + Credentials: s.ClientTLSCreds, + }) +} + // RenewTLSConfigNow gets a new TLS cert and key, and updates the security config if provided. This is similar to // RenewTLSConfig, except while that monitors for expiry, and periodically renews, this renews once and is blocking -func RenewTLSConfigNow(ctx context.Context, s *SecurityConfig, connBroker *connectionbroker.Broker) error { +func RenewTLSConfigNow(ctx context.Context, s *SecurityConfig, connBroker *connectionbroker.Broker, rootPaths CertPaths) error { s.renewalMu.Lock() defer s.renewalMu.Unlock() @@ -467,14 +538,21 @@ func RenewTLSConfigNow(ctx context.Context, s *SecurityConfig, connBroker *conne ConnBroker: connBroker, Credentials: s.ClientTLSCreds, }) + if wrappedError, ok := err.(x509UnknownAuthError); ok { + var newErr error + tlsKeyPair, issuerInfo, newErr = updateRootThenUpdateCert(ctx, s, connBroker, rootPaths, wrappedError.failedLeafCert) + if newErr != nil { + err = wrappedError.error + } else { + err = nil + } + } if err != nil { log.WithError(err).Errorf("failed to renew the certificate") return err } - s.mu.Lock() - defer s.mu.Unlock() - return s.updateTLSCredentials(tlsKeyPair, issuerInfo) + return s.UpdateTLSCredentials(tlsKeyPair, issuerInfo) } // calculateRandomExpiry returns a random duration between 50% and 80% of the diff --git a/components/engine/vendor/github.com/docker/swarmkit/ca/renewer.go b/components/engine/vendor/github.com/docker/swarmkit/ca/renewer.go index 213a66a25b6..6d0229bd1d6 100644 --- a/components/engine/vendor/github.com/docker/swarmkit/ca/renewer.go +++ b/components/engine/vendor/github.com/docker/swarmkit/ca/renewer.go @@ -27,14 +27,16 @@ type TLSRenewer struct { connBroker *connectionbroker.Broker renew chan struct{} expectedRole string + rootPaths CertPaths } // NewTLSRenewer creates a new TLS renewer. It must be started with Start. -func NewTLSRenewer(s *SecurityConfig, connBroker *connectionbroker.Broker) *TLSRenewer { +func NewTLSRenewer(s *SecurityConfig, connBroker *connectionbroker.Broker, rootPaths CertPaths) *TLSRenewer { return &TLSRenewer{ s: s, connBroker: connBroker, renew: make(chan struct{}, 1), + rootPaths: rootPaths, } } @@ -135,7 +137,7 @@ func (t *TLSRenewer) Start(ctx context.Context) <-chan CertificateUpdate { // ignore errors - it will just try again later var certUpdate CertificateUpdate - if err := RenewTLSConfigNow(ctx, t.s, t.connBroker); err != nil { + if err := RenewTLSConfigNow(ctx, t.s, t.connBroker, t.rootPaths); err != nil { certUpdate.Err = err expBackoff.Failure(nil, nil) } else { diff --git a/components/engine/vendor/github.com/docker/swarmkit/ca/server.go b/components/engine/vendor/github.com/docker/swarmkit/ca/server.go index b4b06680496..cb3b6acdc12 100644 --- a/components/engine/vendor/github.com/docker/swarmkit/ca/server.go +++ b/components/engine/vendor/github.com/docker/swarmkit/ca/server.go @@ -624,10 +624,6 @@ func (s *Server) UpdateRootCA(ctx context.Context, cluster *api.Cluster) error { if err != nil { return errors.Wrap(err, "invalid Root CA object in cluster") } - if err := SaveRootCA(updatedRootCA, s.rootPaths); err != nil { - return errors.Wrap(err, "unable to save new root CA certificates") - } - externalCARootPool := updatedRootCA.Pool if rCA.RootRotation != nil { // the external CA has to trust the new CA cert @@ -640,6 +636,9 @@ func (s *Server) UpdateRootCA(ctx context.Context, cluster *api.Cluster) error { if err := s.securityConfig.UpdateRootCA(&updatedRootCA, externalCARootPool); err != nil { return errors.Wrap(err, "updating Root CA failed") } + if err := SaveRootCA(updatedRootCA, s.rootPaths); err != nil { + return errors.Wrap(err, "unable to save new root CA certificates") + } // only update the server cache if we've successfully updated the root CA logger.Debugf("Root CA %s successfully", setOrUpdate) s.lastSeenClusterRootCA = rCA diff --git a/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/network.go b/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/network.go index 23d3f6f0a1b..52572b951fe 100644 --- a/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/network.go +++ b/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/network.go @@ -145,110 +145,31 @@ func (a *Allocator) doNetworkInit(ctx context.Context) (err error) { log.G(ctx).WithError(err).Error("failed committing allocation of networks during init") } - // Allocate nodes in the store so far before we process watched events, - // if the ingress network is present. + // First, allocate objects that already have addresses associated with + // them, to reserve these IP addresses in internal state. if nc.ingressNetwork != nil { - if err := a.allocateNodes(ctx); err != nil { + if err := a.allocateNodes(ctx, true); err != nil { return err } } - - // Allocate services in the store so far before we process watched events. - var services []*api.Service - a.store.View(func(tx store.ReadTx) { - services, err = store.FindServices(tx, store.All) - }) - if err != nil { - return errors.Wrap(err, "error listing all services in store while trying to allocate during init") + if err := a.allocateServices(ctx, true); err != nil { + return err } - - var allocatedServices []*api.Service - for _, s := range services { - if !nc.nwkAllocator.ServiceNeedsAllocation(s, networkallocator.OnInit) { - continue - } - - if err := a.allocateService(ctx, s); err != nil { - log.G(ctx).WithError(err).Errorf("failed allocating service %s during init", s.ID) - continue - } - allocatedServices = append(allocatedServices, s) + if err := a.allocateTasks(ctx, true); err != nil { + return err } - if err := a.store.Batch(func(batch *store.Batch) error { - for _, s := range allocatedServices { - if err := a.commitAllocatedService(ctx, batch, s); err != nil { - log.G(ctx).WithError(err).Errorf("failed committing allocation of service %s during init", s.ID) - } + // Now allocate objects that don't have addresses yet. + if nc.ingressNetwork != nil { + if err := a.allocateNodes(ctx, false); err != nil { + return err } - return nil - }); err != nil { - log.G(ctx).WithError(err).Error("failed committing allocation of services during init") - } - - // Allocate tasks in the store so far before we started watching. - var ( - tasks []*api.Task - allocatedTasks []*api.Task - ) - a.store.View(func(tx store.ReadTx) { - tasks, err = store.FindTasks(tx, store.All) - }) - if err != nil { - return errors.Wrap(err, "error listing all tasks in store while trying to allocate during init") } - - for _, t := range tasks { - if t.Status.State > api.TaskStateRunning { - continue - } - - var s *api.Service - if t.ServiceID != "" { - a.store.View(func(tx store.ReadTx) { - s = store.GetService(tx, t.ServiceID) - }) - } - - // Populate network attachments in the task - // based on service spec. - a.taskCreateNetworkAttachments(t, s) - - if taskReadyForNetworkVote(t, s, nc) { - if t.Status.State >= api.TaskStatePending { - continue - } - - if a.taskAllocateVote(networkVoter, t.ID) { - // If the task is not attached to any network, network - // allocators job is done. Immediately cast a vote so - // that the task can be moved to the PENDING state as - // soon as possible. - updateTaskStatus(t, api.TaskStatePending, allocatedStatusMessage) - allocatedTasks = append(allocatedTasks, t) - } - continue - } - - err := a.allocateTask(ctx, t) - if err == nil { - allocatedTasks = append(allocatedTasks, t) - } else if err != errNoChanges { - log.G(ctx).WithError(err).Errorf("failed allocating task %s during init", t.ID) - nc.unallocatedTasks[t.ID] = t - } + if err := a.allocateServices(ctx, false); err != nil { + return err } - - if err := a.store.Batch(func(batch *store.Batch) error { - for _, t := range allocatedTasks { - if err := a.commitAllocatedTask(ctx, batch, t); err != nil { - log.G(ctx).WithError(err).Errorf("failed committing allocation of task %s during init", t.ID) - } - } - - return nil - }); err != nil { - log.G(ctx).WithError(err).Error("failed committing allocation of tasks during init") + if err := a.allocateTasks(ctx, false); err != nil { + return err } return nil @@ -283,7 +204,7 @@ func (a *Allocator) doNetworkAlloc(ctx context.Context, ev events.Event) { if IsIngressNetwork(n) { nc.ingressNetwork = n - err := a.allocateNodes(ctx) + err := a.allocateNodes(ctx, false) if err != nil { log.G(ctx).WithError(err).Error(err) } @@ -455,7 +376,7 @@ func (a *Allocator) doNodeAlloc(ctx context.Context, ev events.Event) { } } -func (a *Allocator) allocateNodes(ctx context.Context) error { +func (a *Allocator) allocateNodes(ctx context.Context, existingAddressesOnly bool) error { // Allocate nodes in the store so far before we process watched events. var ( allocatedNodes []*api.Node @@ -480,6 +401,10 @@ func (a *Allocator) allocateNodes(ctx context.Context) error { node.Attachment = &api.NetworkAttachment{} } + if existingAddressesOnly && len(node.Attachment.Addresses) == 0 { + continue + } + node.Attachment.Network = nc.ingressNetwork.Copy() if err := a.allocateNode(ctx, node); err != nil { log.G(ctx).WithError(err).Errorf("Failed to allocate network resources for node %s", node.ID) @@ -534,6 +459,138 @@ func (a *Allocator) deallocateNodes(ctx context.Context) error { return nil } +// allocateServices allocates services in the store so far before we process +// watched events. +func (a *Allocator) allocateServices(ctx context.Context, existingAddressesOnly bool) error { + var ( + nc = a.netCtx + services []*api.Service + err error + ) + a.store.View(func(tx store.ReadTx) { + services, err = store.FindServices(tx, store.All) + }) + if err != nil { + return errors.Wrap(err, "error listing all services in store while trying to allocate during init") + } + + var allocatedServices []*api.Service + for _, s := range services { + if !nc.nwkAllocator.ServiceNeedsAllocation(s, networkallocator.OnInit) { + continue + } + + if existingAddressesOnly && + (s.Endpoint == nil || + len(s.Endpoint.VirtualIPs) == 0) { + continue + } + + if err := a.allocateService(ctx, s); err != nil { + log.G(ctx).WithError(err).Errorf("failed allocating service %s during init", s.ID) + continue + } + allocatedServices = append(allocatedServices, s) + } + + if err := a.store.Batch(func(batch *store.Batch) error { + for _, s := range allocatedServices { + if err := a.commitAllocatedService(ctx, batch, s); err != nil { + log.G(ctx).WithError(err).Errorf("failed committing allocation of service %s during init", s.ID) + } + } + return nil + }); err != nil { + log.G(ctx).WithError(err).Error("failed committing allocation of services during init") + } + + return nil +} + +// allocateTasks allocates tasks in the store so far before we started watching. +func (a *Allocator) allocateTasks(ctx context.Context, existingAddressesOnly bool) error { + var ( + nc = a.netCtx + tasks []*api.Task + allocatedTasks []*api.Task + err error + ) + a.store.View(func(tx store.ReadTx) { + tasks, err = store.FindTasks(tx, store.All) + }) + if err != nil { + return errors.Wrap(err, "error listing all tasks in store while trying to allocate during init") + } + + for _, t := range tasks { + if t.Status.State > api.TaskStateRunning { + continue + } + + if existingAddressesOnly { + hasAddresses := false + for _, nAttach := range t.Networks { + if len(nAttach.Addresses) != 0 { + hasAddresses = true + break + } + } + if !hasAddresses { + continue + } + } + + var s *api.Service + if t.ServiceID != "" { + a.store.View(func(tx store.ReadTx) { + s = store.GetService(tx, t.ServiceID) + }) + } + + // Populate network attachments in the task + // based on service spec. + a.taskCreateNetworkAttachments(t, s) + + if taskReadyForNetworkVote(t, s, nc) { + if t.Status.State >= api.TaskStatePending { + continue + } + + if a.taskAllocateVote(networkVoter, t.ID) { + // If the task is not attached to any network, network + // allocators job is done. Immediately cast a vote so + // that the task can be moved to the PENDING state as + // soon as possible. + updateTaskStatus(t, api.TaskStatePending, allocatedStatusMessage) + allocatedTasks = append(allocatedTasks, t) + } + continue + } + + err := a.allocateTask(ctx, t) + if err == nil { + allocatedTasks = append(allocatedTasks, t) + } else if err != errNoChanges { + log.G(ctx).WithError(err).Errorf("failed allocating task %s during init", t.ID) + nc.unallocatedTasks[t.ID] = t + } + } + + if err := a.store.Batch(func(batch *store.Batch) error { + for _, t := range allocatedTasks { + if err := a.commitAllocatedTask(ctx, batch, t); err != nil { + log.G(ctx).WithError(err).Errorf("failed committing allocation of task %s during init", t.ID) + } + } + + return nil + }); err != nil { + log.G(ctx).WithError(err).Error("failed committing allocation of tasks during init") + } + + return nil +} + // taskReadyForNetworkVote checks if the task is ready for a network // vote to move it to PENDING state. func taskReadyForNetworkVote(t *api.Task, s *api.Service, nc *networkContext) bool { diff --git a/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/networkallocator/drivers_darwin.go b/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/networkallocator/drivers_darwin.go index 8cc2e11d795..9b56f7be364 100644 --- a/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/networkallocator/drivers_darwin.go +++ b/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/networkallocator/drivers_darwin.go @@ -3,10 +3,9 @@ package networkallocator import ( "github.com/docker/libnetwork/drivers/overlay/ovmanager" "github.com/docker/libnetwork/drivers/remote" - "github.com/docker/libnetwork/drvregistry" ) -const initializers = []initializer{ +var initializers = []initializer{ {remote.Init, "remote"}, {ovmanager.Init, "overlay"}, } diff --git a/components/engine/vendor/github.com/docker/swarmkit/manager/manager.go b/components/engine/vendor/github.com/docker/swarmkit/manager/manager.go index 01c296acfe4..a5a39723f17 100644 --- a/components/engine/vendor/github.com/docker/swarmkit/manager/manager.go +++ b/components/engine/vendor/github.com/docker/swarmkit/manager/manager.go @@ -698,7 +698,7 @@ func (m *Manager) updateKEK(ctx context.Context, cluster *api.Cluster) error { connBroker := connectionbroker.New(remotes.NewRemotes()) connBroker.SetLocalConn(conn) - if err := ca.RenewTLSConfigNow(ctx, securityConfig, connBroker); err != nil { + if err := ca.RenewTLSConfigNow(ctx, securityConfig, connBroker, m.config.RootCAPaths); err != nil { logger.WithError(err).Error("failed to download new TLS certificate after locking the cluster") } }() diff --git a/components/engine/vendor/github.com/docker/swarmkit/node/node.go b/components/engine/vendor/github.com/docker/swarmkit/node/node.go index 6a40a06e161..85c5abe390b 100644 --- a/components/engine/vendor/github.com/docker/swarmkit/node/node.go +++ b/components/engine/vendor/github.com/docker/swarmkit/node/node.go @@ -279,7 +279,7 @@ func (n *Node) run(ctx context.Context) (err error) { return err } - renewer := ca.NewTLSRenewer(securityConfig, n.connBroker) + renewer := ca.NewTLSRenewer(securityConfig, n.connBroker, paths.RootCA) ctx = log.WithLogger(ctx, log.G(ctx).WithField("node.id", n.NodeID())) @@ -344,14 +344,14 @@ func (n *Node) run(ctx context.Context) (err error) { log.G(ctx).WithError(err).Error("invalid new root certificate from the dispatcher") continue } - if err := ca.SaveRootCA(newRootCA, paths.RootCA); err != nil { - log.G(ctx).WithError(err).Error("could not save new root certificate from the dispatcher") - continue - } if err := securityConfig.UpdateRootCA(&newRootCA, newRootCA.Pool); err != nil { log.G(ctx).WithError(err).Error("could not use new root CA from dispatcher") continue } + if err := ca.SaveRootCA(newRootCA, paths.RootCA); err != nil { + log.G(ctx).WithError(err).Error("could not save new root certificate from the dispatcher") + continue + } } } }