From 150e63c6d867f27b83d7cd76f2d0ac83051d0420 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Tue, 7 Apr 2026 15:42:32 -0600 Subject: [PATCH 01/25] RPC boiler plate and code generation (#9693) * Add four new RPC methods to `historyservice` (PauseActivityExecution, UnpauseActivityExecution, ResetActivityExecution, UpdateActivityExecutionOptions) * Add new historyservice request/response messages wrapping the API repo's requests * Generated code First step towards supporting the four activity operator commands on both workflow activities and standalone CHASM activities. The overall aim of this branch is to support operator API commands (Pause, Unpause, Reset, UpdateOptions) for both Standalone Activities (SAA) and Workflow Activities (WA). These will share the same new external workflowservice RPC entrypoints and will be distinguished by the presence of a workflow ID. Pause, Unpause, UpdateOptions, and Reset already exist for WA with an experimental and soon-to-be-deprecated public API. The API handlers added in this PR route to the current experimental implementation. We define Frontend and History service RPC handlers in chasm/lib/activity. Requests for both SAA and WA are routed from Frontend to History using these handlers. The handler in the History service (part of the CHASM ActivityService, which is hosted by the History service) then inspects the workflow ID and activity ID and dispatches according to whether the request is for an SAA or a WA. - [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) No runtime behavior changes. 
--- api/routing/v1/extension.pb.go | 21 +- chasm/lib/activity/frontend.go | 105 +- .../v1/request_response.go-helpers.pb.go | 296 ++++ .../gen/activitypb/v1/request_response.pb.go | 471 ++++++- .../activity/gen/activitypb/v1/service.pb.go | 63 +- .../gen/activitypb/v1/service_client.pb.go | 188 +++ .../gen/activitypb/v1/service_grpc.pb.go | 148 ++ chasm/lib/activity/handler.go | 112 +- .../activity/proto/v1/request_response.proto | 80 +- chasm/lib/activity/proto/v1/service.proto | 96 +- client/history/historytest/clienttest.go | 7 +- cmd/tools/protoc-gen-go-chasm/main.go | 29 +- common/api/metadata.go | 1 + proto/internal/buf.yaml | 9 +- .../server/api/routing/v1/extension.proto | 21 +- service/history/fx.go | 38 +- tests/activity_api_pause_test.go | 1189 ++++++++--------- tests/activity_api_reset_test.go | 265 ++-- tests/activity_api_update_test.go | 737 +++++----- tests/standalone_activity_test.go | 137 +- 20 files changed, 2708 insertions(+), 1305 deletions(-) diff --git a/api/routing/v1/extension.pb.go b/api/routing/v1/extension.pb.go index 0b8e977e2c1..0105a688531 100644 --- a/api/routing/v1/extension.pb.go +++ b/api/routing/v1/extension.pb.go @@ -29,8 +29,9 @@ type RoutingOptions struct { Random bool `protobuf:"varint,1,opt,name=random,proto3" json:"random,omitempty"` // Requests may specify how to obtain the namespace ID. Defaults to the "namespace_id" field. NamespaceId string `protobuf:"bytes,2,opt,name=namespace_id,json=namespaceId,proto3" json:"namespace_id,omitempty"` - // Request will be routed by resolving the namespace ID and business ID to a given shard. - BusinessId string `protobuf:"bytes,3,opt,name=business_id,json=businessId,proto3" json:"business_id,omitempty"` + // Requests will be routed by resolving the namespace ID and business ID to a given shard. + // If multiple fields are specified, the first non-empty value is used. 
+ BusinessId []string `protobuf:"bytes,3,rep,name=business_id,json=businessId,proto3" json:"business_id,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -79,27 +80,27 @@ func (x *RoutingOptions) GetNamespaceId() string { return "" } -func (x *RoutingOptions) GetBusinessId() string { +func (x *RoutingOptions) GetBusinessId() []string { if x != nil { return x.BusinessId } - return "" + return nil } var file_temporal_server_api_routing_v1_extension_proto_extTypes = []protoimpl.ExtensionInfo{ { ExtendedType: (*descriptorpb.MethodOptions)(nil), ExtensionType: (*RoutingOptions)(nil), - Field: 50234, + Field: 7234, Name: "temporal.server.api.routing.v1.routing", - Tag: "bytes,50234,opt,name=routing", + Tag: "bytes,7234,opt,name=routing", Filename: "temporal/server/api/routing/v1/extension.proto", }, } // Extension fields to descriptorpb.MethodOptions. var ( - // optional temporal.server.api.routing.v1.RoutingOptions routing = 50234; + // optional temporal.server.api.routing.v1.RoutingOptions routing = 7234; E_Routing = &file_temporal_server_api_routing_v1_extension_proto_extTypes[0] ) @@ -111,9 +112,9 @@ const file_temporal_server_api_routing_v1_extension_proto_rawDesc = "" + "\x0eRoutingOptions\x12\x16\n" + "\x06random\x18\x01 \x01(\bR\x06random\x12!\n" + "\fnamespace_id\x18\x02 \x01(\tR\vnamespaceId\x12\x1f\n" + - "\vbusiness_id\x18\x03 \x01(\tR\n" + - "businessId:m\n" + - "\arouting\x12\x1e.google.protobuf.MethodOptions\x18\xba\x88\x03 \x01(\v2..temporal.server.api.routing.v1.RoutingOptionsR\arouting\x88\x01\x01B.Z,go.temporal.io/server/api/routing/v1;routingb\x06proto3" + "\vbusiness_id\x18\x03 \x03(\tR\n" + + "businessId:l\n" + + "\arouting\x12\x1e.google.protobuf.MethodOptions\x18\xc28 \x01(\v2..temporal.server.api.routing.v1.RoutingOptionsR\arouting\x88\x01\x01B.Z,go.temporal.io/server/api/routing/v1;routingb\x06proto3" var ( file_temporal_server_api_routing_v1_extension_proto_rawDescOnce sync.Once diff --git 
a/chasm/lib/activity/frontend.go b/chasm/lib/activity/frontend.go index 6dae15ece5b..2d16ccfd7f8 100644 --- a/chasm/lib/activity/frontend.go +++ b/chasm/lib/activity/frontend.go @@ -31,6 +31,10 @@ type FrontendHandler interface { ListActivityExecutions(context.Context, *workflowservice.ListActivityExecutionsRequest) (*workflowservice.ListActivityExecutionsResponse, error) RequestCancelActivityExecution(context.Context, *workflowservice.RequestCancelActivityExecutionRequest) (*workflowservice.RequestCancelActivityExecutionResponse, error) TerminateActivityExecution(context.Context, *workflowservice.TerminateActivityExecutionRequest) (*workflowservice.TerminateActivityExecutionResponse, error) + PauseActivityExecution(context.Context, *workflowservice.PauseActivityExecutionRequest) (*workflowservice.PauseActivityExecutionResponse, error) + UnpauseActivityExecution(context.Context, *workflowservice.UnpauseActivityExecutionRequest) (*workflowservice.UnpauseActivityExecutionResponse, error) + ResetActivityExecution(context.Context, *workflowservice.ResetActivityExecutionRequest) (*workflowservice.ResetActivityExecutionResponse, error) + UpdateActivityExecutionOptions(context.Context, *workflowservice.UpdateActivityExecutionOptionsRequest) (*workflowservice.UpdateActivityExecutionOptionsResponse, error) IsStandaloneActivityEnabled(namespaceName string) bool } @@ -427,8 +431,105 @@ func activityOptionsFromStartRequest(req *workflowservice.StartActivityExecution } } -// applyActivityOptionsToStartRequest copies normalized values from ActivityOptions -// back to the StartActivityExecutionRequest. +func (h *frontendHandler) PauseActivityExecution( + ctx context.Context, + req *workflowservice.PauseActivityExecutionRequest, +) (*workflowservice.PauseActivityExecutionResponse, error) { + if req.GetWorkflowId() == "" && !h.config.Enabled(req.GetNamespace()) { + return nil, ErrStandaloneActivityDisabled + } + + // TODO: validate request fields (e.g. 
namespace, identity length) + namespaceID, err := h.namespaceRegistry.GetNamespaceID(namespace.Name(req.GetNamespace())) + if err != nil { + return nil, err + } + + _, err = h.client.PauseActivityExecution(ctx, &activitypb.PauseActivityExecutionRequest{ + NamespaceId: namespaceID.String(), + FrontendRequest: req, + }) + if err != nil { + return nil, err + } + return &workflowservice.PauseActivityExecutionResponse{}, nil +} + +func (h *frontendHandler) UnpauseActivityExecution( + ctx context.Context, + req *workflowservice.UnpauseActivityExecutionRequest, +) (*workflowservice.UnpauseActivityExecutionResponse, error) { + if req.GetWorkflowId() == "" && !h.config.Enabled(req.GetNamespace()) { + return nil, ErrStandaloneActivityDisabled + } + + // TODO: validate request fields (e.g. namespace, identity length) + namespaceID, err := h.namespaceRegistry.GetNamespaceID(namespace.Name(req.GetNamespace())) + if err != nil { + return nil, err + } + + _, err = h.client.UnpauseActivityExecution(ctx, &activitypb.UnpauseActivityExecutionRequest{ + NamespaceId: namespaceID.String(), + FrontendRequest: req, + }) + if err != nil { + return nil, err + } + return &workflowservice.UnpauseActivityExecutionResponse{}, nil +} + +func (h *frontendHandler) ResetActivityExecution( + ctx context.Context, + req *workflowservice.ResetActivityExecutionRequest, +) (*workflowservice.ResetActivityExecutionResponse, error) { + if req.GetWorkflowId() == "" && !h.config.Enabled(req.GetNamespace()) { + return nil, ErrStandaloneActivityDisabled + } + + // TODO: validate request fields (e.g. 
namespace, identity length) + namespaceID, err := h.namespaceRegistry.GetNamespaceID(namespace.Name(req.GetNamespace())) + if err != nil { + return nil, err + } + + _, err = h.client.ResetActivityExecution(ctx, &activitypb.ResetActivityExecutionRequest{ + NamespaceId: namespaceID.String(), + FrontendRequest: req, + }) + if err != nil { + return nil, err + } + return &workflowservice.ResetActivityExecutionResponse{}, nil +} + +func (h *frontendHandler) UpdateActivityExecutionOptions( + ctx context.Context, + req *workflowservice.UpdateActivityExecutionOptionsRequest, +) (*workflowservice.UpdateActivityExecutionOptionsResponse, error) { + if req.GetWorkflowId() == "" && !h.config.Enabled(req.GetNamespace()) { + return nil, ErrStandaloneActivityDisabled + } + + // TODO: validate request fields (e.g. namespace, identity length, update mask) + namespaceID, err := h.namespaceRegistry.GetNamespaceID(namespace.Name(req.GetNamespace())) + if err != nil { + return nil, err + } + + resp, err := h.client.UpdateActivityExecutionOptions(ctx, &activitypb.UpdateActivityExecutionOptionsRequest{ + NamespaceId: namespaceID.String(), + FrontendRequest: req, + }) + if err != nil { + return nil, err + } + return &workflowservice.UpdateActivityExecutionOptionsResponse{ + ActivityOptions: resp.GetFrontendResponse().GetActivityOptions(), + }, nil +} + +// applyActivityOptionsToStartRequest copies normalized values from ActivityOptions back to the StartActivityExecutionRequest. 
func applyActivityOptionsToStartRequest(opts *apiactivitypb.ActivityOptions, req *workflowservice.StartActivityExecutionRequest) { req.TaskQueue = opts.TaskQueue req.ScheduleToCloseTimeout = opts.ScheduleToCloseTimeout diff --git a/chasm/lib/activity/gen/activitypb/v1/request_response.go-helpers.pb.go b/chasm/lib/activity/gen/activitypb/v1/request_response.go-helpers.pb.go index 517287dc3e2..d90fe2ee24a 100644 --- a/chasm/lib/activity/gen/activitypb/v1/request_response.go-helpers.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/request_response.go-helpers.pb.go @@ -448,3 +448,299 @@ func (this *DeleteActivityExecutionResponse) Equal(that interface{}) bool { return proto.Equal(this, that1) } + +// Marshal an object of type PauseActivityExecutionRequest to the protobuf v3 wire format +func (val *PauseActivityExecutionRequest) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type PauseActivityExecutionRequest from the protobuf v3 wire format +func (val *PauseActivityExecutionRequest) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *PauseActivityExecutionRequest) Size() int { + return proto.Size(val) +} + +// Equal returns whether two PauseActivityExecutionRequest values are equivalent by recursively +// comparing the message's fields. 
+// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *PauseActivityExecutionRequest) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *PauseActivityExecutionRequest + switch t := that.(type) { + case *PauseActivityExecutionRequest: + that1 = t + case PauseActivityExecutionRequest: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} + +// Marshal an object of type PauseActivityExecutionResponse to the protobuf v3 wire format +func (val *PauseActivityExecutionResponse) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type PauseActivityExecutionResponse from the protobuf v3 wire format +func (val *PauseActivityExecutionResponse) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *PauseActivityExecutionResponse) Size() int { + return proto.Size(val) +} + +// Equal returns whether two PauseActivityExecutionResponse values are equivalent by recursively +// comparing the message's fields. 
+// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *PauseActivityExecutionResponse) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *PauseActivityExecutionResponse + switch t := that.(type) { + case *PauseActivityExecutionResponse: + that1 = t + case PauseActivityExecutionResponse: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} + +// Marshal an object of type UnpauseActivityExecutionRequest to the protobuf v3 wire format +func (val *UnpauseActivityExecutionRequest) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type UnpauseActivityExecutionRequest from the protobuf v3 wire format +func (val *UnpauseActivityExecutionRequest) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *UnpauseActivityExecutionRequest) Size() int { + return proto.Size(val) +} + +// Equal returns whether two UnpauseActivityExecutionRequest values are equivalent by recursively +// comparing the message's fields. 
+// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *UnpauseActivityExecutionRequest) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *UnpauseActivityExecutionRequest + switch t := that.(type) { + case *UnpauseActivityExecutionRequest: + that1 = t + case UnpauseActivityExecutionRequest: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} + +// Marshal an object of type UnpauseActivityExecutionResponse to the protobuf v3 wire format +func (val *UnpauseActivityExecutionResponse) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type UnpauseActivityExecutionResponse from the protobuf v3 wire format +func (val *UnpauseActivityExecutionResponse) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *UnpauseActivityExecutionResponse) Size() int { + return proto.Size(val) +} + +// Equal returns whether two UnpauseActivityExecutionResponse values are equivalent by recursively +// comparing the message's fields. 
+// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *UnpauseActivityExecutionResponse) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *UnpauseActivityExecutionResponse + switch t := that.(type) { + case *UnpauseActivityExecutionResponse: + that1 = t + case UnpauseActivityExecutionResponse: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} + +// Marshal an object of type ResetActivityExecutionRequest to the protobuf v3 wire format +func (val *ResetActivityExecutionRequest) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type ResetActivityExecutionRequest from the protobuf v3 wire format +func (val *ResetActivityExecutionRequest) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *ResetActivityExecutionRequest) Size() int { + return proto.Size(val) +} + +// Equal returns whether two ResetActivityExecutionRequest values are equivalent by recursively +// comparing the message's fields. 
+// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *ResetActivityExecutionRequest) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *ResetActivityExecutionRequest + switch t := that.(type) { + case *ResetActivityExecutionRequest: + that1 = t + case ResetActivityExecutionRequest: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} + +// Marshal an object of type ResetActivityExecutionResponse to the protobuf v3 wire format +func (val *ResetActivityExecutionResponse) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type ResetActivityExecutionResponse from the protobuf v3 wire format +func (val *ResetActivityExecutionResponse) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *ResetActivityExecutionResponse) Size() int { + return proto.Size(val) +} + +// Equal returns whether two ResetActivityExecutionResponse values are equivalent by recursively +// comparing the message's fields. 
+// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *ResetActivityExecutionResponse) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *ResetActivityExecutionResponse + switch t := that.(type) { + case *ResetActivityExecutionResponse: + that1 = t + case ResetActivityExecutionResponse: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} + +// Marshal an object of type UpdateActivityExecutionOptionsRequest to the protobuf v3 wire format +func (val *UpdateActivityExecutionOptionsRequest) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type UpdateActivityExecutionOptionsRequest from the protobuf v3 wire format +func (val *UpdateActivityExecutionOptionsRequest) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *UpdateActivityExecutionOptionsRequest) Size() int { + return proto.Size(val) +} + +// Equal returns whether two UpdateActivityExecutionOptionsRequest values are equivalent by recursively +// comparing the message's fields. 
+// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *UpdateActivityExecutionOptionsRequest) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *UpdateActivityExecutionOptionsRequest + switch t := that.(type) { + case *UpdateActivityExecutionOptionsRequest: + that1 = t + case UpdateActivityExecutionOptionsRequest: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} + +// Marshal an object of type UpdateActivityExecutionOptionsResponse to the protobuf v3 wire format +func (val *UpdateActivityExecutionOptionsResponse) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type UpdateActivityExecutionOptionsResponse from the protobuf v3 wire format +func (val *UpdateActivityExecutionOptionsResponse) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *UpdateActivityExecutionOptionsResponse) Size() int { + return proto.Size(val) +} + +// Equal returns whether two UpdateActivityExecutionOptionsResponse values are equivalent by recursively +// comparing the message's fields. 
+// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *UpdateActivityExecutionOptionsResponse) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *UpdateActivityExecutionOptionsResponse + switch t := that.(type) { + case *UpdateActivityExecutionOptionsResponse: + that1 = t + case UpdateActivityExecutionOptionsResponse: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} diff --git a/chasm/lib/activity/gen/activitypb/v1/request_response.pb.go b/chasm/lib/activity/gen/activitypb/v1/request_response.pb.go index 0407199486e..c2ba753c0c6 100644 --- a/chasm/lib/activity/gen/activitypb/v1/request_response.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/request_response.pb.go @@ -575,6 +575,366 @@ func (*DeleteActivityExecutionResponse) Descriptor() ([]byte, []int) { return file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDescGZIP(), []int{11} } +type PauseActivityExecutionRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + NamespaceId string `protobuf:"bytes,1,opt,name=namespace_id,json=namespaceId,proto3" json:"namespace_id,omitempty"` + FrontendRequest *v1.PauseActivityExecutionRequest `protobuf:"bytes,2,opt,name=frontend_request,json=frontendRequest,proto3" json:"frontend_request,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PauseActivityExecutionRequest) Reset() { + *x = PauseActivityExecutionRequest{} + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PauseActivityExecutionRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PauseActivityExecutionRequest) ProtoMessage() {} + +func (x *PauseActivityExecutionRequest) ProtoReflect() protoreflect.Message { + mi := 
&file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[12] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PauseActivityExecutionRequest.ProtoReflect.Descriptor instead. +func (*PauseActivityExecutionRequest) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDescGZIP(), []int{12} +} + +func (x *PauseActivityExecutionRequest) GetNamespaceId() string { + if x != nil { + return x.NamespaceId + } + return "" +} + +func (x *PauseActivityExecutionRequest) GetFrontendRequest() *v1.PauseActivityExecutionRequest { + if x != nil { + return x.FrontendRequest + } + return nil +} + +type PauseActivityExecutionResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PauseActivityExecutionResponse) Reset() { + *x = PauseActivityExecutionResponse{} + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PauseActivityExecutionResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PauseActivityExecutionResponse) ProtoMessage() {} + +func (x *PauseActivityExecutionResponse) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[13] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PauseActivityExecutionResponse.ProtoReflect.Descriptor instead. 
+func (*PauseActivityExecutionResponse) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDescGZIP(), []int{13} +} + +type UnpauseActivityExecutionRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + NamespaceId string `protobuf:"bytes,1,opt,name=namespace_id,json=namespaceId,proto3" json:"namespace_id,omitempty"` + FrontendRequest *v1.UnpauseActivityExecutionRequest `protobuf:"bytes,2,opt,name=frontend_request,json=frontendRequest,proto3" json:"frontend_request,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *UnpauseActivityExecutionRequest) Reset() { + *x = UnpauseActivityExecutionRequest{} + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *UnpauseActivityExecutionRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UnpauseActivityExecutionRequest) ProtoMessage() {} + +func (x *UnpauseActivityExecutionRequest) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[14] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UnpauseActivityExecutionRequest.ProtoReflect.Descriptor instead. 
+func (*UnpauseActivityExecutionRequest) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDescGZIP(), []int{14} +} + +func (x *UnpauseActivityExecutionRequest) GetNamespaceId() string { + if x != nil { + return x.NamespaceId + } + return "" +} + +func (x *UnpauseActivityExecutionRequest) GetFrontendRequest() *v1.UnpauseActivityExecutionRequest { + if x != nil { + return x.FrontendRequest + } + return nil +} + +type UnpauseActivityExecutionResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *UnpauseActivityExecutionResponse) Reset() { + *x = UnpauseActivityExecutionResponse{} + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[15] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *UnpauseActivityExecutionResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UnpauseActivityExecutionResponse) ProtoMessage() {} + +func (x *UnpauseActivityExecutionResponse) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[15] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UnpauseActivityExecutionResponse.ProtoReflect.Descriptor instead. 
+func (*UnpauseActivityExecutionResponse) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDescGZIP(), []int{15} +} + +type ResetActivityExecutionRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + NamespaceId string `protobuf:"bytes,1,opt,name=namespace_id,json=namespaceId,proto3" json:"namespace_id,omitempty"` + FrontendRequest *v1.ResetActivityExecutionRequest `protobuf:"bytes,2,opt,name=frontend_request,json=frontendRequest,proto3" json:"frontend_request,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ResetActivityExecutionRequest) Reset() { + *x = ResetActivityExecutionRequest{} + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[16] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ResetActivityExecutionRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ResetActivityExecutionRequest) ProtoMessage() {} + +func (x *ResetActivityExecutionRequest) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[16] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ResetActivityExecutionRequest.ProtoReflect.Descriptor instead. 
+func (*ResetActivityExecutionRequest) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDescGZIP(), []int{16} +} + +func (x *ResetActivityExecutionRequest) GetNamespaceId() string { + if x != nil { + return x.NamespaceId + } + return "" +} + +func (x *ResetActivityExecutionRequest) GetFrontendRequest() *v1.ResetActivityExecutionRequest { + if x != nil { + return x.FrontendRequest + } + return nil +} + +type ResetActivityExecutionResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ResetActivityExecutionResponse) Reset() { + *x = ResetActivityExecutionResponse{} + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[17] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ResetActivityExecutionResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ResetActivityExecutionResponse) ProtoMessage() {} + +func (x *ResetActivityExecutionResponse) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[17] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ResetActivityExecutionResponse.ProtoReflect.Descriptor instead. 
+func (*ResetActivityExecutionResponse) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDescGZIP(), []int{17} +} + +type UpdateActivityExecutionOptionsRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + NamespaceId string `protobuf:"bytes,1,opt,name=namespace_id,json=namespaceId,proto3" json:"namespace_id,omitempty"` + FrontendRequest *v1.UpdateActivityExecutionOptionsRequest `protobuf:"bytes,2,opt,name=frontend_request,json=frontendRequest,proto3" json:"frontend_request,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *UpdateActivityExecutionOptionsRequest) Reset() { + *x = UpdateActivityExecutionOptionsRequest{} + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[18] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *UpdateActivityExecutionOptionsRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UpdateActivityExecutionOptionsRequest) ProtoMessage() {} + +func (x *UpdateActivityExecutionOptionsRequest) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[18] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UpdateActivityExecutionOptionsRequest.ProtoReflect.Descriptor instead. 
+func (*UpdateActivityExecutionOptionsRequest) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDescGZIP(), []int{18} +} + +func (x *UpdateActivityExecutionOptionsRequest) GetNamespaceId() string { + if x != nil { + return x.NamespaceId + } + return "" +} + +func (x *UpdateActivityExecutionOptionsRequest) GetFrontendRequest() *v1.UpdateActivityExecutionOptionsRequest { + if x != nil { + return x.FrontendRequest + } + return nil +} + +type UpdateActivityExecutionOptionsResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + FrontendResponse *v1.UpdateActivityExecutionOptionsResponse `protobuf:"bytes,1,opt,name=frontend_response,json=frontendResponse,proto3" json:"frontend_response,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *UpdateActivityExecutionOptionsResponse) Reset() { + *x = UpdateActivityExecutionOptionsResponse{} + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[19] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *UpdateActivityExecutionOptionsResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UpdateActivityExecutionOptionsResponse) ProtoMessage() {} + +func (x *UpdateActivityExecutionOptionsResponse) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes[19] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UpdateActivityExecutionOptionsResponse.ProtoReflect.Descriptor instead. 
+func (*UpdateActivityExecutionOptionsResponse) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDescGZIP(), []int{19} +} + +func (x *UpdateActivityExecutionOptionsResponse) GetFrontendResponse() *v1.UpdateActivityExecutionOptionsResponse { + if x != nil { + return x.FrontendResponse + } + return nil +} + var File_temporal_server_chasm_lib_activity_proto_v1_request_response_proto protoreflect.FileDescriptor const file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDesc = "" + @@ -606,7 +966,24 @@ const file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_ra "\x1eDeleteActivityExecutionRequest\x12!\n" + "\fnamespace_id\x18\x01 \x01(\tR\vnamespaceId\x12j\n" + "\x10frontend_request\x18\x02 \x01(\v2?.temporal.api.workflowservice.v1.DeleteActivityExecutionRequestR\x0ffrontendRequest\"!\n" + - "\x1fDeleteActivityExecutionResponseBDZBgo.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypbb\x06proto3" + "\x1fDeleteActivityExecutionResponse\"\xad\x01\n" + + "\x1dPauseActivityExecutionRequest\x12!\n" + + "\fnamespace_id\x18\x01 \x01(\tR\vnamespaceId\x12i\n" + + "\x10frontend_request\x18\x02 \x01(\v2>.temporal.api.workflowservice.v1.PauseActivityExecutionRequestR\x0ffrontendRequest\" \n" + + "\x1ePauseActivityExecutionResponse\"\xb1\x01\n" + + "\x1fUnpauseActivityExecutionRequest\x12!\n" + + "\fnamespace_id\x18\x01 \x01(\tR\vnamespaceId\x12k\n" + + "\x10frontend_request\x18\x02 \x01(\v2@.temporal.api.workflowservice.v1.UnpauseActivityExecutionRequestR\x0ffrontendRequest\"\"\n" + + " UnpauseActivityExecutionResponse\"\xad\x01\n" + + "\x1dResetActivityExecutionRequest\x12!\n" + + "\fnamespace_id\x18\x01 \x01(\tR\vnamespaceId\x12i\n" + + "\x10frontend_request\x18\x02 \x01(\v2>.temporal.api.workflowservice.v1.ResetActivityExecutionRequestR\x0ffrontendRequest\" \n" + + "\x1eResetActivityExecutionResponse\"\xbd\x01\n" + + 
"%UpdateActivityExecutionOptionsRequest\x12!\n" + + "\fnamespace_id\x18\x01 \x01(\tR\vnamespaceId\x12q\n" + + "\x10frontend_request\x18\x02 \x01(\v2F.temporal.api.workflowservice.v1.UpdateActivityExecutionOptionsRequestR\x0ffrontendRequest\"\x9e\x01\n" + + "&UpdateActivityExecutionOptionsResponse\x12t\n" + + "\x11frontend_response\x18\x01 \x01(\v2G.temporal.api.workflowservice.v1.UpdateActivityExecutionOptionsResponseR\x10frontendResponseBDZBgo.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypbb\x06proto3" var ( file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDescOnce sync.Once @@ -620,45 +997,63 @@ func file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_raw return file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDescData } -var file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes = make([]protoimpl.MessageInfo, 12) +var file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_msgTypes = make([]protoimpl.MessageInfo, 20) var file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_goTypes = []any{ - (*StartActivityExecutionRequest)(nil), // 0: temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionRequest - (*StartActivityExecutionResponse)(nil), // 1: temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionResponse - (*DescribeActivityExecutionRequest)(nil), // 2: temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionRequest - (*DescribeActivityExecutionResponse)(nil), // 3: temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionResponse - (*PollActivityExecutionRequest)(nil), // 4: temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionRequest - (*PollActivityExecutionResponse)(nil), // 5: temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionResponse - (*TerminateActivityExecutionRequest)(nil), // 6: 
temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionRequest - (*TerminateActivityExecutionResponse)(nil), // 7: temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionResponse - (*RequestCancelActivityExecutionRequest)(nil), // 8: temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionRequest - (*RequestCancelActivityExecutionResponse)(nil), // 9: temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionResponse - (*DeleteActivityExecutionRequest)(nil), // 10: temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionRequest - (*DeleteActivityExecutionResponse)(nil), // 11: temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionResponse - (*v1.StartActivityExecutionRequest)(nil), // 12: temporal.api.workflowservice.v1.StartActivityExecutionRequest - (*v1.StartActivityExecutionResponse)(nil), // 13: temporal.api.workflowservice.v1.StartActivityExecutionResponse - (*v1.DescribeActivityExecutionRequest)(nil), // 14: temporal.api.workflowservice.v1.DescribeActivityExecutionRequest - (*v1.DescribeActivityExecutionResponse)(nil), // 15: temporal.api.workflowservice.v1.DescribeActivityExecutionResponse - (*v1.PollActivityExecutionRequest)(nil), // 16: temporal.api.workflowservice.v1.PollActivityExecutionRequest - (*v1.PollActivityExecutionResponse)(nil), // 17: temporal.api.workflowservice.v1.PollActivityExecutionResponse - (*v1.TerminateActivityExecutionRequest)(nil), // 18: temporal.api.workflowservice.v1.TerminateActivityExecutionRequest - (*v1.RequestCancelActivityExecutionRequest)(nil), // 19: temporal.api.workflowservice.v1.RequestCancelActivityExecutionRequest - (*v1.DeleteActivityExecutionRequest)(nil), // 20: temporal.api.workflowservice.v1.DeleteActivityExecutionRequest + (*StartActivityExecutionRequest)(nil), // 0: temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionRequest + (*StartActivityExecutionResponse)(nil), // 1: 
temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionResponse + (*DescribeActivityExecutionRequest)(nil), // 2: temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionRequest + (*DescribeActivityExecutionResponse)(nil), // 3: temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionResponse + (*PollActivityExecutionRequest)(nil), // 4: temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionRequest + (*PollActivityExecutionResponse)(nil), // 5: temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionResponse + (*TerminateActivityExecutionRequest)(nil), // 6: temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionRequest + (*TerminateActivityExecutionResponse)(nil), // 7: temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionResponse + (*RequestCancelActivityExecutionRequest)(nil), // 8: temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionRequest + (*RequestCancelActivityExecutionResponse)(nil), // 9: temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionResponse + (*DeleteActivityExecutionRequest)(nil), // 10: temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionRequest + (*DeleteActivityExecutionResponse)(nil), // 11: temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionResponse + (*PauseActivityExecutionRequest)(nil), // 12: temporal.server.chasm.lib.activity.proto.v1.PauseActivityExecutionRequest + (*PauseActivityExecutionResponse)(nil), // 13: temporal.server.chasm.lib.activity.proto.v1.PauseActivityExecutionResponse + (*UnpauseActivityExecutionRequest)(nil), // 14: temporal.server.chasm.lib.activity.proto.v1.UnpauseActivityExecutionRequest + (*UnpauseActivityExecutionResponse)(nil), // 15: temporal.server.chasm.lib.activity.proto.v1.UnpauseActivityExecutionResponse + (*ResetActivityExecutionRequest)(nil), // 16: temporal.server.chasm.lib.activity.proto.v1.ResetActivityExecutionRequest + 
(*ResetActivityExecutionResponse)(nil), // 17: temporal.server.chasm.lib.activity.proto.v1.ResetActivityExecutionResponse + (*UpdateActivityExecutionOptionsRequest)(nil), // 18: temporal.server.chasm.lib.activity.proto.v1.UpdateActivityExecutionOptionsRequest + (*UpdateActivityExecutionOptionsResponse)(nil), // 19: temporal.server.chasm.lib.activity.proto.v1.UpdateActivityExecutionOptionsResponse + (*v1.StartActivityExecutionRequest)(nil), // 20: temporal.api.workflowservice.v1.StartActivityExecutionRequest + (*v1.StartActivityExecutionResponse)(nil), // 21: temporal.api.workflowservice.v1.StartActivityExecutionResponse + (*v1.DescribeActivityExecutionRequest)(nil), // 22: temporal.api.workflowservice.v1.DescribeActivityExecutionRequest + (*v1.DescribeActivityExecutionResponse)(nil), // 23: temporal.api.workflowservice.v1.DescribeActivityExecutionResponse + (*v1.PollActivityExecutionRequest)(nil), // 24: temporal.api.workflowservice.v1.PollActivityExecutionRequest + (*v1.PollActivityExecutionResponse)(nil), // 25: temporal.api.workflowservice.v1.PollActivityExecutionResponse + (*v1.TerminateActivityExecutionRequest)(nil), // 26: temporal.api.workflowservice.v1.TerminateActivityExecutionRequest + (*v1.RequestCancelActivityExecutionRequest)(nil), // 27: temporal.api.workflowservice.v1.RequestCancelActivityExecutionRequest + (*v1.DeleteActivityExecutionRequest)(nil), // 28: temporal.api.workflowservice.v1.DeleteActivityExecutionRequest + (*v1.PauseActivityExecutionRequest)(nil), // 29: temporal.api.workflowservice.v1.PauseActivityExecutionRequest + (*v1.UnpauseActivityExecutionRequest)(nil), // 30: temporal.api.workflowservice.v1.UnpauseActivityExecutionRequest + (*v1.ResetActivityExecutionRequest)(nil), // 31: temporal.api.workflowservice.v1.ResetActivityExecutionRequest + (*v1.UpdateActivityExecutionOptionsRequest)(nil), // 32: temporal.api.workflowservice.v1.UpdateActivityExecutionOptionsRequest + (*v1.UpdateActivityExecutionOptionsResponse)(nil), // 33: 
temporal.api.workflowservice.v1.UpdateActivityExecutionOptionsResponse } var file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_depIdxs = []int32{ - 12, // 0: temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.StartActivityExecutionRequest - 13, // 1: temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionResponse.frontend_response:type_name -> temporal.api.workflowservice.v1.StartActivityExecutionResponse - 14, // 2: temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.DescribeActivityExecutionRequest - 15, // 3: temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionResponse.frontend_response:type_name -> temporal.api.workflowservice.v1.DescribeActivityExecutionResponse - 16, // 4: temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.PollActivityExecutionRequest - 17, // 5: temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionResponse.frontend_response:type_name -> temporal.api.workflowservice.v1.PollActivityExecutionResponse - 18, // 6: temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.TerminateActivityExecutionRequest - 19, // 7: temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.RequestCancelActivityExecutionRequest - 20, // 8: temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.DeleteActivityExecutionRequest - 9, // [9:9] is the sub-list for method output_type - 9, // [9:9] is the sub-list for method input_type - 9, // [9:9] is the sub-list for extension type_name - 9, // [9:9] is the 
sub-list for extension extendee - 0, // [0:9] is the sub-list for field type_name + 20, // 0: temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.StartActivityExecutionRequest + 21, // 1: temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionResponse.frontend_response:type_name -> temporal.api.workflowservice.v1.StartActivityExecutionResponse + 22, // 2: temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.DescribeActivityExecutionRequest + 23, // 3: temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionResponse.frontend_response:type_name -> temporal.api.workflowservice.v1.DescribeActivityExecutionResponse + 24, // 4: temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.PollActivityExecutionRequest + 25, // 5: temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionResponse.frontend_response:type_name -> temporal.api.workflowservice.v1.PollActivityExecutionResponse + 26, // 6: temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.TerminateActivityExecutionRequest + 27, // 7: temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.RequestCancelActivityExecutionRequest + 28, // 8: temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.DeleteActivityExecutionRequest + 29, // 9: temporal.server.chasm.lib.activity.proto.v1.PauseActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.PauseActivityExecutionRequest + 30, // 10: 
temporal.server.chasm.lib.activity.proto.v1.UnpauseActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.UnpauseActivityExecutionRequest + 31, // 11: temporal.server.chasm.lib.activity.proto.v1.ResetActivityExecutionRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.ResetActivityExecutionRequest + 32, // 12: temporal.server.chasm.lib.activity.proto.v1.UpdateActivityExecutionOptionsRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.UpdateActivityExecutionOptionsRequest + 33, // 13: temporal.server.chasm.lib.activity.proto.v1.UpdateActivityExecutionOptionsResponse.frontend_response:type_name -> temporal.api.workflowservice.v1.UpdateActivityExecutionOptionsResponse + 14, // [14:14] is the sub-list for method output_type + 14, // [14:14] is the sub-list for method input_type + 14, // [14:14] is the sub-list for extension type_name + 14, // [14:14] is the sub-list for extension extendee + 0, // [0:14] is the sub-list for field type_name } func init() { file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_init() } @@ -672,7 +1067,7 @@ func file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_ini GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDesc), len(file_temporal_server_chasm_lib_activity_proto_v1_request_response_proto_rawDesc)), NumEnums: 0, - NumMessages: 12, + NumMessages: 20, NumExtensions: 0, NumServices: 0, }, diff --git a/chasm/lib/activity/gen/activitypb/v1/service.pb.go b/chasm/lib/activity/gen/activitypb/v1/service.pb.go index 2bca0c6046c..34266e9af32 100644 --- a/chasm/lib/activity/gen/activitypb/v1/service.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/service.pb.go @@ -27,15 +27,18 @@ var File_temporal_server_chasm_lib_activity_proto_v1_service_proto protoreflect. 
const file_temporal_server_chasm_lib_activity_proto_v1_service_proto_rawDesc = "" + "\n" + - "9temporal/server/chasm/lib/activity/proto/v1/service.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1aBtemporal/server/chasm/lib/activity/proto/v1/request_response.proto\x1a0temporal/server/api/common/v1/api_category.proto\x1a.temporal/server/api/routing/v1/extension.proto2\xf2\n" + - "\n" + + "9temporal/server/chasm/lib/activity/proto/v1/service.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1aBtemporal/server/chasm/lib/activity/proto/v1/request_response.proto\x1a.temporal/server/api/routing/v1/extension.proto\x1a0temporal/server/api/common/v1/api_category.proto2\x80\x13\n" + "\x0fActivityService\x12\xdb\x01\n" + - "\x16StartActivityExecution\x12J.temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionRequest\x1aK.temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionResponse\"(\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1e\x1a\x1cfrontend_request.activity_id\x12\xe4\x01\n" + - "\x19DescribeActivityExecution\x12M.temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionRequest\x1aN.temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionResponse\"(\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1e\x1a\x1cfrontend_request.activity_id\x12\xd8\x01\n" + - "\x15PollActivityExecution\x12I.temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionRequest\x1aJ.temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionResponse\"(\x8a\xb5\x18\x02\b\x02\xd2\xc3\x18\x1e\x1a\x1cfrontend_request.activity_id\x12\xe7\x01\n" + - "\x1aTerminateActivityExecution\x12N.temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionRequest\x1aO.temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionResponse\"(\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1e\x1a\x1cfrontend_request.activity_id\x12\xf3\x01\n" + - 
"\x1eRequestCancelActivityExecution\x12R.temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionRequest\x1aS.temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionResponse\"(\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1e\x1a\x1cfrontend_request.activity_id\x12\xde\x01\n" + - "\x17DeleteActivityExecution\x12K.temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionRequest\x1aL.temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionResponse\"(\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1e\x1a\x1cfrontend_request.activity_idBDZBgo.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypbb\x06proto3" + "\x16StartActivityExecution\x12J.temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionRequest\x1aK.temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.activity_id\x8a\xb5\x18\x02\b\x01\x12\xe4\x01\n" + + "\x19DescribeActivityExecution\x12M.temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionRequest\x1aN.temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.activity_id\x8a\xb5\x18\x02\b\x01\x12\xd8\x01\n" + + "\x15PollActivityExecution\x12I.temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionRequest\x1aJ.temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.activity_id\x8a\xb5\x18\x02\b\x02\x12\xe7\x01\n" + + "\x1aTerminateActivityExecution\x12N.temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionRequest\x1aO.temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.activity_id\x8a\xb5\x18\x02\b\x01\x12\xf3\x01\n" + + 
"\x1eRequestCancelActivityExecution\x12R.temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionRequest\x1aS.temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.activity_id\x8a\xb5\x18\x02\b\x01\x12\xde\x01\n" + + "\x17DeleteActivityExecution\x12K.temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionRequest\x1aL.temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.activity_id\x8a\xb5\x18\x02\b\x01\x12\xf9\x01\n" + + "\x16PauseActivityExecution\x12J.temporal.server.chasm.lib.activity.proto.v1.PauseActivityExecutionRequest\x1aK.temporal.server.chasm.lib.activity.proto.v1.PauseActivityExecutionResponse\"F\x92\xc4\x03<\x1a\x1cfrontend_request.workflow_id\x1a\x1cfrontend_request.activity_id\x8a\xb5\x18\x02\b\x01\x12\xff\x01\n" + + "\x18UnpauseActivityExecution\x12L.temporal.server.chasm.lib.activity.proto.v1.UnpauseActivityExecutionRequest\x1aM.temporal.server.chasm.lib.activity.proto.v1.UnpauseActivityExecutionResponse\"F\x92\xc4\x03<\x1a\x1cfrontend_request.workflow_id\x1a\x1cfrontend_request.activity_id\x8a\xb5\x18\x02\b\x01\x12\xf9\x01\n" + + "\x16ResetActivityExecution\x12J.temporal.server.chasm.lib.activity.proto.v1.ResetActivityExecutionRequest\x1aK.temporal.server.chasm.lib.activity.proto.v1.ResetActivityExecutionResponse\"F\x92\xc4\x03<\x1a\x1cfrontend_request.workflow_id\x1a\x1cfrontend_request.activity_id\x8a\xb5\x18\x02\b\x01\x12\x91\x02\n" + + "\x1eUpdateActivityExecutionOptions\x12R.temporal.server.chasm.lib.activity.proto.v1.UpdateActivityExecutionOptionsRequest\x1aS.temporal.server.chasm.lib.activity.proto.v1.UpdateActivityExecutionOptionsResponse\"F\x92\xc4\x03<\x1a\x1cfrontend_request.workflow_id\x1a\x1cfrontend_request.activity_id\x8a\xb5\x18\x02\b\x01BDZBgo.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypbb\x06proto3" var 
file_temporal_server_chasm_lib_activity_proto_v1_service_proto_goTypes = []any{ (*StartActivityExecutionRequest)(nil), // 0: temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionRequest @@ -44,12 +47,20 @@ var file_temporal_server_chasm_lib_activity_proto_v1_service_proto_goTypes = []a (*TerminateActivityExecutionRequest)(nil), // 3: temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionRequest (*RequestCancelActivityExecutionRequest)(nil), // 4: temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionRequest (*DeleteActivityExecutionRequest)(nil), // 5: temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionRequest - (*StartActivityExecutionResponse)(nil), // 6: temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionResponse - (*DescribeActivityExecutionResponse)(nil), // 7: temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionResponse - (*PollActivityExecutionResponse)(nil), // 8: temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionResponse - (*TerminateActivityExecutionResponse)(nil), // 9: temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionResponse - (*RequestCancelActivityExecutionResponse)(nil), // 10: temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionResponse - (*DeleteActivityExecutionResponse)(nil), // 11: temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionResponse + (*PauseActivityExecutionRequest)(nil), // 6: temporal.server.chasm.lib.activity.proto.v1.PauseActivityExecutionRequest + (*UnpauseActivityExecutionRequest)(nil), // 7: temporal.server.chasm.lib.activity.proto.v1.UnpauseActivityExecutionRequest + (*ResetActivityExecutionRequest)(nil), // 8: temporal.server.chasm.lib.activity.proto.v1.ResetActivityExecutionRequest + (*UpdateActivityExecutionOptionsRequest)(nil), // 9: temporal.server.chasm.lib.activity.proto.v1.UpdateActivityExecutionOptionsRequest + (*StartActivityExecutionResponse)(nil), 
// 10: temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionResponse + (*DescribeActivityExecutionResponse)(nil), // 11: temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionResponse + (*PollActivityExecutionResponse)(nil), // 12: temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionResponse + (*TerminateActivityExecutionResponse)(nil), // 13: temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionResponse + (*RequestCancelActivityExecutionResponse)(nil), // 14: temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionResponse + (*DeleteActivityExecutionResponse)(nil), // 15: temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionResponse + (*PauseActivityExecutionResponse)(nil), // 16: temporal.server.chasm.lib.activity.proto.v1.PauseActivityExecutionResponse + (*UnpauseActivityExecutionResponse)(nil), // 17: temporal.server.chasm.lib.activity.proto.v1.UnpauseActivityExecutionResponse + (*ResetActivityExecutionResponse)(nil), // 18: temporal.server.chasm.lib.activity.proto.v1.ResetActivityExecutionResponse + (*UpdateActivityExecutionOptionsResponse)(nil), // 19: temporal.server.chasm.lib.activity.proto.v1.UpdateActivityExecutionOptionsResponse } var file_temporal_server_chasm_lib_activity_proto_v1_service_proto_depIdxs = []int32{ 0, // 0: temporal.server.chasm.lib.activity.proto.v1.ActivityService.StartActivityExecution:input_type -> temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionRequest @@ -58,14 +69,22 @@ var file_temporal_server_chasm_lib_activity_proto_v1_service_proto_depIdxs = []i 3, // 3: temporal.server.chasm.lib.activity.proto.v1.ActivityService.TerminateActivityExecution:input_type -> temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionRequest 4, // 4: temporal.server.chasm.lib.activity.proto.v1.ActivityService.RequestCancelActivityExecution:input_type -> 
temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionRequest 5, // 5: temporal.server.chasm.lib.activity.proto.v1.ActivityService.DeleteActivityExecution:input_type -> temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionRequest - 6, // 6: temporal.server.chasm.lib.activity.proto.v1.ActivityService.StartActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionResponse - 7, // 7: temporal.server.chasm.lib.activity.proto.v1.ActivityService.DescribeActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionResponse - 8, // 8: temporal.server.chasm.lib.activity.proto.v1.ActivityService.PollActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionResponse - 9, // 9: temporal.server.chasm.lib.activity.proto.v1.ActivityService.TerminateActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionResponse - 10, // 10: temporal.server.chasm.lib.activity.proto.v1.ActivityService.RequestCancelActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionResponse - 11, // 11: temporal.server.chasm.lib.activity.proto.v1.ActivityService.DeleteActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionResponse - 6, // [6:12] is the sub-list for method output_type - 0, // [0:6] is the sub-list for method input_type + 6, // 6: temporal.server.chasm.lib.activity.proto.v1.ActivityService.PauseActivityExecution:input_type -> temporal.server.chasm.lib.activity.proto.v1.PauseActivityExecutionRequest + 7, // 7: temporal.server.chasm.lib.activity.proto.v1.ActivityService.UnpauseActivityExecution:input_type -> temporal.server.chasm.lib.activity.proto.v1.UnpauseActivityExecutionRequest + 8, // 8: temporal.server.chasm.lib.activity.proto.v1.ActivityService.ResetActivityExecution:input_type -> 
temporal.server.chasm.lib.activity.proto.v1.ResetActivityExecutionRequest + 9, // 9: temporal.server.chasm.lib.activity.proto.v1.ActivityService.UpdateActivityExecutionOptions:input_type -> temporal.server.chasm.lib.activity.proto.v1.UpdateActivityExecutionOptionsRequest + 10, // 10: temporal.server.chasm.lib.activity.proto.v1.ActivityService.StartActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionResponse + 11, // 11: temporal.server.chasm.lib.activity.proto.v1.ActivityService.DescribeActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionResponse + 12, // 12: temporal.server.chasm.lib.activity.proto.v1.ActivityService.PollActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.PollActivityExecutionResponse + 13, // 13: temporal.server.chasm.lib.activity.proto.v1.ActivityService.TerminateActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.TerminateActivityExecutionResponse + 14, // 14: temporal.server.chasm.lib.activity.proto.v1.ActivityService.RequestCancelActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.RequestCancelActivityExecutionResponse + 15, // 15: temporal.server.chasm.lib.activity.proto.v1.ActivityService.DeleteActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.DeleteActivityExecutionResponse + 16, // 16: temporal.server.chasm.lib.activity.proto.v1.ActivityService.PauseActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.PauseActivityExecutionResponse + 17, // 17: temporal.server.chasm.lib.activity.proto.v1.ActivityService.UnpauseActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.UnpauseActivityExecutionResponse + 18, // 18: temporal.server.chasm.lib.activity.proto.v1.ActivityService.ResetActivityExecution:output_type -> temporal.server.chasm.lib.activity.proto.v1.ResetActivityExecutionResponse + 19, // 19: 
temporal.server.chasm.lib.activity.proto.v1.ActivityService.UpdateActivityExecutionOptions:output_type -> temporal.server.chasm.lib.activity.proto.v1.UpdateActivityExecutionOptionsResponse + 10, // [10:20] is the sub-list for method output_type + 0, // [0:10] is the sub-list for method input_type 0, // [0:0] is the sub-list for extension type_name 0, // [0:0] is the sub-list for extension extendee 0, // [0:0] is the sub-list for field type_name diff --git a/chasm/lib/activity/gen/activitypb/v1/service_client.pb.go b/chasm/lib/activity/gen/activitypb/v1/service_client.pb.go index b1d80f018f1..e62ce73e257 100644 --- a/chasm/lib/activity/gen/activitypb/v1/service_client.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/service_client.pb.go @@ -316,3 +316,191 @@ func (c *ActivityServiceLayeredClient) DeleteActivityExecution( } return backoff.ThrottleRetryContextWithReturn(ctx, call, c.retryPolicy, common.IsServiceClientTransientError) } +func (c *ActivityServiceLayeredClient) callPauseActivityExecutionNoRetry( + ctx context.Context, + request *PauseActivityExecutionRequest, + opts ...grpc.CallOption, +) (*PauseActivityExecutionResponse, error) { + var response *PauseActivityExecutionResponse + var err error + startTime := time.Now().UTC() + // the caller is a namespace, hence the tag below. 
+ caller := headers.GetCallerInfo(ctx).CallerName + metricsHandler := c.metricsHandler.WithTags( + metrics.OperationTag("ActivityService.PauseActivityExecution"), + metrics.NamespaceTag(caller), + metrics.ServiceRoleTag(metrics.HistoryRoleTagValue), + ) + metrics.ClientRequests.With(metricsHandler).Record(1) + defer func() { + if err != nil { + metrics.ClientFailures.With(metricsHandler).Record(1, metrics.ServiceErrorTypeTag(err)) + } + metrics.ClientLatency.With(metricsHandler).Record(time.Since(startTime)) + }() + businessID := request.GetFrontendRequest().GetWorkflowId() + if businessID == "" { + businessID = request.GetFrontendRequest().GetActivityId() + } + shardID := common.WorkflowIDToHistoryShard(request.GetNamespaceId(), businessID, c.numShards) + op := func(ctx context.Context, client ActivityServiceClient) error { + var err error + ctx, cancel := context.WithTimeout(ctx, history.DefaultTimeout) + defer cancel() + response, err = client.PauseActivityExecution(ctx, request, opts...) + return err + } + err = c.redirector.Execute(ctx, shardID, op) + return response, err +} +func (c *ActivityServiceLayeredClient) PauseActivityExecution( + ctx context.Context, + request *PauseActivityExecutionRequest, + opts ...grpc.CallOption, +) (*PauseActivityExecutionResponse, error) { + call := func(ctx context.Context) (*PauseActivityExecutionResponse, error) { + return c.callPauseActivityExecutionNoRetry(ctx, request, opts...) + } + return backoff.ThrottleRetryContextWithReturn(ctx, call, c.retryPolicy, common.IsServiceClientTransientError) +} +func (c *ActivityServiceLayeredClient) callUnpauseActivityExecutionNoRetry( + ctx context.Context, + request *UnpauseActivityExecutionRequest, + opts ...grpc.CallOption, +) (*UnpauseActivityExecutionResponse, error) { + var response *UnpauseActivityExecutionResponse + var err error + startTime := time.Now().UTC() + // the caller is a namespace, hence the tag below. 
+ caller := headers.GetCallerInfo(ctx).CallerName + metricsHandler := c.metricsHandler.WithTags( + metrics.OperationTag("ActivityService.UnpauseActivityExecution"), + metrics.NamespaceTag(caller), + metrics.ServiceRoleTag(metrics.HistoryRoleTagValue), + ) + metrics.ClientRequests.With(metricsHandler).Record(1) + defer func() { + if err != nil { + metrics.ClientFailures.With(metricsHandler).Record(1, metrics.ServiceErrorTypeTag(err)) + } + metrics.ClientLatency.With(metricsHandler).Record(time.Since(startTime)) + }() + businessID := request.GetFrontendRequest().GetWorkflowId() + if businessID == "" { + businessID = request.GetFrontendRequest().GetActivityId() + } + shardID := common.WorkflowIDToHistoryShard(request.GetNamespaceId(), businessID, c.numShards) + op := func(ctx context.Context, client ActivityServiceClient) error { + var err error + ctx, cancel := context.WithTimeout(ctx, history.DefaultTimeout) + defer cancel() + response, err = client.UnpauseActivityExecution(ctx, request, opts...) + return err + } + err = c.redirector.Execute(ctx, shardID, op) + return response, err +} +func (c *ActivityServiceLayeredClient) UnpauseActivityExecution( + ctx context.Context, + request *UnpauseActivityExecutionRequest, + opts ...grpc.CallOption, +) (*UnpauseActivityExecutionResponse, error) { + call := func(ctx context.Context) (*UnpauseActivityExecutionResponse, error) { + return c.callUnpauseActivityExecutionNoRetry(ctx, request, opts...) + } + return backoff.ThrottleRetryContextWithReturn(ctx, call, c.retryPolicy, common.IsServiceClientTransientError) +} +func (c *ActivityServiceLayeredClient) callResetActivityExecutionNoRetry( + ctx context.Context, + request *ResetActivityExecutionRequest, + opts ...grpc.CallOption, +) (*ResetActivityExecutionResponse, error) { + var response *ResetActivityExecutionResponse + var err error + startTime := time.Now().UTC() + // the caller is a namespace, hence the tag below. 
+ caller := headers.GetCallerInfo(ctx).CallerName + metricsHandler := c.metricsHandler.WithTags( + metrics.OperationTag("ActivityService.ResetActivityExecution"), + metrics.NamespaceTag(caller), + metrics.ServiceRoleTag(metrics.HistoryRoleTagValue), + ) + metrics.ClientRequests.With(metricsHandler).Record(1) + defer func() { + if err != nil { + metrics.ClientFailures.With(metricsHandler).Record(1, metrics.ServiceErrorTypeTag(err)) + } + metrics.ClientLatency.With(metricsHandler).Record(time.Since(startTime)) + }() + businessID := request.GetFrontendRequest().GetWorkflowId() + if businessID == "" { + businessID = request.GetFrontendRequest().GetActivityId() + } + shardID := common.WorkflowIDToHistoryShard(request.GetNamespaceId(), businessID, c.numShards) + op := func(ctx context.Context, client ActivityServiceClient) error { + var err error + ctx, cancel := context.WithTimeout(ctx, history.DefaultTimeout) + defer cancel() + response, err = client.ResetActivityExecution(ctx, request, opts...) + return err + } + err = c.redirector.Execute(ctx, shardID, op) + return response, err +} +func (c *ActivityServiceLayeredClient) ResetActivityExecution( + ctx context.Context, + request *ResetActivityExecutionRequest, + opts ...grpc.CallOption, +) (*ResetActivityExecutionResponse, error) { + call := func(ctx context.Context) (*ResetActivityExecutionResponse, error) { + return c.callResetActivityExecutionNoRetry(ctx, request, opts...) + } + return backoff.ThrottleRetryContextWithReturn(ctx, call, c.retryPolicy, common.IsServiceClientTransientError) +} +func (c *ActivityServiceLayeredClient) callUpdateActivityExecutionOptionsNoRetry( + ctx context.Context, + request *UpdateActivityExecutionOptionsRequest, + opts ...grpc.CallOption, +) (*UpdateActivityExecutionOptionsResponse, error) { + var response *UpdateActivityExecutionOptionsResponse + var err error + startTime := time.Now().UTC() + // the caller is a namespace, hence the tag below. 
+ caller := headers.GetCallerInfo(ctx).CallerName + metricsHandler := c.metricsHandler.WithTags( + metrics.OperationTag("ActivityService.UpdateActivityExecutionOptions"), + metrics.NamespaceTag(caller), + metrics.ServiceRoleTag(metrics.HistoryRoleTagValue), + ) + metrics.ClientRequests.With(metricsHandler).Record(1) + defer func() { + if err != nil { + metrics.ClientFailures.With(metricsHandler).Record(1, metrics.ServiceErrorTypeTag(err)) + } + metrics.ClientLatency.With(metricsHandler).Record(time.Since(startTime)) + }() + businessID := request.GetFrontendRequest().GetWorkflowId() + if businessID == "" { + businessID = request.GetFrontendRequest().GetActivityId() + } + shardID := common.WorkflowIDToHistoryShard(request.GetNamespaceId(), businessID, c.numShards) + op := func(ctx context.Context, client ActivityServiceClient) error { + var err error + ctx, cancel := context.WithTimeout(ctx, history.DefaultTimeout) + defer cancel() + response, err = client.UpdateActivityExecutionOptions(ctx, request, opts...) + return err + } + err = c.redirector.Execute(ctx, shardID, op) + return response, err +} +func (c *ActivityServiceLayeredClient) UpdateActivityExecutionOptions( + ctx context.Context, + request *UpdateActivityExecutionOptionsRequest, + opts ...grpc.CallOption, +) (*UpdateActivityExecutionOptionsResponse, error) { + call := func(ctx context.Context) (*UpdateActivityExecutionOptionsResponse, error) { + return c.callUpdateActivityExecutionOptionsNoRetry(ctx, request, opts...) 
+ } + return backoff.ThrottleRetryContextWithReturn(ctx, call, c.retryPolicy, common.IsServiceClientTransientError) +} diff --git a/chasm/lib/activity/gen/activitypb/v1/service_grpc.pb.go b/chasm/lib/activity/gen/activitypb/v1/service_grpc.pb.go index f02184fbd40..7d7f0bafaba 100644 --- a/chasm/lib/activity/gen/activitypb/v1/service_grpc.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/service_grpc.pb.go @@ -26,6 +26,10 @@ const ( ActivityService_TerminateActivityExecution_FullMethodName = "/temporal.server.chasm.lib.activity.proto.v1.ActivityService/TerminateActivityExecution" ActivityService_RequestCancelActivityExecution_FullMethodName = "/temporal.server.chasm.lib.activity.proto.v1.ActivityService/RequestCancelActivityExecution" ActivityService_DeleteActivityExecution_FullMethodName = "/temporal.server.chasm.lib.activity.proto.v1.ActivityService/DeleteActivityExecution" + ActivityService_PauseActivityExecution_FullMethodName = "/temporal.server.chasm.lib.activity.proto.v1.ActivityService/PauseActivityExecution" + ActivityService_UnpauseActivityExecution_FullMethodName = "/temporal.server.chasm.lib.activity.proto.v1.ActivityService/UnpauseActivityExecution" + ActivityService_ResetActivityExecution_FullMethodName = "/temporal.server.chasm.lib.activity.proto.v1.ActivityService/ResetActivityExecution" + ActivityService_UpdateActivityExecutionOptions_FullMethodName = "/temporal.server.chasm.lib.activity.proto.v1.ActivityService/UpdateActivityExecutionOptions" ) // ActivityServiceClient is the client API for ActivityService service. 
@@ -38,6 +42,10 @@ type ActivityServiceClient interface { TerminateActivityExecution(ctx context.Context, in *TerminateActivityExecutionRequest, opts ...grpc.CallOption) (*TerminateActivityExecutionResponse, error) RequestCancelActivityExecution(ctx context.Context, in *RequestCancelActivityExecutionRequest, opts ...grpc.CallOption) (*RequestCancelActivityExecutionResponse, error) DeleteActivityExecution(ctx context.Context, in *DeleteActivityExecutionRequest, opts ...grpc.CallOption) (*DeleteActivityExecutionResponse, error) + PauseActivityExecution(ctx context.Context, in *PauseActivityExecutionRequest, opts ...grpc.CallOption) (*PauseActivityExecutionResponse, error) + UnpauseActivityExecution(ctx context.Context, in *UnpauseActivityExecutionRequest, opts ...grpc.CallOption) (*UnpauseActivityExecutionResponse, error) + ResetActivityExecution(ctx context.Context, in *ResetActivityExecutionRequest, opts ...grpc.CallOption) (*ResetActivityExecutionResponse, error) + UpdateActivityExecutionOptions(ctx context.Context, in *UpdateActivityExecutionOptionsRequest, opts ...grpc.CallOption) (*UpdateActivityExecutionOptionsResponse, error) } type activityServiceClient struct { @@ -102,6 +110,42 @@ func (c *activityServiceClient) DeleteActivityExecution(ctx context.Context, in return out, nil } +func (c *activityServiceClient) PauseActivityExecution(ctx context.Context, in *PauseActivityExecutionRequest, opts ...grpc.CallOption) (*PauseActivityExecutionResponse, error) { + out := new(PauseActivityExecutionResponse) + err := c.cc.Invoke(ctx, ActivityService_PauseActivityExecution_FullMethodName, in, out, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *activityServiceClient) UnpauseActivityExecution(ctx context.Context, in *UnpauseActivityExecutionRequest, opts ...grpc.CallOption) (*UnpauseActivityExecutionResponse, error) { + out := new(UnpauseActivityExecutionResponse) + err := c.cc.Invoke(ctx, ActivityService_UnpauseActivityExecution_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *activityServiceClient) ResetActivityExecution(ctx context.Context, in *ResetActivityExecutionRequest, opts ...grpc.CallOption) (*ResetActivityExecutionResponse, error) { + out := new(ResetActivityExecutionResponse) + err := c.cc.Invoke(ctx, ActivityService_ResetActivityExecution_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *activityServiceClient) UpdateActivityExecutionOptions(ctx context.Context, in *UpdateActivityExecutionOptionsRequest, opts ...grpc.CallOption) (*UpdateActivityExecutionOptionsResponse, error) { + out := new(UpdateActivityExecutionOptionsResponse) + err := c.cc.Invoke(ctx, ActivityService_UpdateActivityExecutionOptions_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + // ActivityServiceServer is the server API for ActivityService service. 
// All implementations must embed UnimplementedActivityServiceServer // for forward compatibility @@ -112,6 +156,10 @@ type ActivityServiceServer interface { TerminateActivityExecution(context.Context, *TerminateActivityExecutionRequest) (*TerminateActivityExecutionResponse, error) RequestCancelActivityExecution(context.Context, *RequestCancelActivityExecutionRequest) (*RequestCancelActivityExecutionResponse, error) DeleteActivityExecution(context.Context, *DeleteActivityExecutionRequest) (*DeleteActivityExecutionResponse, error) + PauseActivityExecution(context.Context, *PauseActivityExecutionRequest) (*PauseActivityExecutionResponse, error) + UnpauseActivityExecution(context.Context, *UnpauseActivityExecutionRequest) (*UnpauseActivityExecutionResponse, error) + ResetActivityExecution(context.Context, *ResetActivityExecutionRequest) (*ResetActivityExecutionResponse, error) + UpdateActivityExecutionOptions(context.Context, *UpdateActivityExecutionOptionsRequest) (*UpdateActivityExecutionOptionsResponse, error) mustEmbedUnimplementedActivityServiceServer() } @@ -137,6 +185,18 @@ func (UnimplementedActivityServiceServer) RequestCancelActivityExecution(context func (UnimplementedActivityServiceServer) DeleteActivityExecution(context.Context, *DeleteActivityExecutionRequest) (*DeleteActivityExecutionResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method DeleteActivityExecution not implemented") } +func (UnimplementedActivityServiceServer) PauseActivityExecution(context.Context, *PauseActivityExecutionRequest) (*PauseActivityExecutionResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method PauseActivityExecution not implemented") +} +func (UnimplementedActivityServiceServer) UnpauseActivityExecution(context.Context, *UnpauseActivityExecutionRequest) (*UnpauseActivityExecutionResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method UnpauseActivityExecution not implemented") +} +func 
(UnimplementedActivityServiceServer) ResetActivityExecution(context.Context, *ResetActivityExecutionRequest) (*ResetActivityExecutionResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method ResetActivityExecution not implemented") +} +func (UnimplementedActivityServiceServer) UpdateActivityExecutionOptions(context.Context, *UpdateActivityExecutionOptionsRequest) (*UpdateActivityExecutionOptionsResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method UpdateActivityExecutionOptions not implemented") +} func (UnimplementedActivityServiceServer) mustEmbedUnimplementedActivityServiceServer() {} // UnsafeActivityServiceServer may be embedded to opt out of forward compatibility for this service. @@ -258,6 +318,78 @@ func _ActivityService_DeleteActivityExecution_Handler(srv interface{}, ctx conte return interceptor(ctx, in, info, handler) } +func _ActivityService_PauseActivityExecution_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(PauseActivityExecutionRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ActivityServiceServer).PauseActivityExecution(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: ActivityService_PauseActivityExecution_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ActivityServiceServer).PauseActivityExecution(ctx, req.(*PauseActivityExecutionRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _ActivityService_UnpauseActivityExecution_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(UnpauseActivityExecutionRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return 
srv.(ActivityServiceServer).UnpauseActivityExecution(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: ActivityService_UnpauseActivityExecution_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ActivityServiceServer).UnpauseActivityExecution(ctx, req.(*UnpauseActivityExecutionRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _ActivityService_ResetActivityExecution_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ResetActivityExecutionRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ActivityServiceServer).ResetActivityExecution(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: ActivityService_ResetActivityExecution_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ActivityServiceServer).ResetActivityExecution(ctx, req.(*ResetActivityExecutionRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _ActivityService_UpdateActivityExecutionOptions_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(UpdateActivityExecutionOptionsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ActivityServiceServer).UpdateActivityExecutionOptions(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: ActivityService_UpdateActivityExecutionOptions_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ActivityServiceServer).UpdateActivityExecutionOptions(ctx, req.(*UpdateActivityExecutionOptionsRequest)) + } + return interceptor(ctx, in, info, handler) +} + // ActivityService_ServiceDesc is the 
grpc.ServiceDesc for ActivityService service. // It's only intended for direct use with grpc.RegisterService, // and not to be introspected or modified (even as a copy) @@ -289,6 +421,22 @@ var ActivityService_ServiceDesc = grpc.ServiceDesc{ MethodName: "DeleteActivityExecution", Handler: _ActivityService_DeleteActivityExecution_Handler, }, + { + MethodName: "PauseActivityExecution", + Handler: _ActivityService_PauseActivityExecution_Handler, + }, + { + MethodName: "UnpauseActivityExecution", + Handler: _ActivityService_UnpauseActivityExecution_Handler, + }, + { + MethodName: "ResetActivityExecution", + Handler: _ActivityService_ResetActivityExecution_Handler, + }, + { + MethodName: "UpdateActivityExecutionOptions", + Handler: _ActivityService_UpdateActivityExecutionOptions_Handler, + }, }, Streams: []grpc.StreamDesc{}, Metadata: "temporal/server/chasm/lib/activity/proto/v1/service.proto", diff --git a/chasm/lib/activity/handler.go b/chasm/lib/activity/handler.go index 378776ebca4..0bb927d16b5 100644 --- a/chasm/lib/activity/handler.go +++ b/chasm/lib/activity/handler.go @@ -8,6 +8,7 @@ import ( enumspb "go.temporal.io/api/enums/v1" "go.temporal.io/api/serviceerror" "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/chasm" "go.temporal.io/server/chasm/lib/activity/gen/activitypb/v1" "go.temporal.io/server/common/contextutil" @@ -32,14 +33,16 @@ var ( type handler struct { activitypb.UnimplementedActivityServiceServer config *Config + historyHandler historyservice.HistoryServiceServer logger log.Logger metricsHandler metrics.Handler namespaceRegistry namespace.Registry } -func newHandler(config *Config, metricsHandler metrics.Handler, logger log.Logger, namespaceRegistry namespace.Registry) *handler { +func newHandler(config *Config, historyHandler historyservice.HistoryServiceServer, metricsHandler metrics.Handler, logger log.Logger, namespaceRegistry namespace.Registry) *handler { return &handler{ config: 
config, + historyHandler: historyHandler, logger: logger, metricsHandler: metricsHandler, namespaceRegistry: namespaceRegistry, @@ -332,3 +335,110 @@ func (h *handler) RequestCancelActivityExecution( return response, nil } + +func (h *handler) PauseActivityExecution(ctx context.Context, req *activitypb.PauseActivityExecutionRequest) (*activitypb.PauseActivityExecutionResponse, error) { + frontendReq := req.GetFrontendRequest() + if frontendReq.GetWorkflowId() != "" { + _, err := h.historyHandler.PauseActivity(ctx, &historyservice.PauseActivityRequest{ + NamespaceId: req.GetNamespaceId(), + FrontendRequest: &workflowservice.PauseActivityRequest{ + Namespace: frontendReq.GetNamespace(), + Execution: &commonpb.WorkflowExecution{ + WorkflowId: frontendReq.GetWorkflowId(), + RunId: frontendReq.GetRunId(), + }, + Activity: &workflowservice.PauseActivityRequest_Id{Id: frontendReq.GetActivityId()}, + Reason: frontendReq.GetReason(), + Identity: frontendReq.GetIdentity(), + }, + }) + if err != nil { + return nil, err + } + return &activitypb.PauseActivityExecutionResponse{}, nil + } + return nil, serviceerror.NewUnimplemented("PauseActivityExecution for standalone activities is not yet implemented") +} + +func (h *handler) UnpauseActivityExecution(ctx context.Context, req *activitypb.UnpauseActivityExecutionRequest) (*activitypb.UnpauseActivityExecutionResponse, error) { + frontendReq := req.GetFrontendRequest() + if frontendReq.GetWorkflowId() != "" { + _, err := h.historyHandler.UnpauseActivity(ctx, &historyservice.UnpauseActivityRequest{ + NamespaceId: req.GetNamespaceId(), + FrontendRequest: &workflowservice.UnpauseActivityRequest{ + Namespace: frontendReq.GetNamespace(), + Execution: &commonpb.WorkflowExecution{ + WorkflowId: frontendReq.GetWorkflowId(), + RunId: frontendReq.GetRunId(), + }, + Activity: &workflowservice.UnpauseActivityRequest_Id{Id: frontendReq.GetActivityId()}, + Jitter: frontendReq.GetJitter(), + ResetAttempts: frontendReq.GetResetAttempts(), + 
ResetHeartbeat: frontendReq.GetResetHeartbeat(), + Identity: frontendReq.GetIdentity(), + }, + }) + if err != nil { + return nil, err + } + return &activitypb.UnpauseActivityExecutionResponse{}, nil + } + return nil, serviceerror.NewUnimplemented("UnpauseActivityExecution for standalone activities is not yet implemented") +} + +func (h *handler) ResetActivityExecution(ctx context.Context, req *activitypb.ResetActivityExecutionRequest) (*activitypb.ResetActivityExecutionResponse, error) { + frontendReq := req.GetFrontendRequest() + if frontendReq.GetWorkflowId() != "" { + _, err := h.historyHandler.ResetActivity(ctx, &historyservice.ResetActivityRequest{ + NamespaceId: req.GetNamespaceId(), + FrontendRequest: &workflowservice.ResetActivityRequest{ + Namespace: frontendReq.GetNamespace(), + Execution: &commonpb.WorkflowExecution{ + WorkflowId: frontendReq.GetWorkflowId(), + RunId: frontendReq.GetRunId(), + }, + Activity: &workflowservice.ResetActivityRequest_Id{Id: frontendReq.GetActivityId()}, + ResetHeartbeat: frontendReq.GetResetHeartbeat(), + RestoreOriginalOptions: frontendReq.GetRestoreOriginalOptions(), + KeepPaused: frontendReq.GetKeepPaused(), + Jitter: frontendReq.GetJitter(), + Identity: frontendReq.GetIdentity(), + }, + }) + if err != nil { + return nil, err + } + return &activitypb.ResetActivityExecutionResponse{}, nil + } + return nil, serviceerror.NewUnimplemented("ResetActivityExecution for standalone activities is not yet implemented") +} + +func (h *handler) UpdateActivityExecutionOptions(ctx context.Context, req *activitypb.UpdateActivityExecutionOptionsRequest) (*activitypb.UpdateActivityExecutionOptionsResponse, error) { + frontendReq := req.GetFrontendRequest() + if frontendReq.GetWorkflowId() != "" { + resp, err := h.historyHandler.UpdateActivityOptions(ctx, &historyservice.UpdateActivityOptionsRequest{ + NamespaceId: req.GetNamespaceId(), + UpdateRequest: &workflowservice.UpdateActivityOptionsRequest{ + Namespace: frontendReq.GetNamespace(), + 
Execution: &commonpb.WorkflowExecution{ + WorkflowId: frontendReq.GetWorkflowId(), + RunId: frontendReq.GetRunId(), + }, + Activity: &workflowservice.UpdateActivityOptionsRequest_Id{Id: frontendReq.GetActivityId()}, + ActivityOptions: frontendReq.GetActivityOptions(), + UpdateMask: frontendReq.GetUpdateMask(), + RestoreOriginal: frontendReq.GetRestoreOriginal(), + Identity: frontendReq.GetIdentity(), + }, + }) + if err != nil { + return nil, err + } + return &activitypb.UpdateActivityExecutionOptionsResponse{ + FrontendResponse: &workflowservice.UpdateActivityExecutionOptionsResponse{ + ActivityOptions: resp.GetActivityOptions(), + }, + }, nil + } + return nil, serviceerror.NewUnimplemented("UpdateActivityExecutionOptions for standalone activities is not yet implemented") +} diff --git a/chasm/lib/activity/proto/v1/request_response.proto b/chasm/lib/activity/proto/v1/request_response.proto index 918c6f4de31..d4713c3d4c2 100644 --- a/chasm/lib/activity/proto/v1/request_response.proto +++ b/chasm/lib/activity/proto/v1/request_response.proto @@ -2,60 +2,100 @@ syntax = "proto3"; package temporal.server.chasm.lib.activity.proto.v1; -import "temporal/api/workflowservice/v1/request_response.proto"; - option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; +import "temporal/api/workflowservice/v1/request_response.proto"; + message StartActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.StartActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.StartActivityExecutionRequest frontend_request = 2; } message StartActivityExecutionResponse { - temporal.api.workflowservice.v1.StartActivityExecutionResponse frontend_response = 1; + temporal.api.workflowservice.v1.StartActivityExecutionResponse frontend_response = 1; } message DescribeActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - 
temporal.api.workflowservice.v1.DescribeActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.DescribeActivityExecutionRequest frontend_request = 2; } message DescribeActivityExecutionResponse { - temporal.api.workflowservice.v1.DescribeActivityExecutionResponse frontend_response = 1; + temporal.api.workflowservice.v1.DescribeActivityExecutionResponse frontend_response = 1; } message PollActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.PollActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.PollActivityExecutionRequest frontend_request = 2; } message PollActivityExecutionResponse { - temporal.api.workflowservice.v1.PollActivityExecutionResponse frontend_response = 1; + temporal.api.workflowservice.v1.PollActivityExecutionResponse frontend_response = 1; } message TerminateActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.TerminateActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.TerminateActivityExecutionRequest frontend_request = 2; } -message TerminateActivityExecutionResponse {} +message TerminateActivityExecutionResponse { +} message RequestCancelActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.RequestCancelActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.RequestCancelActivityExecutionRequest frontend_request = 2; } -message RequestCancelActivityExecutionResponse {} +message RequestCancelActivityExecutionResponse { +} message DeleteActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; + + temporal.api.workflowservice.v1.DeleteActivityExecutionRequest frontend_request = 2; +} + +message DeleteActivityExecutionResponse { +} + +message PauseActivityExecutionRequest { + string namespace_id = 1; - 
temporal.api.workflowservice.v1.DeleteActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.PauseActivityExecutionRequest frontend_request = 2; } -message DeleteActivityExecutionResponse {} +message PauseActivityExecutionResponse { +} + +message UnpauseActivityExecutionRequest { + string namespace_id = 1; + + temporal.api.workflowservice.v1.UnpauseActivityExecutionRequest frontend_request = 2; +} + +message UnpauseActivityExecutionResponse { +} + +message ResetActivityExecutionRequest { + string namespace_id = 1; + + temporal.api.workflowservice.v1.ResetActivityExecutionRequest frontend_request = 2; +} + +message ResetActivityExecutionResponse { +} + +message UpdateActivityExecutionOptionsRequest { + string namespace_id = 1; + + temporal.api.workflowservice.v1.UpdateActivityExecutionOptionsRequest frontend_request = 2; +} + +message UpdateActivityExecutionOptionsResponse { + temporal.api.workflowservice.v1.UpdateActivityExecutionOptionsResponse frontend_response = 1; +} diff --git a/chasm/lib/activity/proto/v1/service.proto b/chasm/lib/activity/proto/v1/service.proto index 69810bee55c..e900ee9a8f1 100644 --- a/chasm/lib/activity/proto/v1/service.proto +++ b/chasm/lib/activity/proto/v1/service.proto @@ -2,40 +2,72 @@ syntax = "proto3"; package temporal.server.chasm.lib.activity.proto.v1; +option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; + import "chasm/lib/activity/proto/v1/request_response.proto"; -import "temporal/server/api/common/v1/api_category.proto"; import "temporal/server/api/routing/v1/extension.proto"; - -option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; +import "temporal/server/api/common/v1/api_category.proto"; service ActivityService { - rpc StartActivityExecution(StartActivityExecutionRequest) returns (StartActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; - option 
(temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } - - rpc DescribeActivityExecution(DescribeActivityExecutionRequest) returns (DescribeActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } - - rpc PollActivityExecution(PollActivityExecutionRequest) returns (PollActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_LONG_POLL; - } - - rpc TerminateActivityExecution(TerminateActivityExecutionRequest) returns (TerminateActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } - - rpc RequestCancelActivityExecution(RequestCancelActivityExecutionRequest) returns (RequestCancelActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } - - rpc DeleteActivityExecution(DeleteActivityExecutionRequest) returns (DeleteActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } + rpc StartActivityExecution(StartActivityExecutionRequest) returns (StartActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } + + rpc DescribeActivityExecution(DescribeActivityExecutionRequest) returns (DescribeActivityExecutionResponse) { 
+ option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } + + rpc PollActivityExecution(PollActivityExecutionRequest) returns (PollActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_LONG_POLL; + } + + rpc TerminateActivityExecution(TerminateActivityExecutionRequest) returns (TerminateActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } + + rpc RequestCancelActivityExecution(RequestCancelActivityExecutionRequest) returns (RequestCancelActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } + + rpc DeleteActivityExecution(DeleteActivityExecutionRequest) returns (DeleteActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } + + rpc PauseActivityExecution(PauseActivityExecutionRequest) returns (PauseActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing) = { + business_id: "frontend_request.workflow_id" + business_id: "frontend_request.activity_id" + }; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } + + rpc UnpauseActivityExecution(UnpauseActivityExecutionRequest) returns (UnpauseActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing) = { + business_id: "frontend_request.workflow_id" + business_id: 
"frontend_request.activity_id" + }; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } + + rpc ResetActivityExecution(ResetActivityExecutionRequest) returns (ResetActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing) = { + business_id: "frontend_request.workflow_id" + business_id: "frontend_request.activity_id" + }; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } + + rpc UpdateActivityExecutionOptions(UpdateActivityExecutionOptionsRequest) returns (UpdateActivityExecutionOptionsResponse) { + option (temporal.server.api.routing.v1.routing) = { + business_id: "frontend_request.workflow_id" + business_id: "frontend_request.activity_id" + }; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } } diff --git a/client/history/historytest/clienttest.go b/client/history/historytest/clienttest.go index de9cb8990a3..5bad95746d4 100644 --- a/client/history/historytest/clienttest.go +++ b/client/history/historytest/clienttest.go @@ -25,6 +25,7 @@ import ( "go.temporal.io/server/common/testing/nettest" historyserver "go.temporal.io/server/service/history" "go.temporal.io/server/service/history/tasks" + "go.uber.org/fx/fxtest" "go.uber.org/mock/gomock" "google.golang.org/grpc" ) @@ -51,7 +52,7 @@ func TestClient(t *testing.T, historyTaskQueueManager persistence.HistoryTaskQue listener := nettest.NewListener(nettest.NewPipe()) serveErrs := make(chan error, 1) - grpcServer := createServer(historyTaskQueueManager) + grpcServer := createServer(t, historyTaskQueueManager) go func() { serveErrs <- grpcServer.Serve(listener) }() @@ -134,14 +135,14 @@ func readTasks( } } -func createServer(historyTaskQueueManager persistence.HistoryTaskQueueManager) *grpc.Server { +func createServer(t *testing.T, historyTaskQueueManager persistence.HistoryTaskQueueManager) *grpc.Server { // TODO: find a better way to create a history handler historyHandler, 
err := historyserver.HandlerProvider(historyserver.NewHandlerArgs{ TaskQueueManager: historyTaskQueueManager, TracerProvider: fakeTracerProvider{}, TaskCategoryRegistry: tasks.NewDefaultTaskCategoryRegistry(), ChasmRegistry: chasm.NewRegistry(log.NewNoopLogger()), - }) + }, fxtest.NewLifecycle(t)) if err != nil { panic(err) // nolint:forbidigo // Panic is acceptable in test setup code. } diff --git a/cmd/tools/protoc-gen-go-chasm/main.go b/cmd/tools/protoc-gen-go-chasm/main.go index feb90ba1b2e..95f6bd2cd6d 100644 --- a/cmd/tools/protoc-gen-go-chasm/main.go +++ b/cmd/tools/protoc-gen-go-chasm/main.go @@ -121,18 +121,15 @@ func genAssignShard(m *protogen.Method) (string, error) { if opts == nil { return "", fmt.Errorf("no routing directive specified on %s", m.Desc.FullName()) } - if opts.Random && (opts.NamespaceId != "" || opts.BusinessId != "") { + if opts.Random && (opts.NamespaceId != "" || len(opts.BusinessId) != 0) { return "", fmt.Errorf("random directive cannot be combined with namespace_id or business_id on %s", m.Desc.FullName()) } if opts.Random { return "shardID := int32(rand.Intn(int(c.numShards)) + 1)", nil } - if opts.BusinessId == "" { + if len(opts.BusinessId) == 0 { return "", fmt.Errorf("business_id directive empty on %s", m.Desc.FullName()) } - if opts.Random { - return "", fmt.Errorf("random directive cannot be combined with namespace_id or business_id on %s", m.Desc.FullName()) - } namespaceIDField := opts.NamespaceId if namespaceIDField == "" { @@ -143,12 +140,28 @@ func genAssignShard(m *protogen.Method) (string, error) { if err != nil { return "", fmt.Errorf("unable to resolve namespace_id field path %q: %w", namespaceIDField, err) } - businessIDFieldGetter, err := goFieldPath(m, opts.BusinessId) + + primaryFieldGetter, err := goFieldPath(m, opts.BusinessId[0]) if err != nil { - return "", fmt.Errorf("unable to resolve business_id field path %q: %w", opts.BusinessId, err) + return "", fmt.Errorf("unable to resolve business_id field path %q: 
%w", opts.BusinessId[0], err) } - return fmt.Sprintf("shardID := common.WorkflowIDToHistoryShard(request%s, request%s, c.numShards)", namespaceIDFieldGetter, businessIDFieldGetter), nil + if len(opts.BusinessId) == 1 { + return fmt.Sprintf("shardID := common.WorkflowIDToHistoryShard(request%s, request%s, c.numShards)", namespaceIDFieldGetter, primaryFieldGetter), nil + } + + // Multiple business_id fields: use the first non-empty value as the routing key. + var sb strings.Builder + fmt.Fprintf(&sb, "businessID := request%s\n", primaryFieldGetter) + for _, bid := range opts.BusinessId[1:] { + fallbackGetter, err := goFieldPath(m, bid) + if err != nil { + return "", fmt.Errorf("unable to resolve business_id field path %q: %w", bid, err) + } + fmt.Fprintf(&sb, "if businessID == \"\" { businessID = request%s }\n", fallbackGetter) + } + fmt.Fprintf(&sb, "shardID := common.WorkflowIDToHistoryShard(request%s, businessID, c.numShards)", namespaceIDFieldGetter) + return sb.String(), nil } func goFieldPath(m *protogen.Method, path string) (string, error) { diff --git a/common/api/metadata.go b/common/api/metadata.go index bb584d38000..24c0eacd961 100644 --- a/common/api/metadata.go +++ b/common/api/metadata.go @@ -163,6 +163,7 @@ var ( "GetDeploymentReachability": {Scope: ScopeNamespace, Access: AccessReadOnly, Polling: PollingNone}, // [cleanup-wv-pre-release] "GetCurrentDeployment": {Scope: ScopeNamespace, Access: AccessReadOnly, Polling: PollingNone}, // [cleanup-wv-pre-release] "SetCurrentDeployment": {Scope: ScopeNamespace, Access: AccessWrite, Polling: PollingNone}, // [cleanup-wv-pre-release] + "SetCurrentDeploymentVersion": {Scope: ScopeNamespace, Access: AccessWrite, Polling: PollingNone}, // [cleanup-wv-pre-release] "DescribeWorkerDeploymentVersion": {Scope: ScopeNamespace, Access: AccessReadOnly, Polling: PollingNone}, "DescribeWorkerDeployment": {Scope: ScopeNamespace, Access: AccessReadOnly, Polling: PollingNone}, "SetWorkerDeploymentCurrentVersion": {Scope: 
ScopeNamespace, Access: AccessWrite, Polling: PollingNone}, diff --git a/proto/internal/buf.yaml b/proto/internal/buf.yaml index 0625f0cffae..33360104a05 100644 --- a/proto/internal/buf.yaml +++ b/proto/internal/buf.yaml @@ -9,9 +9,14 @@ deps: breaking: use: - WIRE - # Uncomment this to temporarily ignore specific files or directories: - # ignore: + # Files or directories temporarily ignored by breaking-change detection: + ignore: # example: - temporal/server/api/.../message.proto + - temporal/server/api/persistence/v1/chasm.proto + - temporal/server/api/token/v1/message.proto + # TODO: Remove once this is stable. business_id was intentionally changed from + # optional to repeated to support multiple fallback routing fields. + - temporal/server/api/routing/v1/extension.proto lint: use: - DEFAULT diff --git a/proto/internal/temporal/server/api/routing/v1/extension.proto b/proto/internal/temporal/server/api/routing/v1/extension.proto index cb12f5e5222..8a3bcd9c460 100644 --- a/proto/internal/temporal/server/api/routing/v1/extension.proto +++ b/proto/internal/temporal/server/api/routing/v1/extension.proto @@ -2,19 +2,18 @@ syntax = "proto3"; package temporal.server.api.routing.v1; -import "google/protobuf/descriptor.proto"; - option go_package = "go.temporal.io/server/api/routing/v1;routing"; -extend google.protobuf.MethodOptions { - optional RoutingOptions routing = 50234; -} +import "google/protobuf/descriptor.proto"; + +extend google.protobuf.MethodOptions { optional RoutingOptions routing = 7234; } message RoutingOptions { - // Requests will be routed to a random shard. - bool random = 1; - // Requests may specify how to obtain the namespace ID. Defaults to the "namespace_id" field. - string namespace_id = 2; - // Request will be routed by resolving the namespace ID and business ID to a given shard. - string business_id = 3; + // Requests will be routed to a random shard. + bool random = 1; + // Requests may specify how to obtain the namespace ID.
Defaults to the "namespace_id" field. + string namespace_id = 2; + // Requests will be routed by resolving the namespace ID and business ID to a given shard. + // If multiple fields are specified, the first non-empty value is used. + repeated string business_id = 3; } diff --git a/service/history/fx.go b/service/history/fx.go index 313f29fd45d..cdcc0959ff6 100644 --- a/service/history/fx.go +++ b/service/history/fx.go @@ -89,6 +89,7 @@ var Module = fx.Options( fx.Provide(EventNotifierProvider), fx.Provide(HistoryEngineFactoryProvider), fx.Provide(HandlerProvider), + fx.Provide(HistoryServiceServerProvider), fx.Provide(ServerProvider), fx.Provide(NewService), fx.Provide(ReplicationProgressCacheProvider), @@ -115,13 +116,7 @@ func ServiceResolverProvider( return membershipMonitor.GetResolver(primitives.HistoryService) } -func HandlerProvider(args NewHandlerArgs) (*Handler, error) { - // Build and store the Nexus handler - nexusHandler, err := buildNexusHandler(args.ChasmRegistry) - if err != nil { - return nil, err - } - +func HandlerProvider(args NewHandlerArgs, lc fx.Lifecycle) (*Handler, error) { handler := &Handler{ status: common.DaemonStatusInitialized, config: args.Config, @@ -161,12 +156,39 @@ func HandlerProvider(args NewHandlerArgs) (*Handler, error) { replicationTaskConverterProvider: args.ReplicationTaskConverterFactory, streamReceiverMonitor: args.StreamReceiverMonitor, replicationServerRateLimiter: args.ReplicationServerRateLimiter, - nexusHandler: nexusHandler, } + // Build the Nexus handler in OnStart rather than here so that it runs after all + // fx.Invoke functions have completed. 
If we built it eagerly, the dependency chain + // + // activity.HistoryModule (fx.Invoke) + // → *library → *handler → historyservice.HistoryServiceServer + // → HistoryServiceServerProvider → HandlerProvider (this function) + // + // would force HandlerProvider to run before modules like chasmtests.Module have had + // a chance to register their nexus services via their own fx.Invoke calls. As a + // result, buildNexusHandler would snapshot an empty registry and handler.nexusHandler + // would remain nil, causing all StartNexusOperation calls to the system endpoint to + // return "no nexus services registered". OnStart hooks run after ALL invokes are + // done, so the registry is fully populated by the time we call buildNexusHandler. + lc.Append(fx.Hook{ + OnStart: func(_ context.Context) error { + h, err := buildNexusHandler(args.ChasmRegistry) + if err != nil { + return err + } + handler.nexusHandler = h + return nil + }, + }) + return handler, nil } +func HistoryServiceServerProvider(handler *Handler) historyservice.HistoryServiceServer { + return handler +} + func buildNexusHandler(chasmRegistry *chasm.Registry) (nexus.Handler, error) { nexusServices := chasmRegistry.NexusServices() if len(nexusServices) == 0 { diff --git a/tests/activity_api_pause_test.go b/tests/activity_api_pause_test.go index 63b6fabbb63..be58a962db1 100644 --- a/tests/activity_api_pause_test.go +++ b/tests/activity_api_pause_test.go @@ -15,628 +15,593 @@ import ( sdkclient "go.temporal.io/sdk/client" "go.temporal.io/sdk/temporal" "go.temporal.io/sdk/workflow" - "go.temporal.io/server/common/testing/parallelsuite" "go.temporal.io/server/common/util" "go.temporal.io/server/tests/testcore" ) -type ActivityAPIPauseClientTestSuite struct { - parallelsuite.Suite[*ActivityAPIPauseClientTestSuite] +// activityPauseAPI groups pause/unpause adapters so the same test body can run +// against both the legacy PauseActivity/UnpauseActivity API and the newer +// PauseActivityExecution/UnpauseActivityExecution
API. +type activityPauseAPI struct { + name string + pause func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity, reason string) error + unpause func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity string, resetAttempts bool) error } -func TestActivityAPIPauseClientTestSuite(t *testing.T) { - parallelsuite.Run(t, &ActivityAPIPauseClientTestSuite{}) -} - -func (s *ActivityAPIPauseClientTestSuite) TestActivityPauseApi_WhileRunning() { - env := testcore.NewEnv(s.T(), testcore.WithSdkWorker()) - - initialRetryInterval := 1 * time.Second - scheduleToCloseTimeout := 30 * time.Minute - startToCloseTimeout := 15 * time.Minute - activityRetryPolicy := &temporal.RetryPolicy{ - InitialInterval: initialRetryInterval, - BackoffCoefficient: 1, - } - makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { - return func(ctx workflow.Context) error { - var ret string - err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ - ActivityID: "activity-id", - DisableEagerExecution: true, - StartToCloseTimeout: startToCloseTimeout, - ScheduleToCloseTimeout: scheduleToCloseTimeout, - RetryPolicy: activityRetryPolicy, - }), activityFunction).Get(ctx, &ret) - return err - } - } - - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - activityPausedCn := make(chan struct{}) - var startedActivityCount atomic.Int32 - activityErr := errors.New("bad-luck-please-retry") - - activityFunction := func() (string, error) { - startedActivityCount.Add(1) - if startedActivityCount.Load() == 1 { - env.WaitForChannel(activityPausedCn) - return "", activityErr - } - return "done!", nil - } - - workflowFn := makeWorkflowFunc(activityFunction) - - env.SdkWorker().RegisterWorkflow(workflowFn) - env.SdkWorker().RegisterActivity(activityFunction) - - workflowOptions := sdkclient.StartWorkflowOptions{ - ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), - TaskQueue: 
env.WorkerTaskQueue(), - } - - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) - s.NoError(err) - - // wait for activity to start - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.PendingActivities, 1) - require.Equal(t, int32(1), startedActivityCount.Load()) - }, 5*time.Second, 500*time.Millisecond) - - // pause activity - testIdentity := "test-identity" - testReason := "test-reason" - pauseRequest := &workflowservice.PauseActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.PauseActivityRequest_Id{Id: "activity-id"}, - Identity: testIdentity, - Reason: testReason, - } - resp, err := env.FrontendClient().PauseActivity(ctx, pauseRequest) - s.NoError(err) - s.NotNil(resp) - - // make sure activity is paused on server while running on worker - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.PendingActivities, 1) - require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, description.PendingActivities[0].State) - require.Equal(t, int32(1), startedActivityCount.Load()) - }, 5*time.Second, 500*time.Millisecond) - - // unblock the activity - env.SendToChannel(activityPausedCn) - // make sure activity is paused on server and completed on the worker - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.PendingActivities, 1) - require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, description.PendingActivities[0].State) - 
require.Equal(t, int32(1), startedActivityCount.Load()) - }, 5*time.Second, 500*time.Millisecond) - - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - s.NoError(err) - s.Len(description.PendingActivities, 1) - s.True(description.PendingActivities[0].Paused) - - // wait long enough for activity to retry if pause is not working - // Note: because activity is retried we expect the attempts to be incremented - err = util.InterruptibleSleep(ctx, 2*time.Second) - s.NoError(err) - - // make sure activity is not completed, and was not retried - description, err = env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - s.NoError(err) - s.Len(description.PendingActivities, 1) - s.True(description.PendingActivities[0].Paused) - s.Equal(int32(2), description.PendingActivities[0].Attempt) - s.NotNil(description.PendingActivities[0].LastFailure) - s.Equal(activityErr.Error(), description.PendingActivities[0].LastFailure.Message) - s.NotNil(description.PendingActivities[0].PauseInfo) - s.NotNil(description.PendingActivities[0].PauseInfo.GetManual()) - s.Equal(testIdentity, description.PendingActivities[0].PauseInfo.GetManual().Identity) - s.Equal(testReason, description.PendingActivities[0].PauseInfo.GetManual().Reason) - - // unpause the activity - unpauseRequest := &workflowservice.UnpauseActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.UnpauseActivityRequest_Id{Id: "activity-id"}, - } - unpauseResp, err := env.FrontendClient().UnpauseActivity(ctx, unpauseRequest) - s.NoError(err) - s.NotNil(unpauseResp) - - var out string - err = workflowRun.Get(ctx, &out) - - s.NoError(err) -} - -func (s *ActivityAPIPauseClientTestSuite) TestActivityPauseApi_IncreaseAttemptsOnFailure() { - /* - * 1. Run an activity that runs forever - * 2. Pause the activity - * 3. 
Send a failure signal to the activity - * 4. Validate activity failed - * 5. Validate number of activity attempts increased - */ - env := testcore.NewEnv(s.T(), testcore.WithSdkWorker()) - - initialRetryInterval := 1 * time.Second - scheduleToCloseTimeout := 30 * time.Minute - startToCloseTimeout := 15 * time.Minute - activityRetryPolicy := &temporal.RetryPolicy{ - InitialInterval: initialRetryInterval, - BackoffCoefficient: 1, - } - makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { - return func(ctx workflow.Context) error { - var ret string - err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ - ActivityID: "activity-id", - DisableEagerExecution: true, - StartToCloseTimeout: startToCloseTimeout, - ScheduleToCloseTimeout: scheduleToCloseTimeout, - RetryPolicy: activityRetryPolicy, - }), activityFunction).Get(ctx, &ret) - return err - } - } - - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - var startedActivityCount atomic.Int32 - activityPausedCn := make(chan struct{}) - activityErr := errors.New("activity-failed-while-paused") - var shouldSucceed atomic.Bool - - activityFunction := func() (string, error) { - startedActivityCount.Add(1) - if startedActivityCount.Load() == 1 { - env.WaitForChannel(activityPausedCn) - return "", activityErr - } - if shouldSucceed.Load() { - return "done!", nil - } - return "", activityErr - } - - workflowFn := makeWorkflowFunc(activityFunction) - - env.SdkWorker().RegisterWorkflow(workflowFn) - env.SdkWorker().RegisterActivity(activityFunction) - - workflowOptions := sdkclient.StartWorkflowOptions{ - ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), - TaskQueue: env.WorkerTaskQueue(), - } - - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) - s.NoError(err) - - // wait for activity to start - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := 
env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.PendingActivities, 1) - require.Equal(t, int32(1), startedActivityCount.Load()) - }, 5*time.Second, 500*time.Millisecond) - - // pause activity - testIdentity := "test-identity" - testReason := "test-reason" - pauseRequest := &workflowservice.PauseActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), +func pauseAPIs() []activityPauseAPI { + return []activityPauseAPI{ + { + name: "legacy-api", + pause: func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity, reason string) error { + _, err := s.FrontendClient().PauseActivity(ctx, &workflowservice.PauseActivityRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: wfID}, + Activity: &workflowservice.PauseActivityRequest_Id{Id: actID}, + Identity: identity, + Reason: reason, + }) + return err + }, + unpause: func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity string, resetAttempts bool) error { + _, err := s.FrontendClient().UnpauseActivity(ctx, &workflowservice.UnpauseActivityRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: wfID}, + Activity: &workflowservice.UnpauseActivityRequest_Id{Id: actID}, + Identity: identity, + ResetAttempts: resetAttempts, + }) + return err + }, }, - Activity: &workflowservice.PauseActivityRequest_Id{Id: "activity-id"}, - Identity: testIdentity, - Reason: testReason, - } - resp, err := env.FrontendClient().PauseActivity(ctx, pauseRequest) - s.NoError(err) - s.NotNil(resp) - - // make sure activity is paused on server while running on worker - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, 
description.PendingActivities, 1) - require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, description.PendingActivities[0].State) - require.Equal(t, int32(1), startedActivityCount.Load()) - }, 5*time.Second, 500*time.Millisecond) - - // End the activity - env.SendToChannel(activityPausedCn) - - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.NotNil(t, description) - require.Len(t, description.PendingActivities, 1) - require.True(t, description.PendingActivities[0].Paused) - require.Equal(t, int32(2), description.PendingActivities[0].Attempt) - require.NotNil(t, description.PendingActivities[0].LastFailure) - require.NotNil(t, description.PendingActivities[0].PauseInfo) - require.NotNil(t, description.PendingActivities[0].PauseInfo.GetManual()) - require.Equal(t, testIdentity, description.PendingActivities[0].PauseInfo.GetManual().Identity) - require.Equal(t, testReason, description.PendingActivities[0].PauseInfo.GetManual().Reason) - require.Equal(t, int32(1), startedActivityCount.Load()) - }, 5*time.Second, 500*time.Millisecond) - - // Let the workflow finish gracefully - // set the flag to make activity succeed on next attempt - shouldSucceed.Store(true) - - // unpause the activity - unpauseRequest := &workflowservice.UnpauseActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), + { + name: "execution-api", + pause: func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity, reason string) error { + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowId: wfID, + ActivityId: actID, + Identity: identity, + Reason: reason, + }) + return err + }, + unpause: func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity 
string, resetAttempts bool) error { + _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowId: wfID, + ActivityId: actID, + Identity: identity, + ResetAttempts: resetAttempts, + }) + return err + }, }, - Activity: &workflowservice.UnpauseActivityRequest_Id{Id: "activity-id"}, } - unpauseResp, err := env.FrontendClient().UnpauseActivity(ctx, unpauseRequest) - s.NoError(err) - s.NotNil(unpauseResp) - - // wait for activity to complete - s.EventuallyWithT(func(t *assert.CollectT) { - require.Equal(t, int32(2), startedActivityCount.Load()) - }, 5*time.Second, 100*time.Millisecond) - - var out string - err = workflowRun.Get(ctx, &out) - - s.NoError(err) } -func (s *ActivityAPIPauseClientTestSuite) TestActivityPauseApi_WhileWaiting() { - // In this case, pause happens when activity is in retry state. - // Make sure that activity is paused and then unpaused. - // Also check that activity will not be retried while unpaused. 
- env := testcore.NewEnv(s.T(), testcore.WithSdkWorker()) - - initialRetryInterval := 1 * time.Second - scheduleToCloseTimeout := 30 * time.Minute - startToCloseTimeout := 15 * time.Minute - activityRetryPolicy := &temporal.RetryPolicy{ - InitialInterval: initialRetryInterval, - BackoffCoefficient: 1, - } - makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { - return func(ctx workflow.Context) error { - var ret string - err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ - ActivityID: "activity-id", - DisableEagerExecution: true, - StartToCloseTimeout: startToCloseTimeout, - ScheduleToCloseTimeout: scheduleToCloseTimeout, - RetryPolicy: activityRetryPolicy, - }), activityFunction).Get(ctx, &ret) - return err - } - } - - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - var startedActivityCount atomic.Int32 - - activityFunction := func() (string, error) { - startedActivityCount.Add(1) - if startedActivityCount.Load() == 1 { - activityErr := errors.New("bad-luck-please-retry") - return "", activityErr - } - return "done!", nil - } - - workflowFn := makeWorkflowFunc(activityFunction) - - env.SdkWorker().RegisterWorkflow(workflowFn) - env.SdkWorker().RegisterActivity(activityFunction) - - workflowOptions := sdkclient.StartWorkflowOptions{ - ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), - TaskQueue: env.WorkerTaskQueue(), - } - - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) - s.NoError(err) - - // wait for activity to start - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.PendingActivities, 1) - require.Equal(t, int32(1), startedActivityCount.Load()) - }, 5*time.Second, 100*time.Millisecond) - - // pause activity - testIdentity := "test-identity" - 
testReason := "test-reason" - pauseRequest := &workflowservice.PauseActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.PauseActivityRequest_Id{Id: "activity-id"}, - Identity: testIdentity, - Reason: testReason, - } - resp, err := env.FrontendClient().PauseActivity(ctx, pauseRequest) - s.NoError(err) - s.NotNil(resp) - - // wait long enough for activity to retry if pause is not working - s.NoError(util.InterruptibleSleep(ctx, 2*time.Second)) - - // make sure activity is not completed, and was not retried - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - s.NoError(err) - s.Len(description.PendingActivities, 1) - s.True(description.PendingActivities[0].Paused) - s.Equal(int32(2), description.PendingActivities[0].Attempt) - s.NotNil(description.PendingActivities[0].PauseInfo) - s.NotNil(description.PendingActivities[0].PauseInfo.GetManual()) - s.Equal(testIdentity, description.PendingActivities[0].PauseInfo.GetManual().Identity) - s.Equal(testReason, description.PendingActivities[0].PauseInfo.GetManual().Reason) - - // unpause the activity - unpauseRequest := &workflowservice.UnpauseActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.UnpauseActivityRequest_Id{Id: "activity-id"}, - } - unpauseResp, err := env.FrontendClient().UnpauseActivity(ctx, unpauseRequest) - s.NoError(err) - s.NotNil(unpauseResp) - - // wait for activity to complete - s.EventuallyWithT(func(t *assert.CollectT) { - require.Equal(t, int32(2), startedActivityCount.Load()) - }, 5*time.Second, 100*time.Millisecond) - - var out string - err = workflowRun.Get(ctx, &out) - - s.NoError(err) -} - -func (s *ActivityAPIPauseClientTestSuite) TestActivityPauseApi_WhileRetryNoWait() { - // In this case, pause can happen 
when activity is in retry state. - // Make sure that activity is paused and then unpaused. - // Also tests noWait flag. - env := testcore.NewEnv(s.T(), testcore.WithSdkWorker()) - - initialRetryInterval := 30 * time.Second - scheduleToCloseTimeout := 30 * time.Minute - startToCloseTimeout := 15 * time.Minute - activityRetryPolicy := &temporal.RetryPolicy{ - InitialInterval: initialRetryInterval, - BackoffCoefficient: 1, - } - makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { - return func(ctx workflow.Context) error { - var ret string - err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ - ActivityID: "activity-id", - DisableEagerExecution: true, - StartToCloseTimeout: startToCloseTimeout, - ScheduleToCloseTimeout: scheduleToCloseTimeout, - RetryPolicy: activityRetryPolicy, - }), activityFunction).Get(ctx, &ret) - return err - } - } - - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - var startedActivityCount atomic.Int32 - - activityFunction := func() (string, error) { - startedActivityCount.Add(1) - if startedActivityCount.Load() == 1 { - activityErr := errors.New("bad-luck-please-retry") - return "", activityErr - } - return "done!", nil - } - - workflowFn := makeWorkflowFunc(activityFunction) - - env.SdkWorker().RegisterWorkflow(workflowFn) - env.SdkWorker().RegisterActivity(activityFunction) - - workflowOptions := sdkclient.StartWorkflowOptions{ - ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), - TaskQueue: env.WorkerTaskQueue(), - } - - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) - s.NoError(err) - - // wait for activity to start - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.GetPendingActivities(), 1) - require.Equal(t, int32(1), 
startedActivityCount.Load()) - }, 5*time.Second, 100*time.Millisecond) - - // pause activity - pauseRequest := &workflowservice.PauseActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.PauseActivityRequest_Id{Id: "activity-id"}, - } - resp, err := env.FrontendClient().PauseActivity(ctx, pauseRequest) - s.NoError(err) - s.NotNil(resp) - - // unpause the activity - unpauseRequest := &workflowservice.UnpauseActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.UnpauseActivityRequest_Id{Id: "activity-id"}, - } - unpauseResp, err := env.FrontendClient().UnpauseActivity(ctx, unpauseRequest) - s.NoError(err) - s.NotNil(unpauseResp) - - // wait for activity to complete. It should happen immediately since noWait is set - s.EventuallyWithT(func(t *assert.CollectT) { - require.Equal(t, int32(2), startedActivityCount.Load()) - }, 2*time.Second, 100*time.Millisecond) - - var out string - err = workflowRun.Get(ctx, &out) - - s.NoError(err) -} - -func (s *ActivityAPIPauseClientTestSuite) TestActivityPauseApi_WithReset() { - // pause/unpause the activity with reset option and noWait flag - env := testcore.NewEnv(s.T(), testcore.WithSdkWorker()) - - initialRetryInterval := 1 * time.Second - scheduleToCloseTimeout := 30 * time.Minute - startToCloseTimeout := 15 * time.Minute - activityRetryPolicy := &temporal.RetryPolicy{ - InitialInterval: initialRetryInterval, - BackoffCoefficient: 1, - } - makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { - return func(ctx workflow.Context) error { - var ret string - err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ - ActivityID: "activity-id", - DisableEagerExecution: true, - StartToCloseTimeout: startToCloseTimeout, - ScheduleToCloseTimeout: 
scheduleToCloseTimeout, - RetryPolicy: activityRetryPolicy, - }), activityFunction).Get(ctx, &ret) - return err - } - } - - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - var startedActivityCount atomic.Int32 - activityWasReset := false - activityCompleteCn := make(chan struct{}) - - activityFunction := func() (string, error) { - startedActivityCount.Add(1) - - if !activityWasReset { - activityErr := errors.New("bad-luck-please-retry") - return "", activityErr - } - env.WaitForChannel(activityCompleteCn) - return "done!", nil - } - - workflowFn := makeWorkflowFunc(activityFunction) - - env.SdkWorker().RegisterWorkflow(workflowFn) - env.SdkWorker().RegisterActivity(activityFunction) - - workflowOptions := sdkclient.StartWorkflowOptions{ - ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), - TaskQueue: env.WorkerTaskQueue(), - } - - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) - s.NoError(err) - - // wait for activity to start/fail few times - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.GetPendingActivities(), 1) - require.Greater(t, startedActivityCount.Load(), int32(1)) - }, 5*time.Second, 100*time.Millisecond) - - // pause activity - pauseRequest := &workflowservice.PauseActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.PauseActivityRequest_Id{Id: "activity-id"}, - } - resp, err := env.FrontendClient().PauseActivity(ctx, pauseRequest) - s.NoError(err) - s.NotNil(resp) - - // wait for activity to be in paused state and waiting for retry - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - 
require.NoError(t, err) - require.Len(t, description.GetPendingActivities(), 1) - require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, description.PendingActivities[0].State) - // also verify that the number of attempts was not reset - require.Greater(t, description.PendingActivities[0].Attempt, int32(1)) - }, 5*time.Second, 100*time.Millisecond) - - activityWasReset = true - - // unpause the activity with reset, and set noWait flag - unpauseRequest := &workflowservice.UnpauseActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.UnpauseActivityRequest_Id{Id: "activity-id"}, - ResetAttempts: true, +func TestActivityApiPauseClientTestSuite(t *testing.T) { + t.Parallel() + + for _, api := range pauseAPIs() { + api := api + t.Run(api.name, func(t *testing.T) { + t.Parallel() + + t.Run("TestActivityPauseApi_WhileRunning", func(t *testing.T) { + s := testcore.NewEnv(t, testcore.WithSdkWorker()) + + initialRetryInterval := 1 * time.Second + scheduleToCloseTimeout := 30 * time.Minute + startToCloseTimeout := 15 * time.Minute + activityRetryPolicy := &temporal.RetryPolicy{ + InitialInterval: initialRetryInterval, + BackoffCoefficient: 1, + } + makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { + return func(ctx workflow.Context) error { + var ret string + err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ + ActivityID: "activity-id", + DisableEagerExecution: true, + StartToCloseTimeout: startToCloseTimeout, + ScheduleToCloseTimeout: scheduleToCloseTimeout, + RetryPolicy: activityRetryPolicy, + }), activityFunction).Get(ctx, &ret) + return err + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityPausedCn := make(chan struct{}) + var startedActivityCount atomic.Int32 + activityErr := errors.New("bad-luck-please-retry") + + 
activityFunction := func() (string, error) { + startedActivityCount.Add(1) + if startedActivityCount.Load() == 1 { + s.WaitForChannel(ctx, activityPausedCn) + return "", activityErr + } + return "done!", nil + } + + workflowFn := makeWorkflowFunc(activityFunction) + + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + workflowOptions := sdkclient.StartWorkflowOptions{ + ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), + TaskQueue: s.WorkerTaskQueue(), + } + + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + s.NoError(err) + + // wait for activity to start + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.PendingActivities, 1) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 5*time.Second, 500*time.Millisecond) + + // pause activity + testIdentity := "test-identity" + testReason := "test-reason" + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", testIdentity, testReason)) + + // make sure activity is paused on server while running on worker + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.PendingActivities, 1) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, description.PendingActivities[0].State) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 5*time.Second, 500*time.Millisecond) + + // unblock the activity + activityPausedCn <- struct{}{} + // make sure activity is paused on server and completed on the worker + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + 
require.Len(t, description.PendingActivities, 1) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, description.PendingActivities[0].State) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 5*time.Second, 500*time.Millisecond) + + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + s.NoError(err) + s.Len(description.PendingActivities, 1) + s.True(description.PendingActivities[0].Paused) + + // wait long enough for activity to retry if pause is not working + // Note: because activity is retried we expect the attempts to be incremented + err = util.InterruptibleSleep(ctx, 2*time.Second) + s.NoError(err) + + // make sure activity is not completed, and was not retried + description, err = s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + s.NoError(err) + s.Len(description.PendingActivities, 1) + s.True(description.PendingActivities[0].Paused) + s.Equal(int32(2), description.PendingActivities[0].Attempt) + s.NotNil(description.PendingActivities[0].LastFailure) + s.Equal(activityErr.Error(), description.PendingActivities[0].LastFailure.Message) + s.NotNil(description.PendingActivities[0].PauseInfo) + s.NotNil(description.PendingActivities[0].PauseInfo.GetManual()) + s.Equal(testIdentity, description.PendingActivities[0].PauseInfo.GetManual().Identity) + s.Equal(testReason, description.PendingActivities[0].PauseInfo.GetManual().Reason) + + // unpause the activity + s.NoError(api.unpause(ctx, s, workflowRun.GetID(), "activity-id", "", false)) + + var out string + err = workflowRun.Get(ctx, &out) + + s.NoError(err) + }) + + t.Run("TestActivityPauseApi_IncreaseAttemptsOnFailure", func(t *testing.T) { + /* + * 1. Run an activity that runs forever + * 2. Pause the activity + * 3. Send a failure signal to the activity + * 4. Validate activity failed + * 5. 
Validate number of activity attempts increased + */ + s := testcore.NewEnv(t, testcore.WithSdkWorker()) + + initialRetryInterval := 1 * time.Second + scheduleToCloseTimeout := 30 * time.Minute + startToCloseTimeout := 15 * time.Minute + activityRetryPolicy := &temporal.RetryPolicy{ + InitialInterval: initialRetryInterval, + BackoffCoefficient: 1, + } + makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { + return func(ctx workflow.Context) error { + var ret string + err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ + ActivityID: "activity-id", + DisableEagerExecution: true, + StartToCloseTimeout: startToCloseTimeout, + ScheduleToCloseTimeout: scheduleToCloseTimeout, + RetryPolicy: activityRetryPolicy, + }), activityFunction).Get(ctx, &ret) + return err + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + var startedActivityCount atomic.Int32 + activityPausedCn := make(chan struct{}) + activityErr := errors.New("activity-failed-while-paused") + var shouldSucceed atomic.Bool + + activityFunction := func() (string, error) { + startedActivityCount.Add(1) + if startedActivityCount.Load() == 1 { + s.WaitForChannel(ctx, activityPausedCn) + return "", activityErr + } + if shouldSucceed.Load() { + return "done!", nil + } + return "", activityErr + } + + workflowFn := makeWorkflowFunc(activityFunction) + + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + workflowOptions := sdkclient.StartWorkflowOptions{ + ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), + TaskQueue: s.WorkerTaskQueue(), + } + + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + s.NoError(err) + + // wait for activity to start + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, 
err) + require.Len(t, description.PendingActivities, 1) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 5*time.Second, 500*time.Millisecond) + + // pause activity + testIdentity := "test-identity" + testReason := "test-reason" + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", testIdentity, testReason)) + + // make sure activity is paused on server while running on worker + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.PendingActivities, 1) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, description.PendingActivities[0].State) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 5*time.Second, 500*time.Millisecond) + + // End the activity + activityPausedCn <- struct{}{} + + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.NotNil(t, description) + require.Len(t, description.PendingActivities, 1) + require.True(t, description.PendingActivities[0].Paused) + require.Equal(t, int32(2), description.PendingActivities[0].Attempt) + require.NotNil(t, description.PendingActivities[0].LastFailure) + require.NotNil(t, description.PendingActivities[0].PauseInfo) + require.NotNil(t, description.PendingActivities[0].PauseInfo.GetManual()) + require.Equal(t, testIdentity, description.PendingActivities[0].PauseInfo.GetManual().Identity) + require.Equal(t, testReason, description.PendingActivities[0].PauseInfo.GetManual().Reason) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 5*time.Second, 500*time.Millisecond) + + // Let the workflow finish gracefully + // set the flag to make activity succeed on next attempt + shouldSucceed.Store(true) + + // unpause the activity + s.NoError(api.unpause(ctx, s, 
workflowRun.GetID(), "activity-id", "", false)) + + // wait for activity to complete + s.EventuallyWithT(func(t *assert.CollectT) { + require.Equal(t, int32(2), startedActivityCount.Load()) + }, 5*time.Second, 100*time.Millisecond) + + var out string + err = workflowRun.Get(ctx, &out) + + s.NoError(err) + }) + + t.Run("TestActivityPauseApi_WhileWaiting", func(t *testing.T) { + // In this case, pause happens when activity is in retry state. + // Make sure that activity is paused and then unpaused. + // Also check that activity will not be retried while unpaused. + s := testcore.NewEnv(t, testcore.WithSdkWorker()) + + initialRetryInterval := 1 * time.Second + scheduleToCloseTimeout := 30 * time.Minute + startToCloseTimeout := 15 * time.Minute + activityRetryPolicy := &temporal.RetryPolicy{ + InitialInterval: initialRetryInterval, + BackoffCoefficient: 1, + } + makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { + return func(ctx workflow.Context) error { + var ret string + err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ + ActivityID: "activity-id", + DisableEagerExecution: true, + StartToCloseTimeout: startToCloseTimeout, + ScheduleToCloseTimeout: scheduleToCloseTimeout, + RetryPolicy: activityRetryPolicy, + }), activityFunction).Get(ctx, &ret) + return err + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + var startedActivityCount atomic.Int32 + + activityFunction := func() (string, error) { + startedActivityCount.Add(1) + if startedActivityCount.Load() == 1 { + activityErr := errors.New("bad-luck-please-retry") + return "", activityErr + } + return "done!", nil + } + + workflowFn := makeWorkflowFunc(activityFunction) + + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + workflowOptions := sdkclient.StartWorkflowOptions{ + ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), + TaskQueue: 
s.WorkerTaskQueue(), + } + + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + s.NoError(err) + + // wait for activity to start + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.PendingActivities, 1) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 5*time.Second, 100*time.Millisecond) + + // pause activity + testIdentity := "test-identity" + testReason := "test-reason" + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", testIdentity, testReason)) + + // wait long enough for activity to retry if pause is not working + require.NoError(t, util.InterruptibleSleep(ctx, 2*time.Second)) + + // make sure activity is not completed, and was not retried + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + s.NoError(err) + s.Len(description.PendingActivities, 1) + s.True(description.PendingActivities[0].Paused) + s.Equal(int32(2), description.PendingActivities[0].Attempt) + s.NotNil(description.PendingActivities[0].PauseInfo) + s.NotNil(description.PendingActivities[0].PauseInfo.GetManual()) + s.Equal(testIdentity, description.PendingActivities[0].PauseInfo.GetManual().Identity) + s.Equal(testReason, description.PendingActivities[0].PauseInfo.GetManual().Reason) + + // unpause the activity + s.NoError(api.unpause(ctx, s, workflowRun.GetID(), "activity-id", "", false)) + + // wait for activity to complete + s.EventuallyWithT(func(t *assert.CollectT) { + require.Equal(t, int32(2), startedActivityCount.Load()) + }, 5*time.Second, 100*time.Millisecond) + + var out string + err = workflowRun.Get(ctx, &out) + + s.NoError(err) + }) + + t.Run("TestActivityPauseApi_WhileRetryNoWait", func(t *testing.T) { + // In this case, pause can happen when activity is in retry state. 
+ // Make sure that activity is paused and then unpaused. + // Also tests noWait flag. + s := testcore.NewEnv(t, testcore.WithSdkWorker()) + + initialRetryInterval := 30 * time.Second + scheduleToCloseTimeout := 30 * time.Minute + startToCloseTimeout := 15 * time.Minute + activityRetryPolicy := &temporal.RetryPolicy{ + InitialInterval: initialRetryInterval, + BackoffCoefficient: 1, + } + makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { + return func(ctx workflow.Context) error { + var ret string + err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ + ActivityID: "activity-id", + DisableEagerExecution: true, + StartToCloseTimeout: startToCloseTimeout, + ScheduleToCloseTimeout: scheduleToCloseTimeout, + RetryPolicy: activityRetryPolicy, + }), activityFunction).Get(ctx, &ret) + return err + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + var startedActivityCount atomic.Int32 + + activityFunction := func() (string, error) { + startedActivityCount.Add(1) + if startedActivityCount.Load() == 1 { + activityErr := errors.New("bad-luck-please-retry") + return "", activityErr + } + return "done!", nil + } + + workflowFn := makeWorkflowFunc(activityFunction) + + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + workflowOptions := sdkclient.StartWorkflowOptions{ + ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), + TaskQueue: s.WorkerTaskQueue(), + } + + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + s.NoError(err) + + // wait for activity to start + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.GetPendingActivities(), 1) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 5*time.Second, 
100*time.Millisecond) + + // pause activity + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", "", "")) + + // unpause the activity + s.NoError(api.unpause(ctx, s, workflowRun.GetID(), "activity-id", "", false)) + + // wait for activity to complete. It should happen immediately since noWait is set + s.EventuallyWithT(func(t *assert.CollectT) { + require.Equal(t, int32(2), startedActivityCount.Load()) + }, 2*time.Second, 100*time.Millisecond) + + var out string + err = workflowRun.Get(ctx, &out) + + s.NoError(err) + }) + + t.Run("TestActivityPauseApi_WithReset", func(t *testing.T) { + // pause/unpause the activity with reset option and noWait flag + s := testcore.NewEnv(t, testcore.WithSdkWorker()) + + initialRetryInterval := 1 * time.Second + scheduleToCloseTimeout := 30 * time.Minute + startToCloseTimeout := 15 * time.Minute + activityRetryPolicy := &temporal.RetryPolicy{ + InitialInterval: initialRetryInterval, + BackoffCoefficient: 1, + } + makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { + return func(ctx workflow.Context) error { + var ret string + err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ + ActivityID: "activity-id", + DisableEagerExecution: true, + StartToCloseTimeout: startToCloseTimeout, + ScheduleToCloseTimeout: scheduleToCloseTimeout, + RetryPolicy: activityRetryPolicy, + }), activityFunction).Get(ctx, &ret) + return err + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + var startedActivityCount atomic.Int32 + activityWasReset := false + activityCompleteCn := make(chan struct{}) + + activityFunction := func() (string, error) { + startedActivityCount.Add(1) + + if !activityWasReset { + activityErr := errors.New("bad-luck-please-retry") + return "", activityErr + } + s.WaitForChannel(ctx, activityCompleteCn) + return "done!", nil + } + + workflowFn := makeWorkflowFunc(activityFunction) + + 
s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + workflowOptions := sdkclient.StartWorkflowOptions{ + ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), + TaskQueue: s.WorkerTaskQueue(), + } + + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + s.NoError(err) + + // wait for activity to start/fail few times + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.GetPendingActivities(), 1) + require.Greater(t, startedActivityCount.Load(), int32(1)) + }, 5*time.Second, 100*time.Millisecond) + + // pause activity + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", "", "")) + + // wait for activity to be in paused state and waiting for retry + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.GetPendingActivities(), 1) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, description.PendingActivities[0].State) + // also verify that the number of attempts was not reset + require.Greater(t, description.PendingActivities[0].Attempt, int32(1)) + }, 5*time.Second, 100*time.Millisecond) + + activityWasReset = true + + // unpause the activity with reset + s.NoError(api.unpause(ctx, s, workflowRun.GetID(), "activity-id", "", true)) + + // wait for activity to be running + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.GetPendingActivities(), 1) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_STARTED, description.PendingActivities[0].State) + // also verify that the number of attempts was reset + 
require.Equal(t, int32(1), description.PendingActivities[0].Attempt) + }, 5*time.Second, 100*time.Millisecond) + + // let activity finish + activityCompleteCn <- struct{}{} + + // wait for workflow to finish + var out string + err = workflowRun.Get(ctx, &out) + + s.NoError(err) + }) + }) } - unpauseResp, err := env.FrontendClient().UnpauseActivity(ctx, unpauseRequest) - s.NoError(err) - s.NotNil(unpauseResp) - - // wait for activity to be running - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.GetPendingActivities(), 1) - require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_STARTED, description.PendingActivities[0].State) - // also verify that the number of attempts was reset - require.Equal(t, int32(1), description.PendingActivities[0].Attempt) - }, 5*time.Second, 100*time.Millisecond) - - // let activity finish - env.SendToChannel(activityCompleteCn) - - // wait for workflow to finish - var out string - err = workflowRun.Get(ctx, &out) - - s.NoError(err) } diff --git a/tests/activity_api_reset_test.go b/tests/activity_api_reset_test.go index b6d656d0219..6e5d8215e01 100644 --- a/tests/activity_api_reset_test.go +++ b/tests/activity_api_reset_test.go @@ -33,6 +33,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" "go.temporal.io/api/workflowservice/v1" @@ -41,28 +42,88 @@ import ( "go.temporal.io/sdk/temporal" "go.temporal.io/sdk/workflow" "go.temporal.io/server/common/payloads" - "go.temporal.io/server/common/testing/parallelsuite" + "go.temporal.io/server/common/testing/testvars" "go.temporal.io/server/common/util" "go.temporal.io/server/tests/testcore" ) type ActivityApiResetClientTestSuite struct { - 
parallelsuite.Suite[*ActivityApiResetClientTestSuite] + testcore.FunctionalTestBase + tv *testvars.TestVars + initialRetryInterval time.Duration + scheduleToCloseTimeout time.Duration + startToCloseTimeout time.Duration + activityRetryPolicy *temporal.RetryPolicy + + // apiName selects which reset API variant to exercise ("legacy-api" or "execution-api"). + // Set before suite.Run; used by SetupTest to initialise resetFn. + apiName string + // resetFn is the adapter for the API under test, initialised in SetupTest. + resetFn func(ctx context.Context, wfID, actID string, resetHeartbeat, keepPaused bool) error } +// TestActivityApiResetClientTestSuite runs the suite twice: once with the legacy +// ResetActivity API and once with the newer ResetActivityExecution API. func TestActivityApiResetClientTestSuite(t *testing.T) { - parallelsuite.Run(t, &ActivityApiResetClientTestSuite{}) + for _, apiName := range []string{"legacy-api", "execution-api"} { + apiName := apiName + t.Run(apiName, func(t *testing.T) { + s := new(ActivityApiResetClientTestSuite) + s.apiName = apiName + suite.Run(t, s) + }) + } } -func (s *ActivityApiResetClientTestSuite) makeWorkflowFunc(activityFunction ActivityFunctions, retryPolicy *temporal.RetryPolicy) WorkflowFunction { +func (s *ActivityApiResetClientTestSuite) SetupTest() { + s.FunctionalTestBase.SetupTest() + + s.tv = testvars.New(s.T()).WithTaskQueue(s.TaskQueue()).WithNamespaceName(s.Namespace()) + + s.initialRetryInterval = 1 * time.Second + s.scheduleToCloseTimeout = 30 * time.Minute + s.startToCloseTimeout = 15 * time.Minute + + s.activityRetryPolicy = &temporal.RetryPolicy{ + InitialInterval: s.initialRetryInterval, + BackoffCoefficient: 1, + } + + if s.apiName == "execution-api" { + s.resetFn = func(ctx context.Context, wfID, actID string, resetHeartbeat, keepPaused bool) error { + _, err := s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: s.Namespace().String(), + 
WorkflowId: wfID, + ActivityId: actID, + ResetHeartbeat: resetHeartbeat, + KeepPaused: keepPaused, + }) + return err + } + } else { + s.resetFn = func(ctx context.Context, wfID, actID string, resetHeartbeat, keepPaused bool) error { + _, err := s.FrontendClient().ResetActivity(ctx, &workflowservice.ResetActivityRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: wfID}, + Activity: &workflowservice.ResetActivityRequest_Id{Id: actID}, + ResetHeartbeat: resetHeartbeat, + KeepPaused: keepPaused, + }) + return err + } + } +} + +func (s *ActivityApiResetClientTestSuite) makeWorkflowFunc(activityFunction ActivityFunctions) WorkflowFunction { return func(ctx workflow.Context) error { + var ret string err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ ActivityID: "activity-id", DisableEagerExecution: true, - StartToCloseTimeout: 15 * time.Minute, - ScheduleToCloseTimeout: 30 * time.Minute, - RetryPolicy: retryPolicy, + StartToCloseTimeout: s.startToCloseTimeout, + ScheduleToCloseTimeout: s.scheduleToCloseTimeout, + RetryPolicy: s.activityRetryPolicy, }), activityFunction).Get(ctx, &ret) return err } @@ -70,8 +131,6 @@ func (s *ActivityApiResetClientTestSuite) makeWorkflowFunc(activityFunction Acti func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_AfterRetry() { // activity reset is called after multiple attempts, - env := testcore.NewEnv(s.T()) - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -87,51 +146,39 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_AfterRetry() { return "", activityErr } - env.WaitForChannel(activityCompleteCh) + s.WaitForChannel(ctx, activityCompleteCh) return "done!", nil } - workflowFn := s.makeWorkflowFunc(activityFunction, &temporal.RetryPolicy{ - InitialInterval: 1 * time.Second, - BackoffCoefficient: 1, - }) + workflowFn := s.makeWorkflowFunc(activityFunction) - 
env.SdkWorker().RegisterWorkflow(workflowFn) - env.SdkWorker().RegisterActivity(activityFunction) + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) wfId := testcore.RandomizeStr("wfid-" + s.T().Name()) workflowOptions := sdkclient.StartWorkflowOptions{ ID: wfId, - TaskQueue: env.WorkerTaskQueue(), + TaskQueue: s.TaskQueue(), } - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) s.NoError(err) // wait for activity to start/fail few times s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.GetPendingActivities(), 1) require.Greater(t, startedActivityCount.Load(), int32(1)) }, 5*time.Second, 200*time.Millisecond) - resetRequest := &workflowservice.ResetActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.ResetActivityRequest_Id{Id: "activity-id"}, - } - resp, err := env.FrontendClient().ResetActivity(ctx, resetRequest) - s.NoError(err) - s.NotNil(resp) + s.NoError(s.resetFn(ctx, workflowRun.GetID(), "activity-id", false, false)) activityWasReset.Store(true) // wait for activity to be running s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.GetPendingActivities(), 1) require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_STARTED, 
description.PendingActivities[0].State) @@ -151,8 +198,6 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_AfterRetry() { func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_WhileRunning() { // activity reset is called while activity is running - env := testcore.NewEnv(s.T()) - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -160,51 +205,39 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_WhileRunning() { var startedActivityCount atomic.Int32 activityFunction := func() (string, error) { startedActivityCount.Add(1) - env.WaitForChannel(activityCompleteCh) + s.WaitForChannel(ctx, activityCompleteCh) return "done!", nil } - workflowFn := s.makeWorkflowFunc(activityFunction, &temporal.RetryPolicy{ - InitialInterval: 1 * time.Second, - BackoffCoefficient: 1, - }) + workflowFn := s.makeWorkflowFunc(activityFunction) - env.SdkWorker().RegisterWorkflow(workflowFn) - env.SdkWorker().RegisterActivity(activityFunction) + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) workflowOptions := sdkclient.StartWorkflowOptions{ - ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), - TaskQueue: env.WorkerTaskQueue(), + ID: s.tv.WorkflowID(), + TaskQueue: s.TaskQueue(), } - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) s.NoError(err) // wait for activity to start s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.GetPendingActivities(), 1) require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_STARTED, description.PendingActivities[0].State) }, 5*time.Second, 
200*time.Millisecond) - resetRequest := &workflowservice.ResetActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.ResetActivityRequest_Id{Id: "activity-id"}, - } - resp, err := env.FrontendClient().ResetActivity(ctx, resetRequest) - s.NoError(err) - s.NotNil(resp) + s.NoError(s.resetFn(ctx, workflowRun.GetID(), "activity-id", false, false)) // wait a bit util.InterruptibleSleep(ctx, 1*time.Second) // check if workflow and activity are still running s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.GetPendingActivities(), 1) require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_STARTED, description.PendingActivities[0].State) @@ -226,7 +259,11 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_WhileRunning() { func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_InRetry() { // reset is called while activity is in retry - env := testcore.NewEnv(s.T()) + s.initialRetryInterval = 1 * time.Minute + s.activityRetryPolicy = &temporal.RetryPolicy{ + InitialInterval: s.initialRetryInterval, + BackoffCoefficient: 1, + } ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -242,50 +279,38 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_InRetry() { return "", activityErr } - env.WaitForChannel(activityCompleteCh) + s.WaitForChannel(ctx, activityCompleteCh) return "done!", nil } - workflowFn := s.makeWorkflowFunc(activityFunction, &temporal.RetryPolicy{ - InitialInterval: 1 * time.Minute, - BackoffCoefficient: 1, - }) + workflowFn := s.makeWorkflowFunc(activityFunction) - env.SdkWorker().RegisterWorkflow(workflowFn) - 
env.SdkWorker().RegisterActivity(activityFunction) + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) wfId := testcore.RandomizeStr("wf_id-" + s.T().Name()) workflowOptions := sdkclient.StartWorkflowOptions{ ID: wfId, - TaskQueue: env.WorkerTaskQueue(), + TaskQueue: s.TaskQueue(), } - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) s.NoError(err) // wait for activity to start, fail and wait for retry s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.PendingActivities, 1) require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_SCHEDULED, description.PendingActivities[0].State) require.Equal(t, int32(1), startedActivityCount.Load()) }, 5*time.Second, 200*time.Millisecond) - resetRequest := &workflowservice.ResetActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.ResetActivityRequest_Id{Id: "activity-id"}, - } - resp, err := env.FrontendClient().ResetActivity(ctx, resetRequest) - s.NoError(err) - s.NotNil(resp) + s.NoError(s.resetFn(ctx, workflowRun.GetID(), "activity-id", false, false)) // wait for activity to start. 
Wait time is shorter than original retry interval s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.GetPendingActivities(), 1) require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_STARTED, description.PendingActivities[0].State) @@ -305,7 +330,11 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_InRetry() { func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_KeepPaused() { // reset is called while activity is in retry - env := testcore.NewEnv(s.T()) + s.initialRetryInterval = 1 * time.Minute + s.activityRetryPolicy = &temporal.RetryPolicy{ + InitialInterval: s.initialRetryInterval, + BackoffCoefficient: 1, + } ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -322,30 +351,27 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_KeepPaused() { return "", activityErr } - env.WaitForChannel(activityCompleteCh) + s.WaitForChannel(ctx, activityCompleteCh) return "done!", nil } - workflowFn := s.makeWorkflowFunc(activityFunction, &temporal.RetryPolicy{ - InitialInterval: 1 * time.Minute, - BackoffCoefficient: 1, - }) + workflowFn := s.makeWorkflowFunc(activityFunction) - env.SdkWorker().RegisterWorkflow(workflowFn) - env.SdkWorker().RegisterActivity(activityFunction) + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) wfId := testcore.RandomizeStr("wf_id-" + s.T().Name()) workflowOptions := sdkclient.StartWorkflowOptions{ ID: wfId, - TaskQueue: env.WorkerTaskQueue(), + TaskQueue: s.TaskQueue(), } - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) s.NoError(err) // 
wait for activity to start, fail few times and wait for retry s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.PendingActivities, 1) require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_SCHEDULED, description.PendingActivities[0].State) @@ -354,19 +380,19 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_KeepPaused() { // pause the activity pauseRequest := &workflowservice.PauseActivityRequest{ - Namespace: env.Namespace().String(), + Namespace: s.Namespace().String(), Execution: &commonpb.WorkflowExecution{ WorkflowId: workflowRun.GetID(), }, Activity: &workflowservice.PauseActivityRequest_Id{Id: "activity-id"}, } - pauseResp, err := env.FrontendClient().PauseActivity(ctx, pauseRequest) + pauseResp, err := s.FrontendClient().PauseActivity(ctx, pauseRequest) s.NoError(err) s.NotNil(pauseResp) // verify that activity is paused s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.NotNil(t, description) require.Len(t, description.GetPendingActivities(), 1) @@ -377,21 +403,11 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_KeepPaused() { }, 5*time.Second, 100*time.Millisecond) // reset the activity, while keeping it paused - resetRequest := &workflowservice.ResetActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.ResetActivityRequest_Id{Id: "activity-id"}, - KeepPaused: true, - } - resp, err := 
env.FrontendClient().ResetActivity(ctx, resetRequest) - s.NoError(err) - s.NotNil(resp) + s.NoError(s.resetFn(ctx, workflowRun.GetID(), "activity-id", false, true)) // verify that activity is still paused, and reset s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.NotNil(t, description) require.Len(t, description.GetPendingActivities(), 1) @@ -405,13 +421,13 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_KeepPaused() { // unpause the activity unpauseRequest := &workflowservice.UnpauseActivityRequest{ - Namespace: env.Namespace().String(), + Namespace: s.Namespace().String(), Execution: &commonpb.WorkflowExecution{ WorkflowId: workflowRun.GetID(), }, Activity: &workflowservice.UnpauseActivityRequest_Id{Id: "activity-id"}, } - unpauseResp, err := env.FrontendClient().UnpauseActivity(ctx, unpauseRequest) + unpauseResp, err := s.FrontendClient().UnpauseActivity(ctx, unpauseRequest) s.NoError(err) s.NotNil(unpauseResp) @@ -441,12 +457,6 @@ func (s *ActivityApiResetClientTestSuite) TestActivityReset_HeartbeatDetails() { // 2. First invocation of activity sets heartbeat details and fails upon request. // 3. Second invocation triggers waits to be triggered, and then send new heartbeat until requested to finish. // 6. Once workflow completes -- we're done. 
- env := testcore.NewEnv(s.T()) - - activityRetryPolicy := &temporal.RetryPolicy{ - InitialInterval: 1 * time.Second, - BackoffCoefficient: 1, - } activityCompleteCh := make(chan struct{}) var activityIteration atomic.Int32 @@ -462,7 +472,7 @@ func (s *ActivityApiResetClientTestSuite) TestActivityReset_HeartbeatDetails() { return "", errors.New("bad-luck-please-retry") } // not the first iteration - env.WaitForChannel(activityCompleteCh) + s.WaitForChannel(ctx, activityCompleteCh) for activityShouldFinish.Load() == false { activity.RecordHeartbeat(ctx, "second") time.Sleep(time.Second) //nolint:forbidigo @@ -476,25 +486,25 @@ func (s *ActivityApiResetClientTestSuite) TestActivityReset_HeartbeatDetails() { err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ ActivityID: activityId, DisableEagerExecution: true, - StartToCloseTimeout: 15 * time.Minute, - ScheduleToCloseTimeout: 30 * time.Minute, - RetryPolicy: activityRetryPolicy, + StartToCloseTimeout: s.startToCloseTimeout, + ScheduleToCloseTimeout: s.scheduleToCloseTimeout, + RetryPolicy: s.activityRetryPolicy, }), activityFn).Get(ctx, &ret) return ret, err } - env.SdkWorker().RegisterActivity(activityFn) - env.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFn) + s.SdkWorker().RegisterWorkflow(workflowFn) wfId := "functional-test-heartbeat-details-after-reset" workflowOptions := sdkclient.StartWorkflowOptions{ ID: wfId, - TaskQueue: env.WorkerTaskQueue(), + TaskQueue: s.TaskQueue(), WorkflowRunTimeout: 20 * time.Second, } ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) s.NoError(err) s.NotNil(workflowRun) @@ -503,7 +513,7 @@ func (s *ActivityApiResetClientTestSuite) TestActivityReset_HeartbeatDetails() { // make sure activity is 
running and sending heartbeats s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.PendingActivities, 1) requirePayload(t, "first", description.PendingActivities[0].GetHeartbeatDetails()) @@ -511,25 +521,14 @@ func (s *ActivityApiResetClientTestSuite) TestActivityReset_HeartbeatDetails() { }, 5*time.Second, 500*time.Millisecond) // reset the activity, with heartbeats - resetRequest := &workflowservice.ResetActivityRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.ResetActivityRequest_Id{Id: activityId}, - ResetHeartbeat: true, - } - - resp, err := env.FrontendClient().ResetActivity(ctx, resetRequest) - s.NoError(err) - s.NotNil(resp) + s.NoError(s.resetFn(ctx, workflowRun.GetID(), activityId, true, false)) activityIteration.Store(1) activityShouldBreak.Store(true) // wait for activity to fail and retried s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.PendingActivities, 1) ap := description.PendingActivities[0] @@ -545,7 +544,7 @@ func (s *ActivityApiResetClientTestSuite) TestActivityReset_HeartbeatDetails() { // make sure activity is running and sending heartbeats s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), 
workflowRun.GetRunID()) require.NoError(t, err) require.Equal(t, int32(1), activityIteration.Load()) require.Len(t, description.PendingActivities, 1) diff --git a/tests/activity_api_update_test.go b/tests/activity_api_update_test.go index 0afcdd54ef4..6874b3a50b6 100644 --- a/tests/activity_api_update_test.go +++ b/tests/activity_api_update_test.go @@ -16,7 +16,6 @@ import ( sdkclient "go.temporal.io/sdk/client" "go.temporal.io/sdk/temporal" "go.temporal.io/sdk/workflow" - "go.temporal.io/server/common/testing/parallelsuite" "go.temporal.io/server/tests/testcore" "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/fieldmaskpb" @@ -57,372 +56,396 @@ func makeActivityUpdateWorkflowFunc( } } -type ActivityAPIUpdateClientTestSuite struct { - parallelsuite.Suite[*ActivityAPIUpdateClientTestSuite] +// activityUpdateAPI abstracts UpdateActivityOptions/UpdateActivityExecutionOptions +// so the same test body can verify both APIs. +type activityUpdateAPI struct { + name string + update func(ctx context.Context, s *testcore.TestEnv, wfID, actID string, opts *activitypb.ActivityOptions, maskPaths []string, restoreOriginal bool) (*activitypb.ActivityOptions, error) } -func TestActivityAPIUpdateClientTestSuite(t *testing.T) { - parallelsuite.Run(t, &ActivityAPIUpdateClientTestSuite{}) -} - -func (s *ActivityAPIUpdateClientTestSuite) TestActivityUpdateApi_ChangeRetryInterval() { - env := testcore.NewEnv(s.T()) - - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - activityUpdated := make(chan struct{}) - - var startedActivityCount atomic.Int32 - activityFunction := func() (string, error) { - startedActivityCount.Add(1) - if startedActivityCount.Load() == 1 { - activityErr := errors.New("bad-luck-please-retry") - - return "", activityErr - } - - env.WaitForChannel(activityUpdated) - return "done!", nil - } - - scheduleToCloseTimeout := 30 * time.Minute - retryTimeout := 10 * time.Minute - 
workflowFn := makeActivityUpdateWorkflowFunc(activityFunction, scheduleToCloseTimeout, retryTimeout) - - env.SdkWorker().RegisterWorkflow(workflowFn) - env.SdkWorker().RegisterActivity(activityFunction) - - workflowOptions := sdkclient.StartWorkflowOptions{ - ID: activityUpdateWorkflowID, - TaskQueue: env.WorkerTaskQueue(), - } - - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) - s.NoError(err) - - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.GetPendingActivities(), 1) - require.Equal(t, int32(1), startedActivityCount.Load()) - }, 10*time.Second, 500*time.Millisecond) - - updateRequest := &workflowservice.UpdateActivityOptionsRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.UpdateActivityOptionsRequest_Id{Id: "activity-id"}, - ActivityOptions: &activitypb.ActivityOptions{ - RetryPolicy: &commonpb.RetryPolicy{ - InitialInterval: durationpb.New(1 * time.Second), +func updateAPIs() []activityUpdateAPI { + return []activityUpdateAPI{ + { + name: "legacy-api", + update: func(ctx context.Context, s *testcore.TestEnv, wfID, actID string, opts *activitypb.ActivityOptions, maskPaths []string, restoreOriginal bool) (*activitypb.ActivityOptions, error) { + resp, err := s.FrontendClient().UpdateActivityOptions(ctx, &workflowservice.UpdateActivityOptionsRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: wfID}, + Activity: &workflowservice.UpdateActivityOptionsRequest_Id{Id: actID}, + ActivityOptions: opts, + UpdateMask: &fieldmaskpb.FieldMask{Paths: maskPaths}, + RestoreOriginal: restoreOriginal, + }) + if err != nil { + return nil, err + } + return resp.GetActivityOptions(), nil }, }, - UpdateMask: 
&fieldmaskpb.FieldMask{Paths: []string{"retry_policy.initial_interval"}}, - } - resp, err := env.FrontendClient().UpdateActivityOptions(ctx, updateRequest) - s.NoError(err) - s.NotNil(resp) - - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - s.NoError(err) - s.Len(description.PendingActivities, 1) - - activityUpdated <- struct{}{} - - s.EventuallyWithT(func(t *assert.CollectT) { - description, err = env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Empty(t, description.GetPendingActivities()) - require.Equal(t, int32(2), startedActivityCount.Load()) - }, 3*time.Second, 100*time.Millisecond) - - var out string - err = workflowRun.Get(ctx, &out) - - s.NoError(err) -} - -func (s *ActivityAPIUpdateClientTestSuite) TestActivityUpdateApi_ChangeScheduleToClose() { - env := testcore.NewEnv(s.T()) - - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - var startedActivityCount atomic.Int32 - activityFunction := func() (string, error) { - startedActivityCount.Add(1) - if startedActivityCount.Load() == 1 { - activityErr := errors.New("bad-luck-please-retry") - return "", activityErr - } - return "done!", nil - } - - scheduleToCloseTimeout := 30 * time.Minute - retryTimeout := 10 * time.Minute - - workflowFn := makeActivityUpdateWorkflowFunc(activityFunction, scheduleToCloseTimeout, retryTimeout) - - env.SdkWorker().RegisterWorkflow(workflowFn) - env.SdkWorker().RegisterActivity(activityFunction) - - workflowOptions := sdkclient.StartWorkflowOptions{ - ID: activityUpdateWorkflowID, - TaskQueue: env.WorkerTaskQueue(), - } - - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) - s.NoError(err) - - // wait for activity to start (and fail) - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, 
workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.GetPendingActivities(), 1) - require.Equal(t, int32(1), startedActivityCount.Load()) - - }, 2*time.Second, 200*time.Millisecond) - - // update schedule_to_close_timeout - updateRequest := &workflowservice.UpdateActivityOptionsRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.UpdateActivityOptionsRequest_Id{Id: "activity-id"}, - ActivityOptions: &activitypb.ActivityOptions{ - ScheduleToCloseTimeout: durationpb.New(1 * time.Second), - }, - UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"schedule_to_close_timeout"}}, - } - resp, err := env.FrontendClient().UpdateActivityOptions(ctx, updateRequest) - s.NoError(err) - s.NotNil(resp) - - // activity should fail immediately - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Empty(t, description.GetPendingActivities()) - require.Equal(t, int32(1), startedActivityCount.Load()) - }, 2*time.Second, 200*time.Millisecond) - - var out string - err = workflowRun.Get(ctx, &out) - var activityError *temporal.ActivityError - s.ErrorAs(err, &activityError) - // SCHEDULE_TO_CLOSE timeout now returns RETRY_STATE_TIMEOUT instead of RETRY_STATE_NON_RETRYABLE_FAILURE - s.Equal(enumspb.RETRY_STATE_TIMEOUT, activityError.RetryState()) - var timeoutError *temporal.TimeoutError - s.ErrorAs(activityError, &timeoutError) - s.Equal(enumspb.TIMEOUT_TYPE_SCHEDULE_TO_CLOSE, timeoutError.TimeoutType()) - s.Equal(int32(1), startedActivityCount.Load()) -} - -func (s *ActivityAPIUpdateClientTestSuite) TestActivityUpdateApi_ChangeScheduleToCloseAndRetry() { - // change both schedule to close and retry policy - // initial values are chosen in such a way that activity will fail due to schedule to close 
timeout - // we change schedule to close to a longer value and retry policy to a shorter value - // after that activity should succeed - env := testcore.NewEnv(s.T()) - - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - var startedActivityCount atomic.Int32 - activityFunction := func() (string, error) { - startedActivityCount.Add(1) - if startedActivityCount.Load() == 1 { - activityErr := errors.New("bad-luck-please-retry") - - return "", activityErr - } - return "done!", nil - } - - // make scheduleToClose shorter than retry 2nd retry interval - scheduleToCloseTimeout := 8 * time.Second - retryInterval := 5 * time.Second - - workflowFn := makeActivityUpdateWorkflowFunc( - activityFunction, scheduleToCloseTimeout, retryInterval) - - env.SdkWorker().RegisterWorkflow(workflowFn) - env.SdkWorker().RegisterActivity(activityFunction) - - workflowOptions := sdkclient.StartWorkflowOptions{ - ID: activityUpdateWorkflowID, - TaskQueue: env.WorkerTaskQueue(), - } - - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) - s.NoError(err) - - // wait for activity to start (and fail) - s.EventuallyWithT(func(t *assert.CollectT) { - require.NotZero(t, startedActivityCount.Load()) - }, 2*time.Second, 200*time.Millisecond) - - // update schedule_to_close_timeout, make it longer - // also update retry policy interval, make it shorter - newScheduleToCloseTimeout := 10 * time.Second - updateRequest := &workflowservice.UpdateActivityOptionsRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.UpdateActivityOptionsRequest_Id{Id: "activity-id"}, - ActivityOptions: &activitypb.ActivityOptions{ - ScheduleToCloseTimeout: durationpb.New(newScheduleToCloseTimeout), - RetryPolicy: &commonpb.RetryPolicy{ - InitialInterval: durationpb.New(1 * time.Second), + { + name: "execution-api", + update: func(ctx 
context.Context, s *testcore.TestEnv, wfID, actID string, opts *activitypb.ActivityOptions, maskPaths []string, restoreOriginal bool) (*activitypb.ActivityOptions, error) { + resp, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + WorkflowId: wfID, + ActivityId: actID, + ActivityOptions: opts, + UpdateMask: &fieldmaskpb.FieldMask{Paths: maskPaths}, + RestoreOriginal: restoreOriginal, + }) + if err != nil { + return nil, err + } + return resp.GetActivityOptions(), nil }, }, - UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"schedule_to_close_timeout", "retry_policy.initial_interval"}}, } - - resp, err := env.FrontendClient().UpdateActivityOptions(ctx, updateRequest) - s.NoError(err) - s.NotNil(resp) - // check that the update was successful - s.Equal(int64(newScheduleToCloseTimeout.Seconds()), resp.GetActivityOptions().ScheduleToCloseTimeout.GetSeconds()) - // check that field we didn't update is the same - s.Equal(int64(scheduleToCloseTimeout.Seconds()), resp.GetActivityOptions().StartToCloseTimeout.GetSeconds()) - - // now activity should succeed - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Empty(t, description.GetPendingActivities()) - require.Equal(t, int32(2), startedActivityCount.Load()) - }, 5*time.Second, 200*time.Millisecond) - - var out string - err = workflowRun.Get(ctx, &out) - s.NoError(err) } -func (s *ActivityAPIUpdateClientTestSuite) TestActivityUpdateApi_ResetDefaultOptions() { - // plan: - // 1. start the workflow, wait for activity to start and fail, - // 2. update activity options to change retry policy maximum attempts - // 3. reset activity options to default, verify that retry policy is reset to default - // 4. 
update activity options again, this time change schedule to close timeout and retry policy initial interval - // 5. let activity finish, verify that it finished with updated options - env := testcore.NewEnv(s.T()) - - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - activityUpdated := make(chan struct{}) - - var startedActivityCount atomic.Int32 - activityFunction := func() (string, error) { - startedActivityCount.Add(1) - if startedActivityCount.Load() == 1 { - activityErr := errors.New("bad-luck-please-retry") - - return "", activityErr - } - - env.WaitForChannel(activityUpdated) - return "done!", nil - } +func TestActivityApiUpdateClientTestSuite(t *testing.T) { + t.Parallel() + + for _, api := range updateAPIs() { + api := api + t.Run(api.name, func(t *testing.T) { + t.Parallel() + + t.Run("TestActivityUpdateApi_ChangeRetryInterval", func(t *testing.T) { + s := testcore.NewEnv(t, testcore.WithSdkWorker()) + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() - scheduleToCloseTimeout := 30 * time.Minute - retryTimeout := 10 * time.Minute - workflowFn := makeActivityUpdateWorkflowFunc(activityFunction, scheduleToCloseTimeout, retryTimeout) + activityUpdated := make(chan struct{}) + + var startedActivityCount atomic.Int32 + activityFunction := func() (string, error) { + startedActivityCount.Add(1) + if startedActivityCount.Load() == 1 { + activityErr := errors.New("bad-luck-please-retry") - env.SdkWorker().RegisterWorkflow(workflowFn) - env.SdkWorker().RegisterActivity(activityFunction) - - workflowOptions := sdkclient.StartWorkflowOptions{ - ID: activityUpdateWorkflowID, - TaskQueue: env.WorkerTaskQueue(), - } - - workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) - s.NoError(err) - - // wait for activity to start (and fail) - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, 
workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.GetPendingActivities(), 1) - require.Equal(t, int32(1), startedActivityCount.Load()) - }, 10*time.Second, 500*time.Millisecond) - - // update activity options, set retry policy to 1000 attempts - updateRequest := &workflowservice.UpdateActivityOptionsRequest{ - Namespace: env.Namespace().String(), - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowRun.GetID(), - }, - Activity: &workflowservice.UpdateActivityOptionsRequest_Id{Id: "activity-id"}, - ActivityOptions: &activitypb.ActivityOptions{ - RetryPolicy: &commonpb.RetryPolicy{ - MaximumAttempts: 1000, - }, - }, - UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"retry_policy.maximum_attempts"}}, - } - resp, err := env.FrontendClient().UpdateActivityOptions(ctx, updateRequest) - s.NoError(err) - s.NotNil(resp) - - // check that the update was successful - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.PendingActivities, 1) - require.Equal(t, int32(1000), description.PendingActivities[0].GetActivityOptions().GetRetryPolicy().GetMaximumAttempts()) - }, 3*time.Second, 200*time.Millisecond) - - // reset activity options to default - updateRequest.ActivityOptions = nil - updateRequest.UpdateMask = &fieldmaskpb.FieldMask{Paths: []string{}} - updateRequest.RestoreOriginal = true - resp, err = env.FrontendClient().UpdateActivityOptions(ctx, updateRequest) - s.NoError(err) - s.NotNil(resp) - - // check that the update was successful - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Len(t, description.PendingActivities, 1) - require.Equal(t, int32(defaultMaximumAttempts), 
description.PendingActivities[0].GetActivityOptions().GetRetryPolicy().GetMaximumAttempts()) - }, 3*time.Second, 200*time.Millisecond) - - // update activity options again, this time set retry interval to 1 second - newScheduleToCloseTimeout := 10 * time.Second - updateRequest.ActivityOptions = &activitypb.ActivityOptions{ - ScheduleToCloseTimeout: durationpb.New(newScheduleToCloseTimeout), - RetryPolicy: &commonpb.RetryPolicy{ - InitialInterval: durationpb.New(1 * time.Second), - }, + return "", activityErr + } + + s.WaitForChannel(ctx, activityUpdated) + return "done!", nil + } + + scheduleToCloseTimeout := 30 * time.Minute + retryTimeout := 10 * time.Minute + workflowFn := makeActivityUpdateWorkflowFunc(activityFunction, scheduleToCloseTimeout, retryTimeout) + + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + workflowOptions := sdkclient.StartWorkflowOptions{ + ID: activityUpdateWorkflowID, + TaskQueue: s.WorkerTaskQueue(), + } + + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + s.NoError(err) + + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.GetPendingActivities(), 1) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 10*time.Second, 500*time.Millisecond) + + _, err = api.update(ctx, s, workflowRun.GetID(), "activity-id", + &activitypb.ActivityOptions{ + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(1 * time.Second), + }, + }, + []string{"retry_policy.initial_interval"}, + false, + ) + s.NoError(err) + + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + s.NoError(err) + s.Len(description.PendingActivities, 1) + + activityUpdated <- struct{}{} + + s.EventuallyWithT(func(t *assert.CollectT) { + description, err = 
s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Empty(t, description.GetPendingActivities()) + require.Equal(t, int32(2), startedActivityCount.Load()) + }, 3*time.Second, 100*time.Millisecond) + + var out string + err = workflowRun.Get(ctx, &out) + + s.NoError(err) + }) + + t.Run("TestActivityUpdateApi_ChangeScheduleToClose", func(t *testing.T) { + s := testcore.NewEnv(t, testcore.WithSdkWorker()) + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + var startedActivityCount atomic.Int32 + activityFunction := func() (string, error) { + startedActivityCount.Add(1) + if startedActivityCount.Load() == 1 { + activityErr := errors.New("bad-luck-please-retry") + return "", activityErr + } + return "done!", nil + } + + scheduleToCloseTimeout := 30 * time.Minute + retryTimeout := 10 * time.Minute + + workflowFn := makeActivityUpdateWorkflowFunc(activityFunction, scheduleToCloseTimeout, retryTimeout) + + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + workflowOptions := sdkclient.StartWorkflowOptions{ + ID: activityUpdateWorkflowID, + TaskQueue: s.WorkerTaskQueue(), + } + + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + s.NoError(err) + + // wait for activity to start (and fail) + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.GetPendingActivities(), 1) + require.Equal(t, int32(1), startedActivityCount.Load()) + + }, 2*time.Second, 200*time.Millisecond) + + // update schedule_to_close_timeout + _, err = api.update(ctx, s, workflowRun.GetID(), "activity-id", + &activitypb.ActivityOptions{ + ScheduleToCloseTimeout: durationpb.New(1 * time.Second), + }, + []string{"schedule_to_close_timeout"}, + false, + ) + 
s.NoError(err) + + // activity should fail immediately + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Empty(t, description.GetPendingActivities()) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 2*time.Second, 200*time.Millisecond) + + var out string + err = workflowRun.Get(ctx, &out) + var activityError *temporal.ActivityError + s.ErrorAs(err, &activityError) + // SCHEDULE_TO_CLOSE timeout now returns RETRY_STATE_TIMEOUT instead of RETRY_STATE_NON_RETRYABLE_FAILURE + s.Equal(enumspb.RETRY_STATE_TIMEOUT, activityError.RetryState()) + var timeoutError *temporal.TimeoutError + s.ErrorAs(activityError, &timeoutError) + s.Equal(enumspb.TIMEOUT_TYPE_SCHEDULE_TO_CLOSE, timeoutError.TimeoutType()) + s.Equal(int32(1), startedActivityCount.Load()) + }) + + t.Run("TestActivityUpdateApi_ChangeScheduleToCloseAndRetry", func(t *testing.T) { + // change both schedule to close and retry policy + // initial values are chosen in such a way that activity will fail due to schedule to close timeout + // we change schedule to close to a longer value and retry policy to a shorter value + // after that activity should succeed + s := testcore.NewEnv(t, testcore.WithSdkWorker()) + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + var startedActivityCount atomic.Int32 + activityFunction := func() (string, error) { + startedActivityCount.Add(1) + if startedActivityCount.Load() == 1 { + activityErr := errors.New("bad-luck-please-retry") + + return "", activityErr + } + return "done!", nil + } + + // make scheduleToClose shorter than retry 2nd retry interval + scheduleToCloseTimeout := 8 * time.Second + retryInterval := 5 * time.Second + + workflowFn := makeActivityUpdateWorkflowFunc( + activityFunction, scheduleToCloseTimeout, retryInterval) + + s.SdkWorker().RegisterWorkflow(workflowFn) 
+ s.SdkWorker().RegisterActivity(activityFunction) + + workflowOptions := sdkclient.StartWorkflowOptions{ + ID: activityUpdateWorkflowID, + TaskQueue: s.WorkerTaskQueue(), + } + + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + s.NoError(err) + + // wait for activity to start (and fail) + s.EventuallyWithT(func(t *assert.CollectT) { + require.NotZero(t, startedActivityCount.Load()) + }, 2*time.Second, 200*time.Millisecond) + + // update schedule_to_close_timeout, make it longer + // also update retry policy interval, make it shorter + newScheduleToCloseTimeout := 10 * time.Second + respOpts, err := api.update(ctx, s, workflowRun.GetID(), "activity-id", + &activitypb.ActivityOptions{ + ScheduleToCloseTimeout: durationpb.New(newScheduleToCloseTimeout), + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(1 * time.Second), + }, + }, + []string{"schedule_to_close_timeout", "retry_policy.initial_interval"}, + false, + ) + s.NoError(err) + // check that the update was successful + s.Equal(int64(newScheduleToCloseTimeout.Seconds()), respOpts.ScheduleToCloseTimeout.GetSeconds()) + // check that field we didn't update is the same + s.Equal(int64(scheduleToCloseTimeout.Seconds()), respOpts.StartToCloseTimeout.GetSeconds()) + + // now activity should succeed + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Empty(t, description.GetPendingActivities()) + require.Equal(t, int32(2), startedActivityCount.Load()) + }, 5*time.Second, 200*time.Millisecond) + + var out string + err = workflowRun.Get(ctx, &out) + s.NoError(err) + }) + + t.Run("TestActivityUpdateApi_ResetDefaultOptions", func(t *testing.T) { + // plan: + // 1. start the workflow, wait for activity to start and fail, + // 2. update activity options to change retry policy maximum attempts + // 3. 
reset activity options to default, verify that retry policy is reset to default + // 4. update activity options again, this time change schedule to close timeout and retry policy initial interval + // 5. let activity finish, verify that it finished with updated options + s := testcore.NewEnv(t, testcore.WithSdkWorker()) + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityUpdated := make(chan struct{}) + + var startedActivityCount atomic.Int32 + activityFunction := func() (string, error) { + startedActivityCount.Add(1) + if startedActivityCount.Load() == 1 { + activityErr := errors.New("bad-luck-please-retry") + + return "", activityErr + } + + s.WaitForChannel(ctx, activityUpdated) + return "done!", nil + } + + scheduleToCloseTimeout := 30 * time.Minute + retryTimeout := 10 * time.Minute + workflowFn := makeActivityUpdateWorkflowFunc(activityFunction, scheduleToCloseTimeout, retryTimeout) + + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + workflowOptions := sdkclient.StartWorkflowOptions{ + ID: activityUpdateWorkflowID, + TaskQueue: s.WorkerTaskQueue(), + } + + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + s.NoError(err) + + // wait for activity to start (and fail) + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.GetPendingActivities(), 1) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 10*time.Second, 500*time.Millisecond) + + // update activity options, set retry policy to 1000 attempts + _, err = api.update(ctx, s, workflowRun.GetID(), "activity-id", + &activitypb.ActivityOptions{ + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 1000, + }, + }, + []string{"retry_policy.maximum_attempts"}, + false, + ) + s.NoError(err) + + // check that 
the update was successful + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.PendingActivities, 1) + require.Equal(t, int32(1000), description.PendingActivities[0].GetActivityOptions().GetRetryPolicy().GetMaximumAttempts()) + }, 3*time.Second, 200*time.Millisecond) + + // reset activity options to default + _, err = api.update(ctx, s, workflowRun.GetID(), "activity-id", + nil, + []string{}, + true, + ) + s.NoError(err) + + // check that the reset was successful + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.PendingActivities, 1) + require.Equal(t, int32(defaultMaximumAttempts), description.PendingActivities[0].GetActivityOptions().GetRetryPolicy().GetMaximumAttempts()) + }, 3*time.Second, 200*time.Millisecond) + + // update activity options again, this time set retry interval to 1 second + newScheduleToCloseTimeout := 10 * time.Second + _, err = api.update(ctx, s, workflowRun.GetID(), "activity-id", + &activitypb.ActivityOptions{ + ScheduleToCloseTimeout: durationpb.New(newScheduleToCloseTimeout), + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(1 * time.Second), + }, + }, + []string{"schedule_to_close_timeout", "retry_policy.initial_interval"}, + false, + ) + s.NoError(err) + + // let activity finish + activityUpdated <- struct{}{} + + // wait for activity to finish + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Empty(t, description.GetPendingActivities()) + require.Equal(t, int32(2), startedActivityCount.Load()) + }, 3*time.Second, 100*time.Millisecond) + + var out string 
+ err = workflowRun.Get(ctx, &out) + + s.NoError(err) + }) + }) } - updateRequest.UpdateMask = &fieldmaskpb.FieldMask{Paths: []string{"schedule_to_close_timeout", "retry_policy.initial_interval"}} - updateRequest.RestoreOriginal = false - resp, err = env.FrontendClient().UpdateActivityOptions(ctx, updateRequest) - s.NoError(err) - s.NotNil(resp) - - // let activity finish - activityUpdated <- struct{}{} - - // wait for activity to finish - s.EventuallyWithT(func(t *assert.CollectT) { - description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) - require.NoError(t, err) - require.Empty(t, description.GetPendingActivities()) - require.Equal(t, int32(2), startedActivityCount.Load()) - }, 3*time.Second, 100*time.Millisecond) - - var out string - err = workflowRun.Get(ctx, &out) - - s.NoError(err) } diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index 49ce3fe4dc4..991b6dbe9d1 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -4,12 +4,10 @@ import ( "context" "errors" "fmt" - "io" - "net/http/httptest" "testing" "time" - "github.com/nexus-rpc/sdk-go/nexus" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/require" activitypb "go.temporal.io/api/activity/v1" commonpb "go.temporal.io/api/common/v1" @@ -20,13 +18,9 @@ import ( "go.temporal.io/api/serviceerror" taskqueuepb "go.temporal.io/api/taskqueue/v1" "go.temporal.io/api/workflowservice/v1" - "go.temporal.io/sdk/temporal" "go.temporal.io/server/chasm/lib/activity" - "go.temporal.io/server/chasm/lib/callback" "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/log" - commonnexus "go.temporal.io/server/common/nexus" - "go.temporal.io/server/common/nexus/nexusrpc" "go.temporal.io/server/common/payload" "go.temporal.io/server/common/payloads" "go.temporal.io/server/common/tasktoken" @@ -36,6 +30,7 @@ import ( "go.temporal.io/server/tests/testcore" 
"google.golang.org/grpc/codes" "google.golang.org/protobuf/types/known/durationpb" + "google.golang.org/protobuf/types/known/fieldmaskpb" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -230,7 +225,7 @@ func (s *standaloneActivityTestSuite) TestIDConflictPolicy() { }) t.Run("UseExisting", func(t *testing.T) { - originalActivityID := testcore.RandomizeStr(t.Name()) + activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) firstStartResp := env.startAndValidateActivity(ctx, t, originalActivityID, taskQueue) @@ -1402,7 +1397,6 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { t.Run("ByToken", func(t *testing.T) { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - identity := "client-that-requested-cancellation" startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId @@ -1457,7 +1451,6 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { require.Greater(t, info.GetExecutionDuration().AsDuration(), time.Duration(0)) require.NotNil(t, info.GetCloseTime()) protorequire.ProtoEqual(t, details, activityResp.GetOutcome().GetFailure().GetCanceledFailureInfo().GetDetails()) - require.Equal(t, identity, activityResp.GetOutcome().GetFailure().GetCanceledFailureInfo().GetIdentity()) }) testByIDCases := []struct { @@ -1481,7 +1474,6 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { activityID := testcore.RandomizeStr(tc.name) taskQueue := testcore.RandomizeStr(tc.name) - identity := "client-that-requested-cancellation" startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId @@ -1541,7 +1533,6 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { require.Equal(t, "Test Cancellation", info.GetCanceledReason()) require.Equal(t, int64(1), info.GetTotalHeartbeatCount(), "total heartbeat count") protorequire.ProtoEqual(t, details, 
activityResp.GetOutcome().GetFailure().GetCanceledFailureInfo().GetDetails()) - require.Equal(t, identity, activityResp.GetOutcome().GetFailure().GetCanceledFailureInfo().GetIdentity()) }) } @@ -1657,7 +1648,7 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, - Identity: "client-that-requested-cancellation", + Identity: "cancelling-worker", RequestId: "cancel-request-id", Reason: "Test Cancellation", }) @@ -1693,7 +1684,6 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { t.Run("DifferentRequestIDFails", func(t *testing.T) { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - identity := "client-that-requested-cancellation" startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId @@ -1704,7 +1694,7 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, - Identity: identity, + Identity: "cancelling-worker", RequestId: "cancel-request-id", Reason: "Test Cancellation", }) @@ -1714,7 +1704,7 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, - Identity: identity, + Identity: "cancelling-worker", RequestId: "different-cancel-request-id", Reason: "Test Cancellation", }) @@ -1888,7 +1878,7 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { _, err := env.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ Namespace: env.Namespace().String(), Reason: "Test Cancellation", - Identity: "client-that-requested-cancellation", + Identity: "cancelling-worker", }) var invalidArgErr *serviceerror.InvalidArgument @@ -1901,7 +1891,7 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), // dynamic config 
default is 1000 Namespace: env.Namespace().String(), Reason: "Test Cancellation", - Identity: "client-that-requested-cancellation", + Identity: "cancelling-worker", }) var invalidArgErr *serviceerror.InvalidArgument @@ -1916,7 +1906,7 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { RequestId: string(make([]byte, defaultMaxIDLengthLimit+1)), // dynamic config default is 1000 Namespace: env.Namespace().String(), Reason: "Test Cancellation", - Identity: "client-that-requested-cancellation", + Identity: "cancelling-worker", }) var invalidArgErr *serviceerror.InvalidArgument @@ -1945,7 +1935,7 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { RunId: "invalid-run-id", Namespace: env.Namespace().String(), Reason: "Test Cancellation", - Identity: "client-that-requested-cancellation", + Identity: "cancelling-worker", }) var invalidArgErr *serviceerror.InvalidArgument @@ -1965,7 +1955,7 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { ActivityId: testcore.RandomizeStr(t.Name()), Namespace: env.Namespace().String(), Reason: string(make([]byte, blobSizeLimitError+1)), - Identity: "client-that-requested-cancellation", + Identity: "cancelling-worker", }) var invalidArgErr *serviceerror.InvalidArgument @@ -2005,8 +1995,6 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { }) t.Run("StaleToken", func(t *testing.T) { - ctx, cancel := context.WithTimeout(t.Context(), 10*time.Second) - t.Cleanup(cancel) activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) _, err := env.startActivity(ctx, activityID, taskQueue) @@ -2037,8 +2025,6 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { }) t.Run("StaleAttemptToken", func(t *testing.T) { - ctx, cancel := context.WithTimeout(t.Context(), 10*time.Second) - t.Cleanup(cancel) // Start an activity with retries, fail first attempt, then try to complete with old token. 
// Use NextRetryDelay=1s to ensure the retry dispatch happens within test timeout. activityID := testcore.RandomizeStr(t.Name()) @@ -2122,8 +2108,6 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { }) t.Run("MismatchedTokenNamespace", func(t *testing.T) { - ctx, cancel := context.WithTimeout(t.Context(), 10*time.Second) - t.Cleanup(cancel) activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) existingNamespace := env.Namespace().String() @@ -2165,8 +2149,6 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { // The validation ensures that the namespace in the request matches the namespace in the token's // ComponentRef, preventing cross-namespace token reuse attacks. t.Run("MismatchedTokenComponentRef", func(t *testing.T) { - ctx, cancel := context.WithTimeout(t.Context(), 10*time.Second) - t.Cleanup(cancel) activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) existingNamespace := env.Namespace().String() @@ -2265,7 +2247,7 @@ func (s *standaloneActivityTestSuite) TestTerminate() { ActivityId: activityID, RunId: runID, Reason: "Test Termination", - Identity: identity, + Identity: "terminator", }) require.NoError(t, err) @@ -2293,12 +2275,8 @@ func (s *standaloneActivityTestSuite) TestTerminate() { require.Nil(t, info.GetLastFailure()) expectedFailure := &failurepb.Failure{ - Message: "Test Termination", - FailureInfo: &failurepb.Failure_TerminatedFailureInfo{ - TerminatedFailureInfo: &failurepb.TerminatedFailureInfo{ - Identity: identity, - }, - }, + Message: "Test Termination", + FailureInfo: &failurepb.Failure_TerminatedFailureInfo{}, } protorequire.ProtoEqual(t, expectedFailure, activityResp.GetOutcome().GetFailure()) }) @@ -3015,7 +2993,6 @@ func (s *standaloneActivityTestSuite) TestStartToCloseTimeout() { require.NoError(t, err) require.NotNil(t, describeResp2) require.NotNil(t, describeResp2.GetInfo()) - require.Positive(t, 
describeResp2.GetInfo().GetStateSizeBytes()) require.Greater(t, describeResp2.GetInfo().GetStateTransitionCount(), describeResp1.GetInfo().GetStateTransitionCount()) require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_RUNNING, describeResp2.GetInfo().GetStatus(), "expected Running but is %s", describeResp2.GetInfo().GetStatus()) @@ -3034,7 +3011,6 @@ func (s *standaloneActivityTestSuite) TestStartToCloseTimeout() { require.NoError(t, err) require.NotNil(t, describeResp3) require.NotNil(t, describeResp3.GetInfo()) - require.Positive(t, describeResp3.GetInfo().GetStateSizeBytes()) require.Greater(t, describeResp3.GetInfo().GetStateTransitionCount(), describeResp2.GetInfo().GetStateTransitionCount()) // The activity has timed out due to StartToClose. This is an attempt failure, therefore the @@ -3107,6 +3083,7 @@ func (s *standaloneActivityTestSuite) TestStartToCloseTimeout_WhileCancelRequest "activity in CANCEL_REQUESTED should still time out via START_TO_CLOSE") } + // TestScheduleToStartTimeout tests that a schedule-to-start timeout is recorded after the activity is // created but never started. It also verifies that DescribeActivityExecution can be used to long-poll for a TimedOut // state change caused by execution of a timer task. 
@@ -3258,14 +3235,12 @@ func (s *standaloneActivityTestSuite) TestDescribeActivityExecution_NoWait() { protorequire.IgnoreFields( "execution_duration", "schedule_time", - "state_size_bytes", "state_transition_count", ), ) require.Equal(t, respInfo.GetExecutionDuration().AsDuration(), time.Duration(0)) // Never completed, so expect 0 require.Nil(t, describeResp.GetInfo().GetCloseTime()) require.Positive(t, respInfo.GetScheduleTime().AsTime().Unix()) - require.Positive(t, respInfo.GetStateSizeBytes()) require.Positive(t, respInfo.GetStateTransitionCount()) protorequire.ProtoEqual(t, defaultInput, describeResp.Input) @@ -3321,11 +3296,10 @@ func (s *standaloneActivityTestSuite) TestDescribeActivityExecution_WaitAnyState protorequire.IgnoreFields( "execution_duration", "schedule_time", - "state_size_bytes", "state_transition_count", ), ) - require.Positive(t, firstDescribeResp.GetInfo().GetStateSizeBytes()) + require.Empty(t, diff) taskQueuePollErr := make(chan error, 1) activityPollDone := make(chan struct{}) @@ -3381,11 +3355,10 @@ func (s *standaloneActivityTestSuite) TestDescribeActivityExecution_WaitAnyState "execution_duration", "last_started_time", "schedule_time", - "state_size_bytes", "state_transition_count", ), ) - require.Positive(t, describeResp.GetInfo().GetStateSizeBytes()) + require.Empty(t, diff) protorequire.ProtoEqual(t, defaultInput, describeResp.Input) @@ -3526,7 +3499,6 @@ func (s *standaloneActivityTestSuite) TestDescribeActivityExecution_Completed() require.NotNil(t, info.GetCloseTime()) require.Positive(t, info.GetCloseTime().AsTime().Unix()) require.GreaterOrEqual(t, info.GetCloseTime().AsTime().UnixNano(), info.GetLastStartedTime().AsTime().UnixNano()) - require.Positive(t, info.GetStateSizeBytes()) require.Positive(t, info.GetStateTransitionCount()) tc.outcomeValidator(t, describeResp) @@ -5836,7 +5808,7 @@ func (env *standaloneActivityEnv) runNexusCompletionHTTPServer(t *testing.T, h * func (s *standaloneActivityTestSuite) TestCallbacks() { 
env := s.newTestEnv() t := s.T() - ctx, cancel := context.WithTimeout(t.Context(), 15*time.Second) + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) defer cancel() env.OverrideDynamicConfig( @@ -6326,5 +6298,78 @@ func (s *standaloneActivityTestSuite) TestCallbacks() { }) require.NoError(t, err) require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_TIMED_OUT, descResp.GetInfo().GetStatus()) + +func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { + t := s.T() + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + t.Run("StandaloneActivityReturnsError", func(t *testing.T) { + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: testcore.RandomizeStr(t.Name()), + Identity: "test-identity", + Reason: "test", + }) + require.Error(t, err) + var unimplementedErr *serviceerror.Unimplemented + require.ErrorAs(t, err, &unimplementedErr) + }) +} + +func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { + t := s.T() + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + t.Run("StandaloneActivityReturnsError", func(t *testing.T) { + _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: testcore.RandomizeStr(t.Name()), + Identity: "test-identity", + }) + require.Error(t, err) + var unimplementedErr *serviceerror.Unimplemented + require.ErrorAs(t, err, &unimplementedErr) + }) +} + +func (s *standaloneActivityTestSuite) TestResetActivityExecution() { + t := s.T() + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + t.Run("StandaloneActivityReturnsError", func(t *testing.T) { + _, err := s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: 
testcore.RandomizeStr(t.Name()), + Identity: "test-identity", + }) + require.Error(t, err) + var unimplementedErr *serviceerror.Unimplemented + require.ErrorAs(t, err, &unimplementedErr) + }) +} + +func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { + t := s.T() + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + t.Run("StandaloneActivityReturnsError", func(t *testing.T) { + _, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: testcore.RandomizeStr(t.Name()), + Identity: "test-identity", + ActivityOptions: &activitypb.ActivityOptions{ + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Second), + }, + }, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"retry_policy.initial_interval"}}, + }) + require.Error(t, err) + var unimplementedErr *serviceerror.Unimplemented + require.ErrorAs(t, err, &unimplementedErr) }) } From 3e14f5d7d65dd8952c20dd3976a996862b1ec2e0 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Mon, 13 Apr 2026 15:23:08 -0600 Subject: [PATCH 02/25] Implement UpdateActivityExecutionOptions (#9850) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the `UpdateActivityExecutionOptions` RPC for standalone activities. The workflow-activity path is already supported via the existing UpdateActivityOptions history service API; this PR only wires up the standalone path end-to-end. 
Core changes: - `chasm/lib/activity/activity.go` — Adds `UpdateActivityExecutionOptions` method on `Activity`: - Validates mutual exclusion of `update_mask` / `restore_original` - `restore_original`: bulk-assigns all fields from the `original_options` snapshot stored at schedule time - Field-mask path: applies only the specified fields via `mergeActivityOptions` - Stamp invalidation: bumps `attempt.Stamp` to cancel in-flight `ActivityDispatchTask` and timeout tasks, then re-queues a new dispatch and schedule-to-start timeout - Retry interval recalculation: after a retry-policy update, recomputes `attempt.CurrentRetryInterval` so the re-dispatch fires at the new (possibly shorter) interval rather than the stale one - Schedule-to-close update: adds a new `ScheduleToCloseTimeoutTask` at the updated deadline so a shortened timeout fires immediately - Adds `original_options` to `ActivityState` proto and populates it in `NewStandaloneActivity` - `chasm/lib/activity/handler.go` — Routes the standalone path through `chasm.UpdateComponent` to `(*Activity).UpdateActivityExecutionOptions` - `chasm/lib/activity/frontend.go` — Adds input validation (`validateUpdateActivityExecutionOptionsRequest`: activity ID required/length, identity length, run ID UUID), and gates the standalone path behind the `Enabled` config flag while always permitting the workflow path. `UpdateActivityExecutionOptions` is the new unified RPC for updating activity options for both standalone activities and workflow-embedded activities. 
- [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [X] added new functional test(s) Minimal, this is into a feature branch --- chasm/lib/activity/activity.go | 212 +++++- chasm/lib/activity/activity_tasks.go | 17 +- chasm/lib/activity/frontend.go | 10 +- .../gen/activitypb/v1/activity_state.pb.go | 112 +-- .../activity/gen/activitypb/v1/tasks.pb.go | 18 +- chasm/lib/activity/handler.go | 17 +- .../activity/proto/v1/activity_state.proto | 303 ++++---- chasm/lib/activity/proto/v1/tasks.proto | 23 +- chasm/lib/activity/statemachine.go | 3 +- chasm/lib/activity/validator.go | 186 +++-- common/activityoptions/merge.go | 122 ++++ common/activityoptions/merge_test.go | 323 +++++++++ .../history/api/updateactivityoptions/api.go | 107 +-- .../api/updateactivityoptions/api_test.go | 170 +---- tests/activity_api_update_test.go | 322 ++++++++- tests/standalone_activity_test.go | 670 +++++++++++++++++- 16 files changed, 2029 insertions(+), 586 deletions(-) create mode 100644 common/activityoptions/merge.go create mode 100644 common/activityoptions/merge_test.go diff --git a/chasm/lib/activity/activity.go b/chasm/lib/activity/activity.go index df45a9ac490..0292ec62b33 100644 --- a/chasm/lib/activity/activity.go +++ b/chasm/lib/activity/activity.go @@ -23,6 +23,7 @@ import ( "go.temporal.io/server/chasm/lib/callback" callbackspb "go.temporal.io/server/chasm/lib/callback/gen/callbackpb/v1" "go.temporal.io/server/common" + "go.temporal.io/server/common/activityoptions" "go.temporal.io/server/common/backoff" "go.temporal.io/server/common/contextutil" "go.temporal.io/server/common/metrics" @@ -32,6 +33,7 @@ import ( "go.temporal.io/server/common/payload" serviceerrors "go.temporal.io/server/common/serviceerror" "go.temporal.io/server/common/tqid" + "go.temporal.io/server/common/util" "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -149,23 +151,34 @@ func 
NewStandaloneActivity( ) activity := &Activity{ - ActivityState: &activitypb.ActivityState{ - ActivityType: request.ActivityType, + // Use common.CloneProto here because the values can change and these are all + // pointers to the request so changing the ActivityState will also change the + // request values. + ActivityState: common.CloneProto(&activitypb.ActivityState{ + ActivityType: request.GetActivityType(), TaskQueue: request.GetTaskQueue(), ScheduleToCloseTimeout: request.GetScheduleToCloseTimeout(), ScheduleToStartTimeout: request.GetScheduleToStartTimeout(), StartToCloseTimeout: request.GetStartToCloseTimeout(), HeartbeatTimeout: request.GetHeartbeatTimeout(), RetryPolicy: request.GetRetryPolicy(), - Priority: request.Priority, - StartDelay: request.GetStartDelay(), - }, - LastAttempt: chasm.NewDataField(ctx, &activitypb.ActivityAttemptState{}), - RequestData: chasm.NewDataField(ctx, &activitypb.ActivityRequestData{ - Input: request.Input, - Header: request.Header, - UserMetadata: request.UserMetadata, + Priority: request.GetPriority(), + OriginalOptions: &apiactivitypb.ActivityOptions{ + TaskQueue: request.GetTaskQueue(), + ScheduleToCloseTimeout: request.GetScheduleToCloseTimeout(), + ScheduleToStartTimeout: request.GetScheduleToStartTimeout(), + StartToCloseTimeout: request.GetStartToCloseTimeout(), + HeartbeatTimeout: request.GetHeartbeatTimeout(), + RetryPolicy: request.GetRetryPolicy(), + Priority: request.GetPriority(), + }, }), + LastAttempt: chasm.NewDataField(ctx, &activitypb.ActivityAttemptState{}), + RequestData: chasm.NewDataField(ctx, common.CloneProto(&activitypb.ActivityRequestData{ + Input: request.GetInput(), + Header: request.GetHeader(), + UserMetadata: request.GetUserMetadata(), + })), Outcome: chasm.NewDataField(ctx, &activitypb.ActivityOutcome{}), Visibility: chasm.NewComponentField(ctx, visibility), } @@ -516,6 +529,178 @@ func (a *Activity) Terminate( }) } +func (a *Activity) UpdateActivityExecutionOptions( + ctx 
chasm.MutableContext, + req *activitypb.UpdateActivityExecutionOptionsRequest, +) (*activitypb.UpdateActivityExecutionOptionsResponse, error) { + switch a.Status { + case activitypb.ACTIVITY_EXECUTION_STATUS_CANCELED, + activitypb.ACTIVITY_EXECUTION_STATUS_COMPLETED, + activitypb.ACTIVITY_EXECUTION_STATUS_FAILED, + activitypb.ACTIVITY_EXECUTION_STATUS_TERMINATED, + activitypb.ACTIVITY_EXECUTION_STATUS_TIMED_OUT, + activitypb.ACTIVITY_EXECUTION_STATUS_UNSPECIFIED: + return nil, serviceerror.NewFailedPreconditionf("Cannot update options for activity in state %s", a.Status.String()) + default: + } + + frontendReq := req.GetFrontendRequest() + + if frontendReq.GetRestoreOriginal() { + ogOptions := a.GetOriginalOptions() + a.TaskQueue = common.CloneProto(ogOptions.GetTaskQueue()) + a.ScheduleToCloseTimeout = common.CloneProto(ogOptions.GetScheduleToCloseTimeout()) + a.ScheduleToStartTimeout = common.CloneProto(ogOptions.GetScheduleToStartTimeout()) + a.StartToCloseTimeout = common.CloneProto(ogOptions.GetStartToCloseTimeout()) + a.HeartbeatTimeout = common.CloneProto(ogOptions.GetHeartbeatTimeout()) + a.RetryPolicy = common.CloneProto(ogOptions.GetRetryPolicy()) + a.Priority = common.CloneProto(ogOptions.GetPriority()) + } else { + if err := a.mergeActivityOptions(frontendReq); err != nil { + return nil, err + } + } + + attempt := a.LastAttempt.Get(ctx) + + // Recalculate the current retry interval based on the (possibly updated) retry policy. + // This ensures a shortened retry interval takes effect immediately on re-dispatch. + if a.GetStatus() == activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED && attempt.GetCurrentRetryInterval() != nil { + newInterval := backoff.CalculateExponentialRetryInterval(a.RetryPolicy, attempt.GetCount()-1) + attempt.CurrentRetryInterval = durationpb.New(newInterval) + } + + // Add a new ScheduleToCloseTimeoutTask at the (possibly updated) deadline. + // Increment the stamp so the previous task is invalidated by the Validate check. 
+ if timeout := a.GetScheduleToCloseTimeout().AsDuration(); timeout > 0 { + a.Stamp++ + deadline := a.GetScheduleTime().AsTime().Add(timeout) + ctx.AddTask( + a, + chasm.TaskAttributes{ScheduledTime: deadline}, + &activitypb.ScheduleToCloseTimeoutTask{Stamp: a.GetStamp()}, + ) + } + + attempt.Stamp++ + + if a.GetStatus() == activitypb.ACTIVITY_EXECUTION_STATUS_STARTED || a.GetStatus() == activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED { + // Re-create the start-to-close timeout task with the new stamp and (possibly updated) timeout. + // The old task was invalidated by the stamp increment above. + if timeout := a.GetStartToCloseTimeout().AsDuration(); timeout > 0 { + deadline := attempt.GetStartedTime().AsTime().Add(timeout) + ctx.AddTask( + a, + chasm.TaskAttributes{ScheduledTime: deadline}, + &activitypb.StartToCloseTimeoutTask{Stamp: attempt.GetStamp()}, + ) + } + + if hbTimeout := a.GetHeartbeatTimeout().AsDuration(); hbTimeout > 0 { + // The next heartbeat time is the max of (the last heartbeat's recorded time and + // the current attempt's started time) plus the heartbeat timeout + lastHb, _ := a.LastHeartbeat.TryGet(ctx) + lastHbTime := util.MaxTime( + lastHb.GetRecordedTime().AsTime(), + attempt.GetStartedTime().AsTime(), + ).Add(hbTimeout) + ctx.AddTask( + a, + chasm.TaskAttributes{ + ScheduledTime: lastHbTime, + }, + &activitypb.HeartbeatTimeoutTask{ + Stamp: attempt.GetStamp(), + }, + ) + } + } + + // TODO(saa-ga): need to handle the StartDelay timer + + if a.GetStatus() == activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED { + // Re-dispatch this activity + retryTime := attemptScheduleTimeForRetry(attempt) + var dispatchAttrs chasm.TaskAttributes + if retryTime != nil { + // in backoff, future retry time + dispatchAttrs.ScheduledTime = retryTime.AsTime() + } + ctx.AddTask( + a, + dispatchAttrs, + &activitypb.ActivityDispatchTask{Stamp: attempt.GetStamp()}, + ) + + if timeout := a.GetScheduleToStartTimeout().AsDuration(); timeout > 0 { + schedToStart := 
ctx.Now(a).Add(timeout) + if retryTime != nil { + schedToStart = retryTime.AsTime().Add(timeout) + } + ctx.AddTask( + a, + chasm.TaskAttributes{ScheduledTime: schedToStart}, + &activitypb.ScheduleToStartTimeoutTask{Stamp: attempt.GetStamp()}, + ) + } + } + + return &activitypb.UpdateActivityExecutionOptionsResponse{ + FrontendResponse: &workflowservice.UpdateActivityExecutionOptionsResponse{ + ActivityOptions: &apiactivitypb.ActivityOptions{ + TaskQueue: a.GetTaskQueue(), + ScheduleToCloseTimeout: a.GetScheduleToCloseTimeout(), + ScheduleToStartTimeout: a.GetScheduleToStartTimeout(), + StartToCloseTimeout: a.GetStartToCloseTimeout(), + HeartbeatTimeout: a.GetHeartbeatTimeout(), + RetryPolicy: a.GetRetryPolicy(), + Priority: a.GetPriority(), + }, + }, + }, nil +} + +// mergeActivityOptions applies the field mask from the request to the activity state. +// The structure mirrors the field-mask logic in service/history/api/updateactivityoptions/api.go +func (a *Activity) mergeActivityOptions( + req *workflowservice.UpdateActivityExecutionOptionsRequest, +) error { + updateFields := util.ParseFieldMask(req.GetUpdateMask()) + + // Build an ActivityOptions view of the current Activity state so we can use the shared merge function. + ao := &apiactivitypb.ActivityOptions{ + TaskQueue: a.TaskQueue, + ScheduleToCloseTimeout: a.ScheduleToCloseTimeout, + ScheduleToStartTimeout: a.ScheduleToStartTimeout, + StartToCloseTimeout: a.StartToCloseTimeout, + HeartbeatTimeout: a.HeartbeatTimeout, + Priority: a.Priority, + RetryPolicy: a.RetryPolicy, + } + + if err := activityoptions.MergeActivityOptions(ao, req.GetActivityOptions(), updateFields); err != nil { + return err + } + + // Re-normalize timeouts after the update so that relationships like + // start_to_close <= schedule_to_close and heartbeat <= start_to_close are preserved. + // This mirrors adjustActivityOptions for workflow-embedded activities. 
+ if err := normalizeAndValidateTimeouts(req.GetActivityId(), a.GetActivityType().GetName(), durationpb.New(0), ao); err != nil { + return err + } + + // Write the merged and normalized options back to the Activity state fields. + a.TaskQueue = ao.TaskQueue + a.ScheduleToCloseTimeout = ao.ScheduleToCloseTimeout + a.ScheduleToStartTimeout = ao.ScheduleToStartTimeout + a.StartToCloseTimeout = ao.StartToCloseTimeout + a.HeartbeatTimeout = ao.HeartbeatTimeout + a.Priority = ao.Priority + a.RetryPolicy = ao.RetryPolicy + + return nil +} + // getOrCreateLastHeartbeat retrieves the last heartbeat state, initializing it if not present. The heartbeat is lazily created // to avoid unnecessary writes when heartbeats are not used. func (a *Activity) getOrCreateLastHeartbeat(ctx chasm.MutableContext) *activitypb.ActivityHeartbeatState { @@ -718,11 +903,9 @@ func (a *Activity) RecordHeartbeat( if err != nil { return nil, err } - prevHeartbeat, _ := a.LastHeartbeat.TryGet(ctx) a.LastHeartbeat = chasm.NewDataField(ctx, &activitypb.ActivityHeartbeatState{ - RecordedTime: timestamppb.New(ctx.Now(a)), - Details: input.Request.GetHeartbeatRequest().GetDetails(), - TotalHeartbeatCount: prevHeartbeat.GetTotalHeartbeatCount() + 1, + RecordedTime: timestamppb.New(ctx.Now(a)), + Details: input.Request.GetHeartbeatRequest().GetDetails(), }) if heartbeatTimeout := a.GetHeartbeatTimeout().AsDuration(); heartbeatTimeout > 0 { ctx.AddTask( @@ -824,7 +1007,6 @@ func (a *Activity) buildActivityExecutionInfo(ctx chasm.Context) *apiactivitypb. 
Header: requestData.GetHeader(), HeartbeatDetails: heartbeat.GetDetails(), HeartbeatTimeout: a.GetHeartbeatTimeout(), - TotalHeartbeatCount: heartbeat.GetTotalHeartbeatCount(), LastAttemptCompleteTime: attempt.GetCompleteTime(), LastFailure: attempt.GetLastFailureDetails().GetFailure(), LastHeartbeatTime: heartbeat.GetRecordedTime(), diff --git a/chasm/lib/activity/activity_tasks.go b/chasm/lib/activity/activity_tasks.go index e22b2f586a6..fc98e2c58a2 100644 --- a/chasm/lib/activity/activity_tasks.go +++ b/chasm/lib/activity/activity_tasks.go @@ -127,9 +127,22 @@ func (h *scheduleToCloseTimeoutTaskHandler) Validate( _ chasm.Context, activity *Activity, _ chasm.TaskAttributes, - _ *activitypb.ScheduleToCloseTimeoutTask, + task *activitypb.ScheduleToCloseTimeoutTask, ) (bool, error) { - return TransitionTimedOut.Possible(activity), nil + if !TransitionTimedOut.Possible(activity) { + return false, nil + } + // If schedule-to-close was disabled via an options update, discard this task. + if activity.GetScheduleToCloseTimeout().AsDuration() <= 0 { + return false, nil + } + // Stamp check: discard tasks from before the most recent ScheduleToCloseTimeoutTask was + // scheduled (e.g. after a schedule-to-close extension or a disable+re-enable cycle). + // Tasks without a stamp (stamp=0) predate this field and are not validated by stamp. + if task.GetStamp() != 0 && task.GetStamp() != activity.GetStamp() { + return false, nil + } + return true, nil } func (h *scheduleToCloseTimeoutTaskHandler) Execute( diff --git a/chasm/lib/activity/frontend.go b/chasm/lib/activity/frontend.go index 2d16ccfd7f8..e55b2d2d726 100644 --- a/chasm/lib/activity/frontend.go +++ b/chasm/lib/activity/frontend.go @@ -507,11 +507,19 @@ func (h *frontendHandler) UpdateActivityExecutionOptions( ctx context.Context, req *workflowservice.UpdateActivityExecutionOptionsRequest, ) (*workflowservice.UpdateActivityExecutionOptionsResponse, error) { + // Standalone path requires the feature to be enabled. 
Workflow path (workflow_id != "") + // is always permitted and routes to the history service. if req.GetWorkflowId() == "" && !h.config.Enabled(req.GetNamespace()) { return nil, ErrStandaloneActivityDisabled } - // TODO: validate request fields (e.g. namespace, identity length, update mask) + if err := validateUpdateActivityExecutionOptionsRequest( + req, + h.config.MaxIDLengthLimit(), + ); err != nil { + return nil, err + } + namespaceID, err := h.namespaceRegistry.GetNamespaceID(namespace.Name(req.GetNamespace())) if err != nil { return nil, err diff --git a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go index 3e95ee84f59..ef31bedc60e 100644 --- a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go @@ -12,10 +12,11 @@ import ( sync "sync" unsafe "unsafe" + v12 "go.temporal.io/api/activity/v1" v1 "go.temporal.io/api/common/v1" - v12 "go.temporal.io/api/deployment/v1" - v14 "go.temporal.io/api/failure/v1" - v13 "go.temporal.io/api/sdk/v1" + v13 "go.temporal.io/api/deployment/v1" + v15 "go.temporal.io/api/failure/v1" + v14 "go.temporal.io/api/sdk/v1" v11 "go.temporal.io/api/taskqueue/v1" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" @@ -183,9 +184,13 @@ type ActivityState struct { CancelState *ActivityCancelState `protobuf:"bytes,11,opt,name=cancel_state,json=cancelState,proto3" json:"cancel_state,omitempty"` // Set if the activity was terminated TerminateState *ActivityTerminateState `protobuf:"bytes,12,opt,name=terminate_state,json=terminateState,proto3" json:"terminate_state,omitempty"` - // Amount of time to wait before dispatching the activity task to the task queue for the first time. If the activity - // has a retry policy, retry attempts will not have start delay applied. 
- StartDelay *durationpb.Duration `protobuf:"bytes,13,opt,name=start_delay,json=startDelay,proto3" json:"start_delay,omitempty"` + // Options for the first scheduled attempt to support `restore_original` + OriginalOptions *v12.ActivityOptions `protobuf:"bytes,13,opt,name=original_options,json=originalOptions,proto3" json:"original_options,omitempty"` + // An incremental version number used to validate ScheduleToCloseTimeoutTask tasks. + // Incremented each time a new ScheduleToCloseTimeoutTask is scheduled (at activity creation + // and on each options update that re-schedules the task). Unlike attempt.stamp, this counter + // is NOT incremented on retries, because schedule-to-close spans the full activity lifetime. + Stamp int32 `protobuf:"varint,14,opt,name=stamp,proto3" json:"stamp,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -304,13 +309,20 @@ func (x *ActivityState) GetTerminateState() *ActivityTerminateState { return nil } -func (x *ActivityState) GetStartDelay() *durationpb.Duration { +func (x *ActivityState) GetOriginalOptions() *v12.ActivityOptions { if x != nil { - return x.StartDelay + return x.OriginalOptions } return nil } +func (x *ActivityState) GetStamp() int32 { + if x != nil { + return x.Stamp + } + return 0 +} + type ActivityCancelState struct { state protoimpl.MessageState `protogen:"open.v1"` RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` @@ -444,15 +456,18 @@ type ActivityAttemptState struct { // including start-to-close timeout. Activity success, termination, schedule-to-start and schedule-to-close timeouts // will not reset it. LastFailureDetails *ActivityAttemptState_LastFailureDetails `protobuf:"bytes,5,opt,name=last_failure_details,json=lastFailureDetails,proto3" json:"last_failure_details,omitempty"` - // An incremental version number used to validate tasks. - // Initially this only verifies that a task belong to the current attempt. 
- // Later on this stamp will be used to also invalidate tasks when the activity is paused, reset, or has its options - // updated. + // An incremental version number used to validate attempt-scoped tasks + // (ActivityDispatchTask, ScheduleToStartTimeoutTask, StartToCloseTimeoutTask, HeartbeatTimeoutTask). + // Incremented on each new attempt and on options updates, so that in-flight tasks from the + // previous attempt or pre-update state are discarded. + // Note: ScheduleToCloseTimeoutTask uses a separate ActivityState.schedule_to_close_stamp because + // it spans the full activity lifetime and must not be invalidated on retry. + // TODO: also invalidate on pause and reset when those are supported. Stamp int32 `protobuf:"varint,6,opt,name=stamp,proto3" json:"stamp,omitempty"` LastWorkerIdentity string `protobuf:"bytes,7,opt,name=last_worker_identity,json=lastWorkerIdentity,proto3" json:"last_worker_identity,omitempty"` // The Worker Deployment Version this activity was dispatched to most recently. // If nil, the activity has not yet been dispatched or was last dispatched to an unversioned worker. - LastDeploymentVersion *v12.WorkerDeploymentVersion `protobuf:"bytes,8,opt,name=last_deployment_version,json=lastDeploymentVersion,proto3" json:"last_deployment_version,omitempty"` + LastDeploymentVersion *v13.WorkerDeploymentVersion `protobuf:"bytes,8,opt,name=last_deployment_version,json=lastDeploymentVersion,proto3" json:"last_deployment_version,omitempty"` // The request ID that came from matching's RecordActivityTaskStarted API call. Used to make this API idempotent in // case of implicit retries. 
StartRequestId string `protobuf:"bytes,9,opt,name=start_request_id,json=startRequestId,proto3" json:"start_request_id,omitempty"` @@ -539,7 +554,7 @@ func (x *ActivityAttemptState) GetLastWorkerIdentity() string { return "" } -func (x *ActivityAttemptState) GetLastDeploymentVersion() *v12.WorkerDeploymentVersion { +func (x *ActivityAttemptState) GetLastDeploymentVersion() *v13.WorkerDeploymentVersion { if x != nil { return x.LastDeploymentVersion } @@ -558,11 +573,9 @@ type ActivityHeartbeatState struct { // Details provided in the last recorded activity heartbeat. Details *v1.Payloads `protobuf:"bytes,1,opt,name=details,proto3" json:"details,omitempty"` // Time the last heartbeat was recorded. - RecordedTime *timestamppb.Timestamp `protobuf:"bytes,2,opt,name=recorded_time,json=recordedTime,proto3" json:"recorded_time,omitempty"` - // Total number of heartbeats recorded across all attempts of this activity, including retries. - TotalHeartbeatCount int64 `protobuf:"varint,3,opt,name=total_heartbeat_count,json=totalHeartbeatCount,proto3" json:"total_heartbeat_count,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + RecordedTime *timestamppb.Timestamp `protobuf:"bytes,2,opt,name=recorded_time,json=recordedTime,proto3" json:"recorded_time,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *ActivityHeartbeatState) Reset() { @@ -609,20 +622,13 @@ func (x *ActivityHeartbeatState) GetRecordedTime() *timestamppb.Timestamp { return nil } -func (x *ActivityHeartbeatState) GetTotalHeartbeatCount() int64 { - if x != nil { - return x.TotalHeartbeatCount - } - return 0 -} - type ActivityRequestData struct { state protoimpl.MessageState `protogen:"open.v1"` // Serialized activity input, passed as arguments to the activity function. 
Input *v1.Payloads `protobuf:"bytes,1,opt,name=input,proto3" json:"input,omitempty"` Header *v1.Header `protobuf:"bytes,2,opt,name=header,proto3" json:"header,omitempty"` // Metadata for use by user interfaces to display the fixed as-of-start summary and details of the activity. - UserMetadata *v13.UserMetadata `protobuf:"bytes,3,opt,name=user_metadata,json=userMetadata,proto3" json:"user_metadata,omitempty"` + UserMetadata *v14.UserMetadata `protobuf:"bytes,3,opt,name=user_metadata,json=userMetadata,proto3" json:"user_metadata,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -671,7 +677,7 @@ func (x *ActivityRequestData) GetHeader() *v1.Header { return nil } -func (x *ActivityRequestData) GetUserMetadata() *v13.UserMetadata { +func (x *ActivityRequestData) GetUserMetadata() *v14.UserMetadata { if x != nil { return x.UserMetadata } @@ -765,7 +771,7 @@ type ActivityAttemptState_LastFailureDetails struct { // The last time the activity attempt failed. Time *timestamppb.Timestamp `protobuf:"bytes,1,opt,name=time,proto3" json:"time,omitempty"` // Failure details from the last failed attempt. - Failure *v14.Failure `protobuf:"bytes,2,opt,name=failure,proto3" json:"failure,omitempty"` + Failure *v15.Failure `protobuf:"bytes,2,opt,name=failure,proto3" json:"failure,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -807,7 +813,7 @@ func (x *ActivityAttemptState_LastFailureDetails) GetTime() *timestamppb.Timesta return nil } -func (x *ActivityAttemptState_LastFailureDetails) GetFailure() *v14.Failure { +func (x *ActivityAttemptState_LastFailureDetails) GetFailure() *v15.Failure { if x != nil { return x.Failure } @@ -862,7 +868,7 @@ type ActivityOutcome_Failed struct { state protoimpl.MessageState `protogen:"open.v1"` // Only filled on schedule-to-start timeouts, schedule-to-close timeouts or terminations. All other attempt // failures will be recorded in ActivityAttemptState.last_failure_details. 
- Failure *v14.Failure `protobuf:"bytes,1,opt,name=failure,proto3" json:"failure,omitempty"` + Failure *v15.Failure `protobuf:"bytes,1,opt,name=failure,proto3" json:"failure,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -897,7 +903,7 @@ func (*ActivityOutcome_Failed) Descriptor() ([]byte, []int) { return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{6, 1} } -func (x *ActivityOutcome_Failed) GetFailure() *v14.Failure { +func (x *ActivityOutcome_Failed) GetFailure() *v15.Failure { if x != nil { return x.Failure } @@ -908,7 +914,7 @@ var File_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto protor const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDesc = "" + "\n" + - "@temporal/server/chasm/lib/activity/proto/v1/activity_state.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a$temporal/api/common/v1/message.proto\x1a(temporal/api/deployment/v1/message.proto\x1a%temporal/api/failure/v1/message.proto\x1a'temporal/api/sdk/v1/user_metadata.proto\x1a'temporal/api/taskqueue/v1/message.proto\"\x97\b\n" + + "@temporal/server/chasm/lib/activity/proto/v1/activity_state.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&temporal/api/activity/v1/message.proto\x1a$temporal/api/common/v1/message.proto\x1a(temporal/api/deployment/v1/message.proto\x1a%temporal/api/failure/v1/message.proto\x1a'temporal/api/sdk/v1/user_metadata.proto\x1a'temporal/api/taskqueue/v1/message.proto\"\xc7\b\n" + "\rActivityState\x12I\n" + "\ractivity_type\x18\x01 \x01(\v2$.temporal.api.common.v1.ActivityTypeR\factivityType\x12C\n" + "\n" + @@ -923,9 +929,9 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\bpriority\x18\n" + " \x01(\v2 
.temporal.api.common.v1.PriorityR\bpriority\x12c\n" + "\fcancel_state\x18\v \x01(\v2@.temporal.server.chasm.lib.activity.proto.v1.ActivityCancelStateR\vcancelState\x12l\n" + - "\x0fterminate_state\x18\f \x01(\v2C.temporal.server.chasm.lib.activity.proto.v1.ActivityTerminateStateR\x0eterminateState\x12:\n" + - "\vstart_delay\x18\r \x01(\v2\x19.google.protobuf.DurationR\n" + - "startDelay\"\xa7\x01\n" + + "\x0fterminate_state\x18\f \x01(\v2C.temporal.server.chasm.lib.activity.proto.v1.ActivityTerminateStateR\x0eterminateState\x12T\n" + + "\x10original_options\x18\r \x01(\v2).temporal.api.activity.v1.ActivityOptionsR\x0foriginalOptions\x12\x14\n" + + "\x05stamp\x18\x0e \x01(\x05R\x05stamp\"\xa7\x01\n" + "\x13ActivityCancelState\x12\x1d\n" + "\n" + "request_id\x18\x01 \x01(\tR\trequestId\x12=\n" + @@ -947,11 +953,10 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\x10start_request_id\x18\t \x01(\tR\x0estartRequestId\x1a\x80\x01\n" + "\x12LastFailureDetails\x12.\n" + "\x04time\x18\x01 \x01(\v2\x1a.google.protobuf.TimestampR\x04time\x12:\n" + - "\afailure\x18\x02 \x01(\v2 .temporal.api.failure.v1.FailureR\afailure\"\xc9\x01\n" + + "\afailure\x18\x02 \x01(\v2 .temporal.api.failure.v1.FailureR\afailure\"\x95\x01\n" + "\x16ActivityHeartbeatState\x12:\n" + "\adetails\x18\x01 \x01(\v2 .temporal.api.common.v1.PayloadsR\adetails\x12?\n" + - "\rrecorded_time\x18\x02 \x01(\v2\x1a.google.protobuf.TimestampR\frecordedTime\x122\n" + - "\x15total_heartbeat_count\x18\x03 \x01(\x03R\x13totalHeartbeatCount\"\xcd\x01\n" + + "\rrecorded_time\x18\x02 \x01(\v2\x1a.google.protobuf.TimestampR\frecordedTime\"\xcd\x01\n" + "\x13ActivityRequestData\x126\n" + "\x05input\x18\x01 \x01(\v2 .temporal.api.common.v1.PayloadsR\x05input\x126\n" + "\x06header\x18\x02 \x01(\v2\x1e.temporal.api.common.v1.HeaderR\x06header\x12F\n" + @@ -1010,11 +1015,12 @@ var file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_goType (*v1.RetryPolicy)(nil), // 14: 
temporal.api.common.v1.RetryPolicy (*timestamppb.Timestamp)(nil), // 15: google.protobuf.Timestamp (*v1.Priority)(nil), // 16: temporal.api.common.v1.Priority - (*v12.WorkerDeploymentVersion)(nil), // 17: temporal.api.deployment.v1.WorkerDeploymentVersion - (*v1.Payloads)(nil), // 18: temporal.api.common.v1.Payloads - (*v1.Header)(nil), // 19: temporal.api.common.v1.Header - (*v13.UserMetadata)(nil), // 20: temporal.api.sdk.v1.UserMetadata - (*v14.Failure)(nil), // 21: temporal.api.failure.v1.Failure + (*v12.ActivityOptions)(nil), // 17: temporal.api.activity.v1.ActivityOptions + (*v13.WorkerDeploymentVersion)(nil), // 18: temporal.api.deployment.v1.WorkerDeploymentVersion + (*v1.Payloads)(nil), // 19: temporal.api.common.v1.Payloads + (*v1.Header)(nil), // 20: temporal.api.common.v1.Header + (*v14.UserMetadata)(nil), // 21: temporal.api.sdk.v1.UserMetadata + (*v15.Failure)(nil), // 22: temporal.api.failure.v1.Failure } var file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_depIdxs = []int32{ 11, // 0: temporal.server.chasm.lib.activity.proto.v1.ActivityState.activity_type:type_name -> temporal.api.common.v1.ActivityType @@ -1029,24 +1035,24 @@ var file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_depIdx 16, // 9: temporal.server.chasm.lib.activity.proto.v1.ActivityState.priority:type_name -> temporal.api.common.v1.Priority 2, // 10: temporal.server.chasm.lib.activity.proto.v1.ActivityState.cancel_state:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityCancelState 3, // 11: temporal.server.chasm.lib.activity.proto.v1.ActivityState.terminate_state:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityTerminateState - 13, // 12: temporal.server.chasm.lib.activity.proto.v1.ActivityState.start_delay:type_name -> google.protobuf.Duration + 17, // 12: temporal.server.chasm.lib.activity.proto.v1.ActivityState.original_options:type_name -> temporal.api.activity.v1.ActivityOptions 15, // 13: 
temporal.server.chasm.lib.activity.proto.v1.ActivityCancelState.request_time:type_name -> google.protobuf.Timestamp 13, // 14: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.current_retry_interval:type_name -> google.protobuf.Duration 15, // 15: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.started_time:type_name -> google.protobuf.Timestamp 15, // 16: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.complete_time:type_name -> google.protobuf.Timestamp 8, // 17: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.last_failure_details:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails - 17, // 18: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.last_deployment_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion - 18, // 19: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState.details:type_name -> temporal.api.common.v1.Payloads + 18, // 18: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.last_deployment_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion + 19, // 19: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState.details:type_name -> temporal.api.common.v1.Payloads 15, // 20: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState.recorded_time:type_name -> google.protobuf.Timestamp - 18, // 21: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.input:type_name -> temporal.api.common.v1.Payloads - 19, // 22: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.header:type_name -> temporal.api.common.v1.Header - 20, // 23: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.user_metadata:type_name -> temporal.api.sdk.v1.UserMetadata + 19, // 21: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.input:type_name -> temporal.api.common.v1.Payloads + 20, // 22: 
temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.header:type_name -> temporal.api.common.v1.Header + 21, // 23: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.user_metadata:type_name -> temporal.api.sdk.v1.UserMetadata 9, // 24: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.successful:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful 10, // 25: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.failed:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed 15, // 26: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails.time:type_name -> google.protobuf.Timestamp - 21, // 27: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails.failure:type_name -> temporal.api.failure.v1.Failure - 18, // 28: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful.output:type_name -> temporal.api.common.v1.Payloads - 21, // 29: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed.failure:type_name -> temporal.api.failure.v1.Failure + 22, // 27: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails.failure:type_name -> temporal.api.failure.v1.Failure + 19, // 28: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful.output:type_name -> temporal.api.common.v1.Payloads + 22, // 29: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed.failure:type_name -> temporal.api.failure.v1.Failure 30, // [30:30] is the sub-list for method output_type 30, // [30:30] is the sub-list for method input_type 30, // [30:30] is the sub-list for extension type_name diff --git a/chasm/lib/activity/gen/activitypb/v1/tasks.pb.go b/chasm/lib/activity/gen/activitypb/v1/tasks.pb.go index 796574e7db2..3cb7a1345c3 100644 --- a/chasm/lib/activity/gen/activitypb/v1/tasks.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/tasks.pb.go @@ -113,7 +113,11 @@ func 
(x *ScheduleToStartTimeoutTask) GetStamp() int32 { } type ScheduleToCloseTimeoutTask struct { - state protoimpl.MessageState `protogen:"open.v1"` + state protoimpl.MessageState `protogen:"open.v1"` + // The schedule-to-close stamp for this task. Used for task validation. + // See also [ActivityState.schedule_to_close_stamp]. + // Tasks without a stamp (stamp=0) predate this field and are not validated by stamp. + Stamp int32 `protobuf:"varint,1,opt,name=stamp,proto3" json:"stamp,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -148,6 +152,13 @@ func (*ScheduleToCloseTimeoutTask) Descriptor() ([]byte, []int) { return file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_rawDescGZIP(), []int{2} } +func (x *ScheduleToCloseTimeoutTask) GetStamp() int32 { + if x != nil { + return x.Stamp + } + return 0 +} + type StartToCloseTimeoutTask struct { state protoimpl.MessageState `protogen:"open.v1"` // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. 
@@ -247,8 +258,9 @@ const file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_rawDesc = "" "\x14ActivityDispatchTask\x12\x14\n" + "\x05stamp\x18\x01 \x01(\x05R\x05stamp\"2\n" + "\x1aScheduleToStartTimeoutTask\x12\x14\n" + - "\x05stamp\x18\x01 \x01(\x05R\x05stamp\"\x1c\n" + - "\x1aScheduleToCloseTimeoutTask\"/\n" + + "\x05stamp\x18\x01 \x01(\x05R\x05stamp\"2\n" + + "\x1aScheduleToCloseTimeoutTask\x12\x14\n" + + "\x05stamp\x18\x01 \x01(\x05R\x05stamp\"/\n" + "\x17StartToCloseTimeoutTask\x12\x14\n" + "\x05stamp\x18\x01 \x01(\x05R\x05stamp\",\n" + "\x14HeartbeatTimeoutTask\x12\x14\n" + diff --git a/chasm/lib/activity/handler.go b/chasm/lib/activity/handler.go index 0bb927d16b5..e024a69e800 100644 --- a/chasm/lib/activity/handler.go +++ b/chasm/lib/activity/handler.go @@ -440,5 +440,20 @@ func (h *handler) UpdateActivityExecutionOptions(ctx context.Context, req *activ }, }, nil } - return nil, serviceerror.NewUnimplemented("UpdateActivityExecutionOptions for standalone activities is not yet implemented") + + ref := chasm.NewComponentRef[*Activity](chasm.ExecutionKey{ + NamespaceID: req.GetNamespaceId(), + BusinessID: req.GetFrontendRequest().GetActivityId(), + RunID: req.GetFrontendRequest().GetRunId(), + }) + response, _, err := chasm.UpdateComponent( + ctx, + ref, + (*Activity).UpdateActivityExecutionOptions, + req, + ) + if err != nil { + return nil, err + } + return response, nil } diff --git a/chasm/lib/activity/proto/v1/activity_state.proto b/chasm/lib/activity/proto/v1/activity_state.proto index 931afb0b881..0c516def6e0 100644 --- a/chasm/lib/activity/proto/v1/activity_state.proto +++ b/chasm/lib/activity/proto/v1/activity_state.proto @@ -2,105 +2,111 @@ syntax = "proto3"; package temporal.server.chasm.lib.activity.proto.v1; +option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; + import "google/protobuf/duration.proto"; import "google/protobuf/timestamp.proto"; +import "temporal/api/activity/v1/message.proto"; import 
"temporal/api/common/v1/message.proto"; import "temporal/api/deployment/v1/message.proto"; import "temporal/api/failure/v1/message.proto"; import "temporal/api/sdk/v1/user_metadata.proto"; import "temporal/api/taskqueue/v1/message.proto"; -option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; - enum ActivityExecutionStatus { - ACTIVITY_EXECUTION_STATUS_UNSPECIFIED = 0; - // The activity has been scheduled, but a worker has not accepted the task for the current - // attempt. The activity may be backing off between attempts or waiting for a worker to pick it - // up. - ACTIVITY_EXECUTION_STATUS_SCHEDULED = 1; - // A worker has accepted a task for the current attempt. - ACTIVITY_EXECUTION_STATUS_STARTED = 2; - // A caller has requested cancellation of the activity. - ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED = 3; - // The activity completed successfully. - ACTIVITY_EXECUTION_STATUS_COMPLETED = 4; - // The activity completed with failure. - ACTIVITY_EXECUTION_STATUS_FAILED = 5; - // The activity completed as canceled. - // Requesting to cancel an activity does not automatically transition the activity to canceled status. If the worker - // responds to cancel the activity after requesting cancellation, the status will transition to cancelled. If the - // activity completes, fails, times out or terminates after cancel is requested and before the worker responds with - // cancelled. The activity will be stay in the terminal non-cancelled status. - ACTIVITY_EXECUTION_STATUS_CANCELED = 6; - // The activity was terminated. Termination does not reach the worker and the activity code cannot react to it. - // A terminated activity may have a running attempt and will be requested to be canceled by the server when it - // heartbeats. - ACTIVITY_EXECUTION_STATUS_TERMINATED = 7; - // The activity has timed out by reaching the specified schedule-to-start or schedule-to-close timeouts. 
- // Additionally, after all retries are exhausted for start-to-close or heartbeat timeouts, the activity will also - // transition to timed out status. - ACTIVITY_EXECUTION_STATUS_TIMED_OUT = 8; + ACTIVITY_EXECUTION_STATUS_UNSPECIFIED = 0; + // The activity has been scheduled, but a worker has not accepted the task for the current + // attempt. The activity may be backing off between attempts or waiting for a worker to pick it + // up. + ACTIVITY_EXECUTION_STATUS_SCHEDULED = 1; + // A worker has accepted a task for the current attempt. + ACTIVITY_EXECUTION_STATUS_STARTED = 2; + // A caller has requested cancellation of the activity. + ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED = 3; + // The activity completed successfully. + ACTIVITY_EXECUTION_STATUS_COMPLETED = 4; + // The activity completed with failure. + ACTIVITY_EXECUTION_STATUS_FAILED = 5; + // The activity completed as canceled. + // Requesting to cancel an activity does not automatically transition the activity to canceled status. If the worker + // responds to cancel the activity after requesting cancellation, the status will transition to cancelled. If the + // activity completes, fails, times out or terminates after cancel is requested and before the worker responds with + // cancelled, the activity will stay in the terminal non-cancelled status. + ACTIVITY_EXECUTION_STATUS_CANCELED = 6; + // The activity was terminated. Termination does not reach the worker and the activity code cannot react to it. + // A terminated activity may have a running attempt and will be requested to be canceled by the server when it + // heartbeats. + ACTIVITY_EXECUTION_STATUS_TERMINATED = 7; + // The activity has timed out by reaching the specified schedule-to-start or schedule-to-close timeouts. + // Additionally, after all retries are exhausted for start-to-close or heartbeat timeouts, the activity will also + // transition to timed out status. 
+ ACTIVITY_EXECUTION_STATUS_TIMED_OUT = 8; } message ActivityState { - // The type of the activity, a string that maps to a registered activity on a worker. - temporal.api.common.v1.ActivityType activity_type = 1; - - temporal.api.taskqueue.v1.TaskQueue task_queue = 2; - - // Indicates how long the caller is willing to wait for an activity completion. Limits how long - // retries will be attempted. Either this or `start_to_close_timeout` must be specified. - // - // (-- api-linter: core::0140::prepositions=disabled - // aip.dev/not-precedent: "to" is used to indicate interval. --) - google.protobuf.Duration schedule_to_close_timeout = 3; - // Limits time an activity task can stay in a task queue before a worker picks it up. This - // timeout is always non retryable, as all a retry would achieve is to put it back into the same - // queue. Defaults to `schedule_to_close_timeout` or workflow execution timeout if not - // specified. - // - // (-- api-linter: core::0140::prepositions=disabled - // aip.dev/not-precedent: "to" is used to indicate interval. --) - google.protobuf.Duration schedule_to_start_timeout = 4; - // Maximum time an activity is allowed to execute after being picked up by a worker. This - // timeout is always retryable. Either this or `schedule_to_close_timeout` must be - // specified. - // - // (-- api-linter: core::0140::prepositions=disabled - // aip.dev/not-precedent: "to" is used to indicate interval. --) - google.protobuf.Duration start_to_close_timeout = 5; - // Maximum permitted time between successful worker heartbeats. - google.protobuf.Duration heartbeat_timeout = 6; - // The retry policy for the activity. Will never exceed `schedule_to_close_timeout`. - temporal.api.common.v1.RetryPolicy retry_policy = 7; - - // All of the possible activity statuses (covers both the public ActivityExecutionStatus and PendingActivityState). - // TODO: consider moving this into ActivityAttemptState and renaming that message. 
This could save mutating two - // components on each attempt transition. - ActivityExecutionStatus status = 8; - - // Time the activity was originally scheduled via a StartActivityExecution request. - google.protobuf.Timestamp schedule_time = 9; - - // Priority metadata. - temporal.api.common.v1.Priority priority = 10; - - // Set if activity cancellation was requested. - ActivityCancelState cancel_state = 11; + // The type of the activity, a string that maps to a registered activity on a worker. + temporal.api.common.v1.ActivityType activity_type = 1; + + temporal.api.taskqueue.v1.TaskQueue task_queue = 2; + + // Indicates how long the caller is willing to wait for an activity completion. Limits how long + // retries will be attempted. Either this or `start_to_close_timeout` must be specified. + // + // (-- api-linter: core::0140::prepositions=disabled + // aip.dev/not-precedent: "to" is used to indicate interval. --) + google.protobuf.Duration schedule_to_close_timeout = 3; + // Limits time an activity task can stay in a task queue before a worker picks it up. This + // timeout is always non retryable, as all a retry would achieve is to put it back into the same + // queue. Defaults to `schedule_to_close_timeout` or workflow execution timeout if not + // specified. + // + // (-- api-linter: core::0140::prepositions=disabled + // aip.dev/not-precedent: "to" is used to indicate interval. --) + google.protobuf.Duration schedule_to_start_timeout = 4; + // Maximum time an activity is allowed to execute after being picked up by a worker. This + // timeout is always retryable. Either this or `schedule_to_close_timeout` must be + // specified. + // + // (-- api-linter: core::0140::prepositions=disabled + // aip.dev/not-precedent: "to" is used to indicate interval. --) + google.protobuf.Duration start_to_close_timeout = 5; + // Maximum permitted time between successful worker heartbeats. 
+ google.protobuf.Duration heartbeat_timeout = 6; + // The retry policy for the activity. Will never exceed `schedule_to_close_timeout`. + temporal.api.common.v1.RetryPolicy retry_policy = 7; + + // All of the possible activity statuses (covers both the public ActivityExecutionStatus and PendingActivityState). + // TODO: consider moving this into ActivityAttemptState and renaming that message. This could save mutating two + // components on each attempt transition. + ActivityExecutionStatus status = 8; + + // Time the activity was originally scheduled via a StartActivityExecution request. + google.protobuf.Timestamp schedule_time = 9; + + // Priority metadata. + temporal.api.common.v1.Priority priority = 10; + + // Set if activity cancellation was requested. + ActivityCancelState cancel_state = 11; // Set if the activity was terminated ActivityTerminateState terminate_state = 12; - // Amount of time to wait before dispatching the activity task to the task queue for the first time. If the activity - // has a retry policy, retry attempts will not have start delay applied. - google.protobuf.Duration start_delay = 13; + // Options for the first scheduled attempt to support `restore_original` + temporal.api.activity.v1.ActivityOptions original_options = 13; + + // An incremental version number used to validate ScheduleToCloseTimeoutTask tasks. + // Incremented each time a new ScheduleToCloseTimeoutTask is scheduled (at activity creation + // and on each options update that re-schedules the task). Unlike attempt.stamp, this counter + // is NOT incremented on retries, because schedule-to-close spans the full activity lifetime. 
+ int32 stamp = 14; } message ActivityCancelState { - string request_id = 1; - google.protobuf.Timestamp request_time = 2; - string identity = 3; - string reason = 4; + string request_id = 1; + google.protobuf.Timestamp request_time = 2; + string identity = 3; + string reason = 4; } message ActivityTerminateState { @@ -108,53 +114,56 @@ message ActivityTerminateState { } message ActivityAttemptState { - // The attempt this activity is currently on. - // Incremented each time a new attempt is scheduled. A newly created activity will immediately be scheduled, and - // the count is set to 1. - int32 count = 1; - - // Time from the last attempt failure to the next activity retry. - // If the activity is currently running, this represents the next retry interval in case the attempt fails. - // If activity is currently backing off between attempt, this represents the current retry interval. - // If there is no next retry allowed, this field will be null. - // This interval is typically calculated from the specified retry policy, but may be modified if an activity fails - // with a retryable application failure specifying a retry delay. - google.protobuf.Duration current_retry_interval = 2; - - // Time the last attempt was started. - google.protobuf.Timestamp started_time = 3; - - // The time when the last activity attempt completed. If activity has not been completed yet, it will be null. - google.protobuf.Timestamp complete_time = 4; - - message LastFailureDetails { - // The last time the activity attempt failed. - google.protobuf.Timestamp time = 1; - - // Failure details from the last failed attempt. - temporal.api.failure.v1.Failure failure = 2; - } - - // Details about the last failure. This will only be updated when an activity attempt fails, - // including start-to-close timeout. Activity success, termination, schedule-to-start and schedule-to-close timeouts - // will not reset it. 
- LastFailureDetails last_failure_details = 5; - - // An incremental version number used to validate tasks. - // Initially this only verifies that a task belong to the current attempt. - // Later on this stamp will be used to also invalidate tasks when the activity is paused, reset, or has its options - // updated. - int32 stamp = 6; - - string last_worker_identity = 7; - - // The Worker Deployment Version this activity was dispatched to most recently. - // If nil, the activity has not yet been dispatched or was last dispatched to an unversioned worker. - temporal.api.deployment.v1.WorkerDeploymentVersion last_deployment_version = 8; - - // The request ID that came from matching's RecordActivityTaskStarted API call. Used to make this API idempotent in - // case of implicit retries. - string start_request_id = 9; + // The attempt this activity is currently on. + // Incremented each time a new attempt is scheduled. A newly created activity will immediately be scheduled, and + // the count is set to 1. + int32 count = 1; + + // Time from the last attempt failure to the next activity retry. + // If the activity is currently running, this represents the next retry interval in case the attempt fails. + // If activity is currently backing off between attempt, this represents the current retry interval. + // If there is no next retry allowed, this field will be null. + // This interval is typically calculated from the specified retry policy, but may be modified if an activity fails + // with a retryable application failure specifying a retry delay. + google.protobuf.Duration current_retry_interval = 2; + + // Time the last attempt was started. + google.protobuf.Timestamp started_time = 3; + + // The time when the last activity attempt completed. If activity has not been completed yet, it will be null. + google.protobuf.Timestamp complete_time = 4; + + message LastFailureDetails { + // The last time the activity attempt failed. 
+ google.protobuf.Timestamp time = 1; + + // Failure details from the last failed attempt. + temporal.api.failure.v1.Failure failure = 2; + } + + // Details about the last failure. This will only be updated when an activity attempt fails, + // including start-to-close timeout. Activity success, termination, schedule-to-start and schedule-to-close timeouts + // will not reset it. + LastFailureDetails last_failure_details = 5; + + // An incremental version number used to validate attempt-scoped tasks + // (ActivityDispatchTask, ScheduleToStartTimeoutTask, StartToCloseTimeoutTask, HeartbeatTimeoutTask). + // Incremented on each new attempt and on options updates, so that in-flight tasks from the + // previous attempt or pre-update state are discarded. + // Note: ScheduleToCloseTimeoutTask uses a separate ActivityState.stamp because + // it spans the full activity lifetime and must not be invalidated on retry. + // TODO: also invalidate on pause and reset when those are supported. + int32 stamp = 6; + + string last_worker_identity = 7; + + // The Worker Deployment Version this activity was dispatched to most recently. + // If nil, the activity has not yet been dispatched or was last dispatched to an unversioned worker. + temporal.api.deployment.v1.WorkerDeploymentVersion last_deployment_version = 8; + + // The request ID that came from matching's RecordActivityTaskStarted API call. Used to make this API idempotent in + // case of implicit retries. + string start_request_id = 9; } message ActivityHeartbeatState { @@ -162,32 +171,30 @@ message ActivityHeartbeatState { temporal.api.common.v1.Payloads details = 1; // Time the last heartbeat was recorded. google.protobuf.Timestamp recorded_time = 2; - // Total number of heartbeats recorded across all attempts of this activity, including retries. - int64 total_heartbeat_count = 3; } message ActivityRequestData { - // Serialized activity input, passed as arguments to the activity function.
- temporal.api.common.v1.Payloads input = 1; - temporal.api.common.v1.Header header = 2; + // Serialized activity input, passed as arguments to the activity function. + temporal.api.common.v1.Payloads input = 1; + temporal.api.common.v1.Header header = 2; - // Metadata for use by user interfaces to display the fixed as-of-start summary and details of the activity. - temporal.api.sdk.v1.UserMetadata user_metadata = 3; + // Metadata for use by user interfaces to display the fixed as-of-start summary and details of the activity. + temporal.api.sdk.v1.UserMetadata user_metadata = 3; } message ActivityOutcome { - message Successful { - temporal.api.common.v1.Payloads output = 1; - } - - message Failed { - // Only filled on schedule-to-start timeouts, schedule-to-close timeouts or terminations. All other attempt - // failures will be recorded in ActivityAttemptState.last_failure_details. - temporal.api.failure.v1.Failure failure = 1; - } - - oneof variant { - Successful successful = 1; - Failed failed = 2; - } + message Successful { + temporal.api.common.v1.Payloads output = 1; + } + + message Failed { + // Only filled on schedule-to-start timeouts, schedule-to-close timeouts or terminations. All other attempt + // failures will be recorded in ActivityAttemptState.last_failure_details. + temporal.api.failure.v1.Failure failure = 1; + } + + oneof variant { + Successful successful = 1; + Failed failed = 2; + } } diff --git a/chasm/lib/activity/proto/v1/tasks.proto b/chasm/lib/activity/proto/v1/tasks.proto index 9a1996e3dd2..736d6767ac2 100644 --- a/chasm/lib/activity/proto/v1/tasks.proto +++ b/chasm/lib/activity/proto/v1/tasks.proto @@ -5,24 +5,29 @@ package temporal.server.chasm.lib.activity.proto.v1; option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; message ActivityDispatchTask { - // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. 
- int32 stamp = 1; + // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. + int32 stamp = 1; } message ScheduleToStartTimeoutTask { - // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. - int32 stamp = 1; + // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. + int32 stamp = 1; } -message ScheduleToCloseTimeoutTask {} +message ScheduleToCloseTimeoutTask { + // The schedule-to-close stamp for this task. Used for task validation. + // See also [ActivityState.stamp]. + // Tasks without a stamp (stamp=0) predate this field and are not validated by stamp. + int32 stamp = 1; +} message StartToCloseTimeoutTask { - // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. - int32 stamp = 1; + // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. + int32 stamp = 1; } // HeartbeatTimeoutTask is a pure task that enforces heartbeat timeouts. message HeartbeatTimeoutTask { - // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState].
+ int32 stamp = 1; } diff --git a/chasm/lib/activity/statemachine.go b/chasm/lib/activity/statemachine.go index b594e56a6d1..58ffd45376f 100644 --- a/chasm/lib/activity/statemachine.go +++ b/chasm/lib/activity/statemachine.go @@ -62,12 +62,13 @@ var TransitionScheduled = chasm.NewTransition( } if timeout := a.GetScheduleToCloseTimeout().AsDuration(); timeout > 0 { + a.Stamp++ ctx.AddTask( a, chasm.TaskAttributes{ ScheduledTime: startDelayEnd.Add(timeout), }, - &activitypb.ScheduleToCloseTimeoutTask{}) + &activitypb.ScheduleToCloseTimeoutTask{Stamp: a.GetStamp()}) } dispatchAttrs := chasm.TaskAttributes{} diff --git a/chasm/lib/activity/validator.go b/chasm/lib/activity/validator.go index e1c1ddb9797..4d690a818b8 100644 --- a/chasm/lib/activity/validator.go +++ b/chasm/lib/activity/validator.go @@ -17,6 +17,7 @@ import ( "go.temporal.io/server/common/retrypolicy" "go.temporal.io/server/common/searchattribute" "go.temporal.io/server/common/tqid" + "go.temporal.io/server/common/util" "google.golang.org/protobuf/types/known/durationpb" ) @@ -57,9 +58,10 @@ func ValidateAndNormalizeEmbeddedActivity( options *activitypb.ActivityOptions, priority *commonpb.Priority, runTimeout *durationpb.Duration, - workflowTaskQueueName string, ) error { - if err := tqid.NormalizeAndValidateUserDefined(options.TaskQueue, "", workflowTaskQueueName, maxIDLengthLimit); err != nil { + // We cannot use NormalizeAndValidateUserDefined for embedded activity task queue because embedded activities can + // use reserved task queues, which are not considered user defined. 
+ if err := tqid.NormalizeAndValidate(options.TaskQueue, "", maxIDLengthLimit); err != nil { return err } @@ -119,7 +121,7 @@ func validateAndNormalizeActivityAttributes( return serviceerror.NewInvalidArgumentf("invalid priorities: %v", err) } - return validateAndNormalizeTimeouts(activityID, + return normalizeAndValidateTimeouts(activityID, activityType, runTimeout, options) @@ -146,7 +148,7 @@ func validateActivityRetryPolicy( return retrypolicy.Validate(retryPolicy) } -func validateAndNormalizeTimeouts( +func normalizeAndValidateTimeouts( activityID string, activityType string, runTimeout *durationpb.Duration, @@ -214,7 +216,7 @@ func validateAndNormalizeTimeouts( return nil } -func validateAndNormalizeIDPolicy(req *workflowservice.StartActivityExecutionRequest) error { +func normalizeAndValidateIDPolicy(req *workflowservice.StartActivityExecutionRequest) error { if req.GetIdReusePolicy() == enumspb.ACTIVITY_ID_REUSE_POLICY_UNSPECIFIED { req.IdReusePolicy = enumspb.ACTIVITY_ID_REUSE_POLICY_ALLOW_DUPLICATE } @@ -323,18 +325,23 @@ func validatePollActivityExecutionRequest( return nil } -func validateAndNormalizeStartRequest( - req *workflowservice.StartActivityExecutionRequest, +func validateRequestCancelActivityExecutionRequest( + req *workflowservice.RequestCancelActivityExecutionRequest, maxIDLengthLimit int, blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, blobSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter, logger log.Logger, - saMapperProvider searchattribute.MapperProvider, - saValidator *searchattribute.Validator, ) error { - if req.GetRequestId() == "" { - req.RequestId = uuid.NewString() - } else if len(req.GetRequestId()) > maxIDLengthLimit { + if req.GetActivityId() == "" { + return serviceerror.NewInvalidArgument("activity ID is required") + } + + if len(req.GetActivityId()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("activity ID exceeds length limit. 
Length=%d Limit=%d", + len(req.GetActivityId()), maxIDLengthLimit) + } + + if len(req.GetRequestId()) > maxIDLengthLimit { return serviceerror.NewInvalidArgumentf("request ID exceeds length limit. Length=%d Limit=%d", len(req.GetRequestId()), maxIDLengthLimit) } @@ -344,39 +351,32 @@ func validateAndNormalizeStartRequest( len(req.GetIdentity()), maxIDLengthLimit) } - if err := validateAndNormalizeIDPolicy(req); err != nil { - return err + if runID := req.GetRunId(); runID != "" { + _, err := uuid.Parse(runID) + if err != nil { + return serviceerror.NewInvalidArgument("invalid run id: must be a valid UUID") + } } - if err := validateBlobSize( + err := validateBlobSize( req.GetActivityId(), - "StartActivityExecution", + "RequestCancelActivityExecution", blobSizeLimitError, blobSizeLimitWarn, - req.Input.Size(), + len(req.GetReason()), logger, - req.GetNamespace()); err != nil { - return serviceerror.NewInvalidArgument("input exceeds length limit") - } - - if req.GetSearchAttributes() != nil { - if err := validateAndNormalizeSearchAttributes( - req, - saMapperProvider, - saValidator); err != nil { - return err - } + req.GetNamespace()) + if err != nil { + return serviceerror.NewInvalidArgument("reason exceeds length limit") } return nil } -func validateAndNormalizeCancelRequest( - req *workflowservice.RequestCancelActivityExecutionRequest, +//nolint:revive // cyclomatic: per-field validation of a field-mask update requires explicit handling of each field +func validateUpdateActivityExecutionOptionsRequest( + req *workflowservice.UpdateActivityExecutionOptionsRequest, maxIDLengthLimit int, - blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, - blobSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter, - logger log.Logger, ) error { if req.GetActivityId() == "" { return serviceerror.NewInvalidArgument("activity ID is required") @@ -387,13 +387,6 @@ func validateAndNormalizeCancelRequest( len(req.GetActivityId()), maxIDLengthLimit) } - if 
req.GetRequestId() == "" { - req.RequestId = uuid.NewString() - } else if len(req.GetRequestId()) > maxIDLengthLimit { - return serviceerror.NewInvalidArgumentf("request ID exceeds length limit. Length=%d Limit=%d", - len(req.GetRequestId()), maxIDLengthLimit) - } - if len(req.GetIdentity()) > maxIDLengthLimit { return serviceerror.NewInvalidArgumentf("identity exceeds length limit. Length=%d Limit=%d", len(req.GetIdentity()), maxIDLengthLimit) @@ -406,22 +399,107 @@ func validateAndNormalizeCancelRequest( } } - err := validateBlobSize( - req.GetActivityId(), - "RequestCancelActivityExecution", - blobSizeLimitError, - blobSizeLimitWarn, - len(req.GetReason()), - logger, - req.GetNamespace()) - if err != nil { - return serviceerror.NewInvalidArgument("reason exceeds length limit") + if len(req.GetUpdateMask().GetPaths()) > 0 && req.GetRestoreOriginal() { + return serviceerror.NewInvalidArgument("Both UpdateMask and RestoreOriginal are provided") + } + + if req.GetRestoreOriginal() { + return nil + } + + if req.GetActivityOptions() == nil { + return serviceerror.NewInvalidArgument("ActivityOptions are not provided") + } + if req.GetUpdateMask() == nil { + return serviceerror.NewInvalidArgument("UpdateMask is not provided") + } + + opts := req.GetActivityOptions() + updateFields := util.ParseFieldMask(req.GetUpdateMask()) + + // TaskQueue: enforce user-defined task queue to prevent scheduling on reserved queues + // (e.g. the internal per-namespace-worker task queue). + if _, ok := updateFields["taskQueue.name"]; ok { + if err := tqid.NormalizeAndValidateUserDefined(opts.GetTaskQueue(), "", "", maxIDLengthLimit); err != nil { + return err + } + } + + // Timeouts: validate each timeout value that is being updated. 
+ if _, ok := updateFields["scheduleToCloseTimeout"]; ok { + if err := timestamp.ValidateAndCapProtoDuration(opts.GetScheduleToCloseTimeout()); err != nil { + return serviceerror.NewInvalidArgumentf("invalid ScheduleToCloseTimeout: %v", err) + } + } + if _, ok := updateFields["scheduleToStartTimeout"]; ok { + if err := timestamp.ValidateAndCapProtoDuration(opts.GetScheduleToStartTimeout()); err != nil { + return serviceerror.NewInvalidArgumentf("invalid ScheduleToStartTimeout: %v", err) + } + } + if _, ok := updateFields["startToCloseTimeout"]; ok { + if err := timestamp.ValidateAndCapProtoDuration(opts.GetStartToCloseTimeout()); err != nil { + return serviceerror.NewInvalidArgumentf("invalid StartToCloseTimeout: %v", err) + } + } + if _, ok := updateFields["heartbeatTimeout"]; ok { + if err := timestamp.ValidateAndCapProtoDuration(opts.GetHeartbeatTimeout()); err != nil { + return serviceerror.NewInvalidArgumentf("invalid HeartbeatTimeout: %v", err) + } + } + + // Priority: validate the full priority when replacing it, or validate individual sub-fields. + if _, ok := updateFields["priority"]; ok { + if err := priorities.Validate(opts.GetPriority()); err != nil { + return err + } + } + if _, ok := updateFields["priority.priorityKey"]; ok { + if opts.GetPriority().GetPriorityKey() < 0 { + return priorities.ErrInvalidPriority + } + } + if _, ok := updateFields["priority.fairnessKey"]; ok { + if err := priorities.ValidateFairnessKey(opts.GetPriority().GetFairnessKey()); err != nil { + return err + } + } + if _, ok := updateFields["priority.fairnessWeight"]; ok { + if opts.GetPriority().GetFairnessWeight() < 0 { + return priorities.ErrInvalidFairnessWeight + } + } + + // RetryPolicy: validate the full policy when replacing it, or validate individual sub-fields. 
+ if _, ok := updateFields["retryPolicy"]; ok { + if err := retrypolicy.Validate(opts.GetRetryPolicy()); err != nil { + return err + } + } + if _, ok := updateFields["retryPolicy.initialInterval"]; ok { + if err := timestamp.ValidateAndCapProtoDuration(opts.GetRetryPolicy().GetInitialInterval()); err != nil { + return serviceerror.NewInvalidArgumentf("invalid InitialInterval set on retry policy: %v", err) + } + } + if _, ok := updateFields["retryPolicy.backoffCoefficient"]; ok { + if opts.GetRetryPolicy().GetBackoffCoefficient() < 1 { + return serviceerror.NewInvalidArgument("BackoffCoefficient cannot be less than 1 on retry policy.") + } + } + if _, ok := updateFields["retryPolicy.maximumInterval"]; ok { + if err := timestamp.ValidateAndCapProtoDuration(opts.GetRetryPolicy().GetMaximumInterval()); err != nil { + return serviceerror.NewInvalidArgumentf("invalid MaximumInterval set on retry policy: %v", err) + } + } + if _, ok := updateFields["retryPolicy.maximumAttempts"]; ok { + if opts.GetRetryPolicy().GetMaximumAttempts() < 0 { + return serviceerror.NewInvalidArgument("MaximumAttempts cannot be negative on retry policy.") + } } return nil } -func validateAndNormalizeDeleteRequest( +func validateDeleteActivityExecutionRequest( req *workflowservice.DeleteActivityExecutionRequest, maxIDLengthLimit int, ) error { @@ -444,7 +522,7 @@ func validateAndNormalizeDeleteRequest( return nil } -func validateAndNormalizeTerminateRequest( +func validateTerminateActivityExecutionRequest( req *workflowservice.TerminateActivityExecutionRequest, maxIDLengthLimit int, blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, @@ -460,9 +538,7 @@ func validateAndNormalizeTerminateRequest( len(req.GetActivityId()), maxIDLengthLimit) } - if req.GetRequestId() == "" { - req.RequestId = uuid.NewString() - } else if len(req.GetRequestId()) > maxIDLengthLimit { + if len(req.GetRequestId()) > maxIDLengthLimit { return serviceerror.NewInvalidArgumentf("request ID exceeds length 
limit. Length=%d Limit=%d", len(req.GetRequestId()), maxIDLengthLimit) } diff --git a/common/activityoptions/merge.go b/common/activityoptions/merge.go new file mode 100644 index 00000000000..b9713ba431c --- /dev/null +++ b/common/activityoptions/merge.go @@ -0,0 +1,122 @@ +package activityoptions + +import ( + activitypb "go.temporal.io/api/activity/v1" + commonpb "go.temporal.io/api/common/v1" + "go.temporal.io/api/serviceerror" +) + +// MergeActivityOptions applies the fields specified in updateFields from mergeFrom into mergeInto in-place. +// updateFields is a map of camelCase JSON field paths, as returned by util.ParseFieldMask. +// Returns an error if a required parent field (TaskQueue, Priority, RetryPolicy) is nil in mergeFrom +// when a sub-field of that parent is listed in updateFields. +// +//nolint:revive // cyclomatic: field-mask application requires explicit handling of each supported field +func MergeActivityOptions(mergeInto, mergeFrom *activitypb.ActivityOptions, updateFields map[string]struct{}) error { + if _, ok := updateFields["taskQueue.name"]; ok { + if mergeFrom.GetTaskQueue() == nil { + return serviceerror.NewInvalidArgument("TaskQueue is not provided") + } + if mergeInto.TaskQueue == nil { + mergeInto.TaskQueue = mergeFrom.GetTaskQueue() + } else { + mergeInto.TaskQueue.Name = mergeFrom.GetTaskQueue().GetName() + } + } + + if _, ok := updateFields["scheduleToCloseTimeout"]; ok { + mergeInto.ScheduleToCloseTimeout = mergeFrom.GetScheduleToCloseTimeout() + } + + if _, ok := updateFields["scheduleToStartTimeout"]; ok { + mergeInto.ScheduleToStartTimeout = mergeFrom.GetScheduleToStartTimeout() + } + + if _, ok := updateFields["startToCloseTimeout"]; ok { + mergeInto.StartToCloseTimeout = mergeFrom.GetStartToCloseTimeout() + } + + if _, ok := updateFields["heartbeatTimeout"]; ok { + mergeInto.HeartbeatTimeout = mergeFrom.GetHeartbeatTimeout() + } + + if _, ok := updateFields["priority"]; ok { + mergeInto.Priority = mergeFrom.GetPriority() + } 
+ + if _, ok := updateFields["priority.priorityKey"]; ok { + if mergeFrom.GetPriority() == nil { + return serviceerror.NewInvalidArgument("Priority is not provided") + } + if mergeInto.Priority == nil { + mergeInto.Priority = &commonpb.Priority{} + } + mergeInto.Priority.PriorityKey = mergeFrom.GetPriority().GetPriorityKey() + } + + if _, ok := updateFields["priority.fairnessKey"]; ok { + if mergeFrom.GetPriority() == nil { + return serviceerror.NewInvalidArgument("Priority is not provided") + } + if mergeInto.Priority == nil { + mergeInto.Priority = &commonpb.Priority{} + } + mergeInto.Priority.FairnessKey = mergeFrom.GetPriority().GetFairnessKey() + } + + if _, ok := updateFields["priority.fairnessWeight"]; ok { + if mergeFrom.GetPriority() == nil { + return serviceerror.NewInvalidArgument("Priority is not provided") + } + if mergeInto.Priority == nil { + mergeInto.Priority = &commonpb.Priority{} + } + mergeInto.Priority.FairnessWeight = mergeFrom.GetPriority().GetFairnessWeight() + } + + if _, ok := updateFields["retryPolicy"]; ok { + mergeInto.RetryPolicy = mergeFrom.GetRetryPolicy() + } + + if _, ok := updateFields["retryPolicy.initialInterval"]; ok { + if mergeFrom.GetRetryPolicy() == nil { + return serviceerror.NewInvalidArgument("RetryPolicy is not provided") + } + if mergeInto.RetryPolicy == nil { + mergeInto.RetryPolicy = &commonpb.RetryPolicy{} + } + mergeInto.RetryPolicy.InitialInterval = mergeFrom.GetRetryPolicy().GetInitialInterval() + } + + if _, ok := updateFields["retryPolicy.backoffCoefficient"]; ok { + if mergeFrom.GetRetryPolicy() == nil { + return serviceerror.NewInvalidArgument("RetryPolicy is not provided") + } + if mergeInto.RetryPolicy == nil { + mergeInto.RetryPolicy = &commonpb.RetryPolicy{} + } + mergeInto.RetryPolicy.BackoffCoefficient = mergeFrom.GetRetryPolicy().GetBackoffCoefficient() + } + + if _, ok := updateFields["retryPolicy.maximumInterval"]; ok { + if mergeFrom.GetRetryPolicy() == nil { + return 
serviceerror.NewInvalidArgument("RetryPolicy is not provided") + } + if mergeInto.RetryPolicy == nil { + mergeInto.RetryPolicy = &commonpb.RetryPolicy{} + } + mergeInto.RetryPolicy.MaximumInterval = mergeFrom.GetRetryPolicy().GetMaximumInterval() + } + + if _, ok := updateFields["retryPolicy.maximumAttempts"]; ok { + if mergeFrom.GetRetryPolicy() == nil { + return serviceerror.NewInvalidArgument("RetryPolicy is not provided") + } + if mergeInto.RetryPolicy == nil { + mergeInto.RetryPolicy = &commonpb.RetryPolicy{} + } + mergeInto.RetryPolicy.MaximumAttempts = mergeFrom.GetRetryPolicy().GetMaximumAttempts() + } + + return nil +} diff --git a/common/activityoptions/merge_test.go b/common/activityoptions/merge_test.go new file mode 100644 index 00000000000..e2e59c33cf7 --- /dev/null +++ b/common/activityoptions/merge_test.go @@ -0,0 +1,323 @@ +package activityoptions + +import ( + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + activitypb "go.temporal.io/api/activity/v1" + commonpb "go.temporal.io/api/common/v1" + taskqueuepb "go.temporal.io/api/taskqueue/v1" + "go.temporal.io/server/common/util" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/types/known/durationpb" + "google.golang.org/protobuf/types/known/fieldmaskpb" +) + +func TestMergeActivityOptionsAcceptance(t *testing.T) { + updateOptions := &activitypb.ActivityOptions{ + TaskQueue: &taskqueuepb.TaskQueue{Name: "task_queue_name"}, + ScheduleToCloseTimeout: durationpb.New(time.Second), + StartToCloseTimeout: durationpb.New(time.Second), + ScheduleToStartTimeout: durationpb.New(time.Second), + HeartbeatTimeout: durationpb.New(time.Second), + Priority: &commonpb.Priority{ + PriorityKey: 42, + FairnessKey: "test_key", + FairnessWeight: 5.0, + }, + RetryPolicy: &commonpb.RetryPolicy{ + MaximumInterval: durationpb.New(time.Second), + MaximumAttempts: 5, + BackoffCoefficient: 1.0, + InitialInterval: 
durationpb.New(time.Second), + }, + } + + testCases := []struct { + name string + mergeInto *activitypb.ActivityOptions + mergeFrom *activitypb.ActivityOptions + expected *activitypb.ActivityOptions + mask *fieldmaskpb.FieldMask + }{ + { + name: "Top-level fields with CamelCase", + mergeFrom: updateOptions, + mergeInto: &activitypb.ActivityOptions{}, + expected: updateOptions, + mask: &fieldmaskpb.FieldMask{ + Paths: []string{ + "TaskQueue.Name", + "ScheduleToCloseTimeout", + "ScheduleToStartTimeout", + "StartToCloseTimeout", + "HeartbeatTimeout", + "Priority", + "RetryPolicy", + }, + }, + }, + { + name: "Top-level fields with snake_case", + mergeFrom: updateOptions, + mergeInto: &activitypb.ActivityOptions{}, + expected: updateOptions, + mask: &fieldmaskpb.FieldMask{ + Paths: []string{ + "task_queue.name", + "schedule_to_close_timeout", + "schedule_to_start_timeout", + "start_to_close_timeout", + "heartbeat_timeout", + "priority", + "retry_policy", + }, + }, + }, + { + name: "Sub-fields", + mergeFrom: &activitypb.ActivityOptions{ + Priority: &commonpb.Priority{ + PriorityKey: 99, + FairnessKey: "newKey", + FairnessWeight: 7.5, + }, + RetryPolicy: &commonpb.RetryPolicy{ + MaximumInterval: durationpb.New(time.Second), + MaximumAttempts: 5, + BackoffCoefficient: 1.0, + InitialInterval: durationpb.New(time.Second), + }, + }, + mergeInto: &activitypb.ActivityOptions{ + Priority: &commonpb.Priority{ + PriorityKey: 10, + FairnessKey: "oldKey", + FairnessWeight: 1.0, + }, + RetryPolicy: &commonpb.RetryPolicy{}, + }, + expected: &activitypb.ActivityOptions{ + Priority: &commonpb.Priority{ + PriorityKey: 99, + FairnessKey: "newKey", + FairnessWeight: 7.5, + }, + RetryPolicy: &commonpb.RetryPolicy{ + MaximumInterval: durationpb.New(time.Second), + MaximumAttempts: 5, + BackoffCoefficient: 1.0, + InitialInterval: durationpb.New(time.Second), + }, + }, + mask: &fieldmaskpb.FieldMask{ + Paths: []string{ + "priority.priority_key", + "priority.fairness_key", + 
"priority.fairness_weight", + "retry_policy.backoff_coefficient", + "retry_policy.initial_interval", + "retry_policy.maximum_interval", + "retry_policy.maximum_attempts", + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + updateFields := util.ParseFieldMask(tc.mask) + err := MergeActivityOptions(tc.mergeInto, tc.mergeFrom, updateFields) + require.NoError(t, err) + require.Equal(t, tc.expected.RetryPolicy.GetInitialInterval(), tc.mergeInto.RetryPolicy.GetInitialInterval(), "RetryInitialInterval") + require.Equal(t, tc.expected.RetryPolicy.GetMaximumInterval(), tc.mergeInto.RetryPolicy.GetMaximumInterval(), "RetryMaximumInterval") + require.InEpsilon(t, tc.expected.RetryPolicy.GetBackoffCoefficient(), tc.mergeInto.RetryPolicy.GetBackoffCoefficient(), 0.001, "RetryBackoffCoefficient") + require.Equal(t, tc.expected.RetryPolicy.GetMaximumAttempts(), tc.mergeInto.RetryPolicy.GetMaximumAttempts(), "RetryMaximumAttempts") + require.Equal(t, tc.expected.TaskQueue, tc.mergeInto.TaskQueue, "TaskQueue") + require.Equal(t, tc.expected.ScheduleToCloseTimeout, tc.mergeInto.ScheduleToCloseTimeout, "ScheduleToCloseTimeout") + require.Equal(t, tc.expected.ScheduleToStartTimeout, tc.mergeInto.ScheduleToStartTimeout, "ScheduleToStartTimeout") + require.Equal(t, tc.expected.StartToCloseTimeout, tc.mergeInto.StartToCloseTimeout, "StartToCloseTimeout") + require.Equal(t, tc.expected.HeartbeatTimeout, tc.mergeInto.HeartbeatTimeout, "HeartbeatTimeout") + require.Equal(t, tc.expected.Priority, tc.mergeInto.Priority, "Priority") + }) + } +} + +func TestMergeActivityOptionsErrors(t *testing.T) { + makeReq := func(paths ...string) map[string]struct{} { + return util.ParseFieldMask(&fieldmaskpb.FieldMask{Paths: paths}) + } + emptyOpts := &activitypb.ActivityOptions{} + + var err error + err = MergeActivityOptions(&activitypb.ActivityOptions{}, emptyOpts, makeReq("retry_policy.maximum_interval")) + require.ErrorContains(t, err, "RetryPolicy is not 
provided") + + err = MergeActivityOptions(&activitypb.ActivityOptions{}, emptyOpts, makeReq("retry_policy.maximum_attempts")) + require.ErrorContains(t, err, "RetryPolicy is not provided") + + err = MergeActivityOptions(&activitypb.ActivityOptions{}, emptyOpts, makeReq("retry_policy.backoff_coefficient")) + require.ErrorContains(t, err, "RetryPolicy is not provided") + + err = MergeActivityOptions(&activitypb.ActivityOptions{}, emptyOpts, makeReq("retry_policy.initial_interval")) + require.ErrorContains(t, err, "RetryPolicy is not provided") + + err = MergeActivityOptions(&activitypb.ActivityOptions{}, emptyOpts, makeReq("taskQueue.name")) + require.ErrorContains(t, err, "TaskQueue is not provided") + + err = MergeActivityOptions(&activitypb.ActivityOptions{}, emptyOpts, makeReq("priority.priority_key")) + require.ErrorContains(t, err, "Priority is not provided") + + err = MergeActivityOptions(&activitypb.ActivityOptions{}, emptyOpts, makeReq("priority.fairness_key")) + require.ErrorContains(t, err, "Priority is not provided") + + err = MergeActivityOptions(&activitypb.ActivityOptions{}, emptyOpts, makeReq("priority.fairness_weight")) + require.ErrorContains(t, err, "Priority is not provided") +} + +// TestMergeActivityOptionsCoversAllFields uses protobuf reflection to enumerate +// every field-mask path reachable from ActivityOptions and verifies that each +// one is handled by MergeActivityOptions. Paths that are intentionally not +// offered for update must be listed in notOfferedPaths. When a new field is +// added to the proto, this test will automatically create a sub-test that fails +// until MergeActivityOptions handles it or it is added to notOfferedPaths. +func TestMergeActivityOptionsCoversAllFields(t *testing.T) { + // notOfferedPaths lists camelCase field-mask paths intentionally not supported + // for update. Add a new entry here (with a comment) if a proto field should + // not be offered to callers rather than wiring it into MergeActivityOptions. 
+ notOfferedPaths := map[string]struct{}{ + // TaskQueue: only the name is user-writable; kind and normal_name are + // internal routing fields managed by the server, not by callers. + "taskQueue": {}, + "taskQueue.kind": {}, + "taskQueue.normalName": {}, + // non_retryable_error_types is a repeated field; set/clear semantics + // do not map cleanly onto a single field-mask path (no append/remove). + "retryPolicy.nonRetryableErrorTypes": {}, + } + + // Enumerate all valid field-mask paths for ActivityOptions via protoreflect. + discovered := collectActivityOptionsFieldPaths() + + // notOfferedPaths entries must still exist in the proto (catches stale entries after renames). + discoveredSet := make(map[string]struct{}, len(discovered)) + for _, p := range discovered { + discoveredSet[p] = struct{}{} + } + for p := range notOfferedPaths { + _, exists := discoveredSet[p] + require.True(t, exists, "notOfferedPaths entry %q does not correspond to any proto field path", p) + } + + // populatedOptions has a distinct non-zero value for every possible field so + // the per-path mutation check below can detect whether the field was actually set. + populatedOptions := &activitypb.ActivityOptions{ + TaskQueue: &taskqueuepb.TaskQueue{Name: "test-queue"}, + ScheduleToCloseTimeout: durationpb.New(10 * time.Second), + ScheduleToStartTimeout: durationpb.New(5 * time.Second), + StartToCloseTimeout: durationpb.New(3 * time.Second), + HeartbeatTimeout: durationpb.New(1 * time.Second), + Priority: &commonpb.Priority{ + PriorityKey: 42, + FairnessKey: "test-key", + FairnessWeight: 2.5, + }, + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Second), + BackoffCoefficient: 2.0, + MaximumInterval: durationpb.New(30 * time.Second), + MaximumAttempts: 5, + }, + } + + // Every discovered path not in notOfferedPaths must be handled by MergeActivityOptions. 
+ for _, path := range discovered { + if _, skip := notOfferedPaths[path]; skip { + continue + } + path := path // capture for sub-test closure + t.Run("updates/"+path, func(t *testing.T) { + mergeInto := &activitypb.ActivityOptions{} + err := MergeActivityOptions(mergeInto, populatedOptions, map[string]struct{}{path: {}}) + require.NoError(t, err, "unexpected error for path %q", path) + + fromVal, fd := getValueAtProtoPath(populatedOptions.ProtoReflect(), path) + intoVal, _ := getValueAtProtoPath(mergeInto.ProtoReflect(), path) + + switch fd.Kind() { + case protoreflect.MessageKind: + require.True(t, proto.Equal(safeProtoMessage(fromVal), safeProtoMessage(intoVal)), + "MergeActivityOptions did not update message field %q", path) + default: + require.Equal(t, fromVal.Interface(), intoVal.Interface(), + "MergeActivityOptions did not update scalar field %q", path) + } + }) + } +} + +// collectActivityOptionsFieldPaths returns the camelCase field-mask paths for +// ActivityOptions. For singular message-typed fields, both the message path and +// its sub-field paths are included. Well-known types (e.g. google.protobuf.Duration) +// are treated as leaves and not recursed into. +func collectActivityOptionsFieldPaths() []string { + return collectFieldMaskPaths((&activitypb.ActivityOptions{}).ProtoReflect().Descriptor(), "") +} + +func collectFieldMaskPaths(desc protoreflect.MessageDescriptor, prefix string) []string { + var paths []string + fields := desc.Fields() + for i := 0; i < fields.Len(); i++ { + field := fields.Get(i) + fullPath := field.JSONName() + if prefix != "" { + fullPath = prefix + "." + field.JSONName() + } + paths = append(paths, fullPath) + + // Recurse into singular message fields that are not google.protobuf well-known types. + if field.Kind() == protoreflect.MessageKind && + !field.IsList() && !field.IsMap() && + !isWellKnownProtoMessage(field.Message()) { + paths = append(paths, collectFieldMaskPaths(field.Message(), fullPath)...) 
+		}
+	}
+	return paths
+}
+
+// isWellKnownProtoMessage reports whether msg is a google.protobuf well-known
+// type such as Duration or Timestamp. Field-mask paths never descend into these.
+func isWellKnownProtoMessage(msg protoreflect.MessageDescriptor) bool {
+	return strings.HasPrefix(string(msg.FullName()), "google.protobuf.")
+}
+
+// getValueAtProtoPath resolves a dot-separated camelCase (JSON name) field-mask path in msg and
+// returns the value and descriptor of the final field; an unknown field name panics. If a segment
+// before the last is not a populated singular message, it returns an invalid Value and that segment's descriptor.
+func getValueAtProtoPath(msg protoreflect.Message, path string) (protoreflect.Value, protoreflect.FieldDescriptor) {
+	name, rest, nested := strings.Cut(path, ".")
+	fd := msg.Descriptor().Fields().ByJSONName(name)
+	if fd == nil {
+		panic("proto field not found by JSON name: " + name)
+	}
+	val := msg.Get(fd)
+	if !nested {
+		return val, fd
+	}
+	if fd.Kind() != protoreflect.MessageKind || fd.IsList() || fd.IsMap() || !val.Message().IsValid() { // list/map fields also report MessageKind, and Value.Message panics on them
+		return protoreflect.Value{}, fd
+	}
+	return getValueAtProtoPath(val.Message(), rest)
+}
+
+// safeProtoMessage returns the proto.Message from a protoreflect.Value,
+// or nil if the value is invalid or the underlying message is not populated.
+func safeProtoMessage(v protoreflect.Value) proto.Message { + if !v.IsValid() || !v.Message().IsValid() { + return nil + } + return v.Message().Interface() +} diff --git a/service/history/api/updateactivityoptions/api.go b/service/history/api/updateactivityoptions/api.go index 8d7406206c8..53522a06a7b 100644 --- a/service/history/api/updateactivityoptions/api.go +++ b/service/history/api/updateactivityoptions/api.go @@ -15,6 +15,7 @@ import ( "go.temporal.io/server/api/historyservice/v1" persistencespb "go.temporal.io/server/api/persistence/v1" "go.temporal.io/server/common" + "go.temporal.io/server/common/activityoptions" "go.temporal.io/server/common/definition" "go.temporal.io/server/common/log/tag" "go.temporal.io/server/common/metrics" @@ -194,7 +195,7 @@ func processActivityOptionsUpdate( } // update activity options - if err := mergeActivityOptions(mergeInto, mergeFrom, updateFields); err != nil { + if err := activityoptions.MergeActivityOptions(mergeInto, mergeFrom, updateFields); err != nil { return nil, err } @@ -207,110 +208,6 @@ func processActivityOptionsUpdate( return updateActivityOptions(mutableState, ai, adjustedOptions) } -func mergeActivityOptions( - mergeInto *activitypb.ActivityOptions, - mergeFrom *activitypb.ActivityOptions, - updateFields map[string]struct{}, -) error { - - if _, ok := updateFields["taskQueue.name"]; ok { - if mergeFrom.TaskQueue == nil { - return serviceerror.NewInvalidArgument("TaskQueue is not provided") - } - if mergeInto.TaskQueue == nil { - mergeInto.TaskQueue = mergeFrom.TaskQueue - } - mergeInto.TaskQueue.Name = mergeFrom.TaskQueue.Name - } - - if _, ok := updateFields["scheduleToCloseTimeout"]; ok { - mergeInto.ScheduleToCloseTimeout = mergeFrom.ScheduleToCloseTimeout - } - - if _, ok := updateFields["scheduleToStartTimeout"]; ok { - mergeInto.ScheduleToStartTimeout = mergeFrom.ScheduleToStartTimeout - } - - if _, ok := updateFields["startToCloseTimeout"]; ok { - mergeInto.StartToCloseTimeout = 
mergeFrom.StartToCloseTimeout - } - - if _, ok := updateFields["heartbeatTimeout"]; ok { - mergeInto.HeartbeatTimeout = mergeFrom.HeartbeatTimeout - } - - if _, ok := updateFields["priority"]; ok { - mergeInto.Priority = mergeFrom.Priority - } - - if _, ok := updateFields["priority.priorityKey"]; ok { - if mergeFrom.Priority == nil { - return serviceerror.NewInvalidArgument("Priority is not provided") - } - if mergeInto.Priority == nil { - mergeInto.Priority = &commonpb.Priority{} - } - mergeInto.Priority.PriorityKey = mergeFrom.Priority.PriorityKey - } - - if _, ok := updateFields["priority.fairnessKey"]; ok { - if mergeFrom.Priority == nil { - return serviceerror.NewInvalidArgument("Priority is not provided") - } - if mergeInto.Priority == nil { - mergeInto.Priority = &commonpb.Priority{} - } - mergeInto.Priority.FairnessKey = mergeFrom.Priority.FairnessKey - } - - if _, ok := updateFields["priority.fairnessWeight"]; ok { - if mergeFrom.Priority == nil { - return serviceerror.NewInvalidArgument("Priority is not provided") - } - if mergeInto.Priority == nil { - mergeInto.Priority = &commonpb.Priority{} - } - mergeInto.Priority.FairnessWeight = mergeFrom.Priority.FairnessWeight - } - - if mergeInto.RetryPolicy == nil { - mergeInto.RetryPolicy = &commonpb.RetryPolicy{} - } - - if _, ok := updateFields["retryPolicy"]; ok { - mergeInto.RetryPolicy = mergeFrom.RetryPolicy - } - - if _, ok := updateFields["retryPolicy.initialInterval"]; ok { - if mergeFrom.RetryPolicy == nil { - return serviceerror.NewInvalidArgument("RetryPolicy is not provided") - } - mergeInto.RetryPolicy.InitialInterval = mergeFrom.RetryPolicy.InitialInterval - } - - if _, ok := updateFields["retryPolicy.backoffCoefficient"]; ok { - if mergeFrom.RetryPolicy == nil { - return serviceerror.NewInvalidArgument("RetryPolicy is not provided") - } - mergeInto.RetryPolicy.BackoffCoefficient = mergeFrom.RetryPolicy.BackoffCoefficient - } - - if _, ok := updateFields["retryPolicy.maximumInterval"]; ok { - if 
mergeFrom.RetryPolicy == nil { - return serviceerror.NewInvalidArgument("RetryPolicy is not provided") - } - mergeInto.RetryPolicy.MaximumInterval = mergeFrom.RetryPolicy.MaximumInterval - } - if _, ok := updateFields["retryPolicy.maximumAttempts"]; ok { - if mergeFrom.RetryPolicy == nil { - return serviceerror.NewInvalidArgument("RetryPolicy is not provided") - } - mergeInto.RetryPolicy.MaximumAttempts = mergeFrom.RetryPolicy.MaximumAttempts - } - - return nil -} - func adjustActivityOptions( validator *api.CommandAttrValidator, namespaceID string, diff --git a/service/history/api/updateactivityoptions/api_test.go b/service/history/api/updateactivityoptions/api_test.go index 8dd3ebf4ae7..da65f4a1d2e 100644 --- a/service/history/api/updateactivityoptions/api_test.go +++ b/service/history/api/updateactivityoptions/api_test.go @@ -18,6 +18,7 @@ import ( historyspb "go.temporal.io/server/api/history/v1" "go.temporal.io/server/api/historyservice/v1" persistencespb "go.temporal.io/server/api/persistence/v1" + "go.temporal.io/server/common/activityoptions" "go.temporal.io/server/common/cluster" "go.temporal.io/server/common/log" "go.temporal.io/server/common/namespace" @@ -37,173 +38,6 @@ import ( "google.golang.org/protobuf/types/known/fieldmaskpb" ) -func TestApplyActivityOptionsAcceptance(t *testing.T) { - updateOptions := &activitypb.ActivityOptions{ - TaskQueue: &taskqueuepb.TaskQueue{Name: "task_queue_name"}, - ScheduleToCloseTimeout: durationpb.New(time.Second), - StartToCloseTimeout: durationpb.New(time.Second), - ScheduleToStartTimeout: durationpb.New(time.Second), - HeartbeatTimeout: durationpb.New(time.Second), - Priority: &commonpb.Priority{ - PriorityKey: 42, - FairnessKey: "test_key", - FairnessWeight: 5.0, - }, - RetryPolicy: &commonpb.RetryPolicy{ - MaximumInterval: durationpb.New(time.Second), - MaximumAttempts: 5, - BackoffCoefficient: 1.0, - InitialInterval: durationpb.New(time.Second), - }, - } - - testCases := []struct { - name string - mergeInto 
*activitypb.ActivityOptions - mergeFrom *activitypb.ActivityOptions - expected *activitypb.ActivityOptions - mask *fieldmaskpb.FieldMask - }{ - { - name: "Top-level fields with CamelCase", - mergeFrom: updateOptions, - mergeInto: &activitypb.ActivityOptions{}, - expected: updateOptions, - mask: &fieldmaskpb.FieldMask{ - Paths: []string{ - "TaskQueue.Name", - "ScheduleToCloseTimeout", - "ScheduleToStartTimeout", - "StartToCloseTimeout", - "HeartbeatTimeout", - "Priority", - "RetryPolicy", - }, - }, - }, - { - name: "Top-level fields with snake_case", - mergeFrom: updateOptions, - mergeInto: &activitypb.ActivityOptions{}, - expected: updateOptions, - mask: &fieldmaskpb.FieldMask{ - Paths: []string{ - "task_queue.name", - "schedule_to_close_timeout", - "schedule_to_start_timeout", - "start_to_close_timeout", - "heartbeat_timeout", - "priority", - "retry_policy", - }, - }, - }, - { - name: "Sub-fields", - mergeFrom: &activitypb.ActivityOptions{ - Priority: &commonpb.Priority{ - PriorityKey: 99, - FairnessKey: "newKey", - FairnessWeight: 7.5, - }, - RetryPolicy: &commonpb.RetryPolicy{ - MaximumInterval: durationpb.New(time.Second), - MaximumAttempts: 5, - BackoffCoefficient: 1.0, - InitialInterval: durationpb.New(time.Second), - }, - }, - mergeInto: &activitypb.ActivityOptions{ - Priority: &commonpb.Priority{ - PriorityKey: 10, - FairnessKey: "oldKey", - FairnessWeight: 1.0, - }, - RetryPolicy: &commonpb.RetryPolicy{}, - }, - expected: &activitypb.ActivityOptions{ - Priority: &commonpb.Priority{ - PriorityKey: 99, - FairnessKey: "newKey", - FairnessWeight: 7.5, - }, - RetryPolicy: &commonpb.RetryPolicy{ - MaximumInterval: durationpb.New(time.Second), - MaximumAttempts: 5, - BackoffCoefficient: 1.0, - InitialInterval: durationpb.New(time.Second), - }, - }, - mask: &fieldmaskpb.FieldMask{ - Paths: []string{ - "priority.priority_key", - "priority.fairness_key", - "priority.fairness_weight", - "retry_policy.backoff_coefficient", - "retry_policy.initial_interval", - 
"retry_policy.maximum_interval", - "retry_policy.maximum_attempts", - }, - }, - }, - } - for _, tc := range testCases { - updateFields := util.ParseFieldMask(tc.mask) - - t.Run(tc.name, func(t *testing.T) {}) - err := mergeActivityOptions(tc.mergeInto, tc.mergeFrom, updateFields) - assert.NoError(t, err) - assert.Equal(t, tc.mergeInto.RetryPolicy.InitialInterval, tc.expected.RetryPolicy.InitialInterval, "RetryInitialInterval") - assert.Equal(t, tc.mergeInto.RetryPolicy.MaximumInterval, tc.expected.RetryPolicy.MaximumInterval, "RetryMaximumInterval") - assert.Equal(t, tc.mergeInto.RetryPolicy.BackoffCoefficient, tc.expected.RetryPolicy.BackoffCoefficient, "RetryBackoffCoefficient") - assert.Equal(t, tc.mergeInto.RetryPolicy.MaximumAttempts, tc.expected.RetryPolicy.MaximumAttempts, "RetryMaximumAttempts") - - assert.Equal(t, tc.mergeInto.TaskQueue, tc.expected.TaskQueue, "TaskQueue") - - assert.Equal(t, tc.mergeInto.ScheduleToCloseTimeout, tc.expected.ScheduleToCloseTimeout, "ScheduleToCloseTimeout") - assert.Equal(t, tc.mergeInto.ScheduleToStartTimeout, tc.expected.ScheduleToStartTimeout, "ScheduleToStartTimeout") - assert.Equal(t, tc.mergeInto.StartToCloseTimeout, tc.expected.StartToCloseTimeout, "StartToCloseTimeout") - assert.Equal(t, tc.mergeInto.HeartbeatTimeout, tc.expected.HeartbeatTimeout, "HeartbeatTimeout") - assert.Equal(t, tc.mergeInto.Priority, tc.expected.Priority, "Priority") - } -} - -func TestApplyActivityOptionsErrors(t *testing.T) { - var err error - err = mergeActivityOptions(&activitypb.ActivityOptions{}, &activitypb.ActivityOptions{}, - util.ParseFieldMask(&fieldmaskpb.FieldMask{Paths: []string{"retry_policy.maximum_interval"}})) - require.ErrorContains(t, err, "RetryPolicy is not provided") - - err = mergeActivityOptions(&activitypb.ActivityOptions{}, &activitypb.ActivityOptions{}, - util.ParseFieldMask(&fieldmaskpb.FieldMask{Paths: []string{"retry_policy.maximum_attempts"}})) - require.ErrorContains(t, err, "RetryPolicy is not provided") - - 
err = mergeActivityOptions(&activitypb.ActivityOptions{}, &activitypb.ActivityOptions{}, - util.ParseFieldMask(&fieldmaskpb.FieldMask{Paths: []string{"retry_policy.backoff_coefficient"}})) - require.ErrorContains(t, err, "RetryPolicy is not provided") - - err = mergeActivityOptions(&activitypb.ActivityOptions{}, &activitypb.ActivityOptions{}, - util.ParseFieldMask(&fieldmaskpb.FieldMask{Paths: []string{"retry_policy.initial_interval"}})) - require.ErrorContains(t, err, "RetryPolicy is not provided") - - err = mergeActivityOptions(&activitypb.ActivityOptions{}, &activitypb.ActivityOptions{}, - util.ParseFieldMask(&fieldmaskpb.FieldMask{Paths: []string{"taskQueue.name"}})) - require.ErrorContains(t, err, "TaskQueue is not provided") - - err = mergeActivityOptions(&activitypb.ActivityOptions{}, &activitypb.ActivityOptions{}, - util.ParseFieldMask(&fieldmaskpb.FieldMask{Paths: []string{"priority.priority_key"}})) - require.ErrorContains(t, err, "Priority is not provided") - - err = mergeActivityOptions(&activitypb.ActivityOptions{}, &activitypb.ActivityOptions{}, - util.ParseFieldMask(&fieldmaskpb.FieldMask{Paths: []string{"priority.fairness_key"}})) - require.ErrorContains(t, err, "Priority is not provided") - - err = mergeActivityOptions(&activitypb.ActivityOptions{}, &activitypb.ActivityOptions{}, - util.ParseFieldMask(&fieldmaskpb.FieldMask{Paths: []string{"priority.fairness_weight"}})) - require.ErrorContains(t, err, "Priority is not provided") - -} - func TestApplyActivityOptionsReset(t *testing.T) { options := &activitypb.ActivityOptions{ TaskQueue: &taskqueuepb.TaskQueue{Name: "task_queue_name"}, @@ -242,7 +76,7 @@ func TestApplyActivityOptionsReset(t *testing.T) { updateFields := util.ParseFieldMask(fullMask) - err := mergeActivityOptions(options, + err := activityoptions.MergeActivityOptions(options, &activitypb.ActivityOptions{ Priority: &commonpb.Priority{ PriorityKey: 10, diff --git a/tests/activity_api_update_test.go b/tests/activity_api_update_test.go 
index 6874b3a50b6..7205aa43cf6 100644 --- a/tests/activity_api_update_test.go +++ b/tests/activity_api_update_test.go @@ -168,7 +168,11 @@ func TestActivityApiUpdateClientTestSuite(t *testing.T) { s.NoError(err) s.Len(description.PendingActivities, 1) - activityUpdated <- struct{}{} + select { + case activityUpdated <- struct{}{}: + case <-ctx.Done(): + t.Fatal("timed out waiting for activity to receive update signal") + } s.EventuallyWithT(func(t *assert.CollectT) { description, err = s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) @@ -431,7 +435,11 @@ func TestActivityApiUpdateClientTestSuite(t *testing.T) { s.NoError(err) // let activity finish - activityUpdated <- struct{}{} + select { + case activityUpdated <- struct{}{}: + case <-ctx.Done(): + t.Fatal("timed out waiting for activity to receive update signal") + } // wait for activity to finish s.EventuallyWithT(func(t *assert.CollectT) { @@ -449,3 +457,313 @@ func TestActivityApiUpdateClientTestSuite(t *testing.T) { }) } } + +// TestActivityUpdateExecutionOptionsApi tests the new UpdateActivityExecutionOptions RPC +// on workflow activities (workflow_id != ""), verifying it behaves identically to the +// existing UpdateActivityOptions RPC. 
+func TestActivityUpdateExecutionOptionsApi(t *testing.T) {
+	t.Parallel()
+
+	// Every subtest builds its own environment with testcore.NewEnv and runs a workflow
+	// whose single activity (ID "activity-id") fails on its first invocation. The
+	// subtests differ in which options they change through UpdateActivityExecutionOptions
+	// and in the outcome they expect afterwards.
+
+	// ChangeRetryInterval: after the first attempt fails, update only
+	// retry_policy.initial_interval to 1s and expect a second attempt that completes
+	// once the test signals activityUpdated.
+	t.Run("ChangeRetryInterval", func(t *testing.T) {
+		s := testcore.NewEnv(t, testcore.WithSdkWorker())
+
+		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancel()
+
+		// Unbuffered: the send below completes only when the activity is receiving.
+		activityUpdated := make(chan struct{})
+
+		var startedActivityCount atomic.Int32
+		activityFunction := func() (string, error) {
+			// Add returns the incremented value, so counting the attempt and testing
+			// for the first attempt is a single atomic step.
+			if startedActivityCount.Add(1) == 1 {
+				return "", errors.New("bad-luck-please-retry")
+			}
+			s.WaitForChannel(ctx, activityUpdated)
+			return "done!", nil
+		}
+
+		// NOTE(review): the two durations are presumably the schedule-to-close timeout
+		// and the initial retry interval; confirm against makeActivityUpdateWorkflowFunc.
+		workflowFn := makeActivityUpdateWorkflowFunc(activityFunction, 30*time.Minute, 10*time.Minute)
+		s.SdkWorker().RegisterWorkflow(workflowFn)
+		s.SdkWorker().RegisterActivity(activityFunction)
+
+		workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{
+			ID:        activityUpdateWorkflowID,
+			TaskQueue: s.WorkerTaskQueue(),
+		}, workflowFn)
+		s.NoError(err)
+
+		// Wait until the first attempt has run and the activity is still pending.
+		s.EventuallyWithT(func(t *assert.CollectT) {
+			description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID())
+			require.NoError(t, err)
+			require.Len(t, description.GetPendingActivities(), 1)
+			require.Equal(t, int32(1), startedActivityCount.Load())
+		}, 10*time.Second, 500*time.Millisecond)
+
+		resp, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{
+			Namespace:  s.Namespace().String(),
+			WorkflowId: workflowRun.GetID(),
+			RunId:      workflowRun.GetRunID(),
+			ActivityId: "activity-id",
+			ActivityOptions: &activitypb.ActivityOptions{
+				RetryPolicy: &commonpb.RetryPolicy{
+					InitialInterval: durationpb.New(1 * time.Second),
+				},
+			},
+			UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"retry_policy.initial_interval"}},
+		})
+		s.NoError(err)
+		s.NotNil(resp)
+
+		// Release the second attempt; give up when the test context expires instead of
+		// blocking forever on the send.
+		select {
+		case activityUpdated <- struct{}{}:
+		case <-ctx.Done():
+			t.Fatal("timed out waiting for activity to receive update signal")
+		}
+
+		s.EventuallyWithT(func(t *assert.CollectT) {
+			description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID())
+			require.NoError(t, err)
+			require.Empty(t, description.GetPendingActivities())
+			require.Equal(t, int32(2), startedActivityCount.Load())
+		}, 3*time.Second, 100*time.Millisecond)
+
+		var out string
+		err = workflowRun.Get(ctx, &out)
+		s.NoError(err)
+	})
+
+	// ChangeScheduleToClose: after the first attempt fails, shrink
+	// schedule_to_close_timeout to 1s and expect the workflow to fail with a
+	// schedule-to-close timeout without a second attempt being started.
+	t.Run("ChangeScheduleToClose", func(t *testing.T) {
+		s := testcore.NewEnv(t, testcore.WithSdkWorker())
+
+		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancel()
+
+		var startedActivityCount atomic.Int32
+		activityFunction := func() (string, error) {
+			// Atomic increment-and-test: only the first invocation fails.
+			if startedActivityCount.Add(1) == 1 {
+				return "", errors.New("bad-luck-please-retry")
+			}
+			return "done!", nil
+		}
+
+		workflowFn := makeActivityUpdateWorkflowFunc(activityFunction, 30*time.Minute, 10*time.Minute)
+		s.SdkWorker().RegisterWorkflow(workflowFn)
+		s.SdkWorker().RegisterActivity(activityFunction)
+
+		workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{
+			ID:        activityUpdateWorkflowID,
+			TaskQueue: s.WorkerTaskQueue(),
+		}, workflowFn)
+		s.NoError(err)
+
+		s.EventuallyWithT(func(t *assert.CollectT) {
+			description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID())
+			require.NoError(t, err)
+			require.Len(t, description.GetPendingActivities(), 1)
+			require.Equal(t, int32(1), startedActivityCount.Load())
+		}, 2*time.Second, 200*time.Millisecond)
+
+		resp, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{
+			Namespace:  s.Namespace().String(),
+			WorkflowId: workflowRun.GetID(),
+			RunId:      workflowRun.GetRunID(),
+			ActivityId: "activity-id",
+			ActivityOptions: &activitypb.ActivityOptions{
+				ScheduleToCloseTimeout: durationpb.New(1 * time.Second),
+			},
+			UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"schedule_to_close_timeout"}},
+		})
+		s.NoError(err)
+		s.NotNil(resp)
+
+		// The activity must leave the pending list with the attempt counter still at 1.
+		s.EventuallyWithT(func(t *assert.CollectT) {
+			description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID())
+			require.NoError(t, err)
+			require.Empty(t, description.GetPendingActivities())
+			require.Equal(t, int32(1), startedActivityCount.Load())
+		}, 2*time.Second, 200*time.Millisecond)
+
+		// The workflow result must be an activity error wrapping a schedule-to-close timeout.
+		var out string
+		err = workflowRun.Get(ctx, &out)
+		var activityError *temporal.ActivityError
+		s.ErrorAs(err, &activityError)
+		s.Equal(enumspb.RETRY_STATE_TIMEOUT, activityError.RetryState())
+		var timeoutError *temporal.TimeoutError
+		s.ErrorAs(activityError, &timeoutError)
+		s.Equal(enumspb.TIMEOUT_TYPE_SCHEDULE_TO_CLOSE, timeoutError.TimeoutType())
+		s.Equal(int32(1), startedActivityCount.Load())
+	})
+
+	// ChangeScheduleToCloseAndRetry: update schedule_to_close_timeout and
+	// retry_policy.initial_interval in one request, check the options echoed in the
+	// response, and expect the second attempt to complete the workflow.
+	t.Run("ChangeScheduleToCloseAndRetry", func(t *testing.T) {
+		s := testcore.NewEnv(t, testcore.WithSdkWorker())
+
+		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancel()
+
+		var startedActivityCount atomic.Int32
+		activityFunction := func() (string, error) {
+			// Atomic increment-and-test: only the first invocation fails.
+			if startedActivityCount.Add(1) == 1 {
+				return "", errors.New("bad-luck-please-retry")
+			}
+			return "done!", nil
+		}
+
+		scheduleToCloseTimeout := 8 * time.Second
+		workflowFn := makeActivityUpdateWorkflowFunc(activityFunction, scheduleToCloseTimeout, 5*time.Second)
+		s.SdkWorker().RegisterWorkflow(workflowFn)
+		s.SdkWorker().RegisterActivity(activityFunction)
+
+		workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{
+			ID:        activityUpdateWorkflowID,
+			TaskQueue: s.WorkerTaskQueue(),
+		}, workflowFn)
+		s.NoError(err)
+
+		// Only requires that at least one attempt has started before the update is sent.
+		s.EventuallyWithT(func(t *assert.CollectT) {
+			require.NotZero(t, startedActivityCount.Load())
+		}, 2*time.Second, 200*time.Millisecond)
+
+		newScheduleToCloseTimeout := 10 * time.Second
+		resp, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{
+			Namespace:  s.Namespace().String(),
+			WorkflowId: workflowRun.GetID(),
+			RunId:      workflowRun.GetRunID(),
+			ActivityId: "activity-id",
+			ActivityOptions: &activitypb.ActivityOptions{
+				ScheduleToCloseTimeout: durationpb.New(newScheduleToCloseTimeout),
+				RetryPolicy: &commonpb.RetryPolicy{
+					InitialInterval: durationpb.New(1 * time.Second),
+				},
+			},
+			UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"schedule_to_close_timeout", "retry_policy.initial_interval"}},
+		})
+		s.NoError(err)
+		s.NotNil(resp)
+		s.Equal(int64(newScheduleToCloseTimeout.Seconds()), resp.GetActivityOptions().ScheduleToCloseTimeout.GetSeconds())
+		// NOTE(review): StartToCloseTimeout is not in the update mask and is compared with
+		// the original scheduleToCloseTimeout; presumably the workflow helper uses that
+		// duration for both timeouts. Confirm against makeActivityUpdateWorkflowFunc.
+		s.Equal(int64(scheduleToCloseTimeout.Seconds()), resp.GetActivityOptions().StartToCloseTimeout.GetSeconds())
+
+		s.EventuallyWithT(func(t *assert.CollectT) {
+			description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID())
+			require.NoError(t, err)
+			require.Empty(t, description.GetPendingActivities())
+			require.Equal(t, int32(2), startedActivityCount.Load())
+		}, 5*time.Second, 200*time.Millisecond)
+
+		var out string
+		err = workflowRun.Get(ctx, &out)
+		s.NoError(err)
+	})
+
+	// ResetDefaultOptions: raise retry_policy.maximum_attempts to 1000, restore the
+	// original options with RestoreOriginal, then shorten the retry interval so the
+	// second attempt can run and finish the workflow.
+	t.Run("ResetDefaultOptions", func(t *testing.T) {
+		s := testcore.NewEnv(t, testcore.WithSdkWorker())
+
+		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancel()
+
+		// Unbuffered: the send below completes only when the activity is receiving.
+		activityUpdated := make(chan struct{})
+
+		var startedActivityCount atomic.Int32
+		activityFunction := func() (string, error) {
+			// Atomic increment-and-test: only the first invocation fails.
+			if startedActivityCount.Add(1) == 1 {
+				return "", errors.New("bad-luck-please-retry")
+			}
+			s.WaitForChannel(ctx, activityUpdated)
+			return "done!", nil
+		}
+
+		workflowFn := makeActivityUpdateWorkflowFunc(activityFunction, 30*time.Minute, 10*time.Minute)
+		s.SdkWorker().RegisterWorkflow(workflowFn)
+		s.SdkWorker().RegisterActivity(activityFunction)
+
+		workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{
+			ID:        activityUpdateWorkflowID,
+			TaskQueue: s.WorkerTaskQueue(),
+		}, workflowFn)
+		s.NoError(err)
+
+		s.EventuallyWithT(func(t *assert.CollectT) {
+			description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID())
+			require.NoError(t, err)
+			require.Len(t, description.GetPendingActivities(), 1)
+			require.Equal(t, int32(1), startedActivityCount.Load())
+		}, 10*time.Second, 500*time.Millisecond)
+
+		// Update max attempts to 1000.
+		resp, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{
+			Namespace:  s.Namespace().String(),
+			WorkflowId: workflowRun.GetID(),
+			RunId:      workflowRun.GetRunID(),
+			ActivityId: "activity-id",
+			ActivityOptions: &activitypb.ActivityOptions{
+				RetryPolicy: &commonpb.RetryPolicy{
+					MaximumAttempts: 1000,
+				},
+			},
+			UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"retry_policy.maximum_attempts"}},
+		})
+		s.NoError(err)
+		s.NotNil(resp)
+
+		s.EventuallyWithT(func(t *assert.CollectT) {
+			description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID())
+			require.NoError(t, err)
+			require.Len(t, description.PendingActivities, 1)
+			require.Equal(t, int32(1000), description.PendingActivities[0].GetActivityOptions().GetRetryPolicy().GetMaximumAttempts())
+		}, 3*time.Second, 200*time.Millisecond)
+
+		// Reset to original options.
+		resp, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{
+			Namespace:       s.Namespace().String(),
+			WorkflowId:      workflowRun.GetID(),
+			RunId:           workflowRun.GetRunID(),
+			ActivityId:      "activity-id",
+			RestoreOriginal: true,
+		})
+		s.NoError(err)
+		s.NotNil(resp)
+
+		s.EventuallyWithT(func(t *assert.CollectT) {
+			description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID())
+			require.NoError(t, err)
+			require.Len(t, description.PendingActivities, 1)
+			require.Equal(t, int32(defaultMaximumAttempts), description.PendingActivities[0].GetActivityOptions().GetRetryPolicy().GetMaximumAttempts())
+		}, 3*time.Second, 200*time.Millisecond)
+
+		// Update retry interval to unblock the second attempt.
+		resp, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{
+			Namespace:  s.Namespace().String(),
+			WorkflowId: workflowRun.GetID(),
+			RunId:      workflowRun.GetRunID(),
+			ActivityId: "activity-id",
+			ActivityOptions: &activitypb.ActivityOptions{
+				ScheduleToCloseTimeout: durationpb.New(10 * time.Second),
+				RetryPolicy: &commonpb.RetryPolicy{
+					InitialInterval: durationpb.New(1 * time.Second),
+				},
+			},
+			UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"schedule_to_close_timeout", "retry_policy.initial_interval"}},
+		})
+		s.NoError(err)
+		s.NotNil(resp)
+
+		// Release the second attempt; give up when the test context expires instead of
+		// blocking forever on the send.
+		select {
+		case activityUpdated <- struct{}{}:
+		case <-ctx.Done():
+			t.Fatal("timed out waiting for activity to receive update signal")
+		}
+
+		s.EventuallyWithT(func(t *assert.CollectT) {
+			description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID())
+			require.NoError(t, err)
+			require.Empty(t, description.GetPendingActivities())
+			require.Equal(t, int32(2), startedActivityCount.Load())
+		}, 3*time.Second, 100*time.Millisecond)
+
+		var out string
+		err = workflowRun.Get(ctx, &out)
+		s.NoError(err)
+	})
+}
diff
--git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index 991b6dbe9d1..1745f4ef222 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -3084,6 +3084,7 @@ func (s *standaloneActivityTestSuite) TestStartToCloseTimeout_WhileCancelRequest } + // TestScheduleToStartTimeout tests that a schedule-to-start timeout is recorded after the activity is // created but never started. It also verifies that DescribeActivityExecution can be used to long-poll for a TimedOut // state change caused by execution of a timer task. @@ -5597,6 +5598,650 @@ func (s *standaloneActivityTestSuite) TestStartDelay() { }) } + +func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { + t := s.T() + + t.Run("InvalidArgument", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: &commonpb.ActivityType{Name: "test-activity"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + }) + require.NoError(t, err) + runID := startResp.RunId + ns := s.Namespace().String() + + validOptions := &activitypb.ActivityOptions{ + StartToCloseTimeout: durationpb.New(2 * time.Minute), + } + validMask := &fieldmaskpb.FieldMask{Paths: []string{"start_to_close_timeout"}} + + testCases := []struct { + name string + req *workflowservice.UpdateActivityExecutionOptionsRequest + expectedErr string + }{ + { + name: "EmptyActivityID", + req: &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: ns, + RunId: runID, + ActivityOptions: validOptions, + UpdateMask: validMask, + }, + expectedErr: "activity ID is required", + }, + { + name: "ActivityIDTooLong", + req: 
&workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: ns, + ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), + RunId: runID, + ActivityOptions: validOptions, + UpdateMask: validMask, + }, + expectedErr: "activity ID exceeds length limit", + }, + { + name: "IdentityTooLong", + req: &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: ns, + ActivityId: activityID, + RunId: runID, + Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), + ActivityOptions: validOptions, + UpdateMask: validMask, + }, + expectedErr: "identity exceeds length limit", + }, + { + name: "InvalidRunID", + req: &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: ns, + ActivityId: activityID, + RunId: "not-a-valid-uuid", + ActivityOptions: validOptions, + UpdateMask: validMask, + }, + expectedErr: "invalid run id", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + _, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, tc.req) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Contains(t, invalidArgErr.Message, tc.expectedErr) + }) + } + }) + + t.Run("ChangeRetryInterval", func(t *testing.T) { + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + // Start with a long retry interval to keep the activity in backoff after failure. 
+ startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: &commonpb.ActivityType{Name: "test-activity"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(30 * time.Minute), + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(10 * time.Minute), + MaximumAttempts: 5, + }, + }) + require.NoError(t, err) + + // Poll and fail with a retryable failure — activity enters long backoff. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: &failurepb.Failure{ + Message: "retryable failure", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, + }, + }, + }) + require.NoError(t, err) + + // Shorten the retry interval so the activity retries immediately. + updateResp, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + ActivityOptions: &activitypb.ActivityOptions{ + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(1 * time.Millisecond), + }, + }, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"retry_policy.initial_interval"}}, + }) + require.NoError(t, err) + require.NotNil(t, updateResp) + + // Activity should now be available to poll for attempt 2. 
+ pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }) + require.NoError(t, err) + require.EqualValues(t, 2, pollResp2.Attempt) + }) + + t.Run("ChangeRetryInterval_WhileStarted", func(t *testing.T) { + // Update retry_policy.initial_interval while the activity is STARTED (running). + // In this state CurrentRetryInterval is not recalculated at update time — the new + // policy takes effect when the attempt fails and a fresh interval is computed. + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: &commonpb.ActivityType{Name: "test-activity"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(30 * time.Minute), + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(10 * time.Minute), + MaximumAttempts: 5, + }, + }) + require.NoError(t, err) + + // Poll attempt 1 — activity is now STARTED. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + // Shorten the retry interval while the activity is running. 
+ _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + ActivityOptions: &activitypb.ActivityOptions{ + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(1 * time.Millisecond), + }, + }, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"retry_policy.initial_interval"}}, + }) + require.NoError(t, err) + + // Fail attempt 1 — next retry should dispatch immediately with the new 1ms interval. + _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: &failurepb.Failure{ + Message: "retryable failure", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, + }, + }, + }) + require.NoError(t, err) + + // Attempt 2 should be available immediately. 
+ pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }) + require.NoError(t, err) + require.EqualValues(t, 2, pollResp2.Attempt) + }) + + t.Run("ChangeScheduleToClose", func(t *testing.T) { + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: &commonpb.ActivityType{Name: "test-activity"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(30 * time.Minute), + ScheduleToCloseTimeout: durationpb.New(30 * time.Minute), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 0, // unlimited retries + InitialInterval: durationpb.New(10 * time.Minute), // long backoff keeps activity in SCHEDULED state + }, + }) + require.NoError(t, err) + + // Start and fail the activity once — it enters backoff (SCHEDULED state). 
+ pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }) + require.NoError(t, err) + _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: &failurepb.Failure{ + Message: "retryable failure", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, + }, + }, + }) + require.NoError(t, err) + + // Shorten schedule-to-close — activity should time out immediately. + _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + ActivityOptions: &activitypb.ActivityOptions{ + ScheduleToCloseTimeout: durationpb.New(1 * time.Second), + }, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"schedule_to_close_timeout"}}, + }) + require.NoError(t, err) + + // Long-poll until the activity times out. + pollOutcome, err := s.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + }) + require.NoError(t, err) + require.Equal(t, + enumspb.TIMEOUT_TYPE_SCHEDULE_TO_CLOSE, + pollOutcome.GetOutcome().GetFailure().GetTimeoutFailureInfo().GetTimeoutType(), + ) + }) + + t.Run("ChangeScheduleToClose_WhileStarted", func(t *testing.T) { + // Poll the activity (STARTED), then shorten schedule-to-close timeout via update. + // Since no response is sent, the activity should time out with SCHEDULE_TO_CLOSE. 
+ ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: &commonpb.ActivityType{Name: "test-activity"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(30 * time.Minute), + ScheduleToCloseTimeout: durationpb.New(30 * time.Minute), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 1, // no retry so TIMED_OUT is terminal + }, + }) + require.NoError(t, err) + + // Poll attempt 1 — activity is now STARTED. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + // Shorten schedule-to-close — the new task fires almost immediately. + _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + ActivityOptions: &activitypb.ActivityOptions{ + ScheduleToCloseTimeout: durationpb.New(1 * time.Millisecond), + }, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"schedule_to_close_timeout"}}, + }) + require.NoError(t, err) + + // Long-poll until the activity times out (no response sent to the task queue). 
+ pollOutcome, err := s.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + }) + require.NoError(t, err) + require.Equal(t, + enumspb.TIMEOUT_TYPE_SCHEDULE_TO_CLOSE, + pollOutcome.GetOutcome().GetFailure().GetTimeoutFailureInfo().GetTimeoutType(), + ) + }) + + t.Run("ChangeScheduleToCloseAndRetry", func(t *testing.T) { + // Start with a short schedule-to-close (8s) and a long retry interval (5s) so + // the activity would time out before its second attempt under the original options. + // Update both: longer schedule-to-close and shorter retry interval. + // The activity should retry quickly and succeed. + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + originalStartToClose := 8 * time.Second + + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: &commonpb.ActivityType{Name: "test-activity"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(originalStartToClose), + ScheduleToCloseTimeout: durationpb.New(8 * time.Second), + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(5 * time.Second), + MaximumAttempts: 5, + }, + }) + require.NoError(t, err) + + // Fail attempt 1. 
+ pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: &failurepb.Failure{ + Message: "retryable failure", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, + }, + }, + }) + require.NoError(t, err) + + // Update: extend schedule-to-close, shorten retry interval. + newScheduleToClose := 30 * time.Second + updateResp, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + ActivityOptions: &activitypb.ActivityOptions{ + ScheduleToCloseTimeout: durationpb.New(newScheduleToClose), + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(1 * time.Millisecond), + }, + }, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"schedule_to_close_timeout", "retry_policy.initial_interval"}}, + }) + require.NoError(t, err) + require.NotNil(t, updateResp) + require.Equal(t, int64(newScheduleToClose.Seconds()), updateResp.GetActivityOptions().GetScheduleToCloseTimeout().GetSeconds()) + // Verify that the unmodified start_to_close_timeout is preserved in the response. + require.Equal(t, int64(originalStartToClose.Seconds()), updateResp.GetActivityOptions().GetStartToCloseTimeout().GetSeconds()) + + // Attempt 2 should be available immediately. 
+ pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }) + require.NoError(t, err) + require.EqualValues(t, 2, pollResp2.Attempt) + }) + + t.Run("ResetDefaultOptions", func(t *testing.T) { + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + originalMaxAttempts := int32(10) + + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: &commonpb.ActivityType{Name: "test-activity"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(30 * time.Minute), + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(1 * time.Millisecond), + MaximumAttempts: originalMaxAttempts, + }, + }) + require.NoError(t, err) + + // Update maximum attempts to a large value. + _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + ActivityOptions: &activitypb.ActivityOptions{ + RetryPolicy: &commonpb.RetryPolicy{MaximumAttempts: 1000}, + }, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"retry_policy.maximum_attempts"}}, + }) + require.NoError(t, err) + + // Verify the update was applied. 
+ describeResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + }) + require.NoError(t, err) + require.EqualValues(t, 1000, describeResp.GetInfo().GetRetryPolicy().GetMaximumAttempts()) + + // Reset to original options. + _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + RestoreOriginal: true, + }) + require.NoError(t, err) + + // Verify original maximum attempts are restored. + describeResp, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + }) + require.NoError(t, err) + require.Equal(t, originalMaxAttempts, describeResp.GetInfo().GetRetryPolicy().GetMaximumAttempts()) + + // Verify the activity still executes after reset — poll attempt 1 and complete it. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Result: payloads.EncodeString("done"), + }) + require.NoError(t, err) + }) + + t.Run("ChangeScheduleToStart", func(t *testing.T) { + // Start activity with a long schedule-to-start timeout and no workers polling the task + // queue. Shorten the timeout via update — activity should time out with SCHEDULE_TO_START. 
+ ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: &commonpb.ActivityType{Name: "test-activity"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(30 * time.Minute), + ScheduleToStartTimeout: durationpb.New(30 * time.Minute), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 1, // no retry so we observe TIMED_OUT + }, + }) + require.NoError(t, err) + + // Shorten schedule-to-start — no workers are polling so it should fire immediately. + _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + ActivityOptions: &activitypb.ActivityOptions{ + ScheduleToStartTimeout: durationpb.New(1 * time.Millisecond), + }, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"schedule_to_start_timeout"}}, + }) + require.NoError(t, err) + + // Long-poll until the activity times out. + pollOutcome, err := s.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + }) + require.NoError(t, err) + require.Equal(t, + enumspb.TIMEOUT_TYPE_SCHEDULE_TO_START, + pollOutcome.GetOutcome().GetFailure().GetTimeoutFailureInfo().GetTimeoutType(), + ) + }) + + t.Run("ChangeStartToClose", func(t *testing.T) { + // Poll the activity (STARTED), then shorten start-to-close timeout via update. + // Since no response is sent, the activity should time out with START_TO_CLOSE. 
+ ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: &commonpb.ActivityType{Name: "test-activity"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(30 * time.Minute), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 1, // no retry so TIMED_OUT is terminal + }, + }) + require.NoError(t, err) + + // Poll attempt 1 — activity is now STARTED. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + // Shorten start-to-close — the new task fires almost immediately. + _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + ActivityOptions: &activitypb.ActivityOptions{ + StartToCloseTimeout: durationpb.New(1 * time.Millisecond), + }, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"start_to_close_timeout"}}, + }) + require.NoError(t, err) + + // Long-poll until the activity times out (no response sent to the task queue). 
+ pollOutcome, err := s.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + }) + require.NoError(t, err) + require.Equal(t, + enumspb.TIMEOUT_TYPE_START_TO_CLOSE, + pollOutcome.GetOutcome().GetFailure().GetTimeoutFailureInfo().GetTimeoutType(), + ) + }) + + t.Run("ChangeHeartbeatTimeout", func(t *testing.T) { + // Poll the activity (STARTED), then shorten heartbeat timeout via update. + // The update re-creates the HeartbeatTimeoutTask with the new timeout, so no further + // heartbeats are needed — the activity should time out with HEARTBEAT. + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: &commonpb.ActivityType{Name: "test-activity"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(30 * time.Minute), + HeartbeatTimeout: durationpb.New(30 * time.Minute), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 1, // no retry so TIMED_OUT is terminal + }, + }) + require.NoError(t, err) + + // Poll attempt 1 — activity is now STARTED. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + // Shorten heartbeat timeout. 
+ _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + ActivityOptions: &activitypb.ActivityOptions{ + HeartbeatTimeout: durationpb.New(2 * time.Second), + }, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"heartbeat_timeout"}}, + }) + require.NoError(t, err) + + // Long-poll until the activity times out (no further heartbeats sent). + pollOutcome, err := s.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + }) + require.NoError(t, err) + require.Equal(t, + enumspb.TIMEOUT_TYPE_HEARTBEAT, + pollOutcome.GetOutcome().GetFailure().GetTimeoutFailureInfo().GetTimeoutType(), + ) + }) +} + func (env *standaloneActivityEnv) pollActivityTaskQueue(ctx context.Context, taskQueue string) (*workflowservice.PollActivityTaskQueueResponse, error) { return env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ Namespace: env.Namespace().String(), @@ -6298,6 +6943,8 @@ func (s *standaloneActivityTestSuite) TestCallbacks() { }) require.NoError(t, err) require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_TIMED_OUT, descResp.GetInfo().GetStatus()) + }) +} func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { t := s.T() @@ -6350,26 +6997,3 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.ErrorAs(t, err, &unimplementedErr) }) } - -func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { - t := s.T() - ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) - defer cancel() - - t.Run("StandaloneActivityReturnsError", func(t *testing.T) { - _, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: 
s.Namespace().String(), - ActivityId: testcore.RandomizeStr(t.Name()), - Identity: "test-identity", - ActivityOptions: &activitypb.ActivityOptions{ - RetryPolicy: &commonpb.RetryPolicy{ - InitialInterval: durationpb.New(time.Second), - }, - }, - UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"retry_policy.initial_interval"}}, - }) - require.Error(t, err) - var unimplementedErr *serviceerror.Unimplemented - require.ErrorAs(t, err, &unimplementedErr) - }) -} From dbfe8c338403cf77546b4da38781f0035cbe024f Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Mon, 20 Apr 2026 13:12:27 -0400 Subject: [PATCH 03/25] implement Pause/UnpauseActivityExecution for standalone activities (#9851) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement `PauseActivityExecution` and `UnpauseActivityExecution` for standalone activities. Previously both handlers returned Unimplemented for the SAA path. They now use chasm.UpdateComponent to apply pause/unpause state directly to the CHASM Activity component, matching the semantics of the existing workflow-activity implementation. - Proto (`activity_state.proto`): Added `ActivityPauseState` message (`pause_time`, `identity`, `reason`) and a `pause_state` field on `ActivityState`. - `handlePauseRequested`: Sets `PauseState` on the component. If the activity is in `SCHEDULED` state, increments the attempt stamp so the existing `ActivityDispatchTask` is invalidated — preventing the activity from being dispatched to a worker while paused. For `STARTED` activities the stamp is left unchanged; the worker retains a valid token and receives `ActivityPaused: true` on its next heartbeat. - `handleUnpauseRequested`: Clears `PauseState`, optionally resets the attempt count and/or heartbeat details, and if the activity is `SCHEDULED` bumps the stamp and enqueues a new `ActivityDispatchTask` with optional jitter. - `RecordHeartbeat`: Wires up the `ActivityPaused` response field. 
- `buildActivityExecutionInfo`: Maps pause state to `PENDING_ACTIVITY_STATE_PAUSED` (activity is scheduled but not running) or `PENDING_ACTIVITY_STATE_PAUSE_REQUESTED` (activity is running on the worker) in the `RunState` field of `DescribeActivityExecution`. `PauseActivityExecution` / `UnpauseActivityExecution` were already implemented for workflow-embedded activities via the history service. Standalone activities had stub handlers that returned `Unimplemented`; this change brings SAA to feature parity with workflow activities for the pause/unpause lifecycle operations. - [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [X] added new functional test(s) Minimal; this is a translation of an existing API (Pause/UnpauseActivity) --- chasm/lib/activity/activity.go | 183 +- chasm/lib/activity/activity_tasks.go | 6 + chasm/lib/activity/frontend.go | 15 +- .../v1/activity_state.go-helpers.pb.go | 38 + .../gen/activitypb/v1/activity_state.pb.go | 256 ++- chasm/lib/activity/handler.go | 26 +- .../activity/proto/v1/activity_state.proto | 19 + chasm/lib/activity/statemachine.go | 53 + chasm/lib/activity/validator.go | 61 + chasm/lib/activity/validator_test.go | 145 +- common/metrics/metric_defs.go | 6 + tests/standalone_activity_test.go | 1521 ++++++++++++++++- 12 files changed, 2211 insertions(+), 118 deletions(-) diff --git a/chasm/lib/activity/activity.go b/chasm/lib/activity/activity.go index 0292ec62b33..4887c2f89b4 100644 --- a/chasm/lib/activity/activity.go +++ b/chasm/lib/activity/activity.go @@ -3,6 +3,7 @@ package activity import ( "errors" "fmt" + "math/rand" "slices" "time" @@ -110,6 +111,19 @@ type RespondCancelledEvent struct { Token *tokenspb.Task } +func (a *Activity) isTerminal() bool { + switch a.GetStatus() { + case activitypb.ACTIVITY_EXECUTION_STATUS_COMPLETED, + activitypb.ACTIVITY_EXECUTION_STATUS_FAILED, + activitypb.ACTIVITY_EXECUTION_STATUS_CANCELED, +
activitypb.ACTIVITY_EXECUTION_STATUS_TERMINATED, + activitypb.ACTIVITY_EXECUTION_STATUS_TIMED_OUT: + return true + default: + return false + } +} + // LifecycleState implements the chasm.Component interface. func (a *Activity) LifecycleState(_ chasm.Context) chasm.LifecycleState { switch a.Status { @@ -729,8 +743,11 @@ func (a *Activity) handleCancellationRequested(ctx chasm.MutableContext, request return &activitypb.RequestCancelActivityExecutionResponse{}, nil } - // If in scheduled state, cancel immediately right after marking cancel requested - isCancelImmediately := a.GetStatus() == activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED + // SCHEDULED and PAUSED activities have no active worker token so cancel immediately. + // STARTED and CANCEL_REQUESTED activities wait for the worker to respond. + originalStatus := a.GetStatus() + isCancelImmediately := originalStatus == activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED || + originalStatus == activitypb.ACTIVITY_EXECUTION_STATUS_PAUSED if err := TransitionCancelRequested.Apply(a, ctx, req); err != nil { return nil, err @@ -750,7 +767,7 @@ func (a *Activity) handleCancellationRequested(ctx chasm.MutableContext, request err = TransitionCanceled.Apply(a, ctx, cancelEvent{ details: details, handler: metricsHandler, - fromStatus: activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, // if we're here the original status was scheduled + fromStatus: originalStatus, }) if err != nil { return nil, err @@ -760,6 +777,125 @@ func (a *Activity) handleCancellationRequested(ctx chasm.MutableContext, request return &activitypb.RequestCancelActivityExecutionResponse{}, nil } +func (a *Activity) handlePauseRequested(ctx chasm.MutableContext, req *activitypb.PauseActivityExecutionRequest) ( + *activitypb.PauseActivityExecutionResponse, error, +) { + if a.isTerminal() { + return nil, serviceerror.NewFailedPreconditionf("activity is in terminal state %v", a.GetStatus()) + } + if a.GetStatus() == 
activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED { + return nil, serviceerror.NewFailedPrecondition("cannot pause an activity with a pending cancellation") + } + if a.PauseState != nil { + return &activitypb.PauseActivityExecutionResponse{}, nil + } + + metricsHandler, err := a.enrichMetricsHandler(ctx, metrics.ActivityPausedScope) + if err != nil { + return nil, err + } + + if TransitionPaused.Possible(a) { + // SCHEDULED → real PAUSED status; stamp bumped to invalidate the pending dispatch task. + if err := TransitionPaused.Apply(a, ctx, pauseEvent{ + req: req.GetFrontendRequest(), + metricsHandler: metricsHandler, + }); err != nil { + return nil, err + } + return &activitypb.PauseActivityExecutionResponse{}, nil + } + // STARTED → flag-only pause. Status stays STARTED so the worker's token remains valid. + // The worker will see ActivityPaused=true on the next heartbeat. + a.pause(ctx, pauseEvent{req.GetFrontendRequest(), metricsHandler}) + return &activitypb.PauseActivityExecutionResponse{}, nil +} + +func (a *Activity) handleUnpauseRequested(ctx chasm.MutableContext, req *activitypb.UnpauseActivityExecutionRequest) ( + *activitypb.UnpauseActivityExecutionResponse, error, +) { + if a.isTerminal() { + return nil, serviceerror.NewFailedPreconditionf("activity is in terminal state %v", a.GetStatus()) + } + // Not paused → no-op. + if a.PauseState == nil { + return &activitypb.UnpauseActivityExecutionResponse{}, nil + } + + metricsHandler, err := a.enrichMetricsHandler(ctx, metrics.ActivityUnpausedScope) + if err != nil { + return nil, err + } + + if TransitionUnpaused.Possible(a) { + if err := TransitionUnpaused.Apply(a, ctx, unpauseEvent{ + req: req.GetFrontendRequest(), + metricsHandler: metricsHandler, + }); err != nil { + return nil, err + } + return &activitypb.UnpauseActivityExecutionResponse{}, nil + } + + // Flag-based pause (status is STARTED, CANCEL_REQUESTED, or SCHEDULED after retry while paused). 
+ if a.GetStatus() == activitypb.ACTIVITY_EXECUTION_STATUS_STARTED || + a.GetStatus() == activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED { + // Worker continues with its existing token — no stamp bump needed, no dispatch task. + // Cancel takes precedence over pause. Unpause clears the pause flag but does not re-dispatch; + // the activity remains CANCEL_REQUESTED and will be cancelled when the worker responds. + a.PauseState = nil + a.emitOnUnpausedMetrics(metricsHandler) + return &activitypb.UnpauseActivityExecutionResponse{}, nil + } + a.unpause(ctx, unpauseEvent{ + req: req.GetFrontendRequest(), + metricsHandler: metricsHandler, + }) + return &activitypb.UnpauseActivityExecutionResponse{}, nil +} + +func (a *Activity) unpause( + ctx chasm.MutableContext, + event unpauseEvent, +) { + a.PauseState = nil + attempt := a.LastAttempt.Get(ctx) + if event.req.GetResetAttempts() { + attempt.Count = 1 + } + if event.req.GetResetHeartbeat() { + a.LastHeartbeat = chasm.NewDataField(ctx, &activitypb.ActivityHeartbeatState{}) + } + attempt.Stamp++ + attempt.CurrentRetryInterval = nil + scheduleTime := ctx.Now(a) + if jitter := event.req.GetJitter().AsDuration(); jitter > 0 { + scheduleTime = scheduleTime.Add(time.Duration(rand.Int63n(int64(jitter)))) //nolint:gosec + } + if timeout := a.GetScheduleToStartTimeout().AsDuration(); timeout > 0 { + ctx.AddTask( + a, + chasm.TaskAttributes{ScheduledTime: scheduleTime.Add(timeout)}, + &activitypb.ScheduleToStartTimeoutTask{Stamp: attempt.GetStamp()}) + } + ctx.AddTask( + a, + chasm.TaskAttributes{ScheduledTime: scheduleTime}, + &activitypb.ActivityDispatchTask{Stamp: attempt.GetStamp()}) + a.emitOnUnpausedMetrics(event.metricsHandler) +} +func (a *Activity) pause( + ctx chasm.MutableContext, + event pauseEvent, +) { + a.PauseState = &activitypb.ActivityPauseState{ + PauseTime: timestamppb.New(ctx.Now(a)), + Identity: event.req.GetIdentity(), + Reason: event.req.GetReason(), + } + a.emitOnPausedMetrics(event.metricsHandler) +} 
+ // recordScheduleToStartOrCloseTimeoutFailure records schedule-to-start or schedule-to-close timeouts. Such timeouts are not retried so we // set the outcome failure directly and leave the attempt failure as is. func (a *Activity) recordScheduleToStartOrCloseTimeoutFailure(ctx chasm.MutableContext, timeoutType enumspb.TimeoutType) error { @@ -810,7 +946,9 @@ func (a *Activity) recordFailedAttempt( } // tryReschedule attempts to reschedule the activity for retry. Returns true if rescheduled, false -// if retry is not possible. +// if retry is not possible. When the activity has PauseState set (flag-based pause from STARTED), +// the retry transitions to SCHEDULED normally but the dispatch task is blocked by the pause flag +// until the activity is unpaused. func (a *Activity) tryReschedule( ctx chasm.MutableContext, overridingRetryInterval time.Duration, @@ -920,7 +1058,8 @@ func (a *Activity) RecordHeartbeat( } return &historyservice.RecordActivityTaskHeartbeatResponse{ CancelRequested: a.Status == activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED, - // TODO(saa-preview): ActivityPaused, ActivityReset + ActivityPaused: a.PauseState != nil, + // TODO(saa-preview): ActivityReset }, nil } @@ -929,7 +1068,8 @@ func InternalStatusToAPIStatus(status activitypb.ActivityExecutionStatus) enumsp switch status { case activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, - activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED: + activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED, + activitypb.ACTIVITY_EXECUTION_STATUS_PAUSED: return enumspb.ACTIVITY_EXECUTION_STATUS_RUNNING case activitypb.ACTIVITY_EXECUTION_STATUS_COMPLETED: return enumspb.ACTIVITY_EXECUTION_STATUS_COMPLETED @@ -956,6 +1096,8 @@ func internalStatusToRunState(status activitypb.ActivityExecutionStatus) enumspb return enumspb.PENDING_ACTIVITY_STATE_STARTED case activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED: return 
enumspb.PENDING_ACTIVITY_STATE_CANCEL_REQUESTED + case activitypb.ACTIVITY_EXECUTION_STATUS_PAUSED: + return enumspb.PENDING_ACTIVITY_STATE_PAUSED case activitypb.ACTIVITY_EXECUTION_STATUS_COMPLETED, activitypb.ACTIVITY_EXECUTION_STATUS_FAILED, activitypb.ACTIVITY_EXECUTION_STATUS_CANCELED, @@ -969,9 +1111,20 @@ func internalStatusToRunState(status activitypb.ActivityExecutionStatus) enumspb } func (a *Activity) buildActivityExecutionInfo(ctx chasm.Context) *apiactivitypb.ActivityExecutionInfo { - // TODO(saa-preview): support pause states status := InternalStatusToAPIStatus(a.GetStatus()) - runState := internalStatusToRunState(a.GetStatus()) + // Derive the external run state with hybrid pause logic: + // PAUSED status (real) → PAUSED + // STARTED + PauseState != nil (pause requested while running) → PAUSE_REQUESTED + // SCHEDULED + PauseState != nil (retry while paused flag set) → PAUSED + // All other cases → derived from internal status directly + var runState enumspb.PendingActivityState + if a.GetStatus() == activitypb.ACTIVITY_EXECUTION_STATUS_STARTED && a.PauseState != nil { + runState = enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED + } else if a.GetStatus() == activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED && a.PauseState != nil { + runState = enumspb.PENDING_ACTIVITY_STATE_PAUSED + } else { + runState = internalStatusToRunState(a.GetStatus()) + } requestData := a.RequestData.Get(ctx) attempt := a.LastAttempt.Get(ctx) @@ -1322,6 +1475,20 @@ func (a *Activity) emitOnTimedOutMetrics( metrics.ActivityTimeout.With(handler).Record(1, timeoutTag) } +func (a *Activity) emitOnPausedMetrics( + handler metrics.Handler, +) { + metrics.ActivityPauseRequests.With(handler).Record(1) + metrics.ActivityPause.With(handler).Record(1) +} + +func (a *Activity) emitOnUnpausedMetrics( + handler metrics.Handler, +) { + metrics.ActivityUnpauseRequests.With(handler).Record(1) + metrics.ActivityUnpause.With(handler).Record(1) +} + // SearchAttributes implements 
chasm.VisibilitySearchAttributesProvider interface. // Returns the current search attribute values for this activity execution. func (a *Activity) SearchAttributes(_ chasm.Context) []chasm.SearchAttributeKeyValue { diff --git a/chasm/lib/activity/activity_tasks.go b/chasm/lib/activity/activity_tasks.go index fc98e2c58a2..b93d2fcba5e 100644 --- a/chasm/lib/activity/activity_tasks.go +++ b/chasm/lib/activity/activity_tasks.go @@ -36,7 +36,11 @@ func (h *activityDispatchTaskHandler) Validate( task *activitypb.ActivityDispatchTask, ) (bool, error) { // TODO(saa-preview): make sure we handle resets when we support them, as they will reset the attempt count + // Do not dispatch while the activity has a pause flag set (SCHEDULED + PauseState from a retry + // while a STARTED activity was flag-paused). TransitionStarted.Possible already returns false for + // real PAUSED status activities (source must be SCHEDULED, and PAUSED → SCHEDULED via unpause). return (TransitionStarted.Possible(activity) && + activity.PauseState == nil && task.Stamp == activity.LastAttempt.Get(ctx).GetStamp()), nil } @@ -93,7 +97,9 @@ func (h *scheduleToStartTimeoutTaskHandler) Validate( _ chasm.TaskAttributes, task *activitypb.ScheduleToStartTimeoutTask, ) (bool, error) { + // Do not time out a SCHEDULED activity that has the pause flag set (retry while paused). return (activity.Status == activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED && + activity.PauseState == nil && task.Stamp == activity.LastAttempt.Get(ctx).GetStamp()), nil } diff --git a/chasm/lib/activity/frontend.go b/chasm/lib/activity/frontend.go index e55b2d2d726..96861552ad7 100644 --- a/chasm/lib/activity/frontend.go +++ b/chasm/lib/activity/frontend.go @@ -439,7 +439,15 @@ func (h *frontendHandler) PauseActivityExecution( return nil, ErrStandaloneActivityDisabled } - // TODO: validate request fields (e.g. 
namespace, identity length) + if err := validatePauseActivityExecutionRequest( + req, + h.config.MaxIDLengthLimit(), + h.config.BlobSizeLimitError, + h.config.BlobSizeLimitWarn, + h.logger); err != nil { + return nil, err + } + namespaceID, err := h.namespaceRegistry.GetNamespaceID(namespace.Name(req.GetNamespace())) if err != nil { return nil, err @@ -463,7 +471,10 @@ func (h *frontendHandler) UnpauseActivityExecution( return nil, ErrStandaloneActivityDisabled } - // TODO: validate request fields (e.g. namespace, identity length) + if err := validateUnpauseActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { + return nil, err + } + namespaceID, err := h.namespaceRegistry.GetNamespaceID(namespace.Name(req.GetNamespace())) if err != nil { return nil, err diff --git a/chasm/lib/activity/gen/activitypb/v1/activity_state.go-helpers.pb.go b/chasm/lib/activity/gen/activitypb/v1/activity_state.go-helpers.pb.go index a765e208683..8ca3e1cb52b 100644 --- a/chasm/lib/activity/gen/activitypb/v1/activity_state.go-helpers.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/activity_state.go-helpers.pb.go @@ -118,6 +118,43 @@ func (this *ActivityTerminateState) Equal(that interface{}) bool { return proto.Equal(this, that1) } +// Marshal an object of type ActivityPauseState to the protobuf v3 wire format +func (val *ActivityPauseState) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type ActivityPauseState from the protobuf v3 wire format +func (val *ActivityPauseState) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *ActivityPauseState) Size() int { + return proto.Size(val) +} + +// Equal returns whether two ActivityPauseState values are equivalent by recursively +// comparing the message's fields. 
+// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *ActivityPauseState) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *ActivityPauseState + switch t := that.(type) { + case *ActivityPauseState: + that1 = t + case ActivityPauseState: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} + // Marshal an object of type ActivityAttemptState to the protobuf v3 wire format func (val *ActivityAttemptState) Marshal() ([]byte, error) { return proto.Marshal(val) @@ -277,6 +314,7 @@ var ( "Canceled": 6, "Terminated": 7, "TimedOut": 8, + "Paused": 9, } ) diff --git a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go index ef31bedc60e..d18150d46fc 100644 --- a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go @@ -61,6 +61,16 @@ const ( // Additionally, after all retries are exhausted for start-to-close or heartbeat timeouts, the activity will also // transition to timed out status. ACTIVITY_EXECUTION_STATUS_TIMED_OUT ActivityExecutionStatus = 8 + // The activity has been paused while in the SCHEDULED state. No worker will be dispatched until + // the activity is unpaused. The activity's pause_state field is populated with the identity, + // reason, and time of the pause request. + // + // Note: pausing a STARTED activity does not transition to this status. Instead, the pause is + // delivered as a flag (pause_state is set, status stays STARTED) and the worker is notified + // via ActivityPaused=true on its next heartbeat. The external run state in that case is + // PAUSE_REQUESTED. If the worker fails and retries while the flag is set, the retry lands in + // SCHEDULED with pause_state still populated and the dispatch task is blocked until unpause. 
+ ACTIVITY_EXECUTION_STATUS_PAUSED ActivityExecutionStatus = 9 ) // Enum value maps for ActivityExecutionStatus. @@ -75,6 +85,7 @@ var ( 6: "ACTIVITY_EXECUTION_STATUS_CANCELED", 7: "ACTIVITY_EXECUTION_STATUS_TERMINATED", 8: "ACTIVITY_EXECUTION_STATUS_TIMED_OUT", + 9: "ACTIVITY_EXECUTION_STATUS_PAUSED", } ActivityExecutionStatus_value = map[string]int32{ "ACTIVITY_EXECUTION_STATUS_UNSPECIFIED": 0, @@ -86,6 +97,7 @@ var ( "ACTIVITY_EXECUTION_STATUS_CANCELED": 6, "ACTIVITY_EXECUTION_STATUS_TERMINATED": 7, "ACTIVITY_EXECUTION_STATUS_TIMED_OUT": 8, + "ACTIVITY_EXECUTION_STATUS_PAUSED": 9, } ) @@ -117,6 +129,8 @@ func (x ActivityExecutionStatus) String() string { return "TimedOut" // Deprecated: Use ActivityExecutionStatus.Descriptor instead. + case ACTIVITY_EXECUTION_STATUS_PAUSED: + return "Paused" default: return strconv.Itoa(int(x)) } @@ -190,7 +204,9 @@ type ActivityState struct { // Incremented each time a new ScheduleToCloseTimeoutTask is scheduled (at activity creation // and on each options update that re-schedules the task). Unlike attempt.stamp, this counter // is NOT incremented on retries, because schedule-to-close spans the full activity lifetime. - Stamp int32 `protobuf:"varint,14,opt,name=stamp,proto3" json:"stamp,omitempty"` + Stamp int32 `protobuf:"varint,14,opt,name=stamp,proto3" json:"stamp,omitempty"` + // Set if the activity was paused. 
+ PauseState *ActivityPauseState `protobuf:"bytes,15,opt,name=pause_state,json=pauseState,proto3" json:"pause_state,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -323,6 +339,13 @@ func (x *ActivityState) GetStamp() int32 { return 0 } +func (x *ActivityState) GetPauseState() *ActivityPauseState { + if x != nil { + return x.PauseState + } + return nil +} + type ActivityCancelState struct { state protoimpl.MessageState `protogen:"open.v1"` RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` @@ -435,6 +458,66 @@ func (x *ActivityTerminateState) GetRequestId() string { return "" } +type ActivityPauseState struct { + state protoimpl.MessageState `protogen:"open.v1"` + PauseTime *timestamppb.Timestamp `protobuf:"bytes,1,opt,name=pause_time,json=pauseTime,proto3" json:"pause_time,omitempty"` + Identity string `protobuf:"bytes,2,opt,name=identity,proto3" json:"identity,omitempty"` + Reason string `protobuf:"bytes,3,opt,name=reason,proto3" json:"reason,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ActivityPauseState) Reset() { + *x = ActivityPauseState{} + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ActivityPauseState) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ActivityPauseState) ProtoMessage() {} + +func (x *ActivityPauseState) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ActivityPauseState.ProtoReflect.Descriptor instead. 
+func (*ActivityPauseState) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{3} +} + +func (x *ActivityPauseState) GetPauseTime() *timestamppb.Timestamp { + if x != nil { + return x.PauseTime + } + return nil +} + +func (x *ActivityPauseState) GetIdentity() string { + if x != nil { + return x.Identity + } + return "" +} + +func (x *ActivityPauseState) GetReason() string { + if x != nil { + return x.Reason + } + return "" +} + type ActivityAttemptState struct { state protoimpl.MessageState `protogen:"open.v1"` // The attempt this activity is currently on. @@ -477,7 +560,7 @@ type ActivityAttemptState struct { func (x *ActivityAttemptState) Reset() { *x = ActivityAttemptState{} - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[3] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[4] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -489,7 +572,7 @@ func (x *ActivityAttemptState) String() string { func (*ActivityAttemptState) ProtoMessage() {} func (x *ActivityAttemptState) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[3] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[4] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -502,7 +585,7 @@ func (x *ActivityAttemptState) ProtoReflect() protoreflect.Message { // Deprecated: Use ActivityAttemptState.ProtoReflect.Descriptor instead. 
func (*ActivityAttemptState) Descriptor() ([]byte, []int) { - return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{3} + return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{4} } func (x *ActivityAttemptState) GetCount() int32 { @@ -580,7 +663,7 @@ type ActivityHeartbeatState struct { func (x *ActivityHeartbeatState) Reset() { *x = ActivityHeartbeatState{} - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[4] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[5] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -592,7 +675,7 @@ func (x *ActivityHeartbeatState) String() string { func (*ActivityHeartbeatState) ProtoMessage() {} func (x *ActivityHeartbeatState) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[4] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[5] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -605,7 +688,7 @@ func (x *ActivityHeartbeatState) ProtoReflect() protoreflect.Message { // Deprecated: Use ActivityHeartbeatState.ProtoReflect.Descriptor instead. 
func (*ActivityHeartbeatState) Descriptor() ([]byte, []int) { - return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{4} + return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{5} } func (x *ActivityHeartbeatState) GetDetails() *v1.Payloads { @@ -635,7 +718,7 @@ type ActivityRequestData struct { func (x *ActivityRequestData) Reset() { *x = ActivityRequestData{} - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[5] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[6] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -647,7 +730,7 @@ func (x *ActivityRequestData) String() string { func (*ActivityRequestData) ProtoMessage() {} func (x *ActivityRequestData) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[5] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[6] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -660,7 +743,7 @@ func (x *ActivityRequestData) ProtoReflect() protoreflect.Message { // Deprecated: Use ActivityRequestData.ProtoReflect.Descriptor instead. 
func (*ActivityRequestData) Descriptor() ([]byte, []int) { - return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{5} + return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{6} } func (x *ActivityRequestData) GetInput() *v1.Payloads { @@ -697,7 +780,7 @@ type ActivityOutcome struct { func (x *ActivityOutcome) Reset() { *x = ActivityOutcome{} - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[6] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[7] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -709,7 +792,7 @@ func (x *ActivityOutcome) String() string { func (*ActivityOutcome) ProtoMessage() {} func (x *ActivityOutcome) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[6] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[7] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -722,7 +805,7 @@ func (x *ActivityOutcome) ProtoReflect() protoreflect.Message { // Deprecated: Use ActivityOutcome.ProtoReflect.Descriptor instead. 
func (*ActivityOutcome) Descriptor() ([]byte, []int) { - return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{6} + return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{7} } func (x *ActivityOutcome) GetVariant() isActivityOutcome_Variant { @@ -778,7 +861,7 @@ type ActivityAttemptState_LastFailureDetails struct { func (x *ActivityAttemptState_LastFailureDetails) Reset() { *x = ActivityAttemptState_LastFailureDetails{} - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[7] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -790,7 +873,7 @@ func (x *ActivityAttemptState_LastFailureDetails) String() string { func (*ActivityAttemptState_LastFailureDetails) ProtoMessage() {} func (x *ActivityAttemptState_LastFailureDetails) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[7] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[8] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -803,7 +886,7 @@ func (x *ActivityAttemptState_LastFailureDetails) ProtoReflect() protoreflect.Me // Deprecated: Use ActivityAttemptState_LastFailureDetails.ProtoReflect.Descriptor instead. 
func (*ActivityAttemptState_LastFailureDetails) Descriptor() ([]byte, []int) { - return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{3, 0} + return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{4, 0} } func (x *ActivityAttemptState_LastFailureDetails) GetTime() *timestamppb.Timestamp { @@ -829,7 +912,7 @@ type ActivityOutcome_Successful struct { func (x *ActivityOutcome_Successful) Reset() { *x = ActivityOutcome_Successful{} - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[8] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[9] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -841,7 +924,7 @@ func (x *ActivityOutcome_Successful) String() string { func (*ActivityOutcome_Successful) ProtoMessage() {} func (x *ActivityOutcome_Successful) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[8] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[9] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -854,7 +937,7 @@ func (x *ActivityOutcome_Successful) ProtoReflect() protoreflect.Message { // Deprecated: Use ActivityOutcome_Successful.ProtoReflect.Descriptor instead. 
func (*ActivityOutcome_Successful) Descriptor() ([]byte, []int) { - return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{6, 0} + return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{7, 0} } func (x *ActivityOutcome_Successful) GetOutput() *v1.Payloads { @@ -875,7 +958,7 @@ type ActivityOutcome_Failed struct { func (x *ActivityOutcome_Failed) Reset() { *x = ActivityOutcome_Failed{} - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[9] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[10] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -887,7 +970,7 @@ func (x *ActivityOutcome_Failed) String() string { func (*ActivityOutcome_Failed) ProtoMessage() {} func (x *ActivityOutcome_Failed) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[9] + mi := &file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[10] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -900,7 +983,7 @@ func (x *ActivityOutcome_Failed) ProtoReflect() protoreflect.Message { // Deprecated: Use ActivityOutcome_Failed.ProtoReflect.Descriptor instead. 
func (*ActivityOutcome_Failed) Descriptor() ([]byte, []int) { - return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{6, 1} + return file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescGZIP(), []int{7, 1} } func (x *ActivityOutcome_Failed) GetFailure() *v15.Failure { @@ -914,7 +997,7 @@ var File_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto protor const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDesc = "" + "\n" + - "@temporal/server/chasm/lib/activity/proto/v1/activity_state.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&temporal/api/activity/v1/message.proto\x1a$temporal/api/common/v1/message.proto\x1a(temporal/api/deployment/v1/message.proto\x1a%temporal/api/failure/v1/message.proto\x1a'temporal/api/sdk/v1/user_metadata.proto\x1a'temporal/api/taskqueue/v1/message.proto\"\xc7\b\n" + + "@temporal/server/chasm/lib/activity/proto/v1/activity_state.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&temporal/api/activity/v1/message.proto\x1a$temporal/api/common/v1/message.proto\x1a(temporal/api/deployment/v1/message.proto\x1a%temporal/api/failure/v1/message.proto\x1a'temporal/api/sdk/v1/user_metadata.proto\x1a'temporal/api/taskqueue/v1/message.proto\"\xa9\t\n" + "\rActivityState\x12I\n" + "\ractivity_type\x18\x01 \x01(\v2$.temporal.api.common.v1.ActivityTypeR\factivityType\x12C\n" + "\n" + @@ -931,7 +1014,9 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\fcancel_state\x18\v \x01(\v2@.temporal.server.chasm.lib.activity.proto.v1.ActivityCancelStateR\vcancelState\x12l\n" + "\x0fterminate_state\x18\f \x01(\v2C.temporal.server.chasm.lib.activity.proto.v1.ActivityTerminateStateR\x0eterminateState\x12T\n" + "\x10original_options\x18\r 
\x01(\v2).temporal.api.activity.v1.ActivityOptionsR\x0foriginalOptions\x12\x14\n" + - "\x05stamp\x18\x0e \x01(\x05R\x05stamp\"\xa7\x01\n" + + "\x05stamp\x18\x0e \x01(\x05R\x05stamp\x12`\n" + + "\vpause_state\x18\x0f \x01(\v2?.temporal.server.chasm.lib.activity.proto.v1.ActivityPauseStateR\n" + + "pauseState\"\xa7\x01\n" + "\x13ActivityCancelState\x12\x1d\n" + "\n" + "request_id\x18\x01 \x01(\tR\trequestId\x12=\n" + @@ -940,7 +1025,12 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\x06reason\x18\x04 \x01(\tR\x06reason\"7\n" + "\x16ActivityTerminateState\x12\x1d\n" + "\n" + - "request_id\x18\x01 \x01(\tR\trequestId\"\xe8\x05\n" + + "request_id\x18\x01 \x01(\tR\trequestId\"\x83\x01\n" + + "\x12ActivityPauseState\x129\n" + + "\n" + + "pause_time\x18\x01 \x01(\v2\x1a.google.protobuf.TimestampR\tpauseTime\x12\x1a\n" + + "\bidentity\x18\x02 \x01(\tR\bidentity\x12\x16\n" + + "\x06reason\x18\x03 \x01(\tR\x06reason\"\xe8\x05\n" + "\x14ActivityAttemptState\x12\x14\n" + "\x05count\x18\x01 \x01(\x05R\x05count\x12O\n" + "\x16current_retry_interval\x18\x02 \x01(\v2\x19.google.protobuf.DurationR\x14currentRetryInterval\x12=\n" + @@ -971,7 +1061,7 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\x06output\x18\x01 \x01(\v2 .temporal.api.common.v1.PayloadsR\x06output\x1aD\n" + "\x06Failed\x12:\n" + "\afailure\x18\x01 \x01(\v2 .temporal.api.failure.v1.FailureR\afailureB\t\n" + - "\avariant*\x8e\x03\n" + + "\avariant*\xb4\x03\n" + "\x17ActivityExecutionStatus\x12)\n" + "%ACTIVITY_EXECUTION_STATUS_UNSPECIFIED\x10\x00\x12'\n" + "#ACTIVITY_EXECUTION_STATUS_SCHEDULED\x10\x01\x12%\n" + @@ -981,7 +1071,8 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD " ACTIVITY_EXECUTION_STATUS_FAILED\x10\x05\x12&\n" + "\"ACTIVITY_EXECUTION_STATUS_CANCELED\x10\x06\x12(\n" + "$ACTIVITY_EXECUTION_STATUS_TERMINATED\x10\a\x12'\n" + - 
"#ACTIVITY_EXECUTION_STATUS_TIMED_OUT\x10\bBDZBgo.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypbb\x06proto3" + "#ACTIVITY_EXECUTION_STATUS_TIMED_OUT\x10\b\x12$\n" + + " ACTIVITY_EXECUTION_STATUS_PAUSED\x10\tBDZBgo.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypbb\x06proto3" var ( file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDescOnce sync.Once @@ -996,68 +1087,71 @@ func file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDe } var file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes = make([]protoimpl.MessageInfo, 10) +var file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes = make([]protoimpl.MessageInfo, 11) var file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_goTypes = []any{ (ActivityExecutionStatus)(0), // 0: temporal.server.chasm.lib.activity.proto.v1.ActivityExecutionStatus (*ActivityState)(nil), // 1: temporal.server.chasm.lib.activity.proto.v1.ActivityState (*ActivityCancelState)(nil), // 2: temporal.server.chasm.lib.activity.proto.v1.ActivityCancelState (*ActivityTerminateState)(nil), // 3: temporal.server.chasm.lib.activity.proto.v1.ActivityTerminateState - (*ActivityAttemptState)(nil), // 4: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState - (*ActivityHeartbeatState)(nil), // 5: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState - (*ActivityRequestData)(nil), // 6: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData - (*ActivityOutcome)(nil), // 7: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome - (*ActivityAttemptState_LastFailureDetails)(nil), // 8: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails - (*ActivityOutcome_Successful)(nil), // 9: 
temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful - (*ActivityOutcome_Failed)(nil), // 10: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed - (*v1.ActivityType)(nil), // 11: temporal.api.common.v1.ActivityType - (*v11.TaskQueue)(nil), // 12: temporal.api.taskqueue.v1.TaskQueue - (*durationpb.Duration)(nil), // 13: google.protobuf.Duration - (*v1.RetryPolicy)(nil), // 14: temporal.api.common.v1.RetryPolicy - (*timestamppb.Timestamp)(nil), // 15: google.protobuf.Timestamp - (*v1.Priority)(nil), // 16: temporal.api.common.v1.Priority - (*v12.ActivityOptions)(nil), // 17: temporal.api.activity.v1.ActivityOptions - (*v13.WorkerDeploymentVersion)(nil), // 18: temporal.api.deployment.v1.WorkerDeploymentVersion - (*v1.Payloads)(nil), // 19: temporal.api.common.v1.Payloads - (*v1.Header)(nil), // 20: temporal.api.common.v1.Header - (*v14.UserMetadata)(nil), // 21: temporal.api.sdk.v1.UserMetadata - (*v15.Failure)(nil), // 22: temporal.api.failure.v1.Failure + (*ActivityPauseState)(nil), // 4: temporal.server.chasm.lib.activity.proto.v1.ActivityPauseState + (*ActivityAttemptState)(nil), // 5: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState + (*ActivityHeartbeatState)(nil), // 6: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState + (*ActivityRequestData)(nil), // 7: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData + (*ActivityOutcome)(nil), // 8: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome + (*ActivityAttemptState_LastFailureDetails)(nil), // 9: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails + (*ActivityOutcome_Successful)(nil), // 10: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful + (*ActivityOutcome_Failed)(nil), // 11: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed + (*v1.ActivityType)(nil), // 12: temporal.api.common.v1.ActivityType + (*v11.TaskQueue)(nil), // 13: 
temporal.api.taskqueue.v1.TaskQueue + (*durationpb.Duration)(nil), // 14: google.protobuf.Duration + (*v1.RetryPolicy)(nil), // 15: temporal.api.common.v1.RetryPolicy + (*timestamppb.Timestamp)(nil), // 16: google.protobuf.Timestamp + (*v1.Priority)(nil), // 17: temporal.api.common.v1.Priority + (*v12.ActivityOptions)(nil), // 18: temporal.api.activity.v1.ActivityOptions + (*v13.WorkerDeploymentVersion)(nil), // 19: temporal.api.deployment.v1.WorkerDeploymentVersion + (*v1.Payloads)(nil), // 20: temporal.api.common.v1.Payloads + (*v1.Header)(nil), // 21: temporal.api.common.v1.Header + (*v14.UserMetadata)(nil), // 22: temporal.api.sdk.v1.UserMetadata + (*v15.Failure)(nil), // 23: temporal.api.failure.v1.Failure } var file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_depIdxs = []int32{ - 11, // 0: temporal.server.chasm.lib.activity.proto.v1.ActivityState.activity_type:type_name -> temporal.api.common.v1.ActivityType - 12, // 1: temporal.server.chasm.lib.activity.proto.v1.ActivityState.task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue - 13, // 2: temporal.server.chasm.lib.activity.proto.v1.ActivityState.schedule_to_close_timeout:type_name -> google.protobuf.Duration - 13, // 3: temporal.server.chasm.lib.activity.proto.v1.ActivityState.schedule_to_start_timeout:type_name -> google.protobuf.Duration - 13, // 4: temporal.server.chasm.lib.activity.proto.v1.ActivityState.start_to_close_timeout:type_name -> google.protobuf.Duration - 13, // 5: temporal.server.chasm.lib.activity.proto.v1.ActivityState.heartbeat_timeout:type_name -> google.protobuf.Duration - 14, // 6: temporal.server.chasm.lib.activity.proto.v1.ActivityState.retry_policy:type_name -> temporal.api.common.v1.RetryPolicy + 12, // 0: temporal.server.chasm.lib.activity.proto.v1.ActivityState.activity_type:type_name -> temporal.api.common.v1.ActivityType + 13, // 1: temporal.server.chasm.lib.activity.proto.v1.ActivityState.task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue + 
14, // 2: temporal.server.chasm.lib.activity.proto.v1.ActivityState.schedule_to_close_timeout:type_name -> google.protobuf.Duration + 14, // 3: temporal.server.chasm.lib.activity.proto.v1.ActivityState.schedule_to_start_timeout:type_name -> google.protobuf.Duration + 14, // 4: temporal.server.chasm.lib.activity.proto.v1.ActivityState.start_to_close_timeout:type_name -> google.protobuf.Duration + 14, // 5: temporal.server.chasm.lib.activity.proto.v1.ActivityState.heartbeat_timeout:type_name -> google.protobuf.Duration + 15, // 6: temporal.server.chasm.lib.activity.proto.v1.ActivityState.retry_policy:type_name -> temporal.api.common.v1.RetryPolicy 0, // 7: temporal.server.chasm.lib.activity.proto.v1.ActivityState.status:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityExecutionStatus - 15, // 8: temporal.server.chasm.lib.activity.proto.v1.ActivityState.schedule_time:type_name -> google.protobuf.Timestamp - 16, // 9: temporal.server.chasm.lib.activity.proto.v1.ActivityState.priority:type_name -> temporal.api.common.v1.Priority + 16, // 8: temporal.server.chasm.lib.activity.proto.v1.ActivityState.schedule_time:type_name -> google.protobuf.Timestamp + 17, // 9: temporal.server.chasm.lib.activity.proto.v1.ActivityState.priority:type_name -> temporal.api.common.v1.Priority 2, // 10: temporal.server.chasm.lib.activity.proto.v1.ActivityState.cancel_state:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityCancelState 3, // 11: temporal.server.chasm.lib.activity.proto.v1.ActivityState.terminate_state:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityTerminateState - 17, // 12: temporal.server.chasm.lib.activity.proto.v1.ActivityState.original_options:type_name -> temporal.api.activity.v1.ActivityOptions - 15, // 13: temporal.server.chasm.lib.activity.proto.v1.ActivityCancelState.request_time:type_name -> google.protobuf.Timestamp - 13, // 14: 
temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.current_retry_interval:type_name -> google.protobuf.Duration - 15, // 15: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.started_time:type_name -> google.protobuf.Timestamp - 15, // 16: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.complete_time:type_name -> google.protobuf.Timestamp - 8, // 17: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.last_failure_details:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails - 18, // 18: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.last_deployment_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion - 19, // 19: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState.details:type_name -> temporal.api.common.v1.Payloads - 15, // 20: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState.recorded_time:type_name -> google.protobuf.Timestamp - 19, // 21: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.input:type_name -> temporal.api.common.v1.Payloads - 20, // 22: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.header:type_name -> temporal.api.common.v1.Header - 21, // 23: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.user_metadata:type_name -> temporal.api.sdk.v1.UserMetadata - 9, // 24: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.successful:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful - 10, // 25: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.failed:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed - 15, // 26: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails.time:type_name -> google.protobuf.Timestamp - 22, // 27: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails.failure:type_name -> 
temporal.api.failure.v1.Failure - 19, // 28: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful.output:type_name -> temporal.api.common.v1.Payloads - 22, // 29: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed.failure:type_name -> temporal.api.failure.v1.Failure - 30, // [30:30] is the sub-list for method output_type - 30, // [30:30] is the sub-list for method input_type - 30, // [30:30] is the sub-list for extension type_name - 30, // [30:30] is the sub-list for extension extendee - 0, // [0:30] is the sub-list for field type_name + 18, // 12: temporal.server.chasm.lib.activity.proto.v1.ActivityState.original_options:type_name -> temporal.api.activity.v1.ActivityOptions + 4, // 13: temporal.server.chasm.lib.activity.proto.v1.ActivityState.pause_state:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityPauseState + 16, // 14: temporal.server.chasm.lib.activity.proto.v1.ActivityCancelState.request_time:type_name -> google.protobuf.Timestamp + 16, // 15: temporal.server.chasm.lib.activity.proto.v1.ActivityPauseState.pause_time:type_name -> google.protobuf.Timestamp + 14, // 16: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.current_retry_interval:type_name -> google.protobuf.Duration + 16, // 17: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.started_time:type_name -> google.protobuf.Timestamp + 16, // 18: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.complete_time:type_name -> google.protobuf.Timestamp + 9, // 19: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.last_failure_details:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails + 19, // 20: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.last_deployment_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion + 20, // 21: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState.details:type_name -> 
temporal.api.common.v1.Payloads + 16, // 22: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState.recorded_time:type_name -> google.protobuf.Timestamp + 20, // 23: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.input:type_name -> temporal.api.common.v1.Payloads + 21, // 24: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.header:type_name -> temporal.api.common.v1.Header + 22, // 25: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.user_metadata:type_name -> temporal.api.sdk.v1.UserMetadata + 10, // 26: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.successful:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful + 11, // 27: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.failed:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed + 16, // 28: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails.time:type_name -> google.protobuf.Timestamp + 23, // 29: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails.failure:type_name -> temporal.api.failure.v1.Failure + 20, // 30: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful.output:type_name -> temporal.api.common.v1.Payloads + 23, // 31: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed.failure:type_name -> temporal.api.failure.v1.Failure + 32, // [32:32] is the sub-list for method output_type + 32, // [32:32] is the sub-list for method input_type + 32, // [32:32] is the sub-list for extension type_name + 32, // [32:32] is the sub-list for extension extendee + 0, // [0:32] is the sub-list for field type_name } func init() { file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_init() } @@ -1065,7 +1159,7 @@ func file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_init( if File_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto != nil { 
return } - file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[6].OneofWrappers = []any{ + file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_msgTypes[7].OneofWrappers = []any{ (*ActivityOutcome_Successful_)(nil), (*ActivityOutcome_Failed_)(nil), } @@ -1075,7 +1169,7 @@ func file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_init( GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDesc), len(file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDesc)), NumEnums: 1, - NumMessages: 10, + NumMessages: 11, NumExtensions: 0, NumServices: 0, }, diff --git a/chasm/lib/activity/handler.go b/chasm/lib/activity/handler.go index e024a69e800..19185aa33c6 100644 --- a/chasm/lib/activity/handler.go +++ b/chasm/lib/activity/handler.go @@ -357,7 +357,18 @@ func (h *handler) PauseActivityExecution(ctx context.Context, req *activitypb.Pa } return &activitypb.PauseActivityExecutionResponse{}, nil } - return nil, serviceerror.NewUnimplemented("PauseActivityExecution for standalone activities is not yet implemented") + + ref := chasm.NewComponentRef[*Activity](chasm.ExecutionKey{ + NamespaceID: req.GetNamespaceId(), + BusinessID: frontendReq.GetActivityId(), + RunID: frontendReq.GetRunId(), + }) + + _, _, err := chasm.UpdateComponent(ctx, ref, (*Activity).handlePauseRequested, req) + if err != nil { + return nil, err + } + return &activitypb.PauseActivityExecutionResponse{}, nil } func (h *handler) UnpauseActivityExecution(ctx context.Context, req *activitypb.UnpauseActivityExecutionRequest) (*activitypb.UnpauseActivityExecutionResponse, error) { @@ -383,7 +394,18 @@ func (h *handler) UnpauseActivityExecution(ctx context.Context, req *activitypb. 
} return &activitypb.UnpauseActivityExecutionResponse{}, nil } - return nil, serviceerror.NewUnimplemented("UnpauseActivityExecution for standalone activities is not yet implemented") + + ref := chasm.NewComponentRef[*Activity](chasm.ExecutionKey{ + NamespaceID: req.GetNamespaceId(), + BusinessID: frontendReq.GetActivityId(), + RunID: frontendReq.GetRunId(), + }) + + _, _, err := chasm.UpdateComponent(ctx, ref, (*Activity).handleUnpauseRequested, req) + if err != nil { + return nil, err + } + return &activitypb.UnpauseActivityExecutionResponse{}, nil } func (h *handler) ResetActivityExecution(ctx context.Context, req *activitypb.ResetActivityExecutionRequest) (*activitypb.ResetActivityExecutionResponse, error) { diff --git a/chasm/lib/activity/proto/v1/activity_state.proto b/chasm/lib/activity/proto/v1/activity_state.proto index 0c516def6e0..6bd801818f5 100644 --- a/chasm/lib/activity/proto/v1/activity_state.proto +++ b/chasm/lib/activity/proto/v1/activity_state.proto @@ -41,6 +41,16 @@ enum ActivityExecutionStatus { // Additionally, after all retries are exhausted for start-to-close or heartbeat timeouts, the activity will also // transition to timed out status. ACTIVITY_EXECUTION_STATUS_TIMED_OUT = 8; + // The activity has been paused while in the SCHEDULED state. No worker will be dispatched until + // the activity is unpaused. The activity's pause_state field is populated with the identity, + // reason, and time of the pause request. + // + // Note: pausing a STARTED activity does not transition to this status. Instead, the pause is + // delivered as a flag (pause_state is set, status stays STARTED) and the worker is notified + // via ActivityPaused=true on its next heartbeat. The external run state in that case is + // PAUSE_REQUESTED. If the worker fails and retries while the flag is set, the retry lands in + // SCHEDULED with pause_state still populated and the dispatch task is blocked until unpause. 
+ ACTIVITY_EXECUTION_STATUS_PAUSED = 9; } message ActivityState { @@ -100,6 +110,9 @@ message ActivityState { // and on each options update that re-schedules the task). Unlike attempt.stamp, this counter // is NOT incremented on retries, because schedule-to-close spans the full activity lifetime. int32 stamp = 14; + + // Set if the activity was paused. + ActivityPauseState pause_state = 15; } message ActivityCancelState { @@ -113,6 +126,12 @@ message ActivityTerminateState { string request_id = 1; } +message ActivityPauseState { + google.protobuf.Timestamp pause_time = 1; + string identity = 2; + string reason = 3; +} + message ActivityAttemptState { // The attempt this activity is currently on. // Incremented each time a new attempt is scheduled. A newly created activity will immediately be scheduled, and diff --git a/chasm/lib/activity/statemachine.go b/chasm/lib/activity/statemachine.go index 58ffd45376f..516b4c6449b 100644 --- a/chasm/lib/activity/statemachine.go +++ b/chasm/lib/activity/statemachine.go @@ -192,6 +192,8 @@ var TransitionCompleted = chasm.NewTransition( activitypb.ACTIVITY_EXECUTION_STATUS_COMPLETED, func(a *Activity, ctx chasm.MutableContext, event completeEvent) error { return a.StoreOrSelf(ctx).RecordCompleted(ctx, func(ctx chasm.MutableContext) error { + a.PauseState = nil + req := event.req.GetCompleteRequest() attempt := a.LastAttempt.Get(ctx) @@ -226,6 +228,7 @@ var TransitionFailed = chasm.NewTransition( func(a *Activity, ctx chasm.MutableContext, event failedEvent) error { return a.StoreOrSelf(ctx).RecordCompleted(ctx, func(ctx chasm.MutableContext) error { req := event.req.GetFailedRequest() + a.PauseState = nil if details := req.GetLastHeartbeatDetails(); details != nil { heartbeat := a.getOrCreateLastHeartbeat(ctx) @@ -258,6 +261,7 @@ var TransitionTerminated = chasm.NewTransition( activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED, + 
activitypb.ACTIVITY_EXECUTION_STATUS_PAUSED, }, activitypb.ACTIVITY_EXECUTION_STATUS_TERMINATED, func(a *Activity, ctx chasm.MutableContext, event terminateEvent) error { @@ -265,6 +269,7 @@ var TransitionTerminated = chasm.NewTransition( a.TerminateState = &activitypb.ActivityTerminateState{ RequestId: event.request.RequestID, } + a.PauseState = nil outcome := a.Outcome.Get(ctx) failure := &failurepb.Failure{ Message: event.request.Reason, @@ -288,11 +293,14 @@ var TransitionTerminated = chasm.NewTransition( ) // TransitionCancelRequested transitions to CancelRequested status. +// PAUSED activities (real status, no worker) are cancelled immediately in handleCancellationRequested +// rather than waiting for a worker response. var TransitionCancelRequested = chasm.NewTransition( []activitypb.ActivityExecutionStatus{ activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED, + activitypb.ACTIVITY_EXECUTION_STATUS_PAUSED, }, activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED, func(a *Activity, ctx chasm.MutableContext, req *workflowservice.RequestCancelActivityExecutionRequest) error { @@ -336,6 +344,7 @@ var TransitionCanceled = chasm.NewTransition( Failure: failure, }, } + a.PauseState = nil a.emitOnCanceledMetrics(ctx, event.handler, event.fromStatus) @@ -356,6 +365,7 @@ var TransitionTimedOut = chasm.NewTransition( activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED, + activitypb.ACTIVITY_EXECUTION_STATUS_PAUSED, }, activitypb.ACTIVITY_EXECUTION_STATUS_TIMED_OUT, func(a *Activity, ctx chasm.MutableContext, event timeoutEvent) error { @@ -380,9 +390,52 @@ var TransitionTimedOut = chasm.NewTransition( return err } + a.PauseState = nil + a.emitOnTimedOutMetrics(ctx, event.metricsHandler, timeoutType, event.fromStatus) return nil }) }, ) + +type pauseEvent struct { + req 
*workflowservice.PauseActivityExecutionRequest + metricsHandler metrics.Handler +} + +// TransitionPaused transitions a SCHEDULED activity to PAUSED status. The stamp is bumped to +// invalidate any pending dispatch task so the activity is not dispatched while paused. +// +// Note: STARTED activities are NOT paused via this transition. Pausing a STARTED activity is a +// flag-only operation (PauseState is set, status stays STARTED) so the worker's token remains +// valid and the worker is notified via ActivityPaused=true on its next heartbeat. See +// handlePauseRequested for the full hybrid logic. +var TransitionPaused = chasm.NewTransition( + []activitypb.ActivityExecutionStatus{ + activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, + }, + activitypb.ACTIVITY_EXECUTION_STATUS_PAUSED, + func(a *Activity, ctx chasm.MutableContext, event pauseEvent) error { + a.pause(ctx, event) + attempt := a.LastAttempt.Get(ctx) + attempt.Stamp++ + return nil + }, +) + +type unpauseEvent struct { + req *workflowservice.UnpauseActivityExecutionRequest + metricsHandler metrics.Handler +} + +var TransitionUnpaused = chasm.NewTransition( + []activitypb.ActivityExecutionStatus{ + activitypb.ACTIVITY_EXECUTION_STATUS_PAUSED, + }, + activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, + func(a *Activity, ctx chasm.MutableContext, event unpauseEvent) error { + a.unpause(ctx, event) + return nil + }, +) diff --git a/chasm/lib/activity/validator.go b/chasm/lib/activity/validator.go index 4d690a818b8..eb78302a1c8 100644 --- a/chasm/lib/activity/validator.go +++ b/chasm/lib/activity/validator.go @@ -569,3 +569,64 @@ func validateTerminateActivityExecutionRequest( return nil } + +func validatePauseActivityExecutionRequest( + req *workflowservice.PauseActivityExecutionRequest, + maxIDLengthLimit int, + blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, + blobSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter, + logger log.Logger, +) error { + if req.GetActivityId() == "" { + 
return serviceerror.NewInvalidArgument("activity ID is required") + } + if len(req.GetActivityId()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("activity ID exceeds length limit. Length=%d Limit=%d", + len(req.GetActivityId()), maxIDLengthLimit) + } + if len(req.GetIdentity()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("identity exceeds length limit. Length=%d Limit=%d", + len(req.GetIdentity()), maxIDLengthLimit) + } + if runID := req.GetRunId(); runID != "" { + _, err := uuid.Parse(runID) + if err != nil { + return serviceerror.NewInvalidArgument("invalid run id: must be a valid UUID") + } + } + if err := validateBlobSize( + req.GetActivityId(), + "PauseActivityExecution", + blobSizeLimitError, + blobSizeLimitWarn, + len(req.GetReason()), + logger, + req.GetNamespace()); err != nil { + return serviceerror.NewInvalidArgument("reason exceeds length limit") + } + return nil +} + +func validateUnpauseActivityExecutionRequest( + req *workflowservice.UnpauseActivityExecutionRequest, + maxIDLengthLimit int, +) error { + if req.GetActivityId() == "" { + return serviceerror.NewInvalidArgument("activity ID is required") + } + if len(req.GetActivityId()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("activity ID exceeds length limit. Length=%d Limit=%d", + len(req.GetActivityId()), maxIDLengthLimit) + } + if len(req.GetIdentity()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("identity exceeds length limit. 
Length=%d Limit=%d", + len(req.GetIdentity()), maxIDLengthLimit) + } + if runID := req.GetRunId(); runID != "" { + _, err := uuid.Parse(runID) + if err != nil { + return serviceerror.NewInvalidArgument("invalid run id: must be a valid UUID") + } + } + return nil +} diff --git a/chasm/lib/activity/validator_test.go b/chasm/lib/activity/validator_test.go index 7ebc7415223..b8f62f0ec19 100644 --- a/chasm/lib/activity/validator_test.go +++ b/chasm/lib/activity/validator_test.go @@ -688,27 +688,152 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { }) } -func TestValidateStartDelay(t *testing.T) { - t.Run("NilDuration", func(t *testing.T) { - err := validateStartDelay(nil) +func TestValidatePauseActivityExecutionRequest(t *testing.T) { + t.Run("Success", func(t *testing.T) { + req := &workflowservice.PauseActivityExecutionRequest{ + ActivityId: defaultActivityID, + Identity: "test-identity", + Reason: "test-reason", + } + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + require.NoError(t, err) + }) + + t.Run("SuccessWithRunID", func(t *testing.T) { + req := &workflowservice.PauseActivityExecutionRequest{ + ActivityId: defaultActivityID, + RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", + Identity: "test-identity", + } + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) require.NoError(t, err) }) - t.Run("ZeroDuration", func(t *testing.T) { - err := validateStartDelay(durationpb.New(0)) + t.Run("EmptyActivityID", func(t *testing.T) { + req := &workflowservice.PauseActivityExecutionRequest{ + ActivityId: "", + Identity: "test-identity", + } + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, 
err, &invalidArgErr) + require.Equal(t, "activity ID is required", invalidArgErr.Message) + }) + + t.Run("ActivityIDTooLong", func(t *testing.T) { + req := &workflowservice.PauseActivityExecutionRequest{ + ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), + Identity: "test-identity", + } + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, fmt.Sprintf("activity ID exceeds length limit. Length=%d Limit=%d", + defaultMaxIDLengthLimit+1, defaultMaxIDLengthLimit), invalidArgErr.Message) + }) + + t.Run("IdentityTooLong", func(t *testing.T) { + req := &workflowservice.PauseActivityExecutionRequest{ + ActivityId: defaultActivityID, + Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), + } + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, fmt.Sprintf("identity exceeds length limit. 
Length=%d Limit=%d", + defaultMaxIDLengthLimit+1, defaultMaxIDLengthLimit), invalidArgErr.Message) + }) + + t.Run("ReasonTooLong", func(t *testing.T) { + req := &workflowservice.PauseActivityExecutionRequest{ + ActivityId: defaultActivityID, + Identity: "test-identity", + Reason: string(make([]byte, defaultBlobSizeLimitError("default")+1)), + } + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, "reason exceeds length limit", invalidArgErr.Message) + }) + + t.Run("InvalidRunID", func(t *testing.T) { + req := &workflowservice.PauseActivityExecutionRequest{ + ActivityId: defaultActivityID, + RunId: "not-a-valid-uuid", + Identity: "test-identity", + } + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, "invalid run id: must be a valid UUID", invalidArgErr.Message) + }) +} + +func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { + t.Run("Success", func(t *testing.T) { + req := &workflowservice.UnpauseActivityExecutionRequest{ + ActivityId: defaultActivityID, + Identity: "test-identity", + } + err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) - t.Run("ValidDuration", func(t *testing.T) { - err := validateStartDelay(durationpb.New(5 * time.Second)) + t.Run("SuccessWithRunID", func(t *testing.T) { + req := &workflowservice.UnpauseActivityExecutionRequest{ + ActivityId: defaultActivityID, + RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", + Identity: "test-identity", + } + err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) - 
t.Run("NegativeDuration", func(t *testing.T) { - err := validateStartDelay(durationpb.New(-1 * time.Second)) + t.Run("EmptyActivityID", func(t *testing.T) { + req := &workflowservice.UnpauseActivityExecutionRequest{ + ActivityId: "", + Identity: "test-identity", + } + err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, "activity ID is required", invalidArgErr.Message) + }) + + t.Run("ActivityIDTooLong", func(t *testing.T) { + req := &workflowservice.UnpauseActivityExecutionRequest{ + ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), + Identity: "test-identity", + } + err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, fmt.Sprintf("activity ID exceeds length limit. Length=%d Limit=%d", + defaultMaxIDLengthLimit+1, defaultMaxIDLengthLimit), invalidArgErr.Message) + }) + + t.Run("IdentityTooLong", func(t *testing.T) { + req := &workflowservice.UnpauseActivityExecutionRequest{ + ActivityId: defaultActivityID, + Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), + } + err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, fmt.Sprintf("identity exceeds length limit. 
Length=%d Limit=%d", + defaultMaxIDLengthLimit+1, defaultMaxIDLengthLimit), invalidArgErr.Message) + }) + + t.Run("InvalidRunID", func(t *testing.T) { + req := &workflowservice.UnpauseActivityExecutionRequest{ + ActivityId: defaultActivityID, + RunId: "not-a-valid-uuid", + Identity: "test-identity", + } + err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) - require.Contains(t, invalidArgErr.Message, "invalid StartDelay") + require.Equal(t, "invalid run id: must be a valid UUID", invalidArgErr.Message) }) } diff --git a/common/metrics/metric_defs.go b/common/metrics/metric_defs.go index feae0c8ef7f..99d212fbe7d 100644 --- a/common/metrics/metric_defs.go +++ b/common/metrics/metric_defs.go @@ -348,6 +348,10 @@ const ( HistoryRespondActivityTaskCanceledScope = "RespondActivityTaskCanceled" // ActivityTerminatedScope tracks TerminateActivityExecution API calls received by service ActivityTerminatedScope = "ActivityTerminated" + // ActivityPausedScope tracks PauseActivityExecution API calls received by service + ActivityPausedScope = "ActivityPaused" + // ActivityUnpausedScope tracks UnpauseActivityExecution API calls received by service + ActivityUnpausedScope = "ActivityUnpaused" // HistoryGetWorkflowExecutionHistoryScope is the metric scope for non-long-poll frontend.GetWorkflowExecutionHistory HistoryGetWorkflowExecutionHistoryScope = "GetWorkflowExecutionHistory" // HistoryPollWorkflowExecutionHistoryScope is the metric scope for long poll case of frontend.GetWorkflowExecutionHistory @@ -934,6 +938,8 @@ var ( ActivityCancel = NewCounterDef("activity_cancel", WithDescription("Number of activities that are cancelled.")) ActivityTerminate = NewCounterDef("activity_terminate", WithDescription("Number of activities that are terminated.")) ActivityTaskTimeout = NewCounterDef("activity_task_timeout", WithDescription("Number of activity task timeouts (including 
retries).")) + ActivityPause = NewCounterDef("activity_pause", WithDescription("Number of activity pauses.")) + ActivityUnpause = NewCounterDef("activity_unpause", WithDescription("Number of activity unpauses.")) ActivityTimeout = NewCounterDef("activity_timeout", WithDescription("Number of terminal activity timeouts.")) ActivityPayloadSize = NewCounterDef("activity_payload_size", WithDescription("Size of activity payloads in bytes.")) AckLevelUpdateCounter = NewCounterDef("ack_level_update") diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index 1745f4ef222..1181800ae4f 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" activitypb "go.temporal.io/api/activity/v1" commonpb "go.temporal.io/api/common/v1" @@ -6948,10 +6949,121 @@ func (s *standaloneActivityTestSuite) TestCallbacks() { func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { t := s.T() - ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) - defer cancel() - t.Run("StandaloneActivityReturnsError", func(t *testing.T) { + t.Run("PauseWhileStarted", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + pollResp := s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + // DescribeActivityExecution should reflect PAUSE_REQUESTED run state: the activity is still + // STARTED (worker token valid) but 
a pause has been requested via the flag. + descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, descResp.GetInfo().GetRunState()) + + // Heartbeat should report ActivityPaused=true. + heartbeatResp, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + }) + require.NoError(t, err) + require.True(t, heartbeatResp.GetActivityPaused(), "expected ActivityPaused=true after pause") + + // DescribeActivityExecution should still reflect PAUSE_REQUESTED. + descResp, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, descResp.GetInfo().GetRunState()) + }) + + t.Run("PauseWhileScheduled", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + // DescribeActivityExecution should reflect PAUSED run state. 
+ descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, descResp.GetInfo().GetRunState()) + + // Attempt to poll — the dispatch task was invalidated by the stamp bump, so no task should + // be available. Use a short-lived context to avoid blocking the test. + shortCtx, shortCancel := context.WithTimeout(ctx, 2*time.Second) + defer shortCancel() + pollResp, err := s.FrontendClient().PollActivityTaskQueue(shortCtx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + // Either the poll times out (deadline exceeded) or returns an empty response. + if err == nil { + require.Empty(t, pollResp.GetActivityId(), "expected no task to be dispatched while paused") + } + }) + + t.Run("PauseIdempotent", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + pauseReq := &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + } + _, err := s.FrontendClient().PauseActivityExecution(ctx, pauseReq) + require.NoError(t, err) + + // Second pause should succeed with no error (idempotent). 
+ _, err = s.FrontendClient().PauseActivityExecution(ctx, pauseReq) + require.NoError(t, err) + }) + + t.Run("PauseNotFound", func(t *testing.T) { + ctx := testcore.NewContext() + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ Namespace: s.Namespace().String(), ActivityId: testcore.RandomizeStr(t.Name()), @@ -6959,25 +7071,1404 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { Reason: "test", }) require.Error(t, err) - var unimplementedErr *serviceerror.Unimplemented - require.ErrorAs(t, err, &unimplementedErr) + var notFoundErr *serviceerror.NotFound + require.ErrorAs(t, err, &notFoundErr) }) -} -func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { - t := s.T() - ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) - defer cancel() + t.Run("PauseTerminalState", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) - t.Run("StandaloneActivityReturnsError", func(t *testing.T) { - _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + // Poll and complete the activity so it reaches a terminal state. + pollResp := s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + _, err := s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + // Pause should fail with FailedPrecondition on a terminal activity.
+ _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ Namespace: s.Namespace().String(), - ActivityId: testcore.RandomizeStr(t.Name()), + ActivityId: activityID, + RunId: runID, Identity: "test-identity", + Reason: "test", }) require.Error(t, err) - var unimplementedErr *serviceerror.Unimplemented - require.ErrorAs(t, err, &unimplementedErr) + var failedPreconditionErr *serviceerror.FailedPrecondition + require.ErrorAs(t, err, &failedPreconditionErr) + }) + + // PauseWhileRunning: pause a STARTED activity, fail the attempt, then verify the activity + // stays paused (SCHEDULED + paused = RunState PAUSED) and is not dispatched until unpause. + t.Run("PauseWhileRunning", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.RequestID(), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 10, + InitialInterval: durationpb.New(1 * time.Second), + BackoffCoefficient: 1.0, + }, + }) + require.NoError(t, err) + + // Poll – activity is now STARTED at attempt=1. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + // Pause while STARTED. 
+ _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + Reason: "test-reason", + }) + require.NoError(t, err) + + // Heartbeat should report ActivityPaused=true. + heartbeatResp, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + }) + require.NoError(t, err) + require.True(t, heartbeatResp.GetActivityPaused()) + + // Describe should show PAUSE_REQUESTED: status is STARTED with PauseState set. + descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, descResp.GetInfo().GetRunState()) + + // Fail the attempt – this triggers a retry (attempt=2) but the activity stays paused. + _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: &failurepb.Failure{ + Message: "retryable failure", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, + }, + }, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + // After fail, activity should be PAUSED (SCHEDULED + paused) at attempt=2 with a recorded failure. 
+ require.EventuallyWithT(t, func(c *assert.CollectT) { + dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + assert.NoError(c, dErr) + assert.Equal(c, enumspb.PENDING_ACTIVITY_STATE_PAUSED, dr.GetInfo().GetRunState()) + assert.EqualValues(c, 2, dr.GetInfo().GetAttempt()) + assert.NotNil(c, dr.GetInfo().GetLastFailure()) + }, 10*time.Second, 200*time.Millisecond) + + // Unpause – activity should be dispatched. + _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + }) + require.NoError(t, err) + + // Poll and complete the second attempt. + poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.Equal(t, activityID, poll2Resp.GetActivityId()) + require.EqualValues(t, 2, poll2Resp.Attempt) + }) + + // PauseIncreaseAttemptsOnFailure: verify that attempt count increases and LastFailure is populated + // when an activity fails while paused. 
+ t.Run("PauseIncreaseAttemptsOnFailure", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.RequestID(), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 10, + InitialInterval: durationpb.New(1 * time.Second), + BackoffCoefficient: 1.0, + }, + }) + require.NoError(t, err) + + // Poll – attempt=1. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + // Pause while STARTED. + _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + Reason: "test-reason", + }) + require.NoError(t, err) + + // Describe should show PAUSE_REQUESTED: status is STARTED with PauseState set. 
+ descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, descResp.GetInfo().GetRunState()) + require.EqualValues(t, 1, descResp.GetInfo().GetAttempt()) + + failureMsg := "activity-failed-while-paused" + _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: &failurepb.Failure{ + Message: failureMsg, + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, + }, + }, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + // Verify attempt is now 2, activity is still paused, and LastFailure is populated. + require.EventuallyWithT(t, func(c *assert.CollectT) { + dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + assert.NoError(c, dErr) + assert.Equal(c, enumspb.PENDING_ACTIVITY_STATE_PAUSED, dr.GetInfo().GetRunState()) + assert.EqualValues(c, 2, dr.GetInfo().GetAttempt()) + assert.Equal(c, failureMsg, dr.GetInfo().GetLastFailure().GetMessage()) + }, 10*time.Second, 200*time.Millisecond) + + // Unpause and complete. 
+ _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + }) + require.NoError(t, err) + + poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.Equal(t, activityID, poll2Resp.GetActivityId()) + require.EqualValues(t, 2, poll2Resp.Attempt) + }) + + // PauseWhileWaiting: pause an activity that has already failed and is waiting in retry backoff + // (SCHEDULED). Verify it is not dispatched while paused, then unpause and poll. + // Uses a 30s retry interval (matching PauseWhileRetryNoWait) to eliminate the race between + // "confirm attempt==2" and "pause call" — with 30s backoff the dispatch task cannot fire in + // the window between those two operations. + t.Run("PauseWhileWaiting", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.RequestID(), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 10, + InitialInterval: durationpb.New(30 * time.Second), + BackoffCoefficient: 1.0, + }, + }) + require.NoError(t, err) + + // Poll and fail attempt=1. 
+ pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: &failurepb.Failure{ + Message: "retryable failure", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, + }, + }, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + // Wait for the activity to be rescheduled at attempt=2 (in retry backoff). + require.Eventually(t, func() bool { + dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + return dErr == nil && dr.GetInfo().GetAttempt() == 2 + }, 10*time.Second, 200*time.Millisecond) + + // Pause while in SCHEDULED retry backoff. + _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + Reason: "test-reason", + }) + require.NoError(t, err) + + // Verify activity is PAUSED at attempt=2 (not dispatched while paused). 
+ require.EventuallyWithT(t, func(c *assert.CollectT) { + dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + assert.NoError(c, dErr) + assert.Equal(c, enumspb.PENDING_ACTIVITY_STATE_PAUSED, dr.GetInfo().GetRunState()) + assert.EqualValues(c, 2, dr.GetInfo().GetAttempt()) + }, 10*time.Second, 200*time.Millisecond) + + // Unpause – activity should be dispatched. + _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + }) + require.NoError(t, err) + + // Poll and complete attempt=2. + poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.Equal(t, activityID, poll2Resp.GetActivityId()) + require.EqualValues(t, 2, poll2Resp.Attempt) + }) + + // PauseWhileRetryNoWait: pause an activity during a long retry backoff (30s), then immediately + // unpause. The activity should be dispatched quickly — well before the 30s retry interval elapses. 
+ t.Run("PauseWhileRetryNoWait", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.RequestID(), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 10, + InitialInterval: durationpb.New(30 * time.Second), + BackoffCoefficient: 1.0, + }, + }) + require.NoError(t, err) + + // Poll and fail attempt=1 – activity enters a 30s retry backoff. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: &failurepb.Failure{ + Message: "retryable failure", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, + }, + }, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + // Wait for activity to be rescheduled at attempt=2. 
+ require.Eventually(t, func() bool { + dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + return dErr == nil && dr.GetInfo().GetAttempt() == 2 + }, 10*time.Second, 200*time.Millisecond) + + // Pause, then immediately unpause – this should skip the remaining 30s backoff. + _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + }) + require.NoError(t, err) + + _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + }) + require.NoError(t, err) + + // Activity should be dispatched quickly (well within the 30s retry backoff window). + poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.Equal(t, activityID, poll2Resp.GetActivityId()) + require.EqualValues(t, 2, poll2Resp.Attempt) + }) + + // PauseWhileCancelRequested: pausing a CANCEL_REQUESTED activity must be rejected with + // FailedPrecondition — cancellation takes precedence and cannot be overridden by a pause. + t.Run("PauseWhileCancelRequested", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + // Poll so the activity is STARTED. 
+ s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + + // Request cancellation — activity transitions to CANCEL_REQUESTED. + _, err := s.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-cancel", + RequestId: s.tv.RequestID(), + }) + require.NoError(t, err) + + // Pause must be rejected — cannot pause an activity with a pending cancellation. + _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.Error(t, err) + var failedPreconditionErr *serviceerror.FailedPrecondition + require.ErrorAs(t, err, &failedPreconditionErr) + }) + + // CancelWhilePaused: mirrors workflow activity behavior — cancelling a PAUSED activity + // succeeds and takes effect immediately (no worker token is active). + t.Run("CancelWhilePaused", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + // Pause while SCHEDULED → activity becomes PAUSED. + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + // Cancel should succeed and take effect immediately (no worker to notify). 
+ _, err = s.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-cancel", + RequestId: s.tv.RequestID(), + }) + require.NoError(t, err) + + // Activity should be CANCELED immediately. + require.EventuallyWithT(t, func(c *assert.CollectT) { + dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + assert.NoError(c, dErr) + assert.Equal(c, enumspb.ACTIVITY_EXECUTION_STATUS_CANCELED, dr.GetInfo().GetStatus()) + }, 10*time.Second, 200*time.Millisecond) + }) + + // TerminateWhilePaused: design doc says PAUSED + terminate → TERMINATED. + t.Run("TerminateWhilePaused", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + // Pause while SCHEDULED → PAUSED. + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, descResp.GetInfo().GetRunState()) + + // Terminate while PAUSED → TERMINATED. 
+ _, err = s.FrontendClient().TerminateActivityExecution(ctx, &workflowservice.TerminateActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Reason: "test-terminate", + Identity: "test-identity", + }) + require.NoError(t, err) + + descResp, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_TERMINATED, descResp.GetInfo().GetStatus()) + }) + + // ScheduleToCloseTimeoutWhilePaused: design doc says PAUSED + S2C timeout → TIMED_OUT. + // The S2C task is created with a.Stamp (not attempt.Stamp), and TransitionPaused only bumps + // attempt.Stamp, so the S2C task remains valid and fires even while the activity is PAUSED. + t.Run("ScheduleToCloseTimeoutWhilePaused", func(t *testing.T) { + ctx, cancel := context.WithTimeout(testcore.NewContext(), 30*time.Second) + defer cancel() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + ScheduleToCloseTimeout: durationpb.New(2 * time.Second), + RequestId: s.tv.RequestID(), + }) + require.NoError(t, err) + runID := startResp.RunId + + // Pause immediately while SCHEDULED — the S2C timer is still running. 
+ _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + // Long-poll for the activity outcome — the S2C timeout fires while the activity is PAUSED. + pollActivityResp, err := s.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.TIMEOUT_TYPE_SCHEDULE_TO_CLOSE, + pollActivityResp.GetOutcome().GetFailure().GetTimeoutFailureInfo().GetTimeoutType(), + "expected ScheduleToCloseTimeout while paused") + }) + + // CompleteWhileStartedAndPaused: design doc says STARTED + paused + worker completes → COMPLETED. + // The PauseWhileRunning test only covers the fail+retry path; this covers the success path. + t.Run("CompleteWhileStartedAndPaused", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + // Poll → STARTED. + pollResp := s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + + // Pause while STARTED → PAUSE_REQUESTED (flag-only; worker token stays valid). 
+ _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, descResp.GetInfo().GetRunState()) + + // Worker completes despite the pause flag — pause is advisory, the token is still valid. + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + // Activity must be COMPLETED. + descResp, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_COMPLETED, descResp.GetInfo().GetStatus()) + }) + + // NonRetryableFailWhilePaused: design doc says STARTED + paused + non-retryable fail → FAILED. 
+ t.Run("NonRetryableFailWhilePaused", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.RequestID(), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 10, + }, + }) + require.NoError(t, err) + + // Poll → STARTED. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + // Pause while STARTED → PAUSE_REQUESTED. + _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + // Worker fails with a non-retryable error — must transition to FAILED, not retry or stay paused. 
+ _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: &failurepb.Failure{ + Message: "non-retryable failure", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: true}, + }, + }, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_FAILED, descResp.GetInfo().GetStatus()) + require.EqualValues(t, 1, descResp.GetInfo().GetAttempt(), "non-retryable fail must not increment attempt") + }) + + // PauseRequestValidation: validate that Pause rejects invalid request fields. + t.Run("PauseRequestValidation", func(t *testing.T) { + ctx := testcore.NewContext() + + t.Run("EmptyActivityID", func(t *testing.T) { + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + Identity: "test-identity", + Reason: "test-pause", + }) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, "activity ID is required", invalidArgErr.Message) + }) + + t.Run("ActivityIDTooLong", func(t *testing.T) { + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), + Identity: "test-identity", + }) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, fmt.Sprintf("activity ID exceeds length limit. 
Length=%d Limit=%d", + defaultMaxIDLengthLimit+1, defaultMaxIDLengthLimit), invalidArgErr.Message) + }) + + t.Run("IdentityTooLong", func(t *testing.T) { + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: testcore.RandomizeStr(t.Name()), + Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), + }) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, fmt.Sprintf("identity exceeds length limit. Length=%d Limit=%d", + defaultMaxIDLengthLimit+1, defaultMaxIDLengthLimit), invalidArgErr.Message) + }) + + t.Run("ReasonTooLong", func(t *testing.T) { + blobSizeLimitError := 1000 + cleanup := s.OverrideDynamicConfig( + dynamicconfig.BlobSizeLimitError, + blobSizeLimitError, + ) + defer cleanup() + + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: testcore.RandomizeStr(t.Name()), + Identity: "test-identity", + Reason: string(make([]byte, blobSizeLimitError+1)), + }) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, "reason exceeds length limit", invalidArgErr.Message) + }) + + t.Run("InvalidRunID", func(t *testing.T) { + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: testcore.RandomizeStr(t.Name()), + RunId: "invalid-run-id", + Identity: "test-identity", + }) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, "invalid run id: must be a valid UUID", invalidArgErr.Message) + }) + }) + + // PauseUpdateOptionsAndUnpause: pause an activity while it's in retry backoff, update the + // retry interval while paused, then unpause and verify the update took effect — the activity + // is 
dispatched immediately (short interval) and runs to completion. + t.Run("PauseUpdateOptionsAndUnpause", func(t *testing.T) { + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.RequestID(), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 10, + InitialInterval: durationpb.New(10 * time.Minute), + BackoffCoefficient: 1.0, + }, + }) + require.NoError(t, err) + + // Poll and fail attempt 1 — activity enters the 10-minute retry backoff. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: &failurepb.Failure{ + Message: "retryable failure", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, + }, + }, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + // Wait for attempt 2 (rescheduled in long backoff, not yet dispatched). 
+ require.Eventually(t, func() bool { + dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + return dErr == nil && dr.GetInfo().GetAttempt() == 2 + }, 10*time.Second, 200*time.Millisecond) + + // Pause while SCHEDULED (in 10-minute retry backoff). + _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + // Update retry interval to 1ms while paused. + _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + ActivityOptions: &activitypb.ActivityOptions{ + RetryPolicy: &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(1 * time.Millisecond), + }, + }, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"retry_policy.initial_interval"}}, + }) + require.NoError(t, err) + + // Verify the update was persisted while the activity remains paused. + descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, descResp.GetInfo().GetRunState()) + require.Equal(t, durationpb.New(1*time.Millisecond), descResp.GetInfo().GetRetryPolicy().GetInitialInterval()) + + // Unpause — the shortened interval means the activity is dispatched immediately. 
+ _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + }) + require.NoError(t, err) + + // Poll attempt 2 — available immediately because the retry interval is now 1ms. + poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.Equal(t, activityID, poll2Resp.GetActivityId()) + require.EqualValues(t, 2, poll2Resp.Attempt) + + // Complete the activity. + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: poll2Resp.TaskToken, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + // Verify terminal COMPLETED state with updated retry policy. + descResp, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_COMPLETED, descResp.GetInfo().GetStatus()) + require.Equal(t, durationpb.New(1*time.Millisecond), descResp.GetInfo().GetRetryPolicy().GetInitialInterval()) + }) +} + +func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { + t := s.T() + + t.Run("UnpauseWhileScheduled", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + // Pause while SCHEDULED. 
+ _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + // Unpause — this should re-dispatch the activity. + _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + }) + require.NoError(t, err) + + // Poll should now succeed. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.Equal(t, activityID, pollResp.GetActivityId(), "expected activity to be dispatched after unpause") + }) + + t.Run("UnpauseWhileStarted", func(t *testing.T) { + ctx := testcore.NewContext() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + pollResp := s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + + // Pause while STARTED. 
+ _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + heartbeatResp, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + }) + require.NoError(t, err) + require.True(t, heartbeatResp.GetActivityPaused(), "expected ActivityPaused=true after pause") + + // Unpause. + _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + }) + require.NoError(t, err) + + // After unpause of a STARTED+PauseState activity, the status stays STARTED (the worker's + // token is still valid — no stamp bump). Verify via describe that the activity is no longer paused. + descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_STARTED, descResp.GetInfo().GetRunState(), + "expected activity to be STARTED after unpause") + }) + + t.Run("UnpauseIdempotent", func(t *testing.T) { + ctx := testcore.NewContext() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + // Unpause a non-paused activity — should succeed with no error. 
+ _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + }) + require.NoError(t, err) + }) + + t.Run("UnpauseWithResetAttempts", func(t *testing.T) { + ctx := testcore.NewContext() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.RequestID(), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 10, + InitialInterval: durationpb.New(1 * time.Second), + BackoffCoefficient: 1.0, + }, + }) + require.NoError(t, err) + + // Poll and fail the first attempt to advance the attempt count. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: &failurepb.Failure{ + Message: "retryable failure", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, + }, + }, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + // Wait for the activity to enter SCHEDULED state for retry. 
+ require.Eventually(t, func() bool { + descResp, descErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + if descErr != nil { + return false + } + return descResp.GetInfo().GetAttempt() == 2 + }, 15*time.Second, 200*time.Millisecond) + + // Pause while SCHEDULED (attempt=2). + _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + // Unpause with ResetAttempts=true. + _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + ResetAttempts: true, + }) + require.NoError(t, err) + + // Poll — attempt count should be reset to 1. + poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.EqualValues(t, 1, poll2Resp.Attempt, "expected attempt reset to 1 after UnpauseWithResetAttempts") + }) + + t.Run("UnpauseWithJitter", func(t *testing.T) { + ctx := testcore.NewContext() + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + _, err = 
s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Jitter: durationpb.New(500 * time.Millisecond), + }) + require.NoError(t, err) + + // Activity should eventually be dispatched despite the jitter. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.Equal(t, activityID, pollResp.GetActivityId()) + }) + + t.Run("UnpauseNotFound", func(t *testing.T) { + ctx := testcore.NewContext() + + _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: testcore.RandomizeStr(t.Name()), + Identity: "test-identity", + }) + require.Error(t, err) + var notFoundErr *serviceerror.NotFound + require.ErrorAs(t, err, &notFoundErr) + }) + + t.Run("UnpauseTerminalState", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + // Poll and complete the activity so it reaches a terminal state. + pollResp := s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + _, err := s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + // Unpause should fail with FailedPrecondition on a terminal activity. 
+ _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + }) + require.Error(t, err) + var failedPreconditionErr *serviceerror.FailedPrecondition + require.ErrorAs(t, err, &failedPreconditionErr) + }) + + // UnpauseWithResetHeartbeat: verify that unpause with reset_heartbeat=true clears heartbeat + // details recorded during a prior attempt. + t.Run("UnpauseWithResetHeartbeat", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.RequestID(), + RetryPolicy: &commonpb.RetryPolicy{ + MaximumAttempts: 10, + InitialInterval: durationpb.New(30 * time.Second), + BackoffCoefficient: 1.0, + }, + }) + require.NoError(t, err) + + // Poll attempt 1. + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp.Attempt) + + // Record a heartbeat with details. 
+ _, err = s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Details: defaultHeartbeatDetails, + }) + require.NoError(t, err) + + // Fail attempt 1 — activity enters 30s retry backoff at attempt 2. + _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: &failurepb.Failure{ + Message: "retryable failure", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, + }, + }, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + // Wait for attempt 2 (count increments immediately on reschedule, even during backoff). + require.Eventually(t, func() bool { + dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + return dErr == nil && dr.GetInfo().GetAttempt() == 2 + }, 10*time.Second, 200*time.Millisecond) + + // Heartbeat details should still be set before the unpause. + descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + require.NoError(t, err) + require.NotNil(t, descResp.GetInfo().GetLastHeartbeatTime(), "expected heartbeat time before unpause") + require.NotNil(t, descResp.GetInfo().GetHeartbeatDetails(), "expected heartbeat details before unpause") + + // Pause while SCHEDULED (in 30s backoff). 
+ _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + // Unpause with ResetHeartbeat=true — clears the recorded heartbeat state. + _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "test-identity", + ResetHeartbeat: true, + }) + require.NoError(t, err) + + // Heartbeat details must be cleared after unpause. + descResp, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + require.NoError(t, err) + require.Nil(t, descResp.GetInfo().GetLastHeartbeatTime(), + "expected heartbeat time cleared after UnpauseWithResetHeartbeat") + require.Nil(t, descResp.GetInfo().GetHeartbeatDetails(), + "expected heartbeat details cleared after UnpauseWithResetHeartbeat") + + // Poll attempt 2 — heartbeat details must be nil in the poll response too. + poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + require.Equal(t, activityID, poll2Resp.GetActivityId()) + require.Nil(t, poll2Resp.GetHeartbeatDetails(), "expected nil heartbeat details in poll after reset") + }) + + // UnpauseWhileCancelRequested: CANCEL_REQUESTED+PauseState is reached by pausing while STARTED + // (flag-only, PauseState set) and then cancelling (status → CANCEL_REQUESTED, PauseState stays). 
+ // Pausing directly on a CANCEL_REQUESTED activity is now rejected (FailedPrecondition), so this + // is the only valid path into this state. Unpause must be a no-op — cancel takes precedence and + // the activity must not be re-dispatched. + t.Run("UnpauseWhileCancelRequested", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + // Poll → STARTED. + pollResp := s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + + // Pause while STARTED → flag-only (PauseState set, status stays STARTED). + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + }) + require.NoError(t, err) + + // Cancel → CANCEL_REQUESTED. PauseState remains set from the prior pause. + _, err = s.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-cancel", + RequestId: s.tv.RequestID(), + }) + require.NoError(t, err) + + // Confirm both flags are set via heartbeat. + hbResp, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + }) + require.NoError(t, err) + require.True(t, hbResp.GetCancelRequested()) + require.True(t, hbResp.GetActivityPaused()) + + // Unpause — must be a no-op: status stays CANCEL_REQUESTED, no new dispatch task. 
+ _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + }) + require.NoError(t, err) + + // RunState must still be CANCEL_REQUESTED after the unpause. + descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_CANCEL_REQUESTED, descResp.GetInfo().GetRunState(), + "unpause of a CANCEL_REQUESTED activity must be a no-op") + + // Heartbeat must show CancelRequested=true, ActivityPaused=false after the unpause. + hbResp2, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + }) + require.NoError(t, err) + require.True(t, hbResp2.GetCancelRequested(), "cancel must remain after unpause") + require.False(t, hbResp2.GetActivityPaused(), "pause flag must be cleared after unpause") + }) + + // UnpauseRequestValidation: validate that Unpause rejects invalid request fields. 
+ t.Run("UnpauseRequestValidation", func(t *testing.T) { + ctx := testcore.NewContext() + + t.Run("EmptyActivityID", func(t *testing.T) { + _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + Identity: "test-identity", + }) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, "activity ID is required", invalidArgErr.Message) + }) + + t.Run("ActivityIDTooLong", func(t *testing.T) { + _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), + Identity: "test-identity", + }) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, fmt.Sprintf("activity ID exceeds length limit. Length=%d Limit=%d", + defaultMaxIDLengthLimit+1, defaultMaxIDLengthLimit), invalidArgErr.Message) + }) + + t.Run("IdentityTooLong", func(t *testing.T) { + _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: testcore.RandomizeStr(t.Name()), + Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), + }) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, fmt.Sprintf("identity exceeds length limit. 
Length=%d Limit=%d", + defaultMaxIDLengthLimit+1, defaultMaxIDLengthLimit), invalidArgErr.Message) + }) + + t.Run("InvalidRunID", func(t *testing.T) { + _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: testcore.RandomizeStr(t.Name()), + RunId: "invalid-run-id", + Identity: "test-identity", + }) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, "invalid run id: must be a valid UUID", invalidArgErr.Message) + }) }) } From a756c374f503d85d3854ca9603d56ec3fdd53d82 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Wed, 22 Apr 2026 16:09:44 -0400 Subject: [PATCH 04/25] fixing feature branch with new validator functions (#10025) ## What changed? Fixing a bad rebase by (1) adding `validateAndNormalizeStartActivityExecutionRequest` function, (2) correcting the validateXXX functions, and (3) running `make fmt` to format the proto files. ## Why? Fixes bad rebase and unblocks #10001 and #9852 ## How did you test it? 
- [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) ## Potential risks NA --- chasm/lib/activity/frontend.go | 64 +++- chasm/lib/activity/frontend_test.go | 4 +- .../activity/gen/activitypb/v1/service.pb.go | 2 +- .../activity/proto/v1/activity_state.proto | 318 +++++++++--------- .../activity/proto/v1/request_response.proto | 70 ++-- chasm/lib/activity/proto/v1/service.proto | 110 +++--- chasm/lib/activity/proto/v1/tasks.proto | 24 +- chasm/lib/activity/validator.go | 29 +- chasm/lib/activity/validator_test.go | 44 +-- .../gen/schedulerpb/v1/service.pb.go | 18 +- chasm/lib/tests/gen/testspb/v1/service.pb.go | 2 +- common/api/metadata.go | 1 - .../server/api/routing/v1/extension.proto | 22 +- tests/standalone_activity_test.go | 11 +- 14 files changed, 378 insertions(+), 341 deletions(-) diff --git a/chasm/lib/activity/frontend.go b/chasm/lib/activity/frontend.go index 96861552ad7..da0210f0df4 100644 --- a/chasm/lib/activity/frontend.go +++ b/chasm/lib/activity/frontend.go @@ -122,7 +122,7 @@ func (h *frontendHandler) DescribeActivityExecution( return nil, ErrStandaloneActivityDisabled } - err := validateDescribeActivityExecutionRequest( + err := validateAndNormalizeDescribeActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), ) @@ -151,7 +151,7 @@ func (h *frontendHandler) PollActivityExecution( return nil, ErrStandaloneActivityDisabled } - err := validatePollActivityExecutionRequest( + err := validateAndNormalizePollActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), ) @@ -266,7 +266,7 @@ func (h *frontendHandler) DeleteActivityExecution( return nil, ErrStandaloneActivityDisabled } - if err := validateAndNormalizeDeleteRequest(req, h.config.MaxIDLengthLimit()); err != nil { + if err := validateAndNormalizeDeleteActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { return nil, err } @@ -301,7 +301,7 @@ func (h *frontendHandler) 
TerminateActivityExecution( return nil, err } - if err := validateAndNormalizeTerminateRequest( + if err := validateAndNormalizeTerminateActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, @@ -334,7 +334,7 @@ func (h *frontendHandler) RequestCancelActivityExecution( return nil, err } - if err := validateAndNormalizeCancelRequest( + if err := validateAndNormalizeRequestCancelActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, @@ -396,16 +396,7 @@ func (h *frontendHandler) validateAndPopulateStartRequest( } applyActivityOptionsToStartRequest(opts, req) - err = validateAndNormalizeStartRequest( - req, - h.config.MaxIDLengthLimit(), - h.config.BlobSizeLimitError, - h.config.BlobSizeLimitWarn, - h.logger, - h.saMapperProvider, - h.saValidator, - ) - if err != nil { + if err = h.validateAndNormalizeStartActivityExecutionRequest(req); err != nil { return nil, err } @@ -418,6 +409,43 @@ func (h *frontendHandler) validateAndPopulateStartRequest( return req, nil } +func (h *frontendHandler) validateAndNormalizeStartActivityExecutionRequest( + req *workflowservice.StartActivityExecutionRequest, +) error { + maxIDLengthLimit := h.config.MaxIDLengthLimit() + + if len(req.GetRequestId()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("request ID exceeds length limit. Length=%d Limit=%d", + len(req.GetRequestId()), maxIDLengthLimit) + } + if len(req.GetIdentity()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("identity exceeds length limit. 
Length=%d Limit=%d", + len(req.GetIdentity()), maxIDLengthLimit) + } + if err := normalizeAndValidateIDPolicy(req); err != nil { + return err + } + if err := validateBlobSize( + req.GetActivityId(), + "StartActivityExecution", + h.config.BlobSizeLimitError, + h.config.BlobSizeLimitWarn, + req.Input.Size(), + h.logger, + req.GetNamespace()); err != nil { + return serviceerror.NewInvalidArgument("input exceeds length limit") + } + if req.GetSearchAttributes() != nil { + if err := validateAndNormalizeSearchAttributes( + req, + h.saMapperProvider, + h.saValidator); err != nil { + return err + } + } + return nil +} + // activityOptionsFromStartRequest builds an ActivityOptions from the inlined fields // of a StartActivityExecutionRequest for use with shared validation logic. func activityOptionsFromStartRequest(req *workflowservice.StartActivityExecutionRequest) *apiactivitypb.ActivityOptions { @@ -439,7 +467,7 @@ func (h *frontendHandler) PauseActivityExecution( return nil, ErrStandaloneActivityDisabled } - if err := validatePauseActivityExecutionRequest( + if err := validateAndNormalizePauseActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, @@ -471,7 +499,7 @@ func (h *frontendHandler) UnpauseActivityExecution( return nil, ErrStandaloneActivityDisabled } - if err := validateUnpauseActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { + if err := validateAndNormalizeUnpauseActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { return nil, err } @@ -524,7 +552,7 @@ func (h *frontendHandler) UpdateActivityExecutionOptions( return nil, ErrStandaloneActivityDisabled } - if err := validateUpdateActivityExecutionOptionsRequest( + if err := validateAndNormalizeUpdateActivityExecutionOptionsRequest( req, h.config.MaxIDLengthLimit(), ); err != nil { diff --git a/chasm/lib/activity/frontend_test.go b/chasm/lib/activity/frontend_test.go index e692de13b84..648ce3b52e8 100644 --- 
a/chasm/lib/activity/frontend_test.go +++ b/chasm/lib/activity/frontend_test.go @@ -84,7 +84,7 @@ func TestRequestIdStableAcrossRetries(t *testing.T) { ActivityId: "test-activity", } validateTwice(t, req, func() error { - return validateAndNormalizeTerminateRequest( + return validateAndNormalizeTerminateActivityExecutionRequest( req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) }) }) @@ -95,7 +95,7 @@ func TestRequestIdStableAcrossRetries(t *testing.T) { ActivityId: "test-activity", } validateTwice(t, req, func() error { - return validateAndNormalizeCancelRequest( + return validateAndNormalizeRequestCancelActivityExecutionRequest( req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) }) }) diff --git a/chasm/lib/activity/gen/activitypb/v1/service.pb.go b/chasm/lib/activity/gen/activitypb/v1/service.pb.go index 34266e9af32..0f94b37263c 100644 --- a/chasm/lib/activity/gen/activitypb/v1/service.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/service.pb.go @@ -27,7 +27,7 @@ var File_temporal_server_chasm_lib_activity_proto_v1_service_proto protoreflect. 
const file_temporal_server_chasm_lib_activity_proto_v1_service_proto_rawDesc = "" + "\n" + - "9temporal/server/chasm/lib/activity/proto/v1/service.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1aBtemporal/server/chasm/lib/activity/proto/v1/request_response.proto\x1a.temporal/server/api/routing/v1/extension.proto\x1a0temporal/server/api/common/v1/api_category.proto2\x80\x13\n" + + "9temporal/server/chasm/lib/activity/proto/v1/service.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1aBtemporal/server/chasm/lib/activity/proto/v1/request_response.proto\x1a0temporal/server/api/common/v1/api_category.proto\x1a.temporal/server/api/routing/v1/extension.proto2\x80\x13\n" + "\x0fActivityService\x12\xdb\x01\n" + "\x16StartActivityExecution\x12J.temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionRequest\x1aK.temporal.server.chasm.lib.activity.proto.v1.StartActivityExecutionResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.activity_id\x8a\xb5\x18\x02\b\x01\x12\xe4\x01\n" + "\x19DescribeActivityExecution\x12M.temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionRequest\x1aN.temporal.server.chasm.lib.activity.proto.v1.DescribeActivityExecutionResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.activity_id\x8a\xb5\x18\x02\b\x01\x12\xd8\x01\n" + diff --git a/chasm/lib/activity/proto/v1/activity_state.proto b/chasm/lib/activity/proto/v1/activity_state.proto index 6bd801818f5..6d3d227205a 100644 --- a/chasm/lib/activity/proto/v1/activity_state.proto +++ b/chasm/lib/activity/proto/v1/activity_state.proto @@ -2,8 +2,6 @@ syntax = "proto3"; package temporal.server.chasm.lib.activity.proto.v1; -option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; - import "google/protobuf/duration.proto"; import "google/protobuf/timestamp.proto"; import "temporal/api/activity/v1/message.proto"; @@ -13,91 +11,93 @@ import "temporal/api/failure/v1/message.proto"; import "temporal/api/sdk/v1/user_metadata.proto"; import 
"temporal/api/taskqueue/v1/message.proto"; +option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; + enum ActivityExecutionStatus { - ACTIVITY_EXECUTION_STATUS_UNSPECIFIED = 0; - // The activity has been scheduled, but a worker has not accepted the task for the current - // attempt. The activity may be backing off between attempts or waiting for a worker to pick it - // up. - ACTIVITY_EXECUTION_STATUS_SCHEDULED = 1; - // A worker has accepted a task for the current attempt. - ACTIVITY_EXECUTION_STATUS_STARTED = 2; - // A caller has requested cancellation of the activity. - ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED = 3; - // The activity completed successfully. - ACTIVITY_EXECUTION_STATUS_COMPLETED = 4; - // The activity completed with failure. - ACTIVITY_EXECUTION_STATUS_FAILED = 5; - // The activity completed as canceled. - // Requesting to cancel an activity does not automatically transition the activity to canceled status. If the worker - // responds to cancel the activity after requesting cancellation, the status will transition to cancelled. If the - // activity completes, fails, times out or terminates after cancel is requested and before the worker responds with - // cancelled. The activity will be stay in the terminal non-cancelled status. - ACTIVITY_EXECUTION_STATUS_CANCELED = 6; - // The activity was terminated. Termination does not reach the worker and the activity code cannot react to it. - // A terminated activity may have a running attempt and will be requested to be canceled by the server when it - // heartbeats. - ACTIVITY_EXECUTION_STATUS_TERMINATED = 7; - // The activity has timed out by reaching the specified schedule-to-start or schedule-to-close timeouts. - // Additionally, after all retries are exhausted for start-to-close or heartbeat timeouts, the activity will also - // transition to timed out status. 
- ACTIVITY_EXECUTION_STATUS_TIMED_OUT = 8; - // The activity has been paused while in the SCHEDULED state. No worker will be dispatched until - // the activity is unpaused. The activity's pause_state field is populated with the identity, - // reason, and time of the pause request. - // - // Note: pausing a STARTED activity does not transition to this status. Instead, the pause is - // delivered as a flag (pause_state is set, status stays STARTED) and the worker is notified - // via ActivityPaused=true on its next heartbeat. The external run state in that case is - // PAUSE_REQUESTED. If the worker fails and retries while the flag is set, the retry lands in - // SCHEDULED with pause_state still populated and the dispatch task is blocked until unpause. - ACTIVITY_EXECUTION_STATUS_PAUSED = 9; + ACTIVITY_EXECUTION_STATUS_UNSPECIFIED = 0; + // The activity has been scheduled, but a worker has not accepted the task for the current + // attempt. The activity may be backing off between attempts or waiting for a worker to pick it + // up. + ACTIVITY_EXECUTION_STATUS_SCHEDULED = 1; + // A worker has accepted a task for the current attempt. + ACTIVITY_EXECUTION_STATUS_STARTED = 2; + // A caller has requested cancellation of the activity. + ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED = 3; + // The activity completed successfully. + ACTIVITY_EXECUTION_STATUS_COMPLETED = 4; + // The activity completed with failure. + ACTIVITY_EXECUTION_STATUS_FAILED = 5; + // The activity completed as canceled. + // Requesting to cancel an activity does not automatically transition the activity to canceled status. If the worker + // responds to cancel the activity after requesting cancellation, the status will transition to cancelled. If the + // activity completes, fails, times out or terminates after cancel is requested and before the worker responds with + // cancelled. The activity will be stay in the terminal non-cancelled status. 
+ ACTIVITY_EXECUTION_STATUS_CANCELED = 6; + // The activity was terminated. Termination does not reach the worker and the activity code cannot react to it. + // A terminated activity may have a running attempt and will be requested to be canceled by the server when it + // heartbeats. + ACTIVITY_EXECUTION_STATUS_TERMINATED = 7; + // The activity has timed out by reaching the specified schedule-to-start or schedule-to-close timeouts. + // Additionally, after all retries are exhausted for start-to-close or heartbeat timeouts, the activity will also + // transition to timed out status. + ACTIVITY_EXECUTION_STATUS_TIMED_OUT = 8; + // The activity has been paused while in the SCHEDULED state. No worker will be dispatched until + // the activity is unpaused. The activity's pause_state field is populated with the identity, + // reason, and time of the pause request. + // + // Note: pausing a STARTED activity does not transition to this status. Instead, the pause is + // delivered as a flag (pause_state is set, status stays STARTED) and the worker is notified + // via ActivityPaused=true on its next heartbeat. The external run state in that case is + // PAUSE_REQUESTED. If the worker fails and retries while the flag is set, the retry lands in + // SCHEDULED with pause_state still populated and the dispatch task is blocked until unpause. + ACTIVITY_EXECUTION_STATUS_PAUSED = 9; } message ActivityState { - // The type of the activity, a string that maps to a registered activity on a worker. - temporal.api.common.v1.ActivityType activity_type = 1; - - temporal.api.taskqueue.v1.TaskQueue task_queue = 2; - - // Indicates how long the caller is willing to wait for an activity completion. Limits how long - // retries will be attempted. Either this or `start_to_close_timeout` must be specified. - // - // (-- api-linter: core::0140::prepositions=disabled - // aip.dev/not-precedent: "to" is used to indicate interval. 
--) - google.protobuf.Duration schedule_to_close_timeout = 3; - // Limits time an activity task can stay in a task queue before a worker picks it up. This - // timeout is always non retryable, as all a retry would achieve is to put it back into the same - // queue. Defaults to `schedule_to_close_timeout` or workflow execution timeout if not - // specified. - // - // (-- api-linter: core::0140::prepositions=disabled - // aip.dev/not-precedent: "to" is used to indicate interval. --) - google.protobuf.Duration schedule_to_start_timeout = 4; - // Maximum time an activity is allowed to execute after being picked up by a worker. This - // timeout is always retryable. Either this or `schedule_to_close_timeout` must be - // specified. - // - // (-- api-linter: core::0140::prepositions=disabled - // aip.dev/not-precedent: "to" is used to indicate interval. --) - google.protobuf.Duration start_to_close_timeout = 5; - // Maximum permitted time between successful worker heartbeats. - google.protobuf.Duration heartbeat_timeout = 6; - // The retry policy for the activity. Will never exceed `schedule_to_close_timeout`. - temporal.api.common.v1.RetryPolicy retry_policy = 7; - - // All of the possible activity statuses (covers both the public ActivityExecutionStatus and PendingActivityState). - // TODO: consider moving this into ActivityAttemptState and renaming that message. This could save mutating two - // components on each attempt transition. - ActivityExecutionStatus status = 8; - - // Time the activity was originally scheduled via a StartActivityExecution request. - google.protobuf.Timestamp schedule_time = 9; - - // Priority metadata. - temporal.api.common.v1.Priority priority = 10; - - // Set if activity cancellation was requested. - ActivityCancelState cancel_state = 11; + // The type of the activity, a string that maps to a registered activity on a worker. 
+ temporal.api.common.v1.ActivityType activity_type = 1; + + temporal.api.taskqueue.v1.TaskQueue task_queue = 2; + + // Indicates how long the caller is willing to wait for an activity completion. Limits how long + // retries will be attempted. Either this or `start_to_close_timeout` must be specified. + // + // (-- api-linter: core::0140::prepositions=disabled + // aip.dev/not-precedent: "to" is used to indicate interval. --) + google.protobuf.Duration schedule_to_close_timeout = 3; + // Limits time an activity task can stay in a task queue before a worker picks it up. This + // timeout is always non retryable, as all a retry would achieve is to put it back into the same + // queue. Defaults to `schedule_to_close_timeout` or workflow execution timeout if not + // specified. + // + // (-- api-linter: core::0140::prepositions=disabled + // aip.dev/not-precedent: "to" is used to indicate interval. --) + google.protobuf.Duration schedule_to_start_timeout = 4; + // Maximum time an activity is allowed to execute after being picked up by a worker. This + // timeout is always retryable. Either this or `schedule_to_close_timeout` must be + // specified. + // + // (-- api-linter: core::0140::prepositions=disabled + // aip.dev/not-precedent: "to" is used to indicate interval. --) + google.protobuf.Duration start_to_close_timeout = 5; + // Maximum permitted time between successful worker heartbeats. + google.protobuf.Duration heartbeat_timeout = 6; + // The retry policy for the activity. Will never exceed `schedule_to_close_timeout`. + temporal.api.common.v1.RetryPolicy retry_policy = 7; + + // All of the possible activity statuses (covers both the public ActivityExecutionStatus and PendingActivityState). + // TODO: consider moving this into ActivityAttemptState and renaming that message. This could save mutating two + // components on each attempt transition. 
+ ActivityExecutionStatus status = 8; + + // Time the activity was originally scheduled via a StartActivityExecution request. + google.protobuf.Timestamp schedule_time = 9; + + // Priority metadata. + temporal.api.common.v1.Priority priority = 10; + + // Set if activity cancellation was requested. + ActivityCancelState cancel_state = 11; // Set if the activity was terminated ActivityTerminateState terminate_state = 12; @@ -116,10 +116,10 @@ message ActivityState { } message ActivityCancelState { - string request_id = 1; - google.protobuf.Timestamp request_time = 2; - string identity = 3; - string reason = 4; + string request_id = 1; + google.protobuf.Timestamp request_time = 2; + string identity = 3; + string reason = 4; } message ActivityTerminateState { @@ -127,62 +127,62 @@ message ActivityTerminateState { } message ActivityPauseState { - google.protobuf.Timestamp pause_time = 1; - string identity = 2; - string reason = 3; + google.protobuf.Timestamp pause_time = 1; + string identity = 2; + string reason = 3; } message ActivityAttemptState { - // The attempt this activity is currently on. - // Incremented each time a new attempt is scheduled. A newly created activity will immediately be scheduled, and - // the count is set to 1. - int32 count = 1; - - // Time from the last attempt failure to the next activity retry. - // If the activity is currently running, this represents the next retry interval in case the attempt fails. - // If activity is currently backing off between attempt, this represents the current retry interval. - // If there is no next retry allowed, this field will be null. - // This interval is typically calculated from the specified retry policy, but may be modified if an activity fails - // with a retryable application failure specifying a retry delay. - google.protobuf.Duration current_retry_interval = 2; - - // Time the last attempt was started. 
- google.protobuf.Timestamp started_time = 3; - - // The time when the last activity attempt completed. If activity has not been completed yet, it will be null. - google.protobuf.Timestamp complete_time = 4; - - message LastFailureDetails { - // The last time the activity attempt failed. - google.protobuf.Timestamp time = 1; - - // Failure details from the last failed attempt. - temporal.api.failure.v1.Failure failure = 2; - } - - // Details about the last failure. This will only be updated when an activity attempt fails, - // including start-to-close timeout. Activity success, termination, schedule-to-start and schedule-to-close timeouts - // will not reset it. - LastFailureDetails last_failure_details = 5; - - // An incremental version number used to validate attempt-scoped tasks - // (ActivityDispatchTask, ScheduleToStartTimeoutTask, StartToCloseTimeoutTask, HeartbeatTimeoutTask). - // Incremented on each new attempt and on options updates, so that in-flight tasks from the - // previous attempt or pre-update state are discarded. - // Note: ScheduleToCloseTimeoutTask uses a separate ActivityState.schedule_to_close_stamp because - // it spans the full activity lifetime and must not be invalidated on retry. - // TODO: also invalidate on pause and reset when those are supported. - int32 stamp = 6; - - string last_worker_identity = 7; - - // The Worker Deployment Version this activity was dispatched to most recently. - // If nil, the activity has not yet been dispatched or was last dispatched to an unversioned worker. - temporal.api.deployment.v1.WorkerDeploymentVersion last_deployment_version = 8; - - // The request ID that came from matching's RecordActivityTaskStarted API call. Used to make this API idempotent in - // case of implicit retries. - string start_request_id = 9; + // The attempt this activity is currently on. + // Incremented each time a new attempt is scheduled. A newly created activity will immediately be scheduled, and + // the count is set to 1. 
+ int32 count = 1; + + // Time from the last attempt failure to the next activity retry. + // If the activity is currently running, this represents the next retry interval in case the attempt fails. + // If activity is currently backing off between attempt, this represents the current retry interval. + // If there is no next retry allowed, this field will be null. + // This interval is typically calculated from the specified retry policy, but may be modified if an activity fails + // with a retryable application failure specifying a retry delay. + google.protobuf.Duration current_retry_interval = 2; + + // Time the last attempt was started. + google.protobuf.Timestamp started_time = 3; + + // The time when the last activity attempt completed. If activity has not been completed yet, it will be null. + google.protobuf.Timestamp complete_time = 4; + + message LastFailureDetails { + // The last time the activity attempt failed. + google.protobuf.Timestamp time = 1; + + // Failure details from the last failed attempt. + temporal.api.failure.v1.Failure failure = 2; + } + + // Details about the last failure. This will only be updated when an activity attempt fails, + // including start-to-close timeout. Activity success, termination, schedule-to-start and schedule-to-close timeouts + // will not reset it. + LastFailureDetails last_failure_details = 5; + + // An incremental version number used to validate attempt-scoped tasks + // (ActivityDispatchTask, ScheduleToStartTimeoutTask, StartToCloseTimeoutTask, HeartbeatTimeoutTask). + // Incremented on each new attempt and on options updates, so that in-flight tasks from the + // previous attempt or pre-update state are discarded. + // Note: ScheduleToCloseTimeoutTask uses a separate ActivityState.schedule_to_close_stamp because + // it spans the full activity lifetime and must not be invalidated on retry. + // TODO: also invalidate on pause and reset when those are supported. 
+ int32 stamp = 6; + + string last_worker_identity = 7; + + // The Worker Deployment Version this activity was dispatched to most recently. + // If nil, the activity has not yet been dispatched or was last dispatched to an unversioned worker. + temporal.api.deployment.v1.WorkerDeploymentVersion last_deployment_version = 8; + + // The request ID that came from matching's RecordActivityTaskStarted API call. Used to make this API idempotent in + // case of implicit retries. + string start_request_id = 9; } message ActivityHeartbeatState { @@ -193,27 +193,27 @@ message ActivityHeartbeatState { } message ActivityRequestData { - // Serialized activity input, passed as arguments to the activity function. - temporal.api.common.v1.Payloads input = 1; - temporal.api.common.v1.Header header = 2; + // Serialized activity input, passed as arguments to the activity function. + temporal.api.common.v1.Payloads input = 1; + temporal.api.common.v1.Header header = 2; - // Metadata for use by user interfaces to display the fixed as-of-start summary and details of the activity. - temporal.api.sdk.v1.UserMetadata user_metadata = 3; + // Metadata for use by user interfaces to display the fixed as-of-start summary and details of the activity. + temporal.api.sdk.v1.UserMetadata user_metadata = 3; } message ActivityOutcome { - message Successful { - temporal.api.common.v1.Payloads output = 1; - } - - message Failed { - // Only filled on schedule-to-start timeouts, schedule-to-close timeouts or terminations. All other attempt - // failures will be recorded in ActivityAttemptState.last_failure_details. - temporal.api.failure.v1.Failure failure = 1; - } - - oneof variant { - Successful successful = 1; - Failed failed = 2; - } + message Successful { + temporal.api.common.v1.Payloads output = 1; + } + + message Failed { + // Only filled on schedule-to-start timeouts, schedule-to-close timeouts or terminations. 
All other attempt + // failures will be recorded in ActivityAttemptState.last_failure_details. + temporal.api.failure.v1.Failure failure = 1; + } + + oneof variant { + Successful successful = 1; + Failed failed = 2; + } } diff --git a/chasm/lib/activity/proto/v1/request_response.proto b/chasm/lib/activity/proto/v1/request_response.proto index d4713c3d4c2..451f88bbb63 100644 --- a/chasm/lib/activity/proto/v1/request_response.proto +++ b/chasm/lib/activity/proto/v1/request_response.proto @@ -2,100 +2,94 @@ syntax = "proto3"; package temporal.server.chasm.lib.activity.proto.v1; -option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; - import "temporal/api/workflowservice/v1/request_response.proto"; +option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; + message StartActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.StartActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.StartActivityExecutionRequest frontend_request = 2; } message StartActivityExecutionResponse { - temporal.api.workflowservice.v1.StartActivityExecutionResponse frontend_response = 1; + temporal.api.workflowservice.v1.StartActivityExecutionResponse frontend_response = 1; } message DescribeActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.DescribeActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.DescribeActivityExecutionRequest frontend_request = 2; } message DescribeActivityExecutionResponse { - temporal.api.workflowservice.v1.DescribeActivityExecutionResponse frontend_response = 1; + temporal.api.workflowservice.v1.DescribeActivityExecutionResponse frontend_response = 1; } message PollActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.PollActivityExecutionRequest frontend_request = 
2; + temporal.api.workflowservice.v1.PollActivityExecutionRequest frontend_request = 2; } message PollActivityExecutionResponse { - temporal.api.workflowservice.v1.PollActivityExecutionResponse frontend_response = 1; + temporal.api.workflowservice.v1.PollActivityExecutionResponse frontend_response = 1; } message TerminateActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.TerminateActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.TerminateActivityExecutionRequest frontend_request = 2; } -message TerminateActivityExecutionResponse { -} +message TerminateActivityExecutionResponse {} message RequestCancelActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.RequestCancelActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.RequestCancelActivityExecutionRequest frontend_request = 2; } -message RequestCancelActivityExecutionResponse { -} +message RequestCancelActivityExecutionResponse {} message DeleteActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.DeleteActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.DeleteActivityExecutionRequest frontend_request = 2; } -message DeleteActivityExecutionResponse { -} +message DeleteActivityExecutionResponse {} message PauseActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.PauseActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.PauseActivityExecutionRequest frontend_request = 2; } -message PauseActivityExecutionResponse { -} +message PauseActivityExecutionResponse {} message UnpauseActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.UnpauseActivityExecutionRequest frontend_request = 2; + 
temporal.api.workflowservice.v1.UnpauseActivityExecutionRequest frontend_request = 2; } -message UnpauseActivityExecutionResponse { -} +message UnpauseActivityExecutionResponse {} message ResetActivityExecutionRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.ResetActivityExecutionRequest frontend_request = 2; + temporal.api.workflowservice.v1.ResetActivityExecutionRequest frontend_request = 2; } -message ResetActivityExecutionResponse { -} +message ResetActivityExecutionResponse {} message UpdateActivityExecutionOptionsRequest { - string namespace_id = 1; + string namespace_id = 1; - temporal.api.workflowservice.v1.UpdateActivityExecutionOptionsRequest frontend_request = 2; + temporal.api.workflowservice.v1.UpdateActivityExecutionOptionsRequest frontend_request = 2; } message UpdateActivityExecutionOptionsResponse { - temporal.api.workflowservice.v1.UpdateActivityExecutionOptionsResponse frontend_response = 1; + temporal.api.workflowservice.v1.UpdateActivityExecutionOptionsResponse frontend_response = 1; } diff --git a/chasm/lib/activity/proto/v1/service.proto b/chasm/lib/activity/proto/v1/service.proto index e900ee9a8f1..1517acc9977 100644 --- a/chasm/lib/activity/proto/v1/service.proto +++ b/chasm/lib/activity/proto/v1/service.proto @@ -2,72 +2,72 @@ syntax = "proto3"; package temporal.server.chasm.lib.activity.proto.v1; -option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; - import "chasm/lib/activity/proto/v1/request_response.proto"; -import "temporal/server/api/routing/v1/extension.proto"; import "temporal/server/api/common/v1/api_category.proto"; +import "temporal/server/api/routing/v1/extension.proto"; + +option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; service ActivityService { - rpc StartActivityExecution(StartActivityExecutionRequest) returns (StartActivityExecutionResponse) { - option 
(temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } + rpc StartActivityExecution(StartActivityExecutionRequest) returns (StartActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } - rpc DescribeActivityExecution(DescribeActivityExecutionRequest) returns (DescribeActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } + rpc DescribeActivityExecution(DescribeActivityExecutionRequest) returns (DescribeActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } - rpc PollActivityExecution(PollActivityExecutionRequest) returns (PollActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_LONG_POLL; - } + rpc PollActivityExecution(PollActivityExecutionRequest) returns (PollActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_LONG_POLL; + } - rpc TerminateActivityExecution(TerminateActivityExecutionRequest) returns (TerminateActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } + rpc 
TerminateActivityExecution(TerminateActivityExecutionRequest) returns (TerminateActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } - rpc RequestCancelActivityExecution(RequestCancelActivityExecutionRequest) returns (RequestCancelActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } + rpc RequestCancelActivityExecution(RequestCancelActivityExecutionRequest) returns (RequestCancelActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } - rpc DeleteActivityExecution(DeleteActivityExecutionRequest) returns (DeleteActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } + rpc DeleteActivityExecution(DeleteActivityExecutionRequest) returns (DeleteActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.activity_id"; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } - rpc PauseActivityExecution(PauseActivityExecutionRequest) returns (PauseActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing) = { - business_id: "frontend_request.workflow_id" - business_id: "frontend_request.activity_id" - }; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } + rpc PauseActivityExecution(PauseActivityExecutionRequest) returns (PauseActivityExecutionResponse) { + option 
(temporal.server.api.routing.v1.routing) = { + business_id: "frontend_request.workflow_id" + business_id: "frontend_request.activity_id" + }; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } - rpc UnpauseActivityExecution(UnpauseActivityExecutionRequest) returns (UnpauseActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing) = { - business_id: "frontend_request.workflow_id" - business_id: "frontend_request.activity_id" - }; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } + rpc UnpauseActivityExecution(UnpauseActivityExecutionRequest) returns (UnpauseActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing) = { + business_id: "frontend_request.workflow_id" + business_id: "frontend_request.activity_id" + }; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } - rpc ResetActivityExecution(ResetActivityExecutionRequest) returns (ResetActivityExecutionResponse) { - option (temporal.server.api.routing.v1.routing) = { - business_id: "frontend_request.workflow_id" - business_id: "frontend_request.activity_id" - }; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } + rpc ResetActivityExecution(ResetActivityExecutionRequest) returns (ResetActivityExecutionResponse) { + option (temporal.server.api.routing.v1.routing) = { + business_id: "frontend_request.workflow_id" + business_id: "frontend_request.activity_id" + }; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } - rpc UpdateActivityExecutionOptions(UpdateActivityExecutionOptionsRequest) returns (UpdateActivityExecutionOptionsResponse) { - option (temporal.server.api.routing.v1.routing) = { - business_id: "frontend_request.workflow_id" - business_id: "frontend_request.activity_id" - }; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; - } 
+ rpc UpdateActivityExecutionOptions(UpdateActivityExecutionOptionsRequest) returns (UpdateActivityExecutionOptionsResponse) { + option (temporal.server.api.routing.v1.routing) = { + business_id: "frontend_request.workflow_id" + business_id: "frontend_request.activity_id" + }; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } } diff --git a/chasm/lib/activity/proto/v1/tasks.proto b/chasm/lib/activity/proto/v1/tasks.proto index 736d6767ac2..e70753124fd 100644 --- a/chasm/lib/activity/proto/v1/tasks.proto +++ b/chasm/lib/activity/proto/v1/tasks.proto @@ -5,29 +5,29 @@ package temporal.server.chasm.lib.activity.proto.v1; option go_package = "go.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypb"; message ActivityDispatchTask { - // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. - int32 stamp = 1; + // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. + int32 stamp = 1; } message ScheduleToStartTimeoutTask { - // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. - int32 stamp = 1; + // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. + int32 stamp = 1; } message ScheduleToCloseTimeoutTask { - // The schedule-to-close stamp for this task. Used for task validation. - // See also [ActivityState.schedule_to_close_stamp]. - // Tasks without a stamp (stamp=0) predate this field and are not validated by stamp. - int32 stamp = 1; + // The schedule-to-close stamp for this task. Used for task validation. + // See also [ActivityState.schedule_to_close_stamp]. + // Tasks without a stamp (stamp=0) predate this field and are not validated by stamp. + int32 stamp = 1; } message StartToCloseTimeoutTask { - // The current stamp for this activity execution. Used for task validation. 
See also [ActivityAttemptState]. - int32 stamp = 1; + // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. + int32 stamp = 1; } // HeartbeatTimeoutTask is a pure task that enforces heartbeat timeouts. message HeartbeatTimeoutTask { - // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. - int32 stamp = 1; + // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. + int32 stamp = 1; } diff --git a/chasm/lib/activity/validator.go b/chasm/lib/activity/validator.go index eb78302a1c8..0770c8efa96 100644 --- a/chasm/lib/activity/validator.go +++ b/chasm/lib/activity/validator.go @@ -58,10 +58,9 @@ func ValidateAndNormalizeEmbeddedActivity( options *activitypb.ActivityOptions, priority *commonpb.Priority, runTimeout *durationpb.Duration, + workflowTaskQueueName string, ) error { - // We cannot use NormalizeAndValidateUserDefined for embedded activity task queue because embedded activities can - // use reserved task queues, which are not considered user defined. 
- if err := tqid.NormalizeAndValidate(options.TaskQueue, "", maxIDLengthLimit); err != nil { + if err := tqid.NormalizeAndValidateUserDefined(options.TaskQueue, "", workflowTaskQueueName, maxIDLengthLimit); err != nil { return err } @@ -279,7 +278,7 @@ func validateAndNormalizeSearchAttributes( return saValidator.ValidateSize(saToValidate, namespaceName) } -func validateDescribeActivityExecutionRequest( +func validateAndNormalizeDescribeActivityExecutionRequest( req *workflowservice.DescribeActivityExecutionRequest, maxIDLengthLimit int, ) error { @@ -305,7 +304,7 @@ func validateDescribeActivityExecutionRequest( return nil } -func validatePollActivityExecutionRequest( +func validateAndNormalizePollActivityExecutionRequest( req *workflowservice.PollActivityExecutionRequest, maxIDLengthLimit int, ) error { @@ -325,13 +324,17 @@ func validatePollActivityExecutionRequest( return nil } -func validateRequestCancelActivityExecutionRequest( +func validateAndNormalizeRequestCancelActivityExecutionRequest( req *workflowservice.RequestCancelActivityExecutionRequest, maxIDLengthLimit int, blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, blobSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter, logger log.Logger, ) error { + if req.GetRequestId() == "" { + req.RequestId = uuid.NewString() + } + if req.GetActivityId() == "" { return serviceerror.NewInvalidArgument("activity ID is required") } @@ -374,7 +377,7 @@ func validateRequestCancelActivityExecutionRequest( } //nolint:revive // cyclomatic: per-field validation of a field-mask update requires explicit handling of each field -func validateUpdateActivityExecutionOptionsRequest( +func validateAndNormalizeUpdateActivityExecutionOptionsRequest( req *workflowservice.UpdateActivityExecutionOptionsRequest, maxIDLengthLimit int, ) error { @@ -499,7 +502,7 @@ func validateUpdateActivityExecutionOptionsRequest( return nil } -func validateDeleteActivityExecutionRequest( +func 
validateAndNormalizeDeleteActivityExecutionRequest( req *workflowservice.DeleteActivityExecutionRequest, maxIDLengthLimit int, ) error { @@ -522,13 +525,17 @@ func validateDeleteActivityExecutionRequest( return nil } -func validateTerminateActivityExecutionRequest( +func validateAndNormalizeTerminateActivityExecutionRequest( req *workflowservice.TerminateActivityExecutionRequest, maxIDLengthLimit int, blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, blobSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter, logger log.Logger, ) error { + if req.GetRequestId() == "" { + req.RequestId = uuid.NewString() + } + if req.GetActivityId() == "" { return serviceerror.NewInvalidArgument("activity ID is required") } @@ -570,7 +577,7 @@ func validateTerminateActivityExecutionRequest( return nil } -func validatePauseActivityExecutionRequest( +func validateAndNormalizePauseActivityExecutionRequest( req *workflowservice.PauseActivityExecutionRequest, maxIDLengthLimit int, blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, @@ -607,7 +614,7 @@ func validatePauseActivityExecutionRequest( return nil } -func validateUnpauseActivityExecutionRequest( +func validateAndNormalizeUnpauseActivityExecutionRequest( req *workflowservice.UnpauseActivityExecutionRequest, maxIDLengthLimit int, ) error { diff --git a/chasm/lib/activity/validator_test.go b/chasm/lib/activity/validator_test.go index b8f62f0ec19..beb9787be12 100644 --- a/chasm/lib/activity/validator_test.go +++ b/chasm/lib/activity/validator_test.go @@ -429,7 +429,7 @@ func TestValidateStandAloneRequestIDTooLong(t *testing.T) { } h := newTestFrontendHandler(defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, defaultMaxIDLengthLimit) - err := validateAndNormalizeStartRequest(req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, h.config.BlobSizeLimitWarn, h.logger, h.saMapperProvider, h.saValidator) + err := h.validateAndNormalizeStartActivityExecutionRequest(req) var invalidArgErr 
*serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) } @@ -449,7 +449,7 @@ func TestValidateStandAloneInputTooLarge(t *testing.T) { } h := newTestFrontendHandler(defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, defaultMaxIDLengthLimit) - err := validateAndNormalizeStartRequest(req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, h.config.BlobSizeLimitWarn, h.logger, h.saMapperProvider, h.saValidator) + err := h.validateAndNormalizeStartActivityExecutionRequest(req) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) } @@ -476,7 +476,7 @@ func TestValidateStandAloneInputWarningSizeShouldSucceed(t *testing.T) { func(ns string) int { return payloadSize }, defaultMaxIDLengthLimit, ) - err := validateAndNormalizeStartRequest(req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, h.config.BlobSizeLimitWarn, h.logger, h.saMapperProvider, h.saValidator) + err := h.validateAndNormalizeStartActivityExecutionRequest(req) require.NoError(t, err) } @@ -494,7 +494,7 @@ func TestValidateStandAlone_IDPolicyShouldDefault(t *testing.T) { } h := newTestFrontendHandler(defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, defaultMaxIDLengthLimit) - err := validateAndNormalizeStartRequest(req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, h.config.BlobSizeLimitWarn, h.logger, h.saMapperProvider, h.saValidator) + err := h.validateAndNormalizeStartActivityExecutionRequest(req) require.NoError(t, err) require.Equal(t, enumspb.ACTIVITY_ID_REUSE_POLICY_ALLOW_DUPLICATE, req.IdReusePolicy) @@ -646,7 +646,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { req := &workflowservice.DeleteActivityExecutionRequest{ ActivityId: defaultActivityID, } - err := validateAndNormalizeDeleteRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ -655,7 +655,7 @@ func 
TestValidateDeleteActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", } - err := validateAndNormalizeDeleteRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ -663,7 +663,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { req := &workflowservice.DeleteActivityExecutionRequest{ ActivityId: "", } - err := validateAndNormalizeDeleteRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) }) @@ -672,7 +672,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { req := &workflowservice.DeleteActivityExecutionRequest{ ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), } - err := validateAndNormalizeDeleteRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) }) @@ -682,7 +682,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, RunId: "not-a-valid-uuid", } - err := validateAndNormalizeDeleteRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) }) @@ -695,7 +695,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { Identity: "test-identity", Reason: "test-reason", } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, 
defaultBlobSizeLimitWarn, log.NewNoopLogger()) require.NoError(t, err) }) @@ -705,7 +705,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", Identity: "test-identity", } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) require.NoError(t, err) }) @@ -714,7 +714,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { ActivityId: "", Identity: "test-identity", } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "activity ID is required", invalidArgErr.Message) @@ -725,7 +725,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), Identity: "test-identity", } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("activity ID exceeds length limit. 
Length=%d Limit=%d", @@ -737,7 +737,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("identity exceeds length limit. Length=%d Limit=%d", @@ -750,7 +750,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { Identity: "test-identity", Reason: string(make([]byte, defaultBlobSizeLimitError("default")+1)), } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "reason exceeds length limit", invalidArgErr.Message) @@ -762,7 +762,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { RunId: "not-a-valid-uuid", Identity: "test-identity", } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "invalid run id: must be a valid UUID", invalidArgErr.Message) @@ -775,7 +775,7 @@ func 
TestValidateUnpauseActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, Identity: "test-identity", } - err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ -785,7 +785,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", Identity: "test-identity", } - err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ -794,7 +794,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { ActivityId: "", Identity: "test-identity", } - err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "activity ID is required", invalidArgErr.Message) @@ -805,7 +805,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), Identity: "test-identity", } - err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("activity ID exceeds length limit. 
Length=%d Limit=%d", @@ -817,7 +817,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), } - err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("identity exceeds length limit. Length=%d Limit=%d", @@ -830,7 +830,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { RunId: "not-a-valid-uuid", Identity: "test-identity", } - err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "invalid run id: must be a valid UUID", invalidArgErr.Message) diff --git a/chasm/lib/scheduler/gen/schedulerpb/v1/service.pb.go b/chasm/lib/scheduler/gen/schedulerpb/v1/service.pb.go index be6244cb83d..8c5afe3c650 100644 --- a/chasm/lib/scheduler/gen/schedulerpb/v1/service.pb.go +++ b/chasm/lib/scheduler/gen/schedulerpb/v1/service.pb.go @@ -29,15 +29,15 @@ const file_temporal_server_chasm_lib_scheduler_proto_v1_service_proto_rawDesc = "\n" + ":temporal/server/chasm/lib/scheduler/proto/v1/service.proto\x12,temporal.server.chasm.lib.scheduler.proto.v1\x1aCtemporal/server/chasm/lib/scheduler/proto/v1/request_response.proto\x1a0temporal/server/api/common/v1/api_category.proto\x1a.temporal/server/api/routing/v1/extension.proto2\xb6\x0e\n" + "\x10SchedulerService\x12\xc5\x01\n" + - "\x0eCreateSchedule\x12C.temporal.server.chasm.lib.scheduler.proto.v1.CreateScheduleRequest\x1aD.temporal.server.chasm.lib.scheduler.proto.v1.CreateScheduleResponse\"(\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1e\x1a\x1cfrontend_request.schedule_id\x12\xc5\x01\n" 
+ - "\x0eUpdateSchedule\x12C.temporal.server.chasm.lib.scheduler.proto.v1.UpdateScheduleRequest\x1aD.temporal.server.chasm.lib.scheduler.proto.v1.UpdateScheduleResponse\"(\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1e\x1a\x1cfrontend_request.schedule_id\x12\xc2\x01\n" + - "\rPatchSchedule\x12B.temporal.server.chasm.lib.scheduler.proto.v1.PatchScheduleRequest\x1aC.temporal.server.chasm.lib.scheduler.proto.v1.PatchScheduleResponse\"(\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1e\x1a\x1cfrontend_request.schedule_id\x12\xc5\x01\n" + - "\x0eDeleteSchedule\x12C.temporal.server.chasm.lib.scheduler.proto.v1.DeleteScheduleRequest\x1aD.temporal.server.chasm.lib.scheduler.proto.v1.DeleteScheduleResponse\"(\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1e\x1a\x1cfrontend_request.schedule_id\x12\xcb\x01\n" + - "\x10DescribeSchedule\x12E.temporal.server.chasm.lib.scheduler.proto.v1.DescribeScheduleRequest\x1aF.temporal.server.chasm.lib.scheduler.proto.v1.DescribeScheduleResponse\"(\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1e\x1a\x1cfrontend_request.schedule_id\x12\xe6\x01\n" + - "\x19ListScheduleMatchingTimes\x12N.temporal.server.chasm.lib.scheduler.proto.v1.ListScheduleMatchingTimesRequest\x1aO.temporal.server.chasm.lib.scheduler.proto.v1.ListScheduleMatchingTimesResponse\"(\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1e\x1a\x1cfrontend_request.schedule_id\x12\xe2\x01\n" + - "\x18CreateFromMigrationState\x12M.temporal.server.chasm.lib.scheduler.proto.v1.CreateFromMigrationStateRequest\x1aN.temporal.server.chasm.lib.scheduler.proto.v1.CreateFromMigrationStateResponse\"'\xd2\xc3\x18#\x1a!state.scheduler_state.schedule_id\x12\xae\x01\n" + - "\x0eCreateSentinel\x12C.temporal.server.chasm.lib.scheduler.proto.v1.CreateSentinelRequest\x1aD.temporal.server.chasm.lib.scheduler.proto.v1.CreateSentinelResponse\"\x11\xd2\xc3\x18\r\x1a\vschedule_id\x12\xb7\x01\n" + - 
"\x11MigrateToWorkflow\x12F.temporal.server.chasm.lib.scheduler.proto.v1.MigrateToWorkflowRequest\x1aG.temporal.server.chasm.lib.scheduler.proto.v1.MigrateToWorkflowResponse\"\x11\xd2\xc3\x18\r\x1a\vschedule_idBGZEgo.temporal.io/server/chasm/lib/scheduler/gen/schedulerpb;schedulerpbb\x06proto3" + "\x0eCreateSchedule\x12C.temporal.server.chasm.lib.scheduler.proto.v1.CreateScheduleRequest\x1aD.temporal.server.chasm.lib.scheduler.proto.v1.CreateScheduleResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.schedule_id\x8a\xb5\x18\x02\b\x01\x12\xc5\x01\n" + + "\x0eUpdateSchedule\x12C.temporal.server.chasm.lib.scheduler.proto.v1.UpdateScheduleRequest\x1aD.temporal.server.chasm.lib.scheduler.proto.v1.UpdateScheduleResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.schedule_id\x8a\xb5\x18\x02\b\x01\x12\xc2\x01\n" + + "\rPatchSchedule\x12B.temporal.server.chasm.lib.scheduler.proto.v1.PatchScheduleRequest\x1aC.temporal.server.chasm.lib.scheduler.proto.v1.PatchScheduleResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.schedule_id\x8a\xb5\x18\x02\b\x01\x12\xc5\x01\n" + + "\x0eDeleteSchedule\x12C.temporal.server.chasm.lib.scheduler.proto.v1.DeleteScheduleRequest\x1aD.temporal.server.chasm.lib.scheduler.proto.v1.DeleteScheduleResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.schedule_id\x8a\xb5\x18\x02\b\x01\x12\xcb\x01\n" + + "\x10DescribeSchedule\x12E.temporal.server.chasm.lib.scheduler.proto.v1.DescribeScheduleRequest\x1aF.temporal.server.chasm.lib.scheduler.proto.v1.DescribeScheduleResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.schedule_id\x8a\xb5\x18\x02\b\x01\x12\xe6\x01\n" + + "\x19ListScheduleMatchingTimes\x12N.temporal.server.chasm.lib.scheduler.proto.v1.ListScheduleMatchingTimesRequest\x1aO.temporal.server.chasm.lib.scheduler.proto.v1.ListScheduleMatchingTimesResponse\"(\x92\xc4\x03\x1e\x1a\x1cfrontend_request.schedule_id\x8a\xb5\x18\x02\b\x01\x12\xe2\x01\n" + + 
"\x18CreateFromMigrationState\x12M.temporal.server.chasm.lib.scheduler.proto.v1.CreateFromMigrationStateRequest\x1aN.temporal.server.chasm.lib.scheduler.proto.v1.CreateFromMigrationStateResponse\"'\x92\xc4\x03#\x1a!state.scheduler_state.schedule_id\x12\xae\x01\n" + + "\x0eCreateSentinel\x12C.temporal.server.chasm.lib.scheduler.proto.v1.CreateSentinelRequest\x1aD.temporal.server.chasm.lib.scheduler.proto.v1.CreateSentinelResponse\"\x11\x92\xc4\x03\r\x1a\vschedule_id\x12\xb7\x01\n" + + "\x11MigrateToWorkflow\x12F.temporal.server.chasm.lib.scheduler.proto.v1.MigrateToWorkflowRequest\x1aG.temporal.server.chasm.lib.scheduler.proto.v1.MigrateToWorkflowResponse\"\x11\x92\xc4\x03\r\x1a\vschedule_idBGZEgo.temporal.io/server/chasm/lib/scheduler/gen/schedulerpb;schedulerpbb\x06proto3" var file_temporal_server_chasm_lib_scheduler_proto_v1_service_proto_goTypes = []any{ (*CreateScheduleRequest)(nil), // 0: temporal.server.chasm.lib.scheduler.proto.v1.CreateScheduleRequest diff --git a/chasm/lib/tests/gen/testspb/v1/service.pb.go b/chasm/lib/tests/gen/testspb/v1/service.pb.go index 5b33e9f9c77..236ecca1ee7 100644 --- a/chasm/lib/tests/gen/testspb/v1/service.pb.go +++ b/chasm/lib/tests/gen/testspb/v1/service.pb.go @@ -29,7 +29,7 @@ const file_temporal_server_chasm_lib_tests_proto_v1_service_proto_rawDesc = "" + "\n" + "6temporal/server/chasm/lib/tests/proto/v1/service.proto\x12(temporal.server.chasm.lib.tests.proto.v1\x1a?temporal/server/chasm/lib/tests/proto/v1/request_response.proto\x1a0temporal/server/api/common/v1/api_category.proto\x1a.temporal/server/api/routing/v1/extension.proto2\x93\x01\n" + "\vTestService\x12\x83\x01\n" + - "\x04Test\x125.temporal.server.chasm.lib.tests.proto.v1.TestRequest\x1a6.temporal.server.chasm.lib.tests.proto.v1.TestResponse\"\f\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x02\b\x01B;Z9go.temporal.io/server/chasm/lib/tests/gen/testspb;testspbb\x06proto3" + 
"\x04Test\x125.temporal.server.chasm.lib.tests.proto.v1.TestRequest\x1a6.temporal.server.chasm.lib.tests.proto.v1.TestResponse\"\f\x92\xc4\x03\x02\b\x01\x8a\xb5\x18\x02\b\x01B;Z9go.temporal.io/server/chasm/lib/tests/gen/testspb;testspbb\x06proto3" var file_temporal_server_chasm_lib_tests_proto_v1_service_proto_goTypes = []any{ (*TestRequest)(nil), // 0: temporal.server.chasm.lib.tests.proto.v1.TestRequest diff --git a/common/api/metadata.go b/common/api/metadata.go index 24c0eacd961..bb584d38000 100644 --- a/common/api/metadata.go +++ b/common/api/metadata.go @@ -163,7 +163,6 @@ var ( "GetDeploymentReachability": {Scope: ScopeNamespace, Access: AccessReadOnly, Polling: PollingNone}, // [cleanup-wv-pre-release] "GetCurrentDeployment": {Scope: ScopeNamespace, Access: AccessReadOnly, Polling: PollingNone}, // [cleanup-wv-pre-release] "SetCurrentDeployment": {Scope: ScopeNamespace, Access: AccessWrite, Polling: PollingNone}, // [cleanup-wv-pre-release] - "SetCurrentDeploymentVersion": {Scope: ScopeNamespace, Access: AccessWrite, Polling: PollingNone}, // [cleanup-wv-pre-release] "DescribeWorkerDeploymentVersion": {Scope: ScopeNamespace, Access: AccessReadOnly, Polling: PollingNone}, "DescribeWorkerDeployment": {Scope: ScopeNamespace, Access: AccessReadOnly, Polling: PollingNone}, "SetWorkerDeploymentCurrentVersion": {Scope: ScopeNamespace, Access: AccessWrite, Polling: PollingNone}, diff --git a/proto/internal/temporal/server/api/routing/v1/extension.proto b/proto/internal/temporal/server/api/routing/v1/extension.proto index 8a3bcd9c460..0feb2b630ef 100644 --- a/proto/internal/temporal/server/api/routing/v1/extension.proto +++ b/proto/internal/temporal/server/api/routing/v1/extension.proto @@ -2,18 +2,20 @@ syntax = "proto3"; package temporal.server.api.routing.v1; -option go_package = "go.temporal.io/server/api/routing/v1;routing"; - import "google/protobuf/descriptor.proto"; -extend google.protobuf.MethodOptions { optional RoutingOptions routing = 7234; } +option 
go_package = "go.temporal.io/server/api/routing/v1;routing"; + +extend google.protobuf.MethodOptions { + optional RoutingOptions routing = 7234; +} message RoutingOptions { - // Requests will be routed to a random shard. - bool random = 1; - // Requests may specify how to obtain the namespace ID. Defaults to the "namespace_id" field. - string namespace_id = 2; - // Requests will be routed by resolving the namespace ID and business ID to a given shard. - // If multiple fields are specified, the first non-empty value is used. - repeated string business_id = 3; + // Requests will be routed to a random shard. + bool random = 1; + // Requests may specify how to obtain the namespace ID. Defaults to the "namespace_id" field. + string namespace_id = 2; + // Requests will be routed by resolving the namespace ID and business ID to a given shard. + // If multiple fields are specified, the first non-empty value is used. + repeated string business_id = 3; } diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index 1181800ae4f..7ff58238c72 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -2276,8 +2276,12 @@ func (s *standaloneActivityTestSuite) TestTerminate() { require.Nil(t, info.GetLastFailure()) expectedFailure := &failurepb.Failure{ - Message: "Test Termination", - FailureInfo: &failurepb.Failure_TerminatedFailureInfo{}, + Message: "Test Termination", + FailureInfo: &failurepb.Failure_TerminatedFailureInfo{ + TerminatedFailureInfo: &failurepb.TerminatedFailureInfo{ + Identity: "terminator", + }, + }, } protorequire.ProtoEqual(t, expectedFailure, activityResp.GetOutcome().GetFailure()) }) @@ -3237,6 +3241,7 @@ func (s *standaloneActivityTestSuite) TestDescribeActivityExecution_NoWait() { protorequire.IgnoreFields( "execution_duration", "schedule_time", + "state_size_bytes", "state_transition_count", ), ) @@ -3298,6 +3303,7 @@ func (s *standaloneActivityTestSuite) 
TestDescribeActivityExecution_WaitAnyState protorequire.IgnoreFields( "execution_duration", "schedule_time", + "state_size_bytes", "state_transition_count", ), ) @@ -3357,6 +3363,7 @@ func (s *standaloneActivityTestSuite) TestDescribeActivityExecution_WaitAnyState "execution_duration", "last_started_time", "schedule_time", + "state_size_bytes", "state_transition_count", ), ) From a32d5ff5b2fd790424800f49d4c2d3733b6660f8 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Thu, 23 Apr 2026 12:23:47 -0400 Subject: [PATCH 05/25] pause: return serviceerror.FailedPrecondition when pausing a Paused activity (#10001) ## What changed? Calling `PauseActivityExecution` on an already paused activity now returns a `serviceerror.FailedPrecondition` to the user. ## Why? If a user attempts to pause an already paused activity with an identity and reason and then inspects that activity, it will show a different identity and reason than the ones supplied, which is confusing behavior. Returning an error is a better way to show that the request did not alter the activity. ## How did you test it? - [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [X] added new functional test(s) ## Potential risks None for now; this merges into a feature branch. After the feature branch is merged into main, the behavior will change for workflow activities, which currently treat Pause on an already paused activity as a no-op.
--- api/persistence/v1/executions.pb.go | 16 +- chasm/lib/activity/activity.go | 8 +- .../gen/activitypb/v1/activity_state.pb.go | 14 +- chasm/lib/activity/handler.go | 7 +- .../activity/proto/v1/activity_state.proto | 1 + .../api/persistence/v1/executions.proto | 2 + service/history/api/pauseactivity/api.go | 1 + service/history/workflow/activity.go | 6 +- tests/activity_api_pause_test.go | 222 +++++++++++++++++- tests/standalone_activity_test.go | 29 ++- 10 files changed, 280 insertions(+), 26 deletions(-) diff --git a/api/persistence/v1/executions.pb.go b/api/persistence/v1/executions.pb.go index 87acaa3432a..3fc4e216147 100644 --- a/api/persistence/v1/executions.pb.go +++ b/api/persistence/v1/executions.pb.go @@ -4502,6 +4502,7 @@ type ActivityInfo_PauseInfo struct { // *ActivityInfo_PauseInfo_Manual_ // *ActivityInfo_PauseInfo_RuleId PausedBy isActivityInfo_PauseInfo_PausedBy `protobuf_oneof:"paused_by"` + RequestId string `protobuf:"bytes,4,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -4568,6 +4569,13 @@ func (x *ActivityInfo_PauseInfo) GetRuleId() string { return "" } +func (x *ActivityInfo_PauseInfo) GetRequestId() string { + if x != nil { + return x.RequestId + } + return "" +} + type isActivityInfo_PauseInfo_PausedBy interface { isActivityInfo_PauseInfo_PausedBy() } @@ -5184,7 +5192,7 @@ const file_temporal_server_api_persistence_v1_executions_proto_rawDesc = "" + "\x17NexusInvocationTaskInfo\x12\x18\n" + "\aattempt\x18\x01 \x01(\x05R\aattempt\"4\n" + "\x18NexusCancelationTaskInfo\x12\x18\n" + - "\aattempt\x18\x01 \x01(\x05R\aattempt\"\xa9\x1c\n" + + "\aattempt\x18\x01 \x01(\x05R\aattempt\"\xc8\x1c\n" + "\fActivityInfo\x12\x18\n" + "\aversion\x18\x01 \x01(\x03R\aversion\x127\n" + "\x18scheduled_event_batch_id\x18\x02 \x01(\x03R\x15scheduledEventBatchId\x12A\n" + @@ -5242,12 +5250,14 @@ const file_temporal_server_api_persistence_v1_executions_proto_rawDesc = 
"" + "\rstarted_clock\x184 \x01(\v2).temporal.server.api.clock.v1.VectorClockR\fstartedClock\x1ay\n" + "\x16UseWorkflowBuildIdInfo\x12+\n" + "\x12last_used_build_id\x18\x01 \x01(\tR\x0flastUsedBuildId\x122\n" + - "\x15last_redirect_counter\x18\x02 \x01(\x03R\x13lastRedirectCounter\x1a\x89\x02\n" + + "\x15last_redirect_counter\x18\x02 \x01(\x03R\x13lastRedirectCounter\x1a\xa8\x02\n" + "\tPauseInfo\x129\n" + "\n" + "pause_time\x18\x01 \x01(\v2\x1a.google.protobuf.TimestampR\tpauseTime\x12[\n" + "\x06manual\x18\x02 \x01(\v2A.temporal.server.api.persistence.v1.ActivityInfo.PauseInfo.ManualH\x00R\x06manual\x12\x19\n" + - "\arule_id\x18\x03 \x01(\tH\x00R\x06ruleId\x1a<\n" + + "\arule_id\x18\x03 \x01(\tH\x00R\x06ruleId\x12\x1d\n" + + "\n" + + "request_id\x18\x04 \x01(\tR\trequestId\x1a<\n" + "\x06Manual\x12\x1a\n" + "\bidentity\x18\x01 \x01(\tR\bidentity\x12\x16\n" + "\x06reason\x18\x02 \x01(\tR\x06reasonB\v\n" + diff --git a/chasm/lib/activity/activity.go b/chasm/lib/activity/activity.go index 4887c2f89b4..853f4d4181e 100644 --- a/chasm/lib/activity/activity.go +++ b/chasm/lib/activity/activity.go @@ -787,7 +787,12 @@ func (a *Activity) handlePauseRequested(ctx chasm.MutableContext, req *activityp return nil, serviceerror.NewFailedPrecondition("cannot pause an activity with a pending cancellation") } if a.PauseState != nil { - return &activitypb.PauseActivityExecutionResponse{}, nil + newReqID := req.GetFrontendRequest().GetRequestId() + existingReqID := a.PauseState.GetRequestId() + if newReqID != "" && existingReqID == newReqID { + return &activitypb.PauseActivityExecutionResponse{}, nil + } + return &activitypb.PauseActivityExecutionResponse{}, serviceerror.NewFailedPrecondition("activity is already paused") } metricsHandler, err := a.enrichMetricsHandler(ctx, metrics.ActivityPausedScope) @@ -892,6 +897,7 @@ func (a *Activity) pause( PauseTime: timestamppb.New(ctx.Now(a)), Identity: event.req.GetIdentity(), Reason: event.req.GetReason(), + RequestId: 
event.req.GetRequestId(), } a.emitOnPausedMetrics(event.metricsHandler) } diff --git a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go index d18150d46fc..10fd8dc2925 100644 --- a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go @@ -463,6 +463,7 @@ type ActivityPauseState struct { PauseTime *timestamppb.Timestamp `protobuf:"bytes,1,opt,name=pause_time,json=pauseTime,proto3" json:"pause_time,omitempty"` Identity string `protobuf:"bytes,2,opt,name=identity,proto3" json:"identity,omitempty"` Reason string `protobuf:"bytes,3,opt,name=reason,proto3" json:"reason,omitempty"` + RequestId string `protobuf:"bytes,4,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -518,6 +519,13 @@ func (x *ActivityPauseState) GetReason() string { return "" } +func (x *ActivityPauseState) GetRequestId() string { + if x != nil { + return x.RequestId + } + return "" +} + type ActivityAttemptState struct { state protoimpl.MessageState `protogen:"open.v1"` // The attempt this activity is currently on. 
@@ -1025,12 +1033,14 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\x06reason\x18\x04 \x01(\tR\x06reason\"7\n" + "\x16ActivityTerminateState\x12\x1d\n" + "\n" + - "request_id\x18\x01 \x01(\tR\trequestId\"\x83\x01\n" + + "request_id\x18\x01 \x01(\tR\trequestId\"\xa2\x01\n" + "\x12ActivityPauseState\x129\n" + "\n" + "pause_time\x18\x01 \x01(\v2\x1a.google.protobuf.TimestampR\tpauseTime\x12\x1a\n" + "\bidentity\x18\x02 \x01(\tR\bidentity\x12\x16\n" + - "\x06reason\x18\x03 \x01(\tR\x06reason\"\xe8\x05\n" + + "\x06reason\x18\x03 \x01(\tR\x06reason\x12\x1d\n" + + "\n" + + "request_id\x18\x04 \x01(\tR\trequestId\"\xe8\x05\n" + "\x14ActivityAttemptState\x12\x14\n" + "\x05count\x18\x01 \x01(\x05R\x05count\x12O\n" + "\x16current_retry_interval\x18\x02 \x01(\v2\x19.google.protobuf.DurationR\x14currentRetryInterval\x12=\n" + diff --git a/chasm/lib/activity/handler.go b/chasm/lib/activity/handler.go index 19185aa33c6..06aff612f6a 100644 --- a/chasm/lib/activity/handler.go +++ b/chasm/lib/activity/handler.go @@ -347,9 +347,10 @@ func (h *handler) PauseActivityExecution(ctx context.Context, req *activitypb.Pa WorkflowId: frontendReq.GetWorkflowId(), RunId: frontendReq.GetRunId(), }, - Activity: &workflowservice.PauseActivityRequest_Id{Id: frontendReq.GetActivityId()}, - Reason: frontendReq.GetReason(), - Identity: frontendReq.GetIdentity(), + Activity: &workflowservice.PauseActivityRequest_Id{Id: frontendReq.GetActivityId()}, + Reason: frontendReq.GetReason(), + Identity: frontendReq.GetIdentity(), + RequestId: frontendReq.GetRequestId(), }, }) if err != nil { diff --git a/chasm/lib/activity/proto/v1/activity_state.proto b/chasm/lib/activity/proto/v1/activity_state.proto index 6d3d227205a..3494a6ac670 100644 --- a/chasm/lib/activity/proto/v1/activity_state.proto +++ b/chasm/lib/activity/proto/v1/activity_state.proto @@ -130,6 +130,7 @@ message ActivityPauseState { google.protobuf.Timestamp pause_time = 1; string identity = 2; string reason = 
3; + string request_id = 4; } message ActivityAttemptState { diff --git a/proto/internal/temporal/server/api/persistence/v1/executions.proto b/proto/internal/temporal/server/api/persistence/v1/executions.proto index 906eb9ac783..bba05765623 100644 --- a/proto/internal/temporal/server/api/persistence/v1/executions.proto +++ b/proto/internal/temporal/server/api/persistence/v1/executions.proto @@ -686,6 +686,8 @@ message ActivityInfo { // Id of the rule that paused the activity. string rule_id = 3; } + + string request_id = 4; } PauseInfo pause_info = 46; diff --git a/service/history/api/pauseactivity/api.go b/service/history/api/pauseactivity/api.go index 3221ed0787d..81ab79637a3 100644 --- a/service/history/api/pauseactivity/api.go +++ b/service/history/api/pauseactivity/api.go @@ -59,6 +59,7 @@ func Invoke( Reason: frontendRequest.GetReason(), }, }, + RequestId: request.GetFrontendRequest().GetRequestId(), } for _, activityId := range activityIDs { diff --git a/service/history/workflow/activity.go b/service/history/workflow/activity.go index 8054f4c02ba..e6720dae56c 100644 --- a/service/history/workflow/activity.go +++ b/service/history/workflow/activity.go @@ -267,8 +267,10 @@ func PauseActivity( } if ai.Paused { - // do nothing - return nil + if ai.GetPauseInfo().GetRequestId() == pauseInfo.GetRequestId() { + return nil + } + return serviceerror.NewFailedPrecondition("activity is already paused") } return mutableState.UpdateActivity(ai.ScheduledEventId, func(activityInfo *persistencespb.ActivityInfo, _ historyi.MutableState) error { diff --git a/tests/activity_api_pause_test.go b/tests/activity_api_pause_test.go index be58a962db1..f3bee7f22b3 100644 --- a/tests/activity_api_pause_test.go +++ b/tests/activity_api_pause_test.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/require" commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" + "go.temporal.io/api/serviceerror" "go.temporal.io/api/workflowservice/v1" sdkclient 
"go.temporal.io/sdk/client" "go.temporal.io/sdk/temporal" @@ -24,7 +25,7 @@ import ( // PauseActivityExecution/UnpauseActivityExecution API. type activityPauseAPI struct { name string - pause func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity, reason string) error + pause func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity, reason, requestID string) error unpause func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity string, resetAttempts bool) error } @@ -32,13 +33,14 @@ func pauseAPIs() []activityPauseAPI { return []activityPauseAPI{ { name: "legacy-api", - pause: func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity, reason string) error { + pause: func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity, reason, requestID string) error { _, err := s.FrontendClient().PauseActivity(ctx, &workflowservice.PauseActivityRequest{ Namespace: s.Namespace().String(), Execution: &commonpb.WorkflowExecution{WorkflowId: wfID}, Activity: &workflowservice.PauseActivityRequest_Id{Id: actID}, Identity: identity, Reason: reason, + RequestId: requestID, }) return err }, @@ -55,13 +57,14 @@ func pauseAPIs() []activityPauseAPI { }, { name: "execution-api", - pause: func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity, reason string) error { + pause: func(ctx context.Context, s *testcore.TestEnv, wfID, actID, identity, reason, requestID string) error { _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ Namespace: s.Namespace().String(), WorkflowId: wfID, ActivityId: actID, Identity: identity, Reason: reason, + RequestId: requestID, }) return err }, @@ -88,7 +91,7 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { t.Parallel() t.Run("TestActivityPauseApi_WhileRunning", func(t *testing.T) { - s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) initialRetryInterval := 1 * time.Second scheduleToCloseTimeout := 
30 * time.Minute @@ -151,7 +154,8 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { // pause activity testIdentity := "test-identity" testReason := "test-reason" - s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", testIdentity, testReason)) + requestID := "test-request-id" + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", testIdentity, testReason, requestID)) // make sure activity is paused on server while running on worker s.EventuallyWithT(func(t *assert.CollectT) { @@ -213,7 +217,7 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { * 4. Validate activity failed * 5. Validate number of activity attempts increased */ - s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) initialRetryInterval := 1 * time.Second scheduleToCloseTimeout := 30 * time.Minute @@ -280,7 +284,8 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { // pause activity testIdentity := "test-identity" testReason := "test-reason" - s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", testIdentity, testReason)) + testRequestID := "test-request-id" + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", testIdentity, testReason, testRequestID)) // make sure activity is paused on server while running on worker s.EventuallyWithT(func(t *assert.CollectT) { @@ -331,7 +336,7 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { // In this case, pause happens when activity is in retry state. // Make sure that activity is paused and then unpaused. // Also check that activity will not be retried while unpaused. 
- s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) initialRetryInterval := 1 * time.Second scheduleToCloseTimeout := 30 * time.Minute @@ -392,7 +397,8 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { // pause activity testIdentity := "test-identity" testReason := "test-reason" - s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", testIdentity, testReason)) + testRequestID := "test-request-id" + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", testIdentity, testReason, testRequestID)) // wait long enough for activity to retry if pause is not working require.NoError(t, util.InterruptibleSleep(ctx, 2*time.Second)) @@ -426,7 +432,7 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { // In this case, pause can happen when activity is in retry state. // Make sure that activity is paused and then unpaused. // Also tests noWait flag. - s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) initialRetryInterval := 30 * time.Second scheduleToCloseTimeout := 30 * time.Minute @@ -485,7 +491,8 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { }, 5*time.Second, 100*time.Millisecond) // pause activity - s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", "", "")) + testRequestID := "test-request-id" + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", "", "", testRequestID)) // unpause the activity s.NoError(api.unpause(ctx, s, workflowRun.GetID(), "activity-id", "", false)) @@ -503,7 +510,7 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { t.Run("TestActivityPauseApi_WithReset", func(t *testing.T) { // pause/unpause the activity with reset option and noWait flag - s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) initialRetryInterval := 1 * time.Second scheduleToCloseTimeout := 30 * time.Minute @@ -566,7 +573,8 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { }, 5*time.Second, 100*time.Millisecond) // 
pause activity - s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", "", "")) + testRequestID := "test-request-id" + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", "", "", testRequestID)) // wait for activity to be in paused state and waiting for retry s.EventuallyWithT(func(t *assert.CollectT) { @@ -602,6 +610,194 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { s.NoError(err) }) + + t.Run("TestActivityPauseApi_WhilePaused", func(t *testing.T) { + s := testcore.NewEnv(t) + + initialRetryInterval := 1 * time.Second + scheduleToCloseTimeout := 30 * time.Minute + startToCloseTimeout := 15 * time.Minute + activityRetryPolicy := &temporal.RetryPolicy{ + InitialInterval: initialRetryInterval, + BackoffCoefficient: 1, + } + makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { + return func(ctx workflow.Context) error { + var ret string + err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ + ActivityID: "activity-id", + DisableEagerExecution: true, + StartToCloseTimeout: startToCloseTimeout, + ScheduleToCloseTimeout: scheduleToCloseTimeout, + RetryPolicy: activityRetryPolicy, + }), activityFunction).Get(ctx, &ret) + return err + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityPausedCn := make(chan struct{}) + var startedActivityCount atomic.Int32 + activityErr := errors.New("bad-luck-please-retry") + + activityFunction := func() (string, error) { + startedActivityCount.Add(1) + if startedActivityCount.Load() == 1 { + s.WaitForChannel(ctx, activityPausedCn) + return "", activityErr + } + return "done!", nil + } + + workflowFn := makeWorkflowFunc(activityFunction) + + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + workflowOptions := sdkclient.StartWorkflowOptions{ + ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), + TaskQueue: s.WorkerTaskQueue(), + } + 
+ workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + s.NoError(err) + + // wait for activity to start + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.PendingActivities, 1) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 5*time.Second, 500*time.Millisecond) + + // pause activity + testIdentity := "test-identity" + testReason := "test-reason" + testRequestID := "test-request-id" + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", testIdentity, testReason, testRequestID)) + + // make sure activity is paused on server while running on worker + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.PendingActivities, 1) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, description.PendingActivities[0].State) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 5*time.Second, 500*time.Millisecond) + + // A second pause with a different request ID must return FailedPrecondition. + // The first pause was issued with testRequestID, so appending a suffix + // guarantees that the request ID used here differs from the stored one. 
+ err = api.pause(ctx, s, workflowRun.GetID(), "activity-id", testIdentity, testReason, testRequestID+"-2") + var failedPreconditionErr *serviceerror.FailedPrecondition + s.ErrorAs(err, &failedPreconditionErr) + + // unblock the activity + activityPausedCn <- struct{}{} + // make sure activity is paused on server and completed on the worker + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.PendingActivities, 1) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, description.PendingActivities[0].State) + require.Equal(t, int32(1), startedActivityCount.Load()) + }, 5*time.Second, 500*time.Millisecond) + + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + s.NoError(err) + s.Len(description.PendingActivities, 1) + s.True(description.PendingActivities[0].Paused) + + // wait long enough for activity to retry if pause is not working + // Note: because activity is retried we expect the attempts to be incremented + err = util.InterruptibleSleep(ctx, 2*time.Second) + s.NoError(err) + + // make sure activity is not completed, and was not retried + description, err = s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + s.NoError(err) + s.Len(description.PendingActivities, 1) + s.True(description.PendingActivities[0].Paused) + s.Equal(int32(2), description.PendingActivities[0].Attempt) + s.NotNil(description.PendingActivities[0].LastFailure) + s.Equal(activityErr.Error(), description.PendingActivities[0].LastFailure.Message) + s.NotNil(description.PendingActivities[0].PauseInfo) + s.NotNil(description.PendingActivities[0].PauseInfo.GetManual()) + s.Equal(testIdentity, description.PendingActivities[0].PauseInfo.GetManual().Identity) + s.Equal(testReason, 
description.PendingActivities[0].PauseInfo.GetManual().Reason) + + // unpause the activity + s.NoError(api.unpause(ctx, s, workflowRun.GetID(), "activity-id", "", false)) + + var out string + err = workflowRun.Get(ctx, &out) + + s.NoError(err) + }) + + t.Run("TestActivityPauseApi_SameRequestID_IsIdempotent", func(t *testing.T) { + // Pausing an already-paused activity with the same request ID must succeed (no-op). + s := testcore.NewEnv(t) + + scheduleToCloseTimeout := 30 * time.Minute + startToCloseTimeout := 15 * time.Minute + activityRetryPolicy := &temporal.RetryPolicy{ + InitialInterval: 30 * time.Second, + BackoffCoefficient: 1, + } + makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { + return func(ctx workflow.Context) error { + var ret string + err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ + ActivityID: "activity-id", + DisableEagerExecution: true, + StartToCloseTimeout: startToCloseTimeout, + ScheduleToCloseTimeout: scheduleToCloseTimeout, + RetryPolicy: activityRetryPolicy, + }), activityFunction).Get(ctx, &ret) + return err + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityFunction := ActivityFunctions(func() (string, error) { + return "", errors.New("fail-to-trigger-retry") + }) + workflowFn := makeWorkflowFunc(activityFunction) + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{ + ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), + TaskQueue: s.WorkerTaskQueue(), + }, workflowFn) + s.NoError(err) + + // Wait for the first attempt to fail and the activity to enter retry backoff (attempt 2). 
+ s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.PendingActivities, 1) + require.Equal(t, int32(2), description.PendingActivities[0].Attempt) + }, 5*time.Second, 100*time.Millisecond) + + // First pause with an explicit request ID. + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", "identity", "reason", "my-pause-request-id")) + + s.EventuallyWithT(func(t *assert.CollectT) { + description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, description.PendingActivities, 1) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, description.PendingActivities[0].State) + }, 5*time.Second, 100*time.Millisecond) + + // Second pause with the same request ID — must succeed (idempotent no-op). + s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", "identity", "reason", "my-pause-request-id")) + }) }) } } diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index 7ff58238c72..97dc179affd 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -7045,7 +7045,7 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { } }) - t.Run("PauseIdempotent", func(t *testing.T) { + t.Run("PauseWhilePaused", func(t *testing.T) { ctx := testcore.NewContext() activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) @@ -7063,7 +7063,32 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { _, err := s.FrontendClient().PauseActivityExecution(ctx, pauseReq) require.NoError(t, err) - // Second pause should succeed with no error (idempotent). + // Second pause should fail with FailedPrecondition (activity is already paused). 
+ _, err = s.FrontendClient().PauseActivityExecution(ctx, pauseReq) + var failedPreconditionErr *serviceerror.FailedPrecondition + require.ErrorAs(t, err, &failedPreconditionErr) + }) + + t.Run("PauseWhilePausedIdempotent", func(t *testing.T) { + ctx := testcore.NewContext() + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + pauseReq := &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + RequestId: "some-request-id", + } + _, err := s.FrontendClient().PauseActivityExecution(ctx, pauseReq) + require.NoError(t, err) + + // Second pause with the same request ID should succeed (idempotent no-op). _, err = s.FrontendClient().PauseActivityExecution(ctx, pauseReq) require.NoError(t, err) }) From c8ec21a5fbd62c88bf311c49714b587a87b777ad Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Tue, 28 Apr 2026 15:54:02 -0400 Subject: [PATCH 06/25] Implement ResetActivityExecution for Standalone Activities (#9852) ## What changed? Implemented ResetActivityExecution for standalone activities (SAA) in chasm/lib/activity. - `activity_state.proto`: Added `activity_reset` (bool) and `reset_heartbeats` (bool) fields to `ActivityState` to carry deferred-reset state across retries. - `statemachine.go`: `TransitionRescheduled` now checks the `ActivityReset` flag before incrementing the attempt count. When set, it zeroes the count first and optionally clears heartbeat details if ResetHeartbeats is also set. - `activity.go`: Added handleReset with two execution paths: - `SCHEDULED`: immediately resets `Count=1`, increments Stamp (invalidating any in-flight dispatch tasks), and enqueues a new ActivityDispatchTask. 
- `STARTED` / `CANCEL_REQUESTED`: sets `ActivityReset` = true (and optionally ResetHeartbeats = true) so the reset is applied on the next retry via TransitionRescheduled, without touching the running attempt's task token. - Terminal states: returns NotFound. - `handler.go`: Replaced serviceerror.NewUnimplemented(...) in ResetActivityExecution with a chasm.UpdateComponent call using (*Activity).handleReset. ## Why? `ResetActivityExecution` is already implemented for workflow-embedded activities. Standalone activities had stub handlers that returned Unimplemented, this brings SAA to feature parity with workflow activities for the reset operations. ## How did you test it? - [ ] built - [ ] run locally and tested manually - [X] covered by existing tests - [ ] added new unit test(s) - [X] added new functional test(s) ## Potential risks Minimal, this is a translation of an existing api (ResetActivity) --- chasm/lib/activity/activity.go | 136 ++- chasm/lib/activity/activity_tasks.go | 1 - chasm/lib/activity/frontend.go | 69 +- chasm/lib/activity/frontend_test.go | 4 +- .../gen/activitypb/v1/activity_state.pb.go | 31 +- chasm/lib/activity/handler.go | 17 +- .../activity/proto/v1/activity_state.proto | 7 + chasm/lib/activity/statemachine.go | 42 + chasm/lib/activity/statemachine_test.go | 259 +++++ chasm/lib/activity/validator.go | 83 +- chasm/lib/activity/validator_test.go | 112 ++- common/metrics/metric_defs.go | 14 +- service/history/api/pauseactivity/api.go | 2 +- service/history/api/resetactivity/api.go | 2 +- service/history/api/unpauseactivity/api.go | 2 +- .../history/api/updateactivityoptions/api.go | 2 +- tests/activity_api_pause_test.go | 131 +++ tests/activity_api_reset_test.go | 141 +++ tests/standalone_activity_test.go | 901 +++++++++++++++++- 19 files changed, 1849 insertions(+), 107 deletions(-) diff --git a/chasm/lib/activity/activity.go b/chasm/lib/activity/activity.go index 853f4d4181e..95284646001 100644 --- a/chasm/lib/activity/activity.go +++ 
b/chasm/lib/activity/activity.go @@ -659,6 +659,12 @@ func (a *Activity) UpdateActivityExecutionOptions( } } + metricsHandler, err := a.enrichMetricsHandler(ctx, metrics.ActivityUpdateOptionsScope) + if err != nil { + return nil, err + } + a.emitOnUpdateOptionsMetrics(metricsHandler) + return &activitypb.UpdateActivityExecutionOptionsResponse{ FrontendResponse: &workflowservice.UpdateActivityExecutionOptionsResponse{ ActivityOptions: &apiactivitypb.ActivityOptions{ @@ -889,6 +895,7 @@ func (a *Activity) unpause( &activitypb.ActivityDispatchTask{Stamp: attempt.GetStamp()}) a.emitOnUnpausedMetrics(event.metricsHandler) } + func (a *Activity) pause( ctx chasm.MutableContext, event pauseEvent, @@ -902,6 +909,119 @@ func (a *Activity) pause( a.emitOnPausedMetrics(event.metricsHandler) } +func (a *Activity) clearHeartbeat(ctx chasm.MutableContext) { + if hb, ok := a.LastHeartbeat.TryGet(ctx); ok { + hb.Details = nil + hb.RecordedTime = nil + } +} + +func (a *Activity) reset(ctx chasm.MutableContext, event resetEvent) { + attempt := a.LastAttempt.Get(ctx) + attempt.Count = 1 + attempt.Stamp++ + attempt.CurrentRetryInterval = nil + if event.req.GetResetHeartbeat() { + a.clearHeartbeat(ctx) + } + if timeout := a.GetScheduleToStartTimeout().AsDuration(); timeout > 0 { + ctx.AddTask( + a, + chasm.TaskAttributes{ScheduledTime: event.scheduleTime.Add(timeout)}, + &activitypb.ScheduleToStartTimeoutTask{Stamp: attempt.GetStamp()}, + ) + } + ctx.AddTask( + a, + chasm.TaskAttributes{ScheduledTime: event.scheduleTime}, + &activitypb.ActivityDispatchTask{Stamp: attempt.GetStamp()}, + ) + a.emitOnResetMetrics(event.handler) +} + +// handleReset handles the activity execution reset. +// For SCHEDULED/PAUSED activities: immediately re-dispatches at attempt 1. +// For STARTED/CANCEL_REQUESTED activities: defers the reset to the next retry via the ActivityReset flag. 
+func (a *Activity) handleReset(ctx chasm.MutableContext, req *activitypb.ResetActivityExecutionRequest) (*activitypb.ResetActivityExecutionResponse, error) { + frontendReq := req.GetFrontendRequest() + keepPaused := frontendReq.GetKeepPaused() + + metricsHandler, err := a.enrichMetricsHandler(ctx, metrics.ActivityResetScope) + if err != nil { + return nil, err + } + + if frontendReq.GetRestoreOriginalOptions() { + ogOptions := a.GetOriginalOptions() + a.TaskQueue = common.CloneProto(ogOptions.GetTaskQueue()) + a.ScheduleToCloseTimeout = common.CloneProto(ogOptions.GetScheduleToCloseTimeout()) + a.ScheduleToStartTimeout = common.CloneProto(ogOptions.GetScheduleToStartTimeout()) + a.StartToCloseTimeout = common.CloneProto(ogOptions.GetStartToCloseTimeout()) + a.HeartbeatTimeout = common.CloneProto(ogOptions.GetHeartbeatTimeout()) + a.RetryPolicy = common.CloneProto(ogOptions.GetRetryPolicy()) + a.Priority = common.CloneProto(ogOptions.GetPriority()) + } + + scheduleTime := ctx.Now(a) + if jitter := frontendReq.GetJitter().AsDuration(); jitter > 0 { + scheduleTime = scheduleTime.Add(time.Duration(rand.Int63n(int64(jitter)))) //nolint:gosec + } + + switch a.Status { + case activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, + activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED: + // Activity is running. Defer reset to the next retry so we don't break + // the running worker's task token (which encodes the current attempt count). + a.ActivityReset = true + if frontendReq.GetResetHeartbeat() { + a.ResetHeartbeats = true + } + if !keepPaused { + // Clear PauseState now so TransitionRescheduled can dispatch without being + // blocked by the validator (which drops dispatch tasks when PauseState != nil). 
+ a.PauseState = nil + } + a.emitOnResetMetrics(metricsHandler) + return &activitypb.ResetActivityExecutionResponse{}, nil + + case activitypb.ACTIVITY_EXECUTION_STATUS_PAUSED, + activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED: + // A SCHEDULED activity can carry PauseState when a pause was applied concurrently + // (e.g. deferred from a STARTED→retry path). In that case the dispatch task emitted + // by TransitionReset would be dropped by the validator, leaving the activity stuck. + // Treat any non-nil PauseState the same way as the explicit PAUSED status. + if a.PauseState != nil { + if keepPaused { + // Reset counts but keep the activity paused. + // No dispatch task — the user must unpause to re-dispatch. + attempt := a.LastAttempt.Get(ctx) + attempt.Count = 1 + attempt.Stamp++ + attempt.CurrentRetryInterval = nil + if frontendReq.GetResetHeartbeat() { + a.clearHeartbeat(ctx) + } + a.emitOnResetMetrics(metricsHandler) + return &activitypb.ResetActivityExecutionResponse{}, nil + } + // keepPaused=false: clear pause state so the dispatch task isn't dropped. + a.PauseState = nil + } + if err := TransitionReset.Apply(a, ctx, resetEvent{ + req: frontendReq, + scheduleTime: scheduleTime, + handler: metricsHandler, + }); err != nil { + return nil, err + } + return &activitypb.ResetActivityExecutionResponse{}, nil + + default: + // Terminal or unspecified state. + return nil, serviceerror.NewFailedPrecondition("activity execution is not running") + } +} + // recordScheduleToStartOrCloseTimeoutFailure records schedule-to-start or schedule-to-close timeouts. Such timeouts are not retried so we // set the outcome failure directly and leave the attempt failure as is. 
func (a *Activity) recordScheduleToStartOrCloseTimeoutFailure(ctx chasm.MutableContext, timeoutType enumspb.TimeoutType) error { @@ -1065,7 +1185,7 @@ func (a *Activity) RecordHeartbeat( return &historyservice.RecordActivityTaskHeartbeatResponse{ CancelRequested: a.Status == activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED, ActivityPaused: a.PauseState != nil, - // TODO(saa-preview): ActivityReset + // ActivityReset is intentionally not reported via heartbeat; reset takes effect on the next retry. }, nil } @@ -1484,17 +1604,27 @@ func (a *Activity) emitOnTimedOutMetrics( func (a *Activity) emitOnPausedMetrics( handler metrics.Handler, ) { - metrics.ActivityPauseRequests.With(handler).Record(1) metrics.ActivityPause.With(handler).Record(1) } +func (a *Activity) emitOnUpdateOptionsMetrics( + handler metrics.Handler, +) { + metrics.ActivityUpdateOptions.With(handler).Record(1) +} + func (a *Activity) emitOnUnpausedMetrics( handler metrics.Handler, ) { - metrics.ActivityUnpauseRequests.With(handler).Record(1) metrics.ActivityUnpause.With(handler).Record(1) } +func (a *Activity) emitOnResetMetrics( + handler metrics.Handler, +) { + metrics.ActivityReset.With(handler).Record(1) +} + // SearchAttributes implements chasm.VisibilitySearchAttributesProvider interface. // Returns the current search attribute values for this activity execution. 
func (a *Activity) SearchAttributes(_ chasm.Context) []chasm.SearchAttributeKeyValue { diff --git a/chasm/lib/activity/activity_tasks.go b/chasm/lib/activity/activity_tasks.go index b93d2fcba5e..fae7e340997 100644 --- a/chasm/lib/activity/activity_tasks.go +++ b/chasm/lib/activity/activity_tasks.go @@ -35,7 +35,6 @@ func (h *activityDispatchTaskHandler) Validate( _ chasm.TaskAttributes, task *activitypb.ActivityDispatchTask, ) (bool, error) { - // TODO(saa-preview): make sure we handle resets when we support them, as they will reset the attempt count // Do not dispatch while the activity has a pause flag set (SCHEDULED + PauseState from a retry // while a STARTED activity was flag-paused). TransitionStarted.Possible already returns false for // real PAUSED status activities (source must be SCHEDULED, and PAUSED → SCHEDULED via unpause). diff --git a/chasm/lib/activity/frontend.go b/chasm/lib/activity/frontend.go index da0210f0df4..b4114ee8e47 100644 --- a/chasm/lib/activity/frontend.go +++ b/chasm/lib/activity/frontend.go @@ -122,7 +122,7 @@ func (h *frontendHandler) DescribeActivityExecution( return nil, ErrStandaloneActivityDisabled } - err := validateAndNormalizeDescribeActivityExecutionRequest( + err := validateDescribeActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), ) @@ -151,7 +151,7 @@ func (h *frontendHandler) PollActivityExecution( return nil, ErrStandaloneActivityDisabled } - err := validateAndNormalizePollActivityExecutionRequest( + err := validatePollActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), ) @@ -266,7 +266,7 @@ func (h *frontendHandler) DeleteActivityExecution( return nil, ErrStandaloneActivityDisabled } - if err := validateAndNormalizeDeleteActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { + if err := validateDeleteActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { return nil, err } @@ -301,7 +301,7 @@ func (h *frontendHandler) TerminateActivityExecution( return nil, err } - if 
err := validateAndNormalizeTerminateActivityExecutionRequest( + if err := validateTerminateActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, @@ -334,7 +334,7 @@ func (h *frontendHandler) RequestCancelActivityExecution( return nil, err } - if err := validateAndNormalizeRequestCancelActivityExecutionRequest( + if err := validateRequestCancelActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, @@ -396,7 +396,16 @@ func (h *frontendHandler) validateAndPopulateStartRequest( } applyActivityOptionsToStartRequest(opts, req) - if err = h.validateAndNormalizeStartActivityExecutionRequest(req); err != nil { + err = validateAndNormalizeStartRequest( + req, + h.config.MaxIDLengthLimit(), + h.config.BlobSizeLimitError, + h.config.BlobSizeLimitWarn, + h.logger, + h.saMapperProvider, + h.saValidator, + ) + if err != nil { return nil, err } @@ -409,43 +418,6 @@ func (h *frontendHandler) validateAndPopulateStartRequest( return req, nil } -func (h *frontendHandler) validateAndNormalizeStartActivityExecutionRequest( - req *workflowservice.StartActivityExecutionRequest, -) error { - maxIDLengthLimit := h.config.MaxIDLengthLimit() - - if len(req.GetRequestId()) > maxIDLengthLimit { - return serviceerror.NewInvalidArgumentf("request ID exceeds length limit. Length=%d Limit=%d", - len(req.GetRequestId()), maxIDLengthLimit) - } - if len(req.GetIdentity()) > maxIDLengthLimit { - return serviceerror.NewInvalidArgumentf("identity exceeds length limit. 
Length=%d Limit=%d", - len(req.GetIdentity()), maxIDLengthLimit) - } - if err := normalizeAndValidateIDPolicy(req); err != nil { - return err - } - if err := validateBlobSize( - req.GetActivityId(), - "StartActivityExecution", - h.config.BlobSizeLimitError, - h.config.BlobSizeLimitWarn, - req.Input.Size(), - h.logger, - req.GetNamespace()); err != nil { - return serviceerror.NewInvalidArgument("input exceeds length limit") - } - if req.GetSearchAttributes() != nil { - if err := validateAndNormalizeSearchAttributes( - req, - h.saMapperProvider, - h.saValidator); err != nil { - return err - } - } - return nil -} - // activityOptionsFromStartRequest builds an ActivityOptions from the inlined fields // of a StartActivityExecutionRequest for use with shared validation logic. func activityOptionsFromStartRequest(req *workflowservice.StartActivityExecutionRequest) *apiactivitypb.ActivityOptions { @@ -467,7 +439,7 @@ func (h *frontendHandler) PauseActivityExecution( return nil, ErrStandaloneActivityDisabled } - if err := validateAndNormalizePauseActivityExecutionRequest( + if err := validatePauseActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, @@ -499,7 +471,7 @@ func (h *frontendHandler) UnpauseActivityExecution( return nil, ErrStandaloneActivityDisabled } - if err := validateAndNormalizeUnpauseActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { + if err := validateUnpauseActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { return nil, err } @@ -526,7 +498,10 @@ func (h *frontendHandler) ResetActivityExecution( return nil, ErrStandaloneActivityDisabled } - // TODO: validate request fields (e.g. 
namespace, identity length) + if err := validateResetActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { + return nil, err + } + namespaceID, err := h.namespaceRegistry.GetNamespaceID(namespace.Name(req.GetNamespace())) if err != nil { return nil, err @@ -552,7 +527,7 @@ func (h *frontendHandler) UpdateActivityExecutionOptions( return nil, ErrStandaloneActivityDisabled } - if err := validateAndNormalizeUpdateActivityExecutionOptionsRequest( + if err := validateUpdateActivityExecutionOptionsRequest( req, h.config.MaxIDLengthLimit(), ); err != nil { diff --git a/chasm/lib/activity/frontend_test.go b/chasm/lib/activity/frontend_test.go index 648ce3b52e8..0dce1270634 100644 --- a/chasm/lib/activity/frontend_test.go +++ b/chasm/lib/activity/frontend_test.go @@ -84,7 +84,7 @@ func TestRequestIdStableAcrossRetries(t *testing.T) { ActivityId: "test-activity", } validateTwice(t, req, func() error { - return validateAndNormalizeTerminateActivityExecutionRequest( + return validateTerminateActivityExecutionRequest( req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) }) }) @@ -95,7 +95,7 @@ func TestRequestIdStableAcrossRetries(t *testing.T) { ActivityId: "test-activity", } validateTwice(t, req, func() error { - return validateAndNormalizeRequestCancelActivityExecutionRequest( + return validateRequestCancelActivityExecutionRequest( req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) }) }) diff --git a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go index 10fd8dc2925..76e67ac0763 100644 --- a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go @@ -206,9 +206,14 @@ type ActivityState struct { // is NOT incremented on retries, because schedule-to-close spans the full activity lifetime. 
Stamp int32 `protobuf:"varint,14,opt,name=stamp,proto3" json:"stamp,omitempty"` // Set if the activity was paused. - PauseState *ActivityPauseState `protobuf:"bytes,15,opt,name=pause_state,json=pauseState,proto3" json:"pause_state,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + PauseState *ActivityPauseState `protobuf:"bytes,15,opt,name=pause_state,json=pauseState,proto3" json:"pause_state,omitempty"` + // Set when reset was requested while the activity was running. + // On the next retry, TransitionRescheduled will reset the attempt count to 1 before incrementing. + ActivityReset bool `protobuf:"varint,16,opt,name=activity_reset,json=activityReset,proto3" json:"activity_reset,omitempty"` + // Set alongside activity_reset when heartbeat details should be cleared on the next retry. + ResetHeartbeats bool `protobuf:"varint,17,opt,name=reset_heartbeats,json=resetHeartbeats,proto3" json:"reset_heartbeats,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *ActivityState) Reset() { @@ -346,6 +351,20 @@ func (x *ActivityState) GetPauseState() *ActivityPauseState { return nil } +func (x *ActivityState) GetActivityReset() bool { + if x != nil { + return x.ActivityReset + } + return false +} + +func (x *ActivityState) GetResetHeartbeats() bool { + if x != nil { + return x.ResetHeartbeats + } + return false +} + type ActivityCancelState struct { state protoimpl.MessageState `protogen:"open.v1"` RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` @@ -1005,7 +1024,7 @@ var File_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto protor const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDesc = "" + "\n" + - 
"@temporal/server/chasm/lib/activity/proto/v1/activity_state.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&temporal/api/activity/v1/message.proto\x1a$temporal/api/common/v1/message.proto\x1a(temporal/api/deployment/v1/message.proto\x1a%temporal/api/failure/v1/message.proto\x1a'temporal/api/sdk/v1/user_metadata.proto\x1a'temporal/api/taskqueue/v1/message.proto\"\xa9\t\n" + + "@temporal/server/chasm/lib/activity/proto/v1/activity_state.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&temporal/api/activity/v1/message.proto\x1a$temporal/api/common/v1/message.proto\x1a(temporal/api/deployment/v1/message.proto\x1a%temporal/api/failure/v1/message.proto\x1a'temporal/api/sdk/v1/user_metadata.proto\x1a'temporal/api/taskqueue/v1/message.proto\"\xfb\t\n" + "\rActivityState\x12I\n" + "\ractivity_type\x18\x01 \x01(\v2$.temporal.api.common.v1.ActivityTypeR\factivityType\x12C\n" + "\n" + @@ -1024,7 +1043,9 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\x10original_options\x18\r \x01(\v2).temporal.api.activity.v1.ActivityOptionsR\x0foriginalOptions\x12\x14\n" + "\x05stamp\x18\x0e \x01(\x05R\x05stamp\x12`\n" + "\vpause_state\x18\x0f \x01(\v2?.temporal.server.chasm.lib.activity.proto.v1.ActivityPauseStateR\n" + - "pauseState\"\xa7\x01\n" + + "pauseState\x12%\n" + + "\x0eactivity_reset\x18\x10 \x01(\bR\ractivityReset\x12)\n" + + "\x10reset_heartbeats\x18\x11 \x01(\bR\x0fresetHeartbeats\"\xa7\x01\n" + "\x13ActivityCancelState\x12\x1d\n" + "\n" + "request_id\x18\x01 \x01(\tR\trequestId\x12=\n" + diff --git a/chasm/lib/activity/handler.go b/chasm/lib/activity/handler.go index 06aff612f6a..8debd61e043 100644 --- a/chasm/lib/activity/handler.go +++ b/chasm/lib/activity/handler.go @@ -433,7 +433,22 @@ func (h *handler) ResetActivityExecution(ctx context.Context, req *activitypb.Re 
} return &activitypb.ResetActivityExecutionResponse{}, nil } - return nil, serviceerror.NewUnimplemented("ResetActivityExecution for standalone activities is not yet implemented") + ref := chasm.NewComponentRef[*Activity](chasm.ExecutionKey{ + NamespaceID: req.GetNamespaceId(), + BusinessID: frontendReq.GetActivityId(), + RunID: frontendReq.GetRunId(), + }) + + _, _, err := chasm.UpdateComponent( + ctx, + ref, + (*Activity).handleReset, + req, + ) + if err != nil { + return nil, err + } + return &activitypb.ResetActivityExecutionResponse{}, nil } func (h *handler) UpdateActivityExecutionOptions(ctx context.Context, req *activitypb.UpdateActivityExecutionOptionsRequest) (*activitypb.UpdateActivityExecutionOptionsResponse, error) { diff --git a/chasm/lib/activity/proto/v1/activity_state.proto b/chasm/lib/activity/proto/v1/activity_state.proto index 3494a6ac670..1d913782b19 100644 --- a/chasm/lib/activity/proto/v1/activity_state.proto +++ b/chasm/lib/activity/proto/v1/activity_state.proto @@ -113,6 +113,13 @@ message ActivityState { // Set if the activity was paused. ActivityPauseState pause_state = 15; + + // Set when reset was requested while the activity was running. + // On the next retry, TransitionRescheduled will reset the attempt count to 1 before incrementing. + bool activity_reset = 16; + + // Set alongside activity_reset when heartbeat details should be cleared on the next retry. + bool reset_heartbeats = 17; } message ActivityCancelState { diff --git a/chasm/lib/activity/statemachine.go b/chasm/lib/activity/statemachine.go index 516b4c6449b..058de6453b3 100644 --- a/chasm/lib/activity/statemachine.go +++ b/chasm/lib/activity/statemachine.go @@ -102,6 +102,17 @@ var TransitionRescheduled = chasm.NewTransition( func(a *Activity, ctx chasm.MutableContext, event rescheduleEvent) error { attempt := a.LastAttempt.Get(ctx) currentTime := ctx.Now(a) + + // Apply deferred reset: set Count to 0 so the increment below produces 1. 
+ if a.ActivityReset { + attempt.Count = 0 + a.ActivityReset = false + if a.ResetHeartbeats { + a.ResetHeartbeats = false + a.clearHeartbeat(ctx) + } + } + attempt.Count++ attempt.Stamp++ @@ -193,6 +204,8 @@ var TransitionCompleted = chasm.NewTransition( func(a *Activity, ctx chasm.MutableContext, event completeEvent) error { return a.StoreOrSelf(ctx).RecordCompleted(ctx, func(ctx chasm.MutableContext) error { a.PauseState = nil + a.ActivityReset = false + a.ResetHeartbeats = false req := event.req.GetCompleteRequest() @@ -229,6 +242,8 @@ var TransitionFailed = chasm.NewTransition( return a.StoreOrSelf(ctx).RecordCompleted(ctx, func(ctx chasm.MutableContext) error { req := event.req.GetFailedRequest() a.PauseState = nil + a.ActivityReset = false + a.ResetHeartbeats = false if details := req.GetLastHeartbeatDetails(); details != nil { heartbeat := a.getOrCreateLastHeartbeat(ctx) @@ -270,6 +285,8 @@ var TransitionTerminated = chasm.NewTransition( RequestId: event.request.RequestID, } a.PauseState = nil + a.ActivityReset = false + a.ResetHeartbeats = false outcome := a.Outcome.Get(ctx) failure := &failurepb.Failure{ Message: event.request.Reason, @@ -345,6 +362,8 @@ var TransitionCanceled = chasm.NewTransition( }, } a.PauseState = nil + a.ActivityReset = false + a.ResetHeartbeats = false a.emitOnCanceledMetrics(ctx, event.handler, event.fromStatus) @@ -391,6 +410,8 @@ var TransitionTimedOut = chasm.NewTransition( } a.PauseState = nil + a.ActivityReset = false + a.ResetHeartbeats = false a.emitOnTimedOutMetrics(ctx, event.metricsHandler, timeoutType, event.fromStatus) @@ -439,3 +460,24 @@ var TransitionUnpaused = chasm.NewTransition( return nil }, ) + +type resetEvent struct { + req *workflowservice.ResetActivityExecutionRequest + scheduleTime time.Time + handler metrics.Handler +} + +// TransitionReset resets a SCHEDULED or PAUSED activity back to attempt 1. 
The stamp is bumped to +// invalidate any pending dispatch task, then a new dispatch task is added at the given schedule time. +// For STARTED/CANCEL_REQUESTED activities the reset is deferred — see Activity.ActivityReset flag. +var TransitionReset = chasm.NewTransition( + []activitypb.ActivityExecutionStatus{ + activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, + activitypb.ACTIVITY_EXECUTION_STATUS_PAUSED, + }, + activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, + func(a *Activity, ctx chasm.MutableContext, event resetEvent) error { + a.reset(ctx, event) + return nil + }, +) diff --git a/chasm/lib/activity/statemachine_test.go b/chasm/lib/activity/statemachine_test.go index 8e412b09bd4..a36a16735c3 100644 --- a/chasm/lib/activity/statemachine_test.go +++ b/chasm/lib/activity/statemachine_test.go @@ -715,3 +715,262 @@ func TestTransitionCanceled(t *testing.T) { } protorequire.ProtoEqual(t, expectedFailure, outcome.GetFailed().GetFailure()) } + +// TestTerminalTransitionsClearResetFlags verifies that ActivityReset and ResetHeartbeats are +// cleared by every terminal transition so deferred-reset state does not linger on a terminal activity. 
+func TestTerminalTransitionsClearResetFlags(t *testing.T) { + makeActivity := func(ctx *chasm.MockMutableContext, status activitypb.ActivityExecutionStatus) *Activity { + return &Activity{ + ActivityState: &activitypb.ActivityState{ + ActivityType: &commonpb.ActivityType{Name: "test-activity-type"}, + RetryPolicy: defaultRetryPolicy, + ScheduleToCloseTimeout: durationpb.New(defaultScheduleToCloseTimeout), + ScheduleToStartTimeout: durationpb.New(defaultScheduleToStartTimeout), + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + Status: status, + TaskQueue: &taskqueuepb.TaskQueue{Name: "test-task-queue"}, + ActivityReset: true, + ResetHeartbeats: true, + }, + LastAttempt: chasm.NewDataField(ctx, &activitypb.ActivityAttemptState{Count: 2}), + LastHeartbeat: chasm.NewDataField(ctx, &activitypb.ActivityHeartbeatState{}), + Outcome: chasm.NewDataField(ctx, &activitypb.ActivityOutcome{}), + } + } + + newCtx := func() *chasm.MockMutableContext { + ctx := &chasm.MockMutableContext{} + ctx.HandleNow = func(chasm.Component) time.Time { return defaultTime } + return ctx + } + + t.Run("TransitionCompleted", func(t *testing.T) { + ctx := newCtx() + act := makeActivity(ctx, activitypb.ACTIVITY_EXECUTION_STATUS_STARTED) + + ctrl := gomock.NewController(t) + mh := metrics.NewMockHandler(ctrl) + s2c := metrics.NewMockTimerIface(ctrl) + s2c.EXPECT().Record(gomock.Any()) + mh.EXPECT().Timer(metrics.ActivityStartToCloseLatency.Name()).Return(s2c) + sch2c := metrics.NewMockTimerIface(ctrl) + sch2c.EXPECT().Record(gomock.Any()) + mh.EXPECT().Timer(metrics.ActivityScheduleToCloseLatency.Name()).Return(sch2c) + ctr := metrics.NewMockCounterIface(ctrl) + ctr.EXPECT().Record(int64(1)) + mh.EXPECT().Counter(metrics.ActivitySuccess.Name()).Return(ctr) + + err := TransitionCompleted.Apply(act, ctx, completeEvent{ + req: &historyservice.RespondActivityTaskCompletedRequest{ + CompleteRequest: &workflowservice.RespondActivityTaskCompletedRequest{Identity: "worker"}, + }, + 
metricsHandler: mh, + }) + require.NoError(t, err) + require.False(t, act.ActivityReset, "ActivityReset should be cleared by TransitionCompleted") + require.False(t, act.ResetHeartbeats, "ResetHeartbeats should be cleared by TransitionCompleted") + }) + + t.Run("TransitionFailed", func(t *testing.T) { + ctx := newCtx() + act := makeActivity(ctx, activitypb.ACTIVITY_EXECUTION_STATUS_STARTED) + + ctrl := gomock.NewController(t) + mh := metrics.NewMockHandler(ctrl) + s2c := metrics.NewMockTimerIface(ctrl) + s2c.EXPECT().Record(gomock.Any()) + mh.EXPECT().Timer(metrics.ActivityStartToCloseLatency.Name()).Return(s2c) + sch2c := metrics.NewMockTimerIface(ctrl) + sch2c.EXPECT().Record(gomock.Any()) + mh.EXPECT().Timer(metrics.ActivityScheduleToCloseLatency.Name()).Return(sch2c) + cFail := metrics.NewMockCounterIface(ctrl) + cFail.EXPECT().Record(int64(1)) + mh.EXPECT().Counter(metrics.ActivityFail.Name()).Return(cFail) + cTaskFail := metrics.NewMockCounterIface(ctrl) + cTaskFail.EXPECT().Record(int64(1)) + mh.EXPECT().Counter(metrics.ActivityTaskFail.Name()).Return(cTaskFail) + + err := TransitionFailed.Apply(act, ctx, failedEvent{ + req: &historyservice.RespondActivityTaskFailedRequest{ + FailedRequest: &workflowservice.RespondActivityTaskFailedRequest{ + Failure: &failurepb.Failure{ + Message: "non-retryable", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: true}, + }, + }, + }, + }, + metricsHandler: mh, + }) + require.NoError(t, err) + require.False(t, act.ActivityReset, "ActivityReset should be cleared by TransitionFailed") + require.False(t, act.ResetHeartbeats, "ResetHeartbeats should be cleared by TransitionFailed") + }) + + t.Run("TransitionTerminated", func(t *testing.T) { + ctx := newCtx() + act := makeActivity(ctx, activitypb.ACTIVITY_EXECUTION_STATUS_STARTED) + + ctrl := gomock.NewController(t) + mh := metrics.NewMockHandler(ctrl) + ctr := metrics.NewMockCounterIface(ctrl) + 
ctr.EXPECT().Record(int64(1)) + mh.EXPECT().Counter(metrics.ActivityTerminate.Name()).Return(ctr) + + err := TransitionTerminated.Apply(act, ctx, terminateEvent{ + request: chasm.TerminateComponentRequest{Reason: "test"}, + metricsHandler: mh, + fromStatus: activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, + }) + require.NoError(t, err) + require.False(t, act.ActivityReset, "ActivityReset should be cleared by TransitionTerminated") + require.False(t, act.ResetHeartbeats, "ResetHeartbeats should be cleared by TransitionTerminated") + }) + + t.Run("TransitionCanceled", func(t *testing.T) { + ctx := newCtx() + act := makeActivity(ctx, activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED) + + ctrl := gomock.NewController(t) + mh := metrics.NewMockHandler(ctrl) + s2c := metrics.NewMockTimerIface(ctrl) + s2c.EXPECT().Record(gomock.Any()) + mh.EXPECT().Timer(metrics.ActivityStartToCloseLatency.Name()).Return(s2c) + sch2c := metrics.NewMockTimerIface(ctrl) + sch2c.EXPECT().Record(gomock.Any()) + mh.EXPECT().Timer(metrics.ActivityScheduleToCloseLatency.Name()).Return(sch2c) + ctr := metrics.NewMockCounterIface(ctrl) + ctr.EXPECT().Record(int64(1)) + mh.EXPECT().Counter(metrics.ActivityCancel.Name()).Return(ctr) + + err := TransitionCanceled.Apply(act, ctx, cancelEvent{ + handler: mh, + fromStatus: activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED, + }) + require.NoError(t, err) + require.False(t, act.ActivityReset, "ActivityReset should be cleared by TransitionCanceled") + require.False(t, act.ResetHeartbeats, "ResetHeartbeats should be cleared by TransitionCanceled") + }) + + t.Run("TransitionTimedOut", func(t *testing.T) { + ctx := newCtx() + act := makeActivity(ctx, activitypb.ACTIVITY_EXECUTION_STATUS_STARTED) + + ctrl := gomock.NewController(t) + mh := metrics.NewMockHandler(ctrl) + s2c := metrics.NewMockTimerIface(ctrl) + s2c.EXPECT().Record(gomock.Any()) + mh.EXPECT().Timer(metrics.ActivityStartToCloseLatency.Name()).Return(s2c) + sch2c := 
metrics.NewMockTimerIface(ctrl) + sch2c.EXPECT().Record(gomock.Any()) + mh.EXPECT().Timer(metrics.ActivityScheduleToCloseLatency.Name()).Return(sch2c) + timeoutTag := metrics.StringTag("timeout_type", enumspb.TIMEOUT_TYPE_START_TO_CLOSE.String()) + cTimeout := metrics.NewMockCounterIface(ctrl) + cTimeout.EXPECT().Record(int64(1), timeoutTag) + mh.EXPECT().Counter(metrics.ActivityTimeout.Name()).Return(cTimeout) + cTaskTimeout := metrics.NewMockCounterIface(ctrl) + cTaskTimeout.EXPECT().Record(int64(1), timeoutTag) + mh.EXPECT().Counter(metrics.ActivityTaskTimeout.Name()).Return(cTaskTimeout) + + err := TransitionTimedOut.Apply(act, ctx, timeoutEvent{ + timeoutType: enumspb.TIMEOUT_TYPE_START_TO_CLOSE, + metricsHandler: mh, + fromStatus: activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, + }) + require.NoError(t, err) + require.False(t, act.ActivityReset, "ActivityReset should be cleared by TransitionTimedOut") + require.False(t, act.ResetHeartbeats, "ResetHeartbeats should be cleared by TransitionTimedOut") + }) +} + +// TestTransitionResetFromPaused verifies that TransitionReset applied to a PAUSED activity +// transitions it to SCHEDULED and adds a dispatch task so it can be picked up by a worker. 
+func TestTransitionResetFromPaused(t *testing.T) { + testCases := []struct { + name string + scheduleToStartTimeout time.Duration + expectedTaskCount int + }{ + { + name: "with schedule-to-start timeout", + scheduleToStartTimeout: defaultScheduleToStartTimeout, + expectedTaskCount: 2, // ScheduleToStartTimeoutTask + ActivityDispatchTask + }, + { + name: "without schedule-to-start timeout", + scheduleToStartTimeout: 0, + expectedTaskCount: 1, // ActivityDispatchTask only + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := &chasm.MockMutableContext{} + ctx.HandleNow = func(chasm.Component) time.Time { return defaultTime } + attemptState := &activitypb.ActivityAttemptState{ + Count: 3, + CurrentRetryInterval: durationpb.New(30 * time.Second), + } + + act := &Activity{ + ActivityState: &activitypb.ActivityState{ + ActivityType: &commonpb.ActivityType{Name: "test-activity-type"}, + RetryPolicy: defaultRetryPolicy, + ScheduleToCloseTimeout: durationpb.New(defaultScheduleToCloseTimeout), + ScheduleToStartTimeout: durationpb.New(tc.scheduleToStartTimeout), + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + Status: activitypb.ACTIVITY_EXECUTION_STATUS_PAUSED, + TaskQueue: &taskqueuepb.TaskQueue{Name: "test-task-queue"}, + PauseState: &activitypb.ActivityPauseState{ + Identity: "test-identity", + Reason: "test reason", + }, + }, + LastAttempt: chasm.NewDataField(ctx, attemptState), + Outcome: chasm.NewDataField(ctx, &activitypb.ActivityOutcome{}), + } + + err := TransitionReset.Apply(act, ctx, resetEvent{scheduleTime: defaultTime, handler: metrics.NoopMetricsHandler}) + require.NoError(t, err) + require.Equal(t, activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, act.Status) + require.Equal(t, int32(1), attemptState.Count) + require.Nil(t, attemptState.GetCurrentRetryInterval()) + require.Len(t, ctx.Tasks, tc.expectedTaskCount) + + // Last task is always the dispatch task + _, ok := 
ctx.Tasks[tc.expectedTaskCount-1].Payload.(*activitypb.ActivityDispatchTask) + require.True(t, ok, "expected ActivityDispatchTask as last task") + }) + } +} + +// TestTransitionResetClearsCurrentRetryInterval verifies that TransitionReset clears the retry +// interval so a reset activity is not delayed by a previous backoff period. +func TestTransitionResetClearsCurrentRetryInterval(t *testing.T) { + ctx := &chasm.MockMutableContext{} + ctx.HandleNow = func(chasm.Component) time.Time { return defaultTime } + attemptState := &activitypb.ActivityAttemptState{ + Count: 2, + CurrentRetryInterval: durationpb.New(30 * time.Second), + } + + act := &Activity{ + ActivityState: &activitypb.ActivityState{ + ActivityType: &commonpb.ActivityType{Name: "test-activity-type"}, + RetryPolicy: defaultRetryPolicy, + ScheduleToCloseTimeout: durationpb.New(defaultScheduleToCloseTimeout), + ScheduleToStartTimeout: durationpb.New(defaultScheduleToStartTimeout), + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + Status: activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, + TaskQueue: &taskqueuepb.TaskQueue{Name: "test-task-queue"}, + }, + LastAttempt: chasm.NewDataField(ctx, attemptState), + Outcome: chasm.NewDataField(ctx, &activitypb.ActivityOutcome{}), + } + + err := TransitionReset.Apply(act, ctx, resetEvent{scheduleTime: defaultTime, handler: metrics.NoopMetricsHandler}) + require.NoError(t, err) + require.Nil(t, attemptState.GetCurrentRetryInterval(), "TransitionReset must clear CurrentRetryInterval") + require.Equal(t, int32(1), attemptState.Count, "TransitionReset must reset Count to 1") +} diff --git a/chasm/lib/activity/validator.go b/chasm/lib/activity/validator.go index 0770c8efa96..b0a5f6fa1a5 100644 --- a/chasm/lib/activity/validator.go +++ b/chasm/lib/activity/validator.go @@ -278,7 +278,50 @@ func validateAndNormalizeSearchAttributes( return saValidator.ValidateSize(saToValidate, namespaceName) } -func validateAndNormalizeDescribeActivityExecutionRequest( 
+func validateAndNormalizeStartRequest( + req *workflowservice.StartActivityExecutionRequest, + maxIDLengthLimit int, + blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, + blobSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter, + logger log.Logger, + saMapperProvider searchattribute.MapperProvider, + saValidator *searchattribute.Validator, +) error { + if len(req.GetRequestId()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("request ID exceeds length limit. Length=%d Limit=%d", + len(req.GetRequestId()), maxIDLengthLimit) + } + + if len(req.GetIdentity()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("identity exceeds length limit. Length=%d Limit=%d", + len(req.GetIdentity()), maxIDLengthLimit) + } + + if err := normalizeAndValidateIDPolicy(req); err != nil { + return err + } + + if err := validateBlobSize( + req.GetActivityId(), + "StartActivityExecution", + blobSizeLimitError, + blobSizeLimitWarn, + req.Input.Size(), + logger, + req.GetNamespace()); err != nil { + return serviceerror.NewInvalidArgument("input exceeds length limit") + } + + if req.GetSearchAttributes() != nil { + if err := validateAndNormalizeSearchAttributes(req, saMapperProvider, saValidator); err != nil { + return err + } + } + + return nil +} + +func validateDescribeActivityExecutionRequest( req *workflowservice.DescribeActivityExecutionRequest, maxIDLengthLimit int, ) error { @@ -304,7 +347,7 @@ func validateAndNormalizeDescribeActivityExecutionRequest( return nil } -func validateAndNormalizePollActivityExecutionRequest( +func validatePollActivityExecutionRequest( req *workflowservice.PollActivityExecutionRequest, maxIDLengthLimit int, ) error { @@ -324,7 +367,7 @@ func validateAndNormalizePollActivityExecutionRequest( return nil } -func validateAndNormalizeRequestCancelActivityExecutionRequest( +func validateRequestCancelActivityExecutionRequest( req *workflowservice.RequestCancelActivityExecutionRequest, maxIDLengthLimit int, 
blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, @@ -377,7 +420,7 @@ func validateAndNormalizeRequestCancelActivityExecutionRequest( } //nolint:revive // cyclomatic: per-field validation of a field-mask update requires explicit handling of each field -func validateAndNormalizeUpdateActivityExecutionOptionsRequest( +func validateUpdateActivityExecutionOptionsRequest( req *workflowservice.UpdateActivityExecutionOptionsRequest, maxIDLengthLimit int, ) error { @@ -502,7 +545,7 @@ func validateAndNormalizeUpdateActivityExecutionOptionsRequest( return nil } -func validateAndNormalizeDeleteActivityExecutionRequest( +func validateDeleteActivityExecutionRequest( req *workflowservice.DeleteActivityExecutionRequest, maxIDLengthLimit int, ) error { @@ -525,7 +568,7 @@ func validateAndNormalizeDeleteActivityExecutionRequest( return nil } -func validateAndNormalizeTerminateActivityExecutionRequest( +func validateTerminateActivityExecutionRequest( req *workflowservice.TerminateActivityExecutionRequest, maxIDLengthLimit int, blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, @@ -577,7 +620,7 @@ func validateAndNormalizeTerminateActivityExecutionRequest( return nil } -func validateAndNormalizePauseActivityExecutionRequest( +func validatePauseActivityExecutionRequest( req *workflowservice.PauseActivityExecutionRequest, maxIDLengthLimit int, blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, @@ -614,7 +657,31 @@ func validateAndNormalizePauseActivityExecutionRequest( return nil } -func validateAndNormalizeUnpauseActivityExecutionRequest( +func validateResetActivityExecutionRequest( + req *workflowservice.ResetActivityExecutionRequest, + maxIDLengthLimit int, +) error { + if req.GetActivityId() == "" { + return serviceerror.NewInvalidArgument("activity ID is required") + } + if len(req.GetActivityId()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("activity ID exceeds length limit. 
Length=%d Limit=%d", + len(req.GetActivityId()), maxIDLengthLimit) + } + if len(req.GetIdentity()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("identity exceeds length limit. Length=%d Limit=%d", + len(req.GetIdentity()), maxIDLengthLimit) + } + if runID := req.GetRunId(); runID != "" { + _, err := uuid.Parse(runID) + if err != nil { + return serviceerror.NewInvalidArgument("invalid run id: must be a valid UUID") + } + } + return nil +} + +func validateUnpauseActivityExecutionRequest( req *workflowservice.UnpauseActivityExecutionRequest, maxIDLengthLimit int, ) error { diff --git a/chasm/lib/activity/validator_test.go b/chasm/lib/activity/validator_test.go index beb9787be12..8d9d030ea17 100644 --- a/chasm/lib/activity/validator_test.go +++ b/chasm/lib/activity/validator_test.go @@ -429,7 +429,7 @@ func TestValidateStandAloneRequestIDTooLong(t *testing.T) { } h := newTestFrontendHandler(defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, defaultMaxIDLengthLimit) - err := h.validateAndNormalizeStartActivityExecutionRequest(req) + err := validateAndNormalizeStartRequest(req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, h.config.BlobSizeLimitWarn, h.logger, h.saMapperProvider, h.saValidator) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) } @@ -449,7 +449,7 @@ func TestValidateStandAloneInputTooLarge(t *testing.T) { } h := newTestFrontendHandler(defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, defaultMaxIDLengthLimit) - err := h.validateAndNormalizeStartActivityExecutionRequest(req) + err := validateAndNormalizeStartRequest(req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, h.config.BlobSizeLimitWarn, h.logger, h.saMapperProvider, h.saValidator) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) } @@ -476,7 +476,7 @@ func TestValidateStandAloneInputWarningSizeShouldSucceed(t *testing.T) { func(ns string) int { return payloadSize }, 
defaultMaxIDLengthLimit, ) - err := h.validateAndNormalizeStartActivityExecutionRequest(req) + err := validateAndNormalizeStartRequest(req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, h.config.BlobSizeLimitWarn, h.logger, h.saMapperProvider, h.saValidator) require.NoError(t, err) } @@ -494,7 +494,7 @@ func TestValidateStandAlone_IDPolicyShouldDefault(t *testing.T) { } h := newTestFrontendHandler(defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, defaultMaxIDLengthLimit) - err := h.validateAndNormalizeStartActivityExecutionRequest(req) + err := validateAndNormalizeStartRequest(req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, h.config.BlobSizeLimitWarn, h.logger, h.saMapperProvider, h.saValidator) require.NoError(t, err) require.Equal(t, enumspb.ACTIVITY_ID_REUSE_POLICY_ALLOW_DUPLICATE, req.IdReusePolicy) @@ -646,7 +646,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { req := &workflowservice.DeleteActivityExecutionRequest{ ActivityId: defaultActivityID, } - err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ -655,7 +655,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", } - err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ -663,7 +663,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { req := &workflowservice.DeleteActivityExecutionRequest{ ActivityId: "", } - err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) }) @@ -672,7 
+672,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { req := &workflowservice.DeleteActivityExecutionRequest{ ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), } - err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) }) @@ -682,7 +682,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, RunId: "not-a-valid-uuid", } - err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) }) @@ -695,7 +695,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { Identity: "test-identity", Reason: "test-reason", } - err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) require.NoError(t, err) }) @@ -705,7 +705,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", Identity: "test-identity", } - err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) require.NoError(t, err) }) @@ -714,7 +714,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { ActivityId: "", Identity: "test-identity", } - err := 
validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "activity ID is required", invalidArgErr.Message) @@ -725,7 +725,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), Identity: "test-identity", } - err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("activity ID exceeds length limit. Length=%d Limit=%d", @@ -737,7 +737,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), } - err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("identity exceeds length limit. 
Length=%d Limit=%d", @@ -750,7 +750,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { Identity: "test-identity", Reason: string(make([]byte, defaultBlobSizeLimitError("default")+1)), } - err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "reason exceeds length limit", invalidArgErr.Message) @@ -762,7 +762,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { RunId: "not-a-valid-uuid", Identity: "test-identity", } - err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "invalid run id: must be a valid UUID", invalidArgErr.Message) @@ -775,7 +775,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, Identity: "test-identity", } - err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ -785,7 +785,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", Identity: "test-identity", } - err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ 
-794,7 +794,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { ActivityId: "", Identity: "test-identity", } - err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "activity ID is required", invalidArgErr.Message) @@ -805,7 +805,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), Identity: "test-identity", } - err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("activity ID exceeds length limit. Length=%d Limit=%d", @@ -817,7 +817,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), } - err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("identity exceeds length limit. 
Length=%d Limit=%d", @@ -830,7 +830,75 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { RunId: "not-a-valid-uuid", Identity: "test-identity", } - err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, "invalid run id: must be a valid UUID", invalidArgErr.Message) + }) +} + +func TestValidateResetActivityExecutionRequest(t *testing.T) { + t.Run("Success", func(t *testing.T) { + req := &workflowservice.ResetActivityExecutionRequest{ + ActivityId: defaultActivityID, + Identity: "test-identity", + } + err := validateResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) + require.NoError(t, err) + }) + + t.Run("SuccessWithRunID", func(t *testing.T) { + req := &workflowservice.ResetActivityExecutionRequest{ + ActivityId: defaultActivityID, + RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", + Identity: "test-identity", + } + err := validateResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) + require.NoError(t, err) + }) + + t.Run("EmptyActivityID", func(t *testing.T) { + req := &workflowservice.ResetActivityExecutionRequest{ + ActivityId: "", + Identity: "test-identity", + } + err := validateResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, "activity ID is required", invalidArgErr.Message) + }) + + t.Run("ActivityIDTooLong", func(t *testing.T) { + req := &workflowservice.ResetActivityExecutionRequest{ + ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), + Identity: "test-identity", + } + err := validateResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, fmt.Sprintf("activity 
ID exceeds length limit. Length=%d Limit=%d", + defaultMaxIDLengthLimit+1, defaultMaxIDLengthLimit), invalidArgErr.Message) + }) + + t.Run("IdentityTooLong", func(t *testing.T) { + req := &workflowservice.ResetActivityExecutionRequest{ + ActivityId: defaultActivityID, + Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), + } + err := validateResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Equal(t, fmt.Sprintf("identity exceeds length limit. Length=%d Limit=%d", + defaultMaxIDLengthLimit+1, defaultMaxIDLengthLimit), invalidArgErr.Message) + }) + + t.Run("InvalidRunID", func(t *testing.T) { + req := &workflowservice.ResetActivityExecutionRequest{ + ActivityId: defaultActivityID, + RunId: "not-a-valid-uuid", + Identity: "test-identity", + } + err := validateResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "invalid run id: must be a valid UUID", invalidArgErr.Message) diff --git a/common/metrics/metric_defs.go b/common/metrics/metric_defs.go index 99d212fbe7d..f0290682282 100644 --- a/common/metrics/metric_defs.go +++ b/common/metrics/metric_defs.go @@ -352,6 +352,10 @@ const ( ActivityPausedScope = "ActivityPaused" // ActivityUnpausedScope tracks UnpauseActivityExecution API calls received by service ActivityUnpausedScope = "ActivityUnpaused" + // ActivityResetScope tracks ResetActivityExecution API calls received by service + ActivityResetScope = "ActivityReset" + // ActivityUpdateOptionsScope tracks UpdateActivityExecutionOptions API calls received by service + ActivityUpdateOptionsScope = "ActivityUpdateOptions" // HistoryGetWorkflowExecutionHistoryScope is the metric scope for non-long-poll frontend.GetWorkflowExecutionHistory HistoryGetWorkflowExecutionHistoryScope = "GetWorkflowExecutionHistory" // 
HistoryPollWorkflowExecutionHistoryScope is the metric scope for long poll case of frontend.GetWorkflowExecutionHistory @@ -938,8 +942,10 @@ var ( ActivityCancel = NewCounterDef("activity_cancel", WithDescription("Number of activities that are cancelled.")) ActivityTerminate = NewCounterDef("activity_terminate", WithDescription("Number of activities that are terminated.")) ActivityTaskTimeout = NewCounterDef("activity_task_timeout", WithDescription("Number of activity task timeouts (including retries).")) + ActivityUpdateOptions = NewCounterDef("activity_update_options", WithDescription("Number of activity update options calls.")) ActivityPause = NewCounterDef("activity_pause", WithDescription("Number of activity pauses.")) ActivityUnpause = NewCounterDef("activity_unpause", WithDescription("Number of activity unpauses.")) + ActivityReset = NewCounterDef("activity_reset", WithDescription("Number of activity resets.")) ActivityTimeout = NewCounterDef("activity_timeout", WithDescription("Number of terminal activity timeouts.")) ActivityPayloadSize = NewCounterDef("activity_payload_size", WithDescription("Size of activity payloads in bytes.")) AckLevelUpdateCounter = NewCounterDef("ack_level_update") @@ -1146,12 +1152,8 @@ var ( ExecutionQueueSchedulerTaskLatency = NewTimerDef("execution_queue_scheduler_task_latency") ExecutionQueueSchedulerQueueWaitTime = NewTimerDef("execution_queue_scheduler_queue_wait_time") - PausedActivitiesCounter = NewCounterDef("paused_activities") - ActivityPauseRequests = NewCounterDef("activity_pause_requests") - ActivityUnpauseRequests = NewCounterDef("activity_unpause_requests") - ActivityResetRequests = NewCounterDef("activity_reset_requests") - ActivityUpdateOptionsRequests = NewCounterDef("activity_update_options_requests") - ExternalPayloadUploadSize = NewBytesHistogramDef("external_payload_upload_size", WithDescription("The histogram of sizes in bytes of uploaded external payloads.")) + PausedActivitiesCounter = 
NewCounterDef("paused_activities") + ExternalPayloadUploadSize = NewBytesHistogramDef("external_payload_upload_size", WithDescription("The histogram of sizes in bytes of uploaded external payloads.")) // Deadlock detector metrics DDSuspectedDeadlocks = NewCounterDef("dd_suspected_deadlocks") diff --git a/service/history/api/pauseactivity/api.go b/service/history/api/pauseactivity/api.go index 81ab79637a3..5a7278a4757 100644 --- a/service/history/api/pauseactivity/api.go +++ b/service/history/api/pauseactivity/api.go @@ -87,7 +87,7 @@ func Invoke( targetingMethod = "id" } if ns, err := shardContext.GetNamespaceRegistry().GetNamespaceByID(namespace.ID(request.NamespaceId)); err == nil { - metrics.ActivityPauseRequests.With(shardContext.GetMetricsHandler().WithTags( + metrics.ActivityPause.With(shardContext.GetMetricsHandler().WithTags( metrics.NamespaceTag(ns.Name().String()), metrics.ActivityTargetingMethodTag(targetingMethod), )).Record(1) diff --git a/service/history/api/resetactivity/api.go b/service/history/api/resetactivity/api.go index 6b46ee16fff..229efad6073 100644 --- a/service/history/api/resetactivity/api.go +++ b/service/history/api/resetactivity/api.go @@ -80,7 +80,7 @@ func Invoke( targetingMethod = "id" } if ns, err := shardContext.GetNamespaceRegistry().GetNamespaceByID(namespace.ID(req.NamespaceId)); err == nil { - metrics.ActivityResetRequests.With(shardContext.GetMetricsHandler().WithTags( + metrics.ActivityReset.With(shardContext.GetMetricsHandler().WithTags( metrics.NamespaceTag(ns.Name().String()), metrics.ActivityTargetingMethodTag(targetingMethod), )).Record(1) diff --git a/service/history/api/unpauseactivity/api.go b/service/history/api/unpauseactivity/api.go index d1c2865b239..2a9217a9718 100644 --- a/service/history/api/unpauseactivity/api.go +++ b/service/history/api/unpauseactivity/api.go @@ -58,7 +58,7 @@ func Invoke( targetingMethod = "id" } if ns, err := 
shardContext.GetNamespaceRegistry().GetNamespaceByID(namespace.ID(request.NamespaceId)); err == nil { - metrics.ActivityUnpauseRequests.With(shardContext.GetMetricsHandler().WithTags( + metrics.ActivityUnpause.With(shardContext.GetMetricsHandler().WithTags( metrics.NamespaceTag(ns.Name().String()), metrics.ActivityTargetingMethodTag(targetingMethod), )).Record(1) diff --git a/service/history/api/updateactivityoptions/api.go b/service/history/api/updateactivityoptions/api.go index 53522a06a7b..34dc72c0a0f 100644 --- a/service/history/api/updateactivityoptions/api.go +++ b/service/history/api/updateactivityoptions/api.go @@ -90,7 +90,7 @@ func Invoke( targetingMethod = "id" } if ns, err := shardContext.GetNamespaceRegistry().GetNamespaceByID(namespace.ID(request.NamespaceId)); err == nil { - metrics.ActivityUpdateOptionsRequests.With(shardContext.GetMetricsHandler().WithTags( + metrics.ActivityUpdateOptions.With(shardContext.GetMetricsHandler().WithTags( metrics.NamespaceTag(ns.Name().String()), metrics.ActivityTargetingMethodTag(targetingMethod), )).Record(1) diff --git a/tests/activity_api_pause_test.go b/tests/activity_api_pause_test.go index f3bee7f22b3..334aae004e9 100644 --- a/tests/activity_api_pause_test.go +++ b/tests/activity_api_pause_test.go @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + activitypb "go.temporal.io/api/activity/v1" commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" "go.temporal.io/api/serviceerror" @@ -18,6 +19,8 @@ import ( "go.temporal.io/sdk/workflow" "go.temporal.io/server/common/util" "go.temporal.io/server/tests/testcore" + "google.golang.org/protobuf/types/known/durationpb" + "google.golang.org/protobuf/types/known/fieldmaskpb" ) // activityPauseAPI groups pause/unpause adapters so the same test body can run @@ -798,6 +801,134 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { // Second pause with the same request ID — must succeed (idempotent 
no-op). s.NoError(api.pause(ctx, s, workflowRun.GetID(), "activity-id", "identity", "reason", "my-pause-request-id")) }) + + t.Run("TestActivityPauseUpdateOptionsResetUnpause", func(t *testing.T) { + // End-to-end test: pause → update-options → reset → unpause all work together. + // Verifies that the updated options persist through a reset and that the activity + // completes at attempt 1 with the new options after unpause. + s := testcore.NewEnv(t) + + initialRetryInterval := 1 * time.Minute + origScheduleToClose := 30 * time.Minute + updatedScheduleToClose := 25 * time.Minute + activityRetryPolicy := &temporal.RetryPolicy{ + InitialInterval: initialRetryInterval, + BackoffCoefficient: 1, + } + + makeWorkflowFunc := func(activityFunction ActivityFunctions) WorkflowFunction { + return func(ctx workflow.Context) error { + var ret string + return workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ + ActivityID: "activity-id", + DisableEagerExecution: true, + StartToCloseTimeout: 15 * time.Minute, + ScheduleToCloseTimeout: origScheduleToClose, + RetryPolicy: activityRetryPolicy, + }), activityFunction).Get(ctx, &ret) + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + var activityWasReset atomic.Bool + activityCompleteCh := make(chan struct{}) + + activityFunction := func() (string, error) { + if !activityWasReset.Load() { + return "", errors.New("bad-luck-please-retry") + } + s.WaitForChannel(ctx, activityCompleteCh) + return "done!", nil + } + + workflowFn := makeWorkflowFunc(activityFunction) + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + wfID := testcore.RandomizeStr("wf_id-" + t.Name()) + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{ + ID: wfID, + TaskQueue: s.WorkerTaskQueue(), + }, workflowFn) + require.NoError(t, err) + + // wait for activity to fail and enter retry backoff + 
require.EventuallyWithT(t, func(c *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(c, err) + require.Len(c, desc.PendingActivities, 1) + require.Equal(c, enumspb.PENDING_ACTIVITY_STATE_SCHEDULED, desc.PendingActivities[0].State) + require.Greater(c, desc.PendingActivities[0].Attempt, int32(1)) + }, 5*time.Second, 200*time.Millisecond) + + // step 1: pause + require.NoError(t, api.pause(ctx, s, wfID, "activity-id", "", "", "")) + + require.EventuallyWithT(t, func(c *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(c, err) + require.Len(c, desc.PendingActivities, 1) + require.Equal(c, enumspb.PENDING_ACTIVITY_STATE_PAUSED, desc.PendingActivities[0].State) + }, 5*time.Second, 100*time.Millisecond) + + // step 2: update-options (reduce schedule-to-close timeout while paused) + _, err = s.FrontendClient().UpdateActivityOptions(ctx, &workflowservice.UpdateActivityOptionsRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: wfID}, + Activity: &workflowservice.UpdateActivityOptionsRequest_Id{Id: "activity-id"}, + ActivityOptions: &activitypb.ActivityOptions{ + ScheduleToCloseTimeout: durationpb.New(updatedScheduleToClose), + }, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"schedule_to_close_timeout"}}, + }) + require.NoError(t, err) + + require.EventuallyWithT(t, func(c *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(c, err) + require.Len(c, desc.PendingActivities, 1) + require.Equal(c, updatedScheduleToClose, desc.PendingActivities[0].ActivityOptions.GetScheduleToCloseTimeout().AsDuration()) + }, 5*time.Second, 100*time.Millisecond) + + // step 3: reset while paused — stays PAUSED (keepPaused=true), attempt resets to 1 + _, err = 
s.FrontendClient().ResetActivity(ctx, &workflowservice.ResetActivityRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: wfID}, + Activity: &workflowservice.ResetActivityRequest_Id{Id: "activity-id"}, + KeepPaused: true, + }) + require.NoError(t, err) + + require.EventuallyWithT(t, func(c *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(c, err) + require.Len(c, desc.PendingActivities, 1) + require.Equal(c, enumspb.PENDING_ACTIVITY_STATE_PAUSED, desc.PendingActivities[0].State) + require.Equal(c, int32(1), desc.PendingActivities[0].Attempt) + // updated options must survive the reset + require.Equal(c, updatedScheduleToClose, desc.PendingActivities[0].ActivityOptions.GetScheduleToCloseTimeout().AsDuration()) + }, 5*time.Second, 100*time.Millisecond) + + // step 4: unpause + activityWasReset.Store(true) + require.NoError(t, api.unpause(ctx, s, wfID, "activity-id", "", false)) + + require.EventuallyWithT(t, func(c *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(c, err) + require.Len(c, desc.PendingActivities, 1) + require.Equal(c, enumspb.PENDING_ACTIVITY_STATE_STARTED, desc.PendingActivities[0].State) + require.Equal(c, int32(1), desc.PendingActivities[0].Attempt) + }, 5*time.Second, 100*time.Millisecond) + + activityCompleteCh <- struct{}{} + + var out string + err = workflowRun.Get(ctx, &out) + require.NoError(t, err) + }) }) } } diff --git a/tests/activity_api_reset_test.go b/tests/activity_api_reset_test.go index 6e5d8215e01..8a95e36c5a6 100644 --- a/tests/activity_api_reset_test.go +++ b/tests/activity_api_reset_test.go @@ -560,3 +560,144 @@ func (s *ActivityApiResetClientTestSuite) TestActivityReset_HeartbeatDetails() { s.NoError(err) s.NotEmpty(out) } + +func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_WhilePaused() 
{ + // Reset is called while the activity is in PAUSED state (SCHEDULED→PAUSED via TransitionPaused). + // The activity should remain PAUSED with attempt count reset to 1. After unpause it should complete. + s.initialRetryInterval = 1 * time.Minute + s.activityRetryPolicy = &temporal.RetryPolicy{ + InitialInterval: s.initialRetryInterval, + BackoffCoefficient: 1, + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + var startedActivityCount atomic.Int32 + var activityWasReset atomic.Bool + activityCompleteCh := make(chan struct{}) + + activityFunction := func() (string, error) { + startedActivityCount.Add(1) + if !activityWasReset.Load() { + return "", errors.New("bad-luck-please-retry") + } + s.WaitForChannel(ctx, activityCompleteCh) + return "done!", nil + } + + workflowFn := s.makeWorkflowFunc(activityFunction) + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + wfID := testcore.RandomizeStr("wf_id-" + s.T().Name()) + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{ + ID: wfID, + TaskQueue: s.TaskQueue(), + }, workflowFn) + s.NoError(err) + + // wait for activity to fail and enter retry backoff (SCHEDULED state waiting for retry) + s.EventuallyWithT(func(t *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, desc.PendingActivities, 1) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_SCHEDULED, desc.PendingActivities[0].State) + require.Greater(t, desc.PendingActivities[0].Attempt, int32(1)) + }, 5*time.Second, 200*time.Millisecond) + + // pause the activity (transitions SCHEDULED→PAUSED) + _, err = s.FrontendClient().PauseActivity(ctx, &workflowservice.PauseActivityRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: wfID}, + Activity: 
&workflowservice.PauseActivityRequest_Id{Id: "activity-id"}, + }) + s.NoError(err) + + // wait for PAUSED state + s.EventuallyWithT(func(t *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, desc.PendingActivities, 1) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, desc.PendingActivities[0].State) + require.Greater(t, desc.PendingActivities[0].Attempt, int32(1)) + }, 5*time.Second, 100*time.Millisecond) + + // reset while paused — activity should stay PAUSED, but attempt resets to 1 + s.NoError(s.resetFn(ctx, wfID, "activity-id", false, true)) + + s.EventuallyWithT(func(t *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, desc.PendingActivities, 1) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, desc.PendingActivities[0].State) + require.Equal(t, int32(1), desc.PendingActivities[0].Attempt) + }, 5*time.Second, 100*time.Millisecond) + + activityWasReset.Store(true) + + // unpause — activity should run and complete + _, err = s.FrontendClient().UnpauseActivity(ctx, &workflowservice.UnpauseActivityRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: wfID}, + Activity: &workflowservice.UnpauseActivityRequest_Id{Id: "activity-id"}, + }) + s.NoError(err) + + activityCompleteCh <- struct{}{} + + s.NoError(workflowRun.Get(ctx, nil)) +} + +func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_TerminateWhileDeferredReset() { + // Reset is called while activity is STARTED (sets ActivityReset=true as a deferred flag). + // The workflow is then terminated before the activity retries. Verifies the activity + // and workflow terminate cleanly without the deferred reset flag causing issues. 
+ ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityBlockCh := make(chan struct{}) + var startedActivityCount atomic.Int32 + + activityFunction := func() (string, error) { + startedActivityCount.Add(1) + s.WaitForChannel(ctx, activityBlockCh) + return "done!", nil + } + + workflowFn := s.makeWorkflowFunc(activityFunction) + s.SdkWorker().RegisterWorkflow(workflowFn) + s.SdkWorker().RegisterActivity(activityFunction) + + wfID := testcore.RandomizeStr("wf_id-" + s.T().Name()) + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{ + ID: wfID, + TaskQueue: s.TaskQueue(), + }, workflowFn) + s.NoError(err) + + // wait for activity to start + s.EventuallyWithT(func(t *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Len(t, desc.PendingActivities, 1) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_STARTED, desc.PendingActivities[0].State) + }, 5*time.Second, 200*time.Millisecond) + + // reset while running — sets ActivityReset=true as deferred flag + s.NoError(s.resetFn(ctx, wfID, "activity-id", false, false)) + + // terminate the workflow before the activity retries + err = s.SdkClient().TerminateWorkflow(ctx, wfID, workflowRun.GetRunID(), "test termination") + s.NoError(err) + + // unblock the activity worker so it can respond + close(activityBlockCh) + + // verify the workflow is terminated + s.EventuallyWithT(func(t *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + require.NoError(t, err) + require.Equal(t, enumspb.WORKFLOW_EXECUTION_STATUS_TERMINATED, desc.GetWorkflowExecutionInfo().GetStatus()) + }, 10*time.Second, 200*time.Millisecond) +} diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index 97dc179affd..84ac70f6848 100644 --- 
a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -8506,17 +8506,902 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { func (s *standaloneActivityTestSuite) TestResetActivityExecution() { t := s.T() - ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) - defer cancel() - t.Run("StandaloneActivityReturnsError", func(t *testing.T) { + // startAndPollActivity starts a SAA, polls for the first task, and returns + // the start response, poll response, and the task queue name used. + startAndPollActivity := func(ctx context.Context, t *testing.T, activityID string, retryPolicy *commonpb.RetryPolicy) ( + *workflowservice.StartActivityExecutionResponse, + *workflowservice.PollActivityTaskQueueResponse, + string, + ) { + t.Helper() + taskQueue := testcore.RandomizeStr(t.Name()) + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: defaultIdentity, + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, + StartToCloseTimeout: durationpb.New(15 * time.Minute), + RetryPolicy: retryPolicy, + RequestId: testcore.RandomizeStr(activityID), + }) + require.NoError(t, err) + + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err) + require.Equal(t, activityID, pollResp.GetActivityId()) + return startResp, pollResp, taskQueue + } + + failRetryable := func(ctx context.Context, t *testing.T, taskToken []byte, nextRetryDelay time.Duration) { + t.Helper() + _, err := s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: 
s.Namespace().String(), + TaskToken: taskToken, + Failure: &failurepb.Failure{ + Message: "retryable failure", + FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ + ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{ + NonRetryable: false, + NextRetryDelay: durationpb.New(nextRetryDelay), + }, + }, + }, + Identity: defaultIdentity, + }) + require.NoError(t, err) + } + + resetActivity := func(ctx context.Context, t *testing.T, activityID, runID string, resetHeartbeat bool) { + t.Helper() + _, err := s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + ResetHeartbeat: resetHeartbeat, + }) + require.NoError(t, err) + } + + pauseActivity := func(ctx context.Context, t *testing.T, activityID, runID string) { + t.Helper() + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: defaultIdentity, + Reason: "test-pause", + }) + require.NoError(t, err) + } + + unpauseActivity := func(ctx context.Context, t *testing.T, activityID, runID string) { + t.Helper() + _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: defaultIdentity, + }) + require.NoError(t, err) + } + + waitForState := func(ctx context.Context, t *testing.T, activityID, runID string, state enumspb.PendingActivityState) { + t.Helper() + require.Eventually(t, func() bool { + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + return err == nil && desc.GetInfo().GetRunState() == state + }, 5*time.Second, 100*time.Millisecond) + } + + t.Run("AfterRetry", 
func(t *testing.T) { + // Start activity, let it fail twice (attempt 3 backing off with long interval), + // then reset. Verify the next attempt starts at 1. + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + retryPolicy := &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Second), + BackoffCoefficient: 1.0, + } + startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, retryPolicy) + + // Fail attempt 1 with a short retry + failRetryable(ctx, t, pollResp1.TaskToken, time.Second) + + // Poll attempt 2 + pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err) + require.EqualValues(t, 2, pollResp2.Attempt) + + // Fail attempt 2 with a long backoff so the activity is SCHEDULED waiting + failRetryable(ctx, t, pollResp2.TaskToken, 60*time.Second) + + // Verify activity is SCHEDULED (backing off at attempt 3) + require.Eventually(t, func() bool { + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + if err != nil || desc.GetInfo() == nil { + return false + } + info := desc.GetInfo() + return info.GetRunState() == enumspb.PENDING_ACTIVITY_STATE_SCHEDULED && + info.GetAttempt() == 3 + }, 5*time.Second, 200*time.Millisecond) + + // Reset while SCHEDULED — should re-dispatch immediately at attempt 1 + resetActivity(ctx, t, activityID, startResp.GetRunId(), false) + + // Poll — should be attempt 1 + pollResp3, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + 
TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp3.Attempt, "attempt should be reset to 1") + + // Complete successfully + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp3.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + require.NoError(t, err) + }) + + t.Run("WhileRunning", func(t *testing.T) { + // Reset while the activity is STARTED. The reset is deferred to the next + // retry — the running attempt fails normally, then retries at attempt 1. + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + retryPolicy := &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Second), + BackoffCoefficient: 1.0, + } + startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, retryPolicy) + require.EqualValues(t, 1, pollResp1.Attempt) + + // Reset while running + resetActivity(ctx, t, activityID, startResp.GetRunId(), false) + + // Verify activity still appears as STARTED (reset is deferred) + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_STARTED, desc.GetInfo().GetRunState()) + + // Fail the running attempt — triggers deferred reset in TransitionRescheduled + failRetryable(ctx, t, pollResp1.TaskToken, 0) + + // Poll the retry — should be attempt 1 + pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: 
taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp2.Attempt, "attempt should be reset to 1 on retry after running reset") + + // Complete + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp2.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + require.NoError(t, err) + }) + + t.Run("WhileCancelRequested", func(t *testing.T) { + // Reset while the activity is in CANCEL_REQUESTED state. + // handleReset sets the ActivityReset flag (same deferred path as STARTED). + // NOTE: TransitionRescheduled currently only allows STARTED as a source state, so a + // CANCEL_REQUESTED activity that fails retryably goes to FAILED (terminal) rather + // than retrying — the ActivityReset flag would have no effect in that case. This test + // verifies: (1) the reset API succeeds, (2) the activity remains in CANCEL_REQUESTED + // with its state intact, and (3) the activity can still complete normally. Full + // deferred-reset verification (attempt count reset to 1) requires extending + // TransitionRescheduled to accept CANCEL_REQUESTED as a source state. 
+ ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + startResp, pollResp1, _ := startAndPollActivity(ctx, t, activityID, &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Second), + BackoffCoefficient: 1.0, + }) + require.EqualValues(t, 1, pollResp1.Attempt) + + // Request cancellation — moves to CANCEL_REQUESTED + _, err := s.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + Identity: defaultIdentity, + RequestId: testcore.RandomizeStr(activityID), + }) + require.NoError(t, err) + + // Verify CANCEL_REQUESTED state + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_CANCEL_REQUESTED, desc.GetInfo().GetRunState()) + + // Reset while CANCEL_REQUESTED — must succeed without error + resetActivity(ctx, t, activityID, startResp.GetRunId(), false) + + // Activity must still be in CANCEL_REQUESTED (reset is deferred, no immediate side effect) + desc, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_CANCEL_REQUESTED, desc.GetInfo().GetRunState()) + + // Worker ignores the cancel and completes — activity should complete cleanly + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp1.TaskToken, + Result: defaultResult, + Identity: 
defaultIdentity, + }) + require.NoError(t, err) + }) + + t.Run("InRetryWithLongInterval", func(t *testing.T) { + // Activity is backing off for a long interval. Reset re-dispatches immediately. + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + retryPolicy := &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Minute), // long backoff + BackoffCoefficient: 1.0, + } + startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, retryPolicy) + + // Fail attempt 1 — now backing off for 1 minute + failRetryable(ctx, t, pollResp1.TaskToken, 0) + + // Verify in SCHEDULED state + require.Eventually(t, func() bool { + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + return err == nil && + desc.GetInfo().GetRunState() == enumspb.PENDING_ACTIVITY_STATE_SCHEDULED + }, 5*time.Second, 200*time.Millisecond) + + // Reset — should bypass the 1-minute wait and dispatch immediately + resetActivity(ctx, t, activityID, startResp.GetRunId(), false) + + // Poll — task should be available immediately after reset (no long backoff) + pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err, "should receive task quickly after reset (no long backoff)") + require.EqualValues(t, 1, pollResp2.Attempt) + + // Complete + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp2.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + 
require.NoError(t, err) + }) + + t.Run("HeartbeatReset", func(t *testing.T) { + // Activity records heartbeats. Reset with resetHeartbeat=true clears them. + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + retryPolicy := &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Second), + BackoffCoefficient: 1.0, + } + startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, retryPolicy) + + // Record a heartbeat + _, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp1.TaskToken, + Details: defaultHeartbeatDetails, + Identity: defaultIdentity, + }) + require.NoError(t, err) + + // Verify heartbeat is visible in describe + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + require.NoError(t, err) + require.NotNil(t, desc.GetInfo().GetHeartbeatDetails()) + + // Fail the attempt with long backoff + failRetryable(ctx, t, pollResp1.TaskToken, 60*time.Second) + + // Wait for SCHEDULED state + require.Eventually(t, func() bool { + d, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + return err == nil && + d.GetInfo().GetRunState() == enumspb.PENDING_ACTIVITY_STATE_SCHEDULED + }, 5*time.Second, 200*time.Millisecond) + + // Reset with heartbeat reset + _, err = s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + ResetHeartbeat: true, + }) + require.NoError(t, err) + + // Poll — attempt 1, 
no heartbeat details + pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp2.Attempt) + require.Empty(t, pollResp2.HeartbeatDetails.GetPayloads(), "heartbeat details should be cleared after reset") + + // Complete + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp2.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + require.NoError(t, err) + }) + + t.Run("HeartbeatResetWhileRunning", func(t *testing.T) { + // Reset with resetHeartbeat=true while the activity is STARTED. + // The heartbeat clear is deferred — it only takes effect on the next retry, + // matching the behavior of the workflow activity HeartbeatDetails reset test. 
+ ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + retryPolicy := &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Second), + BackoffCoefficient: 1.0, + } + startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, retryPolicy) + + // Record a heartbeat while running + _, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp1.TaskToken, + Details: defaultHeartbeatDetails, + Identity: defaultIdentity, + }) + require.NoError(t, err) + + // Verify heartbeat is visible + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + require.NoError(t, err) + require.NotNil(t, desc.GetInfo().GetHeartbeatDetails()) + + // Reset with heartbeat reset while STARTED — deferred + resetActivity(ctx, t, activityID, startResp.GetRunId(), true) + + // Activity should still be STARTED with heartbeat still visible (reset is deferred) + desc, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_STARTED, desc.GetInfo().GetRunState()) + require.NotNil(t, desc.GetInfo().GetHeartbeatDetails(), "heartbeat should still be visible before the attempt fails") + + // Fail the running attempt — triggers deferred reset+heartbeat clear in TransitionRescheduled + failRetryable(ctx, t, pollResp1.TaskToken, 0) + + // Poll retry — attempt=1, heartbeat details cleared + pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + 
Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp2.Attempt, "attempt should be reset to 1") + require.Empty(t, pollResp2.HeartbeatDetails.GetPayloads(), "heartbeat details should be cleared after deferred reset") + + // Record a new heartbeat on the new attempt + _, err = s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp2.TaskToken, + Details: defaultHeartbeatDetails, + Identity: defaultIdentity, + }) + require.NoError(t, err) + + // Verify new heartbeat is visible in describe + desc, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + require.NoError(t, err) + require.NotNil(t, desc.GetInfo().GetHeartbeatDetails(), "new heartbeat from reset attempt should be visible") + + // Complete + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp2.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + require.NoError(t, err) + }) + + t.Run("TerminalStateReturnsFailedPrecondition", func(t *testing.T) { + // Resetting a completed activity should return FailedPrecondition. 
+ ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + startResp, pollResp, _ := startAndPollActivity(ctx, t, activityID, nil) + + // Complete the activity + _, err := s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + require.NoError(t, err) + + // Attempt to reset — should fail with FailedPrecondition since the activity is in a terminal state + _, err = s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + var failedPreconditionErr *serviceerror.FailedPrecondition + require.ErrorAs(t, err, &failedPreconditionErr) + }) + + t.Run("KeepPaused", func(t *testing.T) { + // Reset while activity is paused, with keepPaused=true. + // Verifies that the activity remains paused after reset and that the attempt + // count is reset to 1. 
+ ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + retryPolicy := &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Minute), // long backoff so we can pause while scheduled + BackoffCoefficient: 1.0, + } + startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, retryPolicy) + + // Fail attempt 1 with nextRetryDelay=0 so it enters the retry policy's long backoff + failRetryable(ctx, t, pollResp1.TaskToken, 0) + + // Wait for SCHEDULED state (retry backoff) + require.Eventually(t, func() bool { + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + return err == nil && + desc.GetInfo().GetRunState() == enumspb.PENDING_ACTIVITY_STATE_SCHEDULED + }, 5*time.Second, 200*time.Millisecond) + + // Pause the activity + _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + Identity: defaultIdentity, + Reason: "test pause before reset", + }) + require.NoError(t, err) + + // Verify activity is paused + require.Eventually(t, func() bool { + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + return err == nil && + desc.GetInfo().GetRunState() == enumspb.PENDING_ACTIVITY_STATE_PAUSED + }, 5*time.Second, 200*time.Millisecond) + + // Verify attempt count is >= 2 (failed at least once before pause) + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) +
require.NoError(t, err) + require.Greater(t, desc.GetInfo().GetAttempt(), int32(1)) + + // Reset with keepPaused=true — activity should remain paused but attempt reset to 1 + _, err = s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + KeepPaused: true, + }) + require.NoError(t, err) + + // Verify still paused with attempt=1 + require.Eventually(t, func() bool { + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + return err == nil && + desc.GetInfo().GetRunState() == enumspb.PENDING_ACTIVITY_STATE_PAUSED && + desc.GetInfo().GetAttempt() == int32(1) + }, 2*time.Second, 200*time.Millisecond) + + // Unpause the activity + _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + Identity: defaultIdentity, + }) + require.NoError(t, err) + + // Poll — should be attempt 1 after unpause + pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp2.Attempt) + + // Complete + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp2.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + require.NoError(t, err) + }) + + t.Run("RestoreOriginalOptions", func(t *testing.T) { + // Start activity with specific options, update them, 
then reset with + // RestoreOriginalOptions=true and verify the original options come back + // along with the attempt count being reset to 1. + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + originalMaxAttempts := int32(7) + retryPolicy := &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Second), + BackoffCoefficient: 1.0, + MaximumAttempts: originalMaxAttempts, + } + startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, retryPolicy) + + // Fail attempt 1 with a long backoff so the activity is SCHEDULED backing off. + failRetryable(ctx, t, pollResp1.TaskToken, 60*time.Second) + + require.Eventually(t, func() bool { + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + return err == nil && desc.GetInfo().GetRunState() == enumspb.PENDING_ACTIVITY_STATE_SCHEDULED + }, 5*time.Second, 100*time.Millisecond) + + // Update MaximumAttempts to a different value. 
+ updatedMaxAttempts := int32(100) + _, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + ActivityOptions: &activitypb.ActivityOptions{RetryPolicy: &commonpb.RetryPolicy{MaximumAttempts: updatedMaxAttempts}}, + UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"retry_policy.maximum_attempts"}}, + }) + require.NoError(t, err) + + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + require.NoError(t, err) + require.Equal(t, updatedMaxAttempts, desc.GetInfo().GetRetryPolicy().GetMaximumAttempts(), "update should be applied before reset") + + // Reset with RestoreOriginalOptions=true — options should revert and attempt reset to 1. + _, err = s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + RestoreOriginalOptions: true, + }) + require.NoError(t, err) + + // Verify original options are reflected in describe after reset. + desc, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + require.NoError(t, err) + require.EqualValues(t, 1, desc.GetInfo().GetAttempt(), "attempt should be reset to 1") + require.Equal(t, originalMaxAttempts, desc.GetInfo().GetRetryPolicy().GetMaximumAttempts(), "original MaximumAttempts should be restored") + + // Poll — should be attempt 1. 
+ pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp2.Attempt, "attempt should be reset to 1") + + // Complete the activity. + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp2.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + require.NoError(t, err) + }) + + t.Run("ScheduledWithPauseStateKeepPausedFalse", func(t *testing.T) { + // SCHEDULED status with non-nil PauseState (RunState=PAUSED), reset with keepPaused=false. + // Verify: PauseState is cleared, attempt count reset to 1, activity dispatches. + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Second), + BackoffCoefficient: 1.0, + }) + + // Fail attempt 1 → SCHEDULED backoff. + failRetryable(ctx, t, pollResp1.TaskToken, 0) + waitForState(ctx, t, activityID, startResp.GetRunId(), enumspb.PENDING_ACTIVITY_STATE_SCHEDULED) + + // Pause → PAUSED (SCHEDULED status + non-nil PauseState). + pauseActivity(ctx, t, activityID, startResp.GetRunId()) + waitForState(ctx, t, activityID, startResp.GetRunId(), enumspb.PENDING_ACTIVITY_STATE_PAUSED) + + // Reset with keepPaused=false — should clear PauseState and dispatch at attempt 1. 
_, err := s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ Namespace: s.Namespace().String(), - ActivityId: testcore.RandomizeStr(t.Name()), - Identity: "test-identity", + ActivityId: activityID, + RunId: startResp.GetRunId(), + KeepPaused: false, }) - require.Error(t, err) - var unimplementedErr *serviceerror.Unimplemented - require.ErrorAs(t, err, &unimplementedErr) + require.NoError(t, err) + + pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp2.Attempt, "attempt should be reset to 1") + + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp2.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + require.NoError(t, err) + }) + + t.Run("StartedWithPauseStateKeepPausedFalse", func(t *testing.T) { + // STARTED status with non-nil PauseState (RunState=PAUSE_REQUESTED), reset with keepPaused=false. + // Reset is deferred; PauseState must be cleared eagerly so the next retry dispatches. + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Second), + BackoffCoefficient: 1.0, + }) + require.EqualValues(t, 1, pollResp1.Attempt) + + // Pause while STARTED → PauseState set, status remains STARTED (PAUSE_REQUESTED). 
+ pauseActivity(ctx, t, activityID, startResp.GetRunId()) + waitForState(ctx, t, activityID, startResp.GetRunId(), enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED) + + // Reset with keepPaused=false — deferred; must also clear PauseState so dispatch isn't blocked. + _, err := s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + KeepPaused: false, + }) + require.NoError(t, err) + + // Fail the running attempt — triggers TransitionRescheduled with the deferred reset. + failRetryable(ctx, t, pollResp1.TaskToken, 0) + + // Activity should dispatch (not be stuck paused) at attempt 1. + pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp2.Attempt, "attempt should be reset to 1 after deferred reset") + + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp2.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + require.NoError(t, err) + }) + + t.Run("StartedWithPauseStateKeepPausedTrue", func(t *testing.T) { + // STARTED status with non-nil PauseState (RunState=PAUSE_REQUESTED), reset with keepPaused=true. + // Reset is deferred; PauseState is preserved so after the retry the activity stays paused. 
+ ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Second), + BackoffCoefficient: 1.0, + }) + require.EqualValues(t, 1, pollResp1.Attempt) + + // Pause while STARTED. + pauseActivity(ctx, t, activityID, startResp.GetRunId()) + waitForState(ctx, t, activityID, startResp.GetRunId(), enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED) + + // Reset with keepPaused=true — deferred; PauseState should be preserved. + _, err := s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + KeepPaused: true, + }) + require.NoError(t, err) + + // Fail the running attempt. + failRetryable(ctx, t, pollResp1.TaskToken, 0) + + // Activity should be PAUSED at attempt 1 (deferred reset + preserved pause). + require.Eventually(t, func() bool { + desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + }) + return err == nil && + desc.GetInfo().GetRunState() == enumspb.PENDING_ACTIVITY_STATE_PAUSED && + desc.GetInfo().GetAttempt() == int32(1) + }, 5*time.Second, 100*time.Millisecond) + + // Unpause and verify dispatch at attempt 1. 
+ unpauseActivity(ctx, t, activityID, startResp.GetRunId()) + + pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp2.Attempt) + + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp2.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + require.NoError(t, err) + }) + + t.Run("Jitter", func(t *testing.T) { + // A non-zero jitter should delay the dispatch task by at most jitter duration. + // Verify the activity is not immediately available (still SCHEDULED briefly) and + // then dispatches within the jitter window. + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + activityID := testcore.RandomizeStr(t.Name()) + startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, &commonpb.RetryPolicy{ + InitialInterval: durationpb.New(time.Second), + BackoffCoefficient: 1.0, + }) + + // Fail attempt 1 so the activity is SCHEDULED in retry backoff. + failRetryable(ctx, t, pollResp1.TaskToken, 60*time.Second) + waitForState(ctx, t, activityID, startResp.GetRunId(), enumspb.PENDING_ACTIVITY_STATE_SCHEDULED) + + jitter := 3 * time.Second + resetStart := time.Now() + _, err := s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.GetRunId(), + Jitter: durationpb.New(jitter), + }) + require.NoError(t, err) + + // Activity should dispatch within [now, now+jitter+buffer]. 
+ pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: defaultIdentity, + }) + require.NoError(t, err) + require.EqualValues(t, 1, pollResp2.Attempt, "attempt should be reset to 1") + require.WithinDuration(t, resetStart.Add(jitter), time.Now(), jitter+5*time.Second, + "activity should dispatch within jitter window") + + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp2.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + require.NoError(t, err) }) } From fc4ef1dec2e54c3b13fd2c77c3ff0a6a66bff747 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Wed, 29 Apr 2026 09:48:18 -0400 Subject: [PATCH 07/25] fixing bad rebase --- .../gen/activitypb/v1/activity_state.pb.go | 68 ++- .../activity/proto/v1/activity_state.proto | 4 + chasm/lib/activity/validator_test.go | 24 + .../gen/nexusoperationpb/v1/service.pb.go | 12 +- tests/standalone_activity_test.go | 511 ++++++++++++++++++ 5 files changed, 586 insertions(+), 33 deletions(-) diff --git a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go index 76e67ac0763..16005129b18 100644 --- a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go @@ -212,8 +212,11 @@ type ActivityState struct { ActivityReset bool `protobuf:"varint,16,opt,name=activity_reset,json=activityReset,proto3" json:"activity_reset,omitempty"` // Set alongside activity_reset when heartbeat details should be cleared on the next retry. 
ResetHeartbeats bool `protobuf:"varint,17,opt,name=reset_heartbeats,json=resetHeartbeats,proto3" json:"reset_heartbeats,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + // Delays the first dispatch of the activity. Extends ScheduleToClose and ScheduleToStart + // timeouts by this duration. StartToClose and Heartbeat timeouts are unaffected. + StartDelay *durationpb.Duration `protobuf:"bytes,18,opt,name=start_delay,json=startDelay,proto3" json:"start_delay,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *ActivityState) Reset() { @@ -365,6 +368,13 @@ func (x *ActivityState) GetResetHeartbeats() bool { return false } +func (x *ActivityState) GetStartDelay() *durationpb.Duration { + if x != nil { + return x.StartDelay + } + return nil +} + type ActivityCancelState struct { state protoimpl.MessageState `protogen:"open.v1"` RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` @@ -1024,7 +1034,8 @@ var File_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto protor const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDesc = "" + "\n" + - "@temporal/server/chasm/lib/activity/proto/v1/activity_state.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&temporal/api/activity/v1/message.proto\x1a$temporal/api/common/v1/message.proto\x1a(temporal/api/deployment/v1/message.proto\x1a%temporal/api/failure/v1/message.proto\x1a'temporal/api/sdk/v1/user_metadata.proto\x1a'temporal/api/taskqueue/v1/message.proto\"\xfb\t\n" + + 
"@temporal/server/chasm/lib/activity/proto/v1/activity_state.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&temporal/api/activity/v1/message.proto\x1a$temporal/api/common/v1/message.proto\x1a(temporal/api/deployment/v1/message.proto\x1a%temporal/api/failure/v1/message.proto\x1a'temporal/api/sdk/v1/user_metadata.proto\x1a'temporal/api/taskqueue/v1/message.proto\"\xb7\n" + + "\n" + "\rActivityState\x12I\n" + "\ractivity_type\x18\x01 \x01(\v2$.temporal.api.common.v1.ActivityTypeR\factivityType\x12C\n" + "\n" + @@ -1045,7 +1056,9 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\vpause_state\x18\x0f \x01(\v2?.temporal.server.chasm.lib.activity.proto.v1.ActivityPauseStateR\n" + "pauseState\x12%\n" + "\x0eactivity_reset\x18\x10 \x01(\bR\ractivityReset\x12)\n" + - "\x10reset_heartbeats\x18\x11 \x01(\bR\x0fresetHeartbeats\"\xa7\x01\n" + + "\x10reset_heartbeats\x18\x11 \x01(\bR\x0fresetHeartbeats\x12:\n" + + "\vstart_delay\x18\x12 \x01(\v2\x19.google.protobuf.DurationR\n" + + "startDelay\"\xa7\x01\n" + "\x13ActivityCancelState\x12\x1d\n" + "\n" + "request_id\x18\x01 \x01(\tR\trequestId\x12=\n" + @@ -1160,29 +1173,30 @@ var file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_depIdx 3, // 11: temporal.server.chasm.lib.activity.proto.v1.ActivityState.terminate_state:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityTerminateState 18, // 12: temporal.server.chasm.lib.activity.proto.v1.ActivityState.original_options:type_name -> temporal.api.activity.v1.ActivityOptions 4, // 13: temporal.server.chasm.lib.activity.proto.v1.ActivityState.pause_state:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityPauseState - 16, // 14: temporal.server.chasm.lib.activity.proto.v1.ActivityCancelState.request_time:type_name -> google.protobuf.Timestamp - 16, // 15: 
temporal.server.chasm.lib.activity.proto.v1.ActivityPauseState.pause_time:type_name -> google.protobuf.Timestamp - 14, // 16: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.current_retry_interval:type_name -> google.protobuf.Duration - 16, // 17: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.started_time:type_name -> google.protobuf.Timestamp - 16, // 18: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.complete_time:type_name -> google.protobuf.Timestamp - 9, // 19: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.last_failure_details:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails - 19, // 20: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.last_deployment_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion - 20, // 21: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState.details:type_name -> temporal.api.common.v1.Payloads - 16, // 22: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState.recorded_time:type_name -> google.protobuf.Timestamp - 20, // 23: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.input:type_name -> temporal.api.common.v1.Payloads - 21, // 24: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.header:type_name -> temporal.api.common.v1.Header - 22, // 25: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.user_metadata:type_name -> temporal.api.sdk.v1.UserMetadata - 10, // 26: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.successful:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful - 11, // 27: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.failed:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed - 16, // 28: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails.time:type_name -> 
google.protobuf.Timestamp - 23, // 29: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails.failure:type_name -> temporal.api.failure.v1.Failure - 20, // 30: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful.output:type_name -> temporal.api.common.v1.Payloads - 23, // 31: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed.failure:type_name -> temporal.api.failure.v1.Failure - 32, // [32:32] is the sub-list for method output_type - 32, // [32:32] is the sub-list for method input_type - 32, // [32:32] is the sub-list for extension type_name - 32, // [32:32] is the sub-list for extension extendee - 0, // [0:32] is the sub-list for field type_name + 14, // 14: temporal.server.chasm.lib.activity.proto.v1.ActivityState.start_delay:type_name -> google.protobuf.Duration + 16, // 15: temporal.server.chasm.lib.activity.proto.v1.ActivityCancelState.request_time:type_name -> google.protobuf.Timestamp + 16, // 16: temporal.server.chasm.lib.activity.proto.v1.ActivityPauseState.pause_time:type_name -> google.protobuf.Timestamp + 14, // 17: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.current_retry_interval:type_name -> google.protobuf.Duration + 16, // 18: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.started_time:type_name -> google.protobuf.Timestamp + 16, // 19: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.complete_time:type_name -> google.protobuf.Timestamp + 9, // 20: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.last_failure_details:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails + 19, // 21: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.last_deployment_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion + 20, // 22: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState.details:type_name -> temporal.api.common.v1.Payloads + 16, // 
23: temporal.server.chasm.lib.activity.proto.v1.ActivityHeartbeatState.recorded_time:type_name -> google.protobuf.Timestamp + 20, // 24: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.input:type_name -> temporal.api.common.v1.Payloads + 21, // 25: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.header:type_name -> temporal.api.common.v1.Header + 22, // 26: temporal.server.chasm.lib.activity.proto.v1.ActivityRequestData.user_metadata:type_name -> temporal.api.sdk.v1.UserMetadata + 10, // 27: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.successful:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful + 11, // 28: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.failed:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed + 16, // 29: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails.time:type_name -> google.protobuf.Timestamp + 23, // 30: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.LastFailureDetails.failure:type_name -> temporal.api.failure.v1.Failure + 20, // 31: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Successful.output:type_name -> temporal.api.common.v1.Payloads + 23, // 32: temporal.server.chasm.lib.activity.proto.v1.ActivityOutcome.Failed.failure:type_name -> temporal.api.failure.v1.Failure + 33, // [33:33] is the sub-list for method output_type + 33, // [33:33] is the sub-list for method input_type + 33, // [33:33] is the sub-list for extension type_name + 33, // [33:33] is the sub-list for extension extendee + 0, // [0:33] is the sub-list for field type_name } func init() { file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_init() } diff --git a/chasm/lib/activity/proto/v1/activity_state.proto b/chasm/lib/activity/proto/v1/activity_state.proto index 1d913782b19..7e02bd633e6 100644 --- a/chasm/lib/activity/proto/v1/activity_state.proto +++ 
b/chasm/lib/activity/proto/v1/activity_state.proto @@ -120,6 +120,10 @@ message ActivityState { // Set alongside activity_reset when heartbeat details should be cleared on the next retry. bool reset_heartbeats = 17; + + // Delays the first dispatch of the activity. Extends ScheduleToClose and ScheduleToStart + // timeouts by this duration. StartToClose and Heartbeat timeouts are unaffected. + google.protobuf.Duration start_delay = 18; } message ActivityCancelState { diff --git a/chasm/lib/activity/validator_test.go b/chasm/lib/activity/validator_test.go index 8d9d030ea17..2433459ab79 100644 --- a/chasm/lib/activity/validator_test.go +++ b/chasm/lib/activity/validator_test.go @@ -688,6 +688,30 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { }) } +func TestValidateStartDelay(t *testing.T) { + t.Run("NilDuration", func(t *testing.T) { + err := validateStartDelay(nil) + require.NoError(t, err) + }) + + t.Run("ZeroDuration", func(t *testing.T) { + err := validateStartDelay(durationpb.New(0)) + require.NoError(t, err) + }) + + t.Run("ValidDuration", func(t *testing.T) { + err := validateStartDelay(durationpb.New(5 * time.Second)) + require.NoError(t, err) + }) + + t.Run("NegativeDuration", func(t *testing.T) { + err := validateStartDelay(durationpb.New(-1 * time.Second)) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + require.Contains(t, invalidArgErr.Message, "invalid StartDelay") + }) +} + func TestValidatePauseActivityExecutionRequest(t *testing.T) { t.Run("Success", func(t *testing.T) { req := &workflowservice.PauseActivityExecutionRequest{ diff --git a/chasm/lib/nexusoperation/gen/nexusoperationpb/v1/service.pb.go b/chasm/lib/nexusoperation/gen/nexusoperationpb/v1/service.pb.go index 115d4ecd863..2aa7b9b1ca5 100644 --- a/chasm/lib/nexusoperation/gen/nexusoperationpb/v1/service.pb.go +++ b/chasm/lib/nexusoperation/gen/nexusoperationpb/v1/service.pb.go @@ -29,12 +29,12 @@ const 
file_temporal_server_chasm_lib_nexusoperation_proto_v1_service_proto_rawDe "\n" + "?temporal/server/chasm/lib/nexusoperation/proto/v1/service.proto\x121temporal.server.chasm.lib.nexusoperation.proto.v1\x1aHtemporal/server/chasm/lib/nexusoperation/proto/v1/request_response.proto\x1a0temporal/server/api/common/v1/api_category.proto\x1a.temporal/server/api/routing/v1/extension.proto2\x90\v\n" + "\x15NexusOperationService\x12\xdf\x01\n" + - "\x13StartNexusOperation\x12M.temporal.server.chasm.lib.nexusoperation.proto.v1.StartNexusOperationRequest\x1aN.temporal.server.chasm.lib.nexusoperation.proto.v1.StartNexusOperationResponse\")\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1f\x1a\x1dfrontend_request.operation_id\x12\xe8\x01\n" + - "\x16DescribeNexusOperation\x12P.temporal.server.chasm.lib.nexusoperation.proto.v1.DescribeNexusOperationRequest\x1aQ.temporal.server.chasm.lib.nexusoperation.proto.v1.DescribeNexusOperationResponse\")\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1f\x1a\x1dfrontend_request.operation_id\x12\xf7\x01\n" + - "\x1bRequestCancelNexusOperation\x12U.temporal.server.chasm.lib.nexusoperation.proto.v1.RequestCancelNexusOperationRequest\x1aV.temporal.server.chasm.lib.nexusoperation.proto.v1.RequestCancelNexusOperationResponse\")\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1f\x1a\x1dfrontend_request.operation_id\x12\xeb\x01\n" + - "\x17TerminateNexusOperation\x12Q.temporal.server.chasm.lib.nexusoperation.proto.v1.TerminateNexusOperationRequest\x1aR.temporal.server.chasm.lib.nexusoperation.proto.v1.TerminateNexusOperationResponse\")\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1f\x1a\x1dfrontend_request.operation_id\x12\xe2\x01\n" + - "\x14DeleteNexusOperation\x12N.temporal.server.chasm.lib.nexusoperation.proto.v1.DeleteNexusOperationRequest\x1aO.temporal.server.chasm.lib.nexusoperation.proto.v1.DeleteNexusOperationResponse\")\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1f\x1a\x1dfrontend_request.operation_id\x12\xdc\x01\n" + - 
"\x12PollNexusOperation\x12L.temporal.server.chasm.lib.nexusoperation.proto.v1.PollNexusOperationRequest\x1aM.temporal.server.chasm.lib.nexusoperation.proto.v1.PollNexusOperationResponse\")\x8a\xb5\x18\x02\b\x02\xd2\xc3\x18\x1f\x1a\x1dfrontend_request.operation_idBVZTgo.temporal.io/server/chasm/lib/nexusoperation/gen/nexusoperationpb;nexusoperationpbb\x06proto3" + "\x13StartNexusOperation\x12M.temporal.server.chasm.lib.nexusoperation.proto.v1.StartNexusOperationRequest\x1aN.temporal.server.chasm.lib.nexusoperation.proto.v1.StartNexusOperationResponse\")\x92\xc4\x03\x1f\x1a\x1dfrontend_request.operation_id\x8a\xb5\x18\x02\b\x01\x12\xe8\x01\n" + + "\x16DescribeNexusOperation\x12P.temporal.server.chasm.lib.nexusoperation.proto.v1.DescribeNexusOperationRequest\x1aQ.temporal.server.chasm.lib.nexusoperation.proto.v1.DescribeNexusOperationResponse\")\x92\xc4\x03\x1f\x1a\x1dfrontend_request.operation_id\x8a\xb5\x18\x02\b\x01\x12\xf7\x01\n" + + "\x1bRequestCancelNexusOperation\x12U.temporal.server.chasm.lib.nexusoperation.proto.v1.RequestCancelNexusOperationRequest\x1aV.temporal.server.chasm.lib.nexusoperation.proto.v1.RequestCancelNexusOperationResponse\")\x92\xc4\x03\x1f\x1a\x1dfrontend_request.operation_id\x8a\xb5\x18\x02\b\x01\x12\xeb\x01\n" + + "\x17TerminateNexusOperation\x12Q.temporal.server.chasm.lib.nexusoperation.proto.v1.TerminateNexusOperationRequest\x1aR.temporal.server.chasm.lib.nexusoperation.proto.v1.TerminateNexusOperationResponse\")\x92\xc4\x03\x1f\x1a\x1dfrontend_request.operation_id\x8a\xb5\x18\x02\b\x01\x12\xe2\x01\n" + + "\x14DeleteNexusOperation\x12N.temporal.server.chasm.lib.nexusoperation.proto.v1.DeleteNexusOperationRequest\x1aO.temporal.server.chasm.lib.nexusoperation.proto.v1.DeleteNexusOperationResponse\")\x92\xc4\x03\x1f\x1a\x1dfrontend_request.operation_id\x8a\xb5\x18\x02\b\x01\x12\xdc\x01\n" + + 
"\x12PollNexusOperation\x12L.temporal.server.chasm.lib.nexusoperation.proto.v1.PollNexusOperationRequest\x1aM.temporal.server.chasm.lib.nexusoperation.proto.v1.PollNexusOperationResponse\")\x92\xc4\x03\x1f\x1a\x1dfrontend_request.operation_id\x8a\xb5\x18\x02\b\x02BVZTgo.temporal.io/server/chasm/lib/nexusoperation/gen/nexusoperationpb;nexusoperationpbb\x06proto3" var file_temporal_server_chasm_lib_nexusoperation_proto_v1_service_proto_goTypes = []any{ (*StartNexusOperationRequest)(nil), // 0: temporal.server.chasm.lib.nexusoperation.proto.v1.StartNexusOperationRequest diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index 84ac70f6848..c65cb6d5da6 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -4,10 +4,13 @@ import ( "context" "errors" "fmt" + "io" + "net/http/httptest" "testing" "time" "github.com/google/go-cmp/cmp" + "github.com/nexus-rpc/sdk-go/nexus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" activitypb "go.temporal.io/api/activity/v1" @@ -19,9 +22,13 @@ import ( "go.temporal.io/api/serviceerror" taskqueuepb "go.temporal.io/api/taskqueue/v1" "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/sdk/temporal" "go.temporal.io/server/chasm/lib/activity" + "go.temporal.io/server/chasm/lib/callback" "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/log" + commonnexus "go.temporal.io/server/common/nexus" + "go.temporal.io/server/common/nexus/nexusrpc" "go.temporal.io/server/common/payload" "go.temporal.io/server/common/payloads" "go.temporal.io/server/common/tasktoken" @@ -6954,6 +6961,510 @@ func (s *standaloneActivityTestSuite) TestCallbacks() { }) } +func (s *standaloneActivityTestSuite) runNexusCompletionHTTPServer(t *testing.T, h *completionHandler) string { + hh := nexusrpc.NewCompletionHTTPHandler(nexusrpc.CompletionHandlerOptions{Handler: h}) + srv := httptest.NewServer(hh) + t.Cleanup(func() { + srv.Close() + }) + return 
srv.URL +} + +func (s *standaloneActivityTestSuite) TestCallbacks() { + t := s.T() + ctx, cancel := context.WithTimeout(t.Context(), 15*time.Second) + defer cancel() + + s.OverrideDynamicConfig( + callbacks.AllowedAddresses, + []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, + ) + + t.Run("AcceptedOnStart", func(t *testing.T) { + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + resp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + }, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.Any().String(), + CompletionCallbacks: []*commonpb.Callback{{ + Variant: &commonpb.Callback_Nexus_{ + Nexus: &commonpb.Callback_Nexus{ + Url: "http://localhost/callback", + }, + }, + }}, + }) + require.NoError(t, err) + require.True(t, resp.Started) + require.NotEmpty(t, resp.RunId) + }) + + t.Run("MultipleCallbacksAccepted", func(t *testing.T) { + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + resp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + }, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.Any().String(), + CompletionCallbacks: []*commonpb.Callback{ + {Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: "http://localhost/callback1"}}}, + {Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: "http://localhost/callback2"}}}, + }, + 
}) + require.NoError(t, err) + require.True(t, resp.Started) + require.NotEmpty(t, resp.RunId) + }) + + t.Run("DescribeIncludesCallbackInfo", func(t *testing.T) { + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + callbackURL := "http://localhost/describe-callback" + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + }, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.Any().String(), + CompletionCallbacks: []*commonpb.Callback{ + {Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: callbackURL}}}, + }, + }) + require.NoError(t, err) + + describeResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: startResp.RunId, + }) + require.NoError(t, err) + + require.Len(t, describeResp.Callbacks, 1) + cbInfo := describeResp.Callbacks[0] + require.NotNil(t, cbInfo.GetTrigger().GetActivityClosed()) + require.Equal(t, callbackURL, cbInfo.GetInfo().GetCallback().GetNexus().GetUrl()) + require.Equal(t, enumspb.CALLBACK_STATE_STANDBY, cbInfo.GetInfo().GetState()) + require.NotNil(t, cbInfo.GetInfo().GetRegistrationTime()) + }) + + t.Run("ExceedsMaxCallbacksLimit", func(t *testing.T) { + maxCallbacks := 1 + s.OverrideDynamicConfig( + callback.MaxPerExecution, + maxCallbacks, + ) + + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: 
s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + }, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.Any().String(), + // Two callbacks when overridden max dynamic config is 1, so should error. + CompletionCallbacks: []*commonpb.Callback{ + {Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: "http://localhost/callback1"}}}, + {Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: "http://localhost/callback2"}}}, + }, + }) + require.Error(t, err) + require.ErrorContains(t, err, fmt.Sprintf("cannot attach more than %d callbacks", maxCallbacks)) + }) + + t.Run("CompletesWithCallbacks", func(t *testing.T) { + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + ch := &completionHandler{ + requestCh: make(chan *nexusrpc.CompletionRequest, 1), + requestCompleteCh: make(chan error, 1), + } + defer func() { + close(ch.requestCh) + close(ch.requestCompleteCh) + }() + callbackAddress := s.runNexusCompletionHTTPServer(t, ch) + + _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + }, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.Any().String(), + CompletionCallbacks: []*commonpb.Callback{{ + Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: callbackAddress}}, + }}, + }) + require.NoError(t, err) + + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: 
s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Result: defaultResult, + Identity: defaultIdentity, + }) + require.NoError(t, err) + + // Verify the callback was actually delivered with the correct result. + select { + case completion := <-ch.requestCh: + require.Equal(t, nexus.OperationStateSucceeded, completion.State) + require.False(t, completion.StartTime.IsZero()) + require.False(t, completion.CloseTime.IsZero()) + body, readErr := io.ReadAll(completion.HTTPRequest.Body) + _ = completion.HTTPRequest.Body.Close() + require.NoError(t, readErr) + require.JSONEq(t, string(defaultResult.Payloads[0].Data), string(body)) + // Unblock CompleteOperation so it returns 200 OK to the callback library + ch.requestCompleteCh <- nil + case <-ctx.Done(): + require.Fail(t, "timed out waiting for completion callback") + } + + // Verify the activity is in completed state. 
+ descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_COMPLETED, descResp.GetInfo().GetStatus()) + }) + + t.Run("FailsWithCallbacks", func(t *testing.T) { + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + ch := &completionHandler{ + requestCh: make(chan *nexusrpc.CompletionRequest, 1), + requestCompleteCh: make(chan error, 1), + } + defer func() { + close(ch.requestCh) + close(ch.requestCompleteCh) + }() + callbackAddress := s.runNexusCompletionHTTPServer(t, ch) + + _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + }, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.Any().String(), + CompletionCallbacks: []*commonpb.Callback{{ + Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: callbackAddress}}, + }}, + }) + require.NoError(t, err) + + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Failure: defaultFailure, + Identity: defaultIdentity, + }) + require.NoError(t, err) + + // Verify the callback was actually delivered with failure state. 
+ select { + case completion := <-ch.requestCh: + require.Equal(t, nexus.OperationStateFailed, completion.State) + require.False(t, completion.StartTime.IsZero()) + require.False(t, completion.CloseTime.IsZero()) + var failureErr *nexus.FailureError + require.ErrorAs(t, completion.Error.Cause, &failureErr) + tFailure, convErr := commonnexus.NexusFailureToTemporalFailure(failureErr.Failure) + require.NoError(t, convErr) + sdkErr := temporal.GetDefaultFailureConverter().FailureToError(tFailure) + var appErr *temporal.ApplicationError + require.ErrorAs(t, sdkErr, &appErr) + require.Equal(t, defaultFailure.Message, appErr.Message()) + ch.requestCompleteCh <- nil + case <-ctx.Done(): + require.Fail(t, "timed out waiting for completion callback") + } + + // Verify the activity is in failed state. + descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_FAILED, descResp.GetInfo().GetStatus()) + }) + + t.Run("TerminatedWithCallbacks", func(t *testing.T) { + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + ch := &completionHandler{ + requestCh: make(chan *nexusrpc.CompletionRequest, 1), + requestCompleteCh: make(chan error, 1), + } + defer func() { + close(ch.requestCh) + close(ch.requestCompleteCh) + }() + callbackAddress := s.runNexusCompletionHTTPServer(t, ch) + + startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + }, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.Any().String(), + CompletionCallbacks: 
[]*commonpb.Callback{{ + Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: callbackAddress}}, + }}, + }) + require.NoError(t, err) + runID := startResp.RunId + + _, err = s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + reason := "Test Termination" + _, err = s.FrontendClient().TerminateActivityExecution(ctx, &workflowservice.TerminateActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Reason: reason, + Identity: "terminator", + }) + require.NoError(t, err) + + // Verify the callback was delivered with failure state (terminated maps to failed). + select { + case completion := <-ch.requestCh: + require.Equal(t, nexus.OperationStateFailed, completion.State) + require.False(t, completion.StartTime.IsZero()) + require.False(t, completion.CloseTime.IsZero()) + var failureErr *nexus.FailureError + require.ErrorAs(t, completion.Error.Cause, &failureErr) + tFailure, convErr := commonnexus.NexusFailureToTemporalFailure(failureErr.Failure) + require.NoError(t, convErr) + sdkErr := temporal.GetDefaultFailureConverter().FailureToError(tFailure) + var termErr *temporal.TerminatedError + require.ErrorAs(t, sdkErr, &termErr) + ch.requestCompleteCh <- nil + case <-ctx.Done(): + require.Fail(t, "timed out waiting for completion callback") + } + + descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_TERMINATED, descResp.GetInfo().GetStatus()) + }) + + t.Run("CanceledWithCallbacks", func(t *testing.T) { + activityID := testcore.RandomizeStr(t.Name()) + taskQueue 
:= testcore.RandomizeStr(t.Name()) + + ch := &completionHandler{ + requestCh: make(chan *nexusrpc.CompletionRequest, 1), + requestCompleteCh: make(chan error, 1), + } + defer func() { + close(ch.requestCh) + close(ch.requestCompleteCh) + }() + callbackAddress := s.runNexusCompletionHTTPServer(t, ch) + + _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + }, + StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), + RequestId: s.tv.Any().String(), + CompletionCallbacks: []*commonpb.Callback{{ + Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: callbackAddress}}, + }}, + }) + require.NoError(t, err) + + pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: s.tv.WorkerIdentity(), + }) + require.NoError(t, err) + + _, err = s.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + Identity: "cancelling-worker", + RequestId: s.tv.Any().String(), + Reason: "Test Cancellation", + }) + require.NoError(t, err) + + _, err = s.FrontendClient().RespondActivityTaskCanceled(ctx, &workflowservice.RespondActivityTaskCanceledRequest{ + Namespace: s.Namespace().String(), + TaskToken: pollResp.TaskToken, + Identity: defaultIdentity, + }) + require.NoError(t, err) + + // Verify the callback was delivered with canceled state. 
+ select { + case completion := <-ch.requestCh: + require.Equal(t, nexus.OperationStateCanceled, completion.State) + require.False(t, completion.StartTime.IsZero()) + require.False(t, completion.CloseTime.IsZero()) + var failureErr *nexus.FailureError + require.ErrorAs(t, completion.Error.Cause, &failureErr) + tFailure, convErr := commonnexus.NexusFailureToTemporalFailure(failureErr.Failure) + require.NoError(t, convErr) + sdkErr := temporal.GetDefaultFailureConverter().FailureToError(tFailure) + var canceledErr *temporal.CanceledError + require.ErrorAs(t, sdkErr, &canceledErr) + ch.requestCompleteCh <- nil + case <-ctx.Done(): + require.Fail(t, "timed out waiting for completion callback") + } + + descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_CANCELED, descResp.GetInfo().GetStatus()) + }) + + // This test covers the timeout callback path using schedule-to-start, but the callback behavior + // is the same for all timeout types (schedule-to-close, start-to-close, heartbeat). 
+ t.Run("TimeoutWithCallbacks", func(t *testing.T) { + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + ch := &completionHandler{ + requestCh: make(chan *nexusrpc.CompletionRequest, 1), + requestCompleteCh: make(chan error, 1), + } + defer func() { + close(ch.requestCh) + close(ch.requestCompleteCh) + }() + callbackAddress := s.runNexusCompletionHTTPServer(t, ch) + + _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + ActivityType: s.tv.ActivityType(), + Identity: s.tv.WorkerIdentity(), + Input: defaultInput, + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + }, + StartToCloseTimeout: durationpb.New(1 * time.Minute), + ScheduleToStartTimeout: durationpb.New(1 * time.Second), + RequestId: s.tv.Any().String(), + CompletionCallbacks: []*commonpb.Callback{{ + Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: callbackAddress}}, + }}, + }) + require.NoError(t, err) + + // No worker polls — activity will time out waiting to be started. + + // Verify the callback is delivered with failure state and non-zero CloseTime. + select { + case completion := <-ch.requestCh: + require.Equal(t, nexus.OperationStateFailed, completion.State) + var failureErr *nexus.FailureError + require.ErrorAs(t, completion.Error.Cause, &failureErr) + tFailure, convErr := commonnexus.NexusFailureToTemporalFailure(failureErr.Failure) + require.NoError(t, convErr) + sdkErr := temporal.GetDefaultFailureConverter().FailureToError(tFailure) + var timeoutErr *temporal.TimeoutError + require.ErrorAs(t, sdkErr, &timeoutErr) + require.False(t, completion.StartTime.IsZero()) + require.False(t, completion.CloseTime.IsZero()) + ch.requestCompleteCh <- nil + case <-ctx.Done(): + require.Fail(t, "timed out waiting for completion callback") + } + + // Verify the activity is in timed-out state. 
+ descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_TIMED_OUT, descResp.GetInfo().GetStatus()) + }) +} + func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { t := s.T() From a5fcee56aeaef2e5a155b4ddfbf24f569d8abd74 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Wed, 29 Apr 2026 09:54:04 -0400 Subject: [PATCH 08/25] imports --- tests/standalone_activity_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index c65cb6d5da6..7a4e1008f37 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -9,7 +9,6 @@ import ( "testing" "time" - "github.com/google/go-cmp/cmp" "github.com/nexus-rpc/sdk-go/nexus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -3314,7 +3313,7 @@ func (s *standaloneActivityTestSuite) TestDescribeActivityExecution_WaitAnyState "state_transition_count", ), ) - require.Empty(t, diff) + taskQueuePollErr := make(chan error, 1) activityPollDone := make(chan struct{}) @@ -3374,7 +3373,7 @@ func (s *standaloneActivityTestSuite) TestDescribeActivityExecution_WaitAnyState "state_transition_count", ), ) - require.Empty(t, diff) + protorequire.ProtoEqual(t, defaultInput, describeResp.Input) From 7bd3f04590c13a0966f6f79a0adab426d2532d20 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Wed, 29 Apr 2026 11:26:10 -0400 Subject: [PATCH 09/25] fixing heartbeat issues --- chasm/lib/activity/activity.go | 7 ++++-- .../gen/activitypb/v1/activity_state.pb.go | 20 ++++++++++++---- .../activity/proto/v1/activity_state.proto | 2 ++ tests/activity_api_update_test.go | 16 ++++++------- tests/standalone_activity_test.go | 24 +++++++++---------- 5 files changed, 41 insertions(+), 28 deletions(-) 
diff --git a/chasm/lib/activity/activity.go b/chasm/lib/activity/activity.go index 95284646001..40cc2d9306b 100644 --- a/chasm/lib/activity/activity.go +++ b/chasm/lib/activity/activity.go @@ -1167,9 +1167,11 @@ func (a *Activity) RecordHeartbeat( if err != nil { return nil, err } + prevHeartbeat, _ := a.LastHeartbeat.TryGet(ctx) a.LastHeartbeat = chasm.NewDataField(ctx, &activitypb.ActivityHeartbeatState{ - RecordedTime: timestamppb.New(ctx.Now(a)), - Details: input.Request.GetHeartbeatRequest().GetDetails(), + RecordedTime: timestamppb.New(ctx.Now(a)), + Details: input.Request.GetHeartbeatRequest().GetDetails(), + TotalHeartbeatCount: prevHeartbeat.GetTotalHeartbeatCount() + 1, }) if heartbeatTimeout := a.GetHeartbeatTimeout().AsDuration(); heartbeatTimeout > 0 { ctx.AddTask( @@ -1286,6 +1288,7 @@ func (a *Activity) buildActivityExecutionInfo(ctx chasm.Context) *apiactivitypb. Header: requestData.GetHeader(), HeartbeatDetails: heartbeat.GetDetails(), HeartbeatTimeout: a.GetHeartbeatTimeout(), + TotalHeartbeatCount: heartbeat.GetTotalHeartbeatCount(), LastAttemptCompleteTime: attempt.GetCompleteTime(), LastFailure: attempt.GetLastFailureDetails().GetFailure(), LastHeartbeatTime: heartbeat.GetRecordedTime(), diff --git a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go index 16005129b18..1ba044b4a2f 100644 --- a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go @@ -693,9 +693,11 @@ type ActivityHeartbeatState struct { // Details provided in the last recorded activity heartbeat. Details *v1.Payloads `protobuf:"bytes,1,opt,name=details,proto3" json:"details,omitempty"` // Time the last heartbeat was recorded. 
- RecordedTime *timestamppb.Timestamp `protobuf:"bytes,2,opt,name=recorded_time,json=recordedTime,proto3" json:"recorded_time,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + RecordedTime *timestamppb.Timestamp `protobuf:"bytes,2,opt,name=recorded_time,json=recordedTime,proto3" json:"recorded_time,omitempty"` + // Total number of heartbeats recorded across all attempts of this activity, including retries. + TotalHeartbeatCount int64 `protobuf:"varint,3,opt,name=total_heartbeat_count,json=totalHeartbeatCount,proto3" json:"total_heartbeat_count,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *ActivityHeartbeatState) Reset() { @@ -742,6 +744,13 @@ func (x *ActivityHeartbeatState) GetRecordedTime() *timestamppb.Timestamp { return nil } +func (x *ActivityHeartbeatState) GetTotalHeartbeatCount() int64 { + if x != nil { + return x.TotalHeartbeatCount + } + return 0 +} + type ActivityRequestData struct { state protoimpl.MessageState `protogen:"open.v1"` // Serialized activity input, passed as arguments to the activity function. 
@@ -1087,10 +1096,11 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\x10start_request_id\x18\t \x01(\tR\x0estartRequestId\x1a\x80\x01\n" + "\x12LastFailureDetails\x12.\n" + "\x04time\x18\x01 \x01(\v2\x1a.google.protobuf.TimestampR\x04time\x12:\n" + - "\afailure\x18\x02 \x01(\v2 .temporal.api.failure.v1.FailureR\afailure\"\x95\x01\n" + + "\afailure\x18\x02 \x01(\v2 .temporal.api.failure.v1.FailureR\afailure\"\xc9\x01\n" + "\x16ActivityHeartbeatState\x12:\n" + "\adetails\x18\x01 \x01(\v2 .temporal.api.common.v1.PayloadsR\adetails\x12?\n" + - "\rrecorded_time\x18\x02 \x01(\v2\x1a.google.protobuf.TimestampR\frecordedTime\"\xcd\x01\n" + + "\rrecorded_time\x18\x02 \x01(\v2\x1a.google.protobuf.TimestampR\frecordedTime\x122\n" + + "\x15total_heartbeat_count\x18\x03 \x01(\x03R\x13totalHeartbeatCount\"\xcd\x01\n" + "\x13ActivityRequestData\x126\n" + "\x05input\x18\x01 \x01(\v2 .temporal.api.common.v1.PayloadsR\x05input\x126\n" + "\x06header\x18\x02 \x01(\v2\x1e.temporal.api.common.v1.HeaderR\x06header\x12F\n" + diff --git a/chasm/lib/activity/proto/v1/activity_state.proto b/chasm/lib/activity/proto/v1/activity_state.proto index 7e02bd633e6..62f512dfb25 100644 --- a/chasm/lib/activity/proto/v1/activity_state.proto +++ b/chasm/lib/activity/proto/v1/activity_state.proto @@ -202,6 +202,8 @@ message ActivityHeartbeatState { temporal.api.common.v1.Payloads details = 1; // Time the last heartbeat was recorded. google.protobuf.Timestamp recorded_time = 2; + // Total number of heartbeats recorded across all attempts of this activity, including retries. 
+ int64 total_heartbeat_count = 3; } message ActivityRequestData { diff --git a/tests/activity_api_update_test.go b/tests/activity_api_update_test.go index 7205aa43cf6..75b45093e16 100644 --- a/tests/activity_api_update_test.go +++ b/tests/activity_api_update_test.go @@ -111,7 +111,7 @@ func TestActivityApiUpdateClientTestSuite(t *testing.T) { t.Parallel() t.Run("TestActivityUpdateApi_ChangeRetryInterval", func(t *testing.T) { - s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -188,7 +188,7 @@ func TestActivityApiUpdateClientTestSuite(t *testing.T) { }) t.Run("TestActivityUpdateApi_ChangeScheduleToClose", func(t *testing.T) { - s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -263,7 +263,7 @@ func TestActivityApiUpdateClientTestSuite(t *testing.T) { // initial values are chosen in such a way that activity will fail due to schedule to close timeout // we change schedule to close to a longer value and retry policy to a shorter value // after that activity should succeed - s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -341,7 +341,7 @@ func TestActivityApiUpdateClientTestSuite(t *testing.T) { // 3. reset activity options to default, verify that retry policy is reset to default // 4. update activity options again, this time change schedule to close timeout and retry policy initial interval // 5. 
let activity finish, verify that it finished with updated options - s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -465,7 +465,7 @@ func TestActivityUpdateExecutionOptionsApi(t *testing.T) { t.Parallel() t.Run("ChangeRetryInterval", func(t *testing.T) { - s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -533,7 +533,7 @@ func TestActivityUpdateExecutionOptionsApi(t *testing.T) { }) t.Run("ChangeScheduleToClose", func(t *testing.T) { - s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -596,7 +596,7 @@ func TestActivityUpdateExecutionOptionsApi(t *testing.T) { }) t.Run("ChangeScheduleToCloseAndRetry", func(t *testing.T) { - s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -657,7 +657,7 @@ func TestActivityUpdateExecutionOptionsApi(t *testing.T) { }) t.Run("ResetDefaultOptions", func(t *testing.T) { - s := testcore.NewEnv(t, testcore.WithSdkWorker()) + s := testcore.NewEnv(t) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index 7a4e1008f37..b713381b715 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -3314,7 +3314,6 @@ func (s *standaloneActivityTestSuite) TestDescribeActivityExecution_WaitAnyState ), ) - taskQueuePollErr := make(chan error, 1) activityPollDone := make(chan struct{}) var describeResp *workflowservice.DescribeActivityExecutionResponse @@ -3373,7 +3372,6 @@ func (s *standaloneActivityTestSuite) 
TestDescribeActivityExecution_WaitAnyState "state_transition_count", ), ) - protorequire.ProtoEqual(t, defaultInput, describeResp.Input) @@ -7725,10 +7723,10 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { Namespace: s.Namespace().String(), ActivityId: activityID, }) - assert.NoError(c, dErr) - assert.Equal(c, enumspb.PENDING_ACTIVITY_STATE_PAUSED, dr.GetInfo().GetRunState()) - assert.EqualValues(c, 2, dr.GetInfo().GetAttempt()) - assert.NotNil(c, dr.GetInfo().GetLastFailure()) + require.NoError(c, dErr) + require.Equal(c, enumspb.PENDING_ACTIVITY_STATE_PAUSED, dr.GetInfo().GetRunState()) + require.EqualValues(c, 2, dr.GetInfo().GetAttempt()) + require.NotNil(c, dr.GetInfo().GetLastFailure()) }, 10*time.Second, 200*time.Millisecond) // Unpause – activity should be dispatched. @@ -7821,10 +7819,10 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { Namespace: s.Namespace().String(), ActivityId: activityID, }) - assert.NoError(c, dErr) - assert.Equal(c, enumspb.PENDING_ACTIVITY_STATE_PAUSED, dr.GetInfo().GetRunState()) - assert.EqualValues(c, 2, dr.GetInfo().GetAttempt()) - assert.Equal(c, failureMsg, dr.GetInfo().GetLastFailure().GetMessage()) + require.NoError(c, dErr) + require.Equal(c, enumspb.PENDING_ACTIVITY_STATE_PAUSED, dr.GetInfo().GetRunState()) + require.EqualValues(c, 2, dr.GetInfo().GetAttempt()) + require.Equal(c, failureMsg, dr.GetInfo().GetLastFailure().GetMessage()) }, 10*time.Second, 200*time.Millisecond) // Unpause and complete. 
@@ -7918,9 +7916,9 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { Namespace: s.Namespace().String(), ActivityId: activityID, }) - assert.NoError(c, dErr) - assert.Equal(c, enumspb.PENDING_ACTIVITY_STATE_PAUSED, dr.GetInfo().GetRunState()) - assert.EqualValues(c, 2, dr.GetInfo().GetAttempt()) + require.NoError(c, dErr) + require.Equal(c, enumspb.PENDING_ACTIVITY_STATE_PAUSED, dr.GetInfo().GetRunState()) + require.EqualValues(c, 2, dr.GetInfo().GetAttempt()) }, 10*time.Second, 200*time.Millisecond) // Unpause – activity should be dispatched. From 86166c8848df217a1855993705b6bee48fb7fea6 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Wed, 29 Apr 2026 11:35:13 -0400 Subject: [PATCH 10/25] linter --- tests/activity_api_reset_test.go | 2 +- tests/standalone_activity_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/activity_api_reset_test.go b/tests/activity_api_reset_test.go index 8a95e36c5a6..6d762599c4b 100644 --- a/tests/activity_api_reset_test.go +++ b/tests/activity_api_reset_test.go @@ -76,7 +76,7 @@ func TestActivityApiResetClientTestSuite(t *testing.T) { } func (s *ActivityApiResetClientTestSuite) SetupTest() { - s.FunctionalTestBase.SetupTest() + s.FunctionalTestBase.SetupTest() //nolint:forbidigo s.tv = testvars.New(s.T()).WithTaskQueue(s.TaskQueue()).WithNamespaceName(s.Namespace()) diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index b713381b715..076e3bc984a 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -8096,8 +8096,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { ActivityId: activityID, RunId: runID, }) - assert.NoError(c, dErr) - assert.Equal(c, enumspb.ACTIVITY_EXECUTION_STATUS_CANCELED, dr.GetInfo().GetStatus()) + require.NoError(c, dErr) + require.Equal(c, enumspb.ACTIVITY_EXECUTION_STATUS_CANCELED, dr.GetInfo().GetStatus()) }, 10*time.Second, 200*time.Millisecond) }) From 
777ba883d510caedb254885ad03389810ae21447 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Wed, 29 Apr 2026 11:47:59 -0400 Subject: [PATCH 11/25] fixing normalizeAndValidate funcs --- chasm/lib/activity/activity.go | 2 +- chasm/lib/activity/validator.go | 8 ++++---- tests/activity_api_reset_test.go | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/chasm/lib/activity/activity.go b/chasm/lib/activity/activity.go index 40cc2d9306b..b57aae562be 100644 --- a/chasm/lib/activity/activity.go +++ b/chasm/lib/activity/activity.go @@ -705,7 +705,7 @@ func (a *Activity) mergeActivityOptions( // Re-normalize timeouts after the update so that relationships like // start_to_close <= schedule_to_close and heartbeat <= start_to_close are preserved. // This mirrors adjustActivityOptions for workflow-embedded activities. - if err := normalizeAndValidateTimeouts(req.GetActivityId(), a.GetActivityType().GetName(), durationpb.New(0), ao); err != nil { + if err := validateAndNormalizeTimeouts(req.GetActivityId(), a.GetActivityType().GetName(), durationpb.New(0), ao); err != nil { return err } diff --git a/chasm/lib/activity/validator.go b/chasm/lib/activity/validator.go index b0a5f6fa1a5..36914e97e08 100644 --- a/chasm/lib/activity/validator.go +++ b/chasm/lib/activity/validator.go @@ -120,7 +120,7 @@ func validateAndNormalizeActivityAttributes( return serviceerror.NewInvalidArgumentf("invalid priorities: %v", err) } - return normalizeAndValidateTimeouts(activityID, + return validateAndNormalizeTimeouts(activityID, activityType, runTimeout, options) @@ -147,7 +147,7 @@ func validateActivityRetryPolicy( return retrypolicy.Validate(retryPolicy) } -func normalizeAndValidateTimeouts( +func validateAndNormalizeTimeouts( activityID string, activityType string, runTimeout *durationpb.Duration, @@ -215,7 +215,7 @@ func normalizeAndValidateTimeouts( return nil } -func normalizeAndValidateIDPolicy(req *workflowservice.StartActivityExecutionRequest) error { +func 
validateAndNormalizeIDPolicy(req *workflowservice.StartActivityExecutionRequest) error { if req.GetIdReusePolicy() == enumspb.ACTIVITY_ID_REUSE_POLICY_UNSPECIFIED { req.IdReusePolicy = enumspb.ACTIVITY_ID_REUSE_POLICY_ALLOW_DUPLICATE } @@ -297,7 +297,7 @@ func validateAndNormalizeStartRequest( len(req.GetIdentity()), maxIDLengthLimit) } - if err := normalizeAndValidateIDPolicy(req); err != nil { + if err := validateAndNormalizeIDPolicy(req); err != nil { return err } diff --git a/tests/activity_api_reset_test.go b/tests/activity_api_reset_test.go index 6d762599c4b..8d3534aaa79 100644 --- a/tests/activity_api_reset_test.go +++ b/tests/activity_api_reset_test.go @@ -48,12 +48,12 @@ import ( ) type ActivityApiResetClientTestSuite struct { - testcore.FunctionalTestBase - tv *testvars.TestVars - initialRetryInterval time.Duration - scheduleToCloseTimeout time.Duration - startToCloseTimeout time.Duration - activityRetryPolicy *temporal.RetryPolicy + testcore.FunctionalTestBase //nolint:forbidigo + tv *testvars.TestVars + initialRetryInterval time.Duration + scheduleToCloseTimeout time.Duration + startToCloseTimeout time.Duration + activityRetryPolicy *temporal.RetryPolicy // apiName selects which reset API variant to exercise ("legacy-api" or "execution-api"). // Set before suite.Run; used by SetupTest to initialise resetFn. 
From bbbb63145fdbe86319892bd5d7cbfd3a904c9c5f Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Wed, 29 Apr 2026 12:05:00 -0400 Subject: [PATCH 12/25] renaming --- chasm/lib/activity/frontend.go | 16 +++++----- chasm/lib/activity/frontend_test.go | 4 +-- chasm/lib/activity/validator.go | 16 +++++----- chasm/lib/activity/validator_test.go | 48 ++++++++++++++-------------- 4 files changed, 42 insertions(+), 42 deletions(-) diff --git a/chasm/lib/activity/frontend.go b/chasm/lib/activity/frontend.go index b4114ee8e47..313d3933ec0 100644 --- a/chasm/lib/activity/frontend.go +++ b/chasm/lib/activity/frontend.go @@ -122,7 +122,7 @@ func (h *frontendHandler) DescribeActivityExecution( return nil, ErrStandaloneActivityDisabled } - err := validateDescribeActivityExecutionRequest( + err := validateAndNormalizeDescribeActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), ) @@ -151,7 +151,7 @@ func (h *frontendHandler) PollActivityExecution( return nil, ErrStandaloneActivityDisabled } - err := validatePollActivityExecutionRequest( + err := validateAndNormalizePollActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), ) @@ -266,7 +266,7 @@ func (h *frontendHandler) DeleteActivityExecution( return nil, ErrStandaloneActivityDisabled } - if err := validateDeleteActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { + if err := validateAndNormalizeDeleteActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { return nil, err } @@ -301,7 +301,7 @@ func (h *frontendHandler) TerminateActivityExecution( return nil, err } - if err := validateTerminateActivityExecutionRequest( + if err := validateAndNormalizeTerminateActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, @@ -334,7 +334,7 @@ func (h *frontendHandler) RequestCancelActivityExecution( return nil, err } - if err := validateRequestCancelActivityExecutionRequest( + if err := validateAndNormalizeRequestCancelActivityExecutionRequest( req, 
h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, @@ -439,7 +439,7 @@ func (h *frontendHandler) PauseActivityExecution( return nil, ErrStandaloneActivityDisabled } - if err := validatePauseActivityExecutionRequest( + if err := validateAndNormalizePauseActivityExecutionRequest( req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, @@ -471,7 +471,7 @@ func (h *frontendHandler) UnpauseActivityExecution( return nil, ErrStandaloneActivityDisabled } - if err := validateUnpauseActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { + if err := validateAndNormalizeUnpauseActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { return nil, err } @@ -498,7 +498,7 @@ func (h *frontendHandler) ResetActivityExecution( return nil, ErrStandaloneActivityDisabled } - if err := validateResetActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { + if err := validateAndNormalizeResetActivityExecutionRequest(req, h.config.MaxIDLengthLimit()); err != nil { return nil, err } diff --git a/chasm/lib/activity/frontend_test.go b/chasm/lib/activity/frontend_test.go index 0dce1270634..648ce3b52e8 100644 --- a/chasm/lib/activity/frontend_test.go +++ b/chasm/lib/activity/frontend_test.go @@ -84,7 +84,7 @@ func TestRequestIdStableAcrossRetries(t *testing.T) { ActivityId: "test-activity", } validateTwice(t, req, func() error { - return validateTerminateActivityExecutionRequest( + return validateAndNormalizeTerminateActivityExecutionRequest( req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) }) }) @@ -95,7 +95,7 @@ func TestRequestIdStableAcrossRetries(t *testing.T) { ActivityId: "test-activity", } validateTwice(t, req, func() error { - return validateRequestCancelActivityExecutionRequest( + return validateAndNormalizeRequestCancelActivityExecutionRequest( req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) }) }) diff --git 
a/chasm/lib/activity/validator.go b/chasm/lib/activity/validator.go index 36914e97e08..7141f4996f3 100644 --- a/chasm/lib/activity/validator.go +++ b/chasm/lib/activity/validator.go @@ -321,7 +321,7 @@ func validateAndNormalizeStartRequest( return nil } -func validateDescribeActivityExecutionRequest( +func validateAndNormalizeDescribeActivityExecutionRequest( req *workflowservice.DescribeActivityExecutionRequest, maxIDLengthLimit int, ) error { @@ -347,7 +347,7 @@ func validateDescribeActivityExecutionRequest( return nil } -func validatePollActivityExecutionRequest( +func validateAndNormalizePollActivityExecutionRequest( req *workflowservice.PollActivityExecutionRequest, maxIDLengthLimit int, ) error { @@ -367,7 +367,7 @@ func validatePollActivityExecutionRequest( return nil } -func validateRequestCancelActivityExecutionRequest( +func validateAndNormalizeRequestCancelActivityExecutionRequest( req *workflowservice.RequestCancelActivityExecutionRequest, maxIDLengthLimit int, blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, @@ -545,7 +545,7 @@ func validateUpdateActivityExecutionOptionsRequest( return nil } -func validateDeleteActivityExecutionRequest( +func validateAndNormalizeDeleteActivityExecutionRequest( req *workflowservice.DeleteActivityExecutionRequest, maxIDLengthLimit int, ) error { @@ -568,7 +568,7 @@ func validateDeleteActivityExecutionRequest( return nil } -func validateTerminateActivityExecutionRequest( +func validateAndNormalizeTerminateActivityExecutionRequest( req *workflowservice.TerminateActivityExecutionRequest, maxIDLengthLimit int, blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, @@ -620,7 +620,7 @@ func validateTerminateActivityExecutionRequest( return nil } -func validatePauseActivityExecutionRequest( +func validateAndNormalizePauseActivityExecutionRequest( req *workflowservice.PauseActivityExecutionRequest, maxIDLengthLimit int, blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, @@ -657,7 
+657,7 @@ func validatePauseActivityExecutionRequest( return nil } -func validateResetActivityExecutionRequest( +func validateAndNormalizeResetActivityExecutionRequest( req *workflowservice.ResetActivityExecutionRequest, maxIDLengthLimit int, ) error { @@ -681,7 +681,7 @@ func validateResetActivityExecutionRequest( return nil } -func validateUnpauseActivityExecutionRequest( +func validateAndNormalizeUnpauseActivityExecutionRequest( req *workflowservice.UnpauseActivityExecutionRequest, maxIDLengthLimit int, ) error { diff --git a/chasm/lib/activity/validator_test.go b/chasm/lib/activity/validator_test.go index 2433459ab79..32f4911b6fc 100644 --- a/chasm/lib/activity/validator_test.go +++ b/chasm/lib/activity/validator_test.go @@ -646,7 +646,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { req := &workflowservice.DeleteActivityExecutionRequest{ ActivityId: defaultActivityID, } - err := validateDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ -655,7 +655,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", } - err := validateDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ -663,7 +663,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { req := &workflowservice.DeleteActivityExecutionRequest{ ActivityId: "", } - err := validateDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) }) @@ -672,7 +672,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { req := 
&workflowservice.DeleteActivityExecutionRequest{ ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), } - err := validateDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) }) @@ -682,7 +682,7 @@ func TestValidateDeleteActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, RunId: "not-a-valid-uuid", } - err := validateDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeDeleteActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) }) @@ -719,7 +719,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { Identity: "test-identity", Reason: "test-reason", } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) require.NoError(t, err) }) @@ -729,7 +729,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", Identity: "test-identity", } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) require.NoError(t, err) }) @@ -738,7 +738,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { ActivityId: "", Identity: "test-identity", } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, 
defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "activity ID is required", invalidArgErr.Message) @@ -749,7 +749,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), Identity: "test-identity", } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("activity ID exceeds length limit. Length=%d Limit=%d", @@ -761,7 +761,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("identity exceeds length limit. 
Length=%d Limit=%d", @@ -774,7 +774,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { Identity: "test-identity", Reason: string(make([]byte, defaultBlobSizeLimitError("default")+1)), } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "reason exceeds length limit", invalidArgErr.Message) @@ -786,7 +786,7 @@ func TestValidatePauseActivityExecutionRequest(t *testing.T) { RunId: "not-a-valid-uuid", Identity: "test-identity", } - err := validatePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) + err := validateAndNormalizePauseActivityExecutionRequest(req, defaultMaxIDLengthLimit, defaultBlobSizeLimitError, defaultBlobSizeLimitWarn, log.NewNoopLogger()) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "invalid run id: must be a valid UUID", invalidArgErr.Message) @@ -799,7 +799,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, Identity: "test-identity", } - err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ -809,7 +809,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", Identity: "test-identity", } - err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ 
-818,7 +818,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { ActivityId: "", Identity: "test-identity", } - err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "activity ID is required", invalidArgErr.Message) @@ -829,7 +829,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), Identity: "test-identity", } - err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("activity ID exceeds length limit. Length=%d Limit=%d", @@ -841,7 +841,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), } - err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("identity exceeds length limit. 
Length=%d Limit=%d", @@ -854,7 +854,7 @@ func TestValidateUnpauseActivityExecutionRequest(t *testing.T) { RunId: "not-a-valid-uuid", Identity: "test-identity", } - err := validateUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeUnpauseActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "invalid run id: must be a valid UUID", invalidArgErr.Message) @@ -867,7 +867,7 @@ func TestValidateResetActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, Identity: "test-identity", } - err := validateResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ -877,7 +877,7 @@ func TestValidateResetActivityExecutionRequest(t *testing.T) { RunId: "f47ac10b-58cc-4372-a567-0e02b2c3d479", Identity: "test-identity", } - err := validateResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) require.NoError(t, err) }) @@ -886,7 +886,7 @@ func TestValidateResetActivityExecutionRequest(t *testing.T) { ActivityId: "", Identity: "test-identity", } - err := validateResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "activity ID is required", invalidArgErr.Message) @@ -897,7 +897,7 @@ func TestValidateResetActivityExecutionRequest(t *testing.T) { ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), Identity: "test-identity", } - err := validateResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr 
*serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("activity ID exceeds length limit. Length=%d Limit=%d", @@ -909,7 +909,7 @@ func TestValidateResetActivityExecutionRequest(t *testing.T) { ActivityId: defaultActivityID, Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), } - err := validateResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, fmt.Sprintf("identity exceeds length limit. Length=%d Limit=%d", @@ -922,7 +922,7 @@ func TestValidateResetActivityExecutionRequest(t *testing.T) { RunId: "not-a-valid-uuid", Identity: "test-identity", } - err := validateResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) + err := validateAndNormalizeResetActivityExecutionRequest(req, defaultMaxIDLengthLimit) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Equal(t, "invalid run id: must be a valid UUID", invalidArgErr.Message) From e0dfba6ac257fce7d978f14bfdad2a0babf5bdce Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Wed, 29 Apr 2026 12:20:20 -0400 Subject: [PATCH 13/25] buf breaking change fix --- .../gen/activitypb/v1/activity_state.pb.go | 56 +++++++++---------- .../activity/proto/v1/activity_state.proto | 18 +++--- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go index 1ba044b4a2f..b92ae094780 100644 --- a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go @@ -198,25 +198,25 @@ type ActivityState struct { CancelState *ActivityCancelState `protobuf:"bytes,11,opt,name=cancel_state,json=cancelState,proto3" json:"cancel_state,omitempty"` // Set if the activity was 
terminated TerminateState *ActivityTerminateState `protobuf:"bytes,12,opt,name=terminate_state,json=terminateState,proto3" json:"terminate_state,omitempty"` + // Delays the first dispatch of the activity. Extends ScheduleToClose and ScheduleToStart + // timeouts by this duration. StartToClose and Heartbeat timeouts are unaffected. + StartDelay *durationpb.Duration `protobuf:"bytes,13,opt,name=start_delay,json=startDelay,proto3" json:"start_delay,omitempty"` // Options for the first scheduled attempt to support `restore_original` - OriginalOptions *v12.ActivityOptions `protobuf:"bytes,13,opt,name=original_options,json=originalOptions,proto3" json:"original_options,omitempty"` + OriginalOptions *v12.ActivityOptions `protobuf:"bytes,14,opt,name=original_options,json=originalOptions,proto3" json:"original_options,omitempty"` // An incremental version number used to validate ScheduleToCloseTimeoutTask tasks. // Incremented each time a new ScheduleToCloseTimeoutTask is scheduled (at activity creation // and on each options update that re-schedules the task). Unlike attempt.stamp, this counter // is NOT incremented on retries, because schedule-to-close spans the full activity lifetime. - Stamp int32 `protobuf:"varint,14,opt,name=stamp,proto3" json:"stamp,omitempty"` + Stamp int32 `protobuf:"varint,15,opt,name=stamp,proto3" json:"stamp,omitempty"` // Set if the activity was paused. - PauseState *ActivityPauseState `protobuf:"bytes,15,opt,name=pause_state,json=pauseState,proto3" json:"pause_state,omitempty"` + PauseState *ActivityPauseState `protobuf:"bytes,16,opt,name=pause_state,json=pauseState,proto3" json:"pause_state,omitempty"` // Set when reset was requested while the activity was running. // On the next retry, TransitionRescheduled will reset the attempt count to 1 before incrementing. 
- ActivityReset bool `protobuf:"varint,16,opt,name=activity_reset,json=activityReset,proto3" json:"activity_reset,omitempty"` + ActivityReset bool `protobuf:"varint,17,opt,name=activity_reset,json=activityReset,proto3" json:"activity_reset,omitempty"` // Set alongside activity_reset when heartbeat details should be cleared on the next retry. - ResetHeartbeats bool `protobuf:"varint,17,opt,name=reset_heartbeats,json=resetHeartbeats,proto3" json:"reset_heartbeats,omitempty"` - // Delays the first dispatch of the activity. Extends ScheduleToClose and ScheduleToStart - // timeouts by this duration. StartToClose and Heartbeat timeouts are unaffected. - StartDelay *durationpb.Duration `protobuf:"bytes,18,opt,name=start_delay,json=startDelay,proto3" json:"start_delay,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + ResetHeartbeats bool `protobuf:"varint,18,opt,name=reset_heartbeats,json=resetHeartbeats,proto3" json:"reset_heartbeats,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *ActivityState) Reset() { @@ -333,6 +333,13 @@ func (x *ActivityState) GetTerminateState() *ActivityTerminateState { return nil } +func (x *ActivityState) GetStartDelay() *durationpb.Duration { + if x != nil { + return x.StartDelay + } + return nil +} + func (x *ActivityState) GetOriginalOptions() *v12.ActivityOptions { if x != nil { return x.OriginalOptions @@ -368,13 +375,6 @@ func (x *ActivityState) GetResetHeartbeats() bool { return false } -func (x *ActivityState) GetStartDelay() *durationpb.Duration { - if x != nil { - return x.StartDelay - } - return nil -} - type ActivityCancelState struct { state protoimpl.MessageState `protogen:"open.v1"` RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` @@ -1059,15 +1059,15 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\bpriority\x18\n" + " \x01(\v2 
.temporal.api.common.v1.PriorityR\bpriority\x12c\n" + "\fcancel_state\x18\v \x01(\v2@.temporal.server.chasm.lib.activity.proto.v1.ActivityCancelStateR\vcancelState\x12l\n" + - "\x0fterminate_state\x18\f \x01(\v2C.temporal.server.chasm.lib.activity.proto.v1.ActivityTerminateStateR\x0eterminateState\x12T\n" + - "\x10original_options\x18\r \x01(\v2).temporal.api.activity.v1.ActivityOptionsR\x0foriginalOptions\x12\x14\n" + - "\x05stamp\x18\x0e \x01(\x05R\x05stamp\x12`\n" + - "\vpause_state\x18\x0f \x01(\v2?.temporal.server.chasm.lib.activity.proto.v1.ActivityPauseStateR\n" + + "\x0fterminate_state\x18\f \x01(\v2C.temporal.server.chasm.lib.activity.proto.v1.ActivityTerminateStateR\x0eterminateState\x12:\n" + + "\vstart_delay\x18\r \x01(\v2\x19.google.protobuf.DurationR\n" + + "startDelay\x12T\n" + + "\x10original_options\x18\x0e \x01(\v2).temporal.api.activity.v1.ActivityOptionsR\x0foriginalOptions\x12\x14\n" + + "\x05stamp\x18\x0f \x01(\x05R\x05stamp\x12`\n" + + "\vpause_state\x18\x10 \x01(\v2?.temporal.server.chasm.lib.activity.proto.v1.ActivityPauseStateR\n" + "pauseState\x12%\n" + - "\x0eactivity_reset\x18\x10 \x01(\bR\ractivityReset\x12)\n" + - "\x10reset_heartbeats\x18\x11 \x01(\bR\x0fresetHeartbeats\x12:\n" + - "\vstart_delay\x18\x12 \x01(\v2\x19.google.protobuf.DurationR\n" + - "startDelay\"\xa7\x01\n" + + "\x0eactivity_reset\x18\x11 \x01(\bR\ractivityReset\x12)\n" + + "\x10reset_heartbeats\x18\x12 \x01(\bR\x0fresetHeartbeats\"\xa7\x01\n" + "\x13ActivityCancelState\x12\x1d\n" + "\n" + "request_id\x18\x01 \x01(\tR\trequestId\x12=\n" + @@ -1181,9 +1181,9 @@ var file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_depIdx 17, // 9: temporal.server.chasm.lib.activity.proto.v1.ActivityState.priority:type_name -> temporal.api.common.v1.Priority 2, // 10: temporal.server.chasm.lib.activity.proto.v1.ActivityState.cancel_state:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityCancelState 3, // 11: 
temporal.server.chasm.lib.activity.proto.v1.ActivityState.terminate_state:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityTerminateState - 18, // 12: temporal.server.chasm.lib.activity.proto.v1.ActivityState.original_options:type_name -> temporal.api.activity.v1.ActivityOptions - 4, // 13: temporal.server.chasm.lib.activity.proto.v1.ActivityState.pause_state:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityPauseState - 14, // 14: temporal.server.chasm.lib.activity.proto.v1.ActivityState.start_delay:type_name -> google.protobuf.Duration + 14, // 12: temporal.server.chasm.lib.activity.proto.v1.ActivityState.start_delay:type_name -> google.protobuf.Duration + 18, // 13: temporal.server.chasm.lib.activity.proto.v1.ActivityState.original_options:type_name -> temporal.api.activity.v1.ActivityOptions + 4, // 14: temporal.server.chasm.lib.activity.proto.v1.ActivityState.pause_state:type_name -> temporal.server.chasm.lib.activity.proto.v1.ActivityPauseState 16, // 15: temporal.server.chasm.lib.activity.proto.v1.ActivityCancelState.request_time:type_name -> google.protobuf.Timestamp 16, // 16: temporal.server.chasm.lib.activity.proto.v1.ActivityPauseState.pause_time:type_name -> google.protobuf.Timestamp 14, // 17: temporal.server.chasm.lib.activity.proto.v1.ActivityAttemptState.current_retry_interval:type_name -> google.protobuf.Duration diff --git a/chasm/lib/activity/proto/v1/activity_state.proto b/chasm/lib/activity/proto/v1/activity_state.proto index 62f512dfb25..93bbbb17111 100644 --- a/chasm/lib/activity/proto/v1/activity_state.proto +++ b/chasm/lib/activity/proto/v1/activity_state.proto @@ -102,28 +102,28 @@ message ActivityState { // Set if the activity was terminated ActivityTerminateState terminate_state = 12; + // Delays the first dispatch of the activity. Extends ScheduleToClose and ScheduleToStart + // timeouts by this duration. StartToClose and Heartbeat timeouts are unaffected. 
+ google.protobuf.Duration start_delay = 13; + // Options for the first scheduled attempt to support `restore_original` - temporal.api.activity.v1.ActivityOptions original_options = 13; + temporal.api.activity.v1.ActivityOptions original_options = 14; // An incremental version number used to validate ScheduleToCloseTimeoutTask tasks. // Incremented each time a new ScheduleToCloseTimeoutTask is scheduled (at activity creation // and on each options update that re-schedules the task). Unlike attempt.stamp, this counter // is NOT incremented on retries, because schedule-to-close spans the full activity lifetime. - int32 stamp = 14; + int32 stamp = 15; // Set if the activity was paused. - ActivityPauseState pause_state = 15; + ActivityPauseState pause_state = 16; // Set when reset was requested while the activity was running. // On the next retry, TransitionRescheduled will reset the attempt count to 1 before incrementing. - bool activity_reset = 16; + bool activity_reset = 17; // Set alongside activity_reset when heartbeat details should be cleared on the next retry. - bool reset_heartbeats = 17; - - // Delays the first dispatch of the activity. Extends ScheduleToClose and ScheduleToStart - // timeouts by this duration. StartToClose and Heartbeat timeouts are unaffected. 
- google.protobuf.Duration start_delay = 18; + bool reset_heartbeats = 18; } message ActivityCancelState { From 3f2dacce8fbd258038bbc4e0ce18880e81bebffd Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Wed, 29 Apr 2026 13:06:30 -0400 Subject: [PATCH 14/25] start delay was dropped, fixing more rebase issues --- chasm/lib/activity/activity.go | 1 + 1 file changed, 1 insertion(+) diff --git a/chasm/lib/activity/activity.go b/chasm/lib/activity/activity.go index b57aae562be..78c5ea0dc2b 100644 --- a/chasm/lib/activity/activity.go +++ b/chasm/lib/activity/activity.go @@ -177,6 +177,7 @@ func NewStandaloneActivity( HeartbeatTimeout: request.GetHeartbeatTimeout(), RetryPolicy: request.GetRetryPolicy(), Priority: request.GetPriority(), + StartDelay: request.GetStartDelay(), OriginalOptions: &apiactivitypb.ActivityOptions{ TaskQueue: request.GetTaskQueue(), ScheduleToCloseTimeout: request.GetScheduleToCloseTimeout(), From c5bbc3e3ee334679ba03ddb21822fc0eddac72f7 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Wed, 29 Apr 2026 14:47:48 -0400 Subject: [PATCH 15/25] updating stamp -> schedule_to_close_stamp --- chasm/lib/activity/activity.go | 4 ++-- chasm/lib/activity/activity_tasks.go | 2 +- .../activity/gen/activitypb/v1/activity_state.pb.go | 12 ++++++------ chasm/lib/activity/proto/v1/activity_state.proto | 2 +- chasm/lib/activity/statemachine.go | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/chasm/lib/activity/activity.go b/chasm/lib/activity/activity.go index 78c5ea0dc2b..6ceafe3091c 100644 --- a/chasm/lib/activity/activity.go +++ b/chasm/lib/activity/activity.go @@ -588,12 +588,12 @@ func (a *Activity) UpdateActivityExecutionOptions( // Add a new ScheduleToCloseTimeoutTask at the (possibly updated) deadline. // Increment the stamp so the previous task is invalidated by the Validate check. 
if timeout := a.GetScheduleToCloseTimeout().AsDuration(); timeout > 0 { - a.Stamp++ + a.ScheduleToCloseStamp++ deadline := a.GetScheduleTime().AsTime().Add(timeout) ctx.AddTask( a, chasm.TaskAttributes{ScheduledTime: deadline}, - &activitypb.ScheduleToCloseTimeoutTask{Stamp: a.GetStamp()}, + &activitypb.ScheduleToCloseTimeoutTask{Stamp: a.GetScheduleToCloseStamp()}, ) } diff --git a/chasm/lib/activity/activity_tasks.go b/chasm/lib/activity/activity_tasks.go index fae7e340997..52d51127896 100644 --- a/chasm/lib/activity/activity_tasks.go +++ b/chasm/lib/activity/activity_tasks.go @@ -144,7 +144,7 @@ func (h *scheduleToCloseTimeoutTaskHandler) Validate( // Stamp check: discard tasks from before the most recent ScheduleToCloseTimeoutTask was // scheduled (e.g. after a schedule-to-close extension or a disable+re-enable cycle). // Tasks without a stamp (stamp=0) predate this field and are not validated by stamp. - if task.GetStamp() != 0 && task.GetStamp() != activity.GetStamp() { + if task.GetStamp() != 0 && task.GetStamp() != activity.GetScheduleToCloseStamp() { return false, nil } return true, nil diff --git a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go index b92ae094780..649a011776a 100644 --- a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go @@ -207,7 +207,7 @@ type ActivityState struct { // Incremented each time a new ScheduleToCloseTimeoutTask is scheduled (at activity creation // and on each options update that re-schedules the task). Unlike attempt.stamp, this counter // is NOT incremented on retries, because schedule-to-close spans the full activity lifetime. 
- Stamp int32 `protobuf:"varint,15,opt,name=stamp,proto3" json:"stamp,omitempty"` + ScheduleToCloseStamp int32 `protobuf:"varint,15,opt,name=schedule_to_close_stamp,json=scheduleToCloseStamp,proto3" json:"schedule_to_close_stamp,omitempty"` // Set if the activity was paused. PauseState *ActivityPauseState `protobuf:"bytes,16,opt,name=pause_state,json=pauseState,proto3" json:"pause_state,omitempty"` // Set when reset was requested while the activity was running. @@ -347,9 +347,9 @@ func (x *ActivityState) GetOriginalOptions() *v12.ActivityOptions { return nil } -func (x *ActivityState) GetStamp() int32 { +func (x *ActivityState) GetScheduleToCloseStamp() int32 { if x != nil { - return x.Stamp + return x.ScheduleToCloseStamp } return 0 } @@ -1043,7 +1043,7 @@ var File_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto protor const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawDesc = "" + "\n" + - "@temporal/server/chasm/lib/activity/proto/v1/activity_state.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&temporal/api/activity/v1/message.proto\x1a$temporal/api/common/v1/message.proto\x1a(temporal/api/deployment/v1/message.proto\x1a%temporal/api/failure/v1/message.proto\x1a'temporal/api/sdk/v1/user_metadata.proto\x1a'temporal/api/taskqueue/v1/message.proto\"\xb7\n" + + "@temporal/server/chasm/lib/activity/proto/v1/activity_state.proto\x12+temporal.server.chasm.lib.activity.proto.v1\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&temporal/api/activity/v1/message.proto\x1a$temporal/api/common/v1/message.proto\x1a(temporal/api/deployment/v1/message.proto\x1a%temporal/api/failure/v1/message.proto\x1a'temporal/api/sdk/v1/user_metadata.proto\x1a'temporal/api/taskqueue/v1/message.proto\"\xd8\n" + "\n" + "\rActivityState\x12I\n" + "\ractivity_type\x18\x01 
\x01(\v2$.temporal.api.common.v1.ActivityTypeR\factivityType\x12C\n" + @@ -1062,8 +1062,8 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\x0fterminate_state\x18\f \x01(\v2C.temporal.server.chasm.lib.activity.proto.v1.ActivityTerminateStateR\x0eterminateState\x12:\n" + "\vstart_delay\x18\r \x01(\v2\x19.google.protobuf.DurationR\n" + "startDelay\x12T\n" + - "\x10original_options\x18\x0e \x01(\v2).temporal.api.activity.v1.ActivityOptionsR\x0foriginalOptions\x12\x14\n" + - "\x05stamp\x18\x0f \x01(\x05R\x05stamp\x12`\n" + + "\x10original_options\x18\x0e \x01(\v2).temporal.api.activity.v1.ActivityOptionsR\x0foriginalOptions\x125\n" + + "\x17schedule_to_close_stamp\x18\x0f \x01(\x05R\x14scheduleToCloseStamp\x12`\n" + "\vpause_state\x18\x10 \x01(\v2?.temporal.server.chasm.lib.activity.proto.v1.ActivityPauseStateR\n" + "pauseState\x12%\n" + "\x0eactivity_reset\x18\x11 \x01(\bR\ractivityReset\x12)\n" + diff --git a/chasm/lib/activity/proto/v1/activity_state.proto b/chasm/lib/activity/proto/v1/activity_state.proto index 93bbbb17111..6c92b9dbfb7 100644 --- a/chasm/lib/activity/proto/v1/activity_state.proto +++ b/chasm/lib/activity/proto/v1/activity_state.proto @@ -113,7 +113,7 @@ message ActivityState { // Incremented each time a new ScheduleToCloseTimeoutTask is scheduled (at activity creation // and on each options update that re-schedules the task). Unlike attempt.stamp, this counter // is NOT incremented on retries, because schedule-to-close spans the full activity lifetime. - int32 stamp = 15; + int32 schedule_to_close_stamp = 15; // Set if the activity was paused. 
ActivityPauseState pause_state = 16; diff --git a/chasm/lib/activity/statemachine.go b/chasm/lib/activity/statemachine.go index 058de6453b3..675e4408f88 100644 --- a/chasm/lib/activity/statemachine.go +++ b/chasm/lib/activity/statemachine.go @@ -62,13 +62,13 @@ var TransitionScheduled = chasm.NewTransition( } if timeout := a.GetScheduleToCloseTimeout().AsDuration(); timeout > 0 { - a.Stamp++ + a.ScheduleToCloseStamp++ ctx.AddTask( a, chasm.TaskAttributes{ ScheduledTime: startDelayEnd.Add(timeout), }, - &activitypb.ScheduleToCloseTimeoutTask{Stamp: a.GetStamp()}) + &activitypb.ScheduleToCloseTimeoutTask{Stamp: a.GetScheduleToCloseStamp()}) } dispatchAttrs := chasm.TaskAttributes{} From 11cc15db20cbf57f920943d4dc98d13fa8f429c8 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Thu, 30 Apr 2026 16:26:43 -0400 Subject: [PATCH 16/25] simplifying diff, validators for request ID --- chasm/lib/activity/validator.go | 93 ++++++++++---------- chasm/lib/activity/validator_test.go | 122 +++++++++++++++++++++++++++ 2 files changed, 172 insertions(+), 43 deletions(-) diff --git a/chasm/lib/activity/validator.go b/chasm/lib/activity/validator.go index 7141f4996f3..6345be3f892 100644 --- a/chasm/lib/activity/validator.go +++ b/chasm/lib/activity/validator.go @@ -278,49 +278,6 @@ func validateAndNormalizeSearchAttributes( return saValidator.ValidateSize(saToValidate, namespaceName) } -func validateAndNormalizeStartRequest( - req *workflowservice.StartActivityExecutionRequest, - maxIDLengthLimit int, - blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, - blobSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter, - logger log.Logger, - saMapperProvider searchattribute.MapperProvider, - saValidator *searchattribute.Validator, -) error { - if len(req.GetRequestId()) > maxIDLengthLimit { - return serviceerror.NewInvalidArgumentf("request ID exceeds length limit. 
Length=%d Limit=%d", - len(req.GetRequestId()), maxIDLengthLimit) - } - - if len(req.GetIdentity()) > maxIDLengthLimit { - return serviceerror.NewInvalidArgumentf("identity exceeds length limit. Length=%d Limit=%d", - len(req.GetIdentity()), maxIDLengthLimit) - } - - if err := validateAndNormalizeIDPolicy(req); err != nil { - return err - } - - if err := validateBlobSize( - req.GetActivityId(), - "StartActivityExecution", - blobSizeLimitError, - blobSizeLimitWarn, - req.Input.Size(), - logger, - req.GetNamespace()); err != nil { - return serviceerror.NewInvalidArgument("input exceeds length limit") - } - - if req.GetSearchAttributes() != nil { - if err := validateAndNormalizeSearchAttributes(req, saMapperProvider, saValidator); err != nil { - return err - } - } - - return nil -} - func validateAndNormalizeDescribeActivityExecutionRequest( req *workflowservice.DescribeActivityExecutionRequest, maxIDLengthLimit int, @@ -367,6 +324,49 @@ func validateAndNormalizePollActivityExecutionRequest( return nil } +func validateAndNormalizeStartRequest( + req *workflowservice.StartActivityExecutionRequest, + maxIDLengthLimit int, + blobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter, + blobSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter, + logger log.Logger, + saMapperProvider searchattribute.MapperProvider, + saValidator *searchattribute.Validator, +) error { + if len(req.GetRequestId()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("request ID exceeds length limit. Length=%d Limit=%d", + len(req.GetRequestId()), maxIDLengthLimit) + } + + if len(req.GetIdentity()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("identity exceeds length limit. 
Length=%d Limit=%d", + len(req.GetIdentity()), maxIDLengthLimit) + } + + if err := validateAndNormalizeIDPolicy(req); err != nil { + return err + } + + if err := validateBlobSize( + req.GetActivityId(), + "StartActivityExecution", + blobSizeLimitError, + blobSizeLimitWarn, + req.Input.Size(), + logger, + req.GetNamespace()); err != nil { + return serviceerror.NewInvalidArgument("input exceeds length limit") + } + + if req.GetSearchAttributes() != nil { + if err := validateAndNormalizeSearchAttributes(req, saMapperProvider, saValidator); err != nil { + return err + } + } + + return nil +} + func validateAndNormalizeRequestCancelActivityExecutionRequest( req *workflowservice.RequestCancelActivityExecutionRequest, maxIDLengthLimit int, @@ -627,6 +627,13 @@ func validateAndNormalizePauseActivityExecutionRequest( blobSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter, logger log.Logger, ) error { + if req.GetRequestId() == "" { + req.RequestId = uuid.NewString() + } + if len(req.GetRequestId()) > maxIDLengthLimit { + return serviceerror.NewInvalidArgumentf("request ID exceeds length limit. Length=%d Limit=%d", + len(req.GetRequestId()), maxIDLengthLimit) + } if req.GetActivityId() == "" { return serviceerror.NewInvalidArgument("activity ID is required") } diff --git a/chasm/lib/activity/validator_test.go b/chasm/lib/activity/validator_test.go index 32f4911b6fc..4fc87806069 100644 --- a/chasm/lib/activity/validator_test.go +++ b/chasm/lib/activity/validator_test.go @@ -5,6 +5,7 @@ import ( "testing" "time" + "github.com/google/uuid" "github.com/stretchr/testify/require" activitypb "go.temporal.io/api/activity/v1" commonpb "go.temporal.io/api/common/v1" @@ -414,6 +415,127 @@ func newTestFrontendHandler( } } +// TestRequestIDGeneratedWhenMissing verifies that the server generates a non-empty UUID request ID +// for every standalone activity API that carries a request_id field, when the client omits it. 
+// This prevents "" == "" false-positive idempotency matches in the state machine. +func TestRequestIDGeneratedWhenMissing(t *testing.T) { + maxIDLengthLimit := defaultMaxIDLengthLimit + blobLimit := defaultBlobSizeLimitError + logger := log.NewNoopLogger() + + t.Run("StartActivityExecution", func(t *testing.T) { + h := &frontendHandler{ + config: &Config{ + BlobSizeLimitError: blobLimit, + BlobSizeLimitWarn: defaultBlobSizeLimitWarn, + MaxIDLengthLimit: func() int { return maxIDLengthLimit }, + DefaultActivityRetryPolicy: dynamicconfig.GetTypedPropertyFnFilteredByNamespace(getDefaultRetrySettings("")), + }, + logger: logger, + } + req := &workflowservice.StartActivityExecutionRequest{ + ActivityId: defaultActivityID, + ActivityType: &commonpb.ActivityType{Name: defaultActivityType}, + TaskQueue: &taskqueuepb.TaskQueue{Name: defaultTaskQueue}, + StartToCloseTimeout: durationpb.New(10 * time.Second), + RetryPolicy: &commonpb.RetryPolicy{}, + } + _, err := h.validateAndPopulateStartRequest(req, namespace.ID(defaultNamespaceID)) + require.NoError(t, err) + require.NotEmpty(t, req.GetRequestId(), "server must generate a request ID when client omits it") + require.NoError(t, validateUUID(req.GetRequestId()), "generated request ID must be a valid UUID") + }) + + t.Run("RequestCancelActivityExecution", func(t *testing.T) { + req := &workflowservice.RequestCancelActivityExecutionRequest{ + ActivityId: defaultActivityID, + } + err := validateAndNormalizeRequestCancelActivityExecutionRequest(req, maxIDLengthLimit, blobLimit, blobLimit, logger) + require.NoError(t, err) + require.NotEmpty(t, req.GetRequestId(), "server must generate a request ID when client omits it") + require.NoError(t, validateUUID(req.GetRequestId()), "generated request ID must be a valid UUID") + }) + + t.Run("TerminateActivityExecution", func(t *testing.T) { + req := &workflowservice.TerminateActivityExecutionRequest{ + ActivityId: defaultActivityID, + } + err := 
validateAndNormalizeTerminateActivityExecutionRequest(req, maxIDLengthLimit, blobLimit, blobLimit, logger) + require.NoError(t, err) + require.NotEmpty(t, req.GetRequestId(), "server must generate a request ID when client omits it") + require.NoError(t, validateUUID(req.GetRequestId()), "generated request ID must be a valid UUID") + }) + + t.Run("PauseActivityExecution", func(t *testing.T) { + req := &workflowservice.PauseActivityExecutionRequest{ + ActivityId: defaultActivityID, + } + err := validateAndNormalizePauseActivityExecutionRequest(req, maxIDLengthLimit, blobLimit, blobLimit, logger) + require.NoError(t, err) + require.NotEmpty(t, req.GetRequestId(), "server must generate a request ID when client omits it") + require.NoError(t, validateUUID(req.GetRequestId()), "generated request ID must be a valid UUID") + }) +} + +func validateUUID(s string) error { + _, err := uuid.Parse(s) + return err +} + +// TestRequestIDTooLong verifies that every standalone activity API enforces the request ID +// length limit when the client supplies a value that exceeds it. 
+func TestRequestIDTooLong(t *testing.T) { + maxIDLengthLimit := defaultMaxIDLengthLimit + blobLimit := defaultBlobSizeLimitError + logger := log.NewNoopLogger() + tooLong := string(make([]byte, maxIDLengthLimit+1)) + + t.Run("StartActivityExecution", func(t *testing.T) { + h := newTestFrontendHandler(blobLimit, defaultBlobSizeLimitWarn, maxIDLengthLimit) + req := &workflowservice.StartActivityExecutionRequest{ + ActivityId: defaultActivityID, + ActivityType: &commonpb.ActivityType{Name: defaultActivityType}, + TaskQueue: &taskqueuepb.TaskQueue{Name: defaultTaskQueue}, + StartToCloseTimeout: durationpb.New(10 * time.Second), + RetryPolicy: &commonpb.RetryPolicy{}, + RequestId: tooLong, + } + err := validateAndNormalizeStartRequest(req, h.config.MaxIDLengthLimit(), h.config.BlobSizeLimitError, h.config.BlobSizeLimitWarn, h.logger, h.saMapperProvider, h.saValidator) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + }) + + t.Run("RequestCancelActivityExecution", func(t *testing.T) { + req := &workflowservice.RequestCancelActivityExecutionRequest{ + ActivityId: defaultActivityID, + RequestId: tooLong, + } + err := validateAndNormalizeRequestCancelActivityExecutionRequest(req, maxIDLengthLimit, blobLimit, blobLimit, logger) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + }) + + t.Run("TerminateActivityExecution", func(t *testing.T) { + req := &workflowservice.TerminateActivityExecutionRequest{ + ActivityId: defaultActivityID, + RequestId: tooLong, + } + err := validateAndNormalizeTerminateActivityExecutionRequest(req, maxIDLengthLimit, blobLimit, blobLimit, logger) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + }) + + t.Run("PauseActivityExecution", func(t *testing.T) { + req := &workflowservice.PauseActivityExecutionRequest{ + ActivityId: defaultActivityID, + RequestId: tooLong, + } + err := 
validateAndNormalizePauseActivityExecutionRequest(req, maxIDLengthLimit, blobLimit, blobLimit, logger) + var invalidArgErr *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArgErr) + }) +} + func TestValidateStandAloneRequestIDTooLong(t *testing.T) { req := &workflowservice.StartActivityExecutionRequest{ ActivityId: defaultActivityID, From 1c6b2e20c9674203a6ee61c79c393e620abc48c8 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Mon, 11 May 2026 10:14:47 -0600 Subject: [PATCH 17/25] migrate activity operator commands to use testEnv --- tests/standalone_activity_test.go | 1752 +++++++++++------------------ 1 file changed, 630 insertions(+), 1122 deletions(-) diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index 076e3bc984a..b377aee623c 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -232,7 +232,7 @@ func (s *standaloneActivityTestSuite) TestIDConflictPolicy() { }) t.Run("UseExisting", func(t *testing.T) { - activityID := testcore.RandomizeStr(t.Name()) + originalActivityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) firstStartResp := env.startAndValidateActivity(ctx, t, originalActivityID, taskQueue) @@ -1404,6 +1404,7 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { t.Run("ByToken", func(t *testing.T) { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) + identity := "client-that-requested-cancellation" startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId @@ -1481,6 +1482,7 @@ func (s *standaloneActivityTestSuite) TestRequestCancel() { activityID := testcore.RandomizeStr(tc.name) taskQueue := testcore.RandomizeStr(tc.name) + identity := "client-that-requested-cancellation" startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId @@ -2254,7 +2256,7 @@ func (s *standaloneActivityTestSuite) TestTerminate() { 
ActivityId: activityID, RunId: runID, Reason: "Test Termination", - Identity: "terminator", + Identity: identity, }) require.NoError(t, err) @@ -3094,8 +3096,6 @@ func (s *standaloneActivityTestSuite) TestStartToCloseTimeout_WhileCancelRequest "activity in CANCEL_REQUESTED should still time out via START_TO_CLOSE") } - - // TestScheduleToStartTimeout tests that a schedule-to-start timeout is recorded after the activity is // created but never started. It also verifies that DescribeActivityExecution can be used to long-poll for a TimedOut // state change caused by execution of a timer task. @@ -5610,8 +5610,8 @@ func (s *standaloneActivityTestSuite) TestStartDelay() { }) } - func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { + env := s.newTestEnv() t := s.T() t.Run("InvalidArgument", func(t *testing.T) { @@ -5619,8 +5619,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, ActivityType: &commonpb.ActivityType{Name: "test-activity"}, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, @@ -5628,7 +5628,7 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { }) require.NoError(t, err) runID := startResp.RunId - ns := s.Namespace().String() + ns := env.Namespace().String() validOptions := &activitypb.ActivityOptions{ StartToCloseTimeout: durationpb.New(2 * time.Minute), @@ -5688,7 +5688,7 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - _, err := 
s.FrontendClient().UpdateActivityExecutionOptions(ctx, tc.req) + _, err := env.FrontendClient().UpdateActivityExecutionOptions(ctx, tc.req) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Contains(t, invalidArgErr.Message, tc.expectedErr) @@ -5704,8 +5704,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { taskQueue := testcore.RandomizeStr(t.Name()) // Start with a long retry interval to keep the activity in backoff after failure. - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, ActivityType: &commonpb.ActivityType{Name: "test-activity"}, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, @@ -5718,15 +5718,15 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Poll and fail with a retryable failure — activity enters long backoff. 
- pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Failure: &failurepb.Failure{ Message: "retryable failure", @@ -5738,8 +5738,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Shorten the retry interval so the activity retries immediately. - updateResp, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: s.Namespace().String(), + updateResp, err := env.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, ActivityOptions: &activitypb.ActivityOptions{ @@ -5753,8 +5753,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NotNil(t, updateResp) // Activity should now be available to poll for attempt 2. 
- pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, }) require.NoError(t, err) @@ -5771,8 +5771,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, ActivityType: &commonpb.ActivityType{Name: "test-activity"}, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, @@ -5785,16 +5785,16 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Poll attempt 1 — activity is now STARTED. - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) // Shorten the retry interval while the activity is running. 
- _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, ActivityOptions: &activitypb.ActivityOptions{ @@ -5807,8 +5807,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Fail attempt 1 — next retry should dispatch immediately with the new 1ms interval. - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Failure: &failurepb.Failure{ Message: "retryable failure", @@ -5820,8 +5820,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Attempt 2 should be available immediately. 
- pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, }) require.NoError(t, err) @@ -5835,8 +5835,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, ActivityType: &commonpb.ActivityType{Name: "test-activity"}, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, @@ -5850,13 +5850,13 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Start and fail the activity once — it enters backoff (SCHEDULED state). 
- pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, }) require.NoError(t, err) - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Failure: &failurepb.Failure{ Message: "retryable failure", @@ -5868,8 +5868,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Shorten schedule-to-close — activity should time out immediately. - _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, ActivityOptions: &activitypb.ActivityOptions{ @@ -5880,8 +5880,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Long-poll until the activity times out. 
- pollOutcome, err := s.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ - Namespace: s.Namespace().String(), + pollOutcome, err := env.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, }) @@ -5901,8 +5901,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, ActivityType: &commonpb.ActivityType{Name: "test-activity"}, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, @@ -5915,16 +5915,16 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Poll attempt 1 — activity is now STARTED. - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) // Shorten schedule-to-close — the new task fires almost immediately. 
- _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, ActivityOptions: &activitypb.ActivityOptions{ @@ -5935,8 +5935,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Long-poll until the activity times out (no response sent to the task queue). - pollOutcome, err := s.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ - Namespace: s.Namespace().String(), + pollOutcome, err := env.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, }) @@ -5959,8 +5959,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { taskQueue := testcore.RandomizeStr(t.Name()) originalStartToClose := 8 * time.Second - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, ActivityType: &commonpb.ActivityType{Name: "test-activity"}, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, @@ -5974,14 +5974,14 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Fail attempt 1. 
- pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Failure: &failurepb.Failure{ Message: "retryable failure", @@ -5994,8 +5994,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { // Update: extend schedule-to-close, shorten retry interval. newScheduleToClose := 30 * time.Second - updateResp, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: s.Namespace().String(), + updateResp, err := env.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, ActivityOptions: &activitypb.ActivityOptions{ @@ -6013,8 +6013,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.Equal(t, int64(originalStartToClose.Seconds()), updateResp.GetActivityOptions().GetStartToCloseTimeout().GetSeconds()) // Attempt 2 should be available immediately. 
- pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, }) require.NoError(t, err) @@ -6029,8 +6029,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { taskQueue := testcore.RandomizeStr(t.Name()) originalMaxAttempts := int32(10) - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, ActivityType: &commonpb.ActivityType{Name: "test-activity"}, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, @@ -6043,8 +6043,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Update maximum attempts to a large value. - _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, ActivityOptions: &activitypb.ActivityOptions{ @@ -6055,8 +6055,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Verify the update was applied. 
- describeResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + describeResp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, }) @@ -6064,8 +6064,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.EqualValues(t, 1000, describeResp.GetInfo().GetRetryPolicy().GetMaximumAttempts()) // Reset to original options. - _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, RestoreOriginal: true, @@ -6073,8 +6073,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Verify original maximum attempts are restored. - describeResp, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + describeResp, err = env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, }) @@ -6082,14 +6082,14 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.Equal(t, originalMaxAttempts, describeResp.GetInfo().GetRetryPolicy().GetMaximumAttempts()) // Verify the activity still executes after reset — poll attempt 1 and complete it. 
- pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Result: payloads.EncodeString("done"), }) @@ -6105,8 +6105,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, ActivityType: &commonpb.ActivityType{Name: "test-activity"}, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, @@ -6119,8 +6119,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Shorten schedule-to-start — no workers are polling so it should fire immediately. 
- _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, ActivityOptions: &activitypb.ActivityOptions{ @@ -6131,8 +6131,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Long-poll until the activity times out. - pollOutcome, err := s.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ - Namespace: s.Namespace().String(), + pollOutcome, err := env.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, }) @@ -6152,8 +6152,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, ActivityType: &commonpb.ActivityType{Name: "test-activity"}, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, @@ -6165,16 +6165,16 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Poll attempt 1 — activity is now STARTED. 
- pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) // Shorten start-to-close — the new task fires almost immediately. - _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, ActivityOptions: &activitypb.ActivityOptions{ @@ -6185,8 +6185,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Long-poll until the activity times out (no response sent to the task queue). 
- pollOutcome, err := s.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ - Namespace: s.Namespace().String(), + pollOutcome, err := env.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, }) @@ -6207,8 +6207,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, ActivityType: &commonpb.ActivityType{Name: "test-activity"}, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, @@ -6221,16 +6221,16 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Poll attempt 1 — activity is now STARTED. - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) // Shorten heartbeat timeout. 
- _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, ActivityOptions: &activitypb.ActivityOptions{ @@ -6241,8 +6241,8 @@ func (s *standaloneActivityTestSuite) TestUpdateActivityExecutionOptions() { require.NoError(t, err) // Long-poll until the activity times out (no further heartbeats sent). - pollOutcome, err := s.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ - Namespace: s.Namespace().String(), + pollOutcome, err := env.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, }) @@ -6967,645 +6967,151 @@ func (s *standaloneActivityTestSuite) runNexusCompletionHTTPServer(t *testing.T, return srv.URL } -func (s *standaloneActivityTestSuite) TestCallbacks() { +func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { + env := s.newTestEnv() t := s.T() - ctx, cancel := context.WithTimeout(t.Context(), 15*time.Second) - defer cancel() - - s.OverrideDynamicConfig( - callbacks.AllowedAddresses, - []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, - ) - t.Run("AcceptedOnStart", func(t *testing.T) { + t.Run("PauseWhileStarted", func(t *testing.T) { + ctx := testcore.NewContext() activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - resp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), - Input: defaultInput, - TaskQueue: &taskqueuepb.TaskQueue{ - Name: 
taskQueue, - }, - StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.Any().String(), - CompletionCallbacks: []*commonpb.Callback{{ - Variant: &commonpb.Callback_Nexus_{ - Nexus: &commonpb.Callback_Nexus{ - Url: "http://localhost/callback", - }, - }, - }}, + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + pollResp := env.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", }) require.NoError(t, err) - require.True(t, resp.Started) - require.NotEmpty(t, resp.RunId) - }) - t.Run("MultipleCallbacksAccepted", func(t *testing.T) { - activityID := testcore.RandomizeStr(t.Name()) - taskQueue := testcore.RandomizeStr(t.Name()) + // DescribeActivityExecution should reflect PAUSE_REQUESTED run state: the activity is still + // STARTED (worker token valid) but a pause has been requested via the flag. 
+ descResp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, descResp.GetInfo().GetRunState()) - resp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), - Input: defaultInput, - TaskQueue: &taskqueuepb.TaskQueue{ - Name: taskQueue, - }, - StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.Any().String(), - CompletionCallbacks: []*commonpb.Callback{ - {Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: "http://localhost/callback1"}}}, - {Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: "http://localhost/callback2"}}}, - }, + // Heartbeat should report ActivityPaused=true. + heartbeatResp, err := env.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: env.Namespace().String(), + TaskToken: pollResp.TaskToken, }) require.NoError(t, err) - require.True(t, resp.Started) - require.NotEmpty(t, resp.RunId) + require.True(t, heartbeatResp.GetActivityPaused(), "expected ActivityPaused=true after pause") + + // DescribeActivityExecution should still reflect PAUSE_REQUESTED. 
+ descResp, err = env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, descResp.GetInfo().GetRunState()) }) - t.Run("DescribeIncludesCallbackInfo", func(t *testing.T) { + t.Run("PauseWhileScheduled", func(t *testing.T) { + ctx := testcore.NewContext() activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - callbackURL := "http://localhost/describe-callback" - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), - Input: defaultInput, - TaskQueue: &taskqueuepb.TaskQueue{ - Name: taskQueue, - }, - StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.Any().String(), - CompletionCallbacks: []*commonpb.Callback{ - {Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: callbackURL}}}, - }, + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", }) require.NoError(t, err) - describeResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + // DescribeActivityExecution should reflect PAUSED run state. 
+ descResp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, - RunId: startResp.RunId, + RunId: runID, }) require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, descResp.GetInfo().GetRunState()) - require.Len(t, describeResp.Callbacks, 1) - cbInfo := describeResp.Callbacks[0] - require.NotNil(t, cbInfo.GetTrigger().GetActivityClosed()) - require.Equal(t, callbackURL, cbInfo.GetInfo().GetCallback().GetNexus().GetUrl()) - require.Equal(t, enumspb.CALLBACK_STATE_STANDBY, cbInfo.GetInfo().GetState()) - require.NotNil(t, cbInfo.GetInfo().GetRegistrationTime()) + // Attempt to poll — the dispatch task was invalidated by the stamp bump, so no task should + // be available. Use a short-lived context to avoid blocking the test. + shortCtx, shortCancel := context.WithTimeout(ctx, 2*time.Second) + defer shortCancel() + pollResp, err := env.FrontendClient().PollActivityTaskQueue(shortCtx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: env.Tv().WorkerIdentity(), + }) + // Either the poll times out (deadline exceeded) or returns an empty response. 
+ if err == nil { + require.Empty(t, pollResp.GetActivityId(), "expected no task to be dispatched while paused") + } }) - t.Run("ExceedsMaxCallbacksLimit", func(t *testing.T) { - maxCallbacks := 1 - s.OverrideDynamicConfig( - callback.MaxPerExecution, - maxCallbacks, - ) - + t.Run("PauseWhilePaused", func(t *testing.T) { + ctx := testcore.NewContext() activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), - Input: defaultInput, - TaskQueue: &taskqueuepb.TaskQueue{ - Name: taskQueue, - }, - StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.Any().String(), - // Two callbacks when overridden max dynamic config is 1, so should error. - CompletionCallbacks: []*commonpb.Callback{ - {Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: "http://localhost/callback1"}}}, - {Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: "http://localhost/callback2"}}}, - }, - }) - require.Error(t, err) - require.ErrorContains(t, err, fmt.Sprintf("cannot attach more than %d callbacks", maxCallbacks)) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + pauseReq := &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + } + _, err := env.FrontendClient().PauseActivityExecution(ctx, pauseReq) + require.NoError(t, err) + + // Second pause should fail with FailedPrecondition (activity is already paused). 
+ _, err = env.FrontendClient().PauseActivityExecution(ctx, pauseReq) + var failedPreconditionErr *serviceerror.FailedPrecondition + require.ErrorAs(t, err, &failedPreconditionErr) }) - t.Run("CompletesWithCallbacks", func(t *testing.T) { + t.Run("PauseWhilePausedIdempotent", func(t *testing.T) { + ctx := testcore.NewContext() activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - ch := &completionHandler{ - requestCh: make(chan *nexusrpc.CompletionRequest, 1), - requestCompleteCh: make(chan error, 1), + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + pauseReq := &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "test-identity", + Reason: "test-pause", + RequestId: "some-request-id", } - defer func() { - close(ch.requestCh) - close(ch.requestCompleteCh) - }() - callbackAddress := s.runNexusCompletionHTTPServer(t, ch) - - _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), - Input: defaultInput, - TaskQueue: &taskqueuepb.TaskQueue{ - Name: taskQueue, - }, - StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.Any().String(), - CompletionCallbacks: []*commonpb.Callback{{ - Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: callbackAddress}}, - }}, - }) - require.NoError(t, err) - - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), - TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), - }) - require.NoError(t, err) - - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, 
&workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), - TaskToken: pollResp.TaskToken, - Result: defaultResult, - Identity: defaultIdentity, - }) - require.NoError(t, err) - - // Verify the callback was actually delivered with the correct result. - select { - case completion := <-ch.requestCh: - require.Equal(t, nexus.OperationStateSucceeded, completion.State) - require.False(t, completion.StartTime.IsZero()) - require.False(t, completion.CloseTime.IsZero()) - body, readErr := io.ReadAll(completion.HTTPRequest.Body) - _ = completion.HTTPRequest.Body.Close() - require.NoError(t, readErr) - require.JSONEq(t, string(defaultResult.Payloads[0].Data), string(body)) - // Unblock CompleteOperation so it returns 200 OK to the callback library - ch.requestCompleteCh <- nil - case <-ctx.Done(): - require.Fail(t, "timed out waiting for completion callback") - } - - // Verify the activity is in completed state. - descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - }) - require.NoError(t, err) - require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_COMPLETED, descResp.GetInfo().GetStatus()) - }) - - t.Run("FailsWithCallbacks", func(t *testing.T) { - activityID := testcore.RandomizeStr(t.Name()) - taskQueue := testcore.RandomizeStr(t.Name()) - - ch := &completionHandler{ - requestCh: make(chan *nexusrpc.CompletionRequest, 1), - requestCompleteCh: make(chan error, 1), - } - defer func() { - close(ch.requestCh) - close(ch.requestCompleteCh) - }() - callbackAddress := s.runNexusCompletionHTTPServer(t, ch) - - _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), - Input: defaultInput, - TaskQueue: &taskqueuepb.TaskQueue{ - Name: 
taskQueue, - }, - StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.Any().String(), - CompletionCallbacks: []*commonpb.Callback{{ - Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: callbackAddress}}, - }}, - }) - require.NoError(t, err) - - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), - TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), - }) - require.NoError(t, err) - - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), - TaskToken: pollResp.TaskToken, - Failure: defaultFailure, - Identity: defaultIdentity, - }) - require.NoError(t, err) - - // Verify the callback was actually delivered with failure state. - select { - case completion := <-ch.requestCh: - require.Equal(t, nexus.OperationStateFailed, completion.State) - require.False(t, completion.StartTime.IsZero()) - require.False(t, completion.CloseTime.IsZero()) - var failureErr *nexus.FailureError - require.ErrorAs(t, completion.Error.Cause, &failureErr) - tFailure, convErr := commonnexus.NexusFailureToTemporalFailure(failureErr.Failure) - require.NoError(t, convErr) - sdkErr := temporal.GetDefaultFailureConverter().FailureToError(tFailure) - var appErr *temporal.ApplicationError - require.ErrorAs(t, sdkErr, &appErr) - require.Equal(t, defaultFailure.Message, appErr.Message()) - ch.requestCompleteCh <- nil - case <-ctx.Done(): - require.Fail(t, "timed out waiting for completion callback") - } - - // Verify the activity is in failed state. 
- descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - }) - require.NoError(t, err) - require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_FAILED, descResp.GetInfo().GetStatus()) - }) - - t.Run("TerminatedWithCallbacks", func(t *testing.T) { - activityID := testcore.RandomizeStr(t.Name()) - taskQueue := testcore.RandomizeStr(t.Name()) - - ch := &completionHandler{ - requestCh: make(chan *nexusrpc.CompletionRequest, 1), - requestCompleteCh: make(chan error, 1), - } - defer func() { - close(ch.requestCh) - close(ch.requestCompleteCh) - }() - callbackAddress := s.runNexusCompletionHTTPServer(t, ch) - - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), - Input: defaultInput, - TaskQueue: &taskqueuepb.TaskQueue{ - Name: taskQueue, - }, - StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.Any().String(), - CompletionCallbacks: []*commonpb.Callback{{ - Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: callbackAddress}}, - }}, - }) - require.NoError(t, err) - runID := startResp.RunId - - _, err = s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), - TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), - }) - require.NoError(t, err) - - reason := "Test Termination" - _, err = s.FrontendClient().TerminateActivityExecution(ctx, &workflowservice.TerminateActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - RunId: runID, - Reason: reason, - Identity: "terminator", - }) - require.NoError(t, err) - - // Verify the callback 
was delivered with failure state (terminated maps to failed). - select { - case completion := <-ch.requestCh: - require.Equal(t, nexus.OperationStateFailed, completion.State) - require.False(t, completion.StartTime.IsZero()) - require.False(t, completion.CloseTime.IsZero()) - var failureErr *nexus.FailureError - require.ErrorAs(t, completion.Error.Cause, &failureErr) - tFailure, convErr := commonnexus.NexusFailureToTemporalFailure(failureErr.Failure) - require.NoError(t, convErr) - sdkErr := temporal.GetDefaultFailureConverter().FailureToError(tFailure) - var termErr *temporal.TerminatedError - require.ErrorAs(t, sdkErr, &termErr) - ch.requestCompleteCh <- nil - case <-ctx.Done(): - require.Fail(t, "timed out waiting for completion callback") - } - - descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - }) - require.NoError(t, err) - require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_TERMINATED, descResp.GetInfo().GetStatus()) - }) - - t.Run("CanceledWithCallbacks", func(t *testing.T) { - activityID := testcore.RandomizeStr(t.Name()) - taskQueue := testcore.RandomizeStr(t.Name()) - - ch := &completionHandler{ - requestCh: make(chan *nexusrpc.CompletionRequest, 1), - requestCompleteCh: make(chan error, 1), - } - defer func() { - close(ch.requestCh) - close(ch.requestCompleteCh) - }() - callbackAddress := s.runNexusCompletionHTTPServer(t, ch) - - _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), - Input: defaultInput, - TaskQueue: &taskqueuepb.TaskQueue{ - Name: taskQueue, - }, - StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.Any().String(), - CompletionCallbacks: []*commonpb.Callback{{ - Variant: 
&commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: callbackAddress}}, - }}, - }) - require.NoError(t, err) - - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), - TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), - }) - require.NoError(t, err) - - _, err = s.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - Identity: "cancelling-worker", - RequestId: s.tv.Any().String(), - Reason: "Test Cancellation", - }) - require.NoError(t, err) - - _, err = s.FrontendClient().RespondActivityTaskCanceled(ctx, &workflowservice.RespondActivityTaskCanceledRequest{ - Namespace: s.Namespace().String(), - TaskToken: pollResp.TaskToken, - Identity: defaultIdentity, - }) - require.NoError(t, err) - - // Verify the callback was delivered with canceled state. 
- select { - case completion := <-ch.requestCh: - require.Equal(t, nexus.OperationStateCanceled, completion.State) - require.False(t, completion.StartTime.IsZero()) - require.False(t, completion.CloseTime.IsZero()) - var failureErr *nexus.FailureError - require.ErrorAs(t, completion.Error.Cause, &failureErr) - tFailure, convErr := commonnexus.NexusFailureToTemporalFailure(failureErr.Failure) - require.NoError(t, convErr) - sdkErr := temporal.GetDefaultFailureConverter().FailureToError(tFailure) - var canceledErr *temporal.CanceledError - require.ErrorAs(t, sdkErr, &canceledErr) - ch.requestCompleteCh <- nil - case <-ctx.Done(): - require.Fail(t, "timed out waiting for completion callback") - } - - descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - }) - require.NoError(t, err) - require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_CANCELED, descResp.GetInfo().GetStatus()) - }) - - // This test covers the timeout callback path using schedule-to-start, but the callback behavior - // is the same for all timeout types (schedule-to-close, start-to-close, heartbeat). 
- t.Run("TimeoutWithCallbacks", func(t *testing.T) { - activityID := testcore.RandomizeStr(t.Name()) - taskQueue := testcore.RandomizeStr(t.Name()) - - ch := &completionHandler{ - requestCh: make(chan *nexusrpc.CompletionRequest, 1), - requestCompleteCh: make(chan error, 1), - } - defer func() { - close(ch.requestCh) - close(ch.requestCompleteCh) - }() - callbackAddress := s.runNexusCompletionHTTPServer(t, ch) - - _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), - Input: defaultInput, - TaskQueue: &taskqueuepb.TaskQueue{ - Name: taskQueue, - }, - StartToCloseTimeout: durationpb.New(1 * time.Minute), - ScheduleToStartTimeout: durationpb.New(1 * time.Second), - RequestId: s.tv.Any().String(), - CompletionCallbacks: []*commonpb.Callback{{ - Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: callbackAddress}}, - }}, - }) - require.NoError(t, err) - - // No worker polls — activity will time out waiting to be started. - - // Verify the callback is delivered with failure state and non-zero CloseTime. - select { - case completion := <-ch.requestCh: - require.Equal(t, nexus.OperationStateFailed, completion.State) - var failureErr *nexus.FailureError - require.ErrorAs(t, completion.Error.Cause, &failureErr) - tFailure, convErr := commonnexus.NexusFailureToTemporalFailure(failureErr.Failure) - require.NoError(t, convErr) - sdkErr := temporal.GetDefaultFailureConverter().FailureToError(tFailure) - var timeoutErr *temporal.TimeoutError - require.ErrorAs(t, sdkErr, &timeoutErr) - require.False(t, completion.StartTime.IsZero()) - require.False(t, completion.CloseTime.IsZero()) - ch.requestCompleteCh <- nil - case <-ctx.Done(): - require.Fail(t, "timed out waiting for completion callback") - } - - // Verify the activity is in timed-out state. 
- descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - }) - require.NoError(t, err) - require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_TIMED_OUT, descResp.GetInfo().GetStatus()) - }) -} - -func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { - t := s.T() - - t.Run("PauseWhileStarted", func(t *testing.T) { - ctx := testcore.NewContext() - activityID := testcore.RandomizeStr(t.Name()) - taskQueue := testcore.RandomizeStr(t.Name()) - - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) - runID := startResp.RunId - - pollResp := s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) - - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - RunId: runID, - Identity: "test-identity", - Reason: "test-pause", - }) - require.NoError(t, err) - - // DescribeActivityExecution should reflect PAUSE_REQUESTED run state: the activity is still - // STARTED (worker token valid) but a pause has been requested via the flag. - descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - RunId: runID, - }) - require.NoError(t, err) - require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, descResp.GetInfo().GetRunState()) - - // Heartbeat should report ActivityPaused=true. - heartbeatResp, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ - Namespace: s.Namespace().String(), - TaskToken: pollResp.TaskToken, - }) - require.NoError(t, err) - require.True(t, heartbeatResp.GetActivityPaused(), "expected ActivityPaused=true after pause") - - // DescribeActivityExecution should still reflect PAUSE_REQUESTED. 
- descResp, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - RunId: runID, - }) - require.NoError(t, err) - require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, descResp.GetInfo().GetRunState()) - }) - - t.Run("PauseWhileScheduled", func(t *testing.T) { - ctx := testcore.NewContext() - activityID := testcore.RandomizeStr(t.Name()) - taskQueue := testcore.RandomizeStr(t.Name()) - - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) - runID := startResp.RunId - - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - RunId: runID, - Identity: "test-identity", - Reason: "test-pause", - }) - require.NoError(t, err) - - // DescribeActivityExecution should reflect PAUSED run state. - descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - RunId: runID, - }) - require.NoError(t, err) - require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, descResp.GetInfo().GetRunState()) - - // Attempt to poll — the dispatch task was invalidated by the stamp bump, so no task should - // be available. Use a short-lived context to avoid blocking the test. - shortCtx, shortCancel := context.WithTimeout(ctx, 2*time.Second) - defer shortCancel() - pollResp, err := s.FrontendClient().PollActivityTaskQueue(shortCtx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), - TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), - }) - // Either the poll times out (deadline exceeded) or returns an empty response. 
- if err == nil { - require.Empty(t, pollResp.GetActivityId(), "expected no task to be dispatched while paused") - } - }) - - t.Run("PauseWhilePaused", func(t *testing.T) { - ctx := testcore.NewContext() - activityID := testcore.RandomizeStr(t.Name()) - taskQueue := testcore.RandomizeStr(t.Name()) - - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) - runID := startResp.RunId - - pauseReq := &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - RunId: runID, - Identity: "test-identity", - Reason: "test-pause", - } - _, err := s.FrontendClient().PauseActivityExecution(ctx, pauseReq) - require.NoError(t, err) - - // Second pause should fail with FailedPrecondition (activity is already paused). - _, err = s.FrontendClient().PauseActivityExecution(ctx, pauseReq) - var failedPreconditionErr *serviceerror.FailedPrecondition - require.ErrorAs(t, err, &failedPreconditionErr) - }) - - t.Run("PauseWhilePausedIdempotent", func(t *testing.T) { - ctx := testcore.NewContext() - activityID := testcore.RandomizeStr(t.Name()) - taskQueue := testcore.RandomizeStr(t.Name()) - - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) - runID := startResp.RunId - - pauseReq := &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), - ActivityId: activityID, - RunId: runID, - Identity: "test-identity", - Reason: "test-pause", - RequestId: "some-request-id", - } - _, err := s.FrontendClient().PauseActivityExecution(ctx, pauseReq) - require.NoError(t, err) + _, err := env.FrontendClient().PauseActivityExecution(ctx, pauseReq) + require.NoError(t, err) // Second pause with the same request ID should succeed (idempotent no-op). 
- _, err = s.FrontendClient().PauseActivityExecution(ctx, pauseReq) + _, err = env.FrontendClient().PauseActivityExecution(ctx, pauseReq) require.NoError(t, err) }) t.Run("PauseNotFound", func(t *testing.T) { ctx := testcore.NewContext() - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: testcore.RandomizeStr(t.Name()), Identity: "test-identity", Reason: "test", @@ -7620,21 +7126,21 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId // Poll and complete the activity so it reaches a terminal state. - pollResp := s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) - _, err := s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + pollResp := env.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + _, err := env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) // Pause should fail with FailedPrecondition on a terminal activity. 
- _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -7652,15 +7158,15 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), + ActivityType: env.Tv().ActivityType(), + Identity: env.Tv().WorkerIdentity(), Input: defaultInput, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.RequestID(), + RequestId: env.Tv().RequestID(), RetryPolicy: &commonpb.RetryPolicy{ MaximumAttempts: 10, InitialInterval: durationpb.New(1 * time.Second), @@ -7670,17 +7176,17 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.NoError(t, err) // Poll – activity is now STARTED at attempt=1. 
- pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) // Pause while STARTED. - _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", Reason: "test-reason", @@ -7688,24 +7194,24 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.NoError(t, err) // Heartbeat should report ActivityPaused=true. - heartbeatResp, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ - Namespace: s.Namespace().String(), + heartbeatResp, err := env.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, }) require.NoError(t, err) require.True(t, heartbeatResp.GetActivityPaused()) // Describe should show PAUSE_REQUESTED: status is STARTED with PauseState set. 
- descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) require.NoError(t, err) require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, descResp.GetInfo().GetRunState()) // Fail the attempt – this triggers a retry (attempt=2) but the activity stays paused. - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Failure: &failurepb.Failure{ Message: "retryable failure", @@ -7713,14 +7219,14 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, }, }, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) // After fail, activity should be PAUSED (SCHEDULED + paused) at attempt=2 with a recorded failure. require.EventuallyWithT(t, func(c *assert.CollectT) { - dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + dr, dErr := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) require.NoError(c, dErr) @@ -7730,18 +7236,18 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { }, 10*time.Second, 200*time.Millisecond) // Unpause – activity should be dispatched. 
- _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", }) require.NoError(t, err) // Poll and complete the second attempt. - poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + poll2Resp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.Equal(t, activityID, poll2Resp.GetActivityId()) @@ -7755,15 +7261,15 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), + ActivityType: env.Tv().ActivityType(), + Identity: env.Tv().WorkerIdentity(), Input: defaultInput, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.RequestID(), + RequestId: env.Tv().RequestID(), RetryPolicy: &commonpb.RetryPolicy{ MaximumAttempts: 10, InitialInterval: durationpb.New(1 * time.Second), @@ -7773,17 +7279,17 @@ func (s *standaloneActivityTestSuite) 
TestPauseActivityExecution() { require.NoError(t, err) // Poll – attempt=1. - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) // Pause while STARTED. - _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", Reason: "test-reason", @@ -7791,8 +7297,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.NoError(t, err) // Describe should show PAUSE_REQUESTED: status is STARTED with PauseState set. 
- descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) require.NoError(t, err) @@ -7800,8 +7306,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.EqualValues(t, 1, descResp.GetInfo().GetAttempt()) failureMsg := "activity-failed-while-paused" - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Failure: &failurepb.Failure{ Message: failureMsg, @@ -7809,14 +7315,14 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, }, }, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) // Verify attempt is now 2, activity is still paused, and LastFailure is populated. require.EventuallyWithT(t, func(c *assert.CollectT) { - dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + dr, dErr := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) require.NoError(c, dErr) @@ -7826,17 +7332,17 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { }, 10*time.Second, 200*time.Millisecond) // Unpause and complete. 
- _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", }) require.NoError(t, err) - poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + poll2Resp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.Equal(t, activityID, poll2Resp.GetActivityId()) @@ -7853,15 +7359,15 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), + ActivityType: env.Tv().ActivityType(), + Identity: env.Tv().WorkerIdentity(), Input: defaultInput, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.RequestID(), + RequestId: env.Tv().RequestID(), RetryPolicy: &commonpb.RetryPolicy{ MaximumAttempts: 10, InitialInterval: durationpb.New(30 * time.Second), @@ -7871,16 +7377,16 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { 
require.NoError(t, err) // Poll and fail attempt=1. - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Failure: &failurepb.Failure{ Message: "retryable failure", @@ -7888,22 +7394,22 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, }, }, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) // Wait for the activity to be rescheduled at attempt=2 (in retry backoff). require.Eventually(t, func() bool { - dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + dr, dErr := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) return dErr == nil && dr.GetInfo().GetAttempt() == 2 }, 10*time.Second, 200*time.Millisecond) // Pause while in SCHEDULED retry backoff. 
- _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", Reason: "test-reason", @@ -7912,8 +7418,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { // Verify activity is PAUSED at attempt=2 (not dispatched while paused). require.EventuallyWithT(t, func(c *assert.CollectT) { - dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + dr, dErr := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) require.NoError(c, dErr) @@ -7922,18 +7428,18 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { }, 10*time.Second, 200*time.Millisecond) // Unpause – activity should be dispatched. - _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", }) require.NoError(t, err) // Poll and complete attempt=2. 
- poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + poll2Resp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.Equal(t, activityID, poll2Resp.GetActivityId()) @@ -7947,15 +7453,15 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), + ActivityType: env.Tv().ActivityType(), + Identity: env.Tv().WorkerIdentity(), Input: defaultInput, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.RequestID(), + RequestId: env.Tv().RequestID(), RetryPolicy: &commonpb.RetryPolicy{ MaximumAttempts: 10, InitialInterval: durationpb.New(30 * time.Second), @@ -7965,16 +7471,16 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.NoError(t, err) // Poll and fail attempt=1 – activity enters a 30s retry backoff. 
- pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Failure: &failurepb.Failure{ Message: "retryable failure", @@ -7982,39 +7488,39 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, }, }, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) // Wait for activity to be rescheduled at attempt=2. require.Eventually(t, func() bool { - dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + dr, dErr := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) return dErr == nil && dr.GetInfo().GetAttempt() == 2 }, 10*time.Second, 200*time.Millisecond) // Pause, then immediately unpause – this should skip the remaining 30s backoff. 
- _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", }) require.NoError(t, err) - _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", }) require.NoError(t, err) // Activity should be dispatched quickly (well within the 30s retry backoff window). - poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + poll2Resp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.Equal(t, activityID, poll2Resp.GetActivityId()) @@ -8028,26 +7534,26 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId // Poll so the activity is STARTED. - s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + env.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) // Request cancellation — activity transitions to CANCEL_REQUESTED. 
- _, err := s.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", Reason: "test-cancel", - RequestId: s.tv.RequestID(), + RequestId: env.Tv().RequestID(), }) require.NoError(t, err) // Pause must be rejected — cannot pause an activity with a pending cancellation. - _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8065,12 +7571,12 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId // Pause while SCHEDULED → activity becomes PAUSED. - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8079,20 +7585,20 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.NoError(t, err) // Cancel should succeed and take effect immediately (no worker to notify). 
- _, err = s.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", Reason: "test-cancel", - RequestId: s.tv.RequestID(), + RequestId: env.Tv().RequestID(), }) require.NoError(t, err) // Activity should be CANCELED immediately. require.EventuallyWithT(t, func(c *assert.CollectT) { - dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + dr, dErr := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, }) @@ -8107,12 +7613,12 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId // Pause while SCHEDULED → PAUSED. 
- _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8120,8 +7626,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { }) require.NoError(t, err) - descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, }) @@ -8129,8 +7635,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, descResp.GetInfo().GetRunState()) // Terminate while PAUSED → TERMINATED. 
- _, err = s.FrontendClient().TerminateActivityExecution(ctx, &workflowservice.TerminateActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().TerminateActivityExecution(ctx, &workflowservice.TerminateActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Reason: "test-terminate", @@ -8138,8 +7644,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { }) require.NoError(t, err) - descResp, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err = env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, }) @@ -8156,22 +7662,22 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), + ActivityType: env.Tv().ActivityType(), + Identity: env.Tv().WorkerIdentity(), Input: defaultInput, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, ScheduleToCloseTimeout: durationpb.New(2 * time.Second), - RequestId: s.tv.RequestID(), + RequestId: env.Tv().RequestID(), }) require.NoError(t, err) runID := startResp.RunId // Pause immediately while SCHEDULED — the S2C timer is still running. 
- _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8180,8 +7686,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.NoError(t, err) // Long-poll for the activity outcome — the S2C timeout fires while the activity is PAUSED. - pollActivityResp, err := s.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ - Namespace: s.Namespace().String(), + pollActivityResp, err := env.FrontendClient().PollActivityExecution(ctx, &workflowservice.PollActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, }) @@ -8198,15 +7704,15 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId // Poll → STARTED. - pollResp := s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + pollResp := env.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) // Pause while STARTED → PAUSE_REQUESTED (flag-only; worker token stays valid). 
- _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8214,8 +7720,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { }) require.NoError(t, err) - descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, }) @@ -8223,16 +7729,16 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED, descResp.GetInfo().GetRunState()) // Worker completes despite the pause flag — pause is advisory, the token is still valid. - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) // Activity must be COMPLETED. 
- descResp, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err = env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, }) @@ -8246,15 +7752,15 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), + ActivityType: env.Tv().ActivityType(), + Identity: env.Tv().WorkerIdentity(), Input: defaultInput, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.RequestID(), + RequestId: env.Tv().RequestID(), RetryPolicy: &commonpb.RetryPolicy{ MaximumAttempts: 10, }, @@ -8262,17 +7768,17 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.NoError(t, err) // Poll → STARTED. - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) // Pause while STARTED → PAUSE_REQUESTED. 
- _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", Reason: "test-pause", @@ -8280,8 +7786,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.NoError(t, err) // Worker fails with a non-retryable error — must transition to FAILED, not retry or stay paused. - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Failure: &failurepb.Failure{ Message: "non-retryable failure", @@ -8289,12 +7795,12 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: true}, }, }, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) - descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) require.NoError(t, err) @@ -8307,8 +7813,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { ctx := testcore.NewContext() t.Run("EmptyActivityID", func(t *testing.T) { - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, 
&workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), Identity: "test-identity", Reason: "test-pause", }) @@ -8318,8 +7824,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { }) t.Run("ActivityIDTooLong", func(t *testing.T) { - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: string(make([]byte, defaultMaxIDLengthLimit+1)), Identity: "test-identity", }) @@ -8330,8 +7836,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { }) t.Run("IdentityTooLong", func(t *testing.T) { - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: testcore.RandomizeStr(t.Name()), Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), }) @@ -8343,14 +7849,14 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { t.Run("ReasonTooLong", func(t *testing.T) { blobSizeLimitError := 1000 - cleanup := s.OverrideDynamicConfig( + cleanup := env.OverrideDynamicConfig( dynamicconfig.BlobSizeLimitError, blobSizeLimitError, ) defer cleanup() - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: testcore.RandomizeStr(t.Name()), Identity: "test-identity", Reason: string(make([]byte, blobSizeLimitError+1)), @@ -8361,8 +7867,8 @@ func (s *standaloneActivityTestSuite) 
TestPauseActivityExecution() { }) t.Run("InvalidRunID", func(t *testing.T) { - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: testcore.RandomizeStr(t.Name()), RunId: "invalid-run-id", Identity: "test-identity", @@ -8383,15 +7889,15 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), + ActivityType: env.Tv().ActivityType(), + Identity: env.Tv().WorkerIdentity(), Input: defaultInput, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.RequestID(), + RequestId: env.Tv().RequestID(), RetryPolicy: &commonpb.RetryPolicy{ MaximumAttempts: 10, InitialInterval: durationpb.New(10 * time.Minute), @@ -8401,16 +7907,16 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.NoError(t, err) // Poll and fail attempt 1 — activity enters the 10-minute retry backoff. 
- pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Failure: &failurepb.Failure{ Message: "retryable failure", @@ -8418,22 +7924,22 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, }, }, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) // Wait for attempt 2 (rescheduled in long backoff, not yet dispatched). require.Eventually(t, func() bool { - dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + dr, dErr := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) return dErr == nil && dr.GetInfo().GetAttempt() == 2 }, 10*time.Second, 200*time.Millisecond) // Pause while SCHEDULED (in 10-minute retry backoff). 
- _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", Reason: "test-pause", @@ -8441,8 +7947,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.NoError(t, err) // Update retry interval to 1ms while paused. - _, err = s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.RunId, ActivityOptions: &activitypb.ActivityOptions{ @@ -8455,8 +7961,8 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.NoError(t, err) // Verify the update was persisted while the activity remains paused. - descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) require.NoError(t, err) @@ -8464,34 +7970,34 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { require.Equal(t, durationpb.New(1*time.Millisecond), descResp.GetInfo().GetRetryPolicy().GetInitialInterval()) // Unpause — the shortened interval means the activity is dispatched immediately. 
- _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", }) require.NoError(t, err) // Poll attempt 2 — available immediately because the retry interval is now 1ms. - poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + poll2Resp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.Equal(t, activityID, poll2Resp.GetActivityId()) require.EqualValues(t, 2, poll2Resp.Attempt) // Complete the activity. - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: poll2Resp.TaskToken, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) // Verify terminal COMPLETED state with updated retry policy. 
- descResp, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err = env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) require.NoError(t, err) @@ -8501,6 +8007,7 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { } func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { + env := s.newTestEnv() t := s.T() t.Run("UnpauseWhileScheduled", func(t *testing.T) { @@ -8508,12 +8015,12 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId // Pause while SCHEDULED. - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8522,8 +8029,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.NoError(t, err) // Unpause — this should re-dispatch the activity. 
- _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8531,10 +8038,10 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.NoError(t, err) // Poll should now succeed. - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.Equal(t, activityID, pollResp.GetActivityId(), "expected activity to be dispatched after unpause") @@ -8546,14 +8053,14 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId - pollResp := s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + pollResp := env.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) // Pause while STARTED. 
- _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8561,16 +8068,16 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { }) require.NoError(t, err) - heartbeatResp, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ - Namespace: s.Namespace().String(), + heartbeatResp, err := env.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, }) require.NoError(t, err) require.True(t, heartbeatResp.GetActivityPaused(), "expected ActivityPaused=true after pause") // Unpause. - _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8579,8 +8086,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { // After unpause of a STARTED+PauseState activity, the status stays STARTED (the worker's // token is still valid — no stamp bump). Verify via describe that the activity is no longer paused. 
- descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, }) @@ -8595,12 +8102,12 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId // Unpause a non-paused activity — should succeed with no error. - _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8614,15 +8121,15 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), + ActivityType: env.Tv().ActivityType(), + Identity: env.Tv().WorkerIdentity(), Input: defaultInput, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.RequestID(), + RequestId: 
env.Tv().RequestID(), RetryPolicy: &commonpb.RetryPolicy{ MaximumAttempts: 10, InitialInterval: durationpb.New(1 * time.Second), @@ -8632,16 +8139,16 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.NoError(t, err) // Poll and fail the first attempt to advance the attempt count. - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Failure: &failurepb.Failure{ Message: "retryable failure", @@ -8649,14 +8156,14 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, }, }, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) // Wait for the activity to enter SCHEDULED state for retry. 
require.Eventually(t, func() bool { - descResp, descErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, descErr := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) if descErr != nil { @@ -8666,8 +8173,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { }, 15*time.Second, 200*time.Millisecond) // Pause while SCHEDULED (attempt=2). - _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", Reason: "test-pause", @@ -8675,8 +8182,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.NoError(t, err) // Unpause with ResetAttempts=true. - _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", ResetAttempts: true, @@ -8684,10 +8191,10 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.NoError(t, err) // Poll — attempt count should be reset to 1. 
- poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + poll2Resp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.EqualValues(t, 1, poll2Resp.Attempt, "expected attempt reset to 1 after UnpauseWithResetAttempts") @@ -8699,11 +8206,11 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8711,8 +8218,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { }) require.NoError(t, err) - _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8721,10 +8228,10 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.NoError(t, err) // Activity should eventually be dispatched despite the jitter. 
- pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.Equal(t, activityID, pollResp.GetActivityId()) @@ -8733,8 +8240,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { t.Run("UnpauseNotFound", func(t *testing.T) { ctx := testcore.NewContext() - _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: testcore.RandomizeStr(t.Name()), Identity: "test-identity", }) @@ -8748,21 +8255,21 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId // Poll and complete the activity so it reaches a terminal state. 
- pollResp := s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) - _, err := s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + pollResp := env.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + _, err := env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) // Unpause should fail with FailedPrecondition on a terminal activity. - _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8779,15 +8286,15 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - _, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, - ActivityType: s.tv.ActivityType(), - Identity: s.tv.WorkerIdentity(), + ActivityType: env.Tv().ActivityType(), + Identity: env.Tv().WorkerIdentity(), Input: defaultInput, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, StartToCloseTimeout: durationpb.New(defaultStartToCloseTimeout), - RequestId: s.tv.RequestID(), + RequestId: env.Tv().RequestID(), RetryPolicy: &commonpb.RetryPolicy{ MaximumAttempts: 10, InitialInterval: durationpb.New(30 * 
time.Second), @@ -8797,25 +8304,25 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.NoError(t, err) // Poll attempt 1. - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.EqualValues(t, 1, pollResp.Attempt) // Record a heartbeat with details. - _, err = s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Details: defaultHeartbeatDetails, }) require.NoError(t, err) // Fail attempt 1 — activity enters 30s retry backoff at attempt 2. - _, err = s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Failure: &failurepb.Failure{ Message: "retryable failure", @@ -8823,22 +8330,22 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{NonRetryable: false}, }, }, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) // Wait for attempt 2 (count increments immediately on reschedule, even during backoff). 
require.Eventually(t, func() bool { - dr, dErr := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + dr, dErr := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) return dErr == nil && dr.GetInfo().GetAttempt() == 2 }, 10*time.Second, 200*time.Millisecond) // Heartbeat details should still be set before the unpause. - descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) require.NoError(t, err) @@ -8846,8 +8353,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.NotNil(t, descResp.GetInfo().GetHeartbeatDetails(), "expected heartbeat details before unpause") // Pause while SCHEDULED (in 30s backoff). - _, err = s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", Reason: "test-pause", @@ -8855,8 +8362,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.NoError(t, err) // Unpause with ResetHeartbeat=true — clears the recorded heartbeat state. 
- _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, Identity: "test-identity", ResetHeartbeat: true, @@ -8864,8 +8371,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.NoError(t, err) // Heartbeat details must be cleared after unpause. - descResp, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err = env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, }) require.NoError(t, err) @@ -8875,10 +8382,10 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { "expected heartbeat details cleared after UnpauseWithResetHeartbeat") // Poll attempt 2 — heartbeat details must be nil in the poll response too. 
- poll2Resp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + poll2Resp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, - Identity: s.tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) require.NoError(t, err) require.Equal(t, activityID, poll2Resp.GetActivityId()) @@ -8895,15 +8402,15 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId // Poll → STARTED. - pollResp := s.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) + pollResp := env.pollActivityTaskAndValidate(ctx, t, activityID, taskQueue, runID) // Pause while STARTED → flag-only (PauseState set, status stays STARTED). - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8912,19 +8419,19 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.NoError(t, err) // Cancel → CANCEL_REQUESTED. PauseState remains set from the prior pause. 
- _, err = s.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", Reason: "test-cancel", - RequestId: s.tv.RequestID(), + RequestId: env.Tv().RequestID(), }) require.NoError(t, err) // Confirm both flags are set via heartbeat. - hbResp, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ - Namespace: s.Namespace().String(), + hbResp, err := env.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, }) require.NoError(t, err) @@ -8932,8 +8439,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.True(t, hbResp.GetActivityPaused()) // Unpause — must be a no-op: status stays CANCEL_REQUESTED, no new dispatch task. - _, err = s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "test-identity", @@ -8941,8 +8448,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { require.NoError(t, err) // RunState must still be CANCEL_REQUESTED after the unpause. 
- descResp, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + descResp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, }) @@ -8951,8 +8458,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { "unpause of a CANCEL_REQUESTED activity must be a no-op") // Heartbeat must show CancelRequested=true, ActivityPaused=false after the unpause. - hbResp2, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ - Namespace: s.Namespace().String(), + hbResp2, err := env.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, }) require.NoError(t, err) @@ -8965,8 +8472,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { ctx := testcore.NewContext() t.Run("EmptyActivityID", func(t *testing.T) { - _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), Identity: "test-identity", }) var invalidArgErr *serviceerror.InvalidArgument @@ -8975,8 +8482,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { }) t.Run("ActivityIDTooLong", func(t *testing.T) { - _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: string(make([]byte, 
defaultMaxIDLengthLimit+1)), Identity: "test-identity", }) @@ -8987,8 +8494,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { }) t.Run("IdentityTooLong", func(t *testing.T) { - _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: testcore.RandomizeStr(t.Name()), Identity: string(make([]byte, defaultMaxIDLengthLimit+1)), }) @@ -8999,8 +8506,8 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { }) t.Run("InvalidRunID", func(t *testing.T) { - _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: testcore.RandomizeStr(t.Name()), RunId: "invalid-run-id", Identity: "test-identity", @@ -9013,6 +8520,7 @@ func (s *standaloneActivityTestSuite) TestUnpauseActivityExecution() { } func (s *standaloneActivityTestSuite) TestResetActivityExecution() { + env := s.newTestEnv() t := s.T() // startAndPollActivity starts a SAA, polls for the first task, and returns @@ -9024,10 +8532,10 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { ) { t.Helper() taskQueue := testcore.RandomizeStr(t.Name()) - startResp, err := s.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ - Namespace: s.Namespace().String(), + startResp, err := env.FrontendClient().StartActivityExecution(ctx, &workflowservice.StartActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, - ActivityType: s.tv.ActivityType(), + ActivityType: env.Tv().ActivityType(), Identity: 
defaultIdentity, Input: defaultInput, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue}, @@ -9037,8 +8545,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { }) require.NoError(t, err) - pollResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) @@ -9049,8 +8557,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { failRetryable := func(ctx context.Context, t *testing.T, taskToken []byte, nextRetryDelay time.Duration) { t.Helper() - _, err := s.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().RespondActivityTaskFailed(ctx, &workflowservice.RespondActivityTaskFailedRequest{ + Namespace: env.Namespace().String(), TaskToken: taskToken, Failure: &failurepb.Failure{ Message: "retryable failure", @@ -9068,8 +8576,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { resetActivity := func(ctx context.Context, t *testing.T, activityID, runID string, resetHeartbeat bool) { t.Helper() - _, err := s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, ResetHeartbeat: resetHeartbeat, @@ -9079,8 +8587,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { pauseActivity := func(ctx context.Context, t *testing.T, activityID, runID string) { t.Helper() - _, err := 
s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: defaultIdentity, @@ -9091,8 +8599,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { unpauseActivity := func(ctx context.Context, t *testing.T, activityID, runID string) { t.Helper() - _, err := s.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: defaultIdentity, @@ -9103,8 +8611,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { waitForState := func(ctx context.Context, t *testing.T, activityID, runID string, state enumspb.PendingActivityState) { t.Helper() require.Eventually(t, func() bool { - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, }) @@ -9129,8 +8637,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { failRetryable(ctx, t, pollResp1.TaskToken, time.Second) // Poll attempt 2 - pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: 
&taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) @@ -9142,8 +8650,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { // Verify activity is SCHEDULED (backing off at attempt 3) require.Eventually(t, func() bool { - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9159,8 +8667,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { resetActivity(ctx, t, activityID, startResp.GetRunId(), false) // Poll — should be attempt 1 - pollResp3, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp3, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) @@ -9168,8 +8676,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.EqualValues(t, 1, pollResp3.Attempt, "attempt should be reset to 1") // Complete successfully - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp3.TaskToken, Result: defaultResult, Identity: defaultIdentity, @@ -9195,8 +8703,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { resetActivity(ctx, t, activityID, 
startResp.GetRunId(), false) // Verify activity still appears as STARTED (reset is deferred) - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9207,8 +8715,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { failRetryable(ctx, t, pollResp1.TaskToken, 0) // Poll the retry — should be attempt 1 - pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) @@ -9216,8 +8724,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.EqualValues(t, 1, pollResp2.Attempt, "attempt should be reset to 1 on retry after running reset") // Complete - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp2.TaskToken, Result: defaultResult, Identity: defaultIdentity, @@ -9246,8 +8754,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.EqualValues(t, 1, pollResp1.Attempt) // Request cancellation — moves to CANCEL_REQUESTED - _, err := s.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ - Namespace: 
s.Namespace().String(), + _, err := env.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), Identity: defaultIdentity, @@ -9256,8 +8764,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.NoError(t, err) // Verify CANCEL_REQUESTED state - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9268,8 +8776,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { resetActivity(ctx, t, activityID, startResp.GetRunId(), false) // Activity must still be in CANCEL_REQUESTED (reset is deferred, no immediate side effect) - desc, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err = env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9277,8 +8785,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_CANCEL_REQUESTED, desc.GetInfo().GetRunState()) // Worker ignores the cancel and completes — activity should complete cleanly - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: 
pollResp1.TaskToken, Result: defaultResult, Identity: defaultIdentity, @@ -9303,8 +8811,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { // Verify in SCHEDULED state require.Eventually(t, func() bool { - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9316,8 +8824,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { resetActivity(ctx, t, activityID, startResp.GetRunId(), false) // Poll — task should be available immediately after reset (no long backoff) - pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) @@ -9325,8 +8833,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.EqualValues(t, 1, pollResp2.Attempt) // Complete - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp2.TaskToken, Result: defaultResult, Identity: defaultIdentity, @@ -9347,8 +8855,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, retryPolicy) // Record a heartbeat - _, 
err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp1.TaskToken, Details: defaultHeartbeatDetails, Identity: defaultIdentity, @@ -9356,8 +8864,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.NoError(t, err) // Verify heartbeat is visible in describe - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9369,8 +8877,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { // Wait for SCHEDULED state require.Eventually(t, func() bool { - d, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + d, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9379,8 +8887,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { }, 5*time.Second, 200*time.Millisecond) // Reset with heartbeat reset - _, err = s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), ResetHeartbeat: true, @@ -9388,8 +8896,8 @@ func (s 
*standaloneActivityTestSuite) TestResetActivityExecution() { require.NoError(t, err) // Poll — attempt 1, no heartbeat details - pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) @@ -9398,8 +8906,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.Empty(t, pollResp2.HeartbeatDetails.GetPayloads(), "heartbeat details should be cleared after reset") // Complete - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp2.TaskToken, Result: defaultResult, Identity: defaultIdentity, @@ -9422,8 +8930,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { startResp, pollResp1, taskQueue := startAndPollActivity(ctx, t, activityID, retryPolicy) // Record a heartbeat while running - _, err := s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp1.TaskToken, Details: defaultHeartbeatDetails, Identity: defaultIdentity, @@ -9431,8 +8939,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.NoError(t, err) // Verify heartbeat is visible - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, 
&workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9443,8 +8951,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { resetActivity(ctx, t, activityID, startResp.GetRunId(), true) // Activity should still be STARTED with heartbeat still visible (reset is deferred) - desc, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err = env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9456,8 +8964,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { failRetryable(ctx, t, pollResp1.TaskToken, 0) // Poll retry — attempt=1, heartbeat details cleared - pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) @@ -9466,8 +8974,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.Empty(t, pollResp2.HeartbeatDetails.GetPayloads(), "heartbeat details should be cleared after deferred reset") // Record a new heartbeat on the new attempt - _, err = s.FrontendClient().RecordActivityTaskHeartbeat(ctx, &workflowservice.RecordActivityTaskHeartbeatRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RecordActivityTaskHeartbeat(ctx, 
&workflowservice.RecordActivityTaskHeartbeatRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp2.TaskToken, Details: defaultHeartbeatDetails, Identity: defaultIdentity, @@ -9475,8 +8983,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.NoError(t, err) // Verify new heartbeat is visible in describe - desc, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err = env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9484,8 +8992,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.NotNil(t, desc.GetInfo().GetHeartbeatDetails(), "new heartbeat from reset attempt should be visible") // Complete - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp2.TaskToken, Result: defaultResult, Identity: defaultIdentity, @@ -9502,8 +9010,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { startResp, pollResp, _ := startAndPollActivity(ctx, t, activityID, nil) // Complete the activity - _, err := s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp.TaskToken, Result: defaultResult, Identity: defaultIdentity, @@ -9511,8 +9019,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { 
require.NoError(t, err) // Attempt to reset — should fail with FailedPrecondition since the activity is in a terminal state - _, err = s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9539,8 +9047,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { // Wait for SCHEDULED state (retry backoff) require.Eventually(t, func() bool { - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9549,8 +9057,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { }, 5*time.Second, 200*time.Millisecond) // Pause the activity - _, err := s.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().PauseActivityExecution(ctx, &workflowservice.PauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), Identity: defaultIdentity, @@ -9560,8 +9068,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { // Verify activity is paused require.Eventually(t, func() bool { - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), 
ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9570,8 +9078,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { }, 5*time.Second, 200*time.Millisecond) // Verify attempt count is >= 2 (failed at least once before pause) - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9579,8 +9087,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.Greater(t, desc.GetInfo().GetAttempt(), int32(1)) // Reset with keepPaused=true — activity should remain paused but attempt reset to 1 - _, err = s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), KeepPaused: true, @@ -9589,8 +9097,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { // Verify still paused with attempt=1 require.Eventually(t, func() bool { - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9600,8 +9108,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { }, 2*time.Second, 200*time.Millisecond) // Unpause the activity - _, err = s.FrontendClient().UnpauseActivityExecution(ctx, 
&workflowservice.UnpauseActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivityExecution(ctx, &workflowservice.UnpauseActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), Identity: defaultIdentity, @@ -9609,8 +9117,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.NoError(t, err) // Poll — should be attempt 1 after unpause - pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) @@ -9618,8 +9126,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.EqualValues(t, 1, pollResp2.Attempt) // Complete - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp2.TaskToken, Result: defaultResult, Identity: defaultIdentity, @@ -9647,8 +9155,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { failRetryable(ctx, t, pollResp1.TaskToken, 60*time.Second) require.Eventually(t, func() bool { - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ 
-9657,8 +9165,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { // Update MaximumAttempts to a different value. updatedMaxAttempts := int32(100) - _, err := s.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().UpdateActivityExecutionOptions(ctx, &workflowservice.UpdateActivityExecutionOptionsRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), ActivityOptions: &activitypb.ActivityOptions{RetryPolicy: &commonpb.RetryPolicy{MaximumAttempts: updatedMaxAttempts}}, @@ -9666,8 +9174,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { }) require.NoError(t, err) - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9675,8 +9183,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.Equal(t, updatedMaxAttempts, desc.GetInfo().GetRetryPolicy().GetMaximumAttempts(), "update should be applied before reset") // Reset with RestoreOriginalOptions=true — options should revert and attempt reset to 1. 
- _, err = s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), RestoreOriginalOptions: true, @@ -9684,8 +9192,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.NoError(t, err) // Verify original options are reflected in describe after reset. - desc, err = s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err = env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9694,8 +9202,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.Equal(t, originalMaxAttempts, desc.GetInfo().GetRetryPolicy().GetMaximumAttempts(), "original MaximumAttempts should be restored") // Poll — should be attempt 1. - pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) @@ -9703,8 +9211,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.EqualValues(t, 1, pollResp2.Attempt, "attempt should be reset to 1") // Complete the activity. 
- _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp2.TaskToken, Result: defaultResult, Identity: defaultIdentity, @@ -9733,24 +9241,24 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { waitForState(ctx, t, activityID, startResp.GetRunId(), enumspb.PENDING_ACTIVITY_STATE_PAUSED) // Reset with keepPaused=false — should clear PauseState and dispatch at attempt 1. - _, err := s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), KeepPaused: false, }) require.NoError(t, err) - pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) require.NoError(t, err) require.EqualValues(t, 1, pollResp2.Attempt, "attempt should be reset to 1") - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp2.TaskToken, Result: defaultResult, Identity: defaultIdentity, @@ -9776,8 +9284,8 
@@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { waitForState(ctx, t, activityID, startResp.GetRunId(), enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED) // Reset with keepPaused=false — deferred; must also clear PauseState so dispatch isn't blocked. - _, err := s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), KeepPaused: false, @@ -9788,16 +9296,16 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { failRetryable(ctx, t, pollResp1.TaskToken, 0) // Activity should dispatch (not be stuck paused) at attempt 1. - pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) require.NoError(t, err) require.EqualValues(t, 1, pollResp2.Attempt, "attempt should be reset to 1 after deferred reset") - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp2.TaskToken, Result: defaultResult, Identity: defaultIdentity, @@ -9823,8 +9331,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { waitForState(ctx, t, activityID, startResp.GetRunId(), enumspb.PENDING_ACTIVITY_STATE_PAUSE_REQUESTED) // Reset with 
keepPaused=true — deferred; PauseState should be preserved. - _, err := s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), KeepPaused: true, @@ -9836,8 +9344,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { // Activity should be PAUSED at attempt 1 (deferred reset + preserved pause). require.Eventually(t, func() bool { - desc, err := s.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ - Namespace: s.Namespace().String(), + desc, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), }) @@ -9849,16 +9357,16 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { // Unpause and verify dispatch at attempt 1. 
unpauseActivity(ctx, t, activityID, startResp.GetRunId()) - pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) require.NoError(t, err) require.EqualValues(t, 1, pollResp2.Attempt) - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp2.TaskToken, Result: defaultResult, Identity: defaultIdentity, @@ -9885,8 +9393,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { jitter := 3 * time.Second resetStart := time.Now() - _, err := s.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err := env.FrontendClient().ResetActivityExecution(ctx, &workflowservice.ResetActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: startResp.GetRunId(), Jitter: durationpb.New(jitter), @@ -9894,8 +9402,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.NoError(t, err) // Activity should dispatch within [now, now+jitter+buffer]. 
- pollResp2, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollResp2, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Identity: defaultIdentity, }) @@ -9904,8 +9412,8 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.WithinDuration(t, resetStart.Add(jitter), time.Now(), jitter+5*time.Second, "activity should dispatch within jitter window") - _, err = s.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RespondActivityTaskCompleted(ctx, &workflowservice.RespondActivityTaskCompletedRequest{ + Namespace: env.Namespace().String(), TaskToken: pollResp2.TaskToken, Result: defaultResult, Identity: defaultIdentity, From b0e4d52c6b7a3c39898c813023ff69a1e5369d0e Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Mon, 11 May 2026 10:41:13 -0600 Subject: [PATCH 18/25] remove stubs --- service/frontend/workflow_handler.go | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index 114655ccf32..57d2371791f 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -7392,18 +7392,3 @@ func (wh *WorkflowHandler) UnpauseWorkflowExecution(ctx context.Context, request return &workflowservice.UnpauseWorkflowExecutionResponse{}, nil } -func (wh *WorkflowHandler) PauseActivityExecution(context.Context, *workflowservice.PauseActivityExecutionRequest) (*workflowservice.PauseActivityExecutionResponse, error) { - return nil, serviceerror.NewUnimplemented("PauseActivityExecution not implemented") -} - -func (wh *WorkflowHandler) 
ResetActivityExecution(context.Context, *workflowservice.ResetActivityExecutionRequest) (*workflowservice.ResetActivityExecutionResponse, error) { - return nil, serviceerror.NewUnimplemented("ResetActivityExecution not implemented") -} - -func (wh *WorkflowHandler) UnpauseActivityExecution(context.Context, *workflowservice.UnpauseActivityExecutionRequest) (*workflowservice.UnpauseActivityExecutionResponse, error) { - return nil, serviceerror.NewUnimplemented("UnpauseActivityExecution not implemented") -} - -func (wh *WorkflowHandler) UpdateActivityExecutionOptions(context.Context, *workflowservice.UpdateActivityExecutionOptionsRequest) (*workflowservice.UpdateActivityExecutionOptionsResponse, error) { - return nil, serviceerror.NewUnimplemented("UpdateActivityExecutionOptions not implemented") -} From a4df9b3aea1ef706d43ad383b404e1d98ea52501 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Mon, 11 May 2026 10:50:03 -0600 Subject: [PATCH 19/25] linter --- service/frontend/workflow_handler.go | 1 - 1 file changed, 1 deletion(-) diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index 57d2371791f..05e5b06cd20 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -7391,4 +7391,3 @@ func (wh *WorkflowHandler) UnpauseWorkflowExecution(ctx context.Context, request return &workflowservice.UnpauseWorkflowExecutionResponse{}, nil } - From 48f8606a00ffdc5952b8797fe5415765e91cefe7 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Tue, 12 May 2026 11:09:17 -0600 Subject: [PATCH 20/25] return nil, remove TODO --- chasm/lib/activity/activity.go | 2 +- chasm/lib/activity/proto/v1/activity_state.proto | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/chasm/lib/activity/activity.go b/chasm/lib/activity/activity.go index 6ceafe3091c..0d7f4b64b5c 100644 --- a/chasm/lib/activity/activity.go +++ b/chasm/lib/activity/activity.go @@ -799,7 +799,7 @@ func (a *Activity) handlePauseRequested(ctx 
chasm.MutableContext, req *activityp if newReqID != "" && existingReqID == newReqID { return &activitypb.PauseActivityExecutionResponse{}, nil } - return &activitypb.PauseActivityExecutionResponse{}, serviceerror.NewFailedPrecondition("activity is already paused") + return nil, serviceerror.NewFailedPrecondition("activity is already paused") } metricsHandler, err := a.enrichMetricsHandler(ctx, metrics.ActivityPausedScope) diff --git a/chasm/lib/activity/proto/v1/activity_state.proto b/chasm/lib/activity/proto/v1/activity_state.proto index 6c92b9dbfb7..40a3401b042 100644 --- a/chasm/lib/activity/proto/v1/activity_state.proto +++ b/chasm/lib/activity/proto/v1/activity_state.proto @@ -183,7 +183,6 @@ message ActivityAttemptState { // previous attempt or pre-update state are discarded. // Note: ScheduleToCloseTimeoutTask uses a separate ActivityState.schedule_to_close_stamp because // it spans the full activity lifetime and must not be invalidated on retry. - // TODO: also invalidate on pause and reset when those are supported. int32 stamp = 6; string last_worker_identity = 7; From 6c78ee732ebca480e5dc1e4a3ad5124a6a9bcbd4 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Tue, 12 May 2026 11:30:03 -0600 Subject: [PATCH 21/25] regen --- chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go | 1 - 1 file changed, 1 deletion(-) diff --git a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go index 649a011776a..0a93abdde58 100644 --- a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go @@ -582,7 +582,6 @@ type ActivityAttemptState struct { // previous attempt or pre-update state are discarded. // Note: ScheduleToCloseTimeoutTask uses a separate ActivityState.schedule_to_close_stamp because // it spans the full activity lifetime and must not be invalidated on retry. - // TODO: also invalidate on pause and reset when those are supported. 
Stamp int32 `protobuf:"varint,6,opt,name=stamp,proto3" json:"stamp,omitempty"` LastWorkerIdentity string `protobuf:"bytes,7,opt,name=last_worker_identity,json=lastWorkerIdentity,proto3" json:"last_worker_identity,omitempty"` // The Worker Deployment Version this activity was dispatched to most recently. From 3afd8d686dfcc372b25ca700e6cfd3bf98d4d4a3 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Tue, 12 May 2026 11:38:29 -0600 Subject: [PATCH 22/25] migrate test files --- tests/activity_api_pause_test.go | 10 +++++----- tests/activity_api_update_test.go | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/activity_api_pause_test.go b/tests/activity_api_pause_test.go index 334aae004e9..f6a8d39ffec 100644 --- a/tests/activity_api_pause_test.go +++ b/tests/activity_api_pause_test.go @@ -127,7 +127,7 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { activityFunction := func() (string, error) { startedActivityCount.Add(1) if startedActivityCount.Load() == 1 { - s.WaitForChannel(ctx, activityPausedCn) + s.WaitForChannel(activityPausedCn) return "", activityErr } return "done!", nil @@ -254,7 +254,7 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { activityFunction := func() (string, error) { startedActivityCount.Add(1) if startedActivityCount.Load() == 1 { - s.WaitForChannel(ctx, activityPausedCn) + s.WaitForChannel(activityPausedCn) return "", activityErr } if shouldSucceed.Load() { @@ -550,7 +550,7 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { activityErr := errors.New("bad-luck-please-retry") return "", activityErr } - s.WaitForChannel(ctx, activityCompleteCn) + s.WaitForChannel(activityCompleteCn) return "done!", nil } @@ -648,7 +648,7 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { activityFunction := func() (string, error) { startedActivityCount.Add(1) if startedActivityCount.Load() == 1 { - s.WaitForChannel(ctx, activityPausedCn) + s.WaitForChannel(activityPausedCn) return "", activityErr } 
return "done!", nil @@ -839,7 +839,7 @@ func TestActivityApiPauseClientTestSuite(t *testing.T) { if !activityWasReset.Load() { return "", errors.New("bad-luck-please-retry") } - s.WaitForChannel(ctx, activityCompleteCh) + s.WaitForChannel(activityCompleteCh) return "done!", nil } diff --git a/tests/activity_api_update_test.go b/tests/activity_api_update_test.go index 75b45093e16..e21c6eede43 100644 --- a/tests/activity_api_update_test.go +++ b/tests/activity_api_update_test.go @@ -127,7 +127,7 @@ func TestActivityApiUpdateClientTestSuite(t *testing.T) { return "", activityErr } - s.WaitForChannel(ctx, activityUpdated) + s.WaitForChannel(activityUpdated) return "done!", nil } @@ -357,7 +357,7 @@ func TestActivityApiUpdateClientTestSuite(t *testing.T) { return "", activityErr } - s.WaitForChannel(ctx, activityUpdated) + s.WaitForChannel(activityUpdated) return "done!", nil } @@ -478,7 +478,7 @@ func TestActivityUpdateExecutionOptionsApi(t *testing.T) { if startedActivityCount.Load() == 1 { return "", errors.New("bad-luck-please-retry") } - s.WaitForChannel(ctx, activityUpdated) + s.WaitForChannel(activityUpdated) return "done!", nil } @@ -670,7 +670,7 @@ func TestActivityUpdateExecutionOptionsApi(t *testing.T) { if startedActivityCount.Load() == 1 { return "", errors.New("bad-luck-please-retry") } - s.WaitForChannel(ctx, activityUpdated) + s.WaitForChannel(activityUpdated) return "done!", nil } From c351cfe693314ca2273cc53c5edfdbd33675e99e Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Tue, 12 May 2026 13:24:00 -0600 Subject: [PATCH 23/25] validator test bug missing context --- chasm/lib/activity/validator_test.go | 2 +- tests/standalone_activity_test.go | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/chasm/lib/activity/validator_test.go b/chasm/lib/activity/validator_test.go index 4fc87806069..1cd5ce73440 100644 --- a/chasm/lib/activity/validator_test.go +++ b/chasm/lib/activity/validator_test.go @@ -440,7 +440,7 @@ func 
TestRequestIDGeneratedWhenMissing(t *testing.T) { StartToCloseTimeout: durationpb.New(10 * time.Second), RetryPolicy: &commonpb.RetryPolicy{}, } - _, err := h.validateAndPopulateStartRequest(req, namespace.ID(defaultNamespaceID)) + _, err := h.validateAndPopulateStartRequest(t.Context(), req, namespace.ID(defaultNamespaceID)) require.NoError(t, err) require.NotEmpty(t, req.GetRequestId(), "server must generate a request ID when client omits it") require.NoError(t, validateUUID(req.GetRequestId()), "generated request ID must be a valid UUID") diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index b377aee623c..65833eb0832 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -7056,6 +7056,9 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { // Either the poll times out (deadline exceeded) or returns an empty response. if err == nil { require.Empty(t, pollResp.GetActivityId(), "expected no task to be dispatched while paused") + } else { + var deadlineErr *serviceerror.DeadlineExceeded + require.ErrorAs(t, err, &deadlineErr) } }) From 72d05827c0f1681f6a2c6f7aa7e74fab4cea3308 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Tue, 12 May 2026 14:52:52 -0600 Subject: [PATCH 24/25] fix flakiness --- tests/standalone_activity_test.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index 65833eb0832..b377aee623c 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -7056,9 +7056,6 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { // Either the poll times out (deadline exceeded) or returns an empty response. 
if err == nil { require.Empty(t, pollResp.GetActivityId(), "expected no task to be dispatched while paused") - } else { - var deadlineErr *serviceerror.DeadlineExceeded - require.ErrorAs(t, err, &deadlineErr) } }) From e291b23318580f8748c2553608ea2626eb5d7255 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Thu, 14 May 2026 14:23:04 -0600 Subject: [PATCH 25/25] heartbeating and some comments --- chasm/lib/activity/activity.go | 2 +- chasm/lib/activity/activity_test.go | 112 ++++++++++++++++++++++++++++ tests/standalone_activity_test.go | 21 +++++- 3 files changed, 132 insertions(+), 3 deletions(-) diff --git a/chasm/lib/activity/activity.go b/chasm/lib/activity/activity.go index 0d7f4b64b5c..55c67f95e6b 100644 --- a/chasm/lib/activity/activity.go +++ b/chasm/lib/activity/activity.go @@ -1188,7 +1188,7 @@ func (a *Activity) RecordHeartbeat( return &historyservice.RecordActivityTaskHeartbeatResponse{ CancelRequested: a.Status == activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED, ActivityPaused: a.PauseState != nil, - // ActivityReset is intentionally not reported via heartbeat; reset takes effect on the next retry. 
+ ActivityReset: a.ActivityReset, }, nil } diff --git a/chasm/lib/activity/activity_test.go b/chasm/lib/activity/activity_test.go index b614c3eba7a..06888df8d87 100644 --- a/chasm/lib/activity/activity_test.go +++ b/chasm/lib/activity/activity_test.go @@ -8,7 +8,10 @@ import ( "github.com/stretchr/testify/require" commonpb "go.temporal.io/api/common/v1" taskqueuepb "go.temporal.io/api/taskqueue/v1" + "go.temporal.io/api/workflowservice/v1" "go.temporal.io/server/api/historyservice/v1" + persistencespb "go.temporal.io/server/api/persistence/v1" + tokenspb "go.temporal.io/server/api/token/v1" "go.temporal.io/server/chasm" "go.temporal.io/server/chasm/lib/activity/gen/activitypb/v1" "go.temporal.io/server/common/dynamicconfig" @@ -250,6 +253,115 @@ func TestActivityTerminate(t *testing.T) { } } +func TestRecordHeartbeatPauseResetCancelFlags(t *testing.T) { + testTime := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) + const ( + namespaceID = "test-namespace-id" + activityID = "test-activity-id" + runID = "test-run-id" + attempt = int32(1) + ) + + componentRef, err := (&persistencespb.ChasmComponentRef{ + NamespaceId: namespaceID, + BusinessId: activityID, + RunId: runID, + }).Marshal() + require.NoError(t, err) + + testCases := []struct { + name string + status activitypb.ActivityExecutionStatus + pauseState *activitypb.ActivityPauseState + activityReset bool + wantPaused bool + wantReset bool + wantCancel bool + }{ + { + name: "no pause or reset returns zero flags", + status: activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, + }, + { + // Regression guard: reset must propagate to the next heartbeat response + // immediately so the worker can abort the in-flight attempt; previously + // reset was withheld until the next retry. 
+ name: "reset set propagates ActivityReset on next heartbeat", + status: activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, + activityReset: true, + wantReset: true, + }, + { + name: "pause set propagates ActivityPaused", + status: activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, + pauseState: &activitypb.ActivityPauseState{PauseTime: timestamppb.New(testTime)}, + wantPaused: true, + }, + { + name: "pause and reset both propagate", + status: activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, + pauseState: &activitypb.ActivityPauseState{PauseTime: timestamppb.New(testTime)}, + activityReset: true, + wantPaused: true, + wantReset: true, + }, + { + name: "cancel requested coexists with reset", + status: activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED, + activityReset: true, + wantCancel: true, + wantReset: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := &chasm.MockMutableContext{ + MockContext: chasm.MockContext{ + HandleNow: func(chasm.Component) time.Time { return testTime }, + HandleExecutionKey: func() chasm.ExecutionKey { + return chasm.ExecutionKey{ + NamespaceID: namespaceID, + BusinessID: activityID, + RunID: runID, + } + }, + }, + } + + act := &Activity{ + ActivityState: &activitypb.ActivityState{ + Status: tc.status, + HeartbeatTimeout: durationpb.New(0), + PauseState: tc.pauseState, + ActivityReset: tc.activityReset, + }, + LastAttempt: chasm.NewDataField(ctx, &activitypb.ActivityAttemptState{Count: attempt}), + } + + token := &tokenspb.Task{ + NamespaceId: namespaceID, + Attempt: attempt, + ComponentRef: componentRef, + } + req := &historyservice.RecordActivityTaskHeartbeatRequest{ + NamespaceId: namespaceID, + HeartbeatRequest: &workflowservice.RecordActivityTaskHeartbeatRequest{}, + } + + resp, err := act.RecordHeartbeat(ctx, WithToken[*historyservice.RecordActivityTaskHeartbeatRequest]{ + Token: token, + Request: req, + }) + + require.NoError(t, err) + require.Equal(t, tc.wantPaused, resp.ActivityPaused, 
"ActivityPaused") + require.Equal(t, tc.wantReset, resp.ActivityReset, "ActivityReset") + require.Equal(t, tc.wantCancel, resp.CancelRequested, "CancelRequested") + }) + } +} + func TestContextMetadata(t *testing.T) { t.Run("returns activity type and task queue", func(t *testing.T) { ctx := &chasm.MockMutableContext{} diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index b377aee623c..288e31153d3 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -7046,7 +7046,7 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { // Attempt to poll — the dispatch task was invalidated by the stamp bump, so no task should // be available. Use a short-lived context to avoid blocking the test. - shortCtx, shortCancel := context.WithTimeout(ctx, 2*time.Second) + shortCtx, shortCancel := context.WithTimeout(ctx, 5*time.Second) defer shortCancel() pollResp, err := env.FrontendClient().PollActivityTaskQueue(shortCtx, &workflowservice.PollActivityTaskQueueRequest{ Namespace: env.Namespace().String(), @@ -7102,9 +7102,26 @@ func (s *standaloneActivityTestSuite) TestPauseActivityExecution() { _, err := env.FrontendClient().PauseActivityExecution(ctx, pauseReq) require.NoError(t, err) + resp, err := env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, resp.GetInfo().GetRunState()) + // Second pause with the same request ID should succeed (idempotent no-op). 
+ // The response is discarded here; the Describe call below confirms the activity is still PAUSED. _, err = env.FrontendClient().PauseActivityExecution(ctx, pauseReq) require.NoError(t, err) + + resp, err = env.FrontendClient().DescribeActivityExecution(ctx, &workflowservice.DescribeActivityExecutionRequest{ + Namespace: env.Namespace().String(), + ActivityId: activityID, + RunId: runID, + }) + require.NoError(t, err) + require.Equal(t, enumspb.PENDING_ACTIVITY_STATE_PAUSED, resp.GetInfo().GetRunState()) }) t.Run("PauseNotFound", func(t *testing.T) { @@ -8733,7 +8750,7 @@ func (s *standaloneActivityTestSuite) TestResetActivityExecution() { require.NoError(t, err) }) - t.Run("WhileCancelRequested", func(t *testing.T) { + t.Run("WhileCancelRequestedDoesNotFail", func(t *testing.T) { // Reset while the activity is in CANCEL_REQUESTED state. // handleReset sets the ActivityReset flag (same deferred path as STARTED). // NOTE: TransitionRescheduled currently only allows STARTED as a source state, so a