Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions nvflare/app_common/executors/launcher_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,21 +168,25 @@ def handle_event(self, event_type: str, fl_ctx: FLContext) -> None:
def execute(self, task_name: str, shareable: Shareable, fl_ctx: FLContext, abort_signal: Signal) -> Shareable:
    """Execute one task, bracketed by external-execution setup and teardown.

    The ``_executing`` event is set for the whole duration of the call and
    cleared on every exit path (normal return, early return, or exception).

    Args:
        task_name: Name of the task to execute.
        shareable: Input task data, forwarded to the parent ``execute``.
        fl_ctx: FL context of the current run.
        abort_signal: Signal forwarded to the helpers; it is triggered here
            when the parent ``execute`` returns a non-OK return code.

    Returns:
        The result of the parent ``execute`` when its return code is
        ``ReturnCode.OK``; otherwise a reply carrying
        ``ReturnCode.EXECUTION_EXCEPTION`` (also returned when
        ``_initialize_external_execution`` reports failure).
    """
    self.log_info(fl_ctx, f"execute for task ({task_name})")

    # Mark execution as in progress before any setup work starts, so the flag
    # covers initialization as well as the task exchange itself.
    self._executing.set()
    try:
        if not self._initialize_external_execution(task_name, shareable, fl_ctx, abort_signal):
            return make_reply(ReturnCode.EXECUTION_EXCEPTION)

        result = super().execute(task_name, shareable, fl_ctx, abort_signal)

        if result.get_return_code() != ReturnCode.OK:
            # The exchange failed: raise the abort signal and ask the launcher
            # to stop the task before reporting the failure to the caller.
            abort_signal.trigger("execution exception in TaskExchanger")
            self._execute_launcher_method_in_thread_executor(
                method_name="stop_task", task_name=task_name, fl_ctx=fl_ctx, abort_signal=abort_signal
            )
            return make_reply(ReturnCode.EXECUTION_EXCEPTION)

        self._finalize_external_execution(task_name, shareable, fl_ctx, abort_signal)

        return result
    finally:
        # Always release the flag, including on early returns and exceptions.
        self._executing.clear()

def check_input_shareable(self, task_name: str, shareable: Shareable, fl_ctx: FLContext) -> bool:
supported_tasks = [self._train_task_name, self._evaluate_task_name, self._submit_model_task_name]
Expand Down
22 changes: 22 additions & 0 deletions nvflare/app_common/executors/task_exchanger.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import threading
import time
from typing import Optional

Expand Down Expand Up @@ -98,6 +99,8 @@ def __init__(
self.pipe_channel_name = pipe_channel_name
self.pipe = None
self.pipe_handler = None
self._executing = threading.Event()
self._executing_lock = threading.Lock()

def handle_event(self, event_type: str, fl_ctx: FLContext):
if event_type == EventType.START_RUN:
Expand All @@ -108,6 +111,14 @@ def handle_event(self, event_type: str, fl_ctx: FLContext):
return
self.pipe.open(self.pipe_channel_name)
elif event_type == EventType.BEFORE_TASK_EXECUTION:
with self._executing_lock:
if self._executing.is_set():
skip = True
else:
skip = False
if skip:
self.log_info(fl_ctx, "skipping pipe handler reset: execute() is in progress")
return
if self.pipe_handler:
self.pipe_handler.stop(close_pipe=False)
self._create_pipe_handler()
Expand Down Expand Up @@ -158,6 +169,17 @@ def execute(self, task_name: str, shareable: Shareable, fl_ctx: FLContext, abort
TaskExchanger generic and can be reused for any applications (e.g. Shareable based, DXO based, or any custom
data based).
"""
with self._executing_lock:
acquired = not self._executing.is_set()
if acquired:
self._executing.set()
try:
return self._do_execute(task_name, shareable, fl_ctx, abort_signal)
finally:
if acquired:
self._executing.clear()

def _do_execute(self, task_name: str, shareable: Shareable, fl_ctx: FLContext, abort_signal: Signal) -> Shareable:
if not self.check_input_shareable(task_name, shareable, fl_ctx):
self.log_error(fl_ctx, "bad input task shareable")
return make_reply(ReturnCode.BAD_TASK_DATA)
Expand Down
Loading
Loading