Skip to content
This repository was archived by the owner on Jun 6, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

class service_management_delete:

def __init__(self, kube_config_path=None, service_list=None, **kwargs):
def __init__(self, kube_config_path=None, service_list=None, skip_service_list=None, **kwargs):
self.logger = logging.getLogger(__name__)

self.cluster_object_model = service_management_configuration.get_cluster_object_model_from_k8s(kube_config_path)
Expand All @@ -42,6 +42,9 @@ def __init__(self, kube_config_path=None, service_list=None, **kwargs):
if "cluster-type" in self.cluster_object_model["cluster"]["common"]:
self.cluster_type = self.cluster_object_model["cluster"]["common"]["cluster-type"]
self.service_list = service_management_configuration.get_service_list(self.cluster_type)
if skip_service_list is not None:
self.logger.info("Skipping service list {0}".format(skip_service_list))
self.service_list = list(set(self.service_list) - set(skip_service_list))
else:
self.service_list = service_list
self.logger.info("Get the service-list to manage : {0}".format(str(self.service_list)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

class service_management_refresh:

def __init__(self, kube_config_path=None, service_list=None, **kwargs):
def __init__(self, kube_config_path=None, service_list=None, skip_service_list=None, **kwargs):
self.logger = logging.getLogger(__name__)

self.cluster_object_model = service_management_configuration.get_cluster_object_model_from_k8s(kube_config_path)
Expand All @@ -41,6 +41,9 @@ def __init__(self, kube_config_path=None, service_list=None, **kwargs):
if "cluster-type" in self.cluster_object_model["cluster"]["common"]:
self.cluster_type = self.cluster_object_model["cluster"]["common"]["cluster-type"]
self.service_list = service_management_configuration.get_service_list(self.cluster_type)
if skip_service_list is not None:
self.logger.info("Skipping service list {0}".format(skip_service_list))
self.service_list = list(set(self.service_list) - set(skip_service_list))
else:
self.service_list = service_list
self.logger.info("Get the service-list to manage : {0}".format(str(self.service_list)))
Expand Down
5 changes: 4 additions & 1 deletion deployment/paiLibrary/paiService/service_management_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

class serivce_management_start:

def __init__(self, kube_config_path=None, service_list=None, **kwargs):
def __init__(self, kube_config_path=None, service_list=None, skip_service_list=None, **kwargs):
self.logger = logging.getLogger(__name__)

self.cluster_object_model = service_management_configuration.get_cluster_object_model_from_k8s(kube_config_path)
Expand All @@ -42,6 +42,9 @@ def __init__(self, kube_config_path=None, service_list=None, **kwargs):
if "cluster-type" in self.cluster_object_model["cluster"]["common"]:
self.cluster_type = self.cluster_object_model["cluster"]["common"]["cluster-type"]
self.service_list = service_management_configuration.get_service_list(self.cluster_type)
if skip_service_list is not None:
self.logger.info("Skipping service list {0}".format(skip_service_list))
self.service_list = list(set(self.service_list) - set(skip_service_list))
else:
self.service_list = service_list
if self.cluster_type == 'yarn':
Expand Down
5 changes: 4 additions & 1 deletion deployment/paiLibrary/paiService/service_management_stop.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

class service_management_stop:

def __init__(self, kube_config_path=None, service_list=None, **kwargs):
def __init__(self, kube_config_path=None, service_list=None, skip_service_list=None, **kwargs):
self.logger = logging.getLogger(__name__)

self.cluster_object_model = service_management_configuration.get_cluster_object_model_from_k8s(kube_config_path)
Expand All @@ -41,6 +41,9 @@ def __init__(self, kube_config_path=None, service_list=None, **kwargs):
if "cluster-type" in self.cluster_object_model["cluster"]["common"]:
self.cluster_type = self.cluster_object_model["cluster"]["common"]["cluster-type"]
self.service_list = service_management_configuration.get_service_list(self.cluster_type)
if skip_service_list is not None:
self.logger.info("Skipping service list {0}".format(skip_service_list))
self.service_list = list(set(self.service_list) - set(skip_service_list))
else:
self.service_list = service_list
self.logger.info("Get the service-list to manage : {0}".format(str(self.service_list)))
Expand Down
23 changes: 14 additions & 9 deletions deployment/serviceCmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@


import os
import sys
import readline
import logging
import logging.config
Expand Down Expand Up @@ -52,6 +53,7 @@ def register(self, parser):
def add_arguments(parser):
parser.add_argument("-c", "--kube-config-path", dest="kube_config_path", default="~/.kube/config", help="The path to KUBE_CONFIG file. Default value: ~/.kube/config")
parser.add_argument("-n", "--service-list", nargs='+', dest="service_list", default=None, help="Service list to manage")
parser.add_argument("-k", "--skip-service-list", nargs='+', dest="skip_service_list", default=None, help="Service list to skip")

add_arguments(start_parser)
add_arguments(stop_parser)
Expand All @@ -61,22 +63,25 @@ def add_arguments(parser):
def process_args(self, args):
if args.kube_config_path is not None:
args.kube_config_path = os.path.expanduser(args.kube_config_path)
return args.service_list

if args.service_list is not None and args.skip_service_list is not None:
logger.error('--service-list and --skip-service-list are mutually exclusive')
sys.exit(1)

def service_start(self, args):
service_list = self.process_args(args)
self.process_args(args)

service_management_starter = service_management_start.serivce_management_start(args.kube_config_path, service_list)
service_management_starter = service_management_start.serivce_management_start(args.kube_config_path, args.service_list, args.skip_service_list)
service_management_starter.run()

def service_stop(self, args):
service_list = self.process_args(args)
self.process_args(args)

service_management_stopper = service_management_stop.service_management_stop(args.kube_config_path, service_list)
service_management_stopper = service_management_stop.service_management_stop(args.kube_config_path, args.service_list, args.skip_service_list)
service_management_stopper.run()

def service_delete(self, args):
service_list = self.process_args(args)
self.process_args(args)

logger.warning("--------------------------------------------------------")
logger.warning("--------------------------------------------------------")
Expand Down Expand Up @@ -110,11 +115,11 @@ def service_delete(self, args):
logger.warning("3 Times......... Sorry, we will force stopping your operation.")
return

service_management_deleter = service_management_delete.service_management_delete(args.kube_config_path, service_list)
service_management_deleter = service_management_delete.service_management_delete(args.kube_config_path, args.service_list, args.skip_service_list)
service_management_deleter.run()

def service_refresh(self, args):
service_list = self.process_args(args)
self.process_args(args)

service_management_refresher = service_management_refresh.service_management_refresh(args.kube_config_path, service_list)
service_management_refresher = service_management_refresh.service_management_refresh(args.kube_config_path, args.service_list, args.skip_service_list)
service_management_refresher.run()
10 changes: 9 additions & 1 deletion docs/manual/cluster-admin/upgrade-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ First, launch a dev box container of current PAI version, stop all services by:

The command will ask you for the cluster id for confirmation. If you forget it, another command `./paictl.py config get-id` will help you.

Your current running jobs are not expected to be affected by stopping PAI services.
If you don't want to affect currently running jobs, do not stop `storage-manager`, `dshuttle-master`, `dshuttle-worker` & `dshuttle-csi`. Instead, stop only the other services by skipping them with the following command:
```bash
./paictl.py service stop --skip-service-list storage-manager dshuttle-master dshuttle-worker dshuttle-csi
```

Use `exit` to leave the dev box container. And remove it by:

Expand Down Expand Up @@ -64,4 +67,9 @@ Start all PAI services by:
./paictl.py service start
```

If you didn't stop `storage-manager`, `dshuttle-master`, `dshuttle-worker` & `dshuttle-csi`, start the other services by:
```bash
./paictl.py service start --skip-service-list storage-manager dshuttle-master dshuttle-worker dshuttle-csi
```

After all services are started, your OpenPAI cluster is successfully upgraded.