Skip to content
This repository was archived by the owner on Jun 6, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 20 additions & 36 deletions contrib/aks-engine/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,38 +111,22 @@ def generate_aks_engine_script(aks_engine_cfg, working_dir, script_dir):


def generate_openpai_configuration(k8s_info, aks_engine_cfg, working_dir, script_dir):
    """Render the OpenPAI deployment files from their templates.

    Every entry in the table below is read from ``<script_dir>/templates``
    and written to ``working_dir`` through ``generate_template_file``, always
    with the same rendering context.

    Args:
        k8s_info: Cluster information, exposed to the templates as ``k8s``.
        aks_engine_cfg: aks-engine configuration, exposed to the templates
            as ``cfg``.
        working_dir: Directory that receives the rendered files.
        script_dir: Directory that contains the ``templates`` folder.
    """
    # One (source, destination) pair per generated file. Each template is
    # rendered exactly once; the former one-call-per-file statements
    # duplicated four of these entries and have been folded into the table.
    # NOTE: ca-resource.yaml has no ".j2" suffix but is still passed through
    # the same rendering call as the other entries.
    templates = [
        ("{}/templates/generate-key-and-cert.sh.j2", "{}/generate-key-and-cert.sh"),
        ("{}/templates/layout.yaml.j2", "{}/layout.yaml"),
        ("{}/templates/services-configuration.yaml.j2", "{}/services-configuration.yaml"),
        ("{}/templates/ca-resource.yaml", "{}/ca-resource.yaml"),
        ("{}/templates/hived-config-adapter.yaml.j2", "{}/hived-config-adapter.yaml"),
        ("{}/templates/start-openpai.sh.j2", "{}/start-openpai.sh"),
    ]
    for src, dst in templates:
        generate_template_file(
            src.format(script_dir),
            dst.format(working_dir),
            # A fresh dict per call, as in the original, so a callee that
            # mutates its context cannot affect the next rendering.
            {
                "cfg": aks_engine_cfg,
                "k8s": k8s_info,
            },
        )


def pod_is_ready_or_not(label_key, label_value, service_name, kubeconfig):
Expand Down Expand Up @@ -227,8 +211,8 @@ def get_k8s_cluster_info(working_dir, dns_prefix, location):
gpu_resource = int(parse_quantity(node.status.allocatable['nvidia.com/gpu']))
if master_string in node.metadata.name:
master[node.metadata.name] = {
"cpu-resource": int(parse_quantity(node.status.allocatable['cpu'])),
"mem-resource": int(parse_quantity(node.status.allocatable['memory']) / 1024 / 1024 ),
"cpu-resource": int(parse_quantity(node.status.allocatable['cpu'])) - 2,
"mem-resource": int(parse_quantity(node.status.allocatable['memory']) / 1024 / 1024 ) - 8 * 1024,
"gpu-resource": gpu_resource,
}
master[node.metadata.name]["hostname"] = node.metadata.name
Expand All @@ -244,8 +228,8 @@ def get_k8s_cluster_info(working_dir, dns_prefix, location):
master_ip_internal = address.address
elif worker_string in node.metadata.name:
worker[node.metadata.name] = {
"cpu-resource": int(parse_quantity(node.status.allocatable['cpu'])),
"mem-resource": int(parse_quantity(node.status.allocatable['memory']) / 1024 / 1024 ),
"cpu-resource": int(parse_quantity(node.status.allocatable['cpu'])) - 2,
"mem-resource": int(parse_quantity(node.status.allocatable['memory']) / 1024 / 1024 ) - 8 * 1024,
"gpu-resource": gpu_resource,
}
if sku is None:
Expand Down
1 change: 0 additions & 1 deletion contrib/aks-engine/templates/aks-engine.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

aks-engine deploy --subscription-id {{cfg["subscription_id"]}} \
--dns-prefix {{cfg["dns_prefix"]}} \
--resource-group {{cfg["resource_group_name"]}} \
--location {{cfg["location"]}} \
--api-model {{working_dir}}/k8s.json \
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,39 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Service account referenced by the ClusterRoleBinding below.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: ca-resource-account
  namespace: default
---
# Cluster-wide permission to patch node objects and their status.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: ca-resource-role
rules:
- apiGroups: [""]
  resources: ["nodes"]
  verbs: ["patch"]
- apiGroups: [""]
  resources: ["nodes/status"]
  verbs: ["patch"]
---
# Grants ca-resource-role to ca-resource-account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: ca-resource-role-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: ca-resource-role
subjects:
- kind: ServiceAccount
  name: ca-resource-account
  namespace: default

---

apiVersion: apps/v1
kind: DaemonSet
metadata:
Expand Down
80 changes: 80 additions & 0 deletions contrib/aks-engine/templates/hived-config-adapter.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Service account the adapter pod runs as (see serviceAccountName below).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: hived-config-adapter-account
  namespace: default
---
# Cluster-wide permissions: list/watch nodes and patch configmaps.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: hived-config-adapter-role
rules:
- apiGroups: [""]
  resources: ["nodes"]
  verbs: ["list", "watch"]
- apiGroups: [""]
  resources: ["configmaps"]
  verbs: ["patch"]
---
# Grants hived-config-adapter-role to hived-config-adapter-account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: hived-config-adapter-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: hived-config-adapter-role
subjects:
- kind: ServiceAccount
  name: hived-config-adapter-account
  namespace: default

---

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: hived-config-adapter-ds
spec:
  selector:
    matchLabels:
      app: hived-config-adapter
  template:
    metadata:
      name: hived-config-adapter
      labels:
        app: hived-config-adapter
    spec:
      # Only schedule onto nodes labelled pai-master=true.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: pai-master
                operator: In
                values:
                - "true"
      hostNetwork: false
      hostPID: false
      containers:
      - name: app
        image: openpai/hived-config-adapter
        imagePullPolicy: Always
        command: ["python"]
        # --max-nodes is filled in at render time from the aks-engine
        # configuration (cfg.openpai_worker_vmss.max_vm_count).
        args: [
          "./adapter.py",
          "--max-nodes={{cfg['openpai_worker_vmss']['max_vm_count']}}",
          "--node-name-prefix=k8s-opworker",
          "--hived-config-file=/hivedscheduler-config/hivedscheduler.yaml"
        ]
        volumeMounts:
        - name: hivedscheduler-config
          mountPath: /hivedscheduler-config
      volumes:
      # The hivedscheduler-config ConfigMap is mounted at the directory
      # named by --hived-config-file.
      - name: hivedscheduler-config
        configMap:
          name: hivedscheduler-config
      serviceAccountName: hived-config-adapter-account
Loading