From f39f6403af4f3f670479488a97406ea197a08acf Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 31 Aug 2023 08:10:24 +0000 Subject: [PATCH 01/11] Update Rocky containerfiles to fix releaseserver --- containerfiles/rocky-8.8 | 2 +- containerfiles/rocky-8.8-ofed-23.04 | 2 +- containerfiles/rocky-9.2 | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/containerfiles/rocky-8.8 b/containerfiles/rocky-8.8 index fadc877..bd2439c 100644 --- a/containerfiles/rocky-8.8 +++ b/containerfiles/rocky-8.8 @@ -10,7 +10,7 @@ ARG rocky_version=8.8 RUN systemd-machine-id-setup # Pin to an older version of Rocky Linux by setting a DNF package variable -RUN echo "8.8" > /etc/dnf/vars/releasever && dnf update -y --refresh +# RUN echo "8.8" > /etc/dnf/vars/releasever && dnf update -y --refresh # Install/remove packages from https://git.rockylinux.org/rocky/kickstarts/-/blob/r8/Rocky-8-GenericCloud.ks RUN dnf install -y @core --allowerasing diff --git a/containerfiles/rocky-8.8-ofed-23.04 b/containerfiles/rocky-8.8-ofed-23.04 index e1ed88a..2077f7b 100644 --- a/containerfiles/rocky-8.8-ofed-23.04 +++ b/containerfiles/rocky-8.8-ofed-23.04 @@ -8,7 +8,7 @@ FROM docker.io/library/rockylinux:8.8 RUN systemd-machine-id-setup # Pin to an older version of Rocky Linux by setting a DNF package variable -RUN echo "8.8" > /etc/dnf/vars/releasever && dnf update -y --refresh +# RUN echo "8.8" > /etc/dnf/vars/releasever && dnf update -y --refresh # Install/remove packages from https://git.rockylinux.org/rocky/kickstarts/-/blob/r8/Rocky-8-GenericCloud.ks RUN dnf install -y @core --allowerasing diff --git a/containerfiles/rocky-9.2 b/containerfiles/rocky-9.2 index b81356b..f1f33a1 100644 --- a/containerfiles/rocky-9.2 +++ b/containerfiles/rocky-9.2 @@ -10,7 +10,7 @@ ARG rocky_version=9.2 #RUN systemd-machine-id-setup # Pin to an older version of Rocky Linux by setting a DNF package variable -RUN echo "9.2" > /etc/dnf/vars/releasever && dnf update -y --refresh +# RUN echo "9.2" > 
/etc/dnf/vars/releasever && dnf update -y --refresh # Install/remove packages from https://git.rockylinux.org/rocky/kickstarts/-/blob/r8/Rocky-8-GenericCloud.ks RUN dnf install -y @core --allowerasing From d7ec36b4c5d133363d228d06d4a87bc9136a9ba1 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 31 Aug 2023 08:12:29 +0000 Subject: [PATCH 02/11] Define stackhpc project --- etc/openstack-config/openstack-config.yml | 84 ++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/etc/openstack-config/openstack-config.yml b/etc/openstack-config/openstack-config.yml index fd88313..303c766 100644 --- a/etc/openstack-config/openstack-config.yml +++ b/etc/openstack-config/openstack-config.yml @@ -4,7 +4,89 @@ # List of OpenStack projects. Format is as required by the stackhpc.os-projects # role. -#openstack_projects: +openstack_projects: + - "{{ openstack_project_stackhpc }}" +# Definition of the openstack demo project. Format is as required by the +# stackhpc.os-projects role. 
+openstack_project_stackhpc: + name: "stackhpc" + description: "StackHPC demo project" + project_domain: "default" + user_domain: "default" + users: "{{ openstack_stackhpc_users }}" + quotas: "{{ openstack_project_quotas }}" + +# StackHPC project users and roles +openstack_stackhpc_users: + - name: "stackhpc-dmehmood" + description: "Dawud Mehmood (StackHPC)" + email: "dawud@stackhpc.com" + password: "placeholder" + roles: "{{ openstack_user_roles }}" + - name: "stackhpc-mcrees" + description: "Matt Crees (StackHPC)" + email: "mattc@stackhpc.com" + password: "placeholder" + roles: "{{ openstack_user_roles }}" + - name: "stackhpc-stelfer" + description: "Stig Telfer (StackHPC)" + email: "stig@stackhpc.com" + password: "placeholder" + roles: "{{ openstack_user_roles }}" + - name: "stackhpc-sdavidson" + description: "Scott Davidson (StackHPC)" + email: "scott@stackhpc.com" + password: "placeholder" + roles: "{{ openstack_user_roles }}" + +# List of roles to apply to regular users in the openstack demo project. +openstack_user_roles: + - member + - heat_stack_owner + # This allows a user read and write access to octavia APIs. + # https://docs.openstack.org/octavia/latest/configuration/policy.html + - load-balancer_member + # This allows a user read access to Barbican secrets. 
+ # https://docs.openstack.org/barbican/latest/admin/access_control.html + - observer + +# Dict of quotas to set for projects with basic resource quotas +openstack_project_quotas: + backup_gigabytes: -1 + backups: -1 + cores: 250 + fixed_ips: 10 + floatingip: 10 + gigabytes: 10000 + injected_file_size: -1 + injected_files: -1 + instances: 20 + key_pairs: 10 + per_volume_gigabytes: 500 + ram: 1000000 + security_group: 10 + security_group_rule: 100 + snapshots: -1 + volumes: 50 + +# Dict of quotas to set for projects with unlimited resource quotas +openstack_unlimited_quotas: + backup_gigabytes: -1 + backups: -1 + cores: -1 + fixed_ips: -1 + floatingip: -1 + gigabytes: -1 + injected_file_size: -1 + injected_files: -1 + instances: -1 + key_pairs: -1 + per_volume_gigabytes: -1 + ram: -1 + security_group: -1 + security_group_rule: -1 + snapshots: -1 + volumes: -1 ############################################################################### # Configuration of networks, subnets and routers. From 44e703c50d7cc24ecbf5a6d3c41380d90ac9d817 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 31 Aug 2023 08:14:17 +0000 Subject: [PATCH 03/11] Initial stackhpc networks --- etc/openstack-config/openstack-config.yml | 93 ++++++++++++++++++++++- 1 file changed, 91 insertions(+), 2 deletions(-) diff --git a/etc/openstack-config/openstack-config.yml b/etc/openstack-config/openstack-config.yml index 303c766..285cfa2 100644 --- a/etc/openstack-config/openstack-config.yml +++ b/etc/openstack-config/openstack-config.yml @@ -93,15 +93,104 @@ openstack_unlimited_quotas: # List of networks in the openstack system. Format is as required by the # stackhpc.os-networks role. +openstack_networks: + - "{{ openstack_network_external_internet }}" + - "{{ openstack_network_external_ceph }}" + - "{{ openstack_network_stackhpc }}" + - "{{ openstack_network_stackhpc_vlan }}" + + +# +# External/Internet network +# Actually still a private subnet range but intended for outward-facing +# networking. 
+# +openstack_network_external_internet_name: "external" + +openstack_network_external_internet: + name: "{{ openstack_network_external_internet_name }}" + project: "admin" + provider_network_type: "vlan" + provider_physical_network: "physnet1" + provider_segmentation_id: 2803 + shared: false + external: true + # Subnet configuration. + subnets: + - "{{ openstack_subnet_external_internet }}" + +openstack_subnet_external_internet: + name: "{{ openstack_network_external_internet_name }}" + project: "admin" + cidr: "10.129.30.0/23" + gateway_ip: "10.129.31.254" + allocation_pool_start: "10.129.30.20" + allocation_pool_end: "10.129.31.240" + #openstack_networks: # List of routers in the openstack project. Format is as required by the # stackhpc.os-networks role. -#openstack_routers: +openstack_routers: + - "{{ openstack_router_stackhpc }}" # List of security groups in the openstack project. # Format is as required by the stackhpc.os-networks role. -#openstack_security_groups: +################################################################################ +# Networks for stackhpc +# +openstack_network_stackhpc: + name: "{{ openstack_project_stackhpc.name }}" + project: "{{ openstack_project_stackhpc.name }}" + shared: false + external: false + # Subnet configuration. + subnets: + - "{{ openstack_subnet_stackhpc }}" + +openstack_subnet_stackhpc: + name: "{{ openstack_project_stackhpc.name }}" + project: "{{ openstack_project_stackhpc.name }}" + cidr: "192.168.0.0/24" + gateway_ip: "192.168.0.1" + allocation_pool_start: "192.168.0.10" + allocation_pool_end: "192.168.0.250" + +openstack_network_stackhpc_vlan: + name: "{{ openstack_project_stackhpc.name }}-vlan" + project: "{{ openstack_project_stackhpc.name }}" + shared: false + external: false + provider_network_type: "vlan" + provider_physical_network: "physnet2" + # This may be required for RDMA traffic + port_security_enabled: false + mtu: 9000 + # Subnet configuration. 
+ subnets: + - "{{ openstack_subnet_stackhpc_vlan }}" + +openstack_subnet_stackhpc_vlan: + name: "{{ openstack_project_stackhpc.name }}-vlan" + project: "{{ openstack_project_stackhpc.name }}" + cidr: "192.168.1.0/24" + gateway_ip: "192.168.1.1" + allocation_pool_start: "192.168.1.10" + allocation_pool_end: "192.168.1.250" + +openstack_router_stackhpc: + name: "{{ openstack_project_stackhpc.name }}" + project: "{{ openstack_project_stackhpc.name }}" + interfaces: + - "{{ openstack_network_stackhpc.name }}" + - "{{ openstack_network_stackhpc_vlan.name }}" + network: "{{ openstack_network_external_internet.name }}" + +openstack_secgroup_stackhpc: + name: default + project: "{{ openstack_project_stackhpc.name }}" + rules: "{{ openstack_secgroup_rules_default }}" + ############################################################################### # Configuration of nova flavors. From 0453c744074bd1faefbb94f782434ca1f5705cec Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 31 Aug 2023 08:15:25 +0000 Subject: [PATCH 04/11] Add HPC flavors --- etc/openstack-config/openstack-config.yml | 143 +++++++++++++++++++++- 1 file changed, 142 insertions(+), 1 deletion(-) diff --git a/etc/openstack-config/openstack-config.yml b/etc/openstack-config/openstack-config.yml index 285cfa2..4b92ed9 100644 --- a/etc/openstack-config/openstack-config.yml +++ b/etc/openstack-config/openstack-config.yml @@ -197,7 +197,148 @@ openstack_secgroup_stackhpc: # List of nova flavors in the openstack project. Format is as required by the # stackhpc.os-flavors role. 
-#openstack_flavors: +openstack_flavors: +# - "{{ openstack_flavor_godzilla }}" + - "{{ hpc_v1_8cpu }}" + - "{{ hpc_v1_16cpu }}" + - "{{ hpc_v1_32cpu }}" + - "{{ hpc_v1_48cpu }}" + - "{{ hpc_v1_64cpu }}" + - "{{ hpc_v1_80cpu }}" + - "{{ hpc_v1_96cpu }}" + +# openstack_flavor_godzilla: +# name: "godzilla" +# ram: 393216 +# disk: 50 +# vcpus: 96 +# is_public: true +# extra_specs: +# hw:cpu_policy: "dedicated" +# hw:cpu_thread_policy: "prefer" +# hw:cpu_threads: 2 +# hw:mem_page_size: "1GB" +# hw:cpu_sockets: 2 +# hw:numa_nodes: 8 +# hw:pci_numa_affinity_policy: preferred +# hw_rng:allowed: "True" + +# HPC v1: +# Core-pinned VCPUs +# 4GB RAM per VCPU +# Spread across NUMA regions (but in thread sibling pairs) +# +hpc_v1_8cpu: + name: "hpc.v1.8cpu" + ram: 32768 + disk: 30 + vcpus: 8 + is_public: false + extra_specs: + hw:cpu_policy: "dedicated" + hw:cpu_thread_policy: "prefer" + hw:cpu_threads: 2 + hw:mem_page_size: "1GB" + hw:cpu_sockets: 2 + hw:numa_nodes: 8 + hw:pci_numa_affinity_policy: preferred + hw_rng:allowed: "True" + +hpc_v1_16cpu: + name: "hpc.v1.16cpu" + ram: 65536 + disk: 30 + vcpus: 16 + is_public: false + extra_specs: + hw:cpu_policy: "dedicated" + hw:cpu_thread_policy: "prefer" + hw:cpu_threads: 2 + hw:mem_page_size: "1GB" + hw:cpu_sockets: 2 + hw:numa_nodes: 8 + hw:pci_numa_affinity_policy: preferred + hw_rng:allowed: "True" + +hpc_v1_32cpu: + name: "hpc.v1.32cpu" + ram: 131072 + disk: 30 + vcpus: 32 + is_public: false + extra_specs: + hw:cpu_policy: "dedicated" + hw:cpu_thread_policy: "prefer" + hw:cpu_threads: 2 + hw:mem_page_size: "1GB" + hw:cpu_sockets: 2 + hw:numa_nodes: 8 + hw:pci_numa_affinity_policy: preferred + hw_rng:allowed: "True" + +hpc_v1_48cpu: + name: "hpc.v1.48cpu" + ram: 196608 + disk: 30 + vcpus: 48 + is_public: false + extra_specs: + hw:cpu_policy: "dedicated" + hw:cpu_thread_policy: "prefer" + hw:cpu_threads: 2 + hw:mem_page_size: "1GB" + hw:cpu_sockets: 2 + hw:numa_nodes: 8 + hw:pci_numa_affinity_policy: preferred + 
hw_rng:allowed: "True" + +hpc_v1_64cpu: + name: "hpc.v1.64cpu" + ram: 262144 + disk: 30 + vcpus: 64 + is_public: false + extra_specs: + hw:cpu_policy: "dedicated" + hw:cpu_thread_policy: "prefer" + hw:cpu_threads: 2 + hw:mem_page_size: "1GB" + hw:cpu_sockets: 2 + hw:numa_nodes: 8 + hw:pci_numa_affinity_policy: preferred + hw_rng:allowed: "True" + +hpc_v1_80cpu: + name: "hpc.v1.80cpu" + ram: 327680 + disk: 30 + vcpus: 80 + is_public: false + extra_specs: + hw:cpu_policy: "dedicated" + hw:cpu_thread_policy: "prefer" + hw:cpu_threads: 2 + hw:mem_page_size: "1GB" + hw:cpu_sockets: 2 + hw:numa_nodes: 8 + hw:pci_numa_affinity_policy: preferred + hw_rng:allowed: "True" + +hpc_v1_96cpu: + name: "hpc.v1.96cpu" + ram: 393216 + disk: 30 + vcpus: 96 + is_public: false + extra_specs: + hw:cpu_policy: "dedicated" + hw:cpu_thread_policy: "prefer" + hw:cpu_threads: 2 + hw:mem_page_size: "1GB" + hw:cpu_sockets: 2 + hw:numa_nodes: 8 + hw:pci_numa_affinity_policy: preferred + hw_rng:allowed: "True" ############################################################################### # Configuration of nova host aggregates. From c3d211d674da753c2755a50c28b7be9e7660539b Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 31 Aug 2023 08:16:30 +0000 Subject: [PATCH 05/11] Initial images + OFED images --- etc/openstack-config/openstack-config.yml | 281 +++++++++++++++++++++- 1 file changed, 280 insertions(+), 1 deletion(-) diff --git a/etc/openstack-config/openstack-config.yml b/etc/openstack-config/openstack-config.yml index 4b92ed9..f44fa75 100644 --- a/etc/openstack-config/openstack-config.yml +++ b/etc/openstack-config/openstack-config.yml @@ -351,7 +351,286 @@ hpc_v1_96cpu: # Configuration of Glance software images. # List of Glance images. Format is as required by the stackhpc.os-images role. -#openstack_images: +# List of additional host packages. +os_images_package_dependencies_extra: + # debootstrap is required to build ubuntu-minimal images. 
+ - debootstrap + +# Drop cloud-init and stable-interface-names from default elements. +os_images_common: enable-serial-console + +# Set this to true to force rebuilding images. +os_images_force_rebuild: false + +# List of Glance images. Format is as required by the stackhpc.os-images role. +openstack_images: + # - "{{ openstack_image_rocky88 }}" + # - "{{ openstack_image_rocky88_ofed2304 }}" + # - "{{ openstack_image_rocky88_ofed2304_dev }}" + # - "{{ openstack_image_rocky92 }}" + - "{{ openstack_image_ubuntu_jammy }}" + +# Rocky Linux 8.8 built using a custom containerfile +openstack_image_rocky88: + name: "Rocky-8.8" + type: "raw" + elements: + - "rocky-container" + - "cloud-init" + - "cloud-init-growpart" + - "epel" + - "selinux-permissive" + - "dhcp-all-interfaces" + - "vm" + - "grub2" + - "openssh-server" + - "block-device-efi" + - "dracut-regenerate" + is_public: True + packages: + - "gdisk" + - "efibootmgr" + - "efivar" + - "bash-completion" + - "git" + - "linux-firmware" + - "logrotate" + - "lshw" + - "man-db" + - "net-tools" + - "nmon" + - "pciutils" + - "tmux" + - "vim-enhanced" + - "NetworkManager-initscripts-updown" + - "dracut" + - "dracut-network" + env: + DIB_CONTAINERFILE_NETWORK_DRIVER: host + DIB_CONTAINERFILE_RUNTIME: docker + DIB_CONTAINERFILE_DOCKERFILE: "{{ playbook_dir }}/../containerfiles/rocky-8.8" + DIB_CLOUD_INIT_GROWPART_DEVICES: + - / + YUM: dnf + DIB_RELEASE: "8.8" + DIB_DRACUT_ENABLED_MODULES: + - name: lvm + packages: + - lvm2 + - name: kernel-modules + - name: kernel-network-modules + properties: + os_type: "linux" + os_distro: "rocky" + os_version: "8.8" + hw_vif_multiqueue_enabled: true + hw_scsi_model: "virtio-scsi" + hw_disk_bus: "scsi" + +# Rocky 8.8 built with Mellanox OFED 23.04 +openstack_image_rocky88_ofed2304: + name: "Rocky-8.8-OFED-23.04" + type: "raw" + elements: + - "rocky-container" + - "cloud-init" + - "cloud-init-growpart" + - "epel" + - "selinux-permissive" + - "dhcp-all-interfaces" + - "vm" + - "grub2" + - 
"openssh-server" + - "block-device-efi" + - "dracut-regenerate" + is_public: False + packages: + - "gdisk" + - "efibootmgr" + - "efivar" + - "bash-completion" + - "git" + - "linux-firmware" + - "logrotate" + - "lshw" + - "man-db" + - "net-tools" + - "nmon" + - "pciutils" + - "tmux" + - "vim-enhanced" + - "NetworkManager-initscripts-updown" + - "dracut" + - "dracut-network" + env: + DIB_CONTAINERFILE_NETWORK_DRIVER: host + DIB_CONTAINERFILE_RUNTIME: docker + DIB_CONTAINERFILE_DOCKERFILE: "{{ playbook_dir }}/../containerfiles/rocky-8.8-ofed-23.04" + DIB_CLOUD_INIT_GROWPART_DEVICES: + - / + YUM: dnf + DIB_RELEASE: "8.8" + DIB_DRACUT_ENABLED_MODULES: + - name: lvm + packages: + - lvm2 + - name: kernel-modules + - name: kernel-network-modules + properties: + os_type: "linux" + os_distro: "rocky" + os_version: "8.8" + hw_vif_multiqueue_enabled: true + hw_scsi_model: "virtio-scsi" + hw_disk_bus: "scsi" + +# Rocky 8.8 built with Mellanox OFED 23.04 + devuser +openstack_image_rocky88_ofed2304_dev: + name: "Rocky-8.8-OFED-23.04-dev" + type: "raw" + elements: + - "rocky-container" + - "cloud-init" + - "cloud-init-growpart" + - "epel" + - "selinux-permissive" + - "dhcp-all-interfaces" + - "vm" + - "grub2" + - "openssh-server" + - "block-device-efi" + - "dracut-regenerate" + - "devuser" + is_public: False + packages: + - "gdisk" + - "efibootmgr" + - "efivar" + - "bash-completion" + - "git" + - "linux-firmware" + - "logrotate" + - "lshw" + - "man-db" + - "net-tools" + - "nmon" + - "pciutils" + - "tmux" + - "vim-enhanced" + - "NetworkManager-initscripts-updown" + - "dracut" + - "dracut-network" + env: + DIB_DEV_USER_USERNAME: "devuser" + DIB_DEV_USER_PASSWORD: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 36346461356139626536656561353063623964356566303264383031653034633566386161333738 + 3831343166353132363332623232376463306138323038330a663231663833393132653965316534 + 36393032656138303632336231383437313532653335393038626132646635353664396233393037 + 
3537396337623037370a393739396535316539663061623363376330636466333635646233306664 + 64623262396365373437356235346630613732666537623464663862653463333362 + DIB_DEV_USER_PWDLESS_SUDO: "yes" + DIB_CONTAINERFILE_NETWORK_DRIVER: host + DIB_CONTAINERFILE_RUNTIME: docker + DIB_CONTAINERFILE_DOCKERFILE: "{{ playbook_dir }}/../containerfiles/rocky-8.8-ofed-23.04" + DIB_CLOUD_INIT_GROWPART_DEVICES: + - / + YUM: dnf + DIB_RELEASE: "8.8" + DIB_DRACUT_ENABLED_MODULES: + - name: lvm + packages: + - lvm2 + - name: kernel-modules + - name: kernel-network-modules + properties: + os_type: "linux" + os_distro: "rocky" + os_version: "8.8" + hw_vif_multiqueue_enabled: true + hw_scsi_model: "virtio-scsi" + hw_disk_bus: "scsi" + +# Rocky Linux 9.2 built from custom containerfile +openstack_image_rocky92: + name: "Rocky-9.2" + type: "raw" + elements: + - "rocky-container" + - "cloud-init" + - "cloud-init-growpart" + - "selinux-permissive" + - "vm" + - "grub2" + - "openssh-server" + is_public: True + packages: + - "git" + - "tmux" + - "vim-enhanced" + env: + DIB_CONTAINERFILE_NETWORK_DRIVER: host + DIB_CONTAINERFILE_RUNTIME: docker + DIB_CONTAINERFILE_DOCKERFILE: "{{ playbook_dir }}/../containerfiles/rocky-9.2" + YUM: dnf + DIB_CLOUD_INIT_GROWPART_DEVICES: + - "/" + DIB_RELEASE: "9.2" + properties: + os_type: "linux" + os_distro: "rocky" + os_version: "9.2" + hw_vif_multiqueue_enabled: true + hw_scsi_model: "virtio-scsi" + hw_disk_bus: "scsi" + +# Ubuntu Jammy 22.04 +openstack_image_ubuntu_jammy: + name: "Ubuntu-22.04" + type: "raw" + is_public: True + elements: + - "cloud-init" + - "grub2" + - "openssh-server" + - "ubuntu-minimal" + - "vm" + - "dhcp-all-interfaces" + packages: + - "bash-completion" + - "git" + - "less" + - "logrotate" + - "lshw" + - "man-db" + - "net-tools" + - "nmon" + - "pciutils" + - "tmux" + - "iputils-ping" + - "netbase" + - "apt-utils" + - "curl" + - "debootstrap" + - "vim" + properties: + os_type: "linux" + os_distro: "ubuntu" + os_version: "jammy" + hw_rng_model: 
"virtio" + hw_vif_multiqueue_enabled: true + hw_scsi_model: "virtio-scsi" + hw_disk_bus: "scsi" + env: + DIB_RELEASE: "jammy" + + +openstack_image_git_elements: + - repo: "https://github.com/stackhpc/stackhpc-image-elements" + local: "{{ playbook_dir }}/stackhpc-image-elements" + version: master + elements_path: elements + # List of Diskimage Builder (DIB) elements paths to include in image builds. #openstack_image_elements: From ab1b35a033e8604799a376d72fa44b9c19d5a003 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 31 Aug 2023 08:17:38 +0000 Subject: [PATCH 06/11] vGPU resources --- etc/openstack-config/openstack-config.yml | 134 ++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/etc/openstack-config/openstack-config.yml b/etc/openstack-config/openstack-config.yml index f44fa75..0bce5ec 100644 --- a/etc/openstack-config/openstack-config.yml +++ b/etc/openstack-config/openstack-config.yml @@ -136,6 +136,49 @@ openstack_routers: # List of security groups in the openstack project. # Format is as required by the stackhpc.os-networks role. +openstack_security_groups: +# - "{{ openstack_secgroup_stackhpc }}" + - "{{ secgroup_nvidia_dls }}" + +# Default security group rule settings for a project +openstack_secgroup_rules_default: + # Allow ICMP (for ping, etc.). + - ethertype: IPv4 + protocol: icmp + # Allow SSH. + - ethertype: IPv4 + protocol: tcp + port_range_min: 22 + port_range_max: 22 + +secgroup_rules_nvidia_dls: + # Allow ICMP (for ping, etc.). + - ethertype: IPv4 + protocol: icmp + # Allow SSH. 
+ - ethertype: IPv4 + protocol: tcp + port_range_min: 22 + port_range_max: 22 + # https://docs.nvidia.com/license-system/latest/nvidia-license-system-user-guide/index.html + - ethertype: IPv4 + protocol: tcp + port_range_min: 443 + port_range_max: 443 + - ethertype: IPv4 + protocol: tcp + port_range_min: 80 + port_range_max: 80 + - ethertype: IPv4 + protocol: tcp + port_range_min: 7070 + port_range_max: 7070 + +secgroup_nvidia_dls: + name: nvidia-dls + project: "{{ project_cloud_services.name }}" + rules: "{{ secgroup_rules_nvidia_dls }}" + ################################################################################ # Networks for stackhpc # @@ -199,6 +242,9 @@ openstack_secgroup_stackhpc: # stackhpc.os-flavors role. openstack_flavors: # - "{{ openstack_flavor_godzilla }}" + # - "{{ vgpu_a100d_2g_20gb }}" + # - "{{ vgpu_a100d_1g_10gb }}" + # - "{{ vgpu_a100d_40c }}" - "{{ hpc_v1_8cpu }}" - "{{ hpc_v1_16cpu }}" - "{{ hpc_v1_32cpu }}" @@ -340,6 +386,52 @@ hpc_v1_96cpu: hw:pci_numa_affinity_policy: preferred hw_rng:allowed: "True" +vgpu_a100d_2g_20gb: + name: "vgpu.a100d.2g.20gb" + ram: 32768 + disk: 30 + vcpus: 8 + is_public: false + extra_specs: + hw:cpu_policy: "dedicated" + hw:cpu_thread_policy: "prefer" + hw:mem_page_size: "1GB" + hw:cpu_sockets: 2 + hw:numa_nodes: 4 + hw_rng:allowed: "True" + resources:CUSTOM_NVIDIA_700: "1" + +vgpu_a100d_1g_10gb: + name: "vgpu.a100d.1g.10gb" + ram: 16384 + disk: 30 + vcpus: 4 + is_public: false + extra_specs: + hw:cpu_policy: "dedicated" + hw:cpu_thread_policy: "prefer" + hw:mem_page_size: "1GB" + hw:cpu_sockets: 2 + hw:numa_nodes: 2 + hw_rng:allowed: "True" + resources:CUSTOM_NVIDIA_699: "1" + +vgpu_a100d_40c: + name: "vgpu.a100d.40c" + ram: 65536 + disk: 30 + vcpus: 16 + is_public: false + extra_specs: + hw:cpu_policy: "dedicated" + hw:cpu_thread_policy: "prefer" + hw:mem_page_size: "1GB" + hw:cpu_sockets: 2 + hw:numa_nodes: 8 + hw_rng:allowed: "True" + resources:CUSTOM_NVIDIA_697: "1" + + 
############################################################################### # Configuration of nova host aggregates. @@ -369,6 +461,7 @@ openstack_images: # - "{{ openstack_image_rocky88_ofed2304_dev }}" # - "{{ openstack_image_rocky92 }}" - "{{ openstack_image_ubuntu_jammy }}" + # - "{{ image_rocky9_nvidia }}" # Rocky Linux 8.8 built using a custom containerfile openstack_image_rocky88: @@ -624,6 +717,47 @@ openstack_image_ubuntu_jammy: env: DIB_RELEASE: "jammy" +image_rocky9_nvidia: + name: "Rocky9-NVIDIA" + type: raw + elements: + - "rocky-container" + - "rpm" + - "nvidia-vgpu" + - "cloud-init" + - "epel" + - "cloud-init-growpart" + - "selinux-permissive" + - "dhcp-all-interfaces" + - "vm" + - "extra-repos" + - "grub2" + - "stable-interface-names" + - "openssh-server" + is_public: True + packages: + - "dkms" + - "git" + - "tmux" + - "cuda-minimal-build-12-1" + - "cuda-demo-suite-12-1" + - "cuda-libraries-12-1" + - "cuda-toolkit" + - "vim-enhanced" + env: + DIB_CONTAINERFILE_NETWORK_DRIVER: host + DIB_CONTAINERFILE_RUNTIME: docker + DIB_RPMS: "http://10.129.28.41:80/pulp/content/nvidia/nvidia-linux-grid-525-525.125.06-1.x86_64.rpm" + YUM: dnf + DIB_EXTRA_REPOS: "https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo" + DIB_NVIDIA_VGPU_CLIENT_TOKEN: "{{ lookup('file' , 'secrets/nvidia-client-token.tok') }}" + DIB_CLOUD_INIT_GROWPART_DEVICES: + - "/" + DIB_RELEASE: "9" + properties: + os_type: "linux" + os_distro: "rocky" + os_version: "9" openstack_image_git_elements: - repo: "https://github.com/stackhpc/stackhpc-image-elements" From 394141bffa1ef389efb92c20bf827e2f670cc289 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 31 Aug 2023 08:18:21 +0000 Subject: [PATCH 07/11] External Ceph network --- ansible/openstack-networks.yml | 1 + etc/openstack-config/openstack-config.yml | 50 ++++++++++++++++++++++- requirements.yml | 3 ++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/ansible/openstack-networks.yml 
b/ansible/openstack-networks.yml index 525712d..b3de6dc 100644 --- a/ansible/openstack-networks.yml +++ b/ansible/openstack-networks.yml @@ -12,3 +12,4 @@ os_networks: "{{ openstack_networks }}" os_networks_routers: "{{ openstack_routers }}" os_networks_security_groups: "{{ openstack_security_groups }}" + os_networks_rbac: "{{ openstack_networks_rbac }}" diff --git a/etc/openstack-config/openstack-config.yml b/etc/openstack-config/openstack-config.yml index 0bce5ec..9c56c60 100644 --- a/etc/openstack-config/openstack-config.yml +++ b/etc/openstack-config/openstack-config.yml @@ -99,6 +99,8 @@ openstack_networks: - "{{ openstack_network_stackhpc }}" - "{{ openstack_network_stackhpc_vlan }}" +openstack_networks_rbac: + - "{{ openstack_rbac_external_ceph }}" # # External/Internet network @@ -127,7 +129,53 @@ openstack_subnet_external_internet: allocation_pool_start: "10.129.30.20" allocation_pool_end: "10.129.31.240" -#openstack_networks: +# +# External/Ceph network +# +openstack_network_external_ceph_name: "external-ceph" + +# The External/Ceph network is owned by the admin project and shared +# via RBAC with projects that require direct access to Ceph storage +# (eg, for Manila CephFS native access). The network access control +# is set as "access as shared" for those tenant networks. +openstack_network_external_ceph: + name: "{{ openstack_network_external_ceph_name }}" + project: "admin" + provider_network_type: "vlan" + provider_physical_network: "physnet2" + provider_segmentation_id: 8 + shared: false + external: false + mtu: 9150 + # Subnet configuration. 
+ subnets: + - "{{ openstack_subnet_external_ceph }}" + +# There is no route out from this network for VMs +# Reserve some space at the bottom of the network for Ceph IPs +openstack_subnet_external_ceph: + name: "{{ openstack_network_external_ceph_name }}" + project: "admin" + cidr: "10.0.0.0/20" + allocation_pool_start: "10.0.0.2" + allocation_pool_end: "10.0.15.250" + host_routes: + - destination: "10.129.27.0/25" + nexthop: "10.0.15.254" + +# The External/Ceph network is shared as an additional tenant +# VLAN for approved projects requiring high-speed direct access +# to the Ceph cluster. Those projects don't get to attach routers +# or make other changes to the external-ceph network, but can attach +# VM network ports to the shared network. VMs from different +# projects are on the same network but isolated from one another +# by security groups. +# Those projects are listed here. +openstack_rbac_external_ceph: + network: "{{ openstack_network_external_ceph_name }}" + access: "access_as_shared" + projects: + - "{{ openstack_project_stackhpc.name }}" # List of routers in the openstack project. Format is as required by the # stackhpc.os-networks role. 
diff --git a/requirements.yml b/requirements.yml index fda082e..31c9442 100644 --- a/requirements.yml +++ b/requirements.yml @@ -3,6 +3,9 @@ roles: - name: stackhpc.os-flavors - name: stackhpc.os-images - name: stackhpc.os-networks + source: git@github.com:stackhpc/ansible-role-os-networks.git + type: git + version: v1.5.7 - name: stackhpc.os-projects - name: stackhpc.os_host_aggregates - name: stackhpc.os-container-clusters From 9bc8d3f6ee7903d99ac311e0c8f71bac1826295c Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 31 Aug 2023 08:18:34 +0000 Subject: [PATCH 08/11] Cloud services project --- etc/openstack-config/openstack-config.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/etc/openstack-config/openstack-config.yml b/etc/openstack-config/openstack-config.yml index 9c56c60..8ce21d5 100644 --- a/etc/openstack-config/openstack-config.yml +++ b/etc/openstack-config/openstack-config.yml @@ -6,6 +6,8 @@ # role. openstack_projects: - "{{ openstack_project_stackhpc }}" + # - "{{ project_cloud_services }}" + # Definition of the openstack demo project. Format is as required by the # stackhpc.os-projects role. openstack_project_stackhpc: @@ -39,6 +41,14 @@ openstack_stackhpc_users: password: "placeholder" roles: "{{ openstack_user_roles }}" +project_cloud_services: + name: "cloud-services" + description: "Internal Cloud services" + project_domain: default + user_domain: default + users: [] + quotas: "{{ openstack_project_quotas }}" + # List of roles to apply to regular users in the openstack demo project. 
openstack_user_roles: - member From 39bd2e613baa39c85219fd2de2f3c3494f41f500 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 31 Aug 2023 08:20:20 +0000 Subject: [PATCH 09/11] nvidia client token --- ansible/secrets/nvidia-client-token.tok | 140 ++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 ansible/secrets/nvidia-client-token.tok diff --git a/ansible/secrets/nvidia-client-token.tok b/ansible/secrets/nvidia-client-token.tok new file mode 100644 index 0000000..cfda18b --- /dev/null +++ b/ansible/secrets/nvidia-client-token.tok @@ -0,0 +1,140 @@ +$ANSIBLE_VAULT;1.1;AES256 +37366431656638653236363833623031663536646163326536386265303135323237353665653638 +3634653939306336373662656461663832653866373563320a373336393064366363386263333764 +61313162303263383936373437653334306565373232613132343335356533656361386166326335 +3235636135376136360a356636626338613230316233336538616236303538313237653934613132 +65623064386539663866663432376430383565376231393939646435636264326463653033313038 +62333365396436303737626638333833353264343965376538346632623966363131646639646561 +32333161313464316533613230636131613336303863303131326631636163623631326166323438 +66636232376461303231343930303535303532663361663034336562366537353863666137363631 +63633133373639376338633234613766326433366361636637393638613362326434623337333939 +66316564316639383534643465623837633039333763613864336165363566323733343064663930 +38353965323763303934393465303731306332613935646636633233393531323033353031653063 +64643663653665343937363439373835383033373365633637306162356464663066323830633636 +38306265386562396137336431633331396137643338313061313061393061306238346634323138 +64306437343865386261373561656661333062323963326334663764613836346266393535386463 +61383031356665363233313637343533366539633734336365643834646438653265383862666336 +63353163623635386233356462636232316563303136383764616633336432316633376266326365 
+61393564333532323836383564373537383435396464366536393130666236356536393634333436 +35643765396663373035306166376530663530366233373331646533333038376435653964636236 +63353531356663383234636238383239643462646366363439373036396238353132333537623237 +63346538333637373131613332383237663933643034653437323337636338653434646131386136 +65646538326466653733646666343266343737393138323038653161393163623433363632633633 +38633632323033396539366363316239636233656565653334336266383531393231386239643233 +63623939306366333131373739633434373733363864386438653938306562376131626665663433 +34623365313838353762393361386136303932653631363661336533343263636333393465636330 +33396239636232393535356233623735656665643530643963306236326235323631363362383033 +37383661343535303561363066396236353463613437643966393762346466373562343163653661 +61663437353932666239656263363739643135666138356634393335326336333739343733326133 +35373332663438626264306432643037323763386261396363343065343866393731623136346432 +61356331353662306633643563313964386637643938336234346636323834623562373431623831 +65336561653161663665303537656336323339336262303833383838376537623030343633626130 +39643838326634313934623139663064383634633338386261343931343464393733636630633033 +30383937666364663333396366346335313565373337636362306262343861323061366433303232 +65303030353934313931393466633432363761623837656332356333336561623039633938623066 +64343238373134313735636561613866313761376463333731336361613865663261656535346461 +61613534663566313236646364616231656664343138383236326265636235616136323130373864 +32613134663431613034383435326431306136656638653164633439313865346464383663663265 +38373230366361363866383330633836353535393039616231636662663437353962613863646634 +63356637366164313131333231393466383133363863306164613030393566366534613063653636 +38393562616330383136333932376330366562366238356266323063313030386432333966353236 +34663934343231643630396331306231616561363833383034353062656537353363376463383162 
+34636538643233383137663733303638613961323735363636646630373337383636643837343534 +37643035356436656430333038663938646662363837346238303735353539333661346239386239 +30336565323166663564373034613430323931633562363337316533383066616532313163393439 +32656162343362633038356330366233353133303135613266663935653338346139333135613964 +33666438636166303265303937623738663439386630373735626433653738613334346261353131 +36323864633665306133643332396231366332666461356331306237373163613236643664366364 +33653433636262326232323136343636383362373638323530323466306161363338343364653735 +64393436313238616331333438396539386666663532396233396462363266323265383537343532 +35663838313065356630336361663761366532663934633564333436376537346637393238616535 +34333564646230353336633037373038393262646361396332353161376563333631663563643261 +62323633386431643766623361613866656232316232386132326538383635636538613639326635 +65316665313433303031313939623831333930363464323737373832386632363139346664653033 +36373638386435326236623866326636623531306364353737393634386662353136343033636664 +37616265343664396534656633626634313761373435366666636238653030626564616165303864 +36353964373562383832626230653139643934613938353432613335623966383364623233666532 +36366363336533353561363630376162643635653863623064643634303061313863376463626162 +32326138613432373132376366313163656339636261343133393233363033613335316262643963 +66646533346230383936383963636532636666663265343230316263626666366365353965346239 +65373964366432353233313232663765343765393963373163333331386364316166666537353431 +65663339633139393532613430663437363865376637633835663464316231303939373331323839 +38336435613433353339336162633335353134393966323931353634663865613539383764343032 +38303036633639383733633963643138396463643932656538376534396562626432363138636533 +61366362656236393536643139333964336265326539323334306462633965363966616432663337 +33643730363532313439396536326338313539313732623334353332313963363631366663663164 
+32363033336565356363633661636538356437663538346237356636616331613266653534326363 +64653862613763313830373532633437653737396265636464353161323939373530616239393632 +35646631633263333537393931393232383433396236313065306437653731313066316631333735 +64623132303638353334336463646138306362656139633731356636643737333737303738376635 +32646639366165353866633466353337666236383434623133653736303633663934346132303736 +39343966653665356231646439613334656237623663393065373366363362356336356538323966 +66623966356461393231623732356534646564643763626161303430626561633339633639353831 +30303332383565646237333965653339643539313062323364353335343738663333333061663535 +31393835333234353736393039313530336332303363653763386438376663646535363065373632 +33666363376137623561646231653137363366333332626164653534613766323338393932613533 +64333937633839626235366465623262313063393132653933656539353637623766336531323939 +61323966613564386330636261383961383330623834316331613064656332356134393536656461 +38316633666263626133386437613865633436356432663261336335323834363433303462656633 +31613034303733613663363333313937353931356262343937303136616666616637303165346566 +65333766326663356664323731356264393737353430633066346132353665343361616235393939 +35356434383662626664313132636539376562393238323462306532326639386566616236623665 +30653734303333346661363132376664366264643830376635303838643738366230363731333664 +61333362326134386464373030633634373138363461376133333764313532323364663838336664 +38353161306233633731343863636139396138653761623931346533653937316531316230646162 +62663430663339366463343032343366376436313936363133383237646534336361613534363237 +36626236376536626361666137356263366236613935363231363366306563326366613663313338 +62653861653264643136306565303064373166626163646466303733363837353434613463396430 +37633031383261363435646233663435336165643238663263646165623961666463323333373138 +32613032343637363330646432323265306139396637323531396330623030306437393435323533 
+66653638343636353734373233633065333562366361303137326334666532303636333431356264 +37636435363462323838336330363934643330393336373864643531666236326536353265343564 +61323831623562393437333264383664383461333965316536326365353761663632643238643935 +65313933393664386566363161633364633562383134396131643236663834613661323136386533 +64306561656532666162346232303262353135633434383834393835636333306266633630313835 +64626335396661616335303465336631383366366463353638373131386636636436363438636636 +35613834373932376665323066633561333739383736313431346665353335663462316463376137 +64313837363533626538396332383266373665393633373835626234356339326564343962626435 +62623730303839366637623534323833636334616534653132663839383762646361613563646366 +62343034653037303030306132376664653536373865393831653733396436653465336137396566 +32373565653533333336363662373939626633663965623866623730636665656432376330616562 +63333338346263393761343935623037393735316363333233366637386263633238366464633361 +31306265393365306630656239323462346562646432336566336364306130313334613861363331 +31363631366436333034663739353038643333633732336563633536306131326136646237376630 +66343032653631366363636534663363613863366563353136383830313037616232366335333461 +30323730323635633736663264343736353639616234356566393664643561366436363636616461 +62333830346530616636373034363338346330653735323065393236343432366263353939383132 +65386266363537346139623836613563633935643030376263336634646333336564633563323639 +35643130303466636136623037303036633465343537626131323432653937643766333666363238 +65383338346431383236613364666331363937303836396639306261336232633433373731313163 +32316361613239623137316464373033323661376336336233306130666635643839336232373637 +36363464643830663834363933613061623764353933626166633436666639336239613736383861 +65326436323232643362303932616137373634663034663665356166356634666564636133663038 +34373639653365333031353732303633653262306133303766666561363330653763366438343161 
+35316361303666383337316638383134646461663931326265323663636237333563633961383664 +62663337643433643232393532303365353032306535356437643531653235633534626230363762 +63376536623630356333316464306539666132323661383962656461353162326533323764343434 +61633861613364373532383839333632323831393866643162623333316530633335623733633038 +39353032316434336263336331366431333036626334303566313561303236653337623764366439 +34373734643331633830316230356664616239633232326565626235363034643338653531323336 +65643134613635383863366463326438356262393637656235316332353137366436363066386165 +32643636613163356133373065396134373732643737323135303238613334626339393436666438 +61616164633561666466333665346262653965343830366465363333353366316630656661343261 +36393633353630306632616162343762323566376338303464636365653661653061393561303063 +33373739626139376634373765613236366632386532393364643231643838313261633864323836 +39323536666231613766306133393334396261623937353536326533373730313438643137636461 +38383531383066396436373162336466326535393334616438313731383161346439373631336266 +65376632373838656165393262363834303937396635323463656361623937666239663432383538 +39353666626230626135373466613733383130383063326434316539373231343934653266646362 +30376235386130393538396465306537343539396337653731623739373434663864666562666634 +66636532626133303034656661376237363130653631313064656231323637663232646637376564 +32633564316462386138626430323438386532623337386662356363613135373663313830373330 +61313061653266633636313934616631646334316266303735316364323366333631313035666666 +38346663396132383865376564633064653163636361666636333138343731306131333263366331 +38653866626565303936363966316230643032663161623466643231653833396435623461303539 +34373133333461383963656332633666636536303038346138663236353461346665356563616365 +61663436303863373037636334333936323365306133316236616463323031613337343437356231 +65333832633165306539643932653231303537363738663330383962383334343638363034633536 
+37383162333166643231366434646364333763626336646335643962333534613730666266393233 +31646637336637643733333764653735396265313237326436646338356232613135613338616561 +636634393539646639373534393432363562 From 7f63c4df88972f1544ffdab115da3dc1077af10f Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 31 Aug 2023 08:34:40 +0000 Subject: [PATCH 10/11] Ignore cache and venv --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 6fe3a61..9d756a9 100644 --- a/.gitignore +++ b/.gitignore @@ -114,3 +114,7 @@ ansible/collections/**/ *~ .*.swp .*sw? + +# venv +ansible/openstack-config-image-cache +ansible/openstack-config-venv \ No newline at end of file From d1392e2d16f8ed862355185c83b57a1ce2534c81 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 31 Aug 2023 08:39:19 +0000 Subject: [PATCH 11/11] Uncomment in-use resources --- etc/openstack-config/openstack-config.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/etc/openstack-config/openstack-config.yml b/etc/openstack-config/openstack-config.yml index 7e4bc50..7a99326 100644 --- a/etc/openstack-config/openstack-config.yml +++ b/etc/openstack-config/openstack-config.yml @@ -6,7 +6,7 @@ # role. openstack_projects: - "{{ openstack_project_stackhpc }}" - # - "{{ project_cloud_services }}" + - "{{ project_cloud_services }}" - "{{ openstack_project_azimuth }}" # Definition of the openstack demo project. Format is as required by the @@ -204,7 +204,7 @@ openstack_routers: # List of security groups in the openstack project. # Format is as required by the stackhpc.os-networks role. openstack_security_groups: -# - "{{ openstack_secgroup_stackhpc }}" + - "{{ openstack_secgroup_stackhpc }}" - "{{ secgroup_nvidia_dls }}" # Default security group rule settings for a project @@ -309,9 +309,9 @@ openstack_secgroup_stackhpc: # stackhpc.os-flavors role. 
openstack_flavors: # - "{{ openstack_flavor_godzilla }}" - # - "{{ vgpu_a100d_2g_20gb }}" - # - "{{ vgpu_a100d_1g_10gb }}" - # - "{{ vgpu_a100d_40c }}" + - "{{ vgpu_a100d_2g_20gb }}" + - "{{ vgpu_a100d_1g_10gb }}" + - "{{ vgpu_a100d_40c }}" - "{{ hpc_v1_8cpu }}" - "{{ hpc_v1_16cpu }}" - "{{ hpc_v1_32cpu }}" @@ -523,12 +523,12 @@ os_images_force_rebuild: false # List of Glance images. Format is as required by the stackhpc.os-images role. openstack_images: - # - "{{ openstack_image_rocky88 }}" - # - "{{ openstack_image_rocky88_ofed2304 }}" - # - "{{ openstack_image_rocky88_ofed2304_dev }}" - # - "{{ openstack_image_rocky92 }}" + - "{{ openstack_image_rocky88 }}" + - "{{ openstack_image_rocky88_ofed2304 }}" + - "{{ openstack_image_rocky88_ofed2304_dev }}" + - "{{ openstack_image_rocky92 }}" - "{{ openstack_image_ubuntu_jammy }}" - # - "{{ image_rocky9_nvidia }}" + - "{{ image_rocky9_nvidia }}" # Rocky Linux 8.8 built using a custom containerfile openstack_image_rocky88: