diff --git a/ansible/roles/add-kubelet-logging/tasks/main.yml b/ansible/roles/add-kubelet-logging/tasks/main.yml index 03914cd059..932f495ca6 100644 --- a/ansible/roles/add-kubelet-logging/tasks/main.yml +++ b/ansible/roles/add-kubelet-logging/tasks/main.yml @@ -58,7 +58,9 @@ insecure_skip_verify: true static_configs: - targets: - - {{ hostvars[host].ansible_host }}:10250 + - {{ hostvars[host].ansible_host }}:10250 + labels: + instance: '{{ hostvars[host].kubernetes_nodename | default(host) }}' - job_name: kubelet-{{ host }}-cadvisor scheme: https @@ -69,7 +71,9 @@ metrics_path: /metrics/cadvisor static_configs: - targets: - - {{ hostvars[host].ansible_host }}:10250 + - {{ hostvars[host].ansible_host }}:10250 + labels: + instance: '{{ hostvars[host].kubernetes_nodename | default(host) }}' {% endfor %} - name: Restart prometheus to pick up new target diff --git a/ansible/roles/common/tasks/files/prom-network-query.py b/ansible/roles/common/tasks/files/prom-network-query.py index e0fe0a7a48..5d68c47c84 100644 --- a/ansible/roles/common/tasks/files/prom-network-query.py +++ b/ansible/roles/common/tasks/files/prom-network-query.py @@ -7,8 +7,8 @@ prom-network-query.py [options] Options: - --prometheus URL Prometheus base URL (default: http://zen3:9091) - --instance HOST:PORT Prometheus instance label to filter by (e.g. microshift:9100) + --prometheus URL Prometheus base URL (default: http://prometheus:9091) + --instance LABEL Prometheus instance label to filter by (e.g. node hostname) --device IFACE Network device (default: enp5s0) --step STEP Range query step (default: 15s) @@ -26,7 +26,7 @@ from dataclasses import dataclass from datetime import datetime, timezone -DEFAULT_PROMETHEUS = "http://zen3:9091" +DEFAULT_PROMETHEUS = "http://prometheus:9091" DEFAULT_DEVICE = "enp5s0" DEFAULT_STEP = "15s" @@ -55,9 +55,12 @@ def base_url(self): return f"{self.scheme}://{self.display_host}:{self.port}{self.base_path}" +_CGNAT_NETWORK = ipaddress.ip_network("100.64.0.0/10") + + def _is_local_or_private_ip(value): ip = ipaddress.ip_address(value) - return ip.is_private or ip.is_loopback or ip.is_link_local + return ip.is_private or ip.is_loopback or ip.is_link_local or ip in _CGNAT_NETWORK def _validate_prometheus_host(host): @@ -145,7 +148,7 @@ def parse_window_args(argv): parser.add_argument( "--instance", default=None, - help="Prometheus instance label to filter by (e.g. microshift:9100)", + help="Prometheus instance label to filter by (e.g. node hostname)", ) parser.add_argument( "--step", diff --git a/ansible/roles/common/tasks/nodename.yml b/ansible/roles/common/tasks/nodename.yml new file mode 100644 index 0000000000..f906fc6e22 --- /dev/null +++ b/ansible/roles/common/tasks/nodename.yml @@ -0,0 +1,60 @@ +--- +# Resolve the Kubernetes NodeName MicroShift will advertise, matching the +# expected MicroShift config layouts used by this playbook: read +# /etc/microshift/config.yaml first, then YAML drop-ins under +# /etc/microshift/config.d/ in lexical path order. Each explicit +# hostnameOverride value (including empty string) updates the tracked +# value, so a later drop-in setting it to "" reverts to the OS hostname, +# matching pkg/config/config.go:250 which only applies non-empty overrides. +# This does not emulate full RFC 7396 merge semantics (e.g. node: null +# clearing the whole node object) — sufficient for normal drop-ins. +# +# become: yes — these files may be root-readable only; failed_when: false on +# the slurps would otherwise silently mask permission errors and fall back +# to ansible_hostname even when an override is configured. + +- name: Resolve kubernetes nodename + become: yes + block: + - name: Find microshift config drop-ins + ansible.builtin.find: + paths: /etc/microshift/config.d + patterns: "*.yaml" + recurse: true + register: _microshift_dropins + failed_when: false + + - name: Build ordered list of microshift config paths + ansible.builtin.set_fact: + _microshift_config_paths: >- + {{ ['/etc/microshift/config.yaml'] + + (_microshift_dropins.files | default([]) | map(attribute='path') | sort) }} + + - name: Slurp microshift config files + ansible.builtin.slurp: + src: "{{ item }}" + register: _microshift_configs + loop: "{{ _microshift_config_paths }}" + failed_when: false + + - name: Compute hostnameOverride from merged microshift config + ansible.builtin.set_fact: + _microshift_hostname_override: >- + {%- set ns = namespace(value='') -%} + {%- for r in _microshift_configs.results -%} + {%- if r.content is defined -%} + {%- set parsed = (r.content | b64decode | from_yaml) -%} + {%- if parsed is mapping + and (parsed.node | default(none)) is mapping + and 'hostnameOverride' in parsed.node -%} + {%- set ns.value = parsed.node.hostnameOverride | default('', true) -%} + {%- endif -%} + {%- endif -%} + {%- endfor -%} + {{ ns.value }} + +- name: Set kubernetes_nodename fact + ansible.builtin.set_fact: + kubernetes_nodename: "{{ ((_microshift_hostname_override | length > 0) + | ternary(_microshift_hostname_override, ansible_hostname)) + | lower }}" diff --git a/ansible/roles/install-logging/templates/prometheus.yml.j2 b/ansible/roles/install-logging/templates/prometheus.yml.j2 index 3db3418c68..b2ac6aef4a 100644 --- a/ansible/roles/install-logging/templates/prometheus.yml.j2 +++ b/ansible/roles/install-logging/templates/prometheus.yml.j2 @@ -27,21 +27,24 @@ scrape_configs: - job_name: node static_configs: - - targets: {% for host in groups['microshift'] %} - - {{ host }}:9100 + - targets: ['{{ host }}:9100'] + labels: + instance: '{{ hostvars[host].kubernetes_nodename | default(host) }}' {% endfor %} - job_name: process static_configs: - - targets: {% for host in groups['microshift'] %} - - {{ host }}:9256 + - targets: ['{{ host }}:9256'] + labels: + instance: '{{ hostvars[host].kubernetes_nodename | default(host) }}' {% endfor %} - job_name: crio static_configs: - - targets: {% for host in groups['microshift'] %} - - {{ host }}:9537 + - targets: ['{{ host }}:9537'] + labels: + instance: '{{ hostvars[host].kubernetes_nodename | default(host) }}' {% endfor %} diff --git a/ansible/roles/microshift-start/tasks/main.yml b/ansible/roles/microshift-start/tasks/main.yml index 44834b0c81..622ba4af82 100644 --- a/ansible/roles/microshift-start/tasks/main.yml +++ b/ansible/roles/microshift-start/tasks/main.yml @@ -1,6 +1,9 @@ --- # microshift-start tasks +- name: resolve kubernetes nodename + include_tasks: roles/common/tasks/nodename.yml + - name: check if microshift has run ansible.builtin.shell: systemctl show -p ActiveEnterTimestampMonotonic microshift | awk -F"=" '{print $2}' register: microshift_active @@ -153,14 +156,14 @@ | default((groups['logging'] | default([]) | first)) ) if (groups['logging'] | default([]) | length > 0) - else 'zen3' + else 'prometheus' ) ~ ':' ~ (prometheus_port | default(9091) | string), true ) }} - prom_network_device: "{{ network_device | default(ansible_default_ipv4.interface | default('enp5s0'), true) }}" - prom_network_instance: "{{ inventory_hostname }}:9100" + prom_network_device: "{{ network_device | default(ansible_facts.default_ipv4.interface | default('enp5s0'), true) }}" + prom_network_instance: "{{ kubernetes_nodename }}" when: prometheus_logging | bool - name: query prometheus for network transfer diff --git a/ansible/setup-node.yml b/ansible/setup-node.yml index b705c67730..bc817439f1 100644 --- a/ansible/setup-node.yml +++ b/ansible/setup-node.yml @@ -18,15 +18,6 @@ when: (create_devenv | bool) - role: setup-localhost -- name: Set up logging node - hosts: logging - become: yes - vars_files: - - vars/all.yml - roles: - - role: install-logging - when: (prometheus_logging | bool) - - name: Entitle host and manage repos hosts: microshift become: yes @@ -57,6 +48,23 @@ - role: install-microshift when: (install_microshift | bool) +- name: Resolve microshift nodename + hosts: microshift + vars_files: + - vars/all.yml + tasks: + - name: resolve kubernetes nodename + include_tasks: roles/common/tasks/nodename.yml + +- name: Set up logging node + hosts: logging + become: yes + vars_files: + - vars/all.yml + roles: + - role: install-logging + when: (prometheus_logging | bool) + - name: Capture microshift metrics hosts: microshift vars_files: