diff --git a/python/neutron-understack/neutron_understack/routers.py b/python/neutron-understack/neutron_understack/routers.py index 6aabb318e..1740ca0f5 100644 --- a/python/neutron-understack/neutron_understack/routers.py +++ b/python/neutron-understack/neutron_understack/routers.py @@ -200,11 +200,14 @@ def link_vxlan_network_ha_chassis_group(_resource, _event, _trigger, payload) -> ports on that network reference the empty network HCG, so no chassis owns them and routing/ARP breaks. - We do what link_network_ha_chassis_group would have done, but source the - chassis from the router HCG instead of the (empty) gateway LRP: populate the - unified network HCG with sync_ha_chassis_group_network_unified, then anchor - the internal router-interface LRP to that same HCG. External ports already - reference the unified network HCG, so populating it fixes them. + We do what link_network_ha_chassis_group would have done: populate the unified + network HCG with sync_ha_chassis_group_network_unified, then anchor the internal + router-interface LRP to that same HCG. The gateway chassis is sourced from the + global HA_Chassis table (all records must share one chassis_name) so the fix + fires even before the external gateway port is attached. + + For VLAN/FLAT networks neutron's handler already populates the network HCG + correctly; we detect that and return early. Subscribed to ROUTER_INTERFACE/AFTER_CREATE at a priority that runs after neutron's OVN handler, so the LRP (lrp-) already exists by now. @@ -220,19 +223,28 @@ def link_vxlan_network_ha_chassis_group(_resource, _event, _trigger, payload) -> return nb_idl = client._nb_idl - # Vxlan-gateway signal: the per-router HCG exists with chassis. - # VLAN/FLAT gateways have no router HCG and are handled by neutron. - router_hcg = nb_idl.lookup( - "HA_Chassis_Group", ovn_utils.ovn_name(router_id), default=None + # Skip if the per-network HCG is already populated — neutron handled it + # (VLAN/FLAT gateways). For vxlan the HCG is empty due to the neutron bug. + network_hcg = nb_idl.lookup( + "HA_Chassis_Group", ovn_utils.ovn_name(network_id), default=None ) - if not router_hcg or not router_hcg.ha_chassis: + if network_hcg and network_hcg.ha_chassis: + return + + # Derive the gateway chassis from every HA_Chassis row in the NB database. + # If exactly one distinct chassis_name exists, that is our gateway chassis. + # This avoids requiring the per-router HCG to exist first. + all_ha_chassis = nb_idl.db_list_rows("HA_Chassis").execute(check_error=True) + chassis_names = {row.chassis_name for row in all_ha_chassis} + if len(chassis_names) != 1: LOG.debug( - "No HA_Chassis_Group with chassis found for router %(router)s", - {"router": router_id}, + "Cannot determine unique gateway chassis for network %(net)s " + "(router %(router)s): found %(n)d distinct chassis name(s)", + {"net": network_id, "router": router_id, "n": len(chassis_names)}, ) return - chassis_prio = {hc.chassis_name: hc.priority for hc in router_hcg.ha_chassis} + chassis_prio = {chassis_names.pop(): 32767} lrp_name = ovn_utils.ovn_lrouter_port_name(port_id) LOG.info( diff --git a/python/neutron-understack/neutron_understack/tests/test_routers.py b/python/neutron-understack/neutron_understack/tests/test_routers.py index fc961cac8..71ad80fd8 100644 --- a/python/neutron-understack/neutron_understack/tests/test_routers.py +++ b/python/neutron-understack/neutron_understack/tests/test_routers.py @@ -216,12 +216,13 @@ def _payload(mocker, router_id="router-1", port_id="port-1", network_id="net-1") ) @staticmethod - def _client(mocker, router_hcg, lrp): + def _client(mocker, ha_chassis_rows, lrp, network_hcg=None): nb_idl = mocker.MagicMock() + nb_idl.db_list_rows.return_value.execute.return_value = ha_chassis_rows def lookup(table, _name, default=None): if table == "HA_Chassis_Group": - return router_hcg + return network_hcg # None by default (unpopulated vxlan HCG) if table == "Logical_Router_Port": return lrp return default @@ -241,19 +242,18 @@ def _patch_sync(self, mocker, hcg="net-hcg-uuid"): ) def test_populates_network_hcg_and_anchors_lrp(self, mocker): - hc = mocker.Mock(chassis_name="chassis-1", priority=10) - router_hcg = mocker.Mock(ha_chassis=[hc], name="neutron-router-1") + hc = mocker.Mock(chassis_name="chassis-1") lrp = mocker.Mock(ha_chassis_group=[]) - client, nb_idl = self._client(mocker, router_hcg, lrp) + client, nb_idl = self._client(mocker, ha_chassis_rows=[hc], lrp=lrp) sync = self._patch_sync(mocker) mocker.patch("neutron_understack.routers.ovn_client", return_value=client) link_vxlan_network_ha_chassis_group(None, None, None, self._payload(mocker)) - # Network HCG is populated from the router's chassis. + # Network HCG is populated using the chassis derived from the global table. assert sync.call_args.args[3] == "net-1" # network_id assert sync.call_args.args[4] == "router-1" # router_id - assert sync.call_args.args[5] == {"chassis-1": 10} # chassis_prio + assert sync.call_args.args[5] == {"chassis-1": 32767} # chassis_prio # Internal LRP is anchored to the unified network HCG. nb_idl.db_set.assert_called_once_with( "Logical_Router_Port", @@ -261,8 +261,8 @@ def test_populates_network_hcg_and_anchors_lrp(self, mocker): ("ha_chassis_group", "net-hcg-uuid"), ) - def test_no_router_hcg(self, mocker): - client, nb_idl = self._client(mocker, router_hcg=None, lrp=None) + def test_empty_ha_chassis_table(self, mocker): + client, nb_idl = self._client(mocker, ha_chassis_rows=[], lrp=None) sync = self._patch_sync(mocker) mocker.patch("neutron_understack.routers.ovn_client", return_value=client) @@ -271,9 +271,25 @@ def test_no_router_hcg(self, mocker): sync.assert_not_called() nb_idl.db_set.assert_not_called() - def test_router_hcg_without_chassis(self, mocker): - router_hcg = mocker.Mock(ha_chassis=[]) - client, nb_idl = self._client(mocker, router_hcg, lrp=None) + def test_multiple_chassis_names(self, mocker): + hc1 = mocker.Mock(chassis_name="chassis-1") + hc2 = mocker.Mock(chassis_name="chassis-2") + client, nb_idl = self._client(mocker, ha_chassis_rows=[hc1, hc2], lrp=None) + sync = self._patch_sync(mocker) + mocker.patch("neutron_understack.routers.ovn_client", return_value=client) + + link_vxlan_network_ha_chassis_group(None, None, None, self._payload(mocker)) + + sync.assert_not_called() + nb_idl.db_set.assert_not_called() + + def test_network_hcg_already_populated(self, mocker): + # VLAN/FLAT: neutron already populated the per-network HCG — we skip. + existing_hcg = mocker.Mock(ha_chassis=[mocker.Mock(chassis_name="chassis-1")]) + hc = mocker.Mock(chassis_name="chassis-1") + client, nb_idl = self._client( + mocker, ha_chassis_rows=[hc], lrp=None, network_hcg=existing_hcg + ) sync = self._patch_sync(mocker) mocker.patch("neutron_understack.routers.ovn_client", return_value=client) @@ -283,9 +299,8 @@ def test_router_hcg_without_chassis(self, mocker): nb_idl.db_set.assert_not_called() def test_lrp_missing_still_populates_network_hcg(self, mocker): - hc = mocker.Mock(chassis_name="chassis-1", priority=10) - router_hcg = mocker.Mock(ha_chassis=[hc], name="neutron-router-1") - client, nb_idl = self._client(mocker, router_hcg, lrp=None) + hc = mocker.Mock(chassis_name="chassis-1") + client, nb_idl = self._client(mocker, ha_chassis_rows=[hc], lrp=None) sync = self._patch_sync(mocker) mocker.patch("neutron_understack.routers.ovn_client", return_value=client)