Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 25 additions & 13 deletions python/neutron-understack/neutron_understack/routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,14 @@ def link_vxlan_network_ha_chassis_group(_resource, _event, _trigger, payload) ->
ports on that network reference the empty network HCG, so no chassis owns
them and routing/ARP breaks.

We do what link_network_ha_chassis_group would have done, but source the
chassis from the router HCG instead of the (empty) gateway LRP: populate the
unified network HCG with sync_ha_chassis_group_network_unified, then anchor
the internal router-interface LRP to that same HCG. External ports already
reference the unified network HCG, so populating it fixes them.
We do what link_network_ha_chassis_group would have done: populate the unified
network HCG with sync_ha_chassis_group_network_unified, then anchor the internal
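router-interface LRP to that same HCG. The gateway chassis is sourced from the
global HA_Chassis table: every row must share one chassis_name, otherwise the
handler logs at debug level and returns without changing anything. That chassis
is registered with priority 32767. Sourcing it this way lets the fix fire even
before the external gateway port is attached.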

For VLAN/FLAT networks neutron's handler already populates the network HCG
correctly; we detect that and return early.

Subscribed to ROUTER_INTERFACE/AFTER_CREATE at a priority that runs after
neutron's OVN handler, so the LRP (lrp-<port_id>) already exists by now.
Expand All @@ -220,19 +223,28 @@ def link_vxlan_network_ha_chassis_group(_resource, _event, _trigger, payload) ->
return
nb_idl = client._nb_idl

# Vxlan-gateway signal: the per-router HCG exists with chassis.
# VLAN/FLAT gateways have no router HCG and are handled by neutron.
router_hcg = nb_idl.lookup(
"HA_Chassis_Group", ovn_utils.ovn_name(router_id), default=None
# Skip if the per-network HCG is already populated — neutron handled it
# (VLAN/FLAT gateways). For vxlan the HCG is empty due to the neutron bug.
network_hcg = nb_idl.lookup(
"HA_Chassis_Group", ovn_utils.ovn_name(network_id), default=None
)
if not router_hcg or not router_hcg.ha_chassis:
if network_hcg and network_hcg.ha_chassis:
return

# Derive the gateway chassis from every HA_Chassis row in the NB database.
# If exactly one distinct chassis_name exists, that is our gateway chassis;
# with zero or several distinct names we cannot pick one, so we log and return.
# This avoids requiring the per-router HCG to exist first.
all_ha_chassis = nb_idl.db_list_rows("HA_Chassis").execute(check_error=True)
chassis_names = {row.chassis_name for row in all_ha_chassis}
if len(chassis_names) != 1:
LOG.debug(
"No HA_Chassis_Group with chassis found for router %(router)s",
{"router": router_id},
"Cannot determine unique gateway chassis for network %(net)s "
"(router %(router)s): found %(n)d distinct chassis name(s)",
{"net": network_id, "router": router_id, "n": len(chassis_names)},
)
return

chassis_prio = {hc.chassis_name: hc.priority for hc in router_hcg.ha_chassis}
chassis_prio = {chassis_names.pop(): 32767}
lrp_name = ovn_utils.ovn_lrouter_port_name(port_id)

LOG.info(
Expand Down
45 changes: 30 additions & 15 deletions python/neutron-understack/neutron_understack/tests/test_routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,12 +216,13 @@ def _payload(mocker, router_id="router-1", port_id="port-1", network_id="net-1")
)

@staticmethod
def _client(mocker, router_hcg, lrp):
def _client(mocker, ha_chassis_rows, lrp, network_hcg=None):
nb_idl = mocker.MagicMock()
nb_idl.db_list_rows.return_value.execute.return_value = ha_chassis_rows

def lookup(table, _name, default=None):
if table == "HA_Chassis_Group":
return router_hcg
return network_hcg # None by default (unpopulated vxlan HCG)
if table == "Logical_Router_Port":
return lrp
return default
Expand All @@ -241,28 +242,27 @@ def _patch_sync(self, mocker, hcg="net-hcg-uuid"):
)

def test_populates_network_hcg_and_anchors_lrp(self, mocker):
hc = mocker.Mock(chassis_name="chassis-1", priority=10)
router_hcg = mocker.Mock(ha_chassis=[hc], name="neutron-router-1")
hc = mocker.Mock(chassis_name="chassis-1")
lrp = mocker.Mock(ha_chassis_group=[])
client, nb_idl = self._client(mocker, router_hcg, lrp)
client, nb_idl = self._client(mocker, ha_chassis_rows=[hc], lrp=lrp)
sync = self._patch_sync(mocker)
mocker.patch("neutron_understack.routers.ovn_client", return_value=client)

link_vxlan_network_ha_chassis_group(None, None, None, self._payload(mocker))

# Network HCG is populated from the router's chassis.
# Network HCG is populated using the chassis derived from the global table.
assert sync.call_args.args[3] == "net-1" # network_id
assert sync.call_args.args[4] == "router-1" # router_id
assert sync.call_args.args[5] == {"chassis-1": 10} # chassis_prio
assert sync.call_args.args[5] == {"chassis-1": 32767} # chassis_prio
# Internal LRP is anchored to the unified network HCG.
nb_idl.db_set.assert_called_once_with(
"Logical_Router_Port",
"lrp-port-1",
("ha_chassis_group", "net-hcg-uuid"),
)

def test_no_router_hcg(self, mocker):
client, nb_idl = self._client(mocker, router_hcg=None, lrp=None)
def test_empty_ha_chassis_table(self, mocker):
client, nb_idl = self._client(mocker, ha_chassis_rows=[], lrp=None)
sync = self._patch_sync(mocker)
mocker.patch("neutron_understack.routers.ovn_client", return_value=client)

Expand All @@ -271,9 +271,25 @@ def test_no_router_hcg(self, mocker):
sync.assert_not_called()
nb_idl.db_set.assert_not_called()

def test_router_hcg_without_chassis(self, mocker):
router_hcg = mocker.Mock(ha_chassis=[])
client, nb_idl = self._client(mocker, router_hcg, lrp=None)
def test_multiple_chassis_names(self, mocker):
    """Ambiguous gateway chassis: nothing is synced and no LRP is touched.

    The HA_Chassis listing returns two rows whose chassis_name values
    differ, so the handler is expected to make no sync call and no db_set
    call.
    """
    # Two HA_Chassis rows that disagree on chassis_name.
    rows = [
        mocker.Mock(chassis_name=name) for name in ("chassis-1", "chassis-2")
    ]
    client, nb_idl = self._client(mocker, ha_chassis_rows=rows, lrp=None)
    sync = self._patch_sync(mocker)
    mocker.patch("neutron_understack.routers.ovn_client", return_value=client)

    payload = self._payload(mocker)
    link_vxlan_network_ha_chassis_group(None, None, None, payload)

    # Neither the network HCG sync nor the LRP update may have happened.
    sync.assert_not_called()
    nb_idl.db_set.assert_not_called()

def test_network_hcg_already_populated(self, mocker):
# VLAN/FLAT: neutron already populated the per-network HCG — we skip.
existing_hcg = mocker.Mock(ha_chassis=[mocker.Mock(chassis_name="chassis-1")])
hc = mocker.Mock(chassis_name="chassis-1")
client, nb_idl = self._client(
mocker, ha_chassis_rows=[hc], lrp=None, network_hcg=existing_hcg
)
sync = self._patch_sync(mocker)
mocker.patch("neutron_understack.routers.ovn_client", return_value=client)

Expand All @@ -283,9 +299,8 @@ def test_router_hcg_without_chassis(self, mocker):
nb_idl.db_set.assert_not_called()

def test_lrp_missing_still_populates_network_hcg(self, mocker):
hc = mocker.Mock(chassis_name="chassis-1", priority=10)
router_hcg = mocker.Mock(ha_chassis=[hc], name="neutron-router-1")
client, nb_idl = self._client(mocker, router_hcg, lrp=None)
hc = mocker.Mock(chassis_name="chassis-1")
client, nb_idl = self._client(mocker, ha_chassis_rows=[hc], lrp=None)
sync = self._patch_sync(mocker)
mocker.patch("neutron_understack.routers.ovn_client", return_value=client)

Expand Down
Loading