Skip to content

Commit 26becef

Browse files
committed
kvm: Acquire lock when running security group Python script
It could happen that when multiple instances are starting at the same time on a KVM host, the Agent spawns multiple instances of security_group.py which both try to modify iptables/ebtables rules. This results in one of the two processes failing. The instance is still started, but it doesn't have any IP connectivity due to the failed programming of the security groups. This modification lets the script acquire an exclusive lock on a file so that only one instance of the script talks to iptables/ebtables at once. Other instances of the script which start will poll every 500ms to check whether they can obtain the lock, and otherwise execute anyway after 15 seconds. The lock will be released as soon as the script exits, which is usually within a few hundred ms.
1 parent 7017a82 commit 26becef

File tree

1 file changed

+27
-1
lines changed

1 file changed

+27
-1
lines changed

scripts/vm/network/security_group.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,11 @@
2626
from optparse import OptionParser, OptionGroup, OptParseError, BadOptionError, OptionError, OptionConflictError, OptionValueError
2727
import re
2828
import libvirt
29+
import fcntl
30+
import time
2931

3032
logpath = "/var/run/cloud/" # FIXME: Logs should reside in /var/log/cloud
33+
lock_file = "/var/lock/cloudstack_security_group.lock"
3134
iptables = Command("iptables")
3235
bash = Command("/bin/bash")
3336
ebtables = Command("ebtables")
@@ -36,6 +39,21 @@
3639
hyper = cfo.getEntry("hypervisor.type")
3740
if hyper == "lxc":
3841
driver = "lxc:///"
42+
43+
lock_handle = None
44+
45+
def obtain_file_lock(path):
    """Try to take an exclusive, non-blocking flock() on the file at `path`.

    On success the open file object is stored in the module-level
    `lock_handle`, so the lock stays held for as long as that handle
    stays open.

    :param path: path of the lock file; it is created if it does not exist.
    :return: True if the lock was obtained, False if the file could not be
             opened or the lock could not be taken.
    """
    global lock_handle

    try:
        # Rebinding the global drops any handle left from an earlier call,
        # exactly as the previous implementation did.
        lock_handle = open(path, 'w')
    except IOError:
        return False

    try:
        fcntl.flock(lock_handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        # The lock was not obtained: do not keep an open, unlocked handle
        # around (one would otherwise be left behind per failed attempt).
        lock_handle.close()
        lock_handle = None
        return False

    return True
56+
3957
def execute(cmd):
4058
logging.debug(cmd)
4159
return bash("-c", cmd).stdout
@@ -303,7 +321,7 @@ def default_network_rules_systemvm(vm_name, localbrname):
303321
for bridge in bridges:
304322
if bridge != localbrname:
305323
if not addFWFramework(bridge):
306-
return False
324+
return False
307325
brfw = getBrfw(bridge)
308326
vifs = getVifsForBridge(vm_name, bridge)
309327
for vif in vifs:
@@ -1029,6 +1047,14 @@ def addFWFramework(brname):
10291047
sys.exit(1)
10301048
cmd = args[0]
10311049
logging.debug("Executing command: " + str(cmd))
1050+
1051+
for i in range(0, 30):
1052+
if obtain_file_lock(lock_file) is False:
1053+
logging.warn("Lock on %s is being held by other process. Waiting for release." % lock_file)
1054+
time.sleep(0.5)
1055+
else:
1056+
break
1057+
10321058
if cmd == "can_bridge_firewall":
10331059
can_bridge_firewall(args[1])
10341060
elif cmd == "default_network_rules":

0 commit comments

Comments
 (0)