Skip to content

Commit 168060a

Browse files
authored
Merge pull request vitessio#2721 from thompsonja/sandbox_tests
Update sandbox tests to use a Kubernetes StatefulSet. This helps the Orchestrator reparent test work.
2 parents c3362ec + d98417b commit 168060a

File tree

4 files changed

+16
-56
lines changed

4 files changed

+16
-56
lines changed

test/cluster/k8s_environment.py

Lines changed: 12 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import logging
66
import os
77
import subprocess
8-
import tempfile
98
import time
109

1110
from sandbox import kubernetes_components
@@ -152,54 +151,9 @@ def get_vtgate_conn(self, cell):
152151
self.vtgate_addrs[cell], 60)
153152

154153
def restart_mysql_task(self, tablet_name, task_name, is_alloc=False):
155-
# Kubernetes strips leading 0s from uid when creating pod names
156-
uid = str(self.get_tablet_uid(tablet_name)).lstrip('0')
157-
vttablet_pod_name = 'vttablet-%s' % uid
158-
159-
# Generate temp file with current k8s pod config. Many of these steps
160-
# can be deleted once StatefulSet is implemented and vttablets are
161-
# controlled via replication controller.
162-
tmpfile = None
163-
tmpfile = tempfile.NamedTemporaryFile()
164-
subprocess.Popen(['kubectl', 'get', 'pod', vttablet_pod_name,
165-
'--namespace=%s' % self.cluster_name, '-o', 'json'],
166-
stdout=tmpfile)
167-
tmpfile.flush()
168-
tmpfile.seek(0)
169-
170-
# Actually delete the pod and wait for it to be deleted.
154+
# Delete the whole pod, which deletes mysql + vttablet tasks.
171155
os.system('kubectl delete pod %s --namespace=%s' % (
172-
vttablet_pod_name, self.cluster_name))
173-
start_time = time.time()
174-
while time.time() - start_time < 120:
175-
logging.info('Waiting for pod %s to be deleted', vttablet_pod_name)
176-
pods = subprocess.check_output(
177-
['kubectl', 'get', 'pods', '--namespace=%s' % self.cluster_name])
178-
if vttablet_pod_name not in pods:
179-
logging.info('Pod deleted.')
180-
break
181-
time.sleep(5)
182-
183-
logging.info('Sleeping...')
184-
time.sleep(60)
185-
186-
# Create the pod again.
187-
os.system('cat %s | kubectl create --namespace=%s -f -' % (
188-
tmpfile.name, self.cluster_name))
189-
while time.time() - start_time < 120:
190-
logging.info('Waiting for pod %s to be running', vttablet_pod_name)
191-
pod = subprocess.check_output(
192-
['kubectl', 'get', 'pod', '--namespace=%s' % self.cluster_name,
193-
vttablet_pod_name, '-o', 'json'])
194-
try:
195-
if json.loads(pod)['status']['phase'] == 'Running':
196-
logging.info('Pod is running')
197-
break
198-
except ValueError:
199-
pass
200-
time.sleep(5)
201-
202-
self.vtctl_helper.execute_vtctl_command(['RefreshState', tablet_name])
156+
self.get_tablet_pod_name(tablet_name), self.cluster_name))
203157
return 0
204158

205159
def wait_for_good_failover_status(
@@ -235,9 +189,8 @@ def poll_for_varz(self, tablet_name, varz, timeout=60.0,
235189
timeout_error_msg += ' Condition "%s" not met.' % condition_msg
236190
raise base_environment.VitessEnvironmentError(timeout_error_msg)
237191
hostname = self.get_tablet_ip_port(tablet_name)
238-
tablet_pod = 'vttablet-%s' % self.get_tablet_uid(tablet_name)
239192
host_varz = subprocess.check_output([
240-
'kubectl', 'exec', '-ti', tablet_pod,
193+
'kubectl', 'exec', '-ti', self.get_tablet_pod_name(tablet_name),
241194
'--namespace=%s' % self.cluster_name,
242195
'curl', '%s/debug/vars' % hostname])
243196
if not host_varz:
@@ -249,10 +202,17 @@ def poll_for_varz(self, tablet_name, varz, timeout=60.0,
249202
def wait_for_healthy_tablets(self):
250203
return 0
251204

252-
def get_tablet_task_number(self, tablet_name):
205+
def get_tablet_pod_name(self, tablet_name):
253206
tablet_info = json.loads(self.vtctl_helper.execute_vtctl_command(
254207
['GetTablet', tablet_name]))
255-
return tablet_info['alias']['uid'] % 100
208+
# Hostname is <pod_name>.vttablet
209+
return tablet_info['hostname'].split('.')[0]
210+
211+
def get_tablet_task_number(self, tablet_name):
212+
# Tablet pod name under StatefulSet is
213+
# "<cell>-<keyspace>-<shard_number>-<tablet_type>-<task_number>"
214+
# Example: test1-foo-0-replica-0.
215+
return int(self.get_tablet_pod_name(tablet_name).split('-')[-1])
256216

257217
def automatic_reparent_available(self):
258218
"""Checks if the environment can automatically reparent."""

test/cluster/reparent_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def setUpClass(cls):
3535

3636
# seconds to wait for reparent to result in a new master
3737
cls.reparent_timeout_threshold = int(cls.test_params.get(
38-
'reparent_timeout_threshold', '30'))
38+
'reparent_timeout_threshold', '60'))
3939

4040
for keyspace, num_shards in zip(cls.env.keyspaces, cls.env.num_shards):
4141
for shard in xrange(num_shards):
@@ -187,7 +187,7 @@ def test_implicit_reparent(self):
187187

188188
def _test_explicit_emergency_reparent(self):
189189
# This test is currently disabled until the emergency reparent can be
190-
# fleshed fleshed out better. If a master tablet is killed and there is no
190+
# fleshed out better. If a master tablet is killed and there is no
191191
# tool performing automatic reparents (like Orchestrator), then there may be
192192
# a race condition between restarting the tablet (in which it would resume
193193
# being the master), and the EmergencyReparentShard call. This can sometimes

test/cluster/sandbox/example_sandbox.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ sandbox:
1717
# command line or randomly generated name.
1818
name: {{cluster_name}}
1919
node_count: 5
20-
machine_type: n1-standard-8
20+
machine_type: n1-standard-4
2121
# Application is application-specific, used by the derived Sandbox class.
2222
application:
2323
vtgate_count: 1

test/cluster/sandbox/vitess_kubernetes_sandbox.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def _generate_helm_values_config(self):
144144
cpu=self.app_options.mysql_cpu,
145145
),
146146
),
147-
controllerType='None',
147+
controllerType='StatefulSet',
148148
),
149149
vtgate=dict(
150150
serviceType='LoadBalancer', # Allows port forwarding.

0 commit comments

Comments
 (0)