diff --git a/devservices/commands/up.py b/devservices/commands/up.py index 03aa7085..615fa95f 100644 --- a/devservices/commands/up.py +++ b/devservices/commands/up.py @@ -157,7 +157,7 @@ def up(args: Namespace) -> None: def _pull_dependency_images( cmd: DockerComposeCommand, current_env: dict[str, str], status: Status ) -> None: - run_cmd(cmd.full_command, current_env) + run_cmd(cmd.full_command, current_env, retries=4) for dependency in cmd.services: status.info(f"Pulled image for {dependency}") @@ -199,6 +199,7 @@ def _up( ) ), ) + # Pull all images in parallel status.info("Pulling images") pull_commands = get_docker_compose_commands_to_run( diff --git a/devservices/utils/docker_compose.py b/devservices/utils/docker_compose.py index 8eba334d..d8a26cf5 100644 --- a/devservices/utils/docker_compose.py +++ b/devservices/utils/docker_compose.py @@ -5,7 +5,9 @@ import os import platform import re +import shlex import subprocess +import time from typing import cast from typing import NamedTuple @@ -290,15 +292,51 @@ def get_docker_compose_commands_to_run( return docker_compose_commands -def run_cmd(cmd: list[str], env: dict[str, str]) -> subprocess.CompletedProcess[str]: +def run_cmd( + cmd: list[str], + env: dict[str, str], + retries: int = 0, + retry_initial_wait: float = 5.0, + retry_exp: float = 2.0, +) -> subprocess.CompletedProcess[str]: + if retries < 0: + raise ValueError("Retries cannot be negative") + logger = logging.getLogger(LOGGER_NAME) - try: - logger.debug("Running command: %s", " ".join(cmd)) - return subprocess.run(cmd, check=True, capture_output=True, text=True, env=env) - except subprocess.CalledProcessError as e: - raise DockerComposeError( - command=" ".join(cmd), - returncode=e.returncode, - stdout=e.stdout, - stderr=e.stderr, - ) from e + console = Console() + cmd_pretty = shlex.join(cmd) + + proc = None + retries += 1 # initial try + + while retries > 0: + retries -= 1 + try: + logger.debug(f"Running command: {cmd_pretty}") + proc = subprocess.run( + cmd, check=True, capture_output=True, text=True, env=env + ) + return proc + except subprocess.CalledProcessError as e: + err = DockerComposeError( + command=cmd_pretty, + returncode=e.returncode, + stdout=e.stdout, + stderr=e.stderr, + ) + if retries == 0: + raise err + + console.warning( + f""" +Error: {err} + +Retrying in {retry_initial_wait}s ({retries} retries left)... +""" + ) + time.sleep(retry_initial_wait) + retry_initial_wait *= retry_exp + + # make mypy happy + assert proc is not None + return proc diff --git a/tests/commands/test_up.py b/tests/commands/test_up.py index 9cb00fdb..5f60d86e 100644 --- a/tests/commands/test_up.py +++ b/tests/commands/test_up.py @@ -5,6 +5,7 @@ from argparse import Namespace from pathlib import Path from unittest import mock +from unittest.mock import call import pytest @@ -339,6 +340,220 @@ def test_up_error( assert "Starting redis" not in captured.out.strip() +@mock.patch("time.sleep") +@mock.patch("devservices.utils.state.State.remove_service_entry") +@mock.patch("devservices.utils.state.State.update_service_entry") +@mock.patch("devservices.commands.up._create_devservices_network") +@mock.patch("devservices.commands.up.check_all_containers_healthy") +@mock.patch( + "devservices.utils.docker_compose.get_non_remote_services", + return_value={"clickhouse", "redis"}, +) +def test_up_pull_error_timeout( + mock_get_non_remote_services: mock.Mock, + mock_check_all_containers_healthy: mock.Mock, + mock_create_devservices_network: mock.Mock, + mock_update_service_entry: mock.Mock, + mock_remove_service_entry: mock.Mock, + mock_sleep: mock.Mock, + capsys: pytest.CaptureFixture[str], + tmp_path: Path, +) -> None: + config = { + "x-sentry-service-config": { + "version": 0.1, + "service_name": "example-service", + "dependencies": { + "redis": {"description": "Redis"}, + "clickhouse": {"description": "Clickhouse"}, + }, + "modes": {"default": ["redis", "clickhouse"]}, + }, + "services": { + "redis": {"image": "redis:6.2.14-alpine"}, + "clickhouse": { + "image": "altinity/clickhouse-server:23.8.11.29.altinitystable" + }, + }, + } + + create_config_file(tmp_path, config) + os.chdir(tmp_path) + + args = Namespace(service_name=None, debug=False, mode="default") + + with pytest.raises(SystemExit): + with mock.patch( + "devservices.utils.docker_compose.subprocess.run", + side_effect=[ + subprocess.CalledProcessError( + returncode=1, output="", stderr="TLS handshake timeout", cmd="" + ), + subprocess.CalledProcessError( + returncode=1, output="", stderr="TLS handshake timeout", cmd="" + ), + subprocess.CalledProcessError( + returncode=1, output="", stderr="TLS handshake timeout", cmd="" + ), + subprocess.CalledProcessError( + returncode=1, output="", stderr="TLS handshake timeout", cmd="" + ), + subprocess.CalledProcessError( + returncode=1, output="", stderr="TLS handshake timeout", cmd="" + ), + ], + ) as mock_subprocess_run: + up(args) + + # assert multiple failed calls + assert ( + mock_subprocess_run.mock_calls + == [ + call( + [ + "docker", + "compose", + "-p", + "example-service", + "-f", + f"{tmp_path}/{DEVSERVICES_DIR_NAME}/{CONFIG_FILE_NAME}", + "pull", + "clickhouse", + "redis", + ], + check=True, + capture_output=True, + text=True, + env=mock.ANY, + ) + ] + # default is 4 retries (5 tries total) + * 5 + ) + + mock_create_devservices_network.assert_called_once() + mock_check_all_containers_healthy.assert_not_called() + # Capture the printed output + captured = capsys.readouterr() + + assert ( + "Failed to start example-service: TLS handshake timeout" in captured.out.strip() + ) + + +@mock.patch("time.sleep") +@mock.patch("devservices.utils.state.State.remove_service_entry") +@mock.patch("devservices.utils.state.State.update_service_entry") +@mock.patch("devservices.commands.up._create_devservices_network") +@mock.patch("devservices.commands.up.check_all_containers_healthy") +@mock.patch( + "devservices.utils.docker_compose.get_non_remote_services", + return_value={"clickhouse", "redis"}, +) +@mock.patch( + "devservices.commands.up.get_container_names_for_project", + return_value=["x", "y"], +) +def test_up_pull_error_eventual_success( + mock_get_container_names_for_project: mock.Mock, + mock_get_non_remote_services: mock.Mock, + mock_check_all_containers_healthy: mock.Mock, + mock_create_devservices_network: mock.Mock, + mock_update_service_entry: mock.Mock, + mock_remove_service_entry: mock.Mock, + mock_sleep: mock.Mock, + capsys: pytest.CaptureFixture[str], + tmp_path: Path, +) -> None: + config = { + "x-sentry-service-config": { + "version": 0.1, + "service_name": "example-service", + "dependencies": { + "redis": {"description": "Redis"}, + "clickhouse": {"description": "Clickhouse"}, + }, + "modes": {"default": ["redis", "clickhouse"]}, + }, + "services": { + "redis": {"image": "redis:6.2.14-alpine"}, + "clickhouse": { + "image": "altinity/clickhouse-server:23.8.11.29.altinitystable" + }, + }, + } + + create_config_file(tmp_path, config) + os.chdir(tmp_path) + + args = Namespace(service_name=None, debug=False, mode="default") + + with mock.patch( + "devservices.utils.docker_compose.subprocess.run", + side_effect=[ + subprocess.CalledProcessError( + returncode=1, output="", stderr="TLS handshake timeout", cmd="" + ), + subprocess.CalledProcessError( + returncode=1, output="", stderr="TLS handshake timeout", cmd="" + ), + subprocess.CompletedProcess( + args=(), + returncode=0, + ), + subprocess.CompletedProcess( + args=(), + returncode=0, + ), + ], + ) as mock_subprocess_run: + up(args) + + assert mock_subprocess_run.mock_calls == [ + call( + [ + "docker", + "compose", + "-p", + "example-service", + "-f", + f"{tmp_path}/{DEVSERVICES_DIR_NAME}/{CONFIG_FILE_NAME}", + "pull", + "clickhouse", + "redis", + ], + check=True, + capture_output=True, + text=True, + env=mock.ANY, + ) + ] * 3 + [ + call( + [ + "docker", + "compose", + "-p", + "example-service", + "-f", + f"{tmp_path}/{DEVSERVICES_DIR_NAME}/{CONFIG_FILE_NAME}", + "up", + "clickhouse", + "redis", + "-d", + ], + check=True, + capture_output=True, + text=True, + env=mock.ANY, + ) + ] + + mock_create_devservices_network.assert_called_once() + captured = capsys.readouterr() + + assert "example-service started" in captured.out.strip() + + @mock.patch("devservices.utils.state.State.remove_service_entry") @mock.patch("devservices.utils.state.State.update_service_entry") @mock.patch("devservices.commands.up._create_devservices_network")