From ea57bff05a5d8e480a7d21cd67f4479422a93305 Mon Sep 17 00:00:00 2001 From: Alexandre Rulleau Date: Mon, 11 May 2026 13:01:58 +0200 Subject: [PATCH 1/8] ci: add --show-slow 10000 to valgrind run-tests.php invocation (PHP >= 7.2) Lists tests slower than 10s with their durations at the end of the run. The --show-slow flag only exists in PHP 7.2+, so it's gated by a runtime PHP_VERSION_ID check to avoid breaking PHP 7.0/7.1 jobs. Used to verify the hypothesis that recent CI failures are caused by tests exceeding 1000s under valgrind (triggering the number_format E_WARNING in run-tests.php's record() method). --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1d017c7308..963228d77f 100644 --- a/Makefile +++ b/Makefile @@ -211,7 +211,8 @@ test_extension_ci: $(SO_FILE) $(TEST_FILES) $(TEST_STUB_FILES) $(BUILD_DIR)/run- \ export TEST_PHP_JUNIT=$(JUNIT_RESULTS_DIR)/valgrind-extension-test.xml; \ export TEST_PHP_OUTPUT=$(JUNIT_RESULTS_DIR)/valgrind-run-tests.out; \ - DD_SPAWN_WORKER_STABLE_TRAMPOLINE=1 $(ALL_TEST_ENV_OVERRIDE) $(RUN_TESTS_CMD) -d extension=$(SO_FILE) -m -s $$TEST_PHP_OUTPUT $(BUILD_DIR)/$(TESTS) && ! grep -e '^LEAKED TEST SUMMARY' $$TEST_PHP_OUTPUT; \ + SHOW_SLOW=$$(php -r 'echo PHP_VERSION_ID >= 70200 ? "--show-slow 10000" : "";'); \ + DD_SPAWN_WORKER_STABLE_TRAMPOLINE=1 $(ALL_TEST_ENV_OVERRIDE) $(RUN_TESTS_CMD) $$SHOW_SLOW -d extension=$(SO_FILE) -m -s $$TEST_PHP_OUTPUT $(BUILD_DIR)/$(TESTS) && ! grep -e '^LEAKED TEST SUMMARY' $$TEST_PHP_OUTPUT; \ ) build_tea: TEA_BUILD_TESTS=ON From f4edb28610d406a162f6b0f6785f2cb86b8ac6e8 Mon Sep 17 00:00:00 2001 From: Alexandre Rulleau Date: Mon, 11 May 2026 15:00:33 +0200 Subject: [PATCH 2/8] fix(ci): patch run-tests.php getTimer() to avoid E_WARNING on long valgrind tests PHP's run-tests.php getTimer() returns number_format($elapsed, 4) which produces a comma-formatted string (e.g. "1,500.0000") when a test takes >=1000 seconds. PHP 8.0+ raises E_WARNING when that string is used in += arithmetic inside record(), causing the test runner to abort with exit code 2. Patch getTimer() at build time so it returns a float (via round()) instead. --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 963228d77f..2e2c9cbb5a 100644 --- a/Makefile +++ b/Makefile @@ -112,6 +112,7 @@ $(BUILD_DIR)/configure: $(M4_FILES) $(BUILD_DIR)/ddtrace.sym $(BUILD_DIR)/VERSIO $(BUILD_DIR)/run-tests.php: $(if $(ASSUME_COMPILED),, $(BUILD_DIR)/configure) $(if $(ASSUME_COMPILED), cp $(shell dirname $(shell realpath $(shell which phpize)))/../lib/php/build/run-tests.php $(BUILD_DIR)/run-tests.php) sed -i 's/\bdl(/(bool)(/' $(BUILD_DIR)/run-tests.php # this dl() stuff in run-tests.php is for --EXTENSIONS-- sections, which we don't use; just strip it away (see https://github.com/php/php-src/issues/15367) + sed -i 's/return number_format($$this->rootSuite/return round($$this->rootSuite/' $(BUILD_DIR)/run-tests.php # number_format returns a comma-formatted string for elapsed >= 1000s (e.g. "1,500.0000"), which is non-numeric and triggers E_WARNING in PHP 8.0+ when used in += arithmetic inside record() # ensure list of rust files is up to date $(BUILD_DIR)/.rust_files_list: $(RUST_FILES) From b371f6cd93a1226034d4793e17b6312c678203bc Mon Sep 17 00:00:00 2001 From: Alexandre Rulleau Date: Mon, 11 May 2026 17:27:50 +0200 Subject: [PATCH 3/8] ci: wait for request-replayer readiness before running tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CI .base_test before_script runs wait-for-service-ready.sh, but WAIT_FOR only listed test-agent — request-replayer was never gated on, even though it is in the agent_httpbin_service() service block used by test_extension_ci, ASAN test_c, and ASAN test_c with multiple observers. Add a request-replayer case to detect_service_type (probing /replay, a read-only endpoint that returns valid JSON when php -S is up), and add request-replayer:80 to WAIT_FOR for the three affected job blocks. PHP Language Tests is unchanged: it only declares test-agent in services. --- .gitlab/generate-tracer.php | 6 +++--- .gitlab/wait-for-service-ready.sh | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.gitlab/generate-tracer.php b/.gitlab/generate-tracer.php index 42510ff8ce..ad0b8df75d 100644 --- a/.gitlab/generate-tracer.php +++ b/.gitlab/generate-tracer.php @@ -232,7 +232,7 @@ function before_script_steps($with_docker_auth = false) { artifacts: true retry: 2 variables: - WAIT_FOR: test-agent:9126 + WAIT_FOR: test-agent:9126 request-replayer:80 KUBERNETES_CPU_REQUEST: 6 KUBERNETES_CPU_LIMIT: 6 KUBERNETES_MEMORY_REQUEST: 4Gi @@ -305,7 +305,7 @@ function before_script_steps($with_docker_auth = false) { ARCH: "amd64" artifacts: true variables: - WAIT_FOR: test-agent:9126 + WAIT_FOR: test-agent:9126 request-replayer:80 KUBERNETES_CPU_REQUEST: 12 MAX_TEST_PARALLELISM: 4 PHP_MAJOR_MINOR: "" @@ -360,7 +360,7 @@ function before_script_steps($with_docker_auth = false) { ARCH: "amd64" artifacts: true variables: - WAIT_FOR: test-agent:9126 + WAIT_FOR: test-agent:9126 request-replayer:80 KUBERNETES_CPU_REQUEST: 12 MAX_TEST_PARALLELISM: 4 PHP_MAJOR_MINOR: "" diff --git a/.gitlab/wait-for-service-ready.sh b/.gitlab/wait-for-service-ready.sh index 45ee96399d..0f22493bfd 100755 --- a/.gitlab/wait-for-service-ready.sh +++ b/.gitlab/wait-for-service-ready.sh @@ -6,6 +6,7 @@ detect_service_type() { local host=${1} case ${host} in test-agent) echo "test-agent" ;; + request-replayer) echo "request-replayer" ;; mysql-integration) echo "mysql" ;; elasticsearch*) echo "elasticsearch" ;; zookeeper*) echo "zookeeper" ;; @@ -38,6 +39,13 @@ wait_for_single_service() { return 0 fi ;; + request-replayer) + # /replay is read-only and always returns valid JSON when the PHP server is up + if curl -sf "http://${HOST}:${PORT}/replay" > /dev/null 2>&1; then + echo "request-replayer is ready" + return 0 + fi + ;; mysql) if mysqladmin ping -h"${HOST}" --silent 2>/dev/null; then echo "MySQL is ready" From 674b774d8a4da315e2aa269e965899062ab344eb Mon Sep 17 00:00:00 2001 From: Alexandre Rulleau Date: Mon, 11 May 2026 17:32:52 +0200 Subject: [PATCH 4/8] ci(fix): accept any HTTP response from request-replayer health probe curl -sf fails on 4xx, which would falsely report not-ready if /replay returns 4xx when no dump exists yet (the natural startup state). Drop -f so any HTTP response proves php -S is up and executing index.php; connection failure remains the only signal of an unhealthy service. --- .gitlab/wait-for-service-ready.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitlab/wait-for-service-ready.sh b/.gitlab/wait-for-service-ready.sh index 0f22493bfd..2359920632 100755 --- a/.gitlab/wait-for-service-ready.sh +++ b/.gitlab/wait-for-service-ready.sh @@ -40,8 +40,9 @@ wait_for_single_service() { fi ;; request-replayer) - # /replay is read-only and always returns valid JSON when the PHP server is up - if curl -sf "http://${HOST}:${PORT}/replay" > /dev/null 2>&1; then + # Any HTTP response (2xx/3xx/4xx/5xx) proves php -S is up and executing index.php. + # /replay may return 4xx when no data has been dumped yet, so do not use -f. + if curl -s -o /dev/null "http://${HOST}:${PORT}/replay"; then echo "request-replayer is ready" return 0 fi From ef06ce73347b6865c385a52aea204f44d80596dd Mon Sep 17 00:00:00 2001 From: Alexandre Rulleau Date: Tue, 12 May 2026 13:51:05 +0200 Subject: [PATCH 5/8] ci: raise --show-slow threshold from 10s to 30s Filters the SLOW TEST SUMMARY to tests that meaningfully approach the per-test 300s timeout, trimming noise from the long tail. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2e2c9cbb5a..a3758cf921 100644 --- a/Makefile +++ b/Makefile @@ -212,7 +212,7 @@ test_extension_ci: $(SO_FILE) $(TEST_FILES) $(TEST_STUB_FILES) $(BUILD_DIR)/run- \ export TEST_PHP_JUNIT=$(JUNIT_RESULTS_DIR)/valgrind-extension-test.xml; \ export TEST_PHP_OUTPUT=$(JUNIT_RESULTS_DIR)/valgrind-run-tests.out; \ - SHOW_SLOW=$$(php -r 'echo PHP_VERSION_ID >= 70200 ? "--show-slow 10000" : "";'); \ + SHOW_SLOW=$$(php -r 'echo PHP_VERSION_ID >= 70200 ? "--show-slow 30000" : "";'); \ DD_SPAWN_WORKER_STABLE_TRAMPOLINE=1 $(ALL_TEST_ENV_OVERRIDE) $(RUN_TESTS_CMD) $$SHOW_SLOW -d extension=$(SO_FILE) -m -s $$TEST_PHP_OUTPUT $(BUILD_DIR)/$(TESTS) && ! grep -e '^LEAKED TEST SUMMARY' $$TEST_PHP_OUTPUT; \ ) From dced1f49fb7e3d86b0e44fdd7d27d8b736a09e3a Mon Sep 17 00:00:00 2001 From: Alexandre Rulleau Date: Fri, 15 May 2026 12:20:00 +0200 Subject: [PATCH 6/8] test(telemetry): accept integrations bundled in app-started payload On PHP 8.4 the test sandbox integration registers early enough that the worker bundles it into the app-started payload's `integrations` field (see libdd-telemetry build_app_started), so no separate app-integrations-change event is emitted and the existing assertion loop times out. Accept both delivery paths: the dedicated app-integrations-change event, and the integrations array embedded in app-started. The expected output shape is identical because Rust Integration struct field order matches. --- tests/ext/telemetry/integration.phpt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/ext/telemetry/integration.phpt b/tests/ext/telemetry/integration.phpt index 8e74be2293..b5ee5aea92 100644 --- a/tests/ext/telemetry/integration.phpt +++ b/tests/ext/telemetry/integration.phpt @@ -60,6 +60,13 @@ namespace var_dump($json["payload"]); break 3; } + // The integrations may also be bundled into the app-started payload + // depending on registration timing (see libdd-telemetry build_app_started). + if ($json["request_type"] == "app-started" + && !empty($json["payload"]["integrations"])) { + var_dump(["integrations" => $json["payload"]["integrations"]]); + break 3; + } } } } From 1c5e65c9f1507c9d526fdcde9427ae5bce558507 Mon Sep 17 00:00:00 2001 From: Alexandre Rulleau Date: Fri, 15 May 2026 12:19:09 +0200 Subject: [PATCH 7/8] fix(test): poll for dynamic config in dynamic_config_update.phpt to fix flaky PHP 8.0 valgrind The test guarded a sleep(20) with `if (ini_get(...) != 0.5)`, but the wrapped sleep() blocks SIGVTALRM during the call. Under valgrind on PHP 8.0, the heavier signal traffic can cut the sleep short before the sidecar fetches the config from the request-replayer and propagates it to INI globals, leaving every dynamic value at its default. Replace the single sleep() with a 100ms polling loop bounded at 20s total. Each usleep() iteration goes through the same SIGVTALRM-blocking wrapper, which calls ddtrace_check_for_new_config_now() on unblock under valgrind, giving the test repeated, well-defined opportunities to apply the config while still finishing early on the happy path. --- tests/ext/remote_config/dynamic_config_update.phpt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/ext/remote_config/dynamic_config_update.phpt b/tests/ext/remote_config/dynamic_config_update.phpt index 48be3ca6ad..ed7902f5db 100644 --- a/tests/ext/remote_config/dynamic_config_update.phpt +++ b/tests/ext/remote_config/dynamic_config_update.phpt @@ -50,8 +50,14 @@ put_dynamic_config_file([ // submit span data \DDTrace\start_span(); -if (ini_get("datadog.trace.sample_rate") != 0.5) { - sleep(20); // signal interrupts interrupt the sleep(). +// Poll until the dynamic config is applied. The wrapped sleep() blocks +// SIGVTALRM during the call, so a single sleep(20) can be cut short by +// unrelated signals (notably under valgrind, where signal traffic is heavier). +// Looping ensures we keep waiting up to ~20s total for the sidecar to fetch +// from the request-replayer and propagate the config to INI globals. +$deadline = microtime(true) + 20; +while (ini_get("datadog.trace.sample_rate") != 0.5 && microtime(true) < $deadline) { + usleep(100000); } var_dump(ini_get("datadog.trace.sample_rate")); From d26b1623c8ffb6f2d7b50f9fa98f801ab34a6024 Mon Sep 17 00:00:00 2001 From: Alexandre Rulleau Date: Fri, 15 May 2026 12:31:53 +0200 Subject: [PATCH 8/8] fix(test): use await_agent_info to deflake dd_trace_agent_env.phpt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous warmup was a no-op: $start = microtime(true); ... usleep(floor(microtime(true) - $start) * 100000); floor() of a sub-second float yields 0, so usleep(0) did nothing. Also, the sleep happened *after* the span had already been sampled with an empty env, so even a real delay wouldn't have helped. Replace it with dd_trace_internal_fn('await_agent_info') called before opening the span — same pattern that client_side_stats*.phpt already uses to gate on the sidecar having read /info from the agent. --- tests/ext/request-replayer/dd_trace_agent_env.phpt | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/tests/ext/request-replayer/dd_trace_agent_env.phpt b/tests/ext/request-replayer/dd_trace_agent_env.phpt index 50de644e70..036d09f8f3 100644 --- a/tests/ext/request-replayer/dd_trace_agent_env.phpt +++ b/tests/ext/request-replayer/dd_trace_agent_env.phpt @@ -30,19 +30,10 @@ datadog.trace.agent_test_session_token=dd_trace_agent_env --FILE-- waitForDataAndReplay(); -usleep(floor(microtime(true) - $start) * 100000); - \DDTrace\close_span(); var_dump($span->env);