From ee9f71e18922cf7fdb7926236b729bc6d0e32ca6 Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Mon, 9 Feb 2026 18:47:45 +0800 Subject: [PATCH 1/5] fix(tool): add retry to api compare tests --- src/tool/subcommands/api_cmd.rs | 6 +++++ .../subcommands/api_cmd/api_compare_tests.rs | 22 ++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/tool/subcommands/api_cmd.rs b/src/tool/subcommands/api_cmd.rs index c837355e91d6..be9d42cd9fb8 100644 --- a/src/tool/subcommands/api_cmd.rs +++ b/src/tool/subcommands/api_cmd.rs @@ -152,6 +152,10 @@ pub enum ApiCommands { /// Report detail level: full (default), failure-only, or summary #[arg(long, value_enum, default_value = "full")] report_mode: ReportMode, + + /// Number of retries for each test + #[arg(long, default_value = "2")] + n_retries: usize, }, /// Generates RPC test snapshots from test dump files and a Forest database. /// @@ -295,6 +299,7 @@ impl ApiCommands { test_criteria_overrides, report_dir, report_mode, + n_retries, } => { let forest = Arc::new(rpc::Client::from_url(forest)); let lotus = Arc::new(rpc::Client::from_url(lotus)); @@ -314,6 +319,7 @@ impl ApiCommands { &test_criteria_overrides, report_dir, report_mode, + n_retries, ) .await?; } diff --git a/src/tool/subcommands/api_cmd/api_compare_tests.rs b/src/tool/subcommands/api_cmd/api_compare_tests.rs index 3c26e0f8ee82..7ebbb6a73a26 100644 --- a/src/tool/subcommands/api_cmd/api_compare_tests.rs +++ b/src/tool/subcommands/api_cmd/api_compare_tests.rs @@ -2932,6 +2932,7 @@ pub(super) async fn run_tests( test_criteria_overrides: &[TestCriteriaOverride], report_dir: Option, report_mode: ReportMode, + n_retries: usize, ) -> anyhow::Result<()> { let forest = Into::>::into(forest); let lotus = Into::>::into(lotus); @@ -2991,10 +2992,22 @@ pub(super) async fn run_tests( let permit = semaphore.clone().acquire_owned().await?; let forest = forest.clone(); let lotus = lotus.clone(); + let test_criteria_overrides = test_criteria_overrides.to_vec(); let future = tokio::spawn(async move { - let test_result = test.run(&forest, &lotus).await; - drop(permit); // Release the permit after test execution - (test, test_result) + let mut n_retries_left = n_retries; + let mut backoff_secs = 2; + loop { + let test_result = test.run(&forest, &lotus).await; + let success = evaluate_test_success(&test_result, &test, &test_criteria_overrides); + if success || n_retries_left == 0 { + drop(permit); // Release the permit after test execution + return (success, test, test_result); + } + // Sleep before each retry + tokio::time::sleep(Duration::from_secs(backoff_secs)).await; + n_retries_left = n_retries_left.saturating_sub(1); + backoff_secs = backoff_secs.saturating_mul(2); + } }); futures.push(future); @@ -3005,9 +3018,8 @@ pub(super) async fn run_tests( return Ok(()); } - while let Some(Ok((test, test_result))) = futures.next().await { + while let Some(Ok((success, test, test_result))) = futures.next().await { let method_name = test.request.method_name.clone(); - let success = evaluate_test_success(&test_result, &test, test_criteria_overrides); report_builder.track_test_result( method_name.as_ref(), From 38ab021689ea1b1216ef682804f2821f6890fd81 Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Mon, 9 Feb 2026 19:06:58 +0800 Subject: [PATCH 2/5] resolve AI comment --- .../subcommands/api_cmd/api_compare_tests.rs | 44 ++++++++++--------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/src/tool/subcommands/api_cmd/api_compare_tests.rs b/src/tool/subcommands/api_cmd/api_compare_tests.rs index 7ebbb6a73a26..f6fd372560a2 100644 --- a/src/tool/subcommands/api_cmd/api_compare_tests.rs +++ b/src/tool/subcommands/api_cmd/api_compare_tests.rs @@ -45,8 +45,6 @@ use chrono::Utc; use cid::Cid; use fil_actors_shared::fvm_ipld_bitfield::BitField; use fil_actors_shared::v10::runtime::DomainSeparationTag; -use futures::stream::FuturesUnordered; -use futures::stream::StreamExt as _; use fvm_ipld_blockstore::Blockstore; use ipld_core::ipld::Ipld; use itertools::Itertools as _; @@ -68,6 +66,7 @@ use std::{ time::Duration, }; use tokio::sync::Semaphore; +use tokio::task::JoinSet; use tracing::debug; const COLLECTION_SAMPLE_SIZE: usize = 5; @@ -2937,7 +2936,7 @@ pub(super) async fn run_tests( let forest = Into::>::into(forest); let lotus = Into::>::into(lotus); let semaphore = Arc::new(Semaphore::new(max_concurrent_requests)); - let mut futures = FuturesUnordered::new(); + let mut tasks = JoinSet::new(); let filter_list = if let Some(filter_file) = &filter_file { FilterList::new_from_file(filter_file)? @@ -2993,7 +2992,7 @@ pub(super) async fn run_tests( let forest = forest.clone(); let lotus = lotus.clone(); let test_criteria_overrides = test_criteria_overrides.to_vec(); - let future = tokio::spawn(async move { + tasks.spawn(async move { let mut n_retries_left = n_retries; let mut backoff_secs = 2; loop { @@ -3009,32 +3008,35 @@ pub(super) async fn run_tests( backoff_secs = backoff_secs.saturating_mul(2); } }); - - futures.push(future); } // If no tests to run after filtering, return early without saving/printing - if futures.is_empty() { + if tasks.is_empty() { return Ok(()); } - while let Some(Ok((success, test, test_result))) = futures.next().await { - let method_name = test.request.method_name.clone(); + while let Some(result) = tasks.join_next().await { + match result { + Ok((success, test, test_result)) => { + let method_name = test.request.method_name.clone(); - report_builder.track_test_result( - method_name.as_ref(), - success, - &test_result, - &test.request.params, - ); + report_builder.track_test_result( + method_name.as_ref(), + success, + &test_result, + &test.request.params, + ); - // Dump test data if configured - if let (Some(dump_dir), Some(test_dump)) = (&dump_dir, &test_result.test_dump) { - dump_test_data(dump_dir, success, test_dump)?; - } + // Dump test data if configured + if let (Some(dump_dir), Some(test_dump)) = (&dump_dir, &test_result.test_dump) { + dump_test_data(dump_dir, success, test_dump)?; + } - if !success && fail_fast { - break; + if !success && fail_fast { + break; + } + } + Err(e) => tracing::warn!("{e}"), } } From 354de22fec1c130ffc43ed76b0d1f202a6d9ee61 Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Mon, 9 Feb 2026 19:42:16 +0800 Subject: [PATCH 3/5] fix --- src/tool/subcommands/api_cmd/api_compare_tests.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tool/subcommands/api_cmd/api_compare_tests.rs b/src/tool/subcommands/api_cmd/api_compare_tests.rs index f6fd372560a2..09dffc8ed4af 100644 --- a/src/tool/subcommands/api_cmd/api_compare_tests.rs +++ b/src/tool/subcommands/api_cmd/api_compare_tests.rs @@ -2988,18 +2988,19 @@ pub(super) async fn run_tests( } // Acquire a permit from the semaphore before spawning a test - let permit = semaphore.clone().acquire_owned().await?; + let semaphore = semaphore.clone(); let forest = forest.clone(); let lotus = lotus.clone(); let test_criteria_overrides = test_criteria_overrides.to_vec(); tasks.spawn(async move { let mut n_retries_left = n_retries; let mut backoff_secs = 2; + // Ignore the error since 'An acquire operation can only fail if the semaphore has been closed' + let _permit = semaphore.acquire_owned().await; loop { let test_result = test.run(&forest, &lotus).await; let success = evaluate_test_success(&test_result, &test, &test_criteria_overrides); if success || n_retries_left == 0 { - drop(permit); // Release the permit after test execution return (success, test, test_result); } // Sleep before each retry From 5b9230b7a18a4d202bf875d9befa58db7c36ff14 Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Tue, 10 Feb 2026 13:54:26 +0800 Subject: [PATCH 4/5] no need to wait for F3 --- scripts/tests/api_compare/.env | 2 +- scripts/tests/api_compare/docker-compose.yml | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/scripts/tests/api_compare/.env b/scripts/tests/api_compare/.env index 884c1cd93dd4..9526b69b1e5c 100644 --- a/scripts/tests/api_compare/.env +++ b/scripts/tests/api_compare/.env @@ -1,6 +1,6 @@ # Note: this should be a `fat` image so that it contains the pre-downloaded filecoin proof parameters FOREST_IMAGE=ghcr.io/chainsafe/forest:edge-fat -LOTUS_IMAGE=filecoin/lotus-all-in-one:v1.34.4-rc1-calibnet +LOTUS_IMAGE=filecoin/lotus-all-in-one:v1.34.4-calibnet FIL_PROOFS_PARAMETER_CACHE=/var/tmp/filecoin-proof-parameters LOTUS_RPC_PORT=1234 FOREST_RPC_PORT=2345 diff --git a/scripts/tests/api_compare/docker-compose.yml b/scripts/tests/api_compare/docker-compose.yml index 6ac3957b9966..758349612a84 100644 --- a/scripts/tests/api_compare/docker-compose.yml +++ b/scripts/tests/api_compare/docker-compose.yml @@ -198,10 +198,6 @@ services: lotus sync wait # After the sync is done, import the wallet for signing blocks. It might be already there, which will return an error. We ignore it. echo $MINER_WORKER_KEY | lotus wallet import || true - # Wait until F3 certificate instance 100 is available - until lotus f3 c get 100; do - sleep 5s; - done api-compare: depends_on: lotus-sync-wait: From 2b2a04609c0f7226ed30281f52b3164b5379d2ed Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Tue, 10 Feb 2026 14:10:49 +0800 Subject: [PATCH 5/5] Release the semaphore before sleeping --- .../subcommands/api_cmd/api_compare_tests.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/tool/subcommands/api_cmd/api_compare_tests.rs b/src/tool/subcommands/api_cmd/api_compare_tests.rs index 09dffc8ed4af..79fca715e3a8 100644 --- a/src/tool/subcommands/api_cmd/api_compare_tests.rs +++ b/src/tool/subcommands/api_cmd/api_compare_tests.rs @@ -2995,13 +2995,17 @@ pub(super) async fn run_tests( tasks.spawn(async move { let mut n_retries_left = n_retries; let mut backoff_secs = 2; - // Ignore the error since 'An acquire operation can only fail if the semaphore has been closed' - let _permit = semaphore.acquire_owned().await; loop { - let test_result = test.run(&forest, &lotus).await; - let success = evaluate_test_success(&test_result, &test, &test_criteria_overrides); - if success || n_retries_left == 0 { - return (success, test, test_result); + { + // Ignore the error since 'An acquire operation can only fail if the semaphore has been closed' + let _permit = semaphore.acquire().await; + let test_result = test.run(&forest, &lotus).await; + let success = + evaluate_test_success(&test_result, &test, &test_criteria_overrides); + if success || n_retries_left == 0 { + return (success, test, test_result); + } + // Release the semaphore before sleeping } // Sleep before each retry tokio::time::sleep(Duration::from_secs(backoff_secs)).await;