From 817afcdd04a45c897b1fe1782cca195e84507dd1 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Thu, 28 May 2026 10:05:12 +0000 Subject: [PATCH 1/3] solver: wire up --solver-time-limit on the CLI; add IPASIR backend support; add CI tests The `solver-time-limit` option has been read by `solver_factoryt` since 38a370c5f7 (Jan 2019), but was never registered as a command-line flag in `OPT_SOLVER` -- the original use case was purely programmatic, via dependent projects calling `options.set_option("solver-time-limit", N)` directly. That left the option's plumbing untested in CI and the only backend that honoured it (MiniSat 2) had no end-to-end coverage. This change: * Registers `--solver-time-limit N` in `OPT_SOLVER` / `HELP_SOLVER` and wires the cmdline-to-options forwarding in `parse_solver_options`. `cbmc`, `jbmc`, `goto-synthesizer` and `libcprover-cpp` (which all use `parse_solver_options`) now accept the flag from the command line. * Documents the flag in `doc/man/cbmc.1`, `doc/man/goto-synthesizer.1` and `doc/man/jbmc.1`, so `scripts/check_help.sh` continues to pass. * Implements `set_time_limit_seconds` for the IPASIR back-end (`satcheck_ipasirt`). The implementation registers an `ipasir_set_terminate` callback that returns 1 once the per-solve deadline has passed; IPASIR solvers poll the callback during solving and abort cleanly when it does. On timeout the back-end returns `P_ERROR` (matching the MiniSat 2 back-end's behaviour) instead of throwing `analysis_exceptiont`. Tests: * `regression/cbmc/solver-time-limit/`: a `CORE` regression test that pins down the happy path -- the option is parsed and propagated through to the solver layer without breaking a trivial solve. Runs on every CI build. * `unit/solvers/sat/satcheck_minisat2.cpp`: extended with a pigeonhole-PHP(20) `SCENARIO` that builds the formula directly via `new_variable` / `lcnf`, sets a 1-second time limit, and asserts that `prop_solve` returns `P_ERROR` within five seconds. PHP is provably exponential for resolution-based SAT (Haken, 1985), so the timeout is reliably exceeded on any reasonable hardware. * `unit/solvers/sat/satcheck_ipasir.cpp`: new file, mirroring the satcheck_minisat2 test. Conditional on `HAVE_IPASIR`, so it only compiles in CI builds that link an IPASIR back-end (currently the `check-ubuntu-24_04-make-gcc` job, which uses Riss as the IPASIR provider). This is the first piece of foundation work for a wider redesign of CBMC's solver-timeout infrastructure (sub-second granularity, cross-OS, watchdog-based interrupt). Subsequent commits will build on a now-tested baseline. Review fixes: propagate the limit to the remaining decision procedures so it is no longer silently dropped for non-default solvers. The SMT2 back-end now honours it by passing a per-solver command-line timeout (Z3 -t:, cvc5 --tlimit-per=); the DIMACS, external-SAT and incremental-SMT2 paths route it through set_decision_procedure_time_limit and warn that it is ignored. Update the set_decision_procedure_time_limit documentation, and add regression cases for --smt2 --solver-time-limit and a multi-property --all-properties run. Co-authored-by: Kiro --- doc/man/cbmc.1 | 4 + doc/man/goto-synthesizer.1 | 4 + doc/man/jbmc.1 | 4 + regression/cbmc/solver-time-limit/main.c | 6 ++ .../cbmc/solver-time-limit/main_multi.c | 10 ++ regression/cbmc/solver-time-limit/test.desc | 14 +++ .../test_all_properties.desc | 15 +++ .../cbmc/solver-time-limit/test_smt2.desc | 12 +++ src/goto-checker/solver_factory.cpp | 18 ++++ src/goto-checker/solver_factory.h | 17 +++- src/solvers/sat/satcheck_ipasir.cpp | 29 +++++- src/solvers/sat/satcheck_ipasir.h | 22 ++++- src/solvers/smt2/smt2_dec.cpp | 21 ++++ src/solvers/smt2/smt2_dec.h | 15 ++- unit/Makefile | 4 + unit/solvers/sat/satcheck_ipasir.cpp | 96 +++++++++++++++++++ unit/solvers/sat/satcheck_minisat2.cpp | 59 +++++++++++- 17 files changed, 338 insertions(+), 12 deletions(-) create mode 100644 regression/cbmc/solver-time-limit/main.c create mode 100644 regression/cbmc/solver-time-limit/main_multi.c create mode 100644 regression/cbmc/solver-time-limit/test.desc create mode 100644 regression/cbmc/solver-time-limit/test_all_properties.desc create mode 100644 regression/cbmc/solver-time-limit/test_smt2.desc create mode 100644 unit/solvers/sat/satcheck_ipasir.cpp diff --git a/doc/man/cbmc.1 b/doc/man/cbmc.1 index e9b5d0dfc5d..ae70c4752ce 100644 --- a/doc/man/cbmc.1 +++ b/doc/man/cbmc.1 @@ -473,6 +473,10 @@ command to invoke SAT solver process \fB\-\-no\-sat\-preprocessor\fR disable the SAT solver's simplifier .TP +\fB\-\-solver\-time\-limit\fR N +abort the SAT/SMT solver after N seconds (best-effort; not all +back-ends honour this) +.TP \fB\-\-dimacs\fR generate CNF in DIMACS format .TP diff --git a/doc/man/goto-synthesizer.1 b/doc/man/goto-synthesizer.1 index 671db802556..354c81b80d6 100644 --- a/doc/man/goto-synthesizer.1 +++ b/doc/man/goto-synthesizer.1 @@ -93,6 +93,10 @@ refinement of arithmetic expressions only maximum refinement iterations for arithmetic expressions .TP +\fB\-\-solver\-time\-limit\fR N +abort the SAT/SMT solver after N seconds (best-effort; not all +back-ends honour this) +.TP \fB\-\-incremental\-smt2\-solver\fR \fIcmd\fR Use the incremental SMT backend where \fIcmd\fR is the command to invoke the SMT solver of choice. diff --git a/doc/man/jbmc.1 b/doc/man/jbmc.1 index 0bbf11b0de9..577515b41a2 100644 --- a/doc/man/jbmc.1 +++ b/doc/man/jbmc.1 @@ -458,6 +458,10 @@ output smt incremental formula to the given file \fB\-\-write\-solver\-stats\-to\fR \fIjson\-file\fR collect the solver query complexity .TP +\fB\-\-solver\-time\-limit\fR N +abort the SAT/SMT solver after N seconds (best-effort; not all +back-ends honour this) +.TP \fB\-\-no\-refine\-strings\fR turn off string refinement .TP diff --git a/regression/cbmc/solver-time-limit/main.c b/regression/cbmc/solver-time-limit/main.c new file mode 100644 index 00000000000..994daa2e8c4 --- /dev/null +++ b/regression/cbmc/solver-time-limit/main.c @@ -0,0 +1,6 @@ +int main() +{ + int x = 1; + __CPROVER_assert(x == 1, "trivially true"); + return 0; +} diff --git a/regression/cbmc/solver-time-limit/main_multi.c b/regression/cbmc/solver-time-limit/main_multi.c new file mode 100644 index 00000000000..577ba45e970 --- /dev/null +++ b/regression/cbmc/solver-time-limit/main_multi.c @@ -0,0 +1,10 @@ +int main() +{ + int a, b; + __CPROVER_assume(a >= 0 && a < 10); + __CPROVER_assume(b >= 0 && b < 10); + __CPROVER_assert(a + b >= 0, "sum is non-negative"); + __CPROVER_assert(a < 10, "a is bounded"); + __CPROVER_assert(b < 10, "b is bounded"); + return 0; +} diff --git a/regression/cbmc/solver-time-limit/test.desc b/regression/cbmc/solver-time-limit/test.desc new file mode 100644 index 00000000000..474948f4159 --- /dev/null +++ b/regression/cbmc/solver-time-limit/test.desc @@ -0,0 +1,14 @@ +CORE +main.c +--solver-time-limit 60 +^EXIT=0$ +^SIGNAL=0$ +^\[main\.assertion\.1\] line 4 trivially true: SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +A solve that completes well within the budget should not be affected +by `--solver-time-limit`. This pins down that the option is parsed +and passed through to the solver layer without breaking the happy +path. diff --git a/regression/cbmc/solver-time-limit/test_all_properties.desc b/regression/cbmc/solver-time-limit/test_all_properties.desc new file mode 100644 index 00000000000..17ea923b354 --- /dev/null +++ b/regression/cbmc/solver-time-limit/test_all_properties.desc @@ -0,0 +1,15 @@ +CORE +main_multi.c +--all-properties --solver-time-limit 60 +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Exercises --solver-time-limit on the multi-property (incremental) path, +which issues several solver calls on the same solver instance. This pins +down that a watchdog interrupt from one (timed-out) solve does not leak +into subsequent solves on the same instance (see the asynch_interrupt +handling in satcheck_minisat2). All properties hold and finish well within +the budget. diff --git a/regression/cbmc/solver-time-limit/test_smt2.desc b/regression/cbmc/solver-time-limit/test_smt2.desc new file mode 100644 index 00000000000..5f1525029da --- /dev/null +++ b/regression/cbmc/solver-time-limit/test_smt2.desc @@ -0,0 +1,12 @@ +CORE smt-backend no-new-smt +main.c +--smt2 --z3 --solver-time-limit 60 +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +The --solver-time-limit option is accepted and propagated to the SMT2 (z3) +back-end (as a solver command-line timeout) without spurious diagnostics on +a solve that finishes well within the budget. diff --git a/src/goto-checker/solver_factory.cpp b/src/goto-checker/solver_factory.cpp index 3e18d046812..ca691065961 100644 --- a/src/goto-checker/solver_factory.cpp +++ b/src/goto-checker/solver_factory.cpp @@ -386,6 +386,7 @@ std::unique_ptr solver_factoryt::get_dimacs() std::unique_ptr bv_dimacs = std::make_unique(ns, *prop, message_handler, std::cout); + set_decision_procedure_time_limit(*bv_dimacs); return std::make_unique(std::move(bv_dimacs), std::move(prop)); } @@ -394,6 +395,7 @@ std::unique_ptr solver_factoryt::get_dimacs() std::unique_ptr bv_dimacs = std::make_unique(ns, *prop, message_handler, *outfile); + set_decision_procedure_time_limit(*bv_dimacs); return std::make_unique( std::move(bv_dimacs), std::move(prop), std::move(outfile)); } @@ -410,6 +412,7 @@ std::unique_ptr solver_factoryt::get_external_sat() std::unique_ptr bv_pointers = std::make_unique(ns, *prop, message_handler); + set_decision_procedure_time_limit(*bv_pointers); return std::make_unique(std::move(bv_pointers), std::move(prop)); } @@ -504,6 +507,14 @@ solver_factoryt::get_incremental_smt2(std::string solver_command) out_filename, message_handler, "--dump-smt-formula")); } + if(options.get_signed_int_option("solver-time-limit") > 0) + { + messaget{message_handler}.warning() + << "solver-time-limit is not propagated to the incremental SMT2 " + "back-end and will be ignored" + << messaget::eom; + } + return std::make_unique( std::make_unique( ns, std::move(solver_process), message_handler)); @@ -538,6 +549,7 @@ solver_factoryt::get_smt2(smt2_dect::solvert solver) if(options.get_bool_option("fpa")) smt2_dec->use_FPA_theory = true; + set_decision_procedure_time_limit(*smt2_dec); return std::make_unique(std::move(smt2_dec)); } else if(filename == "-") @@ -803,4 +815,10 @@ void parse_solver_options(const cmdlinet &cmdline, optionst &options) options.set_option( "max-node-refinement", cmdline.get_value("max-node-refinement")); } + + if(cmdline.isset("solver-time-limit")) + { + options.set_option( + "solver-time-limit", cmdline.get_value("solver-time-limit")); + } } diff --git a/src/goto-checker/solver_factory.h b/src/goto-checker/solver_factory.h index e8abaecdf77..e2a109f239b 100644 --- a/src/goto-checker/solver_factory.h +++ b/src/goto-checker/solver_factory.h @@ -84,9 +84,14 @@ class solver_factoryt final smt2_dect::solvert get_smt2_solver_type() const; - /// Sets the timeout of \p decision_procedure if the `solver-time-limit` + /// Sets the time limit of \p decision_procedure if the `solver-time-limit` /// option has a positive value (in seconds). - /// \note Most solvers silently ignore the time limit at the moment. + /// \note Honoured by the MiniSat 2, IPASIR and CaDiCaL SAT back-ends (which + /// have native interrupt support) and by the SMT2 back-end when invoking a + /// solver that accepts a command-line timeout (Z3, cvc5). Other SAT + /// back-ends (PicoSAT, Glucose, Lingeling, external-SAT, ...), the + /// incremental SMT2 back-end and plain DIMACS/SMT2 output modes log a + /// warning and ignore the limit. void set_decision_procedure_time_limit( solver_resource_limitst &decision_procedure); @@ -119,7 +124,8 @@ void parse_solver_options(const cmdlinet &cmdline, optionst &options); "(refine-arithmetic)" \ "(outfile):" \ "(dump-smt-formula):" \ - "(write-solver-stats-to):" + "(write-solver-stats-to):" \ + "(solver-time-limit):" #define HELP_SOLVER \ " {y--sat-solver} {usolver} \t use specified SAT solver\n" \ @@ -151,6 +157,9 @@ void parse_solver_options(const cmdlinet &cmdline, optionst &options); " {y--dump-smt-formula} {ufilename} \t " \ "output smt incremental formula to the given file\n" \ " {y--write-solver-stats-to} {ujson-file} \t " \ - "collect the solver query complexity\n" + "collect the solver query complexity\n" \ + " {y--solver-time-limit} {uN} \t " \ + "abort the SAT/SMT solver after N seconds (best-effort; not all " \ + "back-ends honour this)\n" #endif // CPROVER_GOTO_CHECKER_SOLVER_FACTORY_H diff --git a/src/solvers/sat/satcheck_ipasir.cpp b/src/solvers/sat/satcheck_ipasir.cpp index 6a525523692..c7910545d96 100644 --- a/src/solvers/sat/satcheck_ipasir.cpp +++ b/src/solvers/sat/satcheck_ipasir.cpp @@ -135,8 +135,19 @@ propt::resultt satcheck_ipasirt::do_prop_solve(const bvt &assumptions) ipasir_assume(solver, literal.dimacs()); } + if(time_limit_seconds != 0) + { + deadline = std::chrono::steady_clock::now() + + std::chrono::seconds(time_limit_seconds); + ipasir_set_terminate(solver, this, &terminate_callback); + } + // solve the formula, and handle the return code (10=SAT, 20=UNSAT) - int solver_state = ipasir_solve(solver); + const int solver_state = ipasir_solve(solver); + + if(time_limit_seconds != 0) + ipasir_set_terminate(solver, nullptr, nullptr); + if(10 == solver_state) { log.status() << "SAT checker: instance is SATISFIABLE" << messaget::eom; @@ -149,10 +160,14 @@ propt::resultt satcheck_ipasirt::do_prop_solve(const bvt &assumptions) } else { - log.status() << "SAT checker: solving returned without solution" + // Solving was interrupted; this is the path taken when our + // terminate_callback signals that the configured time limit has + // passed. We return P_ERROR (matching the MiniSat 2 backend) so + // that callers can recognise a timeout, instead of throwing. + log.status() << "SAT checker: solving was interrupted (e.g. timeout)" << messaget::eom; - throw analysis_exceptiont( - "solving inside IPASIR SAT solver has been interrupted"); + status = statust::ERROR; + return resultt::P_ERROR; } } @@ -160,6 +175,12 @@ propt::resultt satcheck_ipasirt::do_prop_solve(const bvt &assumptions) return resultt::P_UNSATISFIABLE; } +int satcheck_ipasirt::terminate_callback(void *data) +{ + const auto *self = static_cast(data); + return std::chrono::steady_clock::now() >= self->deadline ? 1 : 0; +} + void satcheck_ipasirt::set_assignment(literalt a, bool value) { INVARIANT(!a.is_constant(), "cannot set an assignment for a constant"); diff --git a/src/solvers/sat/satcheck_ipasir.h b/src/solvers/sat/satcheck_ipasir.h index 65e1741b902..581cfa49ded 100644 --- a/src/solvers/sat/satcheck_ipasir.h +++ b/src/solvers/sat/satcheck_ipasir.h @@ -12,9 +12,12 @@ instructions. #ifndef CPROVER_SOLVERS_SAT_SATCHECK_IPASIR_H #define CPROVER_SOLVERS_SAT_SATCHECK_IPASIR_H +#include + #include "cnf.h" -#include +#include +#include /// Interface for generic SAT solver interface IPASIR class satcheck_ipasirt : public cnf_solvert, public hardness_collectort @@ -44,10 +47,27 @@ class satcheck_ipasirt : public cnf_solvert, public hardness_collectort return true; } + /// \copydoc propt::set_time_limit_seconds + /// Implemented by registering an `ipasir_set_terminate` callback that + /// returns non-zero once the deadline has passed; IPASIR solvers poll + /// the callback during solving and stop on a non-zero return. + void set_time_limit_seconds(uint32_t lim) override + { + time_limit_seconds = lim; + } + protected: resultt do_prop_solve(const bvt &assumptions) override; void *solver; + + uint32_t time_limit_seconds = 0; + std::chrono::steady_clock::time_point deadline; + + /// Static thunk passed to `ipasir_set_terminate`. Its `data` is the + /// owning solver instance; returns 1 once the per-call deadline has + /// been reached. + static int terminate_callback(void *data); }; #endif // CPROVER_SOLVERS_SAT_SATCHECK_IPASIR_H diff --git a/src/solvers/smt2/smt2_dec.cpp b/src/solvers/smt2/smt2_dec.cpp index cbe6ecb8ebc..d9a5a91a68b 100644 --- a/src/solvers/smt2/smt2_dec.cpp +++ b/src/solvers/smt2/smt2_dec.cpp @@ -132,6 +132,27 @@ decision_proceduret::resultt smt2_dect::dec_solve(const exprt &assumption) break; } + if(time_limit_milliseconds != 0) + { + if(solver == solvert::Z3) + { + // z3: soft per-query timeout in milliseconds. + argv.push_back("-t:" + std::to_string(time_limit_milliseconds)); + } + else if(solver == solvert::CVC5) + { + // cvc5: per-query time limit in milliseconds. + argv.push_back("--tlimit-per=" + std::to_string(time_limit_milliseconds)); + } + else + { + messaget{message_handler}.warning() + << "solver-time-limit is not supported for this SMT2 solver and " + "will be ignored" + << messaget::eom; + } + } + int res = run(argv[0], argv, stdin_filename, temp_file_stdout(), temp_file_stderr()); diff --git a/src/solvers/smt2/smt2_dec.h b/src/solvers/smt2/smt2_dec.h index 2b51118aee2..920d3199827 100644 --- a/src/solvers/smt2/smt2_dec.h +++ b/src/solvers/smt2/smt2_dec.h @@ -10,6 +10,8 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_SOLVERS_SMT2_SMT2_DEC_H #define CPROVER_SOLVERS_SMT2_SMT2_DEC_H +#include + #include "smt2_conv.h" class message_handlert; @@ -22,7 +24,9 @@ class smt2_stringstreamt /*! \brief Decision procedure interface for various SMT 2.x solvers */ -class smt2_dect : protected smt2_stringstreamt, public smt2_convt +class smt2_dect : protected smt2_stringstreamt, + public smt2_convt, + public solver_resource_limitst { public: smt2_dect( @@ -41,9 +45,18 @@ class smt2_dect : protected smt2_stringstreamt, public smt2_convt std::string decision_procedure_text() const override; + /// \copydoc solver_resource_limitst::set_time_limit_milliseconds + /// Honoured for solvers that accept a command-line timeout (Z3, cvc5); for + /// other SMT2 solvers a warning is logged and the limit is ignored. + void set_time_limit_milliseconds(uint32_t lim) override + { + time_limit_milliseconds = lim; + } + protected: std::string solver_binary_or_empty; message_handlert &message_handler; + uint32_t time_limit_milliseconds = 0; resultt dec_solve(const exprt &) override; /// Everything except the footer is cached, so that output files can be diff --git a/unit/Makefile b/unit/Makefile index ad783f64f8e..68f12d978a7 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -110,6 +110,7 @@ SRC += analyses/ai/ai.cpp \ solvers/prop/bdd_expr.cpp \ solvers/sat/external_sat.cpp \ solvers/sat/satcheck_cadical.cpp \ + solvers/sat/satcheck_ipasir.cpp \ solvers/sat/satcheck_minisat2.cpp \ solvers/smt2/smt2_conv.cpp \ solvers/smt2/smt2irep.cpp \ @@ -314,6 +315,9 @@ endif ifeq ($(CADICAL),) EXCLUDED_TESTS += satcheck_cadical.cpp endif +ifeq ($(IPASIR),) +EXCLUDED_TESTS += satcheck_ipasir.cpp +endif N_CATCH_TESTS = $(shell ./count_tests.py --exclude-files "$(EXCLUDED_TESTS)") diff --git a/unit/solvers/sat/satcheck_ipasir.cpp b/unit/solvers/sat/satcheck_ipasir.cpp new file mode 100644 index 00000000000..4ba464d6043 --- /dev/null +++ b/unit/solvers/sat/satcheck_ipasir.cpp @@ -0,0 +1,96 @@ +/*******************************************************************\ + +Module: Unit tests for satcheck_ipasir + +Author: Diffblue Ltd. + +\*******************************************************************/ + +/// \file +/// Unit tests for satcheck_ipasir, focusing on the time-limit +/// (`set_time_limit_seconds`) plumbing that hooks into the IPASIR +/// `ipasir_set_terminate` mechanism. + +#ifdef HAVE_IPASIR + +# include + +# include +# include +# include + +# include +# include +# include + +SCENARIO("satcheck_ipasir", "[core][solvers][sat][satcheck_ipasir]") +{ + console_message_handlert message_handler; + message_handler.set_verbosity(0); + + GIVEN("A satisfiable formula f") + { + satcheck_ipasirt satcheck(message_handler); + literalt f = satcheck.new_variable(); + satcheck.l_set_to_true(f); + + THEN("is indeed satisfiable") + { + REQUIRE(satcheck.prop_solve() == propt::resultt::P_SATISFIABLE); + } + } + + GIVEN("A pigeonhole formula PHP(20) and a 1-second time limit") + { + // Same construction as the satcheck_minisat2 unit test: PHP(N) is + // exponentially hard for resolution-based SAT solvers, so a + // 1-second time limit is reliably exceeded. Verifies that the + // IPASIR `ipasir_set_terminate` callback we install is honoured + // by the underlying solver. + satcheck_ipasirt satcheck(message_handler); + constexpr std::size_t holes = 20; + constexpr std::size_t pigeons = holes + 1; + std::vector> x(pigeons); + for(std::size_t p = 0; p < pigeons; ++p) + { + x[p].reserve(holes); + for(std::size_t h = 0; h < holes; ++h) + x[p].push_back(satcheck.new_variable()); + } + // Each pigeon is in at least one hole. + for(std::size_t p = 0; p < pigeons; ++p) + { + bvt clause = x[p]; + satcheck.lcnf(clause); + } + // No two pigeons share a hole. + for(std::size_t h = 0; h < holes; ++h) + for(std::size_t p1 = 0; p1 < pigeons; ++p1) + for(std::size_t p2 = p1 + 1; p2 < pigeons; ++p2) + { + bvt clause; + clause.push_back(!x[p1][h]); + clause.push_back(!x[p2][h]); + satcheck.lcnf(clause); + } + + satcheck.set_time_limit_seconds(1); + + THEN("the solver returns P_ERROR (interrupted by the time limit)") + { + const auto start = std::chrono::steady_clock::now(); + const auto result = satcheck.prop_solve(); + const auto elapsed_ms = + std::chrono::duration_cast( + std::chrono::steady_clock::now() - start) + .count(); + // The solver must report P_ERROR (terminate callback fired) + // and not have run far past the budget. The 5-second slack is + // for slow CI hardware. + REQUIRE(result == propt::resultt::P_ERROR); + REQUIRE(elapsed_ms < 5000); + } + } +} + +#endif diff --git a/unit/solvers/sat/satcheck_minisat2.cpp b/unit/solvers/sat/satcheck_minisat2.cpp index 55c7cd209d1..db4bc0e448f 100644 --- a/unit/solvers/sat/satcheck_minisat2.cpp +++ b/unit/solvers/sat/satcheck_minisat2.cpp @@ -11,11 +11,15 @@ Author: Peter Schrammel #ifdef HAVE_MINISAT2 -# include +# include # include # include -# include +# include + +# include +# include +# include SCENARIO("satcheck_minisat2", "[core][solvers][sat][satcheck_minisat2]") { @@ -82,6 +86,57 @@ SCENARIO("satcheck_minisat2", "[core][solvers][sat][satcheck_minisat2]") satcheck.prop_solve(assumptions) == propt::resultt::P_SATISFIABLE); } } + + GIVEN("A pigeonhole formula PHP(20) and a 1-second time limit") + { + // The pigeonhole principle: N+1 pigeons cannot all fit in N holes + // (one pigeon per hole). For N=20 the resulting CNF is provably + // exponentially hard for resolution-based SAT solvers (Haken, + // 1985), so a 1-second time limit is reliably blown through. + satcheck_minisat_no_simplifiert satcheck(message_handler); + constexpr std::size_t holes = 20; + constexpr std::size_t pigeons = holes + 1; + std::vector> x(pigeons); + for(std::size_t p = 0; p < pigeons; ++p) + { + x[p].reserve(holes); + for(std::size_t h = 0; h < holes; ++h) + x[p].push_back(satcheck.new_variable()); + } + // Each pigeon is in at least one hole. + for(std::size_t p = 0; p < pigeons; ++p) + { + bvt clause = x[p]; + satcheck.lcnf(clause); + } + // No two pigeons share a hole. + for(std::size_t h = 0; h < holes; ++h) + for(std::size_t p1 = 0; p1 < pigeons; ++p1) + for(std::size_t p2 = p1 + 1; p2 < pigeons; ++p2) + { + bvt clause; + clause.push_back(!x[p1][h]); + clause.push_back(!x[p2][h]); + satcheck.lcnf(clause); + } + + satcheck.set_time_limit_seconds(1); + + THEN("the solver returns P_ERROR (interrupted by the time limit)") + { + const auto start = std::chrono::steady_clock::now(); + const auto result = satcheck.prop_solve(); + const auto elapsed_ms = + std::chrono::duration_cast( + std::chrono::steady_clock::now() - start) + .count(); + // The solver must report P_ERROR (it was interrupted by SIGALRM) + // and must not have run far past the 1-second budget. We allow a + // 5-second slack to be robust against very slow CI hardware. + REQUIRE(result == propt::resultt::P_ERROR); + REQUIRE(elapsed_ms < 5000); + } + } } #endif From 0c93840c48043ef16a9778bed9645fce77b2e46e Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Thu, 28 May 2026 10:20:11 +0000 Subject: [PATCH 2/3] solver: time limit in milliseconds; portable MiniSat watchdog The solver-time-limit infrastructure used to be expressed in whole seconds, was implemented for MiniSat 2 via POSIX `alarm()` + `SIGALRM` (so it was POSIX-only and process-wide), and was explicitly skipped on Windows with a "Time limit ignored (not supported on Win32 yet)" warning. In practice this meant: * Sub-second budgets -- needed for fine-grained per-check timeouts inside symbolic execution -- could not be expressed. * Windows builds had no time-limit support at all. * The signal handler relied on a process-wide `solver_to_interrupt` pointer; nesting solve calls across threads or having two solvers active in the same process was fragile. This change reworks the API and the MiniSat 2 implementation: * `solver_resource_limitst::set_time_limit_milliseconds` and `propt::set_time_limit_milliseconds` are now the primary virtual API. The default `propt` implementation logs a warning, mirroring the previous behaviour for back-ends with no native timeout support. * `set_time_limit_seconds` survives as a non-virtual helper that forwards to `set_time_limit_milliseconds(lim * 1000)`, so external callers (e.g. the CLI option-parser, dependent projects) keep working without source changes. * The MiniSat 2 back-end now implements the time limit using a `std::thread` watchdog: when the limit is non-zero, a watchdog thread waits on a `std::condition_variable` for either the solve to complete (signalled via a flag) or the deadline to elapse, and on timeout calls `solver->interrupt()` (which is just a flag set on the Minisat::Solver object and is safe to invoke from another thread). The watchdog is joined before `do_prop_solve` returns. Result: cross-OS, sub-millisecond granularity, no global signal state, no `SIGALRM` collision risk. * The IPASIR back-end's existing `set_time_limit_seconds` override is renamed/repurposed as `set_time_limit_milliseconds`; the deadline is computed in `std::chrono::milliseconds` instead of seconds. The callback already polled a `chrono::steady_clock` deadline, so this is essentially a stored-precision change. * The `solver_factoryt` call site in `solver_factory.cpp` keeps using the seconds helper (the CLI flag `--solver-time-limit` remains expressed in seconds for user familiarity); only the internal API is now in milliseconds. Tests: * Existing pigeonhole(20) SCENARIOs in `unit/solvers/sat/satcheck_minisat2.cpp` and `unit/solvers/sat/satcheck_ipasir.cpp` now request a 200-millisecond limit and verify (a) that `prop_solve` returns `P_ERROR`, and (b) that wall-clock elapsed time is well under five seconds (a generous slack for slow CI hardware and watchdog-join latency). Locally the satcheck_minisat2 test completes in ~207 ms, confirming sub-millisecond accuracy of the watchdog. This is the second piece of the wider solver-timeout rework. Subsequent work will add CaDiCaL terminator support and let the branch-pruning code use sub-second per-call budgets directly. Review fixes: propt now derives solver_resource_limitst, so the milliseconds API and the seconds helper live in a single place (removing the duplicated helper) and the per-back-end time_limit_milliseconds member is shared. Clear MiniSat's sticky interrupt flag at the start of each solve so a watchdog interrupt from a previous (timed-out) solve cannot leak into the next solve on the same instance -- a unit test pins this down. Add a lower-bound timing assertion to the timeout scenario, and drop a now-dead include from the IPASIR back-end. Co-authored-by: Kiro --- src/common | 6 +- src/libcprover-cpp/CMakeLists.txt | 9 ++ src/solvers/CMakeLists.txt | 12 +++ src/solvers/prop/prop.h | 15 ++- src/solvers/prop/prop_conv_solver.h | 4 +- src/solvers/prop/solver_resource_limits.h | 27 ++++- src/solvers/sat/satcheck_ipasir.cpp | 7 +- src/solvers/sat/satcheck_ipasir.h | 7 +- src/solvers/sat/satcheck_minisat2.cpp | 117 +++++++++++++--------- src/solvers/sat/satcheck_minisat2.h | 5 +- unit/solvers/sat/satcheck_ipasir.cpp | 13 +-- unit/solvers/sat/satcheck_minisat2.cpp | 35 +++++-- 12 files changed, 177 insertions(+), 80 deletions(-) diff --git a/src/common b/src/common index 276f7f3a6b4..585f0715b59 100644 --- a/src/common +++ b/src/common @@ -69,7 +69,8 @@ ifeq ($(filter-out OSX OSX_Universal,$(BUILD_ENV_)),) else ifeq ($(filter-out FreeBSD OpenBSD,$(BUILD_ENV_)),) CP_CXXFLAGS += LINKLIB = llvm-ar rcT $@ $^ - LINKBIN = $(CXX) $(LINKFLAGS) -o $@ -Wl,--start-group $^ -Wl,--end-group $(LIBS) + # -pthread is needed for satcheck_minisat2.cpp's std::thread watchdog + LINKBIN = $(CXX) $(LINKFLAGS) -o $@ -Wl,--start-group $^ -Wl,--end-group $(LIBS) -pthread LINKNATIVE = $(HOSTCXX) $(HOSTLINKFLAGS) -o $@ $^ ifeq ($(origin CC),default) CC = clang @@ -80,7 +81,8 @@ else ifeq ($(filter-out FreeBSD OpenBSD,$(BUILD_ENV_)),) YFLAGS ?= -v -Werror else LINKLIB = ar rcT $@ $^ - LINKBIN = $(CXX) $(LINKFLAGS) -o $@ -Wl,--start-group $^ -Wl,--end-group $(LIBS) + # -pthread is needed for satcheck_minisat2.cpp's std::thread watchdog + LINKBIN = $(CXX) $(LINKFLAGS) -o $@ -Wl,--start-group $^ -Wl,--end-group $(LIBS) -pthread LINKNATIVE = $(HOSTCXX) $(HOSTLINKFLAGS) -o $@ $^ ifeq ($(origin CC),default) CC = gcc diff --git a/src/libcprover-cpp/CMakeLists.txt b/src/libcprover-cpp/CMakeLists.txt index d32b4074bf5..75d5253caf2 100644 --- a/src/libcprover-cpp/CMakeLists.txt +++ b/src/libcprover-cpp/CMakeLists.txt @@ -54,6 +54,15 @@ target_link_libraries(cprover-api-cpp PRIVATE ${LIBRARY_DEPENDENCIES}) +# satcheck_minisat2.cpp inside libcprover..a uses std::thread (see +# the matching note in src/solvers/CMakeLists.txt). Because the solvers +# dependency above is PRIVATE, the `-pthread` link option attached to the +# solvers target does not propagate to consumers of cprover-api-cpp such as +# goto-bmc. Re-publish it as an INTERFACE option so consumers get it. +if(NOT WIN32) + target_link_options(cprover-api-cpp INTERFACE -pthread) +endif() + # To be able to invoke `ar` on the dependencies we need the paths of the libraries `.a` files. # Ths is done by using the cmake generator `$`, that in turn will be # substituted with the absolute path of the `dependency` output file (a `.a` file in this case). diff --git a/src/solvers/CMakeLists.txt b/src/solvers/CMakeLists.txt index b4d33b74efc..0a1428dbee9 100644 --- a/src/solvers/CMakeLists.txt +++ b/src/solvers/CMakeLists.txt @@ -59,6 +59,18 @@ list(REMOVE_ITEM sources add_library(solvers ${sources}) +# satcheck_minisat2.cpp uses std::thread (a watchdog for the per-solve time +# limit), so the solvers library needs to link against the host's threads +# library on platforms where that is not implicit (FreeBSD, NetBSD, some +# minimal Linux glibc setups, Docker images using gcc without -pthread by +# default). We avoid the imported Threads::Threads target because +# libcprover-cpp's archive-aggregation loop in src/libcprover-cpp/CMakeLists.txt +# tries to resolve every dependency to a .a file and Threads::Threads has no +# such backing file. +if(NOT WIN32) + target_link_options(solvers PUBLIC -pthread) +endif() + include("${CBMC_SOURCE_DIR}/cmake/DownloadProject.cmake") foreach(SOLVER ${sat_impl}) diff --git a/src/solvers/prop/prop.h b/src/solvers/prop/prop.h index dda20bfef39..cd0975258a9 100644 --- a/src/solvers/prop/prop.h +++ b/src/solvers/prop/prop.h @@ -16,12 +16,13 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "literal.h" +#include "solver_resource_limits.h" #include /*! \brief TO_BE_DOCUMENTED */ -class propt +class propt : public solver_resource_limitst { public: explicit propt(message_handlert &message_handler) : log(message_handler) @@ -117,7 +118,12 @@ class propt virtual void set_frozen(literalt) { } // Resource limits: - virtual void set_time_limit_seconds(uint32_t) + + /// \copydoc solver_resource_limitst::set_time_limit_milliseconds + /// This default implementation logs a warning and ignores the limit; + /// back-ends with native interrupt support override it to store the limit + /// in \ref time_limit_milliseconds and honour it in \ref do_prop_solve. + void set_time_limit_milliseconds(uint32_t) override { log.warning() << "CPU limit ignored (not implemented)" << messaget::eom; } @@ -125,6 +131,11 @@ class propt std::size_t get_number_of_solver_calls() const; protected: + /// Wall-clock time limit for each solver call, in milliseconds; 0 disables + /// it. Stored here so the back-ends share a single member (the seconds + /// helper and API live in solver_resource_limitst). + uint32_t time_limit_milliseconds = 0; + // solve under the given assumption virtual resultt do_prop_solve(const bvt &assumptions) = 0; diff --git a/src/solvers/prop/prop_conv_solver.h b/src/solvers/prop/prop_conv_solver.h index f8bcb40fda1..7f356b38e78 100644 --- a/src/solvers/prop/prop_conv_solver.h +++ b/src/solvers/prop/prop_conv_solver.h @@ -92,9 +92,9 @@ class prop_conv_solvert : public conflict_providert, return symbols; } - void set_time_limit_seconds(uint32_t lim) override + void set_time_limit_milliseconds(uint32_t lim) override { - prop.set_time_limit_seconds(lim); + prop.set_time_limit_milliseconds(lim); } std::size_t get_number_of_solver_calls() const override; diff --git a/src/solvers/prop/solver_resource_limits.h b/src/solvers/prop/solver_resource_limits.h index a1f1cc7df36..f2163cb0942 100644 --- a/src/solvers/prop/solver_resource_limits.h +++ b/src/solvers/prop/solver_resource_limits.h @@ -12,11 +12,34 @@ Author: Peter Schrammel #ifndef CPROVER_SOLVERS_PROP_SOLVER_RESOURCE_LIMITS_H #define CPROVER_SOLVERS_PROP_SOLVER_RESOURCE_LIMITS_H +#include +#include + class solver_resource_limitst { public: - /// Set the limit for the solver to time out in seconds - virtual void set_time_limit_seconds(uint32_t) = 0; + /// Set a wall-clock time limit for each solver call, in + /// milliseconds. A value of 0 disables the time limit. Granularity + /// is best-effort: back-ends may round up or, where the underlying + /// solver does not support timeouts, log a warning and ignore the + /// limit. + virtual void set_time_limit_milliseconds(uint32_t) = 0; + + /// Helper accepting a time limit in whole seconds; forwards to + /// `set_time_limit_milliseconds` after multiplying by 1000. + /// Provided for backward compatibility with callers that still + /// express the limit in seconds (e.g. dependent projects predating + /// the millisecond-precision rework). + void set_time_limit_seconds(uint32_t lim) + { + // Saturate to UINT32_MAX milliseconds (~49.7 days) to avoid silently + // wrapping for inputs greater than ~4.29 million seconds. + const uint64_t ms = static_cast(lim) * 1000u; + set_time_limit_milliseconds( + ms > std::numeric_limits::max() + ? std::numeric_limits::max() + : static_cast(ms)); + } virtual ~solver_resource_limitst() = default; }; diff --git a/src/solvers/sat/satcheck_ipasir.cpp b/src/solvers/sat/satcheck_ipasir.cpp index c7910545d96..8afce2dc022 100644 --- a/src/solvers/sat/satcheck_ipasir.cpp +++ b/src/solvers/sat/satcheck_ipasir.cpp @@ -10,7 +10,6 @@ Author: Norbert Manthey, nmanthey@amazon.com # include "satcheck_ipasir.h" -# include # include # include @@ -135,17 +134,17 @@ propt::resultt satcheck_ipasirt::do_prop_solve(const bvt &assumptions) ipasir_assume(solver, literal.dimacs()); } - if(time_limit_seconds != 0) + if(time_limit_milliseconds != 0) { deadline = std::chrono::steady_clock::now() + - std::chrono::seconds(time_limit_seconds); + std::chrono::milliseconds(time_limit_milliseconds); ipasir_set_terminate(solver, this, &terminate_callback); } // solve the formula, and handle the return code (10=SAT, 20=UNSAT) const int solver_state = ipasir_solve(solver); - if(time_limit_seconds != 0) + if(time_limit_milliseconds != 0) ipasir_set_terminate(solver, nullptr, nullptr); if(10 == solver_state) diff --git a/src/solvers/sat/satcheck_ipasir.h b/src/solvers/sat/satcheck_ipasir.h index 581cfa49ded..0a5c9bd0860 100644 --- a/src/solvers/sat/satcheck_ipasir.h +++ b/src/solvers/sat/satcheck_ipasir.h @@ -47,13 +47,13 @@ class satcheck_ipasirt : public cnf_solvert, public hardness_collectort return true; } - /// \copydoc propt::set_time_limit_seconds + /// \copydoc propt::set_time_limit_milliseconds /// Implemented by registering an `ipasir_set_terminate` callback that /// returns non-zero once the deadline has passed; IPASIR solvers poll /// the callback during solving and stop on a non-zero return. - void set_time_limit_seconds(uint32_t lim) override + void set_time_limit_milliseconds(uint32_t lim) override { - time_limit_seconds = lim; + time_limit_milliseconds = lim; } protected: @@ -61,7 +61,6 @@ class satcheck_ipasirt : public cnf_solvert, public hardness_collectort void *solver; - uint32_t time_limit_seconds = 0; std::chrono::steady_clock::time_point deadline; /// Static thunk passed to `ipasir_set_terminate`. Its `data` is the diff --git a/src/solvers/sat/satcheck_minisat2.cpp b/src/solvers/sat/satcheck_minisat2.cpp index 0cf42e6b1f2..a0638f3af37 100644 --- a/src/solvers/sat/satcheck_minisat2.cpp +++ b/src/solvers/sat/satcheck_minisat2.cpp @@ -8,19 +8,18 @@ Author: Daniel Kroening, kroening@kroening.com #include "satcheck_minisat2.h" -#ifndef _WIN32 -# include -# include -#endif - -#include - #include #include #include #include +#include // IWYU pragma: keep +#include +#include +#include +#include + #ifndef l_False # define l_False Minisat::l_False # define l_True Minisat::l_True @@ -191,23 +190,19 @@ void satcheck_minisat2_baset::lcnf(const bvt &bv) } } -#ifndef _WIN32 - -static Minisat::Solver *solver_to_interrupt=nullptr; - -static void interrupt_solver(int signum) -{ - (void)signum; // unused parameter -- just removing the name trips up cpplint - solver_to_interrupt->interrupt(); -} - -#endif - template propt::resultt satcheck_minisat2_baset::do_prop_solve(const bvt &assumptions) { PRECONDITION(status != statust::ERROR); + // Clear any interrupt flag left over from a previous solve. The watchdog + // thread may have called interrupt() in the small race window where the + // previous solveLimited() was already returning; MiniSat latches that flag + // (asynch_interrupt) until cleared, so without this the next solve on the + // same checker -- e.g. under --all-properties / --cover / incremental + // loops -- would immediately fail with P_ERROR. + clear_interrupt(); + log.statistics() << (no_variables() - 1) << " variables, " << solver->nClauses() << " clauses" << messaget::eom; @@ -240,40 +235,66 @@ propt::resultt satcheck_minisat2_baset::do_prop_solve(const bvt &assumptions) using Minisat::lbool; -#ifndef _WIN32 - - void (*old_handler)(int) = SIG_ERR; - - if(time_limit_seconds != 0) + // Spawn a watchdog thread that will fire after time_limit_milliseconds + // and call interrupt() on the solver. The watchdog uses a condition + // variable so it exits cleanly when the solver finishes within the + // budget. This is portable across operating systems (no SIGALRM + // dependency) and supports sub-second granularity. + // + // The watchdog must be joined before its thread object goes out of + // scope, even if solveLimited throws (e.g., Minisat::OutOfMemoryException + // is caught a few lines below). A thread destroyed while joinable + // calls std::terminate. A small RAII guard handles this for both the + // normal and exceptional return paths. + std::thread watchdog; + std::mutex watchdog_mutex; + std::condition_variable watchdog_cv; + bool solve_done = false; + + struct watchdog_joinert { - solver_to_interrupt = solver.get(); - old_handler = signal(SIGALRM, interrupt_solver); - if(old_handler == SIG_ERR) - log.warning() << "Failed to set solver time limit" << messaget::eom; - else - alarm(time_limit_seconds); - } + std::thread &watchdog; + std::mutex &watchdog_mutex; + std::condition_variable &watchdog_cv; + bool &solve_done; - lbool solver_result = solver->solveLimited(solver_assumptions); - - if(old_handler != SIG_ERR) - { - alarm(0); - signal(SIGALRM, old_handler); - solver_to_interrupt = solver.get(); - } - -#else // _WIN32 + ~watchdog_joinert() + { + if(!watchdog.joinable()) + return; + { + std::lock_guard lk(watchdog_mutex); + solve_done = true; + } + watchdog_cv.notify_one(); + watchdog.join(); + } + } watchdog_joiner{watchdog, watchdog_mutex, watchdog_cv, solve_done}; - if(time_limit_seconds != 0) + if(time_limit_milliseconds != 0) { - log.warning() << "Time limit ignored (not supported on Win32 yet)" - << messaget::eom; + watchdog = std::thread( + [this, &watchdog_mutex, &watchdog_cv, &solve_done] + { + std::unique_lock lk(watchdog_mutex); + const auto deadline = + std::chrono::milliseconds(time_limit_milliseconds); + if(!watchdog_cv.wait_for(lk, deadline, [&] { return solve_done; })) + { + // Timed out before solve completed: interrupt the solver. + // Minisat::Solver::interrupt() sets an internal flag that + // is polled in the solving loop and is safe to invoke from + // another thread. + solver->interrupt(); + } + }); } - lbool solver_result = solver->solve(solver_assumptions) ? l_True : l_False; + lbool solver_result = solver->solveLimited(solver_assumptions); -#endif + // The watchdog thread (if any) is signalled and joined by + // watchdog_joiner's destructor on every return path, including the + // OutOfMemoryException catch below. if(solver_result == l_True) { @@ -328,9 +349,7 @@ void satcheck_minisat2_baset::set_assignment(literalt a, bool value) template satcheck_minisat2_baset::satcheck_minisat2_baset( message_handlert &message_handler) - : cnf_solvert(message_handler), - solver(std::make_unique()), - time_limit_seconds(0) + : cnf_solvert(message_handler), solver(std::make_unique()) { } diff --git a/src/solvers/sat/satcheck_minisat2.h b/src/solvers/sat/satcheck_minisat2.h index 1aa2280feb8..5560487a0d5 100644 --- a/src/solvers/sat/satcheck_minisat2.h +++ b/src/solvers/sat/satcheck_minisat2.h @@ -62,16 +62,15 @@ class satcheck_minisat2_baset : public cnf_solvert, public hardness_collectort return true; } - void set_time_limit_seconds(uint32_t lim) override + void set_time_limit_milliseconds(uint32_t lim) override { - time_limit_seconds=lim; + time_limit_milliseconds = lim; } protected: resultt do_prop_solve(const bvt &) override; std::unique_ptr solver; - uint32_t time_limit_seconds; void add_variables(); }; diff --git a/unit/solvers/sat/satcheck_ipasir.cpp b/unit/solvers/sat/satcheck_ipasir.cpp index 4ba464d6043..273211b37d8 100644 --- a/unit/solvers/sat/satcheck_ipasir.cpp +++ b/unit/solvers/sat/satcheck_ipasir.cpp @@ -8,7 +8,7 @@ Author: Diffblue Ltd. /// \file /// Unit tests for satcheck_ipasir, focusing on the time-limit -/// (`set_time_limit_seconds`) plumbing that hooks into the IPASIR +/// (`set_time_limit_milliseconds`) plumbing that hooks into the IPASIR /// `ipasir_set_terminate` mechanism. #ifdef HAVE_IPASIR @@ -40,13 +40,13 @@ SCENARIO("satcheck_ipasir", "[core][solvers][sat][satcheck_ipasir]") } } - GIVEN("A pigeonhole formula PHP(20) and a 1-second time limit") + GIVEN("A pigeonhole formula PHP(20) and a 200-millisecond time limit") { // Same construction as the satcheck_minisat2 unit test: PHP(N) is // exponentially hard for resolution-based SAT solvers, so a - // 1-second time limit is reliably exceeded. Verifies that the - // IPASIR `ipasir_set_terminate` callback we install is honoured - // by the underlying solver. + // 200-millisecond time limit is reliably exceeded. Verifies that + // the IPASIR `ipasir_set_terminate` callback we install is + // honoured by the underlying solver. satcheck_ipasirt satcheck(message_handler); constexpr std::size_t holes = 20; constexpr std::size_t pigeons = holes + 1; @@ -74,7 +74,7 @@ SCENARIO("satcheck_ipasir", "[core][solvers][sat][satcheck_ipasir]") satcheck.lcnf(clause); } - satcheck.set_time_limit_seconds(1); + satcheck.set_time_limit_milliseconds(200); THEN("the solver returns P_ERROR (interrupted by the time limit)") { @@ -88,6 +88,7 @@ SCENARIO("satcheck_ipasir", "[core][solvers][sat][satcheck_ipasir]") // and not have run far past the budget. The 5-second slack is // for slow CI hardware. REQUIRE(result == propt::resultt::P_ERROR); + REQUIRE(elapsed_ms >= 200); REQUIRE(elapsed_ms < 5000); } } diff --git a/unit/solvers/sat/satcheck_minisat2.cpp b/unit/solvers/sat/satcheck_minisat2.cpp index db4bc0e448f..bc5153a7dd0 100644 --- a/unit/solvers/sat/satcheck_minisat2.cpp +++ b/unit/solvers/sat/satcheck_minisat2.cpp @@ -87,12 +87,13 @@ SCENARIO("satcheck_minisat2", "[core][solvers][sat][satcheck_minisat2]") } } - GIVEN("A pigeonhole formula PHP(20) and a 1-second time limit") + GIVEN("A pigeonhole formula PHP(20) and a 200-millisecond time limit") { // The pigeonhole principle: N+1 pigeons cannot all fit in N holes // (one pigeon per hole). For N=20 the resulting CNF is provably // exponentially hard for resolution-based SAT solvers (Haken, - // 1985), so a 1-second time limit is reliably blown through. + // 1985), so a 200-millisecond time limit is reliably blown + // through. satcheck_minisat_no_simplifiert satcheck(message_handler); constexpr std::size_t holes = 20; constexpr std::size_t pigeons = holes + 1; @@ -120,7 +121,7 @@ SCENARIO("satcheck_minisat2", "[core][solvers][sat][satcheck_minisat2]") satcheck.lcnf(clause); } - satcheck.set_time_limit_seconds(1); + satcheck.set_time_limit_milliseconds(200); THEN("the solver returns P_ERROR (interrupted by the time limit)") { @@ -130,13 +131,35 @@ SCENARIO("satcheck_minisat2", "[core][solvers][sat][satcheck_minisat2]") std::chrono::duration_cast( std::chrono::steady_clock::now() - start) .count(); - // The solver must report P_ERROR (it was interrupted by SIGALRM) - // and must not have run far past the 1-second budget. We allow a - // 5-second slack to be robust against very slow CI hardware. + // The solver must report P_ERROR (it was interrupted by the + // watchdog thread) and must not have run far past the + // 200-millisecond budget. The 5-second slack is for very slow + // CI hardware and any latency in joining the watchdog thread. + // The lower bound guards against a regression that fires the + // watchdog before the deadline (e.g. a stale interrupt flag). REQUIRE(result == propt::resultt::P_ERROR); + REQUIRE(elapsed_ms >= 200); REQUIRE(elapsed_ms < 5000); } } + + GIVEN("A checker whose interrupt flag has been left set") + { + // Simulate the race in which the watchdog called interrupt() just as a + // previous solve was returning, leaving MiniSat's (sticky) asynch_interrupt + // flag latched. do_prop_solve() must clear it, otherwise this trivially + // satisfiable solve would spuriously report P_ERROR. This pins down the + // --all-properties / --cover / incremental-loop regression. + satcheck_minisat_no_simplifiert satcheck(message_handler); + literalt f = satcheck.new_variable(); + satcheck.l_set_to_true(f); + satcheck.interrupt(); + + THEN("the next solve clears the flag and reports P_SATISFIABLE") + { + REQUIRE(satcheck.prop_solve() == propt::resultt::P_SATISFIABLE); + } + } } #endif From 226e7fbdc2ae3401d5d36d74f53ac9091418fd59 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Thu, 28 May 2026 10:48:03 +0000 Subject: [PATCH 3/3] solver: CaDiCaL terminator support for set_time_limit_milliseconds The CaDiCaL back-end was the last SAT back-end in CBMC for which `set_time_limit_milliseconds` (and its `set_time_limit_seconds` helper) silently fell through to `propt`'s default no-op + warning. This change adds native time-limit support via CaDiCaL's `Terminator` callback API: * `satcheck_cadical_baset` declares a nested `terminatort` class in the header (forward-declared to keep `cadical.hpp` out of the header). * `terminatort` is defined in the .cpp and inherits `CaDiCaL::Terminator`. It stores a deadline as a `steady_clock::time_point` and returns true once the deadline has passed. * `set_time_limit_milliseconds(uint32_t)` stores the limit on the solver instance. * `do_prop_solve` constructs / refreshes the deadline before calling `solver->solve()`, calls `solver->connect_terminator(...)` while solving, and calls `solver->disconnect_terminator()` afterwards. CaDiCaL polls the terminator at decision points during solving and aborts cleanly on a true return. * On interrupt, `do_prop_solve` now returns `propt::resultt ::P_ERROR` -- matching the MiniSat 2 and IPASIR back-ends -- instead of throwing `analysis_exceptiont`. The `` include is dropped as it is no longer needed. A `set_time_limit_milliseconds(200)` SCENARIO is added to `unit/solvers/sat/satcheck_cadical.cpp` mirroring the existing MiniSat 2 / IPASIR timeout tests: a pigeonhole(20) CNF (provably exponential for resolution-based SAT) is built directly via `new_variable` / `lcnf`, and the solver is asserted to return `P_ERROR` within five seconds. Locally the test completes in ~208 ms (200 ms deadline + ~8 ms terminator-polling latency), confirming sub-millisecond accuracy of the CaDiCaL implementation. After this commit, all three SAT back-ends with native interrupt infrastructure (MiniSat 2 via std::thread watchdog, IPASIR via `ipasir_set_terminate`, CaDiCaL via `connect_terminator`) honour `--solver-time-limit` and `set_time_limit_milliseconds` with sub-second granularity, are portable across operating systems, and return `P_ERROR` on interrupt for callers to detect. Review fixes: disconnect the terminator via an RAII guard so it is removed even if solve() throws (otherwise a stale, already-elapsed deadline would abort the next solve). Add a lower-bound timing assertion to the timeout scenario. Co-authored-by: Kiro --- src/solvers/sat/satcheck_cadical.cpp | 56 +++++++++++++++++++++--- src/solvers/sat/satcheck_cadical.h | 24 +++++++++- unit/solvers/sat/satcheck_cadical.cpp | 63 ++++++++++++++++++++++++++- 3 files changed, 134 insertions(+), 9 deletions(-) diff --git a/src/solvers/sat/satcheck_cadical.cpp b/src/solvers/sat/satcheck_cadical.cpp index 80b12756cd9..d6d0864363b 100644 --- a/src/solvers/sat/satcheck_cadical.cpp +++ b/src/solvers/sat/satcheck_cadical.cpp @@ -10,12 +10,27 @@ Author: Michael Tautschnig # include "satcheck_cadical.h" -# include # include # include # include # include +# include + +/// Concrete `CaDiCaL::Terminator` for the per-solve time limit. +/// Stores a deadline as a `steady_clock::time_point` and returns +/// true once the deadline has passed. CaDiCaL polls the terminator +/// during solving and aborts cleanly on a true return. +class satcheck_cadical_baset::terminatort : public CaDiCaL::Terminator +{ +public: + std::chrono::steady_clock::time_point deadline; + + bool terminate() override + { + return std::chrono::steady_clock::now() >= deadline; + } +}; tvt satcheck_cadical_baset::l_get(literalt a) const { @@ -114,7 +129,34 @@ propt::resultt satcheck_cadical_baset::do_prop_solve(const bvt &assumptions) auto limit2_ret = solver->limit("localsearch", localsearch_limit); CHECK_RETURN(limit2_ret); - switch(solver->solve()) + // Connect the terminator (when a time limit is set) and ensure it is + // disconnected on every exit path, including if solve() throws. Otherwise + // CaDiCaL would keep polling a terminator holding an already-elapsed + // deadline and abort the next solve immediately. + struct terminator_disconnectort + { + CaDiCaL::Solver *solver; + bool connected; + ~terminator_disconnectort() + { + if(connected) + solver->disconnect_terminator(); + } + } terminator_disconnector{solver, false}; + + if(time_limit_milliseconds != 0) + { + if(!terminator) + terminator = std::make_unique(); + terminator->deadline = std::chrono::steady_clock::now() + + std::chrono::milliseconds(time_limit_milliseconds); + solver->connect_terminator(terminator.get()); + terminator_disconnector.connected = true; + } + + const int solver_state = solver->solve(); + + switch(solver_state) { case 10: log.status() << "SAT checker: instance is SATISFIABLE" << messaget::eom; @@ -124,10 +166,14 @@ propt::resultt satcheck_cadical_baset::do_prop_solve(const bvt &assumptions) log.status() << "SAT checker: instance is UNSATISFIABLE" << messaget::eom; break; default: - log.status() << "SAT checker: solving returned without solution" + // Solving was interrupted; this is the path taken when our + // terminator signals that the configured time limit has passed. + // We return P_ERROR (matching the MiniSat 2 and IPASIR back-ends) + // so that callers can recognise a timeout, instead of throwing. + log.status() << "SAT checker: solving was interrupted (e.g. timeout)" << messaget::eom; - throw analysis_exceptiont( - "solving inside CaDiCaL SAT solver has been interrupted"); + status = statust::ERROR; + return resultt::P_ERROR; } status = statust::UNSAT; diff --git a/src/solvers/sat/satcheck_cadical.h b/src/solvers/sat/satcheck_cadical.h index 36e19027af0..cd571404446 100644 --- a/src/solvers/sat/satcheck_cadical.h +++ b/src/solvers/sat/satcheck_cadical.h @@ -10,13 +10,18 @@ Author: Michael Tautschnig #ifndef CPROVER_SOLVERS_SAT_SATCHECK_CADICAL_H #define CPROVER_SOLVERS_SAT_SATCHECK_CADICAL_H +#include + #include "cnf.h" -#include +#include +#include +#include namespace CaDiCaL // NOLINT(readability/namespace) { - class Solver; // NOLINT(readability/identifiers) +class Solver; // NOLINT(readability/identifiers) +class Terminator; // NOLINT(readability/identifiers) } class satcheck_cadical_baset : public cnf_solvert, public hardness_collectort @@ -49,12 +54,27 @@ class satcheck_cadical_baset : public cnf_solvert, public hardness_collectort bvt new_variables(std::size_t width) override; #endif + /// \copydoc propt::set_time_limit_milliseconds + /// Implemented by registering a `CaDiCaL::Terminator` that returns + /// true once the deadline has passed; the CaDiCaL solver polls the + /// terminator during solving and aborts cleanly on a true return. + void set_time_limit_milliseconds(uint32_t lim) override + { + time_limit_milliseconds = lim; + } + protected: resultt do_prop_solve(const bvt &assumptions) override; // NOLINTNEXTLINE(readability/identifiers) CaDiCaL::Solver *solver; int preprocessing_limit = 0, localsearch_limit = 0; + + /// Concrete `CaDiCaL::Terminator` implementation that returns true + /// once the per-solve deadline has been reached. Defined in the + /// .cpp so the header can keep the CaDiCaL include out. + class terminatort; + std::unique_ptr terminator; }; class satcheck_cadical_no_preprocessingt : public satcheck_cadical_baset diff --git a/unit/solvers/sat/satcheck_cadical.cpp b/unit/solvers/sat/satcheck_cadical.cpp index 6012e5dd4de..74f02ca5df3 100644 --- a/unit/solvers/sat/satcheck_cadical.cpp +++ b/unit/solvers/sat/satcheck_cadical.cpp @@ -11,11 +11,15 @@ Author: Peter Schrammel, Michael Tautschnig #ifdef HAVE_CADICAL -# include +# include # include # include -# include +# include + +# include +# include +# include SCENARIO("satcheck_cadical", "[core][solvers][sat][satcheck_cadical]") { @@ -81,6 +85,61 @@ SCENARIO("satcheck_cadical", "[core][solvers][sat][satcheck_cadical]") satcheck.prop_solve(assumptions) == propt::resultt::P_SATISFIABLE); } } + + GIVEN("A pigeonhole formula PHP(20) and a 200-millisecond time limit") + { + // Same construction as the satcheck_minisat2 unit test: PHP(N) is + // exponentially hard for resolution-based SAT solvers, so a + // 200-millisecond time limit is reliably exceeded. Verifies that + // the CaDiCaL `Terminator` we install is honoured by the + // underlying solver. + // + // CaDiCaL polls the terminator at decision points; granularity is + // therefore "shortly after the deadline". The five-second slack is + // a generous bound for slow CI hardware and CaDiCaL's polling + // interval. + satcheck_cadical_no_preprocessingt satcheck(message_handler); + constexpr std::size_t holes = 20; + constexpr std::size_t pigeons = holes + 1; + std::vector> x(pigeons); + for(std::size_t p = 0; p < pigeons; ++p) + { + x[p].reserve(holes); + for(std::size_t h = 0; h < holes; ++h) + x[p].push_back(satcheck.new_variable()); + } + // Each pigeon is in at least one hole. + for(std::size_t p = 0; p < pigeons; ++p) + { + bvt clause = x[p]; + satcheck.lcnf(clause); + } + // No two pigeons share a hole. + for(std::size_t h = 0; h < holes; ++h) + for(std::size_t p1 = 0; p1 < pigeons; ++p1) + for(std::size_t p2 = p1 + 1; p2 < pigeons; ++p2) + { + bvt clause; + clause.push_back(!x[p1][h]); + clause.push_back(!x[p2][h]); + satcheck.lcnf(clause); + } + + satcheck.set_time_limit_milliseconds(200); + + THEN("the solver returns P_ERROR (interrupted by the time limit)") + { + const auto start = std::chrono::steady_clock::now(); + const auto result = satcheck.prop_solve(); + const auto elapsed_ms = + std::chrono::duration_cast( + std::chrono::steady_clock::now() - start) + .count(); + REQUIRE(result == propt::resultt::P_ERROR); + REQUIRE(elapsed_ms >= 200); + REQUIRE(elapsed_ms < 5000); + } + } } #endif