Skip to content

Commit 616eba6

Browse files
authored
fix: while/do-while loop condition reads stale iteration-0 step output (#2662)
* fix: while/do-while loop condition reads stale iteration-0 step output After executing namespaced loop body steps, copy each iteration's results back to the original unprefixed step key so that evaluate_condition() sees the latest values instead of stale iteration-0 data. Fixes #2592 * address review: cross-platform tests, preserve iteration-0 history - Rewrite shell scripts in tests to use Python via script files instead of POSIX syntax, so they pass on Windows CI. - Snapshot iteration-0 nested-step results under a namespaced key (parent:child:0) before the first copy-back overwrite, preserving complete per-iteration history for debugging. * address review: skip copy-back on paused/failed iterations Move the status check before the copy-back so that partial results from paused or failed nested steps (e.g., a gate awaiting input) do not overwrite the unprefixed key. This preserves correct resume behavior. * address review: quote paths in test shell commands Quote both the Python executable and script file paths in the run: commands to handle spaces in paths on Windows. * address review: execute loop body with original IDs Instead of namespacing step IDs for execution and copying results back, execute the loop body with original (unprefixed) step IDs so results naturally land at the right keys. Snapshot previous iteration results to namespaced keys (parent:child:N) for history only. This fixes multi-step loop bodies where step B references step A's output within the same iteration — previously step B would see stale data until the copy-back ran after the entire iteration. * address review: namespaced execution with per-step copy-back Revert to namespaced step IDs for execution (preserving unique log entries and state keys per iteration) but copy each step's result back to the unprefixed key immediately after it completes. This preserves backward compatibility (same namespaced key format, same log IDs) while fixing both the condition evaluation bug and inter-step references within multi-step loop bodies. * address review: alias after status check, add multi-step body test - Move per-step aliasing below the PAUSED/FAILED/ABORTED status check so partial results from incomplete steps are not aliased back to the unprefixed key. - Add test_while_loop_multi_step_body_inter_step_refs to exercise a multi-step loop body where step B reads step A's output within the same iteration, verifying per-step aliasing works correctly. Addresses feedback from @doquanghuy (items 2 & 4) and Copilot review on commit 9d0a222. * address review: stable fallback IDs, expression-based inter-step test - Use enumerate() for stable fallback IDs when loop body steps lack an explicit id (step-0, step-1, etc. instead of always step-0). - Rewrite multi-step body test so step B uses expression substitution ({{ steps.step-a.output.stdout }}) instead of reading the counter file directly, making it a true regression test for per-step aliasing.
1 parent 1bf4a6e commit 616eba6

2 files changed

Lines changed: 284 additions & 15 deletions

File tree

src/specify_cli/workflows/engine.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -673,22 +673,29 @@ def _execute_steps(
673673
if not evaluate_condition(condition, context):
674674
break
675675
# Namespace nested step IDs per iteration
676-
iter_steps = []
677-
for ns in result.next_steps:
676+
# so logs and state keys are unique.
677+
# Execute one step at a time and alias each
678+
# result back to the unprefixed key so that
679+
# later steps in the same body and the loop
680+
# condition see the latest values.
681+
for ns_idx, ns in enumerate(result.next_steps):
678682
ns_copy = dict(ns)
679-
if "id" in ns_copy:
680-
ns_copy["id"] = f"{step_id}:{ns_copy['id']}:{_loop_iter + 1}"
681-
iter_steps.append(ns_copy)
682-
self._execute_steps(
683-
iter_steps, context, state, registry,
684-
step_offset=-1,
685-
)
686-
if state.status in (
687-
RunStatus.PAUSED,
688-
RunStatus.FAILED,
689-
RunStatus.ABORTED,
690-
):
691-
return
683+
orig = ns_copy.get("id")
684+
base_id = orig or f"step-{ns_idx}"
685+
ns_copy["id"] = f"{step_id}:{base_id}:{_loop_iter + 1}"
686+
self._execute_steps(
687+
[ns_copy], context, state, registry,
688+
step_offset=-1,
689+
)
690+
if state.status in (
691+
RunStatus.PAUSED,
692+
RunStatus.FAILED,
693+
RunStatus.ABORTED,
694+
):
695+
return
696+
if orig and ns_copy["id"] in context.steps:
697+
context.steps[orig] = context.steps[ns_copy["id"]]
698+
state.step_results[orig] = context.steps[ns_copy["id"]]
692699

693700
# Fan-out: execute nested step template per item with unique IDs
694701
if step_type == "fan-out":

tests/test_workflows.py

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1889,6 +1889,268 @@ def test_validate_workflow_rejects_non_string_default_for_string_type(self):
18891889
errors = validate_workflow(definition)
18901890
assert any("invalid default" in e for e in errors), errors
18911891

1892+
def test_while_loop_condition_reads_latest_iteration(self, project_dir):
1893+
"""Regression: while-loop condition must see updated step output
1894+
from the most recent iteration, not stale iteration-0 data.
1895+
1896+
See https://github.com/github/spec-kit/issues/2592
1897+
"""
1898+
from specify_cli.workflows.engine import WorkflowEngine, WorkflowDefinition
1899+
from specify_cli.workflows.base import RunStatus
1900+
1901+
# Shell step echoes a counter via a file.
1902+
# Condition: exit_code != 0 means "keep looping" — but a non-zero
1903+
# exit code would mark the step FAILED and abort the run, so we
1904+
# use stdout-based comparison instead.
1905+
#
1906+
# Iteration 0: counter=1, echoes "1" → not "done" → loop continues
1907+
# Iteration 1: counter=2, echoes "done" → condition false → stop
1908+
# Without the fix, condition always reads iteration-0 stdout,
1909+
# so the loop runs all max_iterations.
1910+
import sys
1911+
1912+
counter_file = project_dir / ".counter"
1913+
counter_file.write_text("0", encoding="utf-8")
1914+
py = sys.executable
1915+
script_file = project_dir / "_tick.py"
1916+
script_file.write_text(
1917+
f"import pathlib; p = pathlib.Path(r'{counter_file}')\n"
1918+
"n = int(p.read_text()) + 1; p.write_text(str(n))\n"
1919+
"print('done' if n >= 2 else str(n), end='')\n",
1920+
encoding="utf-8",
1921+
)
1922+
1923+
yaml_str = f"""
1924+
schema_version: "1.0"
1925+
workflow:
1926+
id: "while-condition-update"
1927+
name: "While Condition Update"
1928+
version: "1.0.0"
1929+
steps:
1930+
- id: retry-loop
1931+
type: while
1932+
condition: "{{{{ 'done' not in steps.attempt.output.stdout }}}}"
1933+
max_iterations: 5
1934+
steps:
1935+
- id: attempt
1936+
type: shell
1937+
run: '"{py}" "{script_file}"'
1938+
"""
1939+
definition = WorkflowDefinition.from_string(yaml_str)
1940+
engine = WorkflowEngine(project_dir)
1941+
state = engine.execute(definition)
1942+
1943+
assert state.status == RunStatus.COMPLETED
1944+
# The unprefixed key should reflect the latest iteration's result.
1945+
assert state.step_results["attempt"]["output"]["stdout"] == "done"
1946+
# Namespaced iteration-1 result should also exist.
1947+
assert "retry-loop:attempt:1" in state.step_results
1948+
# Counter should be 2 (iteration 0 + iteration 1), not 5.
1949+
assert counter_file.read_text(encoding="utf-8").strip() == "2"
1950+
1951+
def test_do_while_loop_condition_reads_latest_iteration(self, project_dir):
1952+
"""Regression: do-while loop condition must also see updated output.
1953+
1954+
See https://github.com/github/spec-kit/issues/2592
1955+
"""
1956+
from specify_cli.workflows.engine import WorkflowEngine, WorkflowDefinition
1957+
from specify_cli.workflows.base import RunStatus
1958+
1959+
import sys
1960+
1961+
counter_file = project_dir / ".counter"
1962+
counter_file.write_text("0", encoding="utf-8")
1963+
py = sys.executable
1964+
script_file = project_dir / "_tick.py"
1965+
script_file.write_text(
1966+
f"import pathlib; p = pathlib.Path(r'{counter_file}')\n"
1967+
"n = int(p.read_text()) + 1; p.write_text(str(n))\n"
1968+
"print('done' if n >= 2 else str(n), end='')\n",
1969+
encoding="utf-8",
1970+
)
1971+
1972+
yaml_str = f"""
1973+
schema_version: "1.0"
1974+
workflow:
1975+
id: "do-while-condition-update"
1976+
name: "Do While Condition Update"
1977+
version: "1.0.0"
1978+
steps:
1979+
- id: retry-loop
1980+
type: do-while
1981+
condition: "{{{{ 'done' not in steps.attempt.output.stdout }}}}"
1982+
max_iterations: 5
1983+
steps:
1984+
- id: attempt
1985+
type: shell
1986+
run: '"{py}" "{script_file}"'
1987+
"""
1988+
definition = WorkflowDefinition.from_string(yaml_str)
1989+
engine = WorkflowEngine(project_dir)
1990+
state = engine.execute(definition)
1991+
1992+
assert state.status == RunStatus.COMPLETED
1993+
assert state.step_results["attempt"]["output"]["stdout"] == "done"
1994+
assert counter_file.read_text(encoding="utf-8").strip() == "2"
1995+
1996+
def test_while_loop_runs_to_max_when_condition_stays_true(self, project_dir):
1997+
"""While loop must still run to max_iterations when the condition
1998+
never becomes false — copy-back must not break this path.
1999+
2000+
See https://github.com/github/spec-kit/issues/2592
2001+
"""
2002+
from specify_cli.workflows.engine import WorkflowEngine, WorkflowDefinition
2003+
from specify_cli.workflows.base import RunStatus
2004+
2005+
import sys
2006+
2007+
counter_file = project_dir / ".counter"
2008+
counter_file.write_text("0", encoding="utf-8")
2009+
py = sys.executable
2010+
script_file = project_dir / "_tick.py"
2011+
script_file.write_text(
2012+
f"import pathlib; p = pathlib.Path(r'{counter_file}')\n"
2013+
"n = int(p.read_text()) + 1; p.write_text(str(n))\n"
2014+
"print('pending', end='')\n",
2015+
encoding="utf-8",
2016+
)
2017+
2018+
yaml_str = f"""
2019+
schema_version: "1.0"
2020+
workflow:
2021+
id: "while-max-iterations"
2022+
name: "While Max Iterations"
2023+
version: "1.0.0"
2024+
steps:
2025+
- id: retry-loop
2026+
type: while
2027+
condition: "{{{{ 'done' not in steps.tick.output.stdout }}}}"
2028+
max_iterations: 3
2029+
steps:
2030+
- id: tick
2031+
type: shell
2032+
run: '"{py}" "{script_file}"'
2033+
"""
2034+
definition = WorkflowDefinition.from_string(yaml_str)
2035+
engine = WorkflowEngine(project_dir)
2036+
state = engine.execute(definition)
2037+
2038+
assert state.status == RunStatus.COMPLETED
2039+
# All 3 iterations ran (iteration 0 + 2 loop iterations).
2040+
assert counter_file.read_text(encoding="utf-8").strip() == "3"
2041+
# Unprefixed key holds the last iteration's result.
2042+
assert state.step_results["tick"]["output"]["stdout"] == "pending"
2043+
# Namespaced keys for loop iterations exist.
2044+
assert "retry-loop:tick:1" in state.step_results
2045+
assert "retry-loop:tick:2" in state.step_results
2046+
2047+
def test_do_while_loop_runs_to_max_when_condition_stays_true(self, project_dir):
2048+
"""Do-while loop must still run to max_iterations when the condition
2049+
never becomes false.
2050+
2051+
See https://github.com/github/spec-kit/issues/2592
2052+
"""
2053+
from specify_cli.workflows.engine import WorkflowEngine, WorkflowDefinition
2054+
from specify_cli.workflows.base import RunStatus
2055+
2056+
import sys
2057+
2058+
counter_file = project_dir / ".counter"
2059+
counter_file.write_text("0", encoding="utf-8")
2060+
py = sys.executable
2061+
script_file = project_dir / "_tick.py"
2062+
script_file.write_text(
2063+
f"import pathlib; p = pathlib.Path(r'{counter_file}')\n"
2064+
"n = int(p.read_text()) + 1; p.write_text(str(n))\n"
2065+
"print('pending', end='')\n",
2066+
encoding="utf-8",
2067+
)
2068+
2069+
yaml_str = f"""
2070+
schema_version: "1.0"
2071+
workflow:
2072+
id: "do-while-max-iterations"
2073+
name: "Do While Max Iterations"
2074+
version: "1.0.0"
2075+
steps:
2076+
- id: retry-loop
2077+
type: do-while
2078+
condition: "{{{{ 'done' not in steps.tick.output.stdout }}}}"
2079+
max_iterations: 3
2080+
steps:
2081+
- id: tick
2082+
type: shell
2083+
run: '"{py}" "{script_file}"'
2084+
"""
2085+
definition = WorkflowDefinition.from_string(yaml_str)
2086+
engine = WorkflowEngine(project_dir)
2087+
state = engine.execute(definition)
2088+
2089+
assert state.status == RunStatus.COMPLETED
2090+
assert counter_file.read_text(encoding="utf-8").strip() == "3"
2091+
assert state.step_results["tick"]["output"]["stdout"] == "pending"
2092+
2093+
def test_while_loop_multi_step_body_inter_step_refs(self, project_dir):
2094+
"""Multi-step loop body: step B must see step A's output from the
2095+
current iteration, not a stale previous one.
2096+
2097+
See https://github.com/github/spec-kit/issues/2592
2098+
"""
2099+
from specify_cli.workflows.engine import WorkflowEngine, WorkflowDefinition
2100+
from specify_cli.workflows.base import RunStatus
2101+
2102+
import sys
2103+
2104+
counter_file = project_dir / ".counter"
2105+
counter_file.write_text("0", encoding="utf-8")
2106+
py = sys.executable
2107+
2108+
# Step A: increments counter file, echoes the value.
2109+
step_a_file = project_dir / "_step_a.py"
2110+
step_a_file.write_text(
2111+
f"import pathlib; p = pathlib.Path(r'{counter_file}')\n"
2112+
"n = int(p.read_text()) + 1; p.write_text(str(n))\n"
2113+
"print(str(n), end='')\n",
2114+
encoding="utf-8",
2115+
)
2116+
2117+
# Step B uses {{ steps.step-a.output.stdout }} expression
2118+
# substitution in its run command so the engine resolves the
2119+
# aliased unprefixed key — this is the real inter-step test.
2120+
yaml_str = f"""
2121+
schema_version: "1.0"
2122+
workflow:
2123+
id: "while-multi-step"
2124+
name: "While Multi Step"
2125+
version: "1.0.0"
2126+
steps:
2127+
- id: retry-loop
2128+
type: while
2129+
condition: "{{{{ 'done' not in steps.step-a.output.stdout }}}}"
2130+
max_iterations: 3
2131+
steps:
2132+
- id: step-a
2133+
type: shell
2134+
run: '"{py}" "{step_a_file}"'
2135+
- id: step-b
2136+
type: shell
2137+
run: "echo b-saw-{{{{ steps.step-a.output.stdout }}}}"
2138+
"""
2139+
definition = WorkflowDefinition.from_string(yaml_str)
2140+
engine = WorkflowEngine(project_dir)
2141+
state = engine.execute(definition)
2142+
2143+
assert state.status == RunStatus.COMPLETED
2144+
# Both unprefixed keys reflect the latest iteration's results.
2145+
assert state.step_results["step-a"]["output"]["stdout"] == "3"
2146+
# Step B saw step A's output via expression substitution.
2147+
assert "b-saw-3" in state.step_results["step-b"]["output"]["stdout"]
2148+
# Namespaced keys exist for loop iterations.
2149+
assert "retry-loop:step-a:1" in state.step_results
2150+
assert "retry-loop:step-b:1" in state.step_results
2151+
assert "retry-loop:step-a:2" in state.step_results
2152+
assert "retry-loop:step-b:2" in state.step_results
2153+
18922154

18932155
# ===== State Persistence Tests =====
18942156

0 commit comments

Comments
 (0)