From 5307dd7c0689de88e1ed3d51808cc0675c3742bc Mon Sep 17 00:00:00 2001 From: Isaac Kargar Date: Wed, 13 May 2026 07:02:09 +0300 Subject: [PATCH 1/8] Add initial files for BAML agents - Created a `.gitignore` file to exclude temporary files and Python cache directories. - Added a `README.md` documenting the BAML agents' purpose, usage, and dependencies. - Introduced the `baml_client` module with components including `async_client`, `sync_client`, `config`, `globals`, `parser`, `runtime`, `stream_types`, and `tracing`, establishing the foundational structure for BAML agent functionality. - Implemented classes and methods for asynchronous and synchronous client operations, response parsing, and configuration management. This commit lays the groundwork for further development of BAML agents and their integration with the Lerim framework. --- baml_agents/.gitignore | 3 + baml_agents/README.md | 53 + baml_agents/baml_client/__init__.py | 60 ++ baml_agents/baml_client/async_client.py | 153 +++ baml_agents/baml_client/config.py | 102 ++ baml_agents/baml_client/globals.py | 35 + baml_agents/baml_client/inlinedbaml.py | 20 + baml_agents/baml_client/parser.py | 46 + baml_agents/baml_client/runtime.py | 361 +++++++ baml_agents/baml_client/stream_types.py | 104 ++ baml_agents/baml_client/sync_client.py | 164 ++++ baml_agents/baml_client/tracing.py | 22 + baml_agents/baml_client/type_builder.py | 861 ++++++++++++++++ baml_agents/baml_client/type_map.py | 55 ++ baml_agents/baml_client/types.py | 153 +++ baml_agents/baml_client/watchers.py | 44 + baml_agents/baml_extract_agent/__init__.py | 5 + baml_agents/baml_extract_agent/graph.py | 410 ++++++++ baml_agents/baml_extract_agent/run.py | 68 ++ baml_agents/baml_extract_agent/tool_bridge.py | 376 +++++++ baml_agents/baml_src/extract_react.baml | 920 ++++++++++++++++++ baml_agents/baml_src/generators.baml | 6 + src/lerim/config/providers.py | 70 +- 23 files changed, 4081 insertions(+), 10 deletions(-) create mode 100644 baml_agents/.gitignore create mode 100644 baml_agents/README.md create mode 100644 baml_agents/baml_client/__init__.py create mode 100644 baml_agents/baml_client/async_client.py create mode 100644 baml_agents/baml_client/config.py create mode 100644 baml_agents/baml_client/globals.py create mode 100644 baml_agents/baml_client/inlinedbaml.py create mode 100644 baml_agents/baml_client/parser.py create mode 100644 baml_agents/baml_client/runtime.py create mode 100644 baml_agents/baml_client/stream_types.py create mode 100644 baml_agents/baml_client/sync_client.py create mode 100644 baml_agents/baml_client/tracing.py create mode 100644 baml_agents/baml_client/type_builder.py create mode 100644 baml_agents/baml_client/type_map.py create mode 100644 baml_agents/baml_client/types.py create mode 100644 baml_agents/baml_client/watchers.py create mode 100644 baml_agents/baml_extract_agent/__init__.py create mode 100644 baml_agents/baml_extract_agent/graph.py create mode 100644 baml_agents/baml_extract_agent/run.py create mode 100644 baml_agents/baml_extract_agent/tool_bridge.py create mode 100644 baml_agents/baml_src/extract_react.baml create mode 100644 baml_agents/baml_src/generators.baml diff --git a/baml_agents/.gitignore b/baml_agents/.gitignore new file mode 100644 index 0000000..764244a --- /dev/null +++ b/baml_agents/.gitignore @@ -0,0 +1,3 @@ +.tmp/ +__pycache__/ +*.pyc diff --git a/baml_agents/README.md b/baml_agents/README.md new file mode 100644 index 0000000..a511d46 --- /dev/null +++ 
b/baml_agents/README.md @@ -0,0 +1,53 @@ +# Lerim BAML Agents + +A minimal experiment for testing whether BAML improves ReAct-style tool selection +with a small local Ollama model. + +## What This Uses + +- LangGraph builds the graph loop. +- BAML makes the LLM call and parses the next action into a typed schema (see + the direct-call sketch at the end of this README). +- Ollama serves `gemma4:e4b` through `http://127.0.0.1:11434/v1`. +- MiniMax M2.7 can also be used through BAML's OpenAI-compatible client + registry with `--baml-provider minimax`. +- The BAML function copies Lerim's extraction `SYSTEM_PROMPT` text; the small + BAML/LangGraph harness adaptations live in `baml_src/extract_react.baml`. +- Lerim's existing DB-backed extraction tools are imported from `src/lerim`. +- The default model can be overridden with `--model` for local and API model + comparisons. + +## Run + +From the `lerim-cli` repo root: + +```bash +uv run --with baml-py==0.222.0 baml-cli generate --from baml_agents/baml_src +PYTHONPATH="baml_agents:src" uv run --with baml-py==0.222.0 --with langgraph==1.2.0 \ + python -m baml_extract_agent.run \ + --trace tests/fixtures/traces/unit/codex_simple.jsonl \ + --context-db baml_agents/.tmp/context.sqlite3 \ + --project-root . \ + --model gemma4:e4b +``` + +MiniMax M2.7: + +```bash +PYTHONPATH="baml_agents:src" uv run --with baml-py==0.222.0 --with langgraph==1.2.0 \ + python -m baml_extract_agent.run \ + --trace tests/fixtures/traces/unit/codex_simple.jsonl \ + --context-db baml_agents/.tmp/context_minimax.sqlite3 \ + --project-root . \ + --baml-provider minimax \ + --model MiniMax-M2.7 \ + --temperature 1.0 +``` + +BAML-native tests live in `baml_src/extract_react.baml`: + +```bash +uv run --with baml-py==0.222.0 baml-cli test --from baml_agents/baml_src --parallel 1 -i "DecideNextExtractStep::" +``` + +The graph writes into the context DB you pass with `--context-db`. Use a scratch +DB while comparing behavior.
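 + +## Calling the Generated Client Directly + +For quick experiments outside the LangGraph loop, the generated sync client can +be called directly. This is a minimal sketch, not part of the harness: it +assumes `baml-cli generate` has already run, `baml_agents` is on `PYTHONPATH`, +and Ollama is serving the default model. The argument strings mirror the shapes +used by the BAML tests. + +```python +# Minimal sketch: assumes Ollama is serving gemma4:e4b locally (see Run above). +from baml_client import b # generated sync client + +step = b.DecideNextExtractStep( + runtime_dashboard="CONTEXT: 0/200000 (0%) [normal]\nNOTES: 0 findings", + run_instruction="Read the trace and write exactly one episode record.", + tool_manifest="- read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk.", + scratchpad="No prior actions.", +) + +# The result is a typed ExtractAgentStep: an action enum plus per-action args. +print(step.action, step.rationale) +``` diff --git a/baml_agents/baml_client/__init__.py b/baml_agents/baml_client/__init__.py new file mode 100644 index 0000000..b279bc3 --- /dev/null +++ b/baml_agents/baml_client/__init__.py @@ -0,0 +1,60 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +__version__ = "0.222.0" + +try: + from baml_py.safe_import import EnsureBamlPyImport +except ImportError: + raise ImportError(f"""Update to baml-py required. +Version of baml_client generator (see generators.baml): {__version__} + +Please upgrade baml-py to version "{__version__}". + +$ pip install baml-py=={__version__} +$ uv add baml-py=={__version__} + +If nothing else works, please ask for help: + +https://github.com/boundaryml/baml/issues +https://boundaryml.com/discord +""") from None + + +with EnsureBamlPyImport(__version__) as e: + e.raise_if_incompatible_version(__version__) + + from . import types + from . import tracing + from . import stream_types + from . import config + from .config import reset_baml_env_vars + + from .sync_client import b + + from . 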
import watchers + + +# FOR LEGACY COMPATIBILITY, expose "partial_types" as an alias for "stream_types" +# WE RECOMMEND USERS TO USE "stream_types" INSTEAD +partial_types = stream_types + +__all__ = [ + "b", + "stream_types", + "partial_types", + "tracing", + "types", + "reset_baml_env_vars", + "config", + "watchers", +] \ No newline at end of file diff --git a/baml_agents/baml_client/async_client.py b/baml_agents/baml_client/async_client.py new file mode 100644 index 0000000..5d41237 --- /dev/null +++ b/baml_agents/baml_client/async_client.py @@ -0,0 +1,153 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +import typing +import typing_extensions +import baml_py + +from . import stream_types, types, type_builder +from .parser import LlmResponseParser, LlmStreamParser +from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions +from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__ + + +class BamlAsyncClient: + __options: DoNotUseDirectlyCallManager + __stream_client: "BamlStreamClient" + __http_request: "BamlHttpRequestClient" + __http_stream_request: "BamlHttpStreamRequestClient" + __llm_response_parser: LlmResponseParser + __llm_stream_parser: LlmStreamParser + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + self.__stream_client = BamlStreamClient(options) + self.__http_request = BamlHttpRequestClient(options) + self.__http_stream_request = BamlHttpStreamRequestClient(options) + self.__llm_response_parser = LlmResponseParser(options) + self.__llm_stream_parser = LlmStreamParser(options) + + def with_options(self, + tb: typing.Optional[type_builder.TypeBuilder] = None, + client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] = None, + client: typing.Optional[str] = None, + collector: typing.Optional[typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]]] = None, + env: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None, + tags: typing.Optional[typing.Dict[str, str]] = None, + on_tick: typing.Optional[typing.Callable[[str, baml_py.baml_py.FunctionLog], None]] = None, + ) -> "BamlAsyncClient": + options: BamlCallOptions = {} + if tb is not None: + options["tb"] = tb + if client_registry is not None: + options["client_registry"] = client_registry + if client is not None: + options["client"] = client + if collector is not None: + options["collector"] = collector + if env is not None: + options["env"] = env + if tags is not None: + options["tags"] = tags + if on_tick is not None: + options["on_tick"] = on_tick + return BamlAsyncClient(self.__options.merge_options(options)) + + @property + def stream(self): + return self.__stream_client + + @property + def request(self): + return self.__http_request + + @property + def stream_request(self): + return self.__http_stream_request + + @property + def parse(self): + return self.__llm_response_parser + + @property + def parse_stream(self): + return self.__llm_stream_parser + + async def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str, + baml_options: 
BamlCallOptions = {}, + ) -> types.ExtractAgentStep: + # Check if on_tick is provided + if 'on_tick' in baml_options: + # Use streaming internally when on_tick is provided + __stream__ = self.stream.DecideNextExtractStep(runtime_dashboard=runtime_dashboard,run_instruction=run_instruction,tool_manifest=tool_manifest,scratchpad=scratchpad, + baml_options=baml_options) + return await __stream__.get_final_response() + else: + # Original non-streaming code + __result__ = await self.__options.merge_options(baml_options).call_function_async(function_name="DecideNextExtractStep", args={ + "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad, + }) + return typing.cast(types.ExtractAgentStep, __result__.cast_to(types, types, stream_types, False, __runtime__)) + + + +class BamlStreamClient: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlStream[stream_types.ExtractAgentStep, types.ExtractAgentStep]: + __ctx__, __result__ = self.__options.merge_options(baml_options).create_async_stream(function_name="DecideNextExtractStep", args={ + "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad, + }) + return baml_py.BamlStream[stream_types.ExtractAgentStep, types.ExtractAgentStep]( + __result__, + lambda x: typing.cast(stream_types.ExtractAgentStep, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.ExtractAgentStep, x.cast_to(types, types, stream_types, False, __runtime__)), + __ctx__, + ) + + +class BamlHttpRequestClient: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + async def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="DecideNextExtractStep", args={ + "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad, + }, mode="request") + return __result__ + + +class BamlHttpStreamRequestClient: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + async def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="DecideNextExtractStep", args={ + "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad, + }, mode="stream") + return __result__ + + +b = BamlAsyncClient(DoNotUseDirectlyCallManager({})) \ No newline at end of file diff --git a/baml_agents/baml_client/config.py b/baml_agents/baml_client/config.py new file mode 100644 index 0000000..64b7fff --- /dev/null +++ b/baml_agents/baml_client/config.py @@ -0,0 +1,102 @@ +# ---------------------------------------------------------------------------- +# 
+# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +from __future__ import annotations + +import os +import warnings +import typing_extensions +import typing +import functools + +from baml_py.logging import ( + get_log_level as baml_get_log_level, + set_log_level as baml_set_log_level, +) +from .globals import reset_baml_env_vars + +rT = typing_extensions.TypeVar("rT") # return type +pT = typing_extensions.ParamSpec("pT") # parameters type + + +def _deprecated(message: str): + def decorator(func: typing.Callable[pT, rT]) -> typing.Callable[pT, rT]: + """Use this decorator to mark functions as deprecated. + Every time the decorated function runs, it will emit + a "deprecation" warning.""" + + @functools.wraps(func) + def new_func(*args: pT.args, **kwargs: pT.kwargs): + warnings.simplefilter("always", DeprecationWarning) # turn off filter + warnings.warn( + "Call to a deprecated function {}.".format(func.__name__) + message, + category=DeprecationWarning, + stacklevel=2, + ) + warnings.simplefilter("default", DeprecationWarning) # reset filter + return func(*args, **kwargs) + + return new_func + + return decorator + + +@_deprecated("Use os.environ['BAML_LOG'] instead") +def get_log_level(): + """ + Get the log level for the BAML Python client. + """ + return baml_get_log_level() + + +@_deprecated("Use os.environ['BAML_LOG'] instead") +def set_log_level( + level: typing_extensions.Literal["DEBUG", "INFO", "WARN", "ERROR", "OFF"] | str, +): + """ + Set the log level for the BAML Python client + """ + baml_set_log_level(level) + os.environ["BAML_LOG"] = level + + +@_deprecated("Use os.environ['BAML_LOG_JSON_MODE'] instead") +def set_log_json_mode(): + """ + Set the log JSON mode for the BAML Python client. + """ + os.environ["BAML_LOG_JSON_MODE"] = "true" + + +@_deprecated("Use os.environ['BAML_LOG_MAX_CHUNK_LENGTH'] instead") +def set_log_max_chunk_length(): + """ + Set the maximum log chunk length for the BAML Python client. + """ + os.environ["BAML_LOG_MAX_CHUNK_LENGTH"] = "1000" + + +def set_log_max_message_length(*args, **kwargs): + """ + Alias for set_log_max_chunk_length for compatibility with docs. + """ + return set_log_max_chunk_length(*args, **kwargs) + + +__all__ = [ + "set_log_level", + "get_log_level", + "set_log_json_mode", + "reset_baml_env_vars", + "set_log_max_message_length", + "set_log_max_chunk_length", +] diff --git a/baml_agents/baml_client/globals.py b/baml_agents/baml_client/globals.py new file mode 100644 index 0000000..769e055 --- /dev/null +++ b/baml_agents/baml_client/globals.py @@ -0,0 +1,35 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. 
+ +from __future__ import annotations +import os +import warnings + +from baml_py import BamlCtxManager, BamlRuntime +from .inlinedbaml import get_baml_files +from typing import Dict + +DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME = BamlRuntime.from_files( + "baml_src", + get_baml_files(), + os.environ.copy() +) +DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX = BamlCtxManager(DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME) + +def reset_baml_env_vars(env_vars: Dict[str, str]): + warnings.warn( + "reset_baml_env_vars is deprecated and should be removed. Environment variables are now lazily loaded on each function call", + DeprecationWarning, + stacklevel=2 + ) + +__all__ = [] diff --git a/baml_agents/baml_client/inlinedbaml.py b/baml_agents/baml_client/inlinedbaml.py new file mode 100644 index 0000000..e6bbc38 --- /dev/null +++ b/baml_agents/baml_client/inlinedbaml.py @@ -0,0 +1,20 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +_file_map = { + + "extract_react.baml": "retry_policy ExtractAgentRetry {\n max_retries 1\n strategy {\n type exponential_backoff\n delay_ms 500\n multiplier 2\n max_delay_ms 8000\n }\n}\n\nclient OllamaGemma4E4B {\n provider \"openai-generic\"\n retry_policy ExtractAgentRetry\n options {\n base_url \"http://127.0.0.1:11434/v1\"\n model \"gemma4:e4b\"\n temperature 0.0\n http {\n connect_timeout_ms 10000\n time_to_first_token_timeout_ms 120000\n idle_timeout_ms 30000\n request_timeout_ms 300000\n }\n }\n}\n\nenum ExtractAction {\n READ_TRACE @alias(\"read_trace\") @description(\"Read the next numbered trace chunk.\")\n SEARCH_CONTEXT @alias(\"search_context\") @description(\"Search existing DB-backed context by meaning.\")\n GET_CONTEXT @alias(\"get_context\") @description(\"Fetch full context records by record ID before any revision.\")\n SAVE_CONTEXT @alias(\"save_context\") @description(\"Create one episode or durable context record.\")\n REVISE_CONTEXT @alias(\"revise_context\") @description(\"Revise a fetched context record with a complete improved payload.\")\n NOTE_TRACE_FINDINGS @alias(\"note_trace_findings\") @description(\"Checkpoint durable or implementation findings from a long trace.\")\n PRUNE_TRACE_READS @alias(\"prune_trace_reads\") @description(\"Prune earlier trace chunks after findings are captured.\")\n FINAL_RESULT @alias(\"final_result\") @description(\"Finish only after exactly one current-session episode record exists.\")\n}\n\nenum RecordKind {\n DECISION @alias(\"decision\") @description(\"A durable project decision with decision and why fields.\")\n PREFERENCE @alias(\"preference\") @description(\"A stable user or workflow preference.\")\n CONSTRAINT @alias(\"constraint\") @description(\"A durable invariant, limit, or must/cannot rule.\")\n FACT @alias(\"fact\") @description(\"A durable project fact or setup truth.\")\n REFERENCE @alias(\"reference\") @description(\"A pointer to an external source of truth.\")\n EPISODE @alias(\"episode\") @description(\"The mandatory per-session episode record; use this for session summaries.\")\n}\n\nenum RecordStatus {\n ACTIVE @alias(\"active\") 
@description(\"Current context that future sessions may reuse.\")\n ARCHIVED @alias(\"archived\") @description(\"Historical or routine context that should not be treated as active guidance.\")\n}\n\nenum FindingLevel {\n DECISION @alias(\"decision\") @description(\"Durable decision-level finding.\")\n PREFERENCE @alias(\"preference\") @description(\"Durable user or workflow preference finding.\")\n FEEDBACK @alias(\"feedback\") @description(\"Durable feedback-level finding.\")\n REFERENCE @alias(\"reference\") @description(\"Durable external-source finding.\")\n CONSTRAINT @alias(\"constraint\") @description(\"Durable constraint-level finding.\")\n FACT @alias(\"fact\") @description(\"Durable fact-level finding.\")\n IMPLEMENTATION @alias(\"implementation\") @description(\"Trace-local implementation evidence or discarded hypothesis.\")\n}\n\nclass ReadTraceArgs {\n start_line int?\n line_count int?\n}\n\nclass SearchContextArgs {\n query string @description(\"Natural-language query for the durable meaning, not '*' or an empty browse request.\")\n kind RecordKind? @description(\"Optional filter. Use only when the desired record kind is known.\")\n status RecordStatus? @description(\"Optional lifecycle filter.\")\n valid_at string? @description(\"Optional timestamp for historical lookup.\")\n include_archived bool? @description(\"Whether archived records should be included.\")\n limit int? @description(\"Maximum hits to return.\")\n}\n\nclass GetContextArgs {\n record_ids string[] @description(\"Record IDs returned by search_context or list_context. Fetch before any revision.\")\n include_versions bool? @description(\"Whether to include prior versions.\")\n detail \"concise\" | \"detailed\"? @description(\"Use concise unless full version history is needed.\")\n}\n\nclass SaveContextArgs {\n kind RecordKind @description(\"Use episode for the mandatory session summary; use durable kinds only for reusable project context.\")\n title string @description(\"Short standalone title for the lasting point or episode.\")\n body string @description(\"Compact standalone body. Do not include trace-local command logs or copied errors.\")\n status RecordStatus? @description(\"Use archived for routine episodes with no durable signal; active for reusable durable records.\")\n valid_from string? @description(\"Optional validity start timestamp.\")\n valid_until string? @description(\"Optional validity end timestamp.\")\n decision string? @description(\"Required only for kind=decision; omit for facts, preferences, constraints, references, and episodes.\")\n why string? @description(\"Required only for kind=decision when rationale exists; omit when the trace says no durable rationale was supplied.\")\n alternatives string? @description(\"Optional decision alternatives. Omit for non-decisions.\")\n consequences string? @description(\"Optional direct application guidance for the same durable point.\")\n user_intent string? @description(\"Required when kind=episode. Concise statement of what the user wanted in this source session.\")\n what_happened string? @description(\"Required when kind=episode. Concise statement of what the session actually did.\")\n outcomes string? 
@description(\"Optional concise episode outcome.\")\n}\n\nclass ReviseContextArgs {\n record_id string\n reason string @description(\"Short reason for the revision.\")\n kind RecordKind @description(\"Must match the fetched record kind; revise_context cannot change kind.\")\n title string @description(\"Complete improved title.\")\n body string @description(\"Complete improved body.\")\n status RecordStatus? @description(\"Lifecycle status to keep or set.\")\n valid_from string?\n valid_until string?\n decision string?\n why string?\n alternatives string?\n consequences string?\n user_intent string?\n what_happened string?\n outcomes string?\n}\n\nclass NoteTraceFindingsArgs {\n theme string? @description(\"Short theme label for this finding. Leave empty only for the no-findings checkpoint.\")\n line int? @description(\"1-based trace line with supporting evidence. Leave empty only for the no-findings checkpoint.\")\n quote string? @description(\"Short evidence quote from the trace. Leave empty only for the no-findings checkpoint.\")\n level FindingLevel? @description(\"Durable levels only for reusable context; implementation for trace-local evidence.\")\n}\n\nclass PruneTraceReadsArgs {\n start_lines int[] @description(\"1-based start_line values from earlier read_trace calls, such as 1, 101, or 201. Never use context token counts.\")\n}\n\nclass FinalResultArgs {\n completion_summary string\n}\n\nclass ExtractAgentStep {\n action ExtractAction\n rationale string @description(\"One short sentence explaining why this is the next action. Do not include hidden chain-of-thought.\")\n read_trace ReadTraceArgs?\n search_context SearchContextArgs?\n get_context GetContextArgs?\n save_context SaveContextArgs?\n revise_context ReviseContextArgs?\n note_trace_findings NoteTraceFindingsArgs?\n prune_trace_reads PruneTraceReadsArgs?\n final_result FinalResultArgs?\n}\n\nfunction DecideNextExtractStep(\n runtime_dashboard: string,\n run_instruction: string,\n tool_manifest: string,\n scratchpad: string\n) -> ExtractAgentStep {\n client OllamaGemma4E4B\n prompt #\"\n {{ _.role(\"system\") }}\n \n You are the Lerim extract agent.\n Read one coding-agent trace, compress its signal, and write DB-backed context records.\n \n\n \n - Create exactly one episode record for the session.\n - Create zero or more durable records only when the trace contains durable signal.\n - The episode record is mandatory for every session, even if you also create or update durable records.\n - Updating an existing durable record never replaces the required episode for the current session.\n - The run is not complete until the current session has its episode record.\n - Treat the trace as historical evidence from its source session time, not as live verification of current code.\n - On short traces where the session is already clear after reading, prefer to create the episode promptly rather than leaving it until the end.\n - Episode records must include `user_intent` and `what_happened`; do not put the whole episode only in `body`.\n - Use `status=\"archived\"` for the episode when the session is routine operational work with no durable signal. Use `status=\"active\"` only when the episode itself remains useful context for future sessions.\n \n\n \n Durable signal means one of:\n - decision\n - preference\n - constraint\n - fact\n - reference\n\n Implementation detail alone is not durable signal.\n A temporary code-state finding, audit observation, open task, or release-risk report is not durable by itself. 
Promote it only when the trace establishes a reusable project rule, unresolved constraint, stable dependency, or standing source of truth.\n \n\n \n - Store the reusable rule, decision, invariant, dependency, preference, or external pointer, not the story of the session.\n - One durable record should hold one durable point.\n - Direct consequences and application guidance usually stay inside that same record.\n - Create the minimum number of durable records that preserves distinct durable meanings. Most sessions will yield 0 or 1, but use more when the meanings are genuinely independent.\n - Classify each durable point into one canonical kind. Prefer the most specific supported kind, and do not duplicate the same point across multiple durable kinds.\n - Duplicates are worse than gaps. Skip uncertain candidates rather than spraying near-duplicates.\n - Never create a second durable record in the same run for the same core claim. If you realize the first draft needs improvement, update or refine that record instead of creating another one.\n - `constraint` and `reference` are first-class durable record kinds, not fallback categories.\n \n\n \n - patch logs, command sequences, retries, timelines, or meeting-style recaps\n - code structure, file paths, git history, or storage mechanics by themselves\n - generic programming knowledge or facts already obvious from the repo\n - rejected lures, discarded explanations, or implementation-only distractions\n - one-run validation findings, approval flow, queue state, DB resets, rebuilds, or\n runtime diagnostics by themselves\n \n\n \n - Read the trace in chunks until the full trace is covered. Do not start writing while unread trace lines remain.\n - Use the findings scratchpad for evidence from chunks you have already read. Notes are summarized back to you on later turns; do not record the same point again unless you learned something new.\n - Keep each durable theme and its supporting implementation evidence together. Do not record a rejected lure or discarded explanation as its own durable finding/theme.\n - If one apparent finding only applies, routes, or operationalizes another finding, keep them as one durable theme instead of separate durable themes.\n - If the trace needs more than one read, call `note_trace_findings` once per useful finding with theme, line, quote, and level before saving or revising context. Call it with no arguments when the full trace has no reusable signal.\n - If you read many chunks, prune older read results only after those chunks have already been captured in notes.\n - Search existing context before creating a durable record whenever the trace suggests an earlier record, duplicate risk, or \"same meaning vs new meaning\" judgment.\n - The injected existing-record manifest is only a shortlist. It is never enough evidence for a revision.\n - Fetch full records before any revision, and fetch each plausible target when several nearby records could match.\n - Revise only when a fetched record clearly carries the same meaning and needs repair. If the core claim differs, create a new record instead.\n - When the trace says an existing durable rule is correct but needs tightening, clarification, or a better why, fetch that record and update it rather than leaving the weaker wording unchanged.\n - Avoid cosmetic same-run revisions. Revise a same-run record only to fix a concrete durable-context error or prevent a duplicate.\n \n\n \n - The system may inject `CONTEXT:` messages showing approximate context pressure. 
At soft or hard pressure, prune old trace chunks after their findings are captured.\n - The system may inject `NOTES:` messages summarizing findings and trace coverage. Use them as a progress dashboard, not as a replacement for reading unread trace lines.\n - The findings scratchpad writes the dashboard for future turns; do not try to reread the dashboard with tools.\n \n\n \n - First separate findings into durable signal and implementation evidence.\n - Synthesize at the theme level. Usually one theme becomes one durable record.\n - Create multiple durable records only when the trace establishes multiple independent durable points, each with its own support.\n - Do not store one durable point as both a preference and a decision, a fact and a decision, or any other cross-kind duplicate.\n - A stable workflow preference is not also a decision unless the trace separately states an explicit project decision with rationale.\n - A dependency, setup, or environment truth without durable rationale is a fact, not also a decision.\n - A failure caused by the current run's temporary validation setup is not itself a\n durable environment truth. If the investigation reveals a stable requirement that\n future sessions must apply, save that requirement as the durable point and omit\n the temporary validation story.\n - Merge candidates when one only states how to apply the other in local operations, routing, or ownership.\n - If two candidates share the same core claim, merge them.\n - If the difference is only evidence framing, symptom wording, or local-vs-CI phrasing around the same durable fact, keep one record and fold the extra context into it.\n - If one candidate is only the direct application or routing consequence of another, keep it inside the stronger record.\n - Storage boundary plus per-component routing is one decision, not two. Keep the boundary as the record and fold the routing guidance into the same title/body.\n - If one candidate only says how different local components should apply the same project rule, keep that guidance inside the main record rather than creating a second durable record.\n - If one candidate only restates where local project components live or how an internal architecture is applied, keep it inside the stronger decision, fact, or constraint instead of creating a separate reference.\n - If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule. The filtering guidance is evidence, not a second record.\n - Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered. 
That is extraction guidance for this run, not project context.\n - Store durable records only when the lesson is likely reusable beyond this trace.\n - If a candidate is mainly about this trace's commands, files, or timeline, reject it.\n - Trace-local instructions about what to ignore in this session are not preferences unless they clearly express a broader standing workflow rule for future sessions.\n - If the trace explicitly says the rationale is unknown or says not to invent one, do not create a `decision`; use `fact` instead.\n - A stable setup, dependency, or environment requirement without a durable why is a `fact` even if it sounds like the current chosen setup.\n - The instruction \"do not invent a why\" is extraction guidance, not project context.\n - When the trace contains one durable dependency or setup fact plus instructions about how to classify that same evidence, store only the dependency or setup fact. Do not turn the classification guidance into a separate `preference`.\n - If the trace explicitly rejects a lure or distraction, do not carry that rejected idea into the durable record text unless the rejection itself is the durable lesson.\n - If this older trace conflicts with newer existing active records, do not create a new active durable record for the older claim. Preserve the historical session in the episode and let the newer active record remain current.\n - If a long noisy investigation resolves into one source-of-truth boundary, store only that boundary. Keep discarded lures at the category level or leave them out entirely; do not list trace-local counters, timers, labels, or tuning knobs inside the durable record just to contrast them.\n - When a discarded lure matters as evidence, keep it attached to the main durable theme as implementation context rather than storing it as a second durable theme.\n - If the episode summary contains clearly reusable decision, preference, constraint, fact, or reference, that point should usually also exist as its own durable record.\n - Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode. The episode says what happened; the durable record stores what future sessions should reuse.\n - Durable records are additional project context, not a substitute for the session episode. Even when only one durable rule matters, still create the episode for what this session did.\n \n\n \n - Durable titles should name the lasting rule, decision, fact, constraint, preference, or reference directly.\n - Durable bodies should be compact, neutral, and standalone.\n - When a durable decision prohibits or routes a named interface, data path, dependency, provider, or boundary, preserve that named subject in the record instead of replacing it with a broader abstraction.\n - Prefer this shape for durable records:\n 1. the durable point\n 2. why it matters\n 3. how to apply it later\n - Do not write durable records as meeting minutes, patch logs, or cleanup commentary.\n - Do not preserve trace-local commands, negotiation phrasing, or \"this is not about X\" sentences in final record text.\n - Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored.\n - Do not mention discarded implementation noise in durable record fields, including `consequences`. 
If details are non-durable, omit them entirely rather than saying they are non-durable.\n - When the durable lesson is a source-of-truth rule, write the authoritative rule directly. Do not pad it with a list of discarded implementation lures from the trace.\n - If a short contrast is still helpful, keep it abstract, such as \"not worker-local state\" or \"not ephemeral local state\". Do not enumerate examples in parentheses or comma-separated lists.\n - When updating an existing record, keep the durable meaning but rewrite it into canonical project-context language.\n - When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection. Do not imply that a bug, missing capability, or release blocker is current unless the trace itself establishes that it remains unresolved as durable project context.\n - Facts from noisy failures must be rewritten into the underlying dependency, environment requirement, stakeholder driver, or operational fact.\n - If a fact still reads like stderr, an exception symptom, or copied command output, rewrite it again before writing.\n - When the durable lesson is an environment or dependency requirement, do not center the fact on the observed failure symptom. Name the requirement directly and mention the symptom only if it is needed as brief supporting context.\n - If brief supporting context is useful, lead with the requirement and keep the symptom generic. Never include exception class names, quoted error fragments, or copied failure strings in the durable fact.\n - If the candidate is mainly \"this validation run failed until we changed the setup\",\n it belongs in the archived episode. If the candidate names a reusable setup or\n runtime requirement discovered through that validation, keep the requirement and\n drop the failure narrative.\n - When no durable rationale exists, do not spend the fact body explaining that the rationale is absent. Just state the stable dependency, setup requirement, or operational truth directly.\n - Do not quote or paraphrase trace instructions about how to classify the evidence inside the final fact body. Final fact text should describe the underlying truth, not the extraction rule you followed.\n - References must answer both \"where should future sessions look?\" and \"when should they consult it?\"\n - Do not use `reference` for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than \"consult this external source next time.\"\n - Keep the episode concise: short title, short body, concise `user_intent`, `what_happened`, and `outcomes`.\n - If the session is mostly routine operational work with little future value and no durable record, create the episode with `status=\"archived\"`.\n \n\n \n \n Stable workflow guidance from the user. 
Save corrections and confirmed non-obvious working style that should carry into future sessions.\n Do not use `preference` for one-session extraction guidance such as \"that detail is just noise in this trace.\"\n \n \n A chosen approach or project rule that future work should follow and that is not obvious from code alone.\n If the trace does not support a durable why, do not use `decision`.\n \n \n A durable invariant, limit, or must/cannot rule that future work must respect.\n \n \n A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth.\n Use `fact` for stable setup or dependency truths when the trace explicitly says not to invent decision rationale.\n \n \n A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo.\n Use `reference` only when the enduring value is where to look later. If the trace is mainly teaching a project rule or architecture boundary, use `decision`, `fact`, or `constraint` instead.\n \n \n\n \n \n \n - assistant patches a bug and writes a tidy summary\n - user: \"The diff is enough. Don't end with a recap every time.\"\n - later turns continue with normal edits, tests, and review comments\n \n \n Create one preference record about keeping replies terse and not appending redundant change recaps.\n \n \n Store the file edit itself, or treat the correction as only a one-session scratch finding when it is clearly stable workflow guidance.\n \n \n\n \n \n - early turns discuss local refactors, temporary debug prints, and a flaky test\n - midway, several ideas are tried and discarded\n - late in the trace the user settles the architecture: durable project context lives in one store; hot runtime/session state lives in another\n - the follow-on routing guidance is just how to apply that boundary\n \n \n Create the required episode for the session and one decision record for the storage boundary. Keep the routing guidance inside the same record instead of splitting it into a second record.\n \n \n Store the refactor noise, split one architectural choice into two near-duplicate records such as one decision for the boundary and a second local-use record for which component reads which store, or create a separate durable record whose only message is that the refactors and debug edits were noise.\n \n \n\n \n \n - the user makes one architectural choice, such as keeping durable context and hot operational state in separate stores\n - the trace also mentions variable renames, label tweaks, temporary debug prints, and similar low-value cleanups\n - the user explicitly says those local edits should not become durable context\n \n \n Create the required episode and one durable record for the architectural choice only. Treat the explicit \"those edits are just noise\" instruction as extraction guidance for this run, not as its own record.\n \n \n Create a second durable record whose message is that renames, label tweaks, or temporary debug code are non-durable, or let that noise-filtering instruction replace the required episode.\n \n \n\n \n \n - repeated failed commands and partial theories about why a media workflow is broken\n - some guesses are ruled out\n - the stable conclusion is operational: environments that run this workflow need a specific system dependency installed\n \n \n Create one fact record for the dependency requirement in clean operational language. 
Lead with the missing dependency or environment requirement, and if you mention the failure at all, keep it generic rather than naming the exact exception class or copied command output. Still create the required episode for this session.\n \n \n Store the raw exception text, center the record on the failure symptom, split one operational lesson into separate local-vs-CI facts, create a second durable record whose message is \"do not invent a rationale here,\" keep the command history or debugging timeline, or write only the fact and skip the episode.\n \n \n\n \n \n - the user states one stable dependency or setup truth\n - nearby turns add extraction guidance such as \"this is a fact, not a decision\" or \"do not invent a why beyond the dependency\"\n - no broader workflow rule for future sessions is established\n \n \n Create the required episode and one fact record for the stable dependency or setup truth only.\n \n \n Create a second durable preference whose whole point is how to classify this trace, or store the meta-instruction instead of the underlying dependency fact.\n \n \n\n \n \n - the trace says image-enabled workflows require a system dependency in the environment\n - the user also says not to invent policy rationale beyond that dependency fact\n \n \n Write a fact such as: \"Image-enabled workflows require libvips in the environment.\" Keep the body on the requirement and its effect.\n \n \n Write a fact body such as: \"Do not invent a policy reason here\" or \"No decision rationale was supplied.\" Those are meta comments about classification, not durable project context.\n \n \n\n \n \n - early chunks are noisy and keep circling local counters, timers, labels, and temporary tuning\n - the final chunk clarifies that those were distractions\n - the real durable lesson is a source-of-truth boundary: authoritative state must live in one persisted place that survives restart and failover\n \n \n Create one durable record for the source-of-truth boundary. Mention restart or failover if it explains why the boundary matters, but keep any contrast abstract, such as \"not worker-local state,\" rather than listing local counters or timers.\n \n \n Write a durable record that carries over the rejected lure by naming worker-local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list.\n \n \n\n \n \n - the assistant starts from a partial repo note\n - later the user clarifies that incident ownership and current status are tracked in an external dashboard or ticket system\n - future sessions should consult that external system when this class of issue appears\n \n \n Create one reference record that names the external source and when future sessions should consult it.\n \n \n Center the record on local files, or turn it into a warning slogan about what not to trust locally.\n \n \n\n \n \n - run formatter\n - fix a small lint complaint\n - rerun tests\n - confirm green\n - no new rule, dependency, preference, or durable fact emerges\n \n \n Create only an archived episode.\n \n \n Invent a durable record from the sequence of routine commands.\n \n \n\n \n \n - the trace points at an earlier record that sounds nearby\n - new evidence sharpens part of it, but you still need to decide whether the core claim stayed the same\n - there may be more than one plausible existing record\n \n \n Search first, fetch the plausible existing record, then either update it if the meaning matches or create a new record if the core claim is different. 
In both cases, still create the episode for this session.\n \n \n Update from a shortlist or search preview alone, force an update when the new claim is only adjacent, or skip the episode because you already changed a durable record.\n \n \n \n\n \n - End the run with the `final_result` tool.\n - Put the plain-text completion summary in `completion_summary`.\n - Before `final_result`, ensure the current session already has exactly one episode record.\n - If you have created durable records but no episode yet, stop and create the episode before `final_result`.\n - If the episode contains the only copy of a reusable rule, invariant, dependency, source-of-truth pointer, or stable preference, stop and create the corresponding durable record before `final_result`.\n - Do not end with free-form assistant text outside `final_result`.\n \n\n \n Do not turn filenames, storage mechanics, graph links, or evidence tables into the main record unless the durable rule is specifically about that boundary.\n \n\n \n - You are running as a BAML + LangGraph replica of the PydanticAI extract agent.\n - You cannot call tools directly in this BAML harness. Return exactly one next tool call as typed JSON.\n - Choose only an action whose tool name appears in Available tools.\n - If a tool name is not listed in Available tools, it does not exist for this turn.\n - Your first non-whitespace character must be `{`.\n - Never output `` tags, hidden reasoning text, markdown, or prose before or after the JSON object.\n - Return one JSON object only. Do not include markdown, self-corrections, multiple JSON blocks, or a plan that chains future actions.\n - Choose only the immediate next action. When the production prompt requires an intermediate step before saving or finalizing, take that step first.\n - If multiple actions are still required, return only the first missing action and omit all later actions.\n - For multi-chunk traces, if `note_trace_findings` is available and no finding checkpoint appears in prior actions, choose `note_trace_findings` before any `save_context`, `revise_context`, or `final_result`; use empty args when the fully read trace has no reusable signal.\n - Fill only the argument object for the selected action.\n - Treat `final_result` as the PydanticAI structured final output.\n - For `save_context kind=episode`, include `user_intent` and `what_happened`.\n - For `save_context kind=decision`, include `decision` and `why`.\n - For `prune_trace_reads`, use only earlier `read_trace` start_line values.\n - After the required episode exists, choose another `save_context` only when a clearly reusable durable signal remains outside the episode; do not save trace-local implementation fixes as durable facts.\n - Do not treat an episode record as a durable record. If a reusable decision, preference, constraint, fact, or reference remains only in trace or episode text, save that durable record before `final_result`.\n \n\n \n {{ runtime_dashboard }}\n \n\n {{ _.role(\"user\") }}\n Run instruction:\n {{ run_instruction }}\n\n Available tools:\n {{ tool_manifest }}\n\n Prior actions and observations:\n {{ scratchpad }}\n\n {{ ctx.output_format }}\n \"#\n}\n\ntest InitialStepReadsTrace {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 0/200000 (0%) [normal]\n NOTES: 0 findings\n \"#\n run_instruction #\"\n Read the trace, write exactly one episode record, and write only the\n strongest durable records. This trace has 3 lines. 
Read all chunks before\n writing.\n \"#\n tool_manifest #\"\n - read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk.\n - save_context(kind: string, title: string, body: string): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad \"No prior actions.\"\n }\n @@assert({{ this.action == \"READ_TRACE\" }})\n}\n\ntest ContinueUnreadTraceBeforeWrite {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 20000/200000 (10%) [normal]\n NOTES: 0 findings\n Trace reads: 1 chunk(s)\n Next unread offset: 100\n Pruned offsets: none\n \"#\n run_instruction #\"\n This trace has 220 lines. Read all chunks before writing.\n \"#\n tool_manifest #\"\n - read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk.\n - save_context(kind: string, title: string, body: string): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad #\"\n Action: read_trace\n Status: ok\n Observation:\n [220 lines, showing 1-100] — 120 more lines, call read_trace(start_line=101, line_count=100) for the next chunk\n \"#\n }\n @@assert({{ this.action == \"READ_TRACE\" }})\n}\n\ntest LongTraceNotesFindingBeforeWrite {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 42000/200000 (21%) [normal]\n NOTES: 0 findings\n Trace reads: 3 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction #\"\n This trace has 240 lines and is fully read. An explicit durable architecture decision appears at line 188.\n \"#\n tool_manifest #\"\n - note_trace_findings(theme: string?, line: int?, quote: string?, level: string?): Record one trace finding with line evidence.\n - save_context(kind: string, title: string, body: string): Save one context record.\n \"#\n scratchpad #\"\n Action: read_trace\n Status: ok\n Observation:\n 188 user: We decided that durable context stays in the DB store because runtime queues are separate and temporary.\n \"#\n }\n @@assert({{ this.action == \"NOTE_TRACE_FINDINGS\" }})\n @@assert({{ this.note_trace_findings.level == \"DECISION\" }})\n}\n\ntest LongTraceNoReusableSignalCheckpoint {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 39000/200000 (20%) [normal]\n NOTES: 0 findings\n Trace reads: 2 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction #\"\n This long trace is fully read after multiple chunks. No note_trace_findings checkpoint has been called yet. 
It only formats files and reruns tests; no reusable rule, fact, preference, constraint, decision, or reference appears.\n \"#\n tool_manifest #\"\n - note_trace_findings(theme: string?, line: int?, quote: string?, level: string?): Record one trace finding with line evidence, or call with no args for none.\n - save_context(kind: string, title: string, body: string): Save one context record.\n \"#\n scratchpad \"Action: read_trace\\nStatus: ok\\nObservation:\\nAll chunks covered routine formatter/test work.\"\n }\n @@assert({{ this.action == \"NOTE_TRACE_FINDINGS\" }})\n}\n\ntest PruneOlderTraceReadsUnderPressure {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 130000/200000 (65%) [soft]\n NOTES: 2 findings (1 durable, 1 implementation) across 1 theme(s)\n Trace reads: 3 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction \"The trace is fully read and findings from the first two chunks have been captured.\"\n tool_manifest #\"\n - prune_trace_reads(start_lines: int[]): Prune earlier read_trace results after findings are noted.\n - save_context(kind: string, title: string, body: string): Save one context record.\n \"#\n scratchpad #\"\n Action: read_trace\n Status: ok\n Observation:\n [300 lines, showing 1-100]\n\n Action: read_trace\n Status: ok\n Observation:\n [300 lines, showing 101-200]\n\n Action: read_trace\n Status: ok\n Observation:\n [300 lines, showing 201-300]\n\n Action: note_trace_findings\n Status: ok\n Observation:\n Noted 1 finding (total 2 so far).\n \"#\n }\n @@assert({{ this.action == \"PRUNE_TRACE_READS\" }})\n @@assert({{ this.prune_trace_reads.start_lines|length > 0 }})\n @@assert({{ this.prune_trace_reads.start_lines[0] == 1 }})\n}\n\ntest RoutineTraceSavesArchivedEpisode {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 1000/200000 (1%) [normal]\n NOTES: 0 findings\n Trace reads: 1 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction \"The short trace is fully read. It only ran formatting and confirmed tests passed.\"\n tool_manifest #\"\n - save_context(kind: string, title: string, body: string, status: string?): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad \"Action: read_trace\\nStatus: ok\\nObservation:\\nformatter ran; tests passed; no durable signal.\"\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"EPISODE\" }})\n @@assert({{ this.save_context.status == \"ARCHIVED\" }})\n @@assert({{ this.save_context.user_intent != null and this.save_context.user_intent|length > 0 }})\n @@assert({{ this.save_context.what_happened != null and this.save_context.what_happened|length > 0 }})\n}\n\ntest DurablePreferenceSavesPreference {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 1000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The fully read trace includes a stable user correction: keep final replies short and skip redundant recaps. 
The current-session episode already exists; the durable preference remains unsaved.\"\n tool_manifest \"- save_context(kind: string, title: string, body: string): Save one context record.\"\n scratchpad \"Action: read_trace\\nStatus: ok\\nObservation:\\nuser: Keep final replies short and skip redundant recaps.\\n\\nAction: save_context\\nStatus: ok\\nObservation:\\n{\\\"ok\\\": true, \\\"result\\\": {\\\"record\\\": {\\\"kind\\\": \\\"episode\\\", \\\"source_session_id\\\": \\\"baml-test-session\\\"}}}\\n\\nAction: note_trace_findings\\nStatus: ok\\nObservation:\\nTheme: reply style. Quote: keep final replies short and skip redundant recaps.\"\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"PREFERENCE\" }})\n}\n\ntest DurableConstraintSavesConstraint {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 1000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The fully read trace establishes a durable invariant: future extraction runs must not mutate unfetched context records. The current-session episode already exists; the durable constraint remains unsaved.\"\n tool_manifest \"- save_context(kind: string, title: string, body: string): Save one context record.\"\n scratchpad \"Action: save_context\\nStatus: ok\\nObservation:\\n{\\\"ok\\\": true, \\\"result\\\": {\\\"record\\\": {\\\"kind\\\": \\\"episode\\\", \\\"source_session_id\\\": \\\"baml-test-session\\\"}}}\\n\\nAction: note_trace_findings\\nStatus: ok\\nObservation:\\nTheme: mutation invariant. Quote: never revise or archive a context record before fetching it in the current run.\"\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"CONSTRAINT\" }})\n}\n\ntest DurableDecisionSavesDecisionWithWhy {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 1000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The fully read trace establishes a durable architecture decision with rationale. The current-session episode already exists; the durable decision remains unsaved.\"\n tool_manifest \"- save_context(kind: string, title: string, body: string, decision: string?, why: string?): Save one context record.\"\n scratchpad \"Action: save_context\\nStatus: ok\\nObservation:\\n{\\\"ok\\\": true, \\\"result\\\": {\\\"record\\\": {\\\"kind\\\": \\\"episode\\\", \\\"source_session_id\\\": \\\"baml-test-session\\\"}}}\\n\\nAction: note_trace_findings\\nStatus: ok\\nObservation:\\nTheme: storage boundary. Quote: durable context is DB-backed so sessions can replay it; runtime state stays separate.\"\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"DECISION\" }})\n @@assert({{ this.save_context.decision|length > 0 }})\n @@assert({{ this.save_context.why|length > 0 }})\n}\n\ntest StableDependencySavesFact {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 1000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The fully read trace establishes that image workflows require libvips. It gives no decision rationale. 
The current-session episode already exists; the durable dependency fact remains unsaved.\"\n tool_manifest \"- save_context(kind: string, title: string, body: string): Save one context record.\"\n scratchpad \"Action: save_context\\nStatus: ok\\nObservation:\\n{\\\"ok\\\": true, \\\"result\\\": {\\\"record\\\": {\\\"kind\\\": \\\"episode\\\", \\\"source_session_id\\\": \\\"baml-test-session\\\"}}}\\n\\nAction: note_trace_findings\\nStatus: ok\\nObservation:\\nTheme: dependency. Quote: image workflows require libvips in the environment.\"\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"FACT\" }})\n @@assert({{ not this.save_context.why }})\n}\n\ntest ExternalSourceSavesReference {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 1000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The fully read trace says incident ownership and current status live in the Ops dashboard, which future sessions should consult for incidents. The current-session episode already exists; the durable reference remains unsaved.\"\n tool_manifest \"- save_context(kind: string, title: string, body: string): Save one context record.\"\n scratchpad \"Action: save_context\\nStatus: ok\\nObservation:\\n{\\\"ok\\\": true, \\\"result\\\": {\\\"record\\\": {\\\"kind\\\": \\\"episode\\\", \\\"source_session_id\\\": \\\"baml-test-session\\\"}}}\\n\\nAction: note_trace_findings\\nStatus: ok\\nObservation:\\nTheme: incident source. Quote: use the Ops dashboard for incident ownership and current status.\"\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"REFERENCE\" }})\n}\n\ntest DuplicateRiskSearchesContext {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 1000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"Relevant existing durable records are shown below; treat them as a shortlist only and fetch before revision.\\n\\nRelevant existing durable records:\\n- rec_storage | decision | DB context boundary | Durable context lives in the DB store.\"\n tool_manifest #\"\n - search_context(query: string, kind: string?, status: string?): Search saved context by meaning.\n - get_context(record_ids: string[]): Fetch saved context records by ID.\n - save_context(kind: string, title: string, body: string): Save one context record.\n \"#\n scratchpad \"Action: read_trace\\nStatus: ok\\nObservation:\\nThe new trace repeats the DB context boundary decision with slightly sharper wording.\"\n }\n @@assert({{ this.action == \"SEARCH_CONTEXT\" or this.action == \"GET_CONTEXT\" }})\n @@assert({{ this.action != \"SAVE_CONTEXT\" }})\n}\n\ntest SearchHitFetchesBeforeRevision {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 2000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"A search_context call found a nearby existing record. The current-session episode already exists. 
Fetch the full search hit before deciding whether to revise it.\"\n tool_manifest #\"\n - search_context(query: string): Search saved context by meaning.\n - get_context(record_ids: string[]): Fetch full context records by ID before any revision.\n - revise_context(record_id: string, reason: string, kind: string, title: string, body: string): Revise an existing context record.\n \"#\n scratchpad #\"\n Action: save_context\n Status: ok\n Observation:\n {\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n\n Action: search_context\n Status: ok\n Observation:\n {\"count\": 1, \"hits\": [{\"record_id\": \"rec_storage\", \"kind\": \"decision\", \"title\": \"DB context boundary\", \"body_preview\": \"Durable context lives in the DB store.\"}]}\n \"#\n }\n @@assert({{ this.action == \"GET_CONTEXT\" }})\n}\n\ntest FetchedSameMeaningRevisesContext {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 2000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The fetched record has the same core decision but needs the rationale tightened.\"\n tool_manifest #\"\n - revise_context(record_id: string, reason: string, kind: string, title: string, body: string): Revise an existing context record with a complete improved payload.\n - save_context(kind: string, title: string, body: string): Save one context record.\n \"#\n scratchpad #\"\n Action: get_context\n Status: ok\n Observation:\n {\"count\": 1, \"records\": [{\"record_id\": \"rec_storage\", \"kind\": \"decision\", \"title\": \"DB context boundary\", \"body\": \"Durable context lives in the DB store.\", \"decision\": \"Durable context lives in the DB store.\", \"why\": \"It must survive sessions.\"}]}\n \"#\n }\n @@assert({{ this.action == \"REVISE_CONTEXT\" }})\n}\n\ntest FinalBeforeEpisodeSavesEpisode {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 3000/200000 (2%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The trace is fully read. One durable fact has already been saved, but no current-session episode exists.\"\n tool_manifest #\"\n - save_context(kind: string, title: string, body: string): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad #\"\n Action: save_context\n Status: ok\n Observation:\n {\"ok\": true, \"result\": {\"record\": {\"kind\": \"fact\", \"source_session_id\": \"baml-test-session\"}}}\n \"#\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"EPISODE\" }})\n @@assert({{ this.save_context.user_intent != null and this.save_context.user_intent|length > 0 }})\n @@assert({{ this.save_context.what_happened != null and this.save_context.what_happened|length > 0 }})\n}\n\ntest FinalStepAfterEpisodeSave {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 1000/200000 (1%) [normal]\n NOTES: 0 findings\n Trace reads: 1 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction #\"\n Read the trace, write exactly one episode record, and write only the\n strongest durable records. 
The trace has already been fully read.\n \"#\n tool_manifest #\"\n - read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk.\n - save_context(kind: string, title: string, body: string): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad #\"\n Action: read_trace\n Status: ok\n Observation:\n [3 lines, trace coverage complete] All trace lines have already been read.\n\n Action: save_context\n Status: ok\n Observation:\n {\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n \"#\n }\n @@assert({{ this.action == \"FINAL_RESULT\" }})\n}\n\ntest EpisodeWithReusableDecisionSavesDecisionBeforeFinal {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 3000/200000 (2%) [normal]\n NOTES: 0 findings\n Trace reads: 1 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction \"The trace is fully read. The current-session episode already exists, but a reusable PostgreSQL-over-MySQL project decision remains only in the trace and episode text. Save the durable decision before final_result.\"\n tool_manifest #\"\n - save_context(kind: string, title: string, body: string, decision: string?, why: string?): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad #\"\n Action: read_trace\n Status: ok\n Observation:\n user: Good. Also, we decided to use PostgreSQL instead of MySQL for this project.\n assistant: Understood. Decision: PostgreSQL over MySQL. I'll update the connection string and ORM configuration.\n\n Action: save_context\n Status: ok\n Observation:\n {\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"title\": \"Database Configuration Update\", \"body\": \"The session switched the project's primary database backend from MySQL to PostgreSQL.\", \"source_session_id\": \"baml-test-session\"}}}\n \"#\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"DECISION\" }})\n @@assert({{ this.save_context.decision|length > 0 }})\n @@assert({{ this.save_context.why|length > 0 }})\n}\n\ntest ImplementationOnlyAfterEpisodeFinalizes {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 3000/200000 (2%) [normal]\n NOTES: 0 findings\n Trace reads: 1 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction \"The trace is fully read. A durable decision and the current-session episode already exist. 
The only remaining unsaved content is an implementation fix to a local timeout constant.\"\n tool_manifest #\"\n - save_context(kind: string, title: string, body: string): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad #\"\n Action: read_trace\n Status: ok\n Observation:\n The trace fixed a local timeout constant and also established one architecture decision.\n\n Action: save_context\n Status: ok\n Observation:\n {\"ok\": true, \"result\": {\"record\": {\"kind\": \"decision\", \"title\": \"Runtime state remains separate from durable context\"}}}\n\n Action: save_context\n Status: ok\n Observation:\n {\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n \"#\n }\n @@assert({{ this.action == \"FINAL_RESULT\" }})\n}\n", + "generators.baml": "generator target {\n output_type \"python/pydantic\"\n output_dir \"../\"\n default_client_mode \"sync\"\n version \"0.222.0\"\n}\n", +} + +def get_baml_files(): + return _file_map \ No newline at end of file diff --git a/baml_agents/baml_client/parser.py b/baml_agents/baml_client/parser.py new file mode 100644 index 0000000..cb18394 --- /dev/null +++ b/baml_agents/baml_client/parser.py @@ -0,0 +1,46 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +import typing +import typing_extensions + + +from . import stream_types, types +from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions + +class LlmResponseParser: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + def DecideNextExtractStep( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> types.ExtractAgentStep: + __result__ = self.__options.merge_options(baml_options).parse_response(function_name="DecideNextExtractStep", llm_response=llm_response, mode="request") + return typing.cast(types.ExtractAgentStep, __result__) + + + +class LlmStreamParser: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + def DecideNextExtractStep( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> stream_types.ExtractAgentStep: + __result__ = self.__options.merge_options(baml_options).parse_response(function_name="DecideNextExtractStep", llm_response=llm_response, mode="stream") + return typing.cast(stream_types.ExtractAgentStep, __result__) + + \ No newline at end of file diff --git a/baml_agents/baml_client/runtime.py b/baml_agents/baml_client/runtime.py new file mode 100644 index 0000000..27fc3a9 --- /dev/null +++ b/baml_agents/baml_client/runtime.py @@ -0,0 +1,361 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. 
Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +import os +import typing +import typing_extensions + +import baml_py + +from . import types, stream_types, type_builder +from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__, DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX as __ctx__manager__ + + +class BamlCallOptions(typing.TypedDict, total=False): + tb: typing_extensions.NotRequired[type_builder.TypeBuilder] + client_registry: typing_extensions.NotRequired[baml_py.baml_py.ClientRegistry] + client: typing_extensions.NotRequired[str] + env: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[str]]] + tags: typing_extensions.NotRequired[typing.Dict[str, str]] + collector: typing_extensions.NotRequired[ + typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]] + ] + abort_controller: typing_extensions.NotRequired[baml_py.baml_py.AbortController] + on_tick: typing_extensions.NotRequired[typing.Callable[[str, baml_py.baml_py.FunctionLog], None]] + watchers: typing_extensions.NotRequired[typing.Any] # EventCollector type, will be overridden in generated clients + + +class _ResolvedBamlOptions: + tb: typing.Optional[baml_py.baml_py.TypeBuilder] + client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] + collectors: typing.List[baml_py.baml_py.Collector] + env_vars: typing.Dict[str, str] + tags: typing.Dict[str, str] + abort_controller: typing.Optional[baml_py.baml_py.AbortController] + on_tick: typing.Optional[typing.Callable[[], None]] + watchers: typing.Optional[typing.Any] + + def __init__( + self, + tb: typing.Optional[baml_py.baml_py.TypeBuilder], + client_registry: typing.Optional[baml_py.baml_py.ClientRegistry], + collectors: typing.List[baml_py.baml_py.Collector], + env_vars: typing.Dict[str, str], + tags: typing.Dict[str, str], + abort_controller: typing.Optional[baml_py.baml_py.AbortController], + on_tick: typing.Optional[typing.Callable[[], None]], + watchers: typing.Optional[typing.Any], + ): + self.tb = tb + self.client_registry = client_registry + self.collectors = collectors + self.env_vars = env_vars + self.tags = tags + self.abort_controller = abort_controller + self.on_tick = on_tick + self.watchers = watchers + + + + +class DoNotUseDirectlyCallManager: + def __init__(self, baml_options: BamlCallOptions): + self.__baml_options = baml_options + + def __getstate__(self): + # Return state needed for pickling + return {"baml_options": self.__baml_options} + + def __setstate__(self, state): + # Restore state from pickling + self.__baml_options = state["baml_options"] + + def __resolve(self) -> _ResolvedBamlOptions: + tb = self.__baml_options.get("tb") + if tb is not None: + baml_tb = tb._tb # type: ignore (we know how to use this private attribute) + else: + baml_tb = None + client_registry = self.__baml_options.get("client_registry") + client = self.__baml_options.get("client") + + # If client is provided, it takes precedence (creates/overrides client_registry primary) + if client is not None: + if client_registry is None: + client_registry = baml_py.baml_py.ClientRegistry() + client_registry.set_primary(client) + + collector = self.__baml_options.get("collector") + collectors_as_list = ( + collector + if isinstance(collector, list) + else [collector] if collector is not None else [] + ) + env_vars = os.environ.copy() + for k, v in self.__baml_options.get("env", {}).items(): + if v is not None: + 
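+                    # A string value overrides or adds the variable for this call's environment;
+                    # a None value (the else branch below) removes it from the resolved environment.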
env_vars[k] = v + else: + env_vars.pop(k, None) + + tags = self.__baml_options.get("tags", {}) or {} + + abort_controller = self.__baml_options.get("abort_controller") + + on_tick = self.__baml_options.get("on_tick") + if on_tick is not None: + collector = baml_py.baml_py.Collector("on-tick-collector") + collectors_as_list.append(collector) + def on_tick_wrapper(): + log = collector.last + if log is not None: + on_tick("Unknown", log) + else: + on_tick_wrapper = None + + watchers = self.__baml_options.get("watchers") + + return _ResolvedBamlOptions( + baml_tb, + client_registry, + collectors_as_list, + env_vars, + tags, + abort_controller, + on_tick_wrapper, + watchers, + ) + + def merge_options(self, options: BamlCallOptions) -> "DoNotUseDirectlyCallManager": + return DoNotUseDirectlyCallManager({**self.__baml_options, **options}) + + async def call_function_async( + self, *, function_name: str, args: typing.Dict[str, typing.Any] + ) -> baml_py.baml_py.FunctionResult: + resolved_options = self.__resolve() + + # Check if already aborted + if resolved_options.abort_controller is not None and resolved_options.abort_controller.aborted: + raise baml_py.baml_py.BamlAbortError("Operation was aborted") + + return await __runtime__.call_function( + function_name, + args, + # ctx + __ctx__manager__.clone_context(), + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # collectors + resolved_options.collectors, + # env_vars + resolved_options.env_vars, + # tags + resolved_options.tags, + # abort_controller + resolved_options.abort_controller, + # watchers + resolved_options.watchers, + ) + + def call_function_sync( + self, *, function_name: str, args: typing.Dict[str, typing.Any] + ) -> baml_py.baml_py.FunctionResult: + resolved_options = self.__resolve() + + # Check if already aborted + if resolved_options.abort_controller is not None and resolved_options.abort_controller.aborted: + raise baml_py.baml_py.BamlAbortError("Operation was aborted") + + ctx = __ctx__manager__.get() + return __runtime__.call_function_sync( + function_name, + args, + # ctx + ctx, + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # collectors + resolved_options.collectors, + # env_vars + resolved_options.env_vars, + # tags + resolved_options.tags, + # abort_controller + resolved_options.abort_controller, + # watchers + resolved_options.watchers, + ) + + def create_async_stream( + self, + *, + function_name: str, + args: typing.Dict[str, typing.Any], + ) -> typing.Tuple[baml_py.baml_py.RuntimeContextManager, baml_py.baml_py.FunctionResultStream]: + resolved_options = self.__resolve() + ctx = __ctx__manager__.clone_context() + result = __runtime__.stream_function( + function_name, + args, + # this is always None, we set this later! + # on_event + None, + # ctx + ctx, + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # collectors + resolved_options.collectors, + # env_vars + resolved_options.env_vars, + # tags + resolved_options.tags, + # on_tick + resolved_options.on_tick, + # abort_controller + resolved_options.abort_controller, + ) + return ctx, result + + def create_sync_stream( + self, + *, + function_name: str, + args: typing.Dict[str, typing.Any], + ) -> typing.Tuple[baml_py.baml_py.RuntimeContextManager, baml_py.baml_py.SyncFunctionResultStream]: + resolved_options = self.__resolve() + if resolved_options.on_tick is not None: + raise ValueError("on_tick is not supported for sync streams. 
Please use async streams instead.") + ctx = __ctx__manager__.get() + result = __runtime__.stream_function_sync( + function_name, + args, + # this is always None, we set this later! + # on_event + None, + # ctx + ctx, + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # collectors + resolved_options.collectors, + # env_vars + resolved_options.env_vars, + # tags + resolved_options.tags, + # on_tick + # always None! sync streams don't support on_tick + None, + # abort_controller + resolved_options.abort_controller, + ) + return ctx, result + + async def create_http_request_async( + self, + *, + function_name: str, + args: typing.Dict[str, typing.Any], + mode: typing_extensions.Literal["stream", "request"], + ) -> baml_py.baml_py.HTTPRequest: + resolved_options = self.__resolve() + return await __runtime__.build_request( + function_name, + args, + # ctx + __ctx__manager__.clone_context(), + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # env_vars + resolved_options.env_vars, + # is_stream + mode == "stream", + ) + + def create_http_request_sync( + self, + *, + function_name: str, + args: typing.Dict[str, typing.Any], + mode: typing_extensions.Literal["stream", "request"], + ) -> baml_py.baml_py.HTTPRequest: + resolved_options = self.__resolve() + return __runtime__.build_request_sync( + function_name, + args, + # ctx + __ctx__manager__.get(), + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # env_vars + resolved_options.env_vars, + # is_stream + mode == "stream", + ) + + def parse_response(self, *, function_name: str, llm_response: str, mode: typing_extensions.Literal["stream", "request"]) -> typing.Any: + resolved_options = self.__resolve() + return __runtime__.parse_llm_response( + function_name, + llm_response, + # enum_module + types, + # cls_module + types, + # partial_cls_module + stream_types, + # allow_partials + mode == "stream", + # ctx + __ctx__manager__.get(), + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # env_vars + resolved_options.env_vars, + ) + + +def disassemble(function: typing.Callable) -> None: + import inspect + from . import b + + if not callable(function): + print(f"disassemble: object {function} is not a Baml function") + return + + is_client_method = False + + for (method_name, _) in inspect.getmembers(b, predicate=inspect.ismethod): + if method_name == function.__name__: + is_client_method = True + break + + if not is_client_method: + print(f"disassemble: function {function.__name__} is not a Baml function") + return + + print(f"----- function {function.__name__} -----") + __runtime__.disassemble(function.__name__) \ No newline at end of file diff --git a/baml_agents/baml_client/stream_types.py b/baml_agents/baml_client/stream_types.py new file mode 100644 index 0000000..10b3eea --- /dev/null +++ b/baml_agents/baml_client/stream_types.py @@ -0,0 +1,104 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +import typing +import typing_extensions +from pydantic import BaseModel, ConfigDict, Field + +import baml_py + +from . 
import types + +StreamStateValueT = typing.TypeVar('StreamStateValueT') +class StreamState(BaseModel, typing.Generic[StreamStateValueT]): + value: StreamStateValueT + state: typing_extensions.Literal["Pending", "Incomplete", "Complete"] +# ######################################################################### +# Generated classes (9) +# ######################################################################### + +class ExtractAgentStep(BaseModel): + action: typing.Optional[types.ExtractAction] = None + rationale: typing.Optional[str] = Field(default=None, description='One short sentence explaining why this is the next action. Do not include hidden chain-of-thought.') + read_trace: typing.Optional["ReadTraceArgs"] = None + search_context: typing.Optional["SearchContextArgs"] = None + get_context: typing.Optional["GetContextArgs"] = None + save_context: typing.Optional["SaveContextArgs"] = None + revise_context: typing.Optional["ReviseContextArgs"] = None + note_trace_findings: typing.Optional["NoteTraceFindingsArgs"] = None + prune_trace_reads: typing.Optional["PruneTraceReadsArgs"] = None + final_result: typing.Optional["FinalResultArgs"] = None + +class FinalResultArgs(BaseModel): + completion_summary: typing.Optional[str] = None + +class GetContextArgs(BaseModel): + record_ids: typing.List[str] = Field(description='Record IDs returned by search_context or list_context. Fetch before any revision.') + include_versions: typing.Optional[bool] = Field(default=None, description='Whether to include prior versions.') + detail: typing.Optional[typing.Union[typing_extensions.Literal['concise'], typing_extensions.Literal['detailed']]] = Field(default=None, description='Use concise unless full version history is needed.') + +class NoteTraceFindingsArgs(BaseModel): + theme: typing.Optional[str] = Field(default=None, description='Short theme label for this finding. Leave empty only for the no-findings checkpoint.') + line: typing.Optional[int] = Field(default=None, description='1-based trace line with supporting evidence. Leave empty only for the no-findings checkpoint.') + quote: typing.Optional[str] = Field(default=None, description='Short evidence quote from the trace. Leave empty only for the no-findings checkpoint.') + level: typing.Optional[types.FindingLevel] = Field(default=None, description='Durable levels only for reusable context; implementation for trace-local evidence.') + +class PruneTraceReadsArgs(BaseModel): + start_lines: typing.List[int] = Field(description='1-based start_line values from earlier read_trace calls, such as 1, 101, or 201. 
Never use context token counts.') + +class ReadTraceArgs(BaseModel): + start_line: typing.Optional[int] = None + line_count: typing.Optional[int] = None + +class ReviseContextArgs(BaseModel): + record_id: typing.Optional[str] = None + reason: typing.Optional[str] = Field(default=None, description='Short reason for the revision.') + kind: typing.Optional[types.RecordKind] = Field(default=None, description='Must match the fetched record kind; revise_context cannot change kind.') + title: typing.Optional[str] = Field(default=None, description='Complete improved title.') + body: typing.Optional[str] = Field(default=None, description='Complete improved body.') + status: typing.Optional[types.RecordStatus] = Field(default=None, description='Lifecycle status to keep or set.') + valid_from: typing.Optional[str] = None + valid_until: typing.Optional[str] = None + decision: typing.Optional[str] = None + why: typing.Optional[str] = None + alternatives: typing.Optional[str] = None + consequences: typing.Optional[str] = None + user_intent: typing.Optional[str] = None + what_happened: typing.Optional[str] = None + outcomes: typing.Optional[str] = None + +class SaveContextArgs(BaseModel): + kind: typing.Optional[types.RecordKind] = Field(default=None, description='Use episode for the mandatory session summary; use durable kinds only for reusable project context.') + title: typing.Optional[str] = Field(default=None, description='Short standalone title for the lasting point or episode.') + body: typing.Optional[str] = Field(default=None, description='Compact standalone body. Do not include trace-local command logs or copied errors.') + status: typing.Optional[types.RecordStatus] = Field(default=None, description='Use archived for routine episodes with no durable signal; active for reusable durable records.') + valid_from: typing.Optional[str] = Field(default=None, description='Optional validity start timestamp.') + valid_until: typing.Optional[str] = Field(default=None, description='Optional validity end timestamp.') + decision: typing.Optional[str] = Field(default=None, description='Required only for kind=decision; omit for facts, preferences, constraints, references, and episodes.') + why: typing.Optional[str] = Field(default=None, description='Required only for kind=decision when rationale exists; omit when the trace says no durable rationale was supplied.') + alternatives: typing.Optional[str] = Field(default=None, description='Optional decision alternatives. Omit for non-decisions.') + consequences: typing.Optional[str] = Field(default=None, description='Optional direct application guidance for the same durable point.') + user_intent: typing.Optional[str] = Field(default=None, description='Required when kind=episode. Concise statement of what the user wanted in this source session.') + what_happened: typing.Optional[str] = Field(default=None, description='Required when kind=episode. Concise statement of what the session actually did.') + outcomes: typing.Optional[str] = Field(default=None, description='Optional concise episode outcome.') + +class SearchContextArgs(BaseModel): + query: typing.Optional[str] = Field(default=None, description='Natural-language query for the durable meaning, not \'*\' or an empty browse request.') + kind: typing.Optional[types.RecordKind] = Field(default=None, description='Optional filter. 
Use only when the desired record kind is known.') + status: typing.Optional[types.RecordStatus] = Field(default=None, description='Optional lifecycle filter.') + valid_at: typing.Optional[str] = Field(default=None, description='Optional timestamp for historical lookup.') + include_archived: typing.Optional[bool] = Field(default=None, description='Whether archived records should be included.') + limit: typing.Optional[int] = Field(default=None, description='Maximum hits to return.') + +# ######################################################################### +# Generated type aliases (0) +# ######################################################################### diff --git a/baml_agents/baml_client/sync_client.py b/baml_agents/baml_client/sync_client.py new file mode 100644 index 0000000..90230d3 --- /dev/null +++ b/baml_agents/baml_client/sync_client.py @@ -0,0 +1,164 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +import typing +import typing_extensions +import baml_py + +from . import stream_types, types, type_builder +from .parser import LlmResponseParser, LlmStreamParser +from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions +from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__ + +class BamlSyncClient: + __options: DoNotUseDirectlyCallManager + __stream_client: "BamlStreamClient" + __http_request: "BamlHttpRequestClient" + __http_stream_request: "BamlHttpStreamRequestClient" + __llm_response_parser: LlmResponseParser + __llm_stream_parser: LlmStreamParser + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + self.__stream_client = BamlStreamClient(options) + self.__http_request = BamlHttpRequestClient(options) + self.__http_stream_request = BamlHttpStreamRequestClient(options) + self.__llm_response_parser = LlmResponseParser(options) + self.__llm_stream_parser = LlmStreamParser(options) + + def __getstate__(self): + # Return state needed for pickling + return {"options": self.__options} + + def __setstate__(self, state): + # Restore state from pickling + self.__options = state["options"] + self.__stream_client = BamlStreamClient(self.__options) + self.__http_request = BamlHttpRequestClient(self.__options) + self.__http_stream_request = BamlHttpStreamRequestClient(self.__options) + self.__llm_response_parser = LlmResponseParser(self.__options) + self.__llm_stream_parser = LlmStreamParser(self.__options) + + def with_options(self, + tb: typing.Optional[type_builder.TypeBuilder] = None, + client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] = None, + client: typing.Optional[str] = None, + collector: typing.Optional[typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]]] = None, + env: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None, + tags: typing.Optional[typing.Dict[str, str]] = None, + on_tick: typing.Optional[typing.Callable[[str, baml_py.baml_py.FunctionLog], None]] = None, + ) -> "BamlSyncClient": + options: BamlCallOptions = {} + if tb is not None: + options["tb"] = tb + if client_registry is not None: + 
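+            # Staged overrides collected here are merged over this client's existing
+            # options when the derived client is constructed at the end of with_options.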
options["client_registry"] = client_registry + if client is not None: + options["client"] = client + if collector is not None: + options["collector"] = collector + if env is not None: + options["env"] = env + if tags is not None: + options["tags"] = tags + if on_tick is not None: + options["on_tick"] = on_tick + return BamlSyncClient(self.__options.merge_options(options)) + + @property + def stream(self): + return self.__stream_client + + @property + def request(self): + return self.__http_request + + @property + def stream_request(self): + return self.__http_stream_request + + @property + def parse(self): + return self.__llm_response_parser + + @property + def parse_stream(self): + return self.__llm_stream_parser + + def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str, + baml_options: BamlCallOptions = {}, + ) -> types.ExtractAgentStep: + # Check if on_tick is provided + if 'on_tick' in baml_options: + __stream__ = self.stream.DecideNextExtractStep(runtime_dashboard=runtime_dashboard,run_instruction=run_instruction,tool_manifest=tool_manifest,scratchpad=scratchpad, + baml_options=baml_options) + return __stream__.get_final_response() + else: + # Original non-streaming code + __result__ = self.__options.merge_options(baml_options).call_function_sync(function_name="DecideNextExtractStep", args={ + "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad, + }) + return typing.cast(types.ExtractAgentStep, __result__.cast_to(types, types, stream_types, False, __runtime__)) + + + +class BamlStreamClient: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlSyncStream[stream_types.ExtractAgentStep, types.ExtractAgentStep]: + __ctx__, __result__ = self.__options.merge_options(baml_options).create_sync_stream(function_name="DecideNextExtractStep", args={ + "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad, + }) + return baml_py.BamlSyncStream[stream_types.ExtractAgentStep, types.ExtractAgentStep]( + __result__, + lambda x: typing.cast(stream_types.ExtractAgentStep, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.ExtractAgentStep, x.cast_to(types, types, stream_types, False, __runtime__)), + __ctx__, + ) + + +class BamlHttpRequestClient: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="DecideNextExtractStep", args={ + "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad, + }, mode="request") + return __result__ + + +class BamlHttpStreamRequestClient: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: 
str,tool_manifest: str,scratchpad: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="DecideNextExtractStep", args={ + "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad, + }, mode="stream") + return __result__ + + +b = BamlSyncClient(DoNotUseDirectlyCallManager({})) \ No newline at end of file diff --git a/baml_agents/baml_client/tracing.py b/baml_agents/baml_client/tracing.py new file mode 100644 index 0000000..0672559 --- /dev/null +++ b/baml_agents/baml_client/tracing.py @@ -0,0 +1,22 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX + +trace = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.trace_fn +set_tags = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.upsert_tags +def flush(): + DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.flush() +on_log_event = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.on_log_event + + +__all__ = ['trace', 'set_tags', "flush", "on_log_event"] diff --git a/baml_agents/baml_client/type_builder.py b/baml_agents/baml_client/type_builder.py new file mode 100644 index 0000000..51d6d61 --- /dev/null +++ b/baml_agents/baml_client/type_builder.py @@ -0,0 +1,861 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. 
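+# Example (illustrative sketch, not emitted by baml-cli): one way this TypeBuilder
+# can be threaded into a call on the generated sync client via BamlCallOptions.
+# The argument strings are placeholders; `b` is the client exported by sync_client.
+#
+#     from baml_client import b
+#     from baml_client.type_builder import TypeBuilder
+#
+#     tb = TypeBuilder()
+#     step = b.DecideNextExtractStep(
+#         runtime_dashboard="...",
+#         run_instruction="...",
+#         tool_manifest="...",
+#         scratchpad="...",
+#         baml_options={"tb": tb},
+#     )
+#     print(step.action)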
+ +import typing +from baml_py import type_builder +from baml_py import baml_py +# These are exports, not used here, hence the linter is disabled +from baml_py.baml_py import FieldType, EnumValueBuilder, EnumBuilder, ClassBuilder # noqa: F401 # pylint: disable=unused-import +from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME + +class TypeBuilder(type_builder.TypeBuilder): + def __init__(self): + super().__init__(classes=set( + ["ExtractAgentStep","FinalResultArgs","GetContextArgs","NoteTraceFindingsArgs","PruneTraceReadsArgs","ReadTraceArgs","ReviseContextArgs","SaveContextArgs","SearchContextArgs",] + ), enums=set( + ["ExtractAction","FindingLevel","RecordKind","RecordStatus",] + ), runtime=DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME) + + # ######################################################################### + # Generated enums 4 + # ######################################################################### + + @property + def ExtractAction(self) -> "ExtractActionViewer": + return ExtractActionViewer(self) + + @property + def FindingLevel(self) -> "FindingLevelViewer": + return FindingLevelViewer(self) + + @property + def RecordKind(self) -> "RecordKindViewer": + return RecordKindViewer(self) + + @property + def RecordStatus(self) -> "RecordStatusViewer": + return RecordStatusViewer(self) + + + # ######################################################################### + # Generated classes 9 + # ######################################################################### + + @property + def ExtractAgentStep(self) -> "ExtractAgentStepViewer": + return ExtractAgentStepViewer(self) + + @property + def FinalResultArgs(self) -> "FinalResultArgsViewer": + return FinalResultArgsViewer(self) + + @property + def GetContextArgs(self) -> "GetContextArgsViewer": + return GetContextArgsViewer(self) + + @property + def NoteTraceFindingsArgs(self) -> "NoteTraceFindingsArgsViewer": + return NoteTraceFindingsArgsViewer(self) + + @property + def PruneTraceReadsArgs(self) -> "PruneTraceReadsArgsViewer": + return PruneTraceReadsArgsViewer(self) + + @property + def ReadTraceArgs(self) -> "ReadTraceArgsViewer": + return ReadTraceArgsViewer(self) + + @property + def ReviseContextArgs(self) -> "ReviseContextArgsViewer": + return ReviseContextArgsViewer(self) + + @property + def SaveContextArgs(self) -> "SaveContextArgsViewer": + return SaveContextArgsViewer(self) + + @property + def SearchContextArgs(self) -> "SearchContextArgsViewer": + return SearchContextArgsViewer(self) + + + +# ######################################################################### +# Generated enums 4 +# ######################################################################### + +class ExtractActionAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.enum("ExtractAction") + self._values: typing.Set[str] = set([ "READ_TRACE", "SEARCH_CONTEXT", "GET_CONTEXT", "SAVE_CONTEXT", "REVISE_CONTEXT", "NOTE_TRACE_FINDINGS", "PRUNE_TRACE_READS", "FINAL_RESULT", ]) + self._vals = ExtractActionValues(self._bldr, self._values) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def values(self) -> "ExtractActionValues": + return self._vals + + +class ExtractActionViewer(ExtractActionAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_values(self) -> typing.List[typing.Tuple[str, type_builder.EnumValueViewer]]: + return [(name, 
type_builder.EnumValueViewer(self._bldr.value(name))) for name in self._values] + + +class ExtractActionValues: + def __init__(self, enum_bldr: baml_py.EnumBuilder, values: typing.Set[str]): + self.__bldr = enum_bldr + self.__values = values # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def READ_TRACE(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("READ_TRACE")) + + @property + def SEARCH_CONTEXT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("SEARCH_CONTEXT")) + + @property + def GET_CONTEXT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("GET_CONTEXT")) + + @property + def SAVE_CONTEXT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("SAVE_CONTEXT")) + + @property + def REVISE_CONTEXT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("REVISE_CONTEXT")) + + @property + def NOTE_TRACE_FINDINGS(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("NOTE_TRACE_FINDINGS")) + + @property + def PRUNE_TRACE_READS(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("PRUNE_TRACE_READS")) + + @property + def FINAL_RESULT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("FINAL_RESULT")) + + + + +class FindingLevelAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.enum("FindingLevel") + self._values: typing.Set[str] = set([ "DECISION", "PREFERENCE", "FEEDBACK", "REFERENCE", "CONSTRAINT", "FACT", "IMPLEMENTATION", ]) + self._vals = FindingLevelValues(self._bldr, self._values) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def values(self) -> "FindingLevelValues": + return self._vals + + +class FindingLevelViewer(FindingLevelAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_values(self) -> typing.List[typing.Tuple[str, type_builder.EnumValueViewer]]: + return [(name, type_builder.EnumValueViewer(self._bldr.value(name))) for name in self._values] + + +class FindingLevelValues: + def __init__(self, enum_bldr: baml_py.EnumBuilder, values: typing.Set[str]): + self.__bldr = enum_bldr + self.__values = values # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def DECISION(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("DECISION")) + + @property + def PREFERENCE(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("PREFERENCE")) + + @property + def FEEDBACK(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("FEEDBACK")) + + @property + def REFERENCE(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("REFERENCE")) + + @property + def CONSTRAINT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("CONSTRAINT")) + + @property + def FACT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("FACT")) + + @property + def IMPLEMENTATION(self) -> type_builder.EnumValueViewer: + return 
type_builder.EnumValueViewer(self.__bldr.value("IMPLEMENTATION")) + + + + +class RecordKindAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.enum("RecordKind") + self._values: typing.Set[str] = set([ "DECISION", "PREFERENCE", "CONSTRAINT", "FACT", "REFERENCE", "EPISODE", ]) + self._vals = RecordKindValues(self._bldr, self._values) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def values(self) -> "RecordKindValues": + return self._vals + + +class RecordKindViewer(RecordKindAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_values(self) -> typing.List[typing.Tuple[str, type_builder.EnumValueViewer]]: + return [(name, type_builder.EnumValueViewer(self._bldr.value(name))) for name in self._values] + + +class RecordKindValues: + def __init__(self, enum_bldr: baml_py.EnumBuilder, values: typing.Set[str]): + self.__bldr = enum_bldr + self.__values = values # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def DECISION(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("DECISION")) + + @property + def PREFERENCE(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("PREFERENCE")) + + @property + def CONSTRAINT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("CONSTRAINT")) + + @property + def FACT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("FACT")) + + @property + def REFERENCE(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("REFERENCE")) + + @property + def EPISODE(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("EPISODE")) + + + + +class RecordStatusAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.enum("RecordStatus") + self._values: typing.Set[str] = set([ "ACTIVE", "ARCHIVED", ]) + self._vals = RecordStatusValues(self._bldr, self._values) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def values(self) -> "RecordStatusValues": + return self._vals + + +class RecordStatusViewer(RecordStatusAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_values(self) -> typing.List[typing.Tuple[str, type_builder.EnumValueViewer]]: + return [(name, type_builder.EnumValueViewer(self._bldr.value(name))) for name in self._values] + + +class RecordStatusValues: + def __init__(self, enum_bldr: baml_py.EnumBuilder, values: typing.Set[str]): + self.__bldr = enum_bldr + self.__values = values # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def ACTIVE(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("ACTIVE")) + + @property + def ARCHIVED(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("ARCHIVED")) + + + + + +# ######################################################################### +# Generated classes 9 +# ######################################################################### + +class ExtractAgentStepAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use 
this private attribute) + self._bldr = _tb.class_("ExtractAgentStep") + self._properties: typing.Set[str] = set([ "action", "rationale", "read_trace", "search_context", "get_context", "save_context", "revise_context", "note_trace_findings", "prune_trace_reads", "final_result", ]) + self._props = ExtractAgentStepProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "ExtractAgentStepProperties": + return self._props + + +class ExtractAgentStepViewer(ExtractAgentStepAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class ExtractAgentStepProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def action(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("action")) + + @property + def rationale(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("rationale")) + + @property + def read_trace(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("read_trace")) + + @property + def search_context(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("search_context")) + + @property + def get_context(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("get_context")) + + @property + def save_context(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("save_context")) + + @property + def revise_context(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("revise_context")) + + @property + def note_trace_findings(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("note_trace_findings")) + + @property + def prune_trace_reads(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("prune_trace_reads")) + + @property + def final_result(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("final_result")) + + + + +class FinalResultArgsAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("FinalResultArgs") + self._properties: typing.Set[str] = set([ "completion_summary", ]) + self._props = FinalResultArgsProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "FinalResultArgsProperties": + return self._props + + +class FinalResultArgsViewer(FinalResultArgsAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class FinalResultArgsProperties: + def 
__init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def completion_summary(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("completion_summary")) + + + + +class GetContextArgsAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("GetContextArgs") + self._properties: typing.Set[str] = set([ "record_ids", "include_versions", "detail", ]) + self._props = GetContextArgsProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "GetContextArgsProperties": + return self._props + + +class GetContextArgsViewer(GetContextArgsAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class GetContextArgsProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def record_ids(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("record_ids")) + + @property + def include_versions(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("include_versions")) + + @property + def detail(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("detail")) + + + + +class NoteTraceFindingsArgsAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("NoteTraceFindingsArgs") + self._properties: typing.Set[str] = set([ "theme", "line", "quote", "level", ]) + self._props = NoteTraceFindingsArgsProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "NoteTraceFindingsArgsProperties": + return self._props + + +class NoteTraceFindingsArgsViewer(NoteTraceFindingsArgsAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class NoteTraceFindingsArgsProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def theme(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("theme")) + + @property + def line(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("line")) + + @property + def quote(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("quote")) + + @property + def level(self) -> 
type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("level")) + + + + +class PruneTraceReadsArgsAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("PruneTraceReadsArgs") + self._properties: typing.Set[str] = set([ "start_lines", ]) + self._props = PruneTraceReadsArgsProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "PruneTraceReadsArgsProperties": + return self._props + + +class PruneTraceReadsArgsViewer(PruneTraceReadsArgsAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class PruneTraceReadsArgsProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def start_lines(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("start_lines")) + + + + +class ReadTraceArgsAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("ReadTraceArgs") + self._properties: typing.Set[str] = set([ "start_line", "line_count", ]) + self._props = ReadTraceArgsProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "ReadTraceArgsProperties": + return self._props + + +class ReadTraceArgsViewer(ReadTraceArgsAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class ReadTraceArgsProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def start_line(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("start_line")) + + @property + def line_count(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("line_count")) + + + + +class ReviseContextArgsAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("ReviseContextArgs") + self._properties: typing.Set[str] = set([ "record_id", "reason", "kind", "title", "body", "status", "valid_from", "valid_until", "decision", "why", "alternatives", "consequences", "user_intent", "what_happened", "outcomes", ]) + self._props = ReviseContextArgsProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "ReviseContextArgsProperties": + return self._props + + +class ReviseContextArgsViewer(ReviseContextArgsAst): + def __init__(self, tb: type_builder.TypeBuilder): + 
super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class ReviseContextArgsProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def record_id(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("record_id")) + + @property + def reason(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("reason")) + + @property + def kind(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("kind")) + + @property + def title(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("title")) + + @property + def body(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("body")) + + @property + def status(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("status")) + + @property + def valid_from(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("valid_from")) + + @property + def valid_until(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("valid_until")) + + @property + def decision(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("decision")) + + @property + def why(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("why")) + + @property + def alternatives(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("alternatives")) + + @property + def consequences(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("consequences")) + + @property + def user_intent(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("user_intent")) + + @property + def what_happened(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("what_happened")) + + @property + def outcomes(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("outcomes")) + + + + +class SaveContextArgsAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("SaveContextArgs") + self._properties: typing.Set[str] = set([ "kind", "title", "body", "status", "valid_from", "valid_until", "decision", "why", "alternatives", "consequences", "user_intent", "what_happened", "outcomes", ]) + self._props = SaveContextArgsProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "SaveContextArgsProperties": + return self._props + + +class SaveContextArgsViewer(SaveContextArgsAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, 
type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class SaveContextArgsProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def kind(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("kind")) + + @property + def title(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("title")) + + @property + def body(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("body")) + + @property + def status(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("status")) + + @property + def valid_from(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("valid_from")) + + @property + def valid_until(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("valid_until")) + + @property + def decision(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("decision")) + + @property + def why(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("why")) + + @property + def alternatives(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("alternatives")) + + @property + def consequences(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("consequences")) + + @property + def user_intent(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("user_intent")) + + @property + def what_happened(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("what_happened")) + + @property + def outcomes(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("outcomes")) + + + + +class SearchContextArgsAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("SearchContextArgs") + self._properties: typing.Set[str] = set([ "query", "kind", "status", "valid_at", "include_archived", "limit", ]) + self._props = SearchContextArgsProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "SearchContextArgsProperties": + return self._props + + +class SearchContextArgsViewer(SearchContextArgsAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class SearchContextArgsProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def query(self) -> type_builder.ClassPropertyViewer: + return 
type_builder.ClassPropertyViewer(self.__bldr.property("query")) + + @property + def kind(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("kind")) + + @property + def status(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("status")) + + @property + def valid_at(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("valid_at")) + + @property + def include_archived(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("include_archived")) + + @property + def limit(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("limit")) + + + diff --git a/baml_agents/baml_client/type_map.py b/baml_agents/baml_client/type_map.py new file mode 100644 index 0000000..42453ca --- /dev/null +++ b/baml_agents/baml_client/type_map.py @@ -0,0 +1,55 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +from . import types +from . import stream_types + + +type_map = { + + "types.ExtractAgentStep": types.ExtractAgentStep, + "stream_types.ExtractAgentStep": stream_types.ExtractAgentStep, + + "types.FinalResultArgs": types.FinalResultArgs, + "stream_types.FinalResultArgs": stream_types.FinalResultArgs, + + "types.GetContextArgs": types.GetContextArgs, + "stream_types.GetContextArgs": stream_types.GetContextArgs, + + "types.NoteTraceFindingsArgs": types.NoteTraceFindingsArgs, + "stream_types.NoteTraceFindingsArgs": stream_types.NoteTraceFindingsArgs, + + "types.PruneTraceReadsArgs": types.PruneTraceReadsArgs, + "stream_types.PruneTraceReadsArgs": stream_types.PruneTraceReadsArgs, + + "types.ReadTraceArgs": types.ReadTraceArgs, + "stream_types.ReadTraceArgs": stream_types.ReadTraceArgs, + + "types.ReviseContextArgs": types.ReviseContextArgs, + "stream_types.ReviseContextArgs": stream_types.ReviseContextArgs, + + "types.SaveContextArgs": types.SaveContextArgs, + "stream_types.SaveContextArgs": stream_types.SaveContextArgs, + + "types.SearchContextArgs": types.SearchContextArgs, + "stream_types.SearchContextArgs": stream_types.SearchContextArgs, + + + "types.ExtractAction": types.ExtractAction, + + "types.FindingLevel": types.FindingLevel, + + "types.RecordKind": types.RecordKind, + + "types.RecordStatus": types.RecordStatus, + +} \ No newline at end of file diff --git a/baml_agents/baml_client/types.py b/baml_agents/baml_client/types.py new file mode 100644 index 0000000..e7716e2 --- /dev/null +++ b/baml_agents/baml_client/types.py @@ -0,0 +1,153 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. 
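+# The classes below are the final validated models; type_map.py pairs each of them with a partial streaming variant from stream_types.py.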
+ +import typing +import typing_extensions +from enum import Enum + + +from pydantic import BaseModel, ConfigDict, Field + + +import baml_py + +CheckT = typing_extensions.TypeVar('CheckT') +CheckName = typing_extensions.TypeVar('CheckName', bound=str) + +class Check(BaseModel): + name: str + expression: str + status: str +class Checked(BaseModel, typing.Generic[CheckT, CheckName]): + value: CheckT + checks: typing.Dict[CheckName, Check] + +def get_checks(checks: typing.Dict[CheckName, Check]) -> typing.List[Check]: + return list(checks.values()) + +def all_succeeded(checks: typing.Dict[CheckName, Check]) -> bool: + return all(check.status == "succeeded" for check in get_checks(checks)) +# ######################################################################### +# Generated enums (4) +# ######################################################################### + +class ExtractAction(str, Enum): + READ_TRACE = "READ_TRACE" + SEARCH_CONTEXT = "SEARCH_CONTEXT" + GET_CONTEXT = "GET_CONTEXT" + SAVE_CONTEXT = "SAVE_CONTEXT" + REVISE_CONTEXT = "REVISE_CONTEXT" + NOTE_TRACE_FINDINGS = "NOTE_TRACE_FINDINGS" + PRUNE_TRACE_READS = "PRUNE_TRACE_READS" + FINAL_RESULT = "FINAL_RESULT" + +class FindingLevel(str, Enum): + DECISION = "DECISION" + PREFERENCE = "PREFERENCE" + FEEDBACK = "FEEDBACK" + REFERENCE = "REFERENCE" + CONSTRAINT = "CONSTRAINT" + FACT = "FACT" + IMPLEMENTATION = "IMPLEMENTATION" + +class RecordKind(str, Enum): + DECISION = "DECISION" + PREFERENCE = "PREFERENCE" + CONSTRAINT = "CONSTRAINT" + FACT = "FACT" + REFERENCE = "REFERENCE" + EPISODE = "EPISODE" + +class RecordStatus(str, Enum): + ACTIVE = "ACTIVE" + ARCHIVED = "ARCHIVED" + +# ######################################################################### +# Generated classes (9) +# ######################################################################### + +class ExtractAgentStep(BaseModel): + action: ExtractAction + rationale: str = Field(description='One short sentence explaining why this is the next action. Do not include hidden chain-of-thought.') + read_trace: typing.Optional["ReadTraceArgs"] = None + search_context: typing.Optional["SearchContextArgs"] = None + get_context: typing.Optional["GetContextArgs"] = None + save_context: typing.Optional["SaveContextArgs"] = None + revise_context: typing.Optional["ReviseContextArgs"] = None + note_trace_findings: typing.Optional["NoteTraceFindingsArgs"] = None + prune_trace_reads: typing.Optional["PruneTraceReadsArgs"] = None + final_result: typing.Optional["FinalResultArgs"] = None + +class FinalResultArgs(BaseModel): + completion_summary: str + +class GetContextArgs(BaseModel): + record_ids: typing.List[str] = Field(description='Record IDs returned by search_context or list_context. Fetch before any revision.') + include_versions: typing.Optional[bool] = Field(default=None, description='Whether to include prior versions.') + detail: typing.Optional[typing.Union[typing_extensions.Literal['concise'], typing_extensions.Literal['detailed']]] = Field(default=None, description='Use concise unless full version history is needed.') + +class NoteTraceFindingsArgs(BaseModel): + theme: typing.Optional[str] = Field(default=None, description='Short theme label for this finding. Leave empty only for the no-findings checkpoint.') + line: typing.Optional[int] = Field(default=None, description='1-based trace line with supporting evidence. Leave empty only for the no-findings checkpoint.') + quote: typing.Optional[str] = Field(default=None, description='Short evidence quote from the trace. 
Leave empty only for the no-findings checkpoint.') + level: typing.Optional[FindingLevel] = Field(default=None, description='Durable levels only for reusable context; implementation for trace-local evidence.') + +class PruneTraceReadsArgs(BaseModel): + start_lines: typing.List[int] = Field(description='1-based start_line values from earlier read_trace calls, such as 1, 101, or 201. Never use context token counts.') + +class ReadTraceArgs(BaseModel): + start_line: typing.Optional[int] = None + line_count: typing.Optional[int] = None + +class ReviseContextArgs(BaseModel): + record_id: str + reason: str = Field(description='Short reason for the revision.') + kind: RecordKind = Field(description='Must match the fetched record kind; revise_context cannot change kind.') + title: str = Field(description='Complete improved title.') + body: str = Field(description='Complete improved body.') + status: typing.Optional[RecordStatus] = Field(default=None, description='Lifecycle status to keep or set.') + valid_from: typing.Optional[str] = None + valid_until: typing.Optional[str] = None + decision: typing.Optional[str] = None + why: typing.Optional[str] = None + alternatives: typing.Optional[str] = None + consequences: typing.Optional[str] = None + user_intent: typing.Optional[str] = None + what_happened: typing.Optional[str] = None + outcomes: typing.Optional[str] = None + +class SaveContextArgs(BaseModel): + kind: RecordKind = Field(description='Use episode for the mandatory session summary; use durable kinds only for reusable project context.') + title: str = Field(description='Short standalone title for the lasting point or episode.') + body: str = Field(description='Compact standalone body. Do not include trace-local command logs or copied errors.') + status: typing.Optional[RecordStatus] = Field(default=None, description='Use archived for routine episodes with no durable signal; active for reusable durable records.') + valid_from: typing.Optional[str] = Field(default=None, description='Optional validity start timestamp.') + valid_until: typing.Optional[str] = Field(default=None, description='Optional validity end timestamp.') + decision: typing.Optional[str] = Field(default=None, description='Required only for kind=decision; omit for facts, preferences, constraints, references, and episodes.') + why: typing.Optional[str] = Field(default=None, description='Required only for kind=decision when rationale exists; omit when the trace says no durable rationale was supplied.') + alternatives: typing.Optional[str] = Field(default=None, description='Optional decision alternatives. Omit for non-decisions.') + consequences: typing.Optional[str] = Field(default=None, description='Optional direct application guidance for the same durable point.') + user_intent: typing.Optional[str] = Field(default=None, description='Required when kind=episode. Concise statement of what the user wanted in this source session.') + what_happened: typing.Optional[str] = Field(default=None, description='Required when kind=episode. Concise statement of what the session actually did.') + outcomes: typing.Optional[str] = Field(default=None, description='Optional concise episode outcome.') + +class SearchContextArgs(BaseModel): + query: str = Field(description='Natural-language query for the durable meaning, not \'*\' or an empty browse request.') + kind: typing.Optional[RecordKind] = Field(default=None, description='Optional filter. 
Use only when the desired record kind is known.') + status: typing.Optional[RecordStatus] = Field(default=None, description='Optional lifecycle filter.') + valid_at: typing.Optional[str] = Field(default=None, description='Optional timestamp for historical lookup.') + include_archived: typing.Optional[bool] = Field(default=None, description='Whether archived records should be included.') + limit: typing.Optional[int] = Field(default=None, description='Maximum hits to return.') + +# ######################################################################### +# Generated type aliases (0) +# ######################################################################### diff --git a/baml_agents/baml_client/watchers.py b/baml_agents/baml_client/watchers.py new file mode 100644 index 0000000..347146f --- /dev/null +++ b/baml_agents/baml_client/watchers.py @@ -0,0 +1,44 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +from typing import Callable, Any, Protocol, Generic, TypeVar, overload, Literal +import threading + +T = TypeVar("T") + +class BlockEvent: + def __init__(self, block_label: str, event_type: str): + self.block_label = block_label + self.event_type = event_type # "enter" | "exit" + +class VarEvent(Generic[T]): + def __init__(self, variable_name: str, value: T, timestamp: str, function_name: str): + self.variable_name = variable_name + self.value = value + self.timestamp = timestamp + self.function_name = function_name + +BlockHandler = Callable[[BlockEvent], None] +VarEventHandler = Callable[[VarEvent[T]], None] +StreamHandler = Callable[[Any], None] # Stream will be an async iterator + +class InternalEventBindings(Protocol): + function_name: str + block: list[BlockHandler] + vars: dict[str, list[VarEventHandler[Any]]] + streams: dict[str, list[StreamHandler]] + functions: dict[str, "InternalEventBindings"] + +class EventCollectorInternal(Protocol): + def __handlers__(self) -> InternalEventBindings: + ... 
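The watcher module above only declares handler shapes; no registration API appears in this patch. A minimal sketch of a caller-side handler bundle, assuming the generated package is importable as `baml_client` (the handler names, the watched variable `rationale`, and `PrintBindings` are illustrative, not generated API):

```python
from dataclasses import dataclass, field
from typing import Any

from baml_client.watchers import (
    BlockEvent,
    BlockHandler,
    VarEvent,
    VarEventHandler,
)


def log_block(event: BlockEvent) -> None:
    # event_type is "enter" or "exit" per the BlockEvent contract above.
    print(f"[{event.event_type}] {event.block_label}")


def log_var(event: VarEvent[Any]) -> None:
    # Receives the value a watched variable took during a function run.
    print(f"{event.function_name}.{event.variable_name} = {event.value!r}")


@dataclass
class PrintBindings:
    # Shaped like InternalEventBindings; Protocols need no subclassing.
    function_name: str = "DecideNextExtractStep"
    block: list[BlockHandler] = field(default_factory=lambda: [log_block])
    vars: dict[str, list[VarEventHandler[Any]]] = field(
        default_factory=lambda: {"rationale": [log_var]}
    )
    streams: dict[str, Any] = field(default_factory=dict)
    functions: dict[str, Any] = field(default_factory=dict)
```

Because `InternalEventBindings` is a `Protocol`, any object exposing these attributes type-checks; how such a bundle gets attached to a generated function is left to the runtime and is not shown here.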
+ diff --git a/baml_agents/baml_extract_agent/__init__.py b/baml_agents/baml_extract_agent/__init__.py new file mode 100644 index 0000000..ed6c524 --- /dev/null +++ b/baml_agents/baml_extract_agent/__init__.py @@ -0,0 +1,5 @@ +"""Minimal BAML plus LangGraph extraction-agent experiment.""" + +from baml_extract_agent.graph import run_baml_extraction + +__all__ = ["run_baml_extraction"] diff --git a/baml_agents/baml_extract_agent/graph.py b/baml_agents/baml_extract_agent/graph.py new file mode 100644 index 0000000..0150694 --- /dev/null +++ b/baml_agents/baml_extract_agent/graph.py @@ -0,0 +1,410 @@ +"""LangGraph ReAct loop whose LLM decisions are produced by BAML.""" + +from __future__ import annotations + +from collections import Counter +from datetime import datetime, timezone +import math +import operator +from pathlib import Path +from typing import Annotated, Any +from typing_extensions import TypedDict + +from baml_py import ClientRegistry +from langgraph.graph import END, START, StateGraph + +from baml_client.sync_client import b +from lerim.agents.extract import _format_existing_record_manifest +from lerim.config.settings import get_config +from lerim.agents.tools import ( + _TOKENS_PER_CHAR, + MODEL_CONTEXT_TOKEN_LIMIT, + ContextDeps, + _classify_context_pressure, + _first_uncovered_offset, + compute_request_budget, +) +from lerim.context import ProjectIdentity, resolve_project_identity +from lerim.context.spec import DURABLE_FINDING_LEVELS, IMPLEMENTATION_FINDING_LEVELS + +from baml_extract_agent.tool_bridge import ( + build_tool_context, + execute_step, + format_observation, + observation_to_state, + prepare_context_deps, + tool_manifest, +) + + +MODEL_NAME = "gemma4:e4b" +BAML_PROVIDER = "ollama" +OLLAMA_BASE_URL = "http://127.0.0.1:11434/v1" +MINIMAX_BASE_URL = "https://api.minimax.io/v1" +MINIMAX_TEMPERATURE_FLOOR = 0.01 +MAX_BAML_MODEL_RETRIES = 3 +BAML_HTTP_CONNECT_TIMEOUT_MS = 10_000 +BAML_HTTP_TIME_TO_FIRST_TOKEN_TIMEOUT_MS = 120_000 +BAML_HTTP_IDLE_TIMEOUT_MS = 30_000 +BAML_HTTP_REQUEST_TIMEOUT_MS = 300_000 +BAML_RECOVERABLE_ERROR_NAMES = { + "BamlClientFinishReasonError", + "BamlClientHttpError", + "BamlTimeoutError", + "BamlValidationError", +} + + +class ExtractGraphState(TypedDict, total=False): + """Mutable state carried through the BAML ReAct graph.""" + + observations: Annotated[list[dict[str, Any]], operator.add] + llm_calls: int + pending_step: Any + done: bool + completion_summary: str + + +def run_baml_extraction( + *, + trace_path: Path, + context_db_path: Path, + project_root: Path | None = None, + session_id: str = "baml-extract-session", + session_started_at: str | None = None, + model_name: str = MODEL_NAME, + baml_provider: str = BAML_PROVIDER, + api_base_url: str | None = None, + api_key: str | None = None, + temperature: float = 0.0, + ollama_base_url: str = OLLAMA_BASE_URL, + max_llm_calls: int | None = None, + progress: bool = False, +) -> dict[str, Any]: + """Run the minimal BAML plus LangGraph extraction experiment.""" + resolved_trace_path = trace_path.expanduser().resolve() + resolved_context_db_path = context_db_path.expanduser().resolve() + identity = resolve_project_identity(project_root.expanduser().resolve() if project_root else Path.cwd()) + started_at = session_started_at or datetime.now(timezone.utc).isoformat() + deps = prepare_context_deps( + context_db_path=resolved_context_db_path, + project_identity=identity, + session_id=session_id, + trace_path=resolved_trace_path, + session_started_at=started_at, + model_name=model_name, + ) + graph 
= build_extract_graph( + deps=deps, + run_instruction=_build_run_instruction( + context_db_path=resolved_context_db_path, + project_identity=identity, + trace_path=resolved_trace_path, + session_started_at=started_at, + ), + model_name=model_name, + baml_provider=baml_provider, + api_base_url=api_base_url, + api_key=api_key, + temperature=temperature, + ollama_base_url=ollama_base_url, + max_llm_calls=max_llm_calls or compute_request_budget(resolved_trace_path), + progress=progress, + ) + final_state = graph.invoke( + {"observations": [], "llm_calls": 0, "done": False, "completion_summary": ""} + ) + if not final_state.get("done"): + raise RuntimeError("BAML extraction graph stopped before final_result.") + return { + "completion_summary": final_state.get("completion_summary", ""), + "llm_calls": final_state.get("llm_calls", 0), + "observations": final_state.get("observations", []), + "context_db_path": str(resolved_context_db_path), + "project_id": identity.project_id, + "session_id": session_id, + "model_name": model_name, + "baml_provider": baml_provider, + } + + +def build_extract_graph( + *, + deps: ContextDeps, + run_instruction: str, + model_name: str, + baml_provider: str, + api_base_url: str | None, + api_key: str | None, + temperature: float, + ollama_base_url: str, + max_llm_calls: int, + progress: bool = False, +): + """Compile the LangGraph state machine for one extraction run.""" + runtime_context = build_tool_context(deps) + live_tool_manifest = tool_manifest() + baml_runtime = _baml_client_for_model( + model_name=model_name, + baml_provider=baml_provider, + api_base_url=api_base_url, + api_key=api_key, + temperature=temperature, + ollama_base_url=ollama_base_url, + ) + + def llm_call(state: ExtractGraphState) -> dict[str, Any]: + """Ask BAML to choose the next ReAct action.""" + llm_calls = int(state.get("llm_calls") or 0) + if llm_calls >= max_llm_calls: + raise RuntimeError( + f"BAML extraction exceeded max_llm_calls={max_llm_calls}." + ) + observations = state.get("observations", []) + scratchpad = _scratchpad(observations, deps) + if progress: + print(f" baml llm {llm_calls + 1}/{max_llm_calls}", flush=True) + try: + step = baml_runtime.DecideNextExtractStep( + runtime_dashboard=_runtime_dashboard(deps, observations), + run_instruction=run_instruction, + tool_manifest=live_tool_manifest, + scratchpad=scratchpad, + ) + except Exception as exc: + if not _is_recoverable_baml_error(exc): + raise + model_retry_count = sum( + 1 for observation in observations if observation.get("action") == "model_retry" + ) + if model_retry_count >= MAX_BAML_MODEL_RETRIES: + raise RuntimeError( + f"BAML extraction exceeded model_retry_limit={MAX_BAML_MODEL_RETRIES}." 
+ ) from exc + step = { + "action": "model_retry", + "content": _model_retry_observation(exc), + } + return {"pending_step": step, "llm_calls": llm_calls + 1} + + def tool_node(state: ExtractGraphState) -> dict[str, Any]: + """Execute the BAML-selected action with Lerim's real tools.""" + pending_step = state["pending_step"] + if isinstance(pending_step, dict) and pending_step.get("action") == "model_retry": + if progress: + print(" baml tool model_retry", flush=True) + return { + "observations": [ + { + "action": "model_retry", + "ok": False, + "content": pending_step["content"], + "args": {}, + "done": False, + "completion_summary": "", + } + ], + "done": False, + "completion_summary": "", + } + observation = execute_step(pending_step, runtime_context) + if progress: + print(f" baml tool {observation.action} ok={observation.ok}", flush=True) + return { + "observations": [observation_to_state(observation)], + "done": observation.done, + "completion_summary": observation.completion_summary, + } + + def should_continue(state: ExtractGraphState) -> str: + """Route back to the LLM until final_result validates.""" + if bool(state.get("done")): + return END + return "llm_call" + + graph = StateGraph(ExtractGraphState) + graph.add_node("llm_call", llm_call) + graph.add_node("tool_node", tool_node) + graph.add_edge(START, "llm_call") + graph.add_edge("llm_call", "tool_node") + graph.add_conditional_edges("tool_node", should_continue, ["llm_call", END]) + return graph.compile() + + +def _baml_client_for_model( + *, + model_name: str, + baml_provider: str, + api_base_url: str | None, + api_key: str | None, + temperature: float, + ollama_base_url: str, +): + """Return a generated BAML client pointed at the requested runtime model.""" + normalized_provider = baml_provider.strip().lower() + if normalized_provider == "ollama": + client_name = "RuntimeOllama" + base_url = api_base_url or ollama_base_url + resolved_api_key = api_key + resolved_temperature = temperature + elif normalized_provider == "minimax": + client_name = "RuntimeMiniMax" + cfg = get_config() + base_url = api_base_url or cfg.provider_api_bases.get("minimax") or MINIMAX_BASE_URL + resolved_api_key = api_key or cfg.minimax_api_key + if not resolved_api_key: + raise RuntimeError("missing_api_key:MINIMAX_API_KEY required for BAML MiniMax client") + resolved_temperature = max( + MINIMAX_TEMPERATURE_FLOOR, + min(1.0, float(temperature)), + ) + elif normalized_provider == "openai-generic": + client_name = "RuntimeOpenAIGeneric" + base_url = api_base_url + if not base_url: + raise RuntimeError("missing_api_base:openai-generic BAML client requires api_base_url") + resolved_api_key = api_key + resolved_temperature = temperature + else: + raise RuntimeError(f"unsupported_baml_provider:{baml_provider}") + + options: dict[str, Any] = { + "base_url": base_url, + "model": model_name, + "temperature": resolved_temperature, + "http": { + "connect_timeout_ms": BAML_HTTP_CONNECT_TIMEOUT_MS, + "time_to_first_token_timeout_ms": BAML_HTTP_TIME_TO_FIRST_TOKEN_TIMEOUT_MS, + "idle_timeout_ms": BAML_HTTP_IDLE_TIMEOUT_MS, + "request_timeout_ms": BAML_HTTP_REQUEST_TIMEOUT_MS, + }, + } + if resolved_api_key: + options["api_key"] = resolved_api_key + + registry = ClientRegistry() + registry.add_llm_client( + name=client_name, + provider="openai-generic", + options=options, + retry_policy="ExtractAgentRetry", + ) + registry.set_primary(client_name) + return b.with_options(client_registry=registry) + + +def _is_recoverable_baml_error(exc: Exception) -> bool: + 
"""Return whether a BAML model/parsing failure should be retried in graph.""" + return type(exc).__name__ in BAML_RECOVERABLE_ERROR_NAMES + + +def _model_retry_observation(exc: Exception) -> str: + """Render a compact model failure note for the next BAML turn.""" + message = str(exc).replace("\n", " ")[:1200] + return ( + "The previous BAML model call did not produce a valid next action. " + "Retry and return exactly one JSON object matching the requested schema. " + "Do not include tags, hidden reasoning, markdown, or prose before " + f"the JSON. Error: {type(exc).__name__}: {message}" + ) + + +def _build_run_instruction( + *, + context_db_path: Path, + project_identity: ProjectIdentity, + trace_path: Path, + session_started_at: str, +) -> str: + """Build the same extraction task framing used by Lerim's current agent.""" + try: + trace_line_count = sum(1 for _ in trace_path.open("r", encoding="utf-8")) + except OSError: + trace_line_count = 0 + existing_record_manifest = _format_existing_record_manifest( + context_db_path=context_db_path, + project_identity=project_identity, + ) + source_time_text = str(session_started_at or "").strip() or "unknown" + prompt = ( + "Read the trace, write exactly one episode record, and write only the strongest " + "durable records with non-empty title and body. Store reusable rules and decisions, " + "not a polished recap of the meeting. " + "Durable records must be positive canonical context: when trace text combines a " + "durable point with cleanup/noise/ignore guidance, exclude that guidance entirely " + "from the durable record. " + f"Source session started_at: {source_time_text}. Treat the trace as evidence from " + "that time, not as a fresh verification of the current repository. " + f"This trace has {trace_line_count} lines. Read all chunks before writing. " + "If the trace needs more than one read to cover it, record findings before any write. " + "If relevant existing durable records are shown below, treat them as a shortlist only; " + "fetch the full record before any revision." + + (f"\n\n{existing_record_manifest}" if existing_record_manifest else "") + ) + return prompt + + +def _scratchpad(observations: list[dict[str, Any]], deps: ContextDeps) -> str: + """Render prior actions for the next BAML decision.""" + if not observations: + return "No prior actions." 
+ return "\n\n".join( + format_observation(observation, deps) for observation in observations[-20:] + ) + + +def _runtime_dashboard(deps: ContextDeps, observations: list[dict[str, Any]]) -> str: + """Render the same context-pressure and notes dashboards as the extract agent.""" + scratchpad_chars = sum( + len(format_observation(observation, deps)) for observation in observations + ) + approx_tokens = math.ceil(scratchpad_chars * _TOKENS_PER_CHAR) + pct = approx_tokens / MODEL_CONTEXT_TOKEN_LIMIT + pressure = _classify_context_pressure(pct) + deps.last_context_tokens = approx_tokens + deps.last_context_fill_ratio = pct + context_summary = ( + f"CONTEXT: {approx_tokens}/{MODEL_CONTEXT_TOKEN_LIMIT} ({pct:.0%}) [{pressure}]" + ) + return context_summary + "\n" + _notes_dashboard(deps) + + +def _notes_dashboard(deps: ContextDeps) -> str: + """Render the notes and trace-coverage dashboard used between model turns.""" + findings = deps.notes + if not findings: + summary = "NOTES: 0 findings" + if deps.findings_checked: + summary += " (checkpoint recorded)" + else: + counts = Counter(finding.level for finding in findings) + durable_findings = [ + finding for finding in findings if finding.level in DURABLE_FINDING_LEVELS + ] + theme_source = durable_findings or findings + themes = Counter(finding.theme for finding in theme_source) + durable = sum(counts.get(level, 0) for level in DURABLE_FINDING_LEVELS) + implementation = sum( + counts.get(level, 0) for level in IMPLEMENTATION_FINDING_LEVELS + ) + top_themes = ", ".join( + f"{theme}({count})" for theme, count in themes.most_common(5) + ) + summary = ( + f"NOTES: {len(findings)} findings ({durable} durable, {implementation} implementation) " + f"across {len(themes)} theme(s)" + ) + if top_themes: + summary += f"\nTop themes: {top_themes}" + if deps.read_ranges: + next_uncovered = _first_uncovered_offset( + deps.read_ranges, + int(deps.trace_total_lines), + ) + covered_chunks = len({(int(start), int(end)) for start, end in deps.read_ranges}) + summary += ( + f"\nTrace reads: {covered_chunks} chunk(s)" + f"\nNext unread offset: {next_uncovered if next_uncovered is not None else 'none'}" + f"\nPruned offsets: {sorted(deps.pruned_offsets) if deps.pruned_offsets else 'none'}" + ) + return summary diff --git a/baml_agents/baml_extract_agent/run.py b/baml_agents/baml_extract_agent/run.py new file mode 100644 index 0000000..4e344c5 --- /dev/null +++ b/baml_agents/baml_extract_agent/run.py @@ -0,0 +1,68 @@ +"""Command-line entrypoint for the BAML plus LangGraph extraction experiment.""" + +from __future__ import annotations + +import argparse +import json +import os +from pathlib import Path + +from baml_extract_agent.graph import ( + BAML_PROVIDER, + MODEL_NAME, + OLLAMA_BASE_URL, + run_baml_extraction, +) + + +def main(argv: list[str] | None = None) -> int: + """Parse CLI arguments and run the BAML extraction graph.""" + parser = argparse.ArgumentParser( + description="Run the minimal BAML plus LangGraph Lerim extraction experiment." 
+ ) + parser.add_argument("--trace", required=True, type=Path) + parser.add_argument( + "--context-db", + default=Path(".tmp/baml_agents/context.sqlite3"), + type=Path, + ) + parser.add_argument("--project-root", default=Path.cwd(), type=Path) + parser.add_argument("--session-id", default="baml-extract-session") + parser.add_argument("--session-started-at", default=None) + parser.add_argument("--model", default=MODEL_NAME) + parser.add_argument( + "--baml-provider", + default=BAML_PROVIDER, + choices=("ollama", "minimax", "openai-generic"), + ) + parser.add_argument("--api-base-url", default=None) + parser.add_argument("--api-key-env", default=None) + parser.add_argument("--ollama-base-url", default=OLLAMA_BASE_URL) + parser.add_argument("--temperature", default=0.0, type=float) + parser.add_argument("--max-llm-calls", default=None, type=int) + args = parser.parse_args(argv) + + api_key_env = args.api_key_env + if api_key_env is None and args.baml_provider == "minimax": + api_key_env = "MINIMAX_API_KEY" + + result = run_baml_extraction( + trace_path=args.trace, + context_db_path=args.context_db, + project_root=args.project_root, + session_id=args.session_id, + session_started_at=args.session_started_at, + model_name=args.model, + baml_provider=args.baml_provider, + api_base_url=args.api_base_url, + api_key=os.environ.get(api_key_env) if api_key_env else None, + temperature=args.temperature, + ollama_base_url=args.ollama_base_url, + max_llm_calls=args.max_llm_calls, + ) + print(json.dumps(result, ensure_ascii=True, indent=2)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/baml_agents/baml_extract_agent/tool_bridge.py b/baml_agents/baml_extract_agent/tool_bridge.py new file mode 100644 index 0000000..0eb8004 --- /dev/null +++ b/baml_agents/baml_extract_agent/tool_bridge.py @@ -0,0 +1,376 @@ +"""Bridge BAML-selected actions to Lerim's existing extraction tools.""" + +from __future__ import annotations + +from dataclasses import dataclass +import inspect +import json +from pathlib import Path +from typing import Any, Callable + +from pydantic_ai import ModelRetry, RunContext +from pydantic_ai.models.test import TestModel +from pydantic_ai.usage import RunUsage + +from lerim.agents import tools as extract_tools +from lerim.agents.toolsets import EXTRACT_TOOLS +from lerim.agents.tools import ContextDeps +from lerim.agents.tools import TRACE_MAX_LINES_PER_READ, _first_uncovered_offset +from lerim.context import ContextStore, ProjectIdentity + + +TOOL_NAMES = tuple(tool.__name__ for tool in EXTRACT_TOOLS) + + +@dataclass(frozen=True) +class ToolObservation: + """Observed result after dispatching one ReAct action.""" + + action: str + ok: bool + content: str + args: dict[str, Any] + done: bool = False + completion_summary: str = "" + + +def build_tool_context(deps: ContextDeps) -> RunContext[ContextDeps]: + """Build the minimal PydanticAI run context required by Lerim tools.""" + return RunContext(deps=deps, model=TestModel(), usage=RunUsage()) + + +def tool_manifest() -> str: + """Render the live Lerim extraction tool signatures for the BAML prompt.""" + lines: list[str] = [] + for name in TOOL_NAMES: + func = getattr(extract_tools, name) + signature = _public_signature(func) + doc = inspect.getdoc(func) or "" + first_line = doc.splitlines()[0] if doc else "" + lines.append(f"- {name}{signature}: {first_line}") + lines.append( + "- final_result(completion_summary: str): Finish after exactly one episode exists." 
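+ # final_result is not a Lerim tool function; it is the graph's terminal action, so its manifest line is appended by hand.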
+ ) + return "\n".join(lines) + + +def count_current_session_episodes(deps: ContextDeps) -> int: + """Count current-session episode records in the canonical context store.""" + store = ContextStore(deps.context_db_path) + store.initialize() + store.register_project(deps.project_identity) + rows = store.query( + entity="records", + mode="count", + project_ids=[deps.project_identity.project_id], + kind="episode", + source_session_id=deps.session_id, + include_archived=True, + ) + return int(rows.get("count") or 0) + + +def prepare_context_deps( + *, + context_db_path: Path, + project_identity: ProjectIdentity, + session_id: str, + trace_path: Path, + session_started_at: str, + model_name: str, +) -> ContextDeps: + """Initialize store provenance and return dependencies for tool calls.""" + store = ContextStore(context_db_path) + store.initialize() + store.register_project(project_identity) + store.upsert_session( + project_id=project_identity.project_id, + session_id=session_id, + agent_type="baml-langgraph-extract", + source_trace_ref=str(trace_path), + repo_path=str(project_identity.repo_path), + cwd=str(project_identity.repo_path), + started_at=session_started_at, + model_name=model_name, + instructions_text=None, + prompt_text=None, + metadata={"experiment": "baml_agents"}, + ) + return ContextDeps( + context_db_path=context_db_path, + project_identity=project_identity, + session_id=session_id, + trace_path=trace_path, + session_started_at=session_started_at, + ) + + +def execute_step( + step: Any, + ctx: RunContext[ContextDeps], +) -> ToolObservation: + """Dispatch one BAML-selected step to the matching Lerim tool.""" + action = _action_name(getattr(step, "action", "")) + forced_read = _read_next_uncovered_chunk(action, ctx) + if forced_read is not None: + return forced_read + + if action == "final_result": + summary = _final_summary(step) + episode_count = count_current_session_episodes(ctx.deps) + if episode_count != 1: + return ToolObservation( + action=action, + ok=False, + content=( + "final_result refused: expected exactly one episode record " + f"for this session, found {episode_count}." 
+ ), + args={}, + ) + return ToolObservation( + action=action, + ok=True, + content=summary, + args={}, + done=True, + completion_summary=summary, + ) + + args = _args_for_action(step, action) + if args is None: + return ToolObservation( + action=action, + ok=False, + content=f"Missing argument object for action {action}.", + args={}, + ) + + try: + content = _dispatch_tool(action, ctx, args) + except ModelRetry as exc: + content = f"Tool retry needed: {exc}" + return ToolObservation(action=action, ok=False, content=content, args=args) + except Exception as exc: + content = f"Tool error: {type(exc).__name__}: {exc}" + return ToolObservation(action=action, ok=False, content=content, args=args) + return ToolObservation(action=action, ok=True, content=content, args=args) + + +def _read_next_uncovered_chunk( + action: str, + ctx: RunContext[ContextDeps], +) -> ToolObservation | None: + """Force full trace coverage before model-directed non-read actions.""" + if action == "read_trace" or ctx.deps.trace_path is None: + return None + try: + total_lines = sum( + 1 for _ in ctx.deps.trace_path.open("r", encoding="utf-8") + ) + except OSError: + return None + next_offset = _first_uncovered_offset(ctx.deps.read_ranges, total_lines) + if next_offset is None: + return None + args = { + "start_line": next_offset + 1, + "line_count": TRACE_MAX_LINES_PER_READ, + } + try: + content = _read_trace(ctx, args) + except Exception as exc: + return ToolObservation( + action="read_trace", + ok=False, + content=f"Forced trace read failed: {type(exc).__name__}: {exc}", + args=args, + ) + return ToolObservation(action="read_trace", ok=True, content=content, args=args) + + +def observation_to_state(observation: ToolObservation) -> dict[str, Any]: + """Convert a tool observation into serializable graph state.""" + return { + "action": observation.action, + "ok": observation.ok, + "content": observation.content, + "args": observation.args, + "done": observation.done, + "completion_summary": observation.completion_summary, + } + + +def format_observation(observation: dict[str, Any], deps: ContextDeps) -> str: + """Format a tool result as compact scratchpad text for the next BAML call.""" + action = str(observation.get("action") or "") + status = "ok" if bool(observation.get("ok")) else "error" + content = _pruned_content(observation, deps) + return f"Action: {action}\nStatus: {status}\nObservation:\n{content}" + + +def _dispatch_tool( + action: str, + ctx: RunContext[ContextDeps], + args: dict[str, Any], +) -> str: + """Call the raw Lerim tool function for one normalized action.""" + handlers: dict[str, Callable[[RunContext[ContextDeps], dict[str, Any]], str]] = { + "read_trace": _read_trace, + "search_context": _search_context, + "get_context": _get_context, + "save_context": _save_context, + "revise_context": _revise_context, + "note_trace_findings": _note_trace_findings, + "prune_trace_reads": _prune_trace_reads, + } + handler = handlers.get(action) + if handler is None: + return f"Unknown action: {action}" + return handler(ctx, args) + + +def _read_trace(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: + """Call read_trace with defaulted numeric arguments.""" + return extract_tools.read_trace( + ctx, + start_line=int(args.get("start_line") or 1), + line_count=int(args.get("line_count") or extract_tools.TRACE_MAX_LINES_PER_READ), + ) + + +def _search_context(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: + """Call search_context with only its supported arguments.""" + return 
extract_tools.search_context( + ctx, + query=str(args.get("query") or ""), + kind=args.get("kind"), + status=args.get("status"), + valid_at=args.get("valid_at"), + include_archived=bool(args.get("include_archived") or False), + limit=int(args.get("limit") or 8), + ) + + +def _get_context(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: + """Call get_context with BAML-provided record IDs.""" + return extract_tools.get_context( + ctx, + record_ids=list(args.get("record_ids") or []), + include_versions=bool(args.get("include_versions") or False), + detail=str(args.get("detail") or "detailed"), + ) + + +def _save_context(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: + """Call save_context with a complete record payload.""" + return extract_tools.save_context(ctx, **_with_defaults(args, {"status": "active"})) + + +def _revise_context(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: + """Call revise_context with a complete replacement payload.""" + return extract_tools.revise_context(ctx, **args) + + +def _note_trace_findings(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: + """Call note_trace_findings, allowing the no-findings checkpoint form.""" + if not any(args.get(name) for name in ("theme", "line", "quote")): + return extract_tools.note_trace_findings(ctx) + return extract_tools.note_trace_findings( + ctx, + theme=str(args.get("theme") or ""), + line=args.get("line") or 0, + quote=str(args.get("quote") or ""), + level=str(args.get("level") or "implementation"), + ) + + +def _prune_trace_reads(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: + """Call prune_trace_reads with the start-line list.""" + return extract_tools.prune_trace_reads( + ctx, + start_lines=[int(value) for value in args.get("start_lines") or []], + ) + + +def _pruned_content(observation: dict[str, Any], deps: ContextDeps) -> str: + """Return a read_trace stub when that chunk has been pruned.""" + action = str(observation.get("action") or "") + if action != "read_trace": + return str(observation.get("content") or "") + args = observation.get("args") if isinstance(observation.get("args"), dict) else {} + offset = max(0, int(args.get("start_line") or 1) - 1) + if offset in deps.pruned_offsets: + return "[pruned]" + return str(observation.get("content") or "") + + +def _args_for_action(step: Any, action: str) -> dict[str, Any] | None: + """Return the BAML argument object matching an action.""" + field_name = action + payload = getattr(step, field_name, None) + if payload is None: + return None + if hasattr(payload, "model_dump"): + return _coerce_tool_value(payload.model_dump(exclude_none=True)) + if isinstance(payload, dict): + return _coerce_tool_value( + {key: value for key, value in payload.items() if value is not None} + ) + return _coerce_tool_value( + json.loads(json.dumps(payload, default=lambda value: value.__dict__)) + ) + + +def _action_name(action: Any) -> str: + """Normalize a BAML enum value into a Lerim tool name.""" + raw = str(getattr(action, "value", action) or "").strip() + aliases = { + "READ_TRACE": "read_trace", + "SEARCH_CONTEXT": "search_context", + "GET_CONTEXT": "get_context", + "SAVE_CONTEXT": "save_context", + "REVISE_CONTEXT": "revise_context", + "NOTE_TRACE_FINDINGS": "note_trace_findings", + "PRUNE_TRACE_READS": "prune_trace_reads", + "FINAL_RESULT": "final_result", + } + return aliases.get(raw, raw.lower()) + + +def _final_summary(step: Any) -> str: + """Extract final_result.completion_summary from a generated BAML step.""" + payload = 
getattr(step, "final_result", None) + if payload is None: + return "" + return str(getattr(payload, "completion_summary", "") or "").strip() + + +def _with_defaults( + args: dict[str, Any], defaults: dict[str, Any] +) -> dict[str, Any]: + """Fill omitted optional tool arguments with Lerim's defaults.""" + payload = dict(defaults) + payload.update(args) + return payload + + +def _public_signature(func: Callable[..., str]) -> str: + """Return a tool signature without the PydanticAI context parameter.""" + signature = inspect.signature(func) + params = list(signature.parameters.values()) + if params and params[0].name == "ctx": + params = params[1:] + return "(" + ", ".join(str(param) for param in params) + ")" + + +def _coerce_tool_value(value: Any) -> Any: + """Convert generated BAML enum values into plain JSON-like values.""" + enum_value = getattr(value, "value", None) + if enum_value is not None: + return enum_value + if isinstance(value, dict): + return {key: _coerce_tool_value(item) for key, item in value.items()} + if isinstance(value, list): + return [_coerce_tool_value(item) for item in value] + return value diff --git a/baml_agents/baml_src/extract_react.baml b/baml_agents/baml_src/extract_react.baml new file mode 100644 index 0000000..8d44c9b --- /dev/null +++ b/baml_agents/baml_src/extract_react.baml @@ -0,0 +1,920 @@ +retry_policy ExtractAgentRetry { + max_retries 1 + strategy { + type exponential_backoff + delay_ms 500 + multiplier 2 + max_delay_ms 8000 + } +} + +client OllamaGemma4E4B { + provider "openai-generic" + retry_policy ExtractAgentRetry + options { + base_url "http://127.0.0.1:11434/v1" + model "gemma4:e4b" + temperature 0.0 + http { + connect_timeout_ms 10000 + time_to_first_token_timeout_ms 120000 + idle_timeout_ms 30000 + request_timeout_ms 300000 + } + } +} + +enum ExtractAction { + READ_TRACE @alias("read_trace") @description("Read the next numbered trace chunk.") + SEARCH_CONTEXT @alias("search_context") @description("Search existing DB-backed context by meaning.") + GET_CONTEXT @alias("get_context") @description("Fetch full context records by record ID before any revision.") + SAVE_CONTEXT @alias("save_context") @description("Create one episode or durable context record.") + REVISE_CONTEXT @alias("revise_context") @description("Revise a fetched context record with a complete improved payload.") + NOTE_TRACE_FINDINGS @alias("note_trace_findings") @description("Checkpoint durable or implementation findings from a long trace.") + PRUNE_TRACE_READS @alias("prune_trace_reads") @description("Prune earlier trace chunks after findings are captured.") + FINAL_RESULT @alias("final_result") @description("Finish only after exactly one current-session episode record exists.") +} + +enum RecordKind { + DECISION @alias("decision") @description("A durable project decision with decision and why fields.") + PREFERENCE @alias("preference") @description("A stable user or workflow preference.") + CONSTRAINT @alias("constraint") @description("A durable invariant, limit, or must/cannot rule.") + FACT @alias("fact") @description("A durable project fact or setup truth.") + REFERENCE @alias("reference") @description("A pointer to an external source of truth.") + EPISODE @alias("episode") @description("The mandatory per-session episode record; use this for session summaries.") +} + +enum RecordStatus { + ACTIVE @alias("active") @description("Current context that future sessions may reuse.") + ARCHIVED @alias("archived") @description("Historical or routine context that should not be 
treated as active guidance.") +} + +enum FindingLevel { + DECISION @alias("decision") @description("Durable decision-level finding.") + PREFERENCE @alias("preference") @description("Durable user or workflow preference finding.") + FEEDBACK @alias("feedback") @description("Durable feedback-level finding.") + REFERENCE @alias("reference") @description("Durable external-source finding.") + CONSTRAINT @alias("constraint") @description("Durable constraint-level finding.") + FACT @alias("fact") @description("Durable fact-level finding.") + IMPLEMENTATION @alias("implementation") @description("Trace-local implementation evidence or discarded hypothesis.") +} + +class ReadTraceArgs { + start_line int? + line_count int? +} + +class SearchContextArgs { + query string @description("Natural-language query for the durable meaning, not '*' or an empty browse request.") + kind RecordKind? @description("Optional filter. Use only when the desired record kind is known.") + status RecordStatus? @description("Optional lifecycle filter.") + valid_at string? @description("Optional timestamp for historical lookup.") + include_archived bool? @description("Whether archived records should be included.") + limit int? @description("Maximum hits to return.") +} + +class GetContextArgs { + record_ids string[] @description("Record IDs returned by search_context or list_context. Fetch before any revision.") + include_versions bool? @description("Whether to include prior versions.") + detail "concise" | "detailed"? @description("Use concise unless full version history is needed.") +} + +class SaveContextArgs { + kind RecordKind @description("Use episode for the mandatory session summary; use durable kinds only for reusable project context.") + title string @description("Short standalone title for the lasting point or episode.") + body string @description("Compact standalone body. Do not include trace-local command logs or copied errors.") + status RecordStatus? @description("Use archived for routine episodes with no durable signal; active for reusable durable records.") + valid_from string? @description("Optional validity start timestamp.") + valid_until string? @description("Optional validity end timestamp.") + decision string? @description("Required only for kind=decision; omit for facts, preferences, constraints, references, and episodes.") + why string? @description("Required only for kind=decision when rationale exists; omit when the trace says no durable rationale was supplied.") + alternatives string? @description("Optional decision alternatives. Omit for non-decisions.") + consequences string? @description("Optional direct application guidance for the same durable point.") + user_intent string? @description("Required when kind=episode. Concise statement of what the user wanted in this source session.") + what_happened string? @description("Required when kind=episode. Concise statement of what the session actually did.") + outcomes string? @description("Optional concise episode outcome.") +} + +class ReviseContextArgs { + record_id string + reason string @description("Short reason for the revision.") + kind RecordKind @description("Must match the fetched record kind; revise_context cannot change kind.") + title string @description("Complete improved title.") + body string @description("Complete improved body.") + status RecordStatus? @description("Lifecycle status to keep or set.") + valid_from string? + valid_until string? + decision string? + why string? + alternatives string? + consequences string? 
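+ // The optional fields below mirror SaveContextArgs: revise_context takes a complete replacement payload, not a diff.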
+ user_intent string? + what_happened string? + outcomes string? +} + +class NoteTraceFindingsArgs { + theme string? @description("Short theme label for this finding. Leave empty only for the no-findings checkpoint.") + line int? @description("1-based trace line with supporting evidence. Leave empty only for the no-findings checkpoint.") + quote string? @description("Short evidence quote from the trace. Leave empty only for the no-findings checkpoint.") + level FindingLevel? @description("Durable levels only for reusable context; implementation for trace-local evidence.") +} + +class PruneTraceReadsArgs { + start_lines int[] @description("1-based start_line values from earlier read_trace calls, such as 1, 101, or 201. Never use context token counts.") +} + +class FinalResultArgs { + completion_summary string +} + +class ExtractAgentStep { + action ExtractAction + rationale string @description("One short sentence explaining why this is the next action. Do not include hidden chain-of-thought.") + read_trace ReadTraceArgs? + search_context SearchContextArgs? + get_context GetContextArgs? + save_context SaveContextArgs? + revise_context ReviseContextArgs? + note_trace_findings NoteTraceFindingsArgs? + prune_trace_reads PruneTraceReadsArgs? + final_result FinalResultArgs? +} + +function DecideNextExtractStep( + runtime_dashboard: string, + run_instruction: string, + tool_manifest: string, + scratchpad: string +) -> ExtractAgentStep { + client OllamaGemma4E4B + prompt #" + {{ _.role("system") }} + + You are the Lerim extract agent. + Read one coding-agent trace, compress its signal, and write DB-backed context records. + + + + - Create exactly one episode record for the session. + - Create zero or more durable records only when the trace contains durable signal. + - The episode record is mandatory for every session, even if you also create or update durable records. + - Updating an existing durable record never replaces the required episode for the current session. + - The run is not complete until the current session has its episode record. + - Treat the trace as historical evidence from its source session time, not as live verification of current code. + - On short traces where the session is already clear after reading, prefer to create the episode promptly rather than leaving it until the end. + - Episode records must include `user_intent` and `what_happened`; do not put the whole episode only in `body`. + - Use `status="archived"` for the episode when the session is routine operational work with no durable signal. Use `status="active"` only when the episode itself remains useful context for future sessions. + + + + Durable signal means one of: + - decision + - preference + - constraint + - fact + - reference + + Implementation detail alone is not durable signal. + A temporary code-state finding, audit observation, open task, or release-risk report is not durable by itself. Promote it only when the trace establishes a reusable project rule, unresolved constraint, stable dependency, or standing source of truth. + + + + - Store the reusable rule, decision, invariant, dependency, preference, or external pointer, not the story of the session. + - One durable record should hold one durable point. + - Direct consequences and application guidance usually stay inside that same record. + - Create the minimum number of durable records that preserves distinct durable meanings. Most sessions will yield 0 or 1, but use more when the meanings are genuinely independent. 
+ - Classify each durable point into one canonical kind. Prefer the most specific supported kind, and do not duplicate the same point across multiple durable kinds. + - Duplicates are worse than gaps. Skip uncertain candidates rather than spraying near-duplicates. + - Never create a second durable record in the same run for the same core claim. If you realize the first draft needs improvement, update or refine that record instead of creating another one. + - `constraint` and `reference` are first-class durable record kinds, not fallback categories. + + + + - patch logs, command sequences, retries, timelines, or meeting-style recaps + - code structure, file paths, git history, or storage mechanics by themselves + - generic programming knowledge or facts already obvious from the repo + - rejected lures, discarded explanations, or implementation-only distractions + - one-run validation findings, approval flow, queue state, DB resets, rebuilds, or + runtime diagnostics by themselves + + + + - Read the trace in chunks until the full trace is covered. Do not start writing while unread trace lines remain. + - Use the findings scratchpad for evidence from chunks you have already read. Notes are summarized back to you on later turns; do not record the same point again unless you learned something new. + - Keep each durable theme and its supporting implementation evidence together. Do not record a rejected lure or discarded explanation as its own durable finding/theme. + - If one apparent finding only applies, routes, or operationalizes another finding, keep them as one durable theme instead of separate durable themes. + - If the trace needs more than one read, call `note_trace_findings` once per useful finding with theme, line, quote, and level before saving or revising context. Call it with no arguments when the full trace has no reusable signal. + - If you read many chunks, prune older read results only after those chunks have already been captured in notes. + - Search existing context before creating a durable record whenever the trace suggests an earlier record, duplicate risk, or "same meaning vs new meaning" judgment. + - The injected existing-record manifest is only a shortlist. It is never enough evidence for a revision. + - Fetch full records before any revision, and fetch each plausible target when several nearby records could match. + - Revise only when a fetched record clearly carries the same meaning and needs repair. If the core claim differs, create a new record instead. + - When the trace says an existing durable rule is correct but needs tightening, clarification, or a better why, fetch that record and update it rather than leaving the weaker wording unchanged. + - Avoid cosmetic same-run revisions. Revise a same-run record only to fix a concrete durable-context error or prevent a duplicate. + + + + - The system may inject `CONTEXT:` messages showing approximate context pressure. At soft or hard pressure, prune old trace chunks after their findings are captured. + - The system may inject `NOTES:` messages summarizing findings and trace coverage. Use them as a progress dashboard, not as a replacement for reading unread trace lines. + - The findings scratchpad writes the dashboard for future turns; do not try to reread the dashboard with tools. + + + + - First separate findings into durable signal and implementation evidence. + - Synthesize at the theme level. Usually one theme becomes one durable record. 
+ - Create multiple durable records only when the trace establishes multiple independent durable points, each with its own support. + - Do not store one durable point as both a preference and a decision, a fact and a decision, or any other cross-kind duplicate. + - A stable workflow preference is not also a decision unless the trace separately states an explicit project decision with rationale. + - A dependency, setup, or environment truth without durable rationale is a fact, not also a decision. + - A failure caused by the current run's temporary validation setup is not itself a + durable environment truth. If the investigation reveals a stable requirement that + future sessions must apply, save that requirement as the durable point and omit + the temporary validation story. + - Merge candidates when one only states how to apply the other in local operations, routing, or ownership. + - If two candidates share the same core claim, merge them. + - If the difference is only evidence framing, symptom wording, or local-vs-CI phrasing around the same durable fact, keep one record and fold the extra context into it. + - If one candidate is only the direct application or routing consequence of another, keep it inside the stronger record. + - Storage boundary plus per-component routing is one decision, not two. Keep the boundary as the record and fold the routing guidance into the same title/body. + - If one candidate only says how different local components should apply the same project rule, keep that guidance inside the main record rather than creating a second durable record. + - If one candidate only restates where local project components live or how an internal architecture is applied, keep it inside the stronger decision, fact, or constraint instead of creating a separate reference. + - If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule. The filtering guidance is evidence, not a second record. + - Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered. That is extraction guidance for this run, not project context. + - Store durable records only when the lesson is likely reusable beyond this trace. + - If a candidate is mainly about this trace's commands, files, or timeline, reject it. + - Trace-local instructions about what to ignore in this session are not preferences unless they clearly express a broader standing workflow rule for future sessions. + - If the trace explicitly says the rationale is unknown or says not to invent one, do not create a `decision`; use `fact` instead. + - A stable setup, dependency, or environment requirement without a durable why is a `fact` even if it sounds like the current chosen setup. + - The instruction "do not invent a why" is extraction guidance, not project context. + - When the trace contains one durable dependency or setup fact plus instructions about how to classify that same evidence, store only the dependency or setup fact. Do not turn the classification guidance into a separate `preference`. + - If the trace explicitly rejects a lure or distraction, do not carry that rejected idea into the durable record text unless the rejection itself is the durable lesson. + - If this older trace conflicts with newer existing active records, do not create a new active durable record for the older claim. Preserve the historical session in the episode and let the newer active record remain current. 
+ - If a long noisy investigation resolves into one source-of-truth boundary, store only that boundary. Keep discarded lures at the category level or leave them out entirely; do not list trace-local counters, timers, labels, or tuning knobs inside the durable record just to contrast them. + - When a discarded lure matters as evidence, keep it attached to the main durable theme as implementation context rather than storing it as a second durable theme. + - If the episode summary contains clearly reusable decision, preference, constraint, fact, or reference, that point should usually also exist as its own durable record. + - Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode. The episode says what happened; the durable record stores what future sessions should reuse. + - Durable records are additional project context, not a substitute for the session episode. Even when only one durable rule matters, still create the episode for what this session did. + + + + - Durable titles should name the lasting rule, decision, fact, constraint, preference, or reference directly. + - Durable bodies should be compact, neutral, and standalone. + - When a durable decision prohibits or routes a named interface, data path, dependency, provider, or boundary, preserve that named subject in the record instead of replacing it with a broader abstraction. + - Prefer this shape for durable records: + 1. the durable point + 2. why it matters + 3. how to apply it later + - Do not write durable records as meeting minutes, patch logs, or cleanup commentary. + - Do not preserve trace-local commands, negotiation phrasing, or "this is not about X" sentences in final record text. + - Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored. + - Do not mention discarded implementation noise in durable record fields, including `consequences`. If details are non-durable, omit them entirely rather than saying they are non-durable. + - When the durable lesson is a source-of-truth rule, write the authoritative rule directly. Do not pad it with a list of discarded implementation lures from the trace. + - If a short contrast is still helpful, keep it abstract, such as "not worker-local state" or "not ephemeral local state". Do not enumerate examples in parentheses or comma-separated lists. + - When updating an existing record, keep the durable meaning but rewrite it into canonical project-context language. + - When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection. Do not imply that a bug, missing capability, or release blocker is current unless the trace itself establishes that it remains unresolved as durable project context. + - Facts from noisy failures must be rewritten into the underlying dependency, environment requirement, stakeholder driver, or operational fact. + - If a fact still reads like stderr, an exception symptom, or copied command output, rewrite it again before writing. + - When the durable lesson is an environment or dependency requirement, do not center the fact on the observed failure symptom. Name the requirement directly and mention the symptom only if it is needed as brief supporting context. + - If brief supporting context is useful, lead with the requirement and keep the symptom generic. 
Never include exception class names, quoted error fragments, or copied failure strings in the durable fact. + - If the candidate is mainly "this validation run failed until we changed the setup", + it belongs in the archived episode. If the candidate names a reusable setup or + runtime requirement discovered through that validation, keep the requirement and + drop the failure narrative. + - When no durable rationale exists, do not spend the fact body explaining that the rationale is absent. Just state the stable dependency, setup requirement, or operational truth directly. + - Do not quote or paraphrase trace instructions about how to classify the evidence inside the final fact body. Final fact text should describe the underlying truth, not the extraction rule you followed. + - References must answer both "where should future sessions look?" and "when should they consult it?" + - Do not use `reference` for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than "consult this external source next time." + - Keep the episode concise: short title, short body, concise `user_intent`, `what_happened`, and `outcomes`. + - If the session is mostly routine operational work with little future value and no durable record, create the episode with `status="archived"`. + + + + + Stable workflow guidance from the user. Save corrections and confirmed non-obvious working style that should carry into future sessions. + Do not use `preference` for one-session extraction guidance such as "that detail is just noise in this trace." + + + A chosen approach or project rule that future work should follow and that is not obvious from code alone. + If the trace does not support a durable why, do not use `decision`. + + + A durable invariant, limit, or must/cannot rule that future work must respect. + + + A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth. + Use `fact` for stable setup or dependency truths when the trace explicitly says not to invent decision rationale. + + + A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo. + Use `reference` only when the enduring value is where to look later. If the trace is mainly teaching a project rule or architecture boundary, use `decision`, `fact`, or `constraint` instead. + + + + + + + - assistant patches a bug and writes a tidy summary + - user: "The diff is enough. Don't end with a recap every time." + - later turns continue with normal edits, tests, and review comments + + + Create one preference record about keeping replies terse and not appending redundant change recaps. + + + Store the file edit itself, or treat the correction as only a one-session scratch finding when it is clearly stable workflow guidance. + + + + + + - early turns discuss local refactors, temporary debug prints, and a flaky test + - midway, several ideas are tried and discarded + - late in the trace the user settles the architecture: durable project context lives in one store; hot runtime/session state lives in another + - the follow-on routing guidance is just how to apply that boundary + + + Create the required episode for the session and one decision record for the storage boundary. Keep the routing guidance inside the same record instead of splitting it into a second record. 
+ + + Store the refactor noise, split one architectural choice into two near-duplicate records such as one decision for the boundary and a second local-use record for which component reads which store, or create a separate durable record whose only message is that the refactors and debug edits were noise. + + + + + + - the user makes one architectural choice, such as keeping durable context and hot operational state in separate stores + - the trace also mentions variable renames, label tweaks, temporary debug prints, and similar low-value cleanups + - the user explicitly says those local edits should not become durable context + + + Create the required episode and one durable record for the architectural choice only. Treat the explicit "those edits are just noise" instruction as extraction guidance for this run, not as its own record. + + + Create a second durable record whose message is that renames, label tweaks, or temporary debug code are non-durable, or let that noise-filtering instruction replace the required episode. + + + + + + - repeated failed commands and partial theories about why a media workflow is broken + - some guesses are ruled out + - the stable conclusion is operational: environments that run this workflow need a specific system dependency installed + + + Create one fact record for the dependency requirement in clean operational language. Lead with the missing dependency or environment requirement, and if you mention the failure at all, keep it generic rather than naming the exact exception class or copied command output. Still create the required episode for this session. + + + Store the raw exception text, center the record on the failure symptom, split one operational lesson into separate local-vs-CI facts, create a second durable record whose message is "do not invent a rationale here," keep the command history or debugging timeline, or write only the fact and skip the episode. + + + + + + - the user states one stable dependency or setup truth + - nearby turns add extraction guidance such as "this is a fact, not a decision" or "do not invent a why beyond the dependency" + - no broader workflow rule for future sessions is established + + + Create the required episode and one fact record for the stable dependency or setup truth only. + + + Create a second durable preference whose whole point is how to classify this trace, or store the meta-instruction instead of the underlying dependency fact. + + + + + + - the trace says image-enabled workflows require a system dependency in the environment + - the user also says not to invent policy rationale beyond that dependency fact + + + Write a fact such as: "Image-enabled workflows require libvips in the environment." Keep the body on the requirement and its effect. + + + Write a fact body such as: "Do not invent a policy reason here" or "No decision rationale was supplied." Those are meta comments about classification, not durable project context. + + + + + + - early chunks are noisy and keep circling local counters, timers, labels, and temporary tuning + - the final chunk clarifies that those were distractions + - the real durable lesson is a source-of-truth boundary: authoritative state must live in one persisted place that survives restart and failover + + + Create one durable record for the source-of-truth boundary. Mention restart or failover if it explains why the boundary matters, but keep any contrast abstract, such as "not worker-local state," rather than listing local counters or timers. 
+
+
+ Write a durable record that carries over the rejected lure by naming worker-local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list.
+
+
+
+
+
+ - the assistant starts from a partial repo note
+ - later the user clarifies that incident ownership and current status are tracked in an external dashboard or ticket system
+ - future sessions should consult that external system when this class of issue appears
+
+
+ Create one reference record that names the external source and when future sessions should consult it.
+
+
+ Center the record on local files, or turn it into a warning slogan about what not to trust locally.
+
+
+
+
+
+ - run formatter
+ - fix a small lint complaint
+ - rerun tests
+ - confirm green
+ - no new rule, dependency, preference, or durable fact emerges
+
+
+ Create only an archived episode.
+
+
+ Invent a durable record from the sequence of routine commands.
+
+
+
+
+
+ - the trace points at an earlier record that sounds nearby
+ - new evidence sharpens part of it, but you still need to decide whether the core claim stayed the same
+ - there may be more than one plausible existing record
+
+
+ Search first, fetch the plausible existing record, then either update it if the meaning matches or create a new record if the core claim is different. In both cases, still create the episode for this session.
+
+
+ Update from a shortlist or search preview alone, force an update when the new claim is only adjacent, or skip the episode because you already changed a durable record.
+
+
+
+
+
+ - End the run with the `final_result` tool.
+ - Put the plain-text completion summary in `completion_summary`.
+ - Before `final_result`, ensure the current session already has exactly one episode record.
+ - If you have created durable records but no episode yet, stop and create the episode before `final_result`.
+ - If the episode contains the only copy of a reusable rule, invariant, dependency, source-of-truth pointer, or stable preference, stop and create the corresponding durable record before `final_result`.
+ - Do not end with free-form assistant text outside `final_result`.
+
+
+
+ Do not turn filenames, storage mechanics, graph links, or evidence tables into the main record unless the durable rule is specifically about that boundary.
+
+
+
+ - You are running as a BAML + LangGraph replica of the PydanticAI extract agent.
+ - You cannot call tools directly in this BAML harness. Return exactly one next tool call as typed JSON.
+ - Choose only an action whose tool name appears in Available tools.
+ - If a tool name is not listed in Available tools, it does not exist for this turn.
+ - Your first non-whitespace character must be `{`.
+ - Never output `<think>` tags, hidden reasoning text, markdown, or prose before or after the JSON object.
+ - Return one JSON object only. Do not include markdown, self-corrections, multiple JSON blocks, or a plan that chains future actions.
+ - Choose only the immediate next action. When the production prompt requires an intermediate step before saving or finalizing, take that step first.
+ - If multiple actions are still required, return only the first missing action and omit all later actions.
+ - For multi-chunk traces, if `note_trace_findings` is available and no finding checkpoint appears in prior actions, choose `note_trace_findings` before any `save_context`, `revise_context`, or `final_result`; use empty args when the fully read trace has no reusable signal.
+ - Fill only the argument object for the selected action. + - Treat `final_result` as the PydanticAI structured final output. + - For `save_context kind=episode`, include `user_intent` and `what_happened`. + - For `save_context kind=decision`, include `decision` and `why`. + - For `prune_trace_reads`, use only earlier `read_trace` start_line values. + - After the required episode exists, choose another `save_context` only when a clearly reusable durable signal remains outside the episode; do not save trace-local implementation fixes as durable facts. + - Do not treat an episode record as a durable record. If a reusable decision, preference, constraint, fact, or reference remains only in trace or episode text, save that durable record before `final_result`. + + + + {{ runtime_dashboard }} + + + {{ _.role("user") }} + Run instruction: + {{ run_instruction }} + + Available tools: + {{ tool_manifest }} + + Prior actions and observations: + {{ scratchpad }} + + {{ ctx.output_format }} + "# +} + +test InitialStepReadsTrace { + functions [DecideNextExtractStep] + args { + runtime_dashboard #" + CONTEXT: 0/200000 (0%) [normal] + NOTES: 0 findings + "# + run_instruction #" + Read the trace, write exactly one episode record, and write only the + strongest durable records. This trace has 3 lines. Read all chunks before + writing. + "# + tool_manifest #" + - read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk. + - save_context(kind: string, title: string, body: string): Save one context record. + - final_result(completion_summary: string): Finish after exactly one episode exists. + "# + scratchpad "No prior actions." + } + @@assert({{ this.action == "READ_TRACE" }}) +} + +test ContinueUnreadTraceBeforeWrite { + functions [DecideNextExtractStep] + args { + runtime_dashboard #" + CONTEXT: 20000/200000 (10%) [normal] + NOTES: 0 findings + Trace reads: 1 chunk(s) + Next unread offset: 100 + Pruned offsets: none + "# + run_instruction #" + This trace has 220 lines. Read all chunks before writing. + "# + tool_manifest #" + - read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk. + - save_context(kind: string, title: string, body: string): Save one context record. + - final_result(completion_summary: string): Finish after exactly one episode exists. + "# + scratchpad #" + Action: read_trace + Status: ok + Observation: + [220 lines, showing 1-100] — 120 more lines, call read_trace(start_line=101, line_count=100) for the next chunk + "# + } + @@assert({{ this.action == "READ_TRACE" }}) +} + +test LongTraceNotesFindingBeforeWrite { + functions [DecideNextExtractStep] + args { + runtime_dashboard #" + CONTEXT: 42000/200000 (21%) [normal] + NOTES: 0 findings + Trace reads: 3 chunk(s) + Next unread offset: none + Pruned offsets: none + "# + run_instruction #" + This trace has 240 lines and is fully read. An explicit durable architecture decision appears at line 188. + "# + tool_manifest #" + - note_trace_findings(theme: string?, line: int?, quote: string?, level: string?): Record one trace finding with line evidence. + - save_context(kind: string, title: string, body: string): Save one context record. + "# + scratchpad #" + Action: read_trace + Status: ok + Observation: + 188 user: We decided that durable context stays in the DB store because runtime queues are separate and temporary. 
+ "# + } + @@assert({{ this.action == "NOTE_TRACE_FINDINGS" }}) + @@assert({{ this.note_trace_findings.level == "DECISION" }}) +} + +test LongTraceNoReusableSignalCheckpoint { + functions [DecideNextExtractStep] + args { + runtime_dashboard #" + CONTEXT: 39000/200000 (20%) [normal] + NOTES: 0 findings + Trace reads: 2 chunk(s) + Next unread offset: none + Pruned offsets: none + "# + run_instruction #" + This long trace is fully read after multiple chunks. No note_trace_findings checkpoint has been called yet. It only formats files and reruns tests; no reusable rule, fact, preference, constraint, decision, or reference appears. + "# + tool_manifest #" + - note_trace_findings(theme: string?, line: int?, quote: string?, level: string?): Record one trace finding with line evidence, or call with no args for none. + - save_context(kind: string, title: string, body: string): Save one context record. + "# + scratchpad "Action: read_trace\nStatus: ok\nObservation:\nAll chunks covered routine formatter/test work." + } + @@assert({{ this.action == "NOTE_TRACE_FINDINGS" }}) +} + +test PruneOlderTraceReadsUnderPressure { + functions [DecideNextExtractStep] + args { + runtime_dashboard #" + CONTEXT: 130000/200000 (65%) [soft] + NOTES: 2 findings (1 durable, 1 implementation) across 1 theme(s) + Trace reads: 3 chunk(s) + Next unread offset: none + Pruned offsets: none + "# + run_instruction "The trace is fully read and findings from the first two chunks have been captured." + tool_manifest #" + - prune_trace_reads(start_lines: int[]): Prune earlier read_trace results after findings are noted. + - save_context(kind: string, title: string, body: string): Save one context record. + "# + scratchpad #" + Action: read_trace + Status: ok + Observation: + [300 lines, showing 1-100] + + Action: read_trace + Status: ok + Observation: + [300 lines, showing 101-200] + + Action: read_trace + Status: ok + Observation: + [300 lines, showing 201-300] + + Action: note_trace_findings + Status: ok + Observation: + Noted 1 finding (total 2 so far). + "# + } + @@assert({{ this.action == "PRUNE_TRACE_READS" }}) + @@assert({{ this.prune_trace_reads.start_lines|length > 0 }}) + @@assert({{ this.prune_trace_reads.start_lines[0] == 1 }}) +} + +test RoutineTraceSavesArchivedEpisode { + functions [DecideNextExtractStep] + args { + runtime_dashboard #" + CONTEXT: 1000/200000 (1%) [normal] + NOTES: 0 findings + Trace reads: 1 chunk(s) + Next unread offset: none + Pruned offsets: none + "# + run_instruction "The short trace is fully read. It only ran formatting and confirmed tests passed." + tool_manifest #" + - save_context(kind: string, title: string, body: string, status: string?): Save one context record. + - final_result(completion_summary: string): Finish after exactly one episode exists. + "# + scratchpad "Action: read_trace\nStatus: ok\nObservation:\nformatter ran; tests passed; no durable signal." 
+ } + @@assert({{ this.action == "SAVE_CONTEXT" }}) + @@assert({{ this.save_context.kind == "EPISODE" }}) + @@assert({{ this.save_context.status == "ARCHIVED" }}) + @@assert({{ this.save_context.user_intent != null and this.save_context.user_intent|length > 0 }}) + @@assert({{ this.save_context.what_happened != null and this.save_context.what_happened|length > 0 }}) +} + +test DurablePreferenceSavesPreference { + functions [DecideNextExtractStep] + args { + runtime_dashboard "CONTEXT: 1000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" + run_instruction "The fully read trace includes a stable user correction: keep final replies short and skip redundant recaps. The current-session episode already exists; the durable preference remains unsaved." + tool_manifest "- save_context(kind: string, title: string, body: string): Save one context record." + scratchpad "Action: read_trace\nStatus: ok\nObservation:\nuser: Keep final replies short and skip redundant recaps.\n\nAction: save_context\nStatus: ok\nObservation:\n{\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n\nAction: note_trace_findings\nStatus: ok\nObservation:\nTheme: reply style. Quote: keep final replies short and skip redundant recaps." + } + @@assert({{ this.action == "SAVE_CONTEXT" }}) + @@assert({{ this.save_context.kind == "PREFERENCE" }}) +} + +test DurableConstraintSavesConstraint { + functions [DecideNextExtractStep] + args { + runtime_dashboard "CONTEXT: 1000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" + run_instruction "The fully read trace establishes a durable invariant: future extraction runs must not mutate unfetched context records. The current-session episode already exists; the durable constraint remains unsaved." + tool_manifest "- save_context(kind: string, title: string, body: string): Save one context record." + scratchpad "Action: save_context\nStatus: ok\nObservation:\n{\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n\nAction: note_trace_findings\nStatus: ok\nObservation:\nTheme: mutation invariant. Quote: never revise or archive a context record before fetching it in the current run." + } + @@assert({{ this.action == "SAVE_CONTEXT" }}) + @@assert({{ this.save_context.kind == "CONSTRAINT" }}) +} + +test DurableDecisionSavesDecisionWithWhy { + functions [DecideNextExtractStep] + args { + runtime_dashboard "CONTEXT: 1000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" + run_instruction "The fully read trace establishes a durable architecture decision with rationale. The current-session episode already exists; the durable decision remains unsaved." + tool_manifest "- save_context(kind: string, title: string, body: string, decision: string?, why: string?): Save one context record." + scratchpad "Action: save_context\nStatus: ok\nObservation:\n{\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n\nAction: note_trace_findings\nStatus: ok\nObservation:\nTheme: storage boundary. Quote: durable context is DB-backed so sessions can replay it; runtime state stays separate." 
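+    // Per the harness rules, kind=decision saves must populate both `decision`
+    // and `why`; the assertions below check both fields.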
+ } + @@assert({{ this.action == "SAVE_CONTEXT" }}) + @@assert({{ this.save_context.kind == "DECISION" }}) + @@assert({{ this.save_context.decision|length > 0 }}) + @@assert({{ this.save_context.why|length > 0 }}) +} + +test StableDependencySavesFact { + functions [DecideNextExtractStep] + args { + runtime_dashboard "CONTEXT: 1000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" + run_instruction "The fully read trace establishes that image workflows require libvips. It gives no decision rationale. The current-session episode already exists; the durable dependency fact remains unsaved." + tool_manifest "- save_context(kind: string, title: string, body: string): Save one context record." + scratchpad "Action: save_context\nStatus: ok\nObservation:\n{\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n\nAction: note_trace_findings\nStatus: ok\nObservation:\nTheme: dependency. Quote: image workflows require libvips in the environment." + } + @@assert({{ this.action == "SAVE_CONTEXT" }}) + @@assert({{ this.save_context.kind == "FACT" }}) + @@assert({{ not this.save_context.why }}) +} + +test ExternalSourceSavesReference { + functions [DecideNextExtractStep] + args { + runtime_dashboard "CONTEXT: 1000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" + run_instruction "The fully read trace says incident ownership and current status live in the Ops dashboard, which future sessions should consult for incidents. The current-session episode already exists; the durable reference remains unsaved." + tool_manifest "- save_context(kind: string, title: string, body: string): Save one context record." + scratchpad "Action: save_context\nStatus: ok\nObservation:\n{\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n\nAction: note_trace_findings\nStatus: ok\nObservation:\nTheme: incident source. Quote: use the Ops dashboard for incident ownership and current status." + } + @@assert({{ this.action == "SAVE_CONTEXT" }}) + @@assert({{ this.save_context.kind == "REFERENCE" }}) +} + +test DuplicateRiskSearchesContext { + functions [DecideNextExtractStep] + args { + runtime_dashboard "CONTEXT: 1000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" + run_instruction "Relevant existing durable records are shown below; treat them as a shortlist only and fetch before revision.\n\nRelevant existing durable records:\n- rec_storage | decision | DB context boundary | Durable context lives in the DB store." + tool_manifest #" + - search_context(query: string, kind: string?, status: string?): Search saved context by meaning. + - get_context(record_ids: string[]): Fetch saved context records by ID. + - save_context(kind: string, title: string, body: string): Save one context record. + "# + scratchpad "Action: read_trace\nStatus: ok\nObservation:\nThe new trace repeats the DB context boundary decision with slightly sharper wording." 
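+    // The injected manifest is a shortlist only: the expected next step is to
+    // search or fetch the existing record, never to save a near-duplicate.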
+ } + @@assert({{ this.action == "SEARCH_CONTEXT" or this.action == "GET_CONTEXT" }}) + @@assert({{ this.action != "SAVE_CONTEXT" }}) +} + +test SearchHitFetchesBeforeRevision { + functions [DecideNextExtractStep] + args { + runtime_dashboard "CONTEXT: 2000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" + run_instruction "A search_context call found a nearby existing record. The current-session episode already exists. Fetch the full search hit before deciding whether to revise it." + tool_manifest #" + - search_context(query: string): Search saved context by meaning. + - get_context(record_ids: string[]): Fetch full context records by ID before any revision. + - revise_context(record_id: string, reason: string, kind: string, title: string, body: string): Revise an existing context record. + "# + scratchpad #" + Action: save_context + Status: ok + Observation: + {"ok": true, "result": {"record": {"kind": "episode", "source_session_id": "baml-test-session"}}} + + Action: search_context + Status: ok + Observation: + {"count": 1, "hits": [{"record_id": "rec_storage", "kind": "decision", "title": "DB context boundary", "body_preview": "Durable context lives in the DB store."}]} + "# + } + @@assert({{ this.action == "GET_CONTEXT" }}) +} + +test FetchedSameMeaningRevisesContext { + functions [DecideNextExtractStep] + args { + runtime_dashboard "CONTEXT: 2000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" + run_instruction "The fetched record has the same core decision but needs the rationale tightened." + tool_manifest #" + - revise_context(record_id: string, reason: string, kind: string, title: string, body: string): Revise an existing context record with a complete improved payload. + - save_context(kind: string, title: string, body: string): Save one context record. + "# + scratchpad #" + Action: get_context + Status: ok + Observation: + {"count": 1, "records": [{"record_id": "rec_storage", "kind": "decision", "title": "DB context boundary", "body": "Durable context lives in the DB store.", "decision": "Durable context lives in the DB store.", "why": "It must survive sessions."}]} + "# + } + @@assert({{ this.action == "REVISE_CONTEXT" }}) +} + +test FinalBeforeEpisodeSavesEpisode { + functions [DecideNextExtractStep] + args { + runtime_dashboard "CONTEXT: 3000/200000 (2%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" + run_instruction "The trace is fully read. One durable fact has already been saved, but no current-session episode exists." + tool_manifest #" + - save_context(kind: string, title: string, body: string): Save one context record. + - final_result(completion_summary: string): Finish after exactly one episode exists. 
+ "# + scratchpad #" + Action: save_context + Status: ok + Observation: + {"ok": true, "result": {"record": {"kind": "fact", "source_session_id": "baml-test-session"}}} + "# + } + @@assert({{ this.action == "SAVE_CONTEXT" }}) + @@assert({{ this.save_context.kind == "EPISODE" }}) + @@assert({{ this.save_context.user_intent != null and this.save_context.user_intent|length > 0 }}) + @@assert({{ this.save_context.what_happened != null and this.save_context.what_happened|length > 0 }}) +} + +test FinalStepAfterEpisodeSave { + functions [DecideNextExtractStep] + args { + runtime_dashboard #" + CONTEXT: 1000/200000 (1%) [normal] + NOTES: 0 findings + Trace reads: 1 chunk(s) + Next unread offset: none + Pruned offsets: none + "# + run_instruction #" + Read the trace, write exactly one episode record, and write only the + strongest durable records. The trace has already been fully read. + "# + tool_manifest #" + - read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk. + - save_context(kind: string, title: string, body: string): Save one context record. + - final_result(completion_summary: string): Finish after exactly one episode exists. + "# + scratchpad #" + Action: read_trace + Status: ok + Observation: + [3 lines, trace coverage complete] All trace lines have already been read. + + Action: save_context + Status: ok + Observation: + {"ok": true, "result": {"record": {"kind": "episode", "source_session_id": "baml-test-session"}}} + "# + } + @@assert({{ this.action == "FINAL_RESULT" }}) +} + +test EpisodeWithReusableDecisionSavesDecisionBeforeFinal { + functions [DecideNextExtractStep] + args { + runtime_dashboard #" + CONTEXT: 3000/200000 (2%) [normal] + NOTES: 0 findings + Trace reads: 1 chunk(s) + Next unread offset: none + Pruned offsets: none + "# + run_instruction "The trace is fully read. The current-session episode already exists, but a reusable PostgreSQL-over-MySQL project decision remains only in the trace and episode text. Save the durable decision before final_result." + tool_manifest #" + - save_context(kind: string, title: string, body: string, decision: string?, why: string?): Save one context record. + - final_result(completion_summary: string): Finish after exactly one episode exists. + "# + scratchpad #" + Action: read_trace + Status: ok + Observation: + user: Good. Also, we decided to use PostgreSQL instead of MySQL for this project. + assistant: Understood. Decision: PostgreSQL over MySQL. I'll update the connection string and ORM configuration. + + Action: save_context + Status: ok + Observation: + {"ok": true, "result": {"record": {"kind": "episode", "title": "Database Configuration Update", "body": "The session switched the project's primary database backend from MySQL to PostgreSQL.", "source_session_id": "baml-test-session"}}} + "# + } + @@assert({{ this.action == "SAVE_CONTEXT" }}) + @@assert({{ this.save_context.kind == "DECISION" }}) + @@assert({{ this.save_context.decision|length > 0 }}) + @@assert({{ this.save_context.why|length > 0 }}) +} + +test ImplementationOnlyAfterEpisodeFinalizes { + functions [DecideNextExtractStep] + args { + runtime_dashboard #" + CONTEXT: 3000/200000 (2%) [normal] + NOTES: 0 findings + Trace reads: 1 chunk(s) + Next unread offset: none + Pruned offsets: none + "# + run_instruction "The trace is fully read. A durable decision and the current-session episode already exist. The only remaining unsaved content is an implementation fix to a local timeout constant." 
+ tool_manifest #" + - save_context(kind: string, title: string, body: string): Save one context record. + - final_result(completion_summary: string): Finish after exactly one episode exists. + "# + scratchpad #" + Action: read_trace + Status: ok + Observation: + The trace fixed a local timeout constant and also established one architecture decision. + + Action: save_context + Status: ok + Observation: + {"ok": true, "result": {"record": {"kind": "decision", "title": "Runtime state remains separate from durable context"}}} + + Action: save_context + Status: ok + Observation: + {"ok": true, "result": {"record": {"kind": "episode", "source_session_id": "baml-test-session"}}} + "# + } + @@assert({{ this.action == "FINAL_RESULT" }}) +} diff --git a/baml_agents/baml_src/generators.baml b/baml_agents/baml_src/generators.baml new file mode 100644 index 0000000..13c903b --- /dev/null +++ b/baml_agents/baml_src/generators.baml @@ -0,0 +1,6 @@ +generator target { + output_type "python/pydantic" + output_dir "../" + default_client_mode "sync" + version "0.222.0" +} diff --git a/src/lerim/config/providers.py b/src/lerim/config/providers.py index a4b3207..4e11b0d 100644 --- a/src/lerim/config/providers.py +++ b/src/lerim/config/providers.py @@ -276,6 +276,8 @@ def parse_fallback_spec( def _make_retrying_http_client( max_attempts: int = 5, max_wait_seconds: int = 120, + timeout_seconds: float = MODEL_HTTP_TIMEOUT_SECONDS, + connect_timeout_seconds: float = MODEL_HTTP_CONNECT_TIMEOUT_SECONDS, ) -> AsyncClient: """Build an httpx AsyncClient with tenacity retries for transient errors. @@ -311,8 +313,8 @@ def _validate_response(response): return AsyncClient( transport=transport, timeout=Timeout( - MODEL_HTTP_TIMEOUT_SECONDS, - connect=MODEL_HTTP_CONNECT_TIMEOUT_SECONDS, + timeout_seconds, + connect=connect_timeout_seconds, ), ) @@ -349,6 +351,9 @@ def _build_minimax_anthropic_model( model: str, api_key: str, cfg: Config, + timeout_seconds: float | None = None, + connect_timeout_seconds: float | None = None, + max_retries: int = 5, ) -> AnthropicModel: """Build MiniMax model via its Anthropic-compatible endpoint. @@ -373,18 +378,29 @@ def _build_minimax_anthropic_model( else "https://api.minimax.io/anthropic" ) + resolved_timeout = timeout_seconds or MODEL_HTTP_TIMEOUT_SECONDS + resolved_connect_timeout = connect_timeout_seconds or MODEL_HTTP_CONNECT_TIMEOUT_SECONDS + client_cache_key = "minimax-anthropic" + if ( + resolved_timeout != MODEL_HTTP_TIMEOUT_SECONDS + or resolved_connect_timeout != MODEL_HTTP_CONNECT_TIMEOUT_SECONDS + ): + client_cache_key = ( + f"minimax-anthropic-{resolved_timeout:g}-{resolved_connect_timeout:g}" + ) + client = AsyncAnthropic( api_key=api_key, base_url=base_url, timeout=Timeout( - MODEL_HTTP_TIMEOUT_SECONDS, - connect=MODEL_HTTP_CONNECT_TIMEOUT_SECONDS, + resolved_timeout, + connect=resolved_connect_timeout, ), - max_retries=5, + max_retries=max_retries, http_client=cached_async_http_client( - provider="minimax-anthropic", - timeout=MODEL_HTTP_TIMEOUT_SECONDS, - connect=MODEL_HTTP_CONNECT_TIMEOUT_SECONDS, + provider=client_cache_key, + timeout=resolved_timeout, + connect=resolved_connect_timeout, ), ) anthropic_provider = AnthropicProvider(anthropic_client=client) @@ -408,6 +424,9 @@ def _build_pydantic_model_for_provider( api_base: str, cfg: Config, role_label: str, + http_timeout_seconds: float | None = None, + http_connect_timeout_seconds: float | None = None, + http_max_attempts: int | None = None, ) -> Model: """Build a single PydanticAI model with HTTP retry. 
@@ -428,7 +447,14 @@ def _build_pydantic_model_for_provider( # MiniMax: Anthropic-compat endpoint for proper tool calling if provider == "minimax": - return _build_minimax_anthropic_model(model=model, api_key=api_key, cfg=cfg) + return _build_minimax_anthropic_model( + model=model, + api_key=api_key, + cfg=cfg, + timeout_seconds=http_timeout_seconds, + connect_timeout_seconds=http_connect_timeout_seconds, + max_retries=http_max_attempts or 5, + ) # All other providers: OpenAI-compat path base_url = api_base or _default_api_base(provider, cfg) @@ -439,7 +465,13 @@ def _build_pydantic_model_for_provider( f"provider={provider} (set [providers].{provider} in default.toml)" ) - http_client = _make_retrying_http_client() + http_client = _make_retrying_http_client( + max_attempts=http_max_attempts or 5, + timeout_seconds=http_timeout_seconds or MODEL_HTTP_TIMEOUT_SECONDS, + connect_timeout_seconds=( + http_connect_timeout_seconds or MODEL_HTTP_CONNECT_TIMEOUT_SECONDS + ), + ) openai_provider = OpenAIProvider( base_url=base_url, api_key=api_key, @@ -473,6 +505,9 @@ def _build_model_chain( fallback_models: tuple[str, ...] | list[str], primary_role_label: str, fallback_role_label_prefix: str, + http_timeout_seconds: float | None = None, + http_connect_timeout_seconds: float | None = None, + http_max_attempts: int | None = None, ) -> Model: """Build a primary model and optional configured fallback chain.""" primary = _build_pydantic_model_for_provider( @@ -481,6 +516,9 @@ def _build_model_chain( api_base=api_base, cfg=cfg, role_label=primary_role_label, + http_timeout_seconds=http_timeout_seconds, + http_connect_timeout_seconds=http_connect_timeout_seconds, + http_max_attempts=http_max_attempts, ) fallbacks = [ @@ -490,6 +528,9 @@ def _build_model_chain( api_base="", cfg=cfg, role_label=f"{fallback_role_label_prefix}{spec.provider}:{spec.model}", + http_timeout_seconds=http_timeout_seconds, + http_connect_timeout_seconds=http_connect_timeout_seconds, + http_max_attempts=http_max_attempts, ) for raw in fallback_models for spec in (parse_fallback_spec(raw, default_provider=provider),) @@ -542,6 +583,9 @@ def build_pydantic_model_from_provider( *, fallback_models: tuple[str, ...] | list[str] | None = None, config: Config | None = None, + http_timeout_seconds: float | None = None, + http_connect_timeout_seconds: float | None = None, + http_max_attempts: int | None = None, ) -> Model: """Build a robust PydanticAI model from explicit provider/model args. @@ -561,6 +605,9 @@ def build_pydantic_model_from_provider( (same format as `default.toml` `fallback_models`). None means no fallback — just the primary with HTTP-level retry. config: Optional Config override (defaults to `get_config()`). + http_timeout_seconds: Optional request timeout override for evals. + http_connect_timeout_seconds: Optional connect timeout override for evals. + http_max_attempts: Optional HTTP retry attempt override for evals. 
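+
+        Example (illustrative sketch only; the provider and model shown are
+        placeholders, and the exact keyword set assumes the explicit
+        provider/model signature described above):
+
+            model = build_pydantic_model_from_provider(
+                provider="minimax",
+                model="MiniMax-M2.7",
+                http_timeout_seconds=60.0,
+                http_connect_timeout_seconds=5.0,
+                http_max_attempts=3,
+            )
+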
Returns: FallbackModel if fallbacks are configured and their API keys @@ -576,6 +623,9 @@ def build_pydantic_model_from_provider( fallback_models=fallback_models or (), primary_role_label=f"explicit_provider={provider}", fallback_role_label_prefix="explicit_fallback=", + http_timeout_seconds=http_timeout_seconds, + http_connect_timeout_seconds=http_connect_timeout_seconds, + http_max_attempts=http_max_attempts, ) From b856e0fa581718fca7f8fd1f75a34a441ff8cadf Mon Sep 17 00:00:00 2001 From: Isaac Kargar Date: Wed, 13 May 2026 17:57:24 +0300 Subject: [PATCH 2/8] Refactor BAML agent functionality and update documentation - Updated the README to reflect the integration of BAML with LangGraph for improved trace window scanning and record synthesis. - Renamed and refactored methods in the BamlAsyncClient, BamlSyncClient, and BamlStreamClient to support new functionalities: ScanTraceWindow and SynthesizeExtractRecords. - Introduced new types for handling synthesized extractions and trace window scans, enhancing type safety and clarity in the codebase. - Updated the type builder and type map to reflect the removal of deprecated classes and the addition of new types. - Enhanced documentation for the BAML agents, including usage examples for the new methods and updated descriptions for existing functionalities. This commit improves the overall structure and usability of the BAML agents, aligning them with the latest design and functionality requirements. --- baml_agents/README.md | 38 +- baml_agents/baml_client/async_client.py | 79 +- baml_agents/baml_client/inlinedbaml.py | 4 +- baml_agents/baml_client/parser.py | 28 +- baml_agents/baml_client/stream_types.py | 95 +- baml_agents/baml_client/sync_client.py | 78 +- baml_agents/baml_client/type_builder.py | 554 ++------- baml_agents/baml_client/type_map.py | 34 +- baml_agents/baml_client/types.py | 112 +- baml_agents/baml_extract_agent/graph.py | 561 ++++++--- baml_agents/baml_extract_agent/tool_bridge.py | 436 +++---- baml_agents/baml_src/extract_react.baml | 1004 +++-------------- baml_agents/baml_src/extract_react_tests.baml | 92 ++ baml_agents/baml_src/models.baml | 43 + 14 files changed, 1243 insertions(+), 1915 deletions(-) create mode 100644 baml_agents/baml_src/extract_react_tests.baml create mode 100644 baml_agents/baml_src/models.baml diff --git a/baml_agents/README.md b/baml_agents/README.md index a511d46..1367dfc 100644 --- a/baml_agents/README.md +++ b/baml_agents/README.md @@ -1,18 +1,20 @@ # Lerim BAML Agents -Minimal experiment for testing whether BAML improves ReAct-style tool selection -with a small local Ollama model. +Minimal experiment for testing whether a BAML plus LangGraph windowed +extraction harness can replace the PydanticAI extract agent. ## What This Uses -- LangGraph builds the graph loop. -- BAML makes the LLM call and parses the next action into a typed schema. -- Ollama serves `gemma4:e4b` through `http://127.0.0.1:11434/v1`. -- MiniMax M2.7 can also be used through BAML's OpenAI-compatible client - registry with `--baml-provider minimax`. -- The BAML function copies Lerim's extraction `SYSTEM_PROMPT` text and keeps the - small BAML/LangGraph harness adaptation in `baml_src/extract_react.baml`. -- Lerim's existing DB-backed extraction tools are imported from `src/lerim`. +- LangGraph owns trace windowing, coverage, synthesis, and persistence. +- BAML makes the two LLM calls: scan one trace window, then synthesize records. +- The default benchmark/runtime model is MiniMax M2.7 through BAML's + OpenAI-compatible client. 
+- Ollama-compatible local models can still be used explicitly with
+  `--baml-provider ollama`.
+- Model clients live in `baml_src/models.baml`; extraction prompts and
+  functions live in `baml_src/extract_react.baml`; BAML-native tests live in
+  `baml_src/extract_react_tests.baml`.
+- Lerim's existing DB-backed `save_context` tool persists synthesized records.
 - The default model can be overridden with `--model` for local and API model
   comparisons.
@@ -26,27 +28,25 @@ PYTHONPATH="baml_agents:src" uv run --with baml-py==0.222.0 --with langgraph==1.
   python -m baml_extract_agent.run \
   --trace tests/fixtures/traces/unit/codex_simple.jsonl \
   --context-db baml_agents/.tmp/context.sqlite3 \
-  --project-root . \
-  --model gemma4:e4b
+  --project-root .
 ```
 
-MiniMax M2.7:
+Local Ollama-compatible model:
 
 ```bash
 PYTHONPATH="baml_agents:src" uv run --with baml-py==0.222.0 --with langgraph==1.2.0 \
   python -m baml_extract_agent.run \
   --trace tests/fixtures/traces/unit/codex_simple.jsonl \
-  --context-db baml_agents/.tmp/context_minimax.sqlite3 \
+  --context-db baml_agents/.tmp/context_ollama.sqlite3 \
   --project-root . \
-  --baml-provider minimax \
-  --model MiniMax-M2.7 \
-  --temperature 1.0
+  --baml-provider ollama \
+  --model <local-model-name>
 ```
 
-BAML-native tests live in `baml_src/extract_react.baml`:
+BAML-native tests:
 
 ```bash
-uv run --with baml-py==0.222.0 baml-cli test --from baml_agents/baml_src --parallel 1 -i "DecideNextExtractStep::"
+MINIMAX_API_KEY=... uv run --with baml-py==0.222.0 baml-cli test --from baml_agents/baml_src --parallel 1
 ```
 
 The graph writes into the context DB you pass with `--context-db`. Use a scratch
diff --git a/baml_agents/baml_client/async_client.py b/baml_agents/baml_client/async_client.py
index 5d41237..c4ea6c0 100644
--- a/baml_agents/baml_client/async_client.py
+++ b/baml_agents/baml_client/async_client.py
@@ -82,21 +82,36 @@ def parse(self):
     def parse_stream(self):
         return self.__llm_stream_parser
 
-    async def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str,
+    async def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str,
       baml_options: BamlCallOptions = {},
-    ) -> types.ExtractAgentStep:
+    ) -> types.TraceWindowScan:
       # Check if on_tick is provided
       if 'on_tick' in baml_options:
         # Use streaming internally when on_tick is provided
-        __stream__ = self.stream.DecideNextExtractStep(runtime_dashboard=runtime_dashboard,run_instruction=run_instruction,tool_manifest=tool_manifest,scratchpad=scratchpad,
+        __stream__ = self.stream.ScanTraceWindow(run_instruction=run_instruction,prior_episode_summary=prior_episode_summary,prior_findings_summary=prior_findings_summary,trace_window=trace_window,
           baml_options=baml_options)
         return await __stream__.get_final_response()
       else:
         # Original non-streaming code
-        __result__ = await self.__options.merge_options(baml_options).call_function_async(function_name="DecideNextExtractStep", args={
-          "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad,
+        __result__ = await self.__options.merge_options(baml_options).call_function_async(function_name="ScanTraceWindow", args={
+          "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window,
         })
-        return typing.cast(types.ExtractAgentStep, __result__.cast_to(types, types, stream_types, False, __runtime__))
+        return
+    async def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str,
+        baml_options: BamlCallOptions = {},
+    ) -> types.SynthesizedExtraction:
+        # Check if on_tick is provided
+        if 'on_tick' in baml_options:
+            # Use streaming internally when on_tick is provided
+            __stream__ = self.stream.SynthesizeExtractRecords(run_instruction=run_instruction,episode_summary=episode_summary,durable_findings_summary=durable_findings_summary,existing_record_manifest=existing_record_manifest,
+                baml_options=baml_options)
+            return await __stream__.get_final_response()
+        else:
+            # Original non-streaming code
+            __result__ = await self.__options.merge_options(baml_options).call_function_async(function_name="SynthesizeExtractRecords", args={
+                "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest,
+            })
+            return typing.cast(types.SynthesizedExtraction, __result__.cast_to(types, types, stream_types, False, __runtime__))
@@ -106,16 +121,28 @@ class BamlStreamClient:
     def __init__(self, options: DoNotUseDirectlyCallManager):
         self.__options = options
 
-    def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str,
+    def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str,
+        baml_options: BamlCallOptions = {},
+    ) -> baml_py.BamlStream[stream_types.TraceWindowScan, types.TraceWindowScan]:
+        __ctx__, __result__ = self.__options.merge_options(baml_options).create_async_stream(function_name="ScanTraceWindow", args={
+            "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window,
+        })
+        return baml_py.BamlStream[stream_types.TraceWindowScan, types.TraceWindowScan](
+            __result__,
+            lambda x: typing.cast(stream_types.TraceWindowScan, x.cast_to(types, types, stream_types, True, __runtime__)),
+            lambda x: typing.cast(types.TraceWindowScan, x.cast_to(types, types, stream_types, False, __runtime__)),
+            __ctx__,
+        )
+    def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str,
         baml_options: BamlCallOptions = {},
-    ) -> baml_py.BamlStream[stream_types.ExtractAgentStep, types.ExtractAgentStep]:
-        __ctx__, __result__ = self.__options.merge_options(baml_options).create_async_stream(function_name="DecideNextExtractStep", args={
-            "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad,
+    ) -> baml_py.BamlStream[stream_types.SynthesizedExtraction, types.SynthesizedExtraction]:
+        __ctx__, __result__ = self.__options.merge_options(baml_options).create_async_stream(function_name="SynthesizeExtractRecords", args={
+            "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest,
         })
-        return baml_py.BamlStream[stream_types.ExtractAgentStep, types.ExtractAgentStep](
+        return baml_py.BamlStream[stream_types.SynthesizedExtraction, types.SynthesizedExtraction](
             __result__,
-            lambda x: typing.cast(stream_types.ExtractAgentStep, x.cast_to(types, types, stream_types, True, __runtime__)),
-            lambda x: typing.cast(types.ExtractAgentStep, x.cast_to(types, types, stream_types, False, __runtime__)),
+            lambda x: typing.cast(stream_types.SynthesizedExtraction, x.cast_to(types, types, stream_types, True, __runtime__)),
+            lambda x: typing.cast(types.SynthesizedExtraction, x.cast_to(types, types, stream_types, False, __runtime__)),
             __ctx__,
         )
@@ -126,11 +153,18 @@ class BamlHttpRequestClient:
     def __init__(self, options: DoNotUseDirectlyCallManager):
         self.__options = options
 
-    async def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str,
+    async def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str,
         baml_options: BamlCallOptions = {},
     ) -> baml_py.baml_py.HTTPRequest:
-        __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="DecideNextExtractStep", args={
-            "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad,
+        __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="ScanTraceWindow", args={
+            "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window,
+        }, mode="request")
+        return __result__
+    async def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str,
+        baml_options: BamlCallOptions = {},
+    ) -> baml_py.baml_py.HTTPRequest:
+        __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SynthesizeExtractRecords", args={
+            "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest,
         }, mode="request")
         return __result__
@@ -141,11 +175,18 @@ class BamlHttpStreamRequestClient:
     def __init__(self, options: DoNotUseDirectlyCallManager):
         self.__options = options
 
-    async def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str,
+    async def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str,
+        baml_options: BamlCallOptions = {},
+    ) -> baml_py.baml_py.HTTPRequest:
+        __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="ScanTraceWindow", args={
+            "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window,
+        }, mode="stream")
+        return __result__
+    async def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str,
         baml_options: BamlCallOptions = {},
     ) -> baml_py.baml_py.HTTPRequest:
-        __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="DecideNextExtractStep", args={
-            "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad,
+        __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SynthesizeExtractRecords", args={
+            "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary":
durable_findings_summary,"existing_record_manifest": existing_record_manifest, }, mode="stream") return __result__ diff --git a/baml_agents/baml_client/inlinedbaml.py b/baml_agents/baml_client/inlinedbaml.py index e6bbc38..afcbbb7 100644 --- a/baml_agents/baml_client/inlinedbaml.py +++ b/baml_agents/baml_client/inlinedbaml.py @@ -12,8 +12,10 @@ _file_map = { - "extract_react.baml": "retry_policy ExtractAgentRetry {\n max_retries 1\n strategy {\n type exponential_backoff\n delay_ms 500\n multiplier 2\n max_delay_ms 8000\n }\n}\n\nclient OllamaGemma4E4B {\n provider \"openai-generic\"\n retry_policy ExtractAgentRetry\n options {\n base_url \"http://127.0.0.1:11434/v1\"\n model \"gemma4:e4b\"\n temperature 0.0\n http {\n connect_timeout_ms 10000\n time_to_first_token_timeout_ms 120000\n idle_timeout_ms 30000\n request_timeout_ms 300000\n }\n }\n}\n\nenum ExtractAction {\n READ_TRACE @alias(\"read_trace\") @description(\"Read the next numbered trace chunk.\")\n SEARCH_CONTEXT @alias(\"search_context\") @description(\"Search existing DB-backed context by meaning.\")\n GET_CONTEXT @alias(\"get_context\") @description(\"Fetch full context records by record ID before any revision.\")\n SAVE_CONTEXT @alias(\"save_context\") @description(\"Create one episode or durable context record.\")\n REVISE_CONTEXT @alias(\"revise_context\") @description(\"Revise a fetched context record with a complete improved payload.\")\n NOTE_TRACE_FINDINGS @alias(\"note_trace_findings\") @description(\"Checkpoint durable or implementation findings from a long trace.\")\n PRUNE_TRACE_READS @alias(\"prune_trace_reads\") @description(\"Prune earlier trace chunks after findings are captured.\")\n FINAL_RESULT @alias(\"final_result\") @description(\"Finish only after exactly one current-session episode record exists.\")\n}\n\nenum RecordKind {\n DECISION @alias(\"decision\") @description(\"A durable project decision with decision and why fields.\")\n PREFERENCE @alias(\"preference\") @description(\"A stable user or workflow preference.\")\n CONSTRAINT @alias(\"constraint\") @description(\"A durable invariant, limit, or must/cannot rule.\")\n FACT @alias(\"fact\") @description(\"A durable project fact or setup truth.\")\n REFERENCE @alias(\"reference\") @description(\"A pointer to an external source of truth.\")\n EPISODE @alias(\"episode\") @description(\"The mandatory per-session episode record; use this for session summaries.\")\n}\n\nenum RecordStatus {\n ACTIVE @alias(\"active\") @description(\"Current context that future sessions may reuse.\")\n ARCHIVED @alias(\"archived\") @description(\"Historical or routine context that should not be treated as active guidance.\")\n}\n\nenum FindingLevel {\n DECISION @alias(\"decision\") @description(\"Durable decision-level finding.\")\n PREFERENCE @alias(\"preference\") @description(\"Durable user or workflow preference finding.\")\n FEEDBACK @alias(\"feedback\") @description(\"Durable feedback-level finding.\")\n REFERENCE @alias(\"reference\") @description(\"Durable external-source finding.\")\n CONSTRAINT @alias(\"constraint\") @description(\"Durable constraint-level finding.\")\n FACT @alias(\"fact\") @description(\"Durable fact-level finding.\")\n IMPLEMENTATION @alias(\"implementation\") @description(\"Trace-local implementation evidence or discarded hypothesis.\")\n}\n\nclass ReadTraceArgs {\n start_line int?\n line_count int?\n}\n\nclass SearchContextArgs {\n query string @description(\"Natural-language query for the durable meaning, not '*' or an empty browse request.\")\n 
kind RecordKind? @description(\"Optional filter. Use only when the desired record kind is known.\")\n status RecordStatus? @description(\"Optional lifecycle filter.\")\n valid_at string? @description(\"Optional timestamp for historical lookup.\")\n include_archived bool? @description(\"Whether archived records should be included.\")\n limit int? @description(\"Maximum hits to return.\")\n}\n\nclass GetContextArgs {\n record_ids string[] @description(\"Record IDs returned by search_context or list_context. Fetch before any revision.\")\n include_versions bool? @description(\"Whether to include prior versions.\")\n detail \"concise\" | \"detailed\"? @description(\"Use concise unless full version history is needed.\")\n}\n\nclass SaveContextArgs {\n kind RecordKind @description(\"Use episode for the mandatory session summary; use durable kinds only for reusable project context.\")\n title string @description(\"Short standalone title for the lasting point or episode.\")\n body string @description(\"Compact standalone body. Do not include trace-local command logs or copied errors.\")\n status RecordStatus? @description(\"Use archived for routine episodes with no durable signal; active for reusable durable records.\")\n valid_from string? @description(\"Optional validity start timestamp.\")\n valid_until string? @description(\"Optional validity end timestamp.\")\n decision string? @description(\"Required only for kind=decision; omit for facts, preferences, constraints, references, and episodes.\")\n why string? @description(\"Required only for kind=decision when rationale exists; omit when the trace says no durable rationale was supplied.\")\n alternatives string? @description(\"Optional decision alternatives. Omit for non-decisions.\")\n consequences string? @description(\"Optional direct application guidance for the same durable point.\")\n user_intent string? @description(\"Required when kind=episode. Concise statement of what the user wanted in this source session.\")\n what_happened string? @description(\"Required when kind=episode. Concise statement of what the session actually did.\")\n outcomes string? @description(\"Optional concise episode outcome.\")\n}\n\nclass ReviseContextArgs {\n record_id string\n reason string @description(\"Short reason for the revision.\")\n kind RecordKind @description(\"Must match the fetched record kind; revise_context cannot change kind.\")\n title string @description(\"Complete improved title.\")\n body string @description(\"Complete improved body.\")\n status RecordStatus? @description(\"Lifecycle status to keep or set.\")\n valid_from string?\n valid_until string?\n decision string?\n why string?\n alternatives string?\n consequences string?\n user_intent string?\n what_happened string?\n outcomes string?\n}\n\nclass NoteTraceFindingsArgs {\n theme string? @description(\"Short theme label for this finding. Leave empty only for the no-findings checkpoint.\")\n line int? @description(\"1-based trace line with supporting evidence. Leave empty only for the no-findings checkpoint.\")\n quote string? @description(\"Short evidence quote from the trace. Leave empty only for the no-findings checkpoint.\")\n level FindingLevel? @description(\"Durable levels only for reusable context; implementation for trace-local evidence.\")\n}\n\nclass PruneTraceReadsArgs {\n start_lines int[] @description(\"1-based start_line values from earlier read_trace calls, such as 1, 101, or 201. 
Never use context token counts.\")\n}\n\nclass FinalResultArgs {\n completion_summary string\n}\n\nclass ExtractAgentStep {\n action ExtractAction\n rationale string @description(\"One short sentence explaining why this is the next action. Do not include hidden chain-of-thought.\")\n read_trace ReadTraceArgs?\n search_context SearchContextArgs?\n get_context GetContextArgs?\n save_context SaveContextArgs?\n revise_context ReviseContextArgs?\n note_trace_findings NoteTraceFindingsArgs?\n prune_trace_reads PruneTraceReadsArgs?\n final_result FinalResultArgs?\n}\n\nfunction DecideNextExtractStep(\n runtime_dashboard: string,\n run_instruction: string,\n tool_manifest: string,\n scratchpad: string\n) -> ExtractAgentStep {\n client OllamaGemma4E4B\n prompt #\"\n {{ _.role(\"system\") }}\n \n You are the Lerim extract agent.\n Read one coding-agent trace, compress its signal, and write DB-backed context records.\n \n\n \n - Create exactly one episode record for the session.\n - Create zero or more durable records only when the trace contains durable signal.\n - The episode record is mandatory for every session, even if you also create or update durable records.\n - Updating an existing durable record never replaces the required episode for the current session.\n - The run is not complete until the current session has its episode record.\n - Treat the trace as historical evidence from its source session time, not as live verification of current code.\n - On short traces where the session is already clear after reading, prefer to create the episode promptly rather than leaving it until the end.\n - Episode records must include `user_intent` and `what_happened`; do not put the whole episode only in `body`.\n - Use `status=\"archived\"` for the episode when the session is routine operational work with no durable signal. Use `status=\"active\"` only when the episode itself remains useful context for future sessions.\n \n\n \n Durable signal means one of:\n - decision\n - preference\n - constraint\n - fact\n - reference\n\n Implementation detail alone is not durable signal.\n A temporary code-state finding, audit observation, open task, or release-risk report is not durable by itself. Promote it only when the trace establishes a reusable project rule, unresolved constraint, stable dependency, or standing source of truth.\n \n\n \n - Store the reusable rule, decision, invariant, dependency, preference, or external pointer, not the story of the session.\n - One durable record should hold one durable point.\n - Direct consequences and application guidance usually stay inside that same record.\n - Create the minimum number of durable records that preserves distinct durable meanings. Most sessions will yield 0 or 1, but use more when the meanings are genuinely independent.\n - Classify each durable point into one canonical kind. Prefer the most specific supported kind, and do not duplicate the same point across multiple durable kinds.\n - Duplicates are worse than gaps. Skip uncertain candidates rather than spraying near-duplicates.\n - Never create a second durable record in the same run for the same core claim. 
If you realize the first draft needs improvement, update or refine that record instead of creating another one.\n - `constraint` and `reference` are first-class durable record kinds, not fallback categories.\n \n\n \n - patch logs, command sequences, retries, timelines, or meeting-style recaps\n - code structure, file paths, git history, or storage mechanics by themselves\n - generic programming knowledge or facts already obvious from the repo\n - rejected lures, discarded explanations, or implementation-only distractions\n - one-run validation findings, approval flow, queue state, DB resets, rebuilds, or\n runtime diagnostics by themselves\n \n\n \n - Read the trace in chunks until the full trace is covered. Do not start writing while unread trace lines remain.\n - Use the findings scratchpad for evidence from chunks you have already read. Notes are summarized back to you on later turns; do not record the same point again unless you learned something new.\n - Keep each durable theme and its supporting implementation evidence together. Do not record a rejected lure or discarded explanation as its own durable finding/theme.\n - If one apparent finding only applies, routes, or operationalizes another finding, keep them as one durable theme instead of separate durable themes.\n - If the trace needs more than one read, call `note_trace_findings` once per useful finding with theme, line, quote, and level before saving or revising context. Call it with no arguments when the full trace has no reusable signal.\n - If you read many chunks, prune older read results only after those chunks have already been captured in notes.\n - Search existing context before creating a durable record whenever the trace suggests an earlier record, duplicate risk, or \"same meaning vs new meaning\" judgment.\n - The injected existing-record manifest is only a shortlist. It is never enough evidence for a revision.\n - Fetch full records before any revision, and fetch each plausible target when several nearby records could match.\n - Revise only when a fetched record clearly carries the same meaning and needs repair. If the core claim differs, create a new record instead.\n - When the trace says an existing durable rule is correct but needs tightening, clarification, or a better why, fetch that record and update it rather than leaving the weaker wording unchanged.\n - Avoid cosmetic same-run revisions. Revise a same-run record only to fix a concrete durable-context error or prevent a duplicate.\n \n\n \n - The system may inject `CONTEXT:` messages showing approximate context pressure. At soft or hard pressure, prune old trace chunks after their findings are captured.\n - The system may inject `NOTES:` messages summarizing findings and trace coverage. Use them as a progress dashboard, not as a replacement for reading unread trace lines.\n - The findings scratchpad writes the dashboard for future turns; do not try to reread the dashboard with tools.\n \n\n \n - First separate findings into durable signal and implementation evidence.\n - Synthesize at the theme level. 
Usually one theme becomes one durable record.\n - Create multiple durable records only when the trace establishes multiple independent durable points, each with its own support.\n - Do not store one durable point as both a preference and a decision, a fact and a decision, or any other cross-kind duplicate.\n - A stable workflow preference is not also a decision unless the trace separately states an explicit project decision with rationale.\n - A dependency, setup, or environment truth without durable rationale is a fact, not also a decision.\n - A failure caused by the current run's temporary validation setup is not itself a\n durable environment truth. If the investigation reveals a stable requirement that\n future sessions must apply, save that requirement as the durable point and omit\n the temporary validation story.\n - Merge candidates when one only states how to apply the other in local operations, routing, or ownership.\n - If two candidates share the same core claim, merge them.\n - If the difference is only evidence framing, symptom wording, or local-vs-CI phrasing around the same durable fact, keep one record and fold the extra context into it.\n - If one candidate is only the direct application or routing consequence of another, keep it inside the stronger record.\n - Storage boundary plus per-component routing is one decision, not two. Keep the boundary as the record and fold the routing guidance into the same title/body.\n - If one candidate only says how different local components should apply the same project rule, keep that guidance inside the main record rather than creating a second durable record.\n - If one candidate only restates where local project components live or how an internal architecture is applied, keep it inside the stronger decision, fact, or constraint instead of creating a separate reference.\n - If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule. The filtering guidance is evidence, not a second record.\n - Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered. That is extraction guidance for this run, not project context.\n - Store durable records only when the lesson is likely reusable beyond this trace.\n - If a candidate is mainly about this trace's commands, files, or timeline, reject it.\n - Trace-local instructions about what to ignore in this session are not preferences unless they clearly express a broader standing workflow rule for future sessions.\n - If the trace explicitly says the rationale is unknown or says not to invent one, do not create a `decision`; use `fact` instead.\n - A stable setup, dependency, or environment requirement without a durable why is a `fact` even if it sounds like the current chosen setup.\n - The instruction \"do not invent a why\" is extraction guidance, not project context.\n - When the trace contains one durable dependency or setup fact plus instructions about how to classify that same evidence, store only the dependency or setup fact. Do not turn the classification guidance into a separate `preference`.\n - If the trace explicitly rejects a lure or distraction, do not carry that rejected idea into the durable record text unless the rejection itself is the durable lesson.\n - If this older trace conflicts with newer existing active records, do not create a new active durable record for the older claim. 
Preserve the historical session in the episode and let the newer active record remain current.\n - If a long noisy investigation resolves into one source-of-truth boundary, store only that boundary. Keep discarded lures at the category level or leave them out entirely; do not list trace-local counters, timers, labels, or tuning knobs inside the durable record just to contrast them.\n - When a discarded lure matters as evidence, keep it attached to the main durable theme as implementation context rather than storing it as a second durable theme.\n - If the episode summary contains clearly reusable decision, preference, constraint, fact, or reference, that point should usually also exist as its own durable record.\n - Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode. The episode says what happened; the durable record stores what future sessions should reuse.\n - Durable records are additional project context, not a substitute for the session episode. Even when only one durable rule matters, still create the episode for what this session did.\n \n\n \n - Durable titles should name the lasting rule, decision, fact, constraint, preference, or reference directly.\n - Durable bodies should be compact, neutral, and standalone.\n - When a durable decision prohibits or routes a named interface, data path, dependency, provider, or boundary, preserve that named subject in the record instead of replacing it with a broader abstraction.\n - Prefer this shape for durable records:\n 1. the durable point\n 2. why it matters\n 3. how to apply it later\n - Do not write durable records as meeting minutes, patch logs, or cleanup commentary.\n - Do not preserve trace-local commands, negotiation phrasing, or \"this is not about X\" sentences in final record text.\n - Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored.\n - Do not mention discarded implementation noise in durable record fields, including `consequences`. If details are non-durable, omit them entirely rather than saying they are non-durable.\n - When the durable lesson is a source-of-truth rule, write the authoritative rule directly. Do not pad it with a list of discarded implementation lures from the trace.\n - If a short contrast is still helpful, keep it abstract, such as \"not worker-local state\" or \"not ephemeral local state\". Do not enumerate examples in parentheses or comma-separated lists.\n - When updating an existing record, keep the durable meaning but rewrite it into canonical project-context language.\n - When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection. Do not imply that a bug, missing capability, or release blocker is current unless the trace itself establishes that it remains unresolved as durable project context.\n - Facts from noisy failures must be rewritten into the underlying dependency, environment requirement, stakeholder driver, or operational fact.\n - If a fact still reads like stderr, an exception symptom, or copied command output, rewrite it again before writing.\n - When the durable lesson is an environment or dependency requirement, do not center the fact on the observed failure symptom. 
Name the requirement directly and mention the symptom only if it is needed as brief supporting context.\n - If brief supporting context is useful, lead with the requirement and keep the symptom generic. Never include exception class names, quoted error fragments, or copied failure strings in the durable fact.\n - If the candidate is mainly \"this validation run failed until we changed the setup\",\n it belongs in the archived episode. If the candidate names a reusable setup or\n runtime requirement discovered through that validation, keep the requirement and\n drop the failure narrative.\n - When no durable rationale exists, do not spend the fact body explaining that the rationale is absent. Just state the stable dependency, setup requirement, or operational truth directly.\n - Do not quote or paraphrase trace instructions about how to classify the evidence inside the final fact body. Final fact text should describe the underlying truth, not the extraction rule you followed.\n - References must answer both \"where should future sessions look?\" and \"when should they consult it?\"\n - Do not use `reference` for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than \"consult this external source next time.\"\n - Keep the episode concise: short title, short body, concise `user_intent`, `what_happened`, and `outcomes`.\n - If the session is mostly routine operational work with little future value and no durable record, create the episode with `status=\"archived\"`.\n \n\n \n \n Stable workflow guidance from the user. Save corrections and confirmed non-obvious working style that should carry into future sessions.\n Do not use `preference` for one-session extraction guidance such as \"that detail is just noise in this trace.\"\n \n \n A chosen approach or project rule that future work should follow and that is not obvious from code alone.\n If the trace does not support a durable why, do not use `decision`.\n \n \n A durable invariant, limit, or must/cannot rule that future work must respect.\n \n \n A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth.\n Use `fact` for stable setup or dependency truths when the trace explicitly says not to invent decision rationale.\n \n \n A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo.\n Use `reference` only when the enduring value is where to look later. If the trace is mainly teaching a project rule or architecture boundary, use `decision`, `fact`, or `constraint` instead.\n \n \n\n \n \n \n - assistant patches a bug and writes a tidy summary\n - user: \"The diff is enough. 
Don't end with a recap every time.\"\n - later turns continue with normal edits, tests, and review comments\n \n \n Create one preference record about keeping replies terse and not appending redundant change recaps.\n \n \n Store the file edit itself, or treat the correction as only a one-session scratch finding when it is clearly stable workflow guidance.\n \n \n\n \n \n - early turns discuss local refactors, temporary debug prints, and a flaky test\n - midway, several ideas are tried and discarded\n - late in the trace the user settles the architecture: durable project context lives in one store; hot runtime/session state lives in another\n - the follow-on routing guidance is just how to apply that boundary\n \n \n Create the required episode for the session and one decision record for the storage boundary. Keep the routing guidance inside the same record instead of splitting it into a second record.\n \n \n Store the refactor noise, split one architectural choice into two near-duplicate records such as one decision for the boundary and a second local-use record for which component reads which store, or create a separate durable record whose only message is that the refactors and debug edits were noise.\n \n \n\n \n \n - the user makes one architectural choice, such as keeping durable context and hot operational state in separate stores\n - the trace also mentions variable renames, label tweaks, temporary debug prints, and similar low-value cleanups\n - the user explicitly says those local edits should not become durable context\n \n \n Create the required episode and one durable record for the architectural choice only. Treat the explicit \"those edits are just noise\" instruction as extraction guidance for this run, not as its own record.\n \n \n Create a second durable record whose message is that renames, label tweaks, or temporary debug code are non-durable, or let that noise-filtering instruction replace the required episode.\n \n \n\n \n \n - repeated failed commands and partial theories about why a media workflow is broken\n - some guesses are ruled out\n - the stable conclusion is operational: environments that run this workflow need a specific system dependency installed\n \n \n Create one fact record for the dependency requirement in clean operational language. Lead with the missing dependency or environment requirement, and if you mention the failure at all, keep it generic rather than naming the exact exception class or copied command output. 
Still create the required episode for this session.\n \n \n Store the raw exception text, center the record on the failure symptom, split one operational lesson into separate local-vs-CI facts, create a second durable record whose message is \"do not invent a rationale here,\" keep the command history or debugging timeline, or write only the fact and skip the episode.\n \n \n\n \n \n - the user states one stable dependency or setup truth\n - nearby turns add extraction guidance such as \"this is a fact, not a decision\" or \"do not invent a why beyond the dependency\"\n - no broader workflow rule for future sessions is established\n \n \n Create the required episode and one fact record for the stable dependency or setup truth only.\n \n \n Create a second durable preference whose whole point is how to classify this trace, or store the meta-instruction instead of the underlying dependency fact.\n \n \n\n \n \n - the trace says image-enabled workflows require a system dependency in the environment\n - the user also says not to invent policy rationale beyond that dependency fact\n \n \n Write a fact such as: \"Image-enabled workflows require libvips in the environment.\" Keep the body on the requirement and its effect.\n \n \n Write a fact body such as: \"Do not invent a policy reason here\" or \"No decision rationale was supplied.\" Those are meta comments about classification, not durable project context.\n \n \n\n \n \n - early chunks are noisy and keep circling local counters, timers, labels, and temporary tuning\n - the final chunk clarifies that those were distractions\n - the real durable lesson is a source-of-truth boundary: authoritative state must live in one persisted place that survives restart and failover\n \n \n Create one durable record for the source-of-truth boundary. Mention restart or failover if it explains why the boundary matters, but keep any contrast abstract, such as \"not worker-local state,\" rather than listing local counters or timers.\n \n \n Write a durable record that carries over the rejected lure by naming worker-local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list.\n \n \n\n \n \n - the assistant starts from a partial repo note\n - later the user clarifies that incident ownership and current status are tracked in an external dashboard or ticket system\n - future sessions should consult that external system when this class of issue appears\n \n \n Create one reference record that names the external source and when future sessions should consult it.\n \n \n Center the record on local files, or turn it into a warning slogan about what not to trust locally.\n \n \n\n \n \n - run formatter\n - fix a small lint complaint\n - rerun tests\n - confirm green\n - no new rule, dependency, preference, or durable fact emerges\n \n \n Create only an archived episode.\n \n \n Invent a durable record from the sequence of routine commands.\n \n \n\n \n \n - the trace points at an earlier record that sounds nearby\n - new evidence sharpens part of it, but you still need to decide whether the core claim stayed the same\n - there may be more than one plausible existing record\n \n \n Search first, fetch the plausible existing record, then either update it if the meaning matches or create a new record if the core claim is different. 
In both cases, still create the episode for this session.\n \n \n Update from a shortlist or search preview alone, force an update when the new claim is only adjacent, or skip the episode because you already changed a durable record.\n \n \n \n\n \n - End the run with the `final_result` tool.\n - Put the plain-text completion summary in `completion_summary`.\n - Before `final_result`, ensure the current session already has exactly one episode record.\n - If you have created durable records but no episode yet, stop and create the episode before `final_result`.\n - If the episode contains the only copy of a reusable rule, invariant, dependency, source-of-truth pointer, or stable preference, stop and create the corresponding durable record before `final_result`.\n - Do not end with free-form assistant text outside `final_result`.\n \n\n \n Do not turn filenames, storage mechanics, graph links, or evidence tables into the main record unless the durable rule is specifically about that boundary.\n \n\n \n - You are running as a BAML + LangGraph replica of the PydanticAI extract agent.\n - You cannot call tools directly in this BAML harness. Return exactly one next tool call as typed JSON.\n - Choose only an action whose tool name appears in Available tools.\n - If a tool name is not listed in Available tools, it does not exist for this turn.\n - Your first non-whitespace character must be `{`.\n - Never output `` tags, hidden reasoning text, markdown, or prose before or after the JSON object.\n - Return one JSON object only. Do not include markdown, self-corrections, multiple JSON blocks, or a plan that chains future actions.\n - Choose only the immediate next action. When the production prompt requires an intermediate step before saving or finalizing, take that step first.\n - If multiple actions are still required, return only the first missing action and omit all later actions.\n - For multi-chunk traces, if `note_trace_findings` is available and no finding checkpoint appears in prior actions, choose `note_trace_findings` before any `save_context`, `revise_context`, or `final_result`; use empty args when the fully read trace has no reusable signal.\n - Fill only the argument object for the selected action.\n - Treat `final_result` as the PydanticAI structured final output.\n - For `save_context kind=episode`, include `user_intent` and `what_happened`.\n - For `save_context kind=decision`, include `decision` and `why`.\n - For `prune_trace_reads`, use only earlier `read_trace` start_line values.\n - After the required episode exists, choose another `save_context` only when a clearly reusable durable signal remains outside the episode; do not save trace-local implementation fixes as durable facts.\n - Do not treat an episode record as a durable record. If a reusable decision, preference, constraint, fact, or reference remains only in trace or episode text, save that durable record before `final_result`.\n \n\n \n {{ runtime_dashboard }}\n \n\n {{ _.role(\"user\") }}\n Run instruction:\n {{ run_instruction }}\n\n Available tools:\n {{ tool_manifest }}\n\n Prior actions and observations:\n {{ scratchpad }}\n\n {{ ctx.output_format }}\n \"#\n}\n\ntest InitialStepReadsTrace {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 0/200000 (0%) [normal]\n NOTES: 0 findings\n \"#\n run_instruction #\"\n Read the trace, write exactly one episode record, and write only the\n strongest durable records. This trace has 3 lines. 
Read all chunks before\n writing.\n \"#\n tool_manifest #\"\n - read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk.\n - save_context(kind: string, title: string, body: string): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad \"No prior actions.\"\n }\n @@assert({{ this.action == \"READ_TRACE\" }})\n}\n\ntest ContinueUnreadTraceBeforeWrite {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 20000/200000 (10%) [normal]\n NOTES: 0 findings\n Trace reads: 1 chunk(s)\n Next unread offset: 100\n Pruned offsets: none\n \"#\n run_instruction #\"\n This trace has 220 lines. Read all chunks before writing.\n \"#\n tool_manifest #\"\n - read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk.\n - save_context(kind: string, title: string, body: string): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad #\"\n Action: read_trace\n Status: ok\n Observation:\n [220 lines, showing 1-100] — 120 more lines, call read_trace(start_line=101, line_count=100) for the next chunk\n \"#\n }\n @@assert({{ this.action == \"READ_TRACE\" }})\n}\n\ntest LongTraceNotesFindingBeforeWrite {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 42000/200000 (21%) [normal]\n NOTES: 0 findings\n Trace reads: 3 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction #\"\n This trace has 240 lines and is fully read. An explicit durable architecture decision appears at line 188.\n \"#\n tool_manifest #\"\n - note_trace_findings(theme: string?, line: int?, quote: string?, level: string?): Record one trace finding with line evidence.\n - save_context(kind: string, title: string, body: string): Save one context record.\n \"#\n scratchpad #\"\n Action: read_trace\n Status: ok\n Observation:\n 188 user: We decided that durable context stays in the DB store because runtime queues are separate and temporary.\n \"#\n }\n @@assert({{ this.action == \"NOTE_TRACE_FINDINGS\" }})\n @@assert({{ this.note_trace_findings.level == \"DECISION\" }})\n}\n\ntest LongTraceNoReusableSignalCheckpoint {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 39000/200000 (20%) [normal]\n NOTES: 0 findings\n Trace reads: 2 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction #\"\n This long trace is fully read after multiple chunks. No note_trace_findings checkpoint has been called yet. 
It only formats files and reruns tests; no reusable rule, fact, preference, constraint, decision, or reference appears.\n \"#\n tool_manifest #\"\n - note_trace_findings(theme: string?, line: int?, quote: string?, level: string?): Record one trace finding with line evidence, or call with no args for none.\n - save_context(kind: string, title: string, body: string): Save one context record.\n \"#\n scratchpad \"Action: read_trace\\nStatus: ok\\nObservation:\\nAll chunks covered routine formatter/test work.\"\n }\n @@assert({{ this.action == \"NOTE_TRACE_FINDINGS\" }})\n}\n\ntest PruneOlderTraceReadsUnderPressure {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 130000/200000 (65%) [soft]\n NOTES: 2 findings (1 durable, 1 implementation) across 1 theme(s)\n Trace reads: 3 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction \"The trace is fully read and findings from the first two chunks have been captured.\"\n tool_manifest #\"\n - prune_trace_reads(start_lines: int[]): Prune earlier read_trace results after findings are noted.\n - save_context(kind: string, title: string, body: string): Save one context record.\n \"#\n scratchpad #\"\n Action: read_trace\n Status: ok\n Observation:\n [300 lines, showing 1-100]\n\n Action: read_trace\n Status: ok\n Observation:\n [300 lines, showing 101-200]\n\n Action: read_trace\n Status: ok\n Observation:\n [300 lines, showing 201-300]\n\n Action: note_trace_findings\n Status: ok\n Observation:\n Noted 1 finding (total 2 so far).\n \"#\n }\n @@assert({{ this.action == \"PRUNE_TRACE_READS\" }})\n @@assert({{ this.prune_trace_reads.start_lines|length > 0 }})\n @@assert({{ this.prune_trace_reads.start_lines[0] == 1 }})\n}\n\ntest RoutineTraceSavesArchivedEpisode {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 1000/200000 (1%) [normal]\n NOTES: 0 findings\n Trace reads: 1 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction \"The short trace is fully read. It only ran formatting and confirmed tests passed.\"\n tool_manifest #\"\n - save_context(kind: string, title: string, body: string, status: string?): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad \"Action: read_trace\\nStatus: ok\\nObservation:\\nformatter ran; tests passed; no durable signal.\"\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"EPISODE\" }})\n @@assert({{ this.save_context.status == \"ARCHIVED\" }})\n @@assert({{ this.save_context.user_intent != null and this.save_context.user_intent|length > 0 }})\n @@assert({{ this.save_context.what_happened != null and this.save_context.what_happened|length > 0 }})\n}\n\ntest DurablePreferenceSavesPreference {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 1000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The fully read trace includes a stable user correction: keep final replies short and skip redundant recaps. 
The current-session episode already exists; the durable preference remains unsaved.\"\n tool_manifest \"- save_context(kind: string, title: string, body: string): Save one context record.\"\n scratchpad \"Action: read_trace\\nStatus: ok\\nObservation:\\nuser: Keep final replies short and skip redundant recaps.\\n\\nAction: save_context\\nStatus: ok\\nObservation:\\n{\\\"ok\\\": true, \\\"result\\\": {\\\"record\\\": {\\\"kind\\\": \\\"episode\\\", \\\"source_session_id\\\": \\\"baml-test-session\\\"}}}\\n\\nAction: note_trace_findings\\nStatus: ok\\nObservation:\\nTheme: reply style. Quote: keep final replies short and skip redundant recaps.\"\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"PREFERENCE\" }})\n}\n\ntest DurableConstraintSavesConstraint {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 1000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The fully read trace establishes a durable invariant: future extraction runs must not mutate unfetched context records. The current-session episode already exists; the durable constraint remains unsaved.\"\n tool_manifest \"- save_context(kind: string, title: string, body: string): Save one context record.\"\n scratchpad \"Action: save_context\\nStatus: ok\\nObservation:\\n{\\\"ok\\\": true, \\\"result\\\": {\\\"record\\\": {\\\"kind\\\": \\\"episode\\\", \\\"source_session_id\\\": \\\"baml-test-session\\\"}}}\\n\\nAction: note_trace_findings\\nStatus: ok\\nObservation:\\nTheme: mutation invariant. Quote: never revise or archive a context record before fetching it in the current run.\"\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"CONSTRAINT\" }})\n}\n\ntest DurableDecisionSavesDecisionWithWhy {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 1000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The fully read trace establishes a durable architecture decision with rationale. The current-session episode already exists; the durable decision remains unsaved.\"\n tool_manifest \"- save_context(kind: string, title: string, body: string, decision: string?, why: string?): Save one context record.\"\n scratchpad \"Action: save_context\\nStatus: ok\\nObservation:\\n{\\\"ok\\\": true, \\\"result\\\": {\\\"record\\\": {\\\"kind\\\": \\\"episode\\\", \\\"source_session_id\\\": \\\"baml-test-session\\\"}}}\\n\\nAction: note_trace_findings\\nStatus: ok\\nObservation:\\nTheme: storage boundary. Quote: durable context is DB-backed so sessions can replay it; runtime state stays separate.\"\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"DECISION\" }})\n @@assert({{ this.save_context.decision|length > 0 }})\n @@assert({{ this.save_context.why|length > 0 }})\n}\n\ntest StableDependencySavesFact {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 1000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The fully read trace establishes that image workflows require libvips. It gives no decision rationale. 
The current-session episode already exists; the durable dependency fact remains unsaved.\"\n tool_manifest \"- save_context(kind: string, title: string, body: string): Save one context record.\"\n scratchpad \"Action: save_context\\nStatus: ok\\nObservation:\\n{\\\"ok\\\": true, \\\"result\\\": {\\\"record\\\": {\\\"kind\\\": \\\"episode\\\", \\\"source_session_id\\\": \\\"baml-test-session\\\"}}}\\n\\nAction: note_trace_findings\\nStatus: ok\\nObservation:\\nTheme: dependency. Quote: image workflows require libvips in the environment.\"\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"FACT\" }})\n @@assert({{ not this.save_context.why }})\n}\n\ntest ExternalSourceSavesReference {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 1000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The fully read trace says incident ownership and current status live in the Ops dashboard, which future sessions should consult for incidents. The current-session episode already exists; the durable reference remains unsaved.\"\n tool_manifest \"- save_context(kind: string, title: string, body: string): Save one context record.\"\n scratchpad \"Action: save_context\\nStatus: ok\\nObservation:\\n{\\\"ok\\\": true, \\\"result\\\": {\\\"record\\\": {\\\"kind\\\": \\\"episode\\\", \\\"source_session_id\\\": \\\"baml-test-session\\\"}}}\\n\\nAction: note_trace_findings\\nStatus: ok\\nObservation:\\nTheme: incident source. Quote: use the Ops dashboard for incident ownership and current status.\"\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"REFERENCE\" }})\n}\n\ntest DuplicateRiskSearchesContext {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 1000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"Relevant existing durable records are shown below; treat them as a shortlist only and fetch before revision.\\n\\nRelevant existing durable records:\\n- rec_storage | decision | DB context boundary | Durable context lives in the DB store.\"\n tool_manifest #\"\n - search_context(query: string, kind: string?, status: string?): Search saved context by meaning.\n - get_context(record_ids: string[]): Fetch saved context records by ID.\n - save_context(kind: string, title: string, body: string): Save one context record.\n \"#\n scratchpad \"Action: read_trace\\nStatus: ok\\nObservation:\\nThe new trace repeats the DB context boundary decision with slightly sharper wording.\"\n }\n @@assert({{ this.action == \"SEARCH_CONTEXT\" or this.action == \"GET_CONTEXT\" }})\n @@assert({{ this.action != \"SAVE_CONTEXT\" }})\n}\n\ntest SearchHitFetchesBeforeRevision {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 2000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"A search_context call found a nearby existing record. The current-session episode already exists. 
Fetch the full search hit before deciding whether to revise it.\"\n tool_manifest #\"\n - search_context(query: string): Search saved context by meaning.\n - get_context(record_ids: string[]): Fetch full context records by ID before any revision.\n - revise_context(record_id: string, reason: string, kind: string, title: string, body: string): Revise an existing context record.\n \"#\n scratchpad #\"\n Action: save_context\n Status: ok\n Observation:\n {\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n\n Action: search_context\n Status: ok\n Observation:\n {\"count\": 1, \"hits\": [{\"record_id\": \"rec_storage\", \"kind\": \"decision\", \"title\": \"DB context boundary\", \"body_preview\": \"Durable context lives in the DB store.\"}]}\n \"#\n }\n @@assert({{ this.action == \"GET_CONTEXT\" }})\n}\n\ntest FetchedSameMeaningRevisesContext {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 2000/200000 (1%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The fetched record has the same core decision but needs the rationale tightened.\"\n tool_manifest #\"\n - revise_context(record_id: string, reason: string, kind: string, title: string, body: string): Revise an existing context record with a complete improved payload.\n - save_context(kind: string, title: string, body: string): Save one context record.\n \"#\n scratchpad #\"\n Action: get_context\n Status: ok\n Observation:\n {\"count\": 1, \"records\": [{\"record_id\": \"rec_storage\", \"kind\": \"decision\", \"title\": \"DB context boundary\", \"body\": \"Durable context lives in the DB store.\", \"decision\": \"Durable context lives in the DB store.\", \"why\": \"It must survive sessions.\"}]}\n \"#\n }\n @@assert({{ this.action == \"REVISE_CONTEXT\" }})\n}\n\ntest FinalBeforeEpisodeSavesEpisode {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard \"CONTEXT: 3000/200000 (2%) [normal]\\nNOTES: 1 findings (1 durable, 0 implementation)\\nTrace reads: 1 chunk(s)\\nNext unread offset: none\"\n run_instruction \"The trace is fully read. One durable fact has already been saved, but no current-session episode exists.\"\n tool_manifest #\"\n - save_context(kind: string, title: string, body: string): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad #\"\n Action: save_context\n Status: ok\n Observation:\n {\"ok\": true, \"result\": {\"record\": {\"kind\": \"fact\", \"source_session_id\": \"baml-test-session\"}}}\n \"#\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"EPISODE\" }})\n @@assert({{ this.save_context.user_intent != null and this.save_context.user_intent|length > 0 }})\n @@assert({{ this.save_context.what_happened != null and this.save_context.what_happened|length > 0 }})\n}\n\ntest FinalStepAfterEpisodeSave {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 1000/200000 (1%) [normal]\n NOTES: 0 findings\n Trace reads: 1 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction #\"\n Read the trace, write exactly one episode record, and write only the\n strongest durable records. 
The trace has already been fully read.\n \"#\n tool_manifest #\"\n - read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk.\n - save_context(kind: string, title: string, body: string): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad #\"\n Action: read_trace\n Status: ok\n Observation:\n [3 lines, trace coverage complete] All trace lines have already been read.\n\n Action: save_context\n Status: ok\n Observation:\n {\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n \"#\n }\n @@assert({{ this.action == \"FINAL_RESULT\" }})\n}\n\ntest EpisodeWithReusableDecisionSavesDecisionBeforeFinal {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 3000/200000 (2%) [normal]\n NOTES: 0 findings\n Trace reads: 1 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction \"The trace is fully read. The current-session episode already exists, but a reusable PostgreSQL-over-MySQL project decision remains only in the trace and episode text. Save the durable decision before final_result.\"\n tool_manifest #\"\n - save_context(kind: string, title: string, body: string, decision: string?, why: string?): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad #\"\n Action: read_trace\n Status: ok\n Observation:\n user: Good. Also, we decided to use PostgreSQL instead of MySQL for this project.\n assistant: Understood. Decision: PostgreSQL over MySQL. I'll update the connection string and ORM configuration.\n\n Action: save_context\n Status: ok\n Observation:\n {\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"title\": \"Database Configuration Update\", \"body\": \"The session switched the project's primary database backend from MySQL to PostgreSQL.\", \"source_session_id\": \"baml-test-session\"}}}\n \"#\n }\n @@assert({{ this.action == \"SAVE_CONTEXT\" }})\n @@assert({{ this.save_context.kind == \"DECISION\" }})\n @@assert({{ this.save_context.decision|length > 0 }})\n @@assert({{ this.save_context.why|length > 0 }})\n}\n\ntest ImplementationOnlyAfterEpisodeFinalizes {\n functions [DecideNextExtractStep]\n args {\n runtime_dashboard #\"\n CONTEXT: 3000/200000 (2%) [normal]\n NOTES: 0 findings\n Trace reads: 1 chunk(s)\n Next unread offset: none\n Pruned offsets: none\n \"#\n run_instruction \"The trace is fully read. A durable decision and the current-session episode already exist. 
The only remaining unsaved content is an implementation fix to a local timeout constant.\"\n tool_manifest #\"\n - save_context(kind: string, title: string, body: string): Save one context record.\n - final_result(completion_summary: string): Finish after exactly one episode exists.\n \"#\n scratchpad #\"\n Action: read_trace\n Status: ok\n Observation:\n The trace fixed a local timeout constant and also established one architecture decision.\n\n Action: save_context\n Status: ok\n Observation:\n {\"ok\": true, \"result\": {\"record\": {\"kind\": \"decision\", \"title\": \"Runtime state remains separate from durable context\"}}}\n\n Action: save_context\n Status: ok\n Observation:\n {\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n \"#\n }\n @@assert({{ this.action == \"FINAL_RESULT\" }})\n}\n", + "extract_react.baml": "enum RecordKind {\n DECISION @alias(\"decision\") @description(\"A durable project decision with decision and why fields.\")\n PREFERENCE @alias(\"preference\") @description(\"A stable user or workflow preference.\")\n CONSTRAINT @alias(\"constraint\") @description(\"A durable invariant, limit, or must/cannot rule.\")\n FACT @alias(\"fact\") @description(\"A durable project fact or setup truth.\")\n REFERENCE @alias(\"reference\") @description(\"A pointer to an external source of truth.\")\n}\n\nenum RecordStatus {\n ACTIVE @alias(\"active\") @description(\"Current context that future sessions may reuse.\")\n ARCHIVED @alias(\"archived\") @description(\"Historical or routine context that should not be treated as active guidance.\")\n}\n\nenum FindingLevel {\n DECISION @alias(\"decision\") @description(\"Durable decision-level finding.\")\n PREFERENCE @alias(\"preference\") @description(\"Durable user or workflow preference finding.\")\n FEEDBACK @alias(\"feedback\") @description(\"Durable feedback-level finding.\")\n REFERENCE @alias(\"reference\") @description(\"Durable external-source finding.\")\n CONSTRAINT @alias(\"constraint\") @description(\"Durable constraint-level finding.\")\n FACT @alias(\"fact\") @description(\"Durable fact-level finding.\")\n IMPLEMENTATION @alias(\"implementation\") @description(\"Trace-local implementation evidence or discarded hypothesis.\")\n}\n\nclass TraceWindowFinding {\n theme string @description(\"Short stable theme for this finding.\")\n level FindingLevel @description(\"Use durable levels for reusable context, implementation for local/noisy evidence.\")\n line int? @description(\"1-based supporting line when the window gives one.\")\n quote string? @description(\"Short supporting quote from the current window.\")\n note string @description(\"Compact semantic finding. Avoid command logs and copied errors.\")\n}\n\nclass TraceWindowScan {\n episode_update string? @description(\"Compact update for the final episode summary. May be omitted when this window adds nothing.\")\n durable_findings TraceWindowFinding[] @description(\"Reusable decisions, preferences, constraints, facts, and references found in this window.\")\n implementation_findings TraceWindowFinding[] @description(\"Implementation evidence, discarded hypotheses, and local details useful only as support/noise.\")\n discarded_noise string[] @description(\"Short descriptions of noisy categories intentionally not saved.\")\n}\n\nclass EpisodeDraft {\n title string? @description(\"Optional short title for the current-session episode. Runtime derives one if omitted.\")\n body string? 
@description(\"Compact episode body. If omitted, runtime builds it from user_intent and what_happened.\")\n status RecordStatus? @description(\"Use archived for routine/no-durable sessions; active only when the episode itself remains useful.\")\n user_intent string? @description(\"What the user wanted in this source session. Runtime fills a generic fallback if omitted.\")\n what_happened string? @description(\"What the session actually did. Runtime fills a generic fallback if omitted.\")\n outcomes string? @description(\"Optional concise outcome.\")\n}\n\nclass DurableRecordDraft {\n kind RecordKind @description(\"Durable record kind.\")\n title string @description(\"Short standalone durable title.\")\n body string @description(\"Compact standalone durable body.\")\n status RecordStatus? @description(\"Usually active for reusable durable records.\")\n valid_from string?\n valid_until string?\n decision string?\n why string?\n alternatives string?\n consequences string?\n}\n\nclass SynthesizedExtraction {\n episode EpisodeDraft @description(\"Exactly one current-session episode record draft.\")\n durable_records DurableRecordDraft[] @description(\"Zero or more durable records.\")\n completion_summary string? @description(\"Brief summary of extraction work for final_result/reporting.\")\n}\n\nfunction ScanTraceWindow(\n run_instruction: string,\n prior_episode_summary: string,\n prior_findings_summary: string,\n trace_window: string\n) -> TraceWindowScan {\n client MiniMaxM27\n prompt #\"\n {{ _.role(\"system\") }}\n You scan one window from a coding-agent trace for Lerim extraction.\n Return only structured output. Do not save records and do not plan future tool calls.\n Do not include tags, hidden reasoning, markdown, or prose.\n The top-level output must include episode_update, durable_findings, implementation_findings, and discarded_noise.\n Use an empty string or empty list when a field has no content.\n Every durable_findings and implementation_findings item must be an object with:\n - theme\n - level: one of decision, preference, feedback, reference, constraint, fact, or implementation\n - note\n - optional line\n - optional quote\n Never return finding items as plain strings.\n Never use confidence labels such as high, medium, or low as finding levels.\n\n Separate:\n - episode_update: what happened in this session window, for the final episode record.\n - durable_findings: reusable project/user context only.\n - implementation_findings: local evidence, command work, discarded hypotheses, or support.\n - discarded_noise: categories of content intentionally ignored.\n\n Durable signal means a decision, preference, constraint, fact, or reference likely useful beyond this trace.\n Implementation detail alone is not durable signal.\n A durable finding should help a future agent make a better decision in a new chat.\n Save what a future agent would be relieved to already know: stable user preferences, chosen policies, durable constraints, source-of-truth references, and stable project or product truths.\n If a detail only helps replay or debug this trace's terminal session, put it in implementation_findings or discarded_noise, not durable_findings.\n Raw metrics, comparison tables, config edits, API wiring, package installation, debug commands, file paths, and stack traces are implementation_findings unless the user turns them into a reusable policy, preference, constraint, fact, or source-of-truth reference.\n When the same topic has both user-level guidance and technical evidence, the 
user-level guidance is the durable finding; the technical evidence is implementation support.\n For technical blockers, keep durable_findings at the capability, status, and implication level only. Put patch recipes, flags, functions, commands, exact metrics, and local debug steps in implementation_findings.\n When a window mixes a durable point with local evidence, put only the reusable point in durable_findings and put the supporting local evidence in implementation_findings.\n Use semantic meaning, not exact wording. Do not copy long code, logs, stack traces, or command transcripts.\n If the window has no durable signal, return an empty durable_findings list.\n If the window adds nothing meaningful to the episode, episode_update may be an empty string.\n\n {{ _.role(\"user\") }}\n RUN INSTRUCTION:\n {{ run_instruction }}\n\n PRIOR EPISODE SUMMARY:\n {{ prior_episode_summary }}\n\n PRIOR FINDINGS SUMMARY:\n {{ prior_findings_summary }}\n\n TRACE WINDOW:\n {{ trace_window }}\n\n {{ ctx.output_format }}\n \"#\n}\n\nfunction SynthesizeExtractRecords(\n run_instruction: string,\n episode_summary: string,\n durable_findings_summary: string,\n existing_record_manifest: string\n) -> SynthesizedExtraction {\n client MiniMaxM27\n prompt #\"\n {{ _.role(\"system\") }}\n You synthesize final Lerim context records from scanned trace findings.\n Return only structured output.\n Do not include tags, hidden reasoning, markdown, or prose.\n The top-level output must include episode, durable_records, and completion_summary.\n Use an empty durable_records list when there is no durable signal.\n Every durable_records item must be an object with kind, title, and body.\n Optional durable record fields are status, valid_from, valid_until, decision, why, alternatives, and consequences.\n Never return durable_records items as plain strings or as generic type/record objects.\n\n Create exactly one episode record for the current session.\n Create durable records only for reusable decisions, preferences, constraints, facts, and references.\n The episode says what the session did. Durable records say what future sessions should reuse.\n Before creating each durable record, ask: would this change what a future agent does in a new chat?\n Prefer the small set of memories a future agent would be relieved to already know over exhaustive coverage of what happened.\n Keep durable records compact, standalone, and deduplicated.\n Do not create durable records for command logs, patch steps, generic programming knowledge, temporary diagnostics, or local implementation chatter.\n If there is no reusable signal, durable_records must be empty and the episode should normally be archived.\n Decision records must include decision and why when the rationale is present; use fact when there is no durable why.\n Episode records should include user_intent and what_happened when available; the runtime can derive missing episode fields.\n Treat the trace as historical evidence, not live verification of the current repo.\n\n Existing records, if listed, are only duplicate-risk context. Prefer skipping near-duplicates over creating duplicate durable records.\n\n Quality bar:\n - Store the reusable rule, decision, invariant, dependency, preference, or external pointer, not the story of the session.\n - One durable record should hold one durable point.\n - Direct consequences and application guidance usually stay inside that same record.\n - Create the minimum number of durable records that preserves distinct durable meanings. 
Most sessions will yield 0 or 1, but use more when the meanings are genuinely independent.\n - Classify each durable point into one canonical kind. Prefer the most specific supported kind, and do not duplicate the same point across multiple durable kinds.\n - Duplicates are worse than gaps. Skip uncertain candidates rather than spraying near-duplicates.\n - Never create a second durable record in the same run for the same core claim.\n - constraint and reference are first-class durable record kinds, not fallback categories.\n\n What not to save:\n - patch logs, command sequences, retries, timelines, or meeting-style recaps\n - code structure, file paths, git history, or storage mechanics by themselves\n - generic programming knowledge or facts already obvious from the repo\n - rejected lures, discarded explanations, or implementation-only distractions\n - one-run validation findings, approval flow, queue state, DB resets, rebuilds, or runtime diagnostics by themselves\n\n Selection rules:\n - First separate findings into durable signal and implementation evidence.\n - Prefer user-level guidance, chosen operating policy, and stable project truth over the technical evidence that revealed them.\n - Synthesize at the theme level. Usually one theme becomes one durable record.\n - Create multiple durable records only when the trace establishes multiple independent durable points, each with its own support.\n - Do not store one durable point as both a preference and a decision, a fact and a decision, or any other cross-kind duplicate.\n - A stable workflow preference is not also a decision unless the trace separately states an explicit project decision with rationale.\n - A dependency, setup, or environment truth without durable rationale is a fact, not also a decision.\n - A failure caused by the current run's temporary validation setup is not itself a durable environment truth. 
If the investigation reveals a stable requirement that future sessions must apply, save that requirement as the durable point and omit the temporary validation story.\n - Merge candidates when one only states how to apply the other in local operations, routing, or ownership.\n - If two candidates share the same core claim, merge them.\n - If the difference is only evidence framing, symptom wording, or local-vs-CI phrasing around the same durable fact, keep one record and fold the extra context into it.\n - If one candidate is only the direct application or routing consequence of another, keep it inside the stronger record.\n - If one candidate only says how different local components should apply the same project rule, keep that guidance inside the main record rather than creating a second durable record.\n - If one candidate only restates where local project components live or how an internal architecture is applied, keep it inside the stronger decision, fact, or constraint instead of creating a separate reference.\n - If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule.\n - If a technical blocker is reusable, record only the capability affected, status or source of truth, and implication for future work.\n - Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered.\n - Store durable records only when the lesson is likely reusable beyond this trace.\n - If a candidate is mainly about this trace's commands, files, or timeline, reject it.\n - Trace-local instructions about what to ignore in this session are not preferences unless they clearly express a broader standing workflow rule for future sessions.\n - If the trace explicitly says the rationale is unknown or says not to invent one, do not create a decision; use fact instead.\n - A stable setup, dependency, or environment requirement without a durable why is a fact even if it sounds like the current chosen setup.\n - The instruction \"do not invent a why\" is extraction guidance, not project context.\n - If the trace explicitly rejects a lure or distraction, do not carry that rejected idea into the durable record text unless the rejection itself is the durable lesson.\n - If a long noisy investigation resolves into one source-of-truth boundary, store only that boundary. 
Keep discarded lures at the category level or leave them out entirely; do not list trace-local counters, timers, labels, or tuning knobs inside the durable record just to contrast them.\n - When a discarded lure matters as evidence, keep it attached to the main durable theme as implementation context rather than storing it as a second durable theme.\n - If the episode summary contains clearly reusable decision, preference, constraint, fact, or reference, that point should usually also exist as its own durable record.\n - Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode.\n - Durable records are additional project context, not a substitute for the session episode.\n - Most traces should produce only a few durable records; create more only when each one would independently change future behavior.\n\n Writing rules:\n - Durable titles should name the lasting rule, decision, fact, constraint, preference, or reference directly.\n - Durable bodies should be compact, neutral, and standalone.\n - Prefer this shape for durable records: the durable point, why it matters, and how to apply it later.\n - Do not write durable records as meeting minutes, patch logs, or cleanup commentary.\n - Do not preserve trace-local commands, negotiation phrasing, or \"this is not about X\" sentences in final record text.\n - Do not mention discarded implementation noise in durable record fields, including consequences.\n - Do not include patch recipes, flags, function names, exact metrics, or local debug steps in durable record text.\n - Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored.\n - When the durable lesson is a source-of-truth rule, write the authoritative rule directly.\n - If a short contrast is still helpful, keep it abstract, such as \"not worker-local state\" or \"not ephemeral local state\". Do not enumerate examples in parentheses or comma-separated lists.\n - When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection.\n - Facts from noisy failures must be rewritten into the underlying dependency, environment requirement, stakeholder driver, or operational fact.\n - If a fact still reads like stderr, an exception symptom, or copied command output, rewrite it again before writing.\n - When the durable lesson is an environment or dependency requirement, do not center the fact on the observed failure symptom. Name the requirement directly and mention the symptom only if it is needed as brief supporting context.\n - If brief supporting context is useful, lead with the requirement and keep the symptom generic. Never include exception class names, quoted error fragments, or copied failure strings in the durable fact.\n - If the candidate is mainly \"this validation run failed until we changed the setup\", it belongs in the archived episode. If the candidate names a reusable setup or runtime requirement discovered through that validation, keep the requirement and drop the failure narrative.\n - When no durable rationale exists, do not spend the fact body explaining that the rationale is absent. Just state the stable dependency, setup requirement, or operational truth directly.\n - Do not quote or paraphrase trace instructions about how to classify the evidence inside the final fact body. 
Final fact text should describe the underlying truth, not the extraction rule you followed.\n - References must answer both \"where should future sessions look?\" and \"when should they consult it?\"\n - Do not use reference for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than \"consult this external source next time.\"\n - Keep the episode concise: short title, short body, concise user_intent, what_happened, and outcomes.\n\n Record types:\n - preference: Stable workflow guidance from the user. Save corrections and confirmed non-obvious working style that should carry into future sessions.\n - decision: A chosen approach or project rule that future work should follow and that is not obvious from code alone. If the trace does not support a durable why, do not use decision.\n - constraint: A durable invariant, limit, or must/cannot rule that future work must respect.\n - fact: A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth.\n - reference: A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo. Use reference only when the enduring value is where to look later.\n\n Few-shot quality examples:\n\n Example preference:\n - Trace signal: the user corrects the assistant after a small code fix and says not to append redundant recaps.\n - Good: create one preference record about keeping replies terse and avoiding redundant change recaps after small diffs.\n - Bad: store the file edit itself, or treat the correction as one-session scratch when it is stable workflow guidance.\n\n Example decision:\n - Trace signal: early turns discuss local refactors and flaky tests; late in the trace the user settles one architecture boundary, and follow-on routing guidance only applies that boundary.\n - Good: create the required episode and one decision record for the architecture boundary. Keep the routing guidance inside that record.\n - Bad: store refactor noise, split one architectural choice into near-duplicate records, or create a separate durable record saying the refactors were noise.\n\n Example fact:\n - Trace signal: repeated failed commands and partial theories eventually resolve to one stable operational requirement.\n - Good: create one fact record for the requirement in clean operational language. Lead with the dependency or environment requirement; mention the failure only as brief generic support if needed.\n - Bad: store raw errors, command history, rejected theories, exact exception text, or a separate record saying not to invent a why.\n\n Example late clarification:\n - Trace signal: early chunks circle local counters, timers, labels, and tuning; the final chunk clarifies those were distractions and the real durable lesson is a source-of-truth boundary.\n - Good: create one durable record for the source-of-truth boundary. 
Mention restart or failover only if it explains why the boundary matters.\n - Bad: write a durable record that carries over rejected local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list.\n\n Example reference:\n - Trace signal: the assistant starts from a partial repo note, then the user clarifies that ownership or status lives in an external dashboard or ticket system.\n - Good: create one reference record that names the external source and when future sessions should consult it.\n - Bad: center the record on local files, or turn it into a warning slogan about what not to trust locally.\n\n Example routine:\n - Trace signal: formatter, small lint fix, rerun tests, green result, no new rule or durable fact.\n - Good: create only an archived episode.\n - Bad: invent a durable record from the sequence of routine commands.\n\n {{ _.role(\"user\") }}\n RUN INSTRUCTION:\n {{ run_instruction }}\n\n EXISTING RECORD MANIFEST:\n {{ existing_record_manifest }}\n\n EPISODE SUMMARY:\n {{ episode_summary }}\n\n DURABLE FINDINGS:\n {{ durable_findings_summary }}\n\n {{ ctx.output_format }}\n \"#\n}\n", + "extract_react_tests.baml": "test ScanTraceWindowCapturesDurableDecision {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract one episode and any durable project context from the trace.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [8 lines, window 1-8]\n 1\tuser: We should run extraction evals before changing extraction prompts.\n 2\tassistant: I will update the BAML harness and run the eval.\n 3\tuser: Keep trace-local command logs out of long-term context.\n 4\tassistant: Implemented the change and ran a smoke test.\n \"#\n }\n @@assert({{ this.durable_findings|length > 0 }})\n}\n\ntest ScanTraceWindowSeparatesDurableAndImplementation {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract reusable context, not command history.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [7 lines, window 1-7]\n 1\tuser: The deployment checklist is the source of truth for release readiness.\n 2\tassistant: Ran pytest -q and fixed a local fixture path.\n 3\tassistant: Edited src/example.py and reran the lint command.\n 4\tuser: The command output is just evidence, not memory.\n \"#\n }\n @@assert({{ this.durable_findings|length > 0 }})\n @@assert({{ this.implementation_findings|length > 0 }})\n}\n\ntest ScanTraceWindowAllowsNoDurableSignal {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract durable context only when the trace supports it.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [5 lines, window 1-5]\n 1\tuser: Please format this file.\n 2\tassistant: Ran the formatter.\n 3\tassistant: Fixed one lint complaint.\n 4\tassistant: Tests are green.\n \"#\n }\n @@assert({{ this.durable_findings|length == 0 }})\n}\n\ntest SynthesizeExtractRecordsCreatesEpisodeAndDurableRecord {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create exactly one episode and durable records only for reusable project context.\"\n episode_summary \"- The session updated an extraction harness and ran a smoke eval.\"\n durable_findings_summary \"- preference: eval-first prompt changes: Run the extraction eval before changing extraction prompts. 
(line 1)\"\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.episode.user_intent|length > 0 }})\n @@assert({{ this.episode.what_happened|length > 0 }})\n @@assert({{ this.durable_records|length > 0 }})\n}\n\ntest SynthesizeExtractRecordsAllowsNoDurableSignal {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create exactly one episode and no durable records when no reusable context exists.\"\n episode_summary \"- The session only formatted code, fixed one lint issue, and reran tests.\"\n durable_findings_summary \"(none)\"\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.episode.user_intent|length > 0 }})\n @@assert({{ this.episode.what_happened|length > 0 }})\n @@assert({{ this.durable_records|length == 0 }})\n}\n\ntest SynthesizeExtractRecordsDeduplicatesCoreClaim {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create compact durable records and merge duplicate meanings.\"\n episode_summary \"- The session clarified a runtime state boundary after noisy implementation discussion.\"\n durable_findings_summary #\"\n - decision: persisted source of truth: Runtime status must live in one persisted store that survives restart.\n - decision: runtime status boundary: Use the same persisted status store as the source of truth after restart.\n \"#\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.episode.user_intent|length > 0 }})\n @@assert({{ this.durable_records|length == 1 }})\n}\n", "generators.baml": "generator target {\n output_type \"python/pydantic\"\n output_dir \"../\"\n default_client_mode \"sync\"\n version \"0.222.0\"\n}\n", + "models.baml": "retry_policy ExtractAgentRetry {\n max_retries 1\n strategy {\n type exponential_backoff\n delay_ms 500\n multiplier 2\n max_delay_ms 8000\n }\n}\n\nclient MiniMaxM27 {\n provider \"openai-generic\"\n retry_policy ExtractAgentRetry\n options {\n base_url \"https://api.minimax.io/v1\"\n api_key env.MINIMAX_API_KEY\n model \"MiniMax-M2.7\"\n temperature 0.01\n http {\n connect_timeout_ms 10000\n time_to_first_token_timeout_ms 120000\n idle_timeout_ms 30000\n request_timeout_ms 300000\n }\n }\n}\n\nclient OllamaLocal {\n provider \"openai-generic\"\n retry_policy ExtractAgentRetry\n options {\n base_url \"http://127.0.0.1:11434/v1\"\n api_key \"ollama\"\n model \"gemma4:e4b\"\n temperature 0.0\n http {\n connect_timeout_ms 10000\n time_to_first_token_timeout_ms 120000\n idle_timeout_ms 30000\n request_timeout_ms 300000\n }\n }\n}\n", } def get_baml_files(): diff --git a/baml_agents/baml_client/parser.py b/baml_agents/baml_client/parser.py index cb18394..9cb2f07 100644 --- a/baml_agents/baml_client/parser.py +++ b/baml_agents/baml_client/parser.py @@ -23,11 +23,17 @@ class LlmResponseParser: def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - def DecideNextExtractStep( + def ScanTraceWindow( self, llm_response: str, baml_options: BamlCallOptions = {}, - ) -> types.ExtractAgentStep: - __result__ = self.__options.merge_options(baml_options).parse_response(function_name="DecideNextExtractStep", llm_response=llm_response, mode="request") - return typing.cast(types.ExtractAgentStep, __result__) + ) -> types.TraceWindowScan: + __result__ = self.__options.merge_options(baml_options).parse_response(function_name="ScanTraceWindow", llm_response=llm_response, mode="request") + return typing.cast(types.TraceWindowScan, __result__) + + def SynthesizeExtractRecords( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> 
types.SynthesizedExtraction: + __result__ = self.__options.merge_options(baml_options).parse_response(function_name="SynthesizeExtractRecords", llm_response=llm_response, mode="request") + return typing.cast(types.SynthesizedExtraction, __result__) @@ -37,10 +43,16 @@ class LlmStreamParser: def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - def DecideNextExtractStep( + def ScanTraceWindow( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> stream_types.TraceWindowScan: + __result__ = self.__options.merge_options(baml_options).parse_response(function_name="ScanTraceWindow", llm_response=llm_response, mode="stream") + return typing.cast(stream_types.TraceWindowScan, __result__) + + def SynthesizeExtractRecords( self, llm_response: str, baml_options: BamlCallOptions = {}, - ) -> stream_types.ExtractAgentStep: - __result__ = self.__options.merge_options(baml_options).parse_response(function_name="DecideNextExtractStep", llm_response=llm_response, mode="stream") - return typing.cast(stream_types.ExtractAgentStep, __result__) + ) -> stream_types.SynthesizedExtraction: + __result__ = self.__options.merge_options(baml_options).parse_response(function_name="SynthesizeExtractRecords", llm_response=llm_response, mode="stream") + return typing.cast(stream_types.SynthesizedExtraction, __result__) \ No newline at end of file diff --git a/baml_agents/baml_client/stream_types.py b/baml_agents/baml_client/stream_types.py index 10b3eea..0f2d9cf 100644 --- a/baml_agents/baml_client/stream_types.py +++ b/baml_agents/baml_client/stream_types.py @@ -23,81 +23,46 @@ class StreamState(BaseModel, typing.Generic[StreamStateValueT]): value: StreamStateValueT state: typing_extensions.Literal["Pending", "Incomplete", "Complete"] # ######################################################################### -# Generated classes (9) +# Generated classes (5) # ######################################################################### -class ExtractAgentStep(BaseModel): - action: typing.Optional[types.ExtractAction] = None - rationale: typing.Optional[str] = Field(default=None, description='One short sentence explaining why this is the next action. Do not include hidden chain-of-thought.') - read_trace: typing.Optional["ReadTraceArgs"] = None - search_context: typing.Optional["SearchContextArgs"] = None - get_context: typing.Optional["GetContextArgs"] = None - save_context: typing.Optional["SaveContextArgs"] = None - revise_context: typing.Optional["ReviseContextArgs"] = None - note_trace_findings: typing.Optional["NoteTraceFindingsArgs"] = None - prune_trace_reads: typing.Optional["PruneTraceReadsArgs"] = None - final_result: typing.Optional["FinalResultArgs"] = None - -class FinalResultArgs(BaseModel): - completion_summary: typing.Optional[str] = None - -class GetContextArgs(BaseModel): - record_ids: typing.List[str] = Field(description='Record IDs returned by search_context or list_context. Fetch before any revision.') - include_versions: typing.Optional[bool] = Field(default=None, description='Whether to include prior versions.') - detail: typing.Optional[typing.Union[typing_extensions.Literal['concise'], typing_extensions.Literal['detailed']]] = Field(default=None, description='Use concise unless full version history is needed.') - -class NoteTraceFindingsArgs(BaseModel): - theme: typing.Optional[str] = Field(default=None, description='Short theme label for this finding. 
Leave empty only for the no-findings checkpoint.') - line: typing.Optional[int] = Field(default=None, description='1-based trace line with supporting evidence. Leave empty only for the no-findings checkpoint.') - quote: typing.Optional[str] = Field(default=None, description='Short evidence quote from the trace. Leave empty only for the no-findings checkpoint.') - level: typing.Optional[types.FindingLevel] = Field(default=None, description='Durable levels only for reusable context; implementation for trace-local evidence.') - -class PruneTraceReadsArgs(BaseModel): - start_lines: typing.List[int] = Field(description='1-based start_line values from earlier read_trace calls, such as 1, 101, or 201. Never use context token counts.') - -class ReadTraceArgs(BaseModel): - start_line: typing.Optional[int] = None - line_count: typing.Optional[int] = None - -class ReviseContextArgs(BaseModel): - record_id: typing.Optional[str] = None - reason: typing.Optional[str] = Field(default=None, description='Short reason for the revision.') - kind: typing.Optional[types.RecordKind] = Field(default=None, description='Must match the fetched record kind; revise_context cannot change kind.') - title: typing.Optional[str] = Field(default=None, description='Complete improved title.') - body: typing.Optional[str] = Field(default=None, description='Complete improved body.') - status: typing.Optional[types.RecordStatus] = Field(default=None, description='Lifecycle status to keep or set.') +class DurableRecordDraft(BaseModel): + kind: typing.Optional[types.RecordKind] = Field(default=None, description='Durable record kind.') + title: typing.Optional[str] = Field(default=None, description='Short standalone durable title.') + body: typing.Optional[str] = Field(default=None, description='Compact standalone durable body.') + status: typing.Optional[types.RecordStatus] = Field(default=None, description='Usually active for reusable durable records.') valid_from: typing.Optional[str] = None valid_until: typing.Optional[str] = None decision: typing.Optional[str] = None why: typing.Optional[str] = None alternatives: typing.Optional[str] = None consequences: typing.Optional[str] = None - user_intent: typing.Optional[str] = None - what_happened: typing.Optional[str] = None - outcomes: typing.Optional[str] = None -class SaveContextArgs(BaseModel): - kind: typing.Optional[types.RecordKind] = Field(default=None, description='Use episode for the mandatory session summary; use durable kinds only for reusable project context.') - title: typing.Optional[str] = Field(default=None, description='Short standalone title for the lasting point or episode.') - body: typing.Optional[str] = Field(default=None, description='Compact standalone body. 
Do not include trace-local command logs or copied errors.') - status: typing.Optional[types.RecordStatus] = Field(default=None, description='Use archived for routine episodes with no durable signal; active for reusable durable records.') - valid_from: typing.Optional[str] = Field(default=None, description='Optional validity start timestamp.') - valid_until: typing.Optional[str] = Field(default=None, description='Optional validity end timestamp.') - decision: typing.Optional[str] = Field(default=None, description='Required only for kind=decision; omit for facts, preferences, constraints, references, and episodes.') - why: typing.Optional[str] = Field(default=None, description='Required only for kind=decision when rationale exists; omit when the trace says no durable rationale was supplied.') - alternatives: typing.Optional[str] = Field(default=None, description='Optional decision alternatives. Omit for non-decisions.') - consequences: typing.Optional[str] = Field(default=None, description='Optional direct application guidance for the same durable point.') - user_intent: typing.Optional[str] = Field(default=None, description='Required when kind=episode. Concise statement of what the user wanted in this source session.') - what_happened: typing.Optional[str] = Field(default=None, description='Required when kind=episode. Concise statement of what the session actually did.') - outcomes: typing.Optional[str] = Field(default=None, description='Optional concise episode outcome.') +class EpisodeDraft(BaseModel): + title: typing.Optional[str] = Field(default=None, description='Optional short title for the current-session episode. Runtime derives one if omitted.') + body: typing.Optional[str] = Field(default=None, description='Compact episode body. If omitted, runtime builds it from user_intent and what_happened.') + status: typing.Optional[types.RecordStatus] = Field(default=None, description='Use archived for routine/no-durable sessions; active only when the episode itself remains useful.') + user_intent: typing.Optional[str] = Field(default=None, description='What the user wanted in this source session. Runtime fills a generic fallback if omitted.') + what_happened: typing.Optional[str] = Field(default=None, description='What the session actually did. Runtime fills a generic fallback if omitted.') + outcomes: typing.Optional[str] = Field(default=None, description='Optional concise outcome.') + +class SynthesizedExtraction(BaseModel): + episode: typing.Optional["EpisodeDraft"] = Field(default=None, description='Exactly one current-session episode record draft.') + durable_records: typing.List["DurableRecordDraft"] = Field(description='Zero or more durable records.') + completion_summary: typing.Optional[str] = Field(default=None, description='Brief summary of extraction work for final_result/reporting.') + +class TraceWindowFinding(BaseModel): + theme: typing.Optional[str] = Field(default=None, description='Short stable theme for this finding.') + level: typing.Optional[types.FindingLevel] = Field(default=None, description='Use durable levels for reusable context, implementation for local/noisy evidence.') + line: typing.Optional[int] = Field(default=None, description='1-based supporting line when the window gives one.') + quote: typing.Optional[str] = Field(default=None, description='Short supporting quote from the current window.') + note: typing.Optional[str] = Field(default=None, description='Compact semantic finding. 
Avoid command logs and copied errors.') -class SearchContextArgs(BaseModel): - query: typing.Optional[str] = Field(default=None, description='Natural-language query for the durable meaning, not \'*\' or an empty browse request.') - kind: typing.Optional[types.RecordKind] = Field(default=None, description='Optional filter. Use only when the desired record kind is known.') - status: typing.Optional[types.RecordStatus] = Field(default=None, description='Optional lifecycle filter.') - valid_at: typing.Optional[str] = Field(default=None, description='Optional timestamp for historical lookup.') - include_archived: typing.Optional[bool] = Field(default=None, description='Whether archived records should be included.') - limit: typing.Optional[int] = Field(default=None, description='Maximum hits to return.') +class TraceWindowScan(BaseModel): + episode_update: typing.Optional[str] = Field(default=None, description='Compact update for the final episode summary. May be omitted when this window adds nothing.') + durable_findings: typing.List["TraceWindowFinding"] = Field(description='Reusable decisions, preferences, constraints, facts, and references found in this window.') + implementation_findings: typing.List["TraceWindowFinding"] = Field(description='Implementation evidence, discarded hypotheses, and local details useful only as support/noise.') + discarded_noise: typing.List[str] = Field(description='Short descriptions of noisy categories intentionally not saved.') # ######################################################################### # Generated type aliases (0) diff --git a/baml_agents/baml_client/sync_client.py b/baml_agents/baml_client/sync_client.py index 90230d3..e5a104b 100644 --- a/baml_agents/baml_client/sync_client.py +++ b/baml_agents/baml_client/sync_client.py @@ -94,20 +94,34 @@ def parse(self): def parse_stream(self): return self.__llm_stream_parser - def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str, + def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, baml_options: BamlCallOptions = {}, - ) -> types.ExtractAgentStep: + ) -> types.TraceWindowScan: # Check if on_tick is provided if 'on_tick' in baml_options: - __stream__ = self.stream.DecideNextExtractStep(runtime_dashboard=runtime_dashboard,run_instruction=run_instruction,tool_manifest=tool_manifest,scratchpad=scratchpad, + __stream__ = self.stream.ScanTraceWindow(run_instruction=run_instruction,prior_episode_summary=prior_episode_summary,prior_findings_summary=prior_findings_summary,trace_window=trace_window, baml_options=baml_options) return __stream__.get_final_response() else: # Original non-streaming code - __result__ = self.__options.merge_options(baml_options).call_function_sync(function_name="DecideNextExtractStep", args={ - "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad, + __result__ = self.__options.merge_options(baml_options).call_function_sync(function_name="ScanTraceWindow", args={ + "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, }) - return typing.cast(types.ExtractAgentStep, __result__.cast_to(types, types, stream_types, False, __runtime__)) + return typing.cast(types.TraceWindowScan, __result__.cast_to(types, types, stream_types, False, __runtime__)) + def SynthesizeExtractRecords(self, 
run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, + baml_options: BamlCallOptions = {}, + ) -> types.SynthesizedExtraction: + # Check if on_tick is provided + if 'on_tick' in baml_options: + __stream__ = self.stream.SynthesizeExtractRecords(run_instruction=run_instruction,episode_summary=episode_summary,durable_findings_summary=durable_findings_summary,existing_record_manifest=existing_record_manifest, + baml_options=baml_options) + return __stream__.get_final_response() + else: + # Original non-streaming code + __result__ = self.__options.merge_options(baml_options).call_function_sync(function_name="SynthesizeExtractRecords", args={ + "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, + }) + return typing.cast(types.SynthesizedExtraction, __result__.cast_to(types, types, stream_types, False, __runtime__)) @@ -117,16 +131,28 @@ class BamlStreamClient: def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str, + def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlSyncStream[stream_types.ExtractAgentStep, types.ExtractAgentStep]: - __ctx__, __result__ = self.__options.merge_options(baml_options).create_sync_stream(function_name="DecideNextExtractStep", args={ - "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad, + ) -> baml_py.BamlSyncStream[stream_types.TraceWindowScan, types.TraceWindowScan]: + __ctx__, __result__ = self.__options.merge_options(baml_options).create_sync_stream(function_name="ScanTraceWindow", args={ + "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, }) - return baml_py.BamlSyncStream[stream_types.ExtractAgentStep, types.ExtractAgentStep]( + return baml_py.BamlSyncStream[stream_types.TraceWindowScan, types.TraceWindowScan]( __result__, - lambda x: typing.cast(stream_types.ExtractAgentStep, x.cast_to(types, types, stream_types, True, __runtime__)), - lambda x: typing.cast(types.ExtractAgentStep, x.cast_to(types, types, stream_types, False, __runtime__)), + lambda x: typing.cast(stream_types.TraceWindowScan, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.TraceWindowScan, x.cast_to(types, types, stream_types, False, __runtime__)), + __ctx__, + ) + def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlSyncStream[stream_types.SynthesizedExtraction, types.SynthesizedExtraction]: + __ctx__, __result__ = self.__options.merge_options(baml_options).create_sync_stream(function_name="SynthesizeExtractRecords", args={ + "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, + }) + return baml_py.BamlSyncStream[stream_types.SynthesizedExtraction, types.SynthesizedExtraction]( + __result__, + lambda x: typing.cast(stream_types.SynthesizedExtraction, x.cast_to(types, 
types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.SynthesizedExtraction, x.cast_to(types, types, stream_types, False, __runtime__)), __ctx__, ) @@ -137,11 +163,18 @@ class BamlHttpRequestClient: def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str, + def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ScanTraceWindow", args={ + "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, + }, mode="request") + return __result__ + def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="DecideNextExtractStep", args={ - "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad, + __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SynthesizeExtractRecords", args={ + "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, }, mode="request") return __result__ @@ -152,11 +185,18 @@ class BamlHttpStreamRequestClient: def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - def DecideNextExtractStep(self, runtime_dashboard: str,run_instruction: str,tool_manifest: str,scratchpad: str, + def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ScanTraceWindow", args={ + "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, + }, mode="stream") + return __result__ + def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="DecideNextExtractStep", args={ - "runtime_dashboard": runtime_dashboard,"run_instruction": run_instruction,"tool_manifest": tool_manifest,"scratchpad": scratchpad, + __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SynthesizeExtractRecords", args={ + "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, }, mode="stream") return __result__ diff --git a/baml_agents/baml_client/type_builder.py b/baml_agents/baml_client/type_builder.py index 51d6d61..8f43731 100644 --- a/baml_agents/baml_client/type_builder.py +++ 
b/baml_agents/baml_client/type_builder.py @@ -20,19 +20,15 @@ class TypeBuilder(type_builder.TypeBuilder): def __init__(self): super().__init__(classes=set( - ["ExtractAgentStep","FinalResultArgs","GetContextArgs","NoteTraceFindingsArgs","PruneTraceReadsArgs","ReadTraceArgs","ReviseContextArgs","SaveContextArgs","SearchContextArgs",] + ["DurableRecordDraft","EpisodeDraft","SynthesizedExtraction","TraceWindowFinding","TraceWindowScan",] ), enums=set( - ["ExtractAction","FindingLevel","RecordKind","RecordStatus",] + ["FindingLevel","RecordKind","RecordStatus",] ), runtime=DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME) # ######################################################################### - # Generated enums 4 + # Generated enums 3 # ######################################################################### - @property - def ExtractAction(self) -> "ExtractActionViewer": - return ExtractActionViewer(self) - @property def FindingLevel(self) -> "FindingLevelViewer": return FindingLevelViewer(self) @@ -47,117 +43,35 @@ def RecordStatus(self) -> "RecordStatusViewer": # ######################################################################### - # Generated classes 9 + # Generated classes 5 # ######################################################################### @property - def ExtractAgentStep(self) -> "ExtractAgentStepViewer": - return ExtractAgentStepViewer(self) - - @property - def FinalResultArgs(self) -> "FinalResultArgsViewer": - return FinalResultArgsViewer(self) + def DurableRecordDraft(self) -> "DurableRecordDraftViewer": + return DurableRecordDraftViewer(self) @property - def GetContextArgs(self) -> "GetContextArgsViewer": - return GetContextArgsViewer(self) + def EpisodeDraft(self) -> "EpisodeDraftViewer": + return EpisodeDraftViewer(self) @property - def NoteTraceFindingsArgs(self) -> "NoteTraceFindingsArgsViewer": - return NoteTraceFindingsArgsViewer(self) + def SynthesizedExtraction(self) -> "SynthesizedExtractionViewer": + return SynthesizedExtractionViewer(self) @property - def PruneTraceReadsArgs(self) -> "PruneTraceReadsArgsViewer": - return PruneTraceReadsArgsViewer(self) + def TraceWindowFinding(self) -> "TraceWindowFindingViewer": + return TraceWindowFindingViewer(self) @property - def ReadTraceArgs(self) -> "ReadTraceArgsViewer": - return ReadTraceArgsViewer(self) - - @property - def ReviseContextArgs(self) -> "ReviseContextArgsViewer": - return ReviseContextArgsViewer(self) - - @property - def SaveContextArgs(self) -> "SaveContextArgsViewer": - return SaveContextArgsViewer(self) - - @property - def SearchContextArgs(self) -> "SearchContextArgsViewer": - return SearchContextArgsViewer(self) + def TraceWindowScan(self) -> "TraceWindowScanViewer": + return TraceWindowScanViewer(self) # ######################################################################### -# Generated enums 4 +# Generated enums 3 # ######################################################################### -class ExtractActionAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.enum("ExtractAction") - self._values: typing.Set[str] = set([ "READ_TRACE", "SEARCH_CONTEXT", "GET_CONTEXT", "SAVE_CONTEXT", "REVISE_CONTEXT", "NOTE_TRACE_FINDINGS", "PRUNE_TRACE_READS", "FINAL_RESULT", ]) - self._vals = ExtractActionValues(self._bldr, self._values) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - - @property - def values(self) -> "ExtractActionValues": - return self._vals - - 
-class ExtractActionViewer(ExtractActionAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_values(self) -> typing.List[typing.Tuple[str, type_builder.EnumValueViewer]]: - return [(name, type_builder.EnumValueViewer(self._bldr.value(name))) for name in self._values] - - -class ExtractActionValues: - def __init__(self, enum_bldr: baml_py.EnumBuilder, values: typing.Set[str]): - self.__bldr = enum_bldr - self.__values = values # type: ignore (we know how to use this private attribute) # noqa: F821 - - - - @property - def READ_TRACE(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("READ_TRACE")) - - @property - def SEARCH_CONTEXT(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("SEARCH_CONTEXT")) - - @property - def GET_CONTEXT(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("GET_CONTEXT")) - - @property - def SAVE_CONTEXT(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("SAVE_CONTEXT")) - - @property - def REVISE_CONTEXT(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("REVISE_CONTEXT")) - - @property - def NOTE_TRACE_FINDINGS(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("NOTE_TRACE_FINDINGS")) - - @property - def PRUNE_TRACE_READS(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("PRUNE_TRACE_READS")) - - @property - def FINAL_RESULT(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("FINAL_RESULT")) - - - - class FindingLevelAst: def __init__(self, tb: type_builder.TypeBuilder): _tb = tb._tb # type: ignore (we know how to use this private attribute) @@ -224,7 +138,7 @@ class RecordKindAst: def __init__(self, tb: type_builder.TypeBuilder): _tb = tb._tb # type: ignore (we know how to use this private attribute) self._bldr = _tb.enum("RecordKind") - self._values: typing.Set[str] = set([ "DECISION", "PREFERENCE", "CONSTRAINT", "FACT", "REFERENCE", "EPISODE", ]) + self._values: typing.Set[str] = set([ "DECISION", "PREFERENCE", "CONSTRAINT", "FACT", "REFERENCE", ]) self._vals = RecordKindValues(self._bldr, self._values) def type(self) -> baml_py.FieldType: @@ -271,10 +185,6 @@ def FACT(self) -> type_builder.EnumValueViewer: def REFERENCE(self) -> type_builder.EnumValueViewer: return type_builder.EnumValueViewer(self.__bldr.value("REFERENCE")) - @property - def EPISODE(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("EPISODE")) - @@ -322,25 +232,25 @@ def ARCHIVED(self) -> type_builder.EnumValueViewer: # ######################################################################### -# Generated classes 9 +# Generated classes 5 # ######################################################################### -class ExtractAgentStepAst: +class DurableRecordDraftAst: def __init__(self, tb: type_builder.TypeBuilder): _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("ExtractAgentStep") - self._properties: typing.Set[str] = set([ "action", "rationale", "read_trace", "search_context", "get_context", "save_context", "revise_context", "note_trace_findings", "prune_trace_reads", "final_result", ]) - self._props = ExtractAgentStepProperties(self._bldr, self._properties) + self._bldr = _tb.class_("DurableRecordDraft") 
+ self._properties: typing.Set[str] = set([ "kind", "title", "body", "status", "valid_from", "valid_until", "decision", "why", "alternatives", "consequences", ]) + self._props = DurableRecordDraftProperties(self._bldr, self._properties) def type(self) -> baml_py.FieldType: return self._bldr.field() @property - def props(self) -> "ExtractAgentStepProperties": + def props(self) -> "DurableRecordDraftProperties": return self._props -class ExtractAgentStepViewer(ExtractAgentStepAst): +class DurableRecordDraftViewer(DurableRecordDraftAst): def __init__(self, tb: type_builder.TypeBuilder): super().__init__(tb) @@ -350,7 +260,7 @@ def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPro -class ExtractAgentStepProperties: +class DurableRecordDraftProperties: def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): self.__bldr = bldr self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 @@ -358,150 +268,64 @@ def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): @property - def action(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("action")) - - @property - def rationale(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("rationale")) - - @property - def read_trace(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("read_trace")) - - @property - def search_context(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("search_context")) - - @property - def get_context(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("get_context")) - - @property - def save_context(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("save_context")) + def kind(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("kind")) @property - def revise_context(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("revise_context")) + def title(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("title")) @property - def note_trace_findings(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("note_trace_findings")) + def body(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("body")) @property - def prune_trace_reads(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("prune_trace_reads")) + def status(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("status")) @property - def final_result(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("final_result")) - - - - -class FinalResultArgsAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("FinalResultArgs") - self._properties: typing.Set[str] = set([ "completion_summary", ]) - self._props = FinalResultArgsProperties(self._bldr, self._properties) - - def type(self) -> baml_py.FieldType: - return 
self._bldr.field() - - @property - def props(self) -> "FinalResultArgsProperties": - return self._props - - -class FinalResultArgsViewer(FinalResultArgsAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] - - - -class FinalResultArgsProperties: - def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): - self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 - - + def valid_from(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("valid_from")) @property - def completion_summary(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("completion_summary")) - + def valid_until(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("valid_until")) - - -class GetContextArgsAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("GetContextArgs") - self._properties: typing.Set[str] = set([ "record_ids", "include_versions", "detail", ]) - self._props = GetContextArgsProperties(self._bldr, self._properties) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - @property - def props(self) -> "GetContextArgsProperties": - return self._props - - -class GetContextArgsViewer(GetContextArgsAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] - - - -class GetContextArgsProperties: - def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): - self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 - - + def decision(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("decision")) @property - def record_ids(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("record_ids")) + def why(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("why")) @property - def include_versions(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("include_versions")) + def alternatives(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("alternatives")) @property - def detail(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("detail")) + def consequences(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("consequences")) -class NoteTraceFindingsArgsAst: +class EpisodeDraftAst: def __init__(self, tb: type_builder.TypeBuilder): _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("NoteTraceFindingsArgs") - self._properties: typing.Set[str] = set([ "theme", "line", "quote", "level", ]) - self._props = 
NoteTraceFindingsArgsProperties(self._bldr, self._properties) + self._bldr = _tb.class_("EpisodeDraft") + self._properties: typing.Set[str] = set([ "title", "body", "status", "user_intent", "what_happened", "outcomes", ]) + self._props = EpisodeDraftProperties(self._bldr, self._properties) def type(self) -> baml_py.FieldType: return self._bldr.field() @property - def props(self) -> "NoteTraceFindingsArgsProperties": + def props(self) -> "EpisodeDraftProperties": return self._props -class NoteTraceFindingsArgsViewer(NoteTraceFindingsArgsAst): +class EpisodeDraftViewer(EpisodeDraftAst): def __init__(self, tb: type_builder.TypeBuilder): super().__init__(tb) @@ -511,7 +335,7 @@ def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPro -class NoteTraceFindingsArgsProperties: +class EpisodeDraftProperties: def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): self.__bldr = bldr self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 @@ -519,122 +343,48 @@ def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): @property - def theme(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("theme")) - - @property - def line(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("line")) - - @property - def quote(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("quote")) + def title(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("title")) @property - def level(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("level")) - - - - -class PruneTraceReadsArgsAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("PruneTraceReadsArgs") - self._properties: typing.Set[str] = set([ "start_lines", ]) - self._props = PruneTraceReadsArgsProperties(self._bldr, self._properties) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - - @property - def props(self) -> "PruneTraceReadsArgsProperties": - return self._props - - -class PruneTraceReadsArgsViewer(PruneTraceReadsArgsAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] - - - -class PruneTraceReadsArgsProperties: - def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): - self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 - - + def body(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("body")) @property - def start_lines(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("start_lines")) - + def status(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("status")) - - -class ReadTraceArgsAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("ReadTraceArgs") - self._properties: 
typing.Set[str] = set([ "start_line", "line_count", ]) - self._props = ReadTraceArgsProperties(self._bldr, self._properties) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - @property - def props(self) -> "ReadTraceArgsProperties": - return self._props - - -class ReadTraceArgsViewer(ReadTraceArgsAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] - - - -class ReadTraceArgsProperties: - def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): - self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 - - + def user_intent(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("user_intent")) @property - def start_line(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("start_line")) + def what_happened(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("what_happened")) @property - def line_count(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("line_count")) + def outcomes(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("outcomes")) -class ReviseContextArgsAst: +class SynthesizedExtractionAst: def __init__(self, tb: type_builder.TypeBuilder): _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("ReviseContextArgs") - self._properties: typing.Set[str] = set([ "record_id", "reason", "kind", "title", "body", "status", "valid_from", "valid_until", "decision", "why", "alternatives", "consequences", "user_intent", "what_happened", "outcomes", ]) - self._props = ReviseContextArgsProperties(self._bldr, self._properties) + self._bldr = _tb.class_("SynthesizedExtraction") + self._properties: typing.Set[str] = set([ "episode", "durable_records", "completion_summary", ]) + self._props = SynthesizedExtractionProperties(self._bldr, self._properties) def type(self) -> baml_py.FieldType: return self._bldr.field() @property - def props(self) -> "ReviseContextArgsProperties": + def props(self) -> "SynthesizedExtractionProperties": return self._props -class ReviseContextArgsViewer(ReviseContextArgsAst): +class SynthesizedExtractionViewer(SynthesizedExtractionAst): def __init__(self, tb: type_builder.TypeBuilder): super().__init__(tb) @@ -644,7 +394,7 @@ def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPro -class ReviseContextArgsProperties: +class SynthesizedExtractionProperties: def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): self.__bldr = bldr self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 @@ -652,84 +402,36 @@ def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): @property - def record_id(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("record_id")) - - @property - def reason(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("reason")) - - @property - def kind(self) -> type_builder.ClassPropertyViewer: - 
return type_builder.ClassPropertyViewer(self.__bldr.property("kind")) - - @property - def title(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("title")) - - @property - def body(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("body")) - - @property - def status(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("status")) - - @property - def valid_from(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("valid_from")) - - @property - def valid_until(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("valid_until")) - - @property - def decision(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("decision")) - - @property - def why(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("why")) + def episode(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("episode")) @property - def alternatives(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("alternatives")) + def durable_records(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("durable_records")) @property - def consequences(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("consequences")) - - @property - def user_intent(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("user_intent")) - - @property - def what_happened(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("what_happened")) - - @property - def outcomes(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("outcomes")) + def completion_summary(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("completion_summary")) -class SaveContextArgsAst: +class TraceWindowFindingAst: def __init__(self, tb: type_builder.TypeBuilder): _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("SaveContextArgs") - self._properties: typing.Set[str] = set([ "kind", "title", "body", "status", "valid_from", "valid_until", "decision", "why", "alternatives", "consequences", "user_intent", "what_happened", "outcomes", ]) - self._props = SaveContextArgsProperties(self._bldr, self._properties) + self._bldr = _tb.class_("TraceWindowFinding") + self._properties: typing.Set[str] = set([ "theme", "level", "line", "quote", "note", ]) + self._props = TraceWindowFindingProperties(self._bldr, self._properties) def type(self) -> baml_py.FieldType: return self._bldr.field() @property - def props(self) -> "SaveContextArgsProperties": + def props(self) -> "TraceWindowFindingProperties": return self._props -class SaveContextArgsViewer(SaveContextArgsAst): +class TraceWindowFindingViewer(TraceWindowFindingAst): def __init__(self, tb: type_builder.TypeBuilder): super().__init__(tb) @@ -739,7 +441,7 @@ def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPro -class SaveContextArgsProperties: +class TraceWindowFindingProperties: def 
__init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): self.__bldr = bldr self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 @@ -747,76 +449,44 @@ def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): @property - def kind(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("kind")) - - @property - def title(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("title")) - - @property - def body(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("body")) - - @property - def status(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("status")) - - @property - def valid_from(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("valid_from")) - - @property - def valid_until(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("valid_until")) - - @property - def decision(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("decision")) - - @property - def why(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("why")) - - @property - def alternatives(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("alternatives")) + def theme(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("theme")) @property - def consequences(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("consequences")) + def level(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("level")) @property - def user_intent(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("user_intent")) + def line(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("line")) @property - def what_happened(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("what_happened")) + def quote(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("quote")) @property - def outcomes(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("outcomes")) + def note(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("note")) -class SearchContextArgsAst: +class TraceWindowScanAst: def __init__(self, tb: type_builder.TypeBuilder): _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("SearchContextArgs") - self._properties: typing.Set[str] = set([ "query", "kind", "status", "valid_at", "include_archived", "limit", ]) - self._props = SearchContextArgsProperties(self._bldr, self._properties) + self._bldr = _tb.class_("TraceWindowScan") + self._properties: typing.Set[str] = set([ "episode_update", "durable_findings", "implementation_findings", "discarded_noise", ]) + self._props = TraceWindowScanProperties(self._bldr, self._properties) def type(self) -> baml_py.FieldType: return 
self._bldr.field() @property - def props(self) -> "SearchContextArgsProperties": + def props(self) -> "TraceWindowScanProperties": return self._props -class SearchContextArgsViewer(SearchContextArgsAst): +class TraceWindowScanViewer(TraceWindowScanAst): def __init__(self, tb: type_builder.TypeBuilder): super().__init__(tb) @@ -826,7 +496,7 @@ def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPro -class SearchContextArgsProperties: +class TraceWindowScanProperties: def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): self.__bldr = bldr self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 @@ -834,28 +504,20 @@ def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): @property - def query(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("query")) - - @property - def kind(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("kind")) - - @property - def status(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("status")) + def episode_update(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("episode_update")) @property - def valid_at(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("valid_at")) + def durable_findings(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("durable_findings")) @property - def include_archived(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("include_archived")) + def implementation_findings(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("implementation_findings")) @property - def limit(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("limit")) + def discarded_noise(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("discarded_noise")) diff --git a/baml_agents/baml_client/type_map.py b/baml_agents/baml_client/type_map.py index 42453ca..6f67bf0 100644 --- a/baml_agents/baml_client/type_map.py +++ b/baml_agents/baml_client/type_map.py @@ -16,35 +16,21 @@ type_map = { - "types.ExtractAgentStep": types.ExtractAgentStep, - "stream_types.ExtractAgentStep": stream_types.ExtractAgentStep, + "types.DurableRecordDraft": types.DurableRecordDraft, + "stream_types.DurableRecordDraft": stream_types.DurableRecordDraft, - "types.FinalResultArgs": types.FinalResultArgs, - "stream_types.FinalResultArgs": stream_types.FinalResultArgs, + "types.EpisodeDraft": types.EpisodeDraft, + "stream_types.EpisodeDraft": stream_types.EpisodeDraft, - "types.GetContextArgs": types.GetContextArgs, - "stream_types.GetContextArgs": stream_types.GetContextArgs, + "types.SynthesizedExtraction": types.SynthesizedExtraction, + "stream_types.SynthesizedExtraction": stream_types.SynthesizedExtraction, - "types.NoteTraceFindingsArgs": types.NoteTraceFindingsArgs, - "stream_types.NoteTraceFindingsArgs": stream_types.NoteTraceFindingsArgs, + "types.TraceWindowFinding": types.TraceWindowFinding, + "stream_types.TraceWindowFinding": stream_types.TraceWindowFinding, - "types.PruneTraceReadsArgs": types.PruneTraceReadsArgs, - 
"stream_types.PruneTraceReadsArgs": stream_types.PruneTraceReadsArgs, + "types.TraceWindowScan": types.TraceWindowScan, + "stream_types.TraceWindowScan": stream_types.TraceWindowScan, - "types.ReadTraceArgs": types.ReadTraceArgs, - "stream_types.ReadTraceArgs": stream_types.ReadTraceArgs, - - "types.ReviseContextArgs": types.ReviseContextArgs, - "stream_types.ReviseContextArgs": stream_types.ReviseContextArgs, - - "types.SaveContextArgs": types.SaveContextArgs, - "stream_types.SaveContextArgs": stream_types.SaveContextArgs, - - "types.SearchContextArgs": types.SearchContextArgs, - "stream_types.SearchContextArgs": stream_types.SearchContextArgs, - - - "types.ExtractAction": types.ExtractAction, "types.FindingLevel": types.FindingLevel, diff --git a/baml_agents/baml_client/types.py b/baml_agents/baml_client/types.py index e7716e2..95d7f90 100644 --- a/baml_agents/baml_client/types.py +++ b/baml_agents/baml_client/types.py @@ -37,19 +37,9 @@ def get_checks(checks: typing.Dict[CheckName, Check]) -> typing.List[Check]: def all_succeeded(checks: typing.Dict[CheckName, Check]) -> bool: return all(check.status == "succeeded" for check in get_checks(checks)) # ######################################################################### -# Generated enums (4) +# Generated enums (3) # ######################################################################### -class ExtractAction(str, Enum): - READ_TRACE = "READ_TRACE" - SEARCH_CONTEXT = "SEARCH_CONTEXT" - GET_CONTEXT = "GET_CONTEXT" - SAVE_CONTEXT = "SAVE_CONTEXT" - REVISE_CONTEXT = "REVISE_CONTEXT" - NOTE_TRACE_FINDINGS = "NOTE_TRACE_FINDINGS" - PRUNE_TRACE_READS = "PRUNE_TRACE_READS" - FINAL_RESULT = "FINAL_RESULT" - class FindingLevel(str, Enum): DECISION = "DECISION" PREFERENCE = "PREFERENCE" @@ -65,88 +55,52 @@ class RecordKind(str, Enum): CONSTRAINT = "CONSTRAINT" FACT = "FACT" REFERENCE = "REFERENCE" - EPISODE = "EPISODE" class RecordStatus(str, Enum): ACTIVE = "ACTIVE" ARCHIVED = "ARCHIVED" # ######################################################################### -# Generated classes (9) +# Generated classes (5) # ######################################################################### -class ExtractAgentStep(BaseModel): - action: ExtractAction - rationale: str = Field(description='One short sentence explaining why this is the next action. Do not include hidden chain-of-thought.') - read_trace: typing.Optional["ReadTraceArgs"] = None - search_context: typing.Optional["SearchContextArgs"] = None - get_context: typing.Optional["GetContextArgs"] = None - save_context: typing.Optional["SaveContextArgs"] = None - revise_context: typing.Optional["ReviseContextArgs"] = None - note_trace_findings: typing.Optional["NoteTraceFindingsArgs"] = None - prune_trace_reads: typing.Optional["PruneTraceReadsArgs"] = None - final_result: typing.Optional["FinalResultArgs"] = None - -class FinalResultArgs(BaseModel): - completion_summary: str - -class GetContextArgs(BaseModel): - record_ids: typing.List[str] = Field(description='Record IDs returned by search_context or list_context. 
Fetch before any revision.') - include_versions: typing.Optional[bool] = Field(default=None, description='Whether to include prior versions.') - detail: typing.Optional[typing.Union[typing_extensions.Literal['concise'], typing_extensions.Literal['detailed']]] = Field(default=None, description='Use concise unless full version history is needed.') - -class NoteTraceFindingsArgs(BaseModel): - theme: typing.Optional[str] = Field(default=None, description='Short theme label for this finding. Leave empty only for the no-findings checkpoint.') - line: typing.Optional[int] = Field(default=None, description='1-based trace line with supporting evidence. Leave empty only for the no-findings checkpoint.') - quote: typing.Optional[str] = Field(default=None, description='Short evidence quote from the trace. Leave empty only for the no-findings checkpoint.') - level: typing.Optional[FindingLevel] = Field(default=None, description='Durable levels only for reusable context; implementation for trace-local evidence.') - -class PruneTraceReadsArgs(BaseModel): - start_lines: typing.List[int] = Field(description='1-based start_line values from earlier read_trace calls, such as 1, 101, or 201. Never use context token counts.') - -class ReadTraceArgs(BaseModel): - start_line: typing.Optional[int] = None - line_count: typing.Optional[int] = None - -class ReviseContextArgs(BaseModel): - record_id: str - reason: str = Field(description='Short reason for the revision.') - kind: RecordKind = Field(description='Must match the fetched record kind; revise_context cannot change kind.') - title: str = Field(description='Complete improved title.') - body: str = Field(description='Complete improved body.') - status: typing.Optional[RecordStatus] = Field(default=None, description='Lifecycle status to keep or set.') +class DurableRecordDraft(BaseModel): + kind: RecordKind = Field(description='Durable record kind.') + title: str = Field(description='Short standalone durable title.') + body: str = Field(description='Compact standalone durable body.') + status: typing.Optional[RecordStatus] = Field(default=None, description='Usually active for reusable durable records.') valid_from: typing.Optional[str] = None valid_until: typing.Optional[str] = None decision: typing.Optional[str] = None why: typing.Optional[str] = None alternatives: typing.Optional[str] = None consequences: typing.Optional[str] = None - user_intent: typing.Optional[str] = None - what_happened: typing.Optional[str] = None - outcomes: typing.Optional[str] = None - -class SaveContextArgs(BaseModel): - kind: RecordKind = Field(description='Use episode for the mandatory session summary; use durable kinds only for reusable project context.') - title: str = Field(description='Short standalone title for the lasting point or episode.') - body: str = Field(description='Compact standalone body. 
Do not include trace-local command logs or copied errors.') - status: typing.Optional[RecordStatus] = Field(default=None, description='Use archived for routine episodes with no durable signal; active for reusable durable records.') - valid_from: typing.Optional[str] = Field(default=None, description='Optional validity start timestamp.') - valid_until: typing.Optional[str] = Field(default=None, description='Optional validity end timestamp.') - decision: typing.Optional[str] = Field(default=None, description='Required only for kind=decision; omit for facts, preferences, constraints, references, and episodes.') - why: typing.Optional[str] = Field(default=None, description='Required only for kind=decision when rationale exists; omit when the trace says no durable rationale was supplied.') - alternatives: typing.Optional[str] = Field(default=None, description='Optional decision alternatives. Omit for non-decisions.') - consequences: typing.Optional[str] = Field(default=None, description='Optional direct application guidance for the same durable point.') - user_intent: typing.Optional[str] = Field(default=None, description='Required when kind=episode. Concise statement of what the user wanted in this source session.') - what_happened: typing.Optional[str] = Field(default=None, description='Required when kind=episode. Concise statement of what the session actually did.') - outcomes: typing.Optional[str] = Field(default=None, description='Optional concise episode outcome.') - -class SearchContextArgs(BaseModel): - query: str = Field(description='Natural-language query for the durable meaning, not \'*\' or an empty browse request.') - kind: typing.Optional[RecordKind] = Field(default=None, description='Optional filter. Use only when the desired record kind is known.') - status: typing.Optional[RecordStatus] = Field(default=None, description='Optional lifecycle filter.') - valid_at: typing.Optional[str] = Field(default=None, description='Optional timestamp for historical lookup.') - include_archived: typing.Optional[bool] = Field(default=None, description='Whether archived records should be included.') - limit: typing.Optional[int] = Field(default=None, description='Maximum hits to return.') + +class EpisodeDraft(BaseModel): + title: typing.Optional[str] = Field(default=None, description='Optional short title for the current-session episode. Runtime derives one if omitted.') + body: typing.Optional[str] = Field(default=None, description='Compact episode body. If omitted, runtime builds it from user_intent and what_happened.') + status: typing.Optional[RecordStatus] = Field(default=None, description='Use archived for routine/no-durable sessions; active only when the episode itself remains useful.') + user_intent: typing.Optional[str] = Field(default=None, description='What the user wanted in this source session. Runtime fills a generic fallback if omitted.') + what_happened: typing.Optional[str] = Field(default=None, description='What the session actually did. 
Runtime fills a generic fallback if omitted.') + outcomes: typing.Optional[str] = Field(default=None, description='Optional concise outcome.') + +class SynthesizedExtraction(BaseModel): + episode: "EpisodeDraft" = Field(description='Exactly one current-session episode record draft.') + durable_records: typing.List["DurableRecordDraft"] = Field(description='Zero or more durable records.') + completion_summary: typing.Optional[str] = Field(default=None, description='Brief summary of extraction work for final_result/reporting.') + +class TraceWindowFinding(BaseModel): + theme: str = Field(description='Short stable theme for this finding.') + level: FindingLevel = Field(description='Use durable levels for reusable context, implementation for local/noisy evidence.') + line: typing.Optional[int] = Field(default=None, description='1-based supporting line when the window gives one.') + quote: typing.Optional[str] = Field(default=None, description='Short supporting quote from the current window.') + note: str = Field(description='Compact semantic finding. Avoid command logs and copied errors.') + +class TraceWindowScan(BaseModel): + episode_update: typing.Optional[str] = Field(default=None, description='Compact update for the final episode summary. May be omitted when this window adds nothing.') + durable_findings: typing.List["TraceWindowFinding"] = Field(description='Reusable decisions, preferences, constraints, facts, and references found in this window.') + implementation_findings: typing.List["TraceWindowFinding"] = Field(description='Implementation evidence, discarded hypotheses, and local details useful only as support/noise.') + discarded_noise: typing.List[str] = Field(description='Short descriptions of noisy categories intentionally not saved.') # ######################################################################### # Generated type aliases (0) diff --git a/baml_agents/baml_extract_agent/graph.py b/baml_agents/baml_extract_agent/graph.py index 0150694..3edef0d 100644 --- a/baml_agents/baml_extract_agent/graph.py +++ b/baml_agents/baml_extract_agent/graph.py @@ -1,8 +1,7 @@ -"""LangGraph ReAct loop whose LLM decisions are produced by BAML.""" +"""Windowed LangGraph extraction pipeline whose LLM steps are produced by BAML.""" from __future__ import annotations -from collections import Counter from datetime import datetime, timezone import math import operator @@ -17,28 +16,25 @@ from lerim.agents.extract import _format_existing_record_manifest from lerim.config.settings import get_config from lerim.agents.tools import ( + CONTEXT_SOFT_PRESSURE_PCT, _TOKENS_PER_CHAR, MODEL_CONTEXT_TOKEN_LIMIT, ContextDeps, - _classify_context_pressure, - _first_uncovered_offset, + TRACE_MAX_CHUNK_BYTES, + TRACE_MAX_LINE_BYTES, compute_request_budget, ) from lerim.context import ProjectIdentity, resolve_project_identity -from lerim.context.spec import DURABLE_FINDING_LEVELS, IMPLEMENTATION_FINDING_LEVELS from baml_extract_agent.tool_bridge import ( build_tool_context, - execute_step, - format_observation, - observation_to_state, + persist_synthesized_extraction, prepare_context_deps, - tool_manifest, ) -MODEL_NAME = "gemma4:e4b" -BAML_PROVIDER = "ollama" +MODEL_NAME = "MiniMax-M2.7" +BAML_PROVIDER = "minimax" OLLAMA_BASE_URL = "http://127.0.0.1:11434/v1" MINIMAX_BASE_URL = "https://api.minimax.io/v1" MINIMAX_TEMPERATURE_FLOOR = 0.01 @@ -47,6 +43,9 @@ BAML_HTTP_TIME_TO_FIRST_TOKEN_TIMEOUT_MS = 120_000 BAML_HTTP_IDLE_TIMEOUT_MS = 30_000 BAML_HTTP_REQUEST_TIMEOUT_MS = 300_000 +WINDOW_RESERVE_TOKENS = 30_000 
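+# WINDOW_RESERVE_TOKENS is held back from the soft context budget, while the
+# MIN/MAX window clamps below bound how much raw trace text
+# _window_char_budget admits into one scan window.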
+MIN_WINDOW_CHARS = 20_000 +MAX_WINDOW_CHARS = TRACE_MAX_CHUNK_BYTES BAML_RECOVERABLE_ERROR_NAMES = { "BamlClientFinishReasonError", "BamlClientHttpError", @@ -55,12 +54,19 @@ } -class ExtractGraphState(TypedDict, total=False): - """Mutable state carried through the BAML ReAct graph.""" +class WindowExtractGraphState(TypedDict, total=False): + """State for the windowed BAML extraction pipeline.""" observations: Annotated[list[dict[str, Any]], operator.add] llm_calls: int - pending_step: Any + next_line: int + trace_total_lines: int + current_window: dict[str, Any] + episode_updates: Annotated[list[str], operator.add] + durable_findings: Annotated[list[dict[str, Any]], operator.add] + implementation_findings: Annotated[list[dict[str, Any]], operator.add] + discarded_noise: Annotated[list[str], operator.add] + synthesized: Any done: bool completion_summary: str @@ -94,14 +100,22 @@ def run_baml_extraction( session_started_at=started_at, model_name=model_name, ) - graph = build_extract_graph( + existing_record_manifest = _format_existing_record_manifest( + context_db_path=resolved_context_db_path, + project_identity=identity, + ) + run_instruction = _build_run_instruction( + context_db_path=resolved_context_db_path, + project_identity=identity, + trace_path=resolved_trace_path, + session_started_at=started_at, + existing_record_manifest=existing_record_manifest, + ) + graph = build_windowed_extract_graph( deps=deps, - run_instruction=_build_run_instruction( - context_db_path=resolved_context_db_path, - project_identity=identity, - trace_path=resolved_trace_path, - session_started_at=started_at, - ), + trace_path=resolved_trace_path, + run_instruction=run_instruction, + existing_record_manifest=existing_record_manifest, model_name=model_name, baml_provider=baml_provider, api_base_url=api_base_url, @@ -112,14 +126,20 @@ def run_baml_extraction( progress=progress, ) final_state = graph.invoke( - {"observations": [], "llm_calls": 0, "done": False, "completion_summary": ""} + { + "observations": [], + "llm_calls": 0, + "next_line": 1, + "trace_total_lines": _trace_line_count(resolved_trace_path), + "done": False, + "completion_summary": "", + } ) - if not final_state.get("done"): - raise RuntimeError("BAML extraction graph stopped before final_result.") return { "completion_summary": final_state.get("completion_summary", ""), "llm_calls": final_state.get("llm_calls", 0), "observations": final_state.get("observations", []), + "done": bool(final_state.get("done")), "context_db_path": str(resolved_context_db_path), "project_id": identity.project_id, "session_id": session_id, @@ -128,10 +148,12 @@ def run_baml_extraction( } -def build_extract_graph( +def build_windowed_extract_graph( *, deps: ContextDeps, + trace_path: Path, run_instruction: str, + existing_record_manifest: str, model_name: str, baml_provider: str, api_base_url: str | None, @@ -141,9 +163,8 @@ def build_extract_graph( max_llm_calls: int, progress: bool = False, ): - """Compile the LangGraph state machine for one extraction run.""" + """Compile the windowed scan -> synthesize -> persist extraction graph.""" runtime_context = build_tool_context(deps) - live_tool_manifest = tool_manifest() baml_runtime = _baml_client_for_model( model_name=model_name, baml_provider=baml_provider, @@ -153,84 +174,368 @@ def build_extract_graph( ollama_base_url=ollama_base_url, ) - def llm_call(state: ExtractGraphState) -> dict[str, Any]: - """Ask BAML to choose the next ReAct action.""" + def read_window(state: WindowExtractGraphState) -> dict[str, Any]: + 
"""Read the next budgeted trace window into transient state.""" + total_lines = int(state.get("trace_total_lines") or 0) + start_line = int(state.get("next_line") or 1) + if start_line > total_lines: + return {"current_window": {}} + char_budget = _window_char_budget( + state=state, + run_instruction=run_instruction, + existing_record_manifest=existing_record_manifest, + ) + window = _read_trace_window( + trace_path=trace_path, + start_line=start_line, + total_lines=total_lines, + char_budget=char_budget, + ) + deps.trace_total_lines = total_lines + deps.read_ranges.append((window["start_line"] - 1, window["end_line"])) + if progress: + print( + f" baml window {window['start_line']}-{window['end_line']} " + f"chars={len(window['text'])}", + flush=True, + ) + return { + "current_window": window, + "next_line": int(window["end_line"]) + 1, + "observations": [ + { + "action": "read_window", + "ok": True, + "content": window["header"], + "args": { + "start_line": window["start_line"], + "end_line": window["end_line"], + "char_budget": char_budget, + }, + "done": False, + "completion_summary": "", + } + ], + } + + def scan_window(state: WindowExtractGraphState) -> dict[str, Any]: + """Scan the current window into compact episode/findings state.""" llm_calls = int(state.get("llm_calls") or 0) if llm_calls >= max_llm_calls: raise RuntimeError( f"BAML extraction exceeded max_llm_calls={max_llm_calls}." ) - observations = state.get("observations", []) - scratchpad = _scratchpad(observations, deps) + window = state.get("current_window") or {} + if not window.get("text"): + return {} if progress: - print(f" baml llm {llm_calls + 1}/{max_llm_calls}", flush=True) - try: - step = baml_runtime.DecideNextExtractStep( - runtime_dashboard=_runtime_dashboard(deps, observations), + print(f" baml scan {llm_calls + 1}/{max_llm_calls}", flush=True) + result, retry_observations, attempts = _call_baml_with_retries( + lambda: baml_runtime.ScanTraceWindow( run_instruction=run_instruction, - tool_manifest=live_tool_manifest, - scratchpad=scratchpad, - ) - except Exception as exc: - if not _is_recoverable_baml_error(exc): - raise - model_retry_count = sum( - 1 for observation in observations if observation.get("action") == "model_retry" + prior_episode_summary=_episode_summary(state), + prior_findings_summary=_findings_summary(state), + trace_window=str(window["text"]), + ), + stage="scan_window", + progress=progress, + ) + payload = _model_payload(result) + episode_update = str(payload.get("episode_update") or "").strip() + durable = [_model_payload(item) for item in payload.get("durable_findings") or []] + implementation = [ + _model_payload(item) + for item in payload.get("implementation_findings") or [] + ] + noise = [ + str(item).strip() + for item in payload.get("discarded_noise") or [] + if str(item).strip() + ] + return { + "llm_calls": llm_calls + attempts, + "episode_updates": [episode_update] if episode_update else [], + "durable_findings": durable, + "implementation_findings": implementation, + "discarded_noise": noise, + "observations": [ + *retry_observations, + { + "action": "scan_window", + "ok": True, + "content": ( + f"window={window.get('start_line')}-{window.get('end_line')} " + f"durable={len(durable)} implementation={len(implementation)}" + ), + "args": { + "start_line": window.get("start_line"), + "end_line": window.get("end_line"), + }, + "done": False, + "completion_summary": "", + }, + ], + } + + def synthesize_records(state: WindowExtractGraphState) -> dict[str, Any]: + """Synthesize final 
episode and durable record candidates.""" + llm_calls = int(state.get("llm_calls") or 0) + if llm_calls >= max_llm_calls: + raise RuntimeError( + f"BAML extraction exceeded max_llm_calls={max_llm_calls}." ) - if model_retry_count >= MAX_BAML_MODEL_RETRIES: - raise RuntimeError( - f"BAML extraction exceeded model_retry_limit={MAX_BAML_MODEL_RETRIES}." - ) from exc - step = { - "action": "model_retry", - "content": _model_retry_observation(exc), - } - return {"pending_step": step, "llm_calls": llm_calls + 1} - - def tool_node(state: ExtractGraphState) -> dict[str, Any]: - """Execute the BAML-selected action with Lerim's real tools.""" - pending_step = state["pending_step"] - if isinstance(pending_step, dict) and pending_step.get("action") == "model_retry": - if progress: - print(" baml tool model_retry", flush=True) - return { - "observations": [ - { - "action": "model_retry", - "ok": False, - "content": pending_step["content"], - "args": {}, - "done": False, - "completion_summary": "", - } - ], - "done": False, - "completion_summary": "", - } - observation = execute_step(pending_step, runtime_context) if progress: - print(f" baml tool {observation.action} ok={observation.ok}", flush=True) + print(f" baml synth {llm_calls + 1}/{max_llm_calls}", flush=True) + result, retry_observations, attempts = _call_baml_with_retries( + lambda: baml_runtime.SynthesizeExtractRecords( + run_instruction=run_instruction, + episode_summary=_episode_summary(state), + durable_findings_summary=_durable_findings_summary(state), + existing_record_manifest=existing_record_manifest or "(none)", + ), + stage="synthesize_records", + progress=progress, + ) + payload = _model_payload(result) + durable_count = len(payload.get("durable_records") or []) + return { + "llm_calls": llm_calls + attempts, + "synthesized": result, + "observations": [ + *retry_observations, + { + "action": "synthesize_records", + "ok": True, + "content": f"durable_records={durable_count}", + "args": {}, + "done": False, + "completion_summary": "", + }, + ], + } + + def persist_records(state: WindowExtractGraphState) -> dict[str, Any]: + """Persist synthesized records and finish the graph.""" + runtime_context.deps.findings_checked = True + observations, done, completion_summary = persist_synthesized_extraction( + state.get("synthesized"), + runtime_context, + ) + if progress: + print(f" baml persist done={done}", flush=True) return { - "observations": [observation_to_state(observation)], - "done": observation.done, - "completion_summary": observation.completion_summary, + "observations": observations, + "done": done, + "completion_summary": completion_summary, } - def should_continue(state: ExtractGraphState) -> str: - """Route back to the LLM until final_result validates.""" - if bool(state.get("done")): - return END - return "llm_call" - - graph = StateGraph(ExtractGraphState) - graph.add_node("llm_call", llm_call) - graph.add_node("tool_node", tool_node) - graph.add_edge(START, "llm_call") - graph.add_edge("llm_call", "tool_node") - graph.add_conditional_edges("tool_node", should_continue, ["llm_call", END]) + def after_scan(state: WindowExtractGraphState) -> str: + """Continue scanning until all trace lines are covered.""" + next_line = int(state.get("next_line") or 1) + total_lines = int(state.get("trace_total_lines") or 0) + if next_line <= total_lines: + return "read_window" + return "synthesize_records" + + graph = StateGraph(WindowExtractGraphState) + graph.add_node("read_window", read_window) + graph.add_node("scan_window", scan_window) 
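+    # Pipeline wiring: read_window -> scan_window loop until after_scan sees
+    # full trace coverage, then synthesize_records -> persist_records -> END.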
+ graph.add_node("synthesize_records", synthesize_records) + graph.add_node("persist_records", persist_records) + graph.add_edge(START, "read_window") + graph.add_edge("read_window", "scan_window") + graph.add_conditional_edges( + "scan_window", + after_scan, + ["read_window", "synthesize_records"], + ) + graph.add_edge("synthesize_records", "persist_records") + graph.add_edge("persist_records", END) return graph.compile() +def _trace_line_count(trace_path: Path) -> int: + """Return the number of lines in a trace file.""" + try: + return sum(1 for _ in trace_path.open("r", encoding="utf-8")) + except OSError: + return 0 + + +def _window_char_budget( + *, + state: WindowExtractGraphState, + run_instruction: str, + existing_record_manifest: str, +) -> int: + """Compute how much raw trace text can fit in the next scan window.""" + soft_tokens = int(MODEL_CONTEXT_TOKEN_LIMIT * CONTEXT_SOFT_PRESSURE_PCT) + state_text = "\n".join( + [ + run_instruction, + existing_record_manifest, + _episode_summary(state), + _durable_findings_summary(state), + _implementation_summary(state), + ] + ) + state_tokens = math.ceil(len(state_text) * _TOKENS_PER_CHAR) + available_tokens = max( + MIN_WINDOW_CHARS * _TOKENS_PER_CHAR, + soft_tokens - WINDOW_RESERVE_TOKENS - state_tokens, + ) + return min( + MAX_WINDOW_CHARS, + max(MIN_WINDOW_CHARS, int(available_tokens / _TOKENS_PER_CHAR)), + ) + + +def _read_trace_window( + *, + trace_path: Path, + start_line: int, + total_lines: int, + char_budget: int, +) -> dict[str, Any]: + """Read as many complete trace lines as fit in the character budget.""" + numbered: list[str] = [] + current_chars = 0 + end_line = start_line - 1 + with trace_path.open("r", encoding="utf-8") as handle: + for line_number, raw_line in enumerate(handle, start=1): + if line_number < start_line: + continue + line = raw_line.rstrip("\n") + if len(line) > TRACE_MAX_LINE_BYTES: + dropped = len(line) - TRACE_MAX_LINE_BYTES + line = ( + line[:TRACE_MAX_LINE_BYTES] + + f" ... 
[truncated {dropped} chars from this line]" + ) + rendered = f"{line_number}\t{line}" + if numbered and current_chars + len(rendered) + 1 > char_budget: + break + numbered.append(rendered) + current_chars += len(rendered) + 1 + end_line = line_number + if current_chars >= char_budget: + break + if not numbered and start_line <= total_lines: + numbered.append(f"{start_line}\t") + end_line = start_line + header = f"[{total_lines} lines, window {start_line}-{end_line}]" + if end_line < total_lines: + header += f" — next window starts at line {end_line + 1}" + return { + "start_line": start_line, + "end_line": end_line, + "header": header, + "text": header + "\n" + "\n".join(numbered), + } + + +def _call_baml_with_retries(call, *, stage: str, progress: bool) -> tuple[Any, list[dict[str, Any]], int]: + """Run one BAML call with graph-visible recoverable retries.""" + observations: list[dict[str, Any]] = [] + attempts = 0 + while True: + attempts += 1 + try: + return call(), observations, attempts + except Exception as exc: + if not _is_recoverable_baml_error(exc) or attempts > MAX_BAML_MODEL_RETRIES: + raise + if progress: + print(f" baml retry {stage} attempt={attempts}", flush=True) + observations.append( + { + "action": "model_retry", + "ok": False, + "content": _model_retry_observation(exc), + "args": {"stage": stage, "attempt": attempts}, + "done": False, + "completion_summary": "", + } + ) + + +def _model_payload(value: Any) -> dict[str, Any]: + """Convert generated BAML objects into plain dictionaries.""" + if hasattr(value, "model_dump"): + return _plain_value(value.model_dump(exclude_none=True)) + if isinstance(value, dict): + return _plain_value( + {key: item for key, item in value.items() if item is not None} + ) + if value is None: + return {} + return _plain_value(getattr(value, "__dict__", {})) + + +def _plain_value(value: Any) -> Any: + """Convert enum-ish values recursively into JSON-like values.""" + enum_value = getattr(value, "value", None) + if enum_value is not None: + return enum_value + if isinstance(value, dict): + return {key: _plain_value(item) for key, item in value.items()} + if isinstance(value, list): + return [_plain_value(item) for item in value] + return value + + +def _episode_summary(state: WindowExtractGraphState) -> str: + """Render compact rolling episode summary.""" + updates = [item for item in state.get("episode_updates", []) if item] + return "\n".join(f"- {item}" for item in updates) or "(none yet)" + + +def _findings_summary(state: WindowExtractGraphState) -> str: + """Render all prior findings for the next scan window.""" + return "\n\n".join( + [ + "Durable findings:\n" + _durable_findings_summary(state), + "Implementation/noise findings:\n" + _implementation_summary(state), + ] + ) + + +def _durable_findings_summary(state: WindowExtractGraphState) -> str: + """Render durable findings compactly for BAML prompts.""" + findings = state.get("durable_findings", []) + if not findings: + return "(none)" + return "\n".join(_format_finding(finding) for finding in findings) + + +def _implementation_summary(state: WindowExtractGraphState) -> str: + """Render implementation findings and discarded noise compactly.""" + parts: list[str] = [] + findings = state.get("implementation_findings", []) + if findings: + parts.append("\n".join(_format_finding(finding) for finding in findings)) + noise = state.get("discarded_noise", []) + if noise: + parts.append("Discarded noise:\n" + "\n".join(f"- {item}" for item in noise)) + return "\n".join(parts) if parts else 
"(none)" + + +def _format_finding(finding: dict[str, Any]) -> str: + """Render one scan finding as one compact bullet.""" + level = str(finding.get("level") or "").strip() + theme = str(finding.get("theme") or "").strip() + note = str(finding.get("note") or "").strip() + line = finding.get("line") + quote = str(finding.get("quote") or "").strip() + prefix = f"- {level}: {theme}" if level or theme else "-" + details = note + if line: + details += f" (line {line})" + if quote: + details += f" Evidence: {quote}" + return f"{prefix}: {details}".strip() + + def _baml_client_for_model( *, model_name: str, @@ -315,16 +620,18 @@ def _build_run_instruction( project_identity: ProjectIdentity, trace_path: Path, session_started_at: str, + existing_record_manifest: str | None = None, ) -> str: """Build the same extraction task framing used by Lerim's current agent.""" try: trace_line_count = sum(1 for _ in trace_path.open("r", encoding="utf-8")) except OSError: trace_line_count = 0 - existing_record_manifest = _format_existing_record_manifest( - context_db_path=context_db_path, - project_identity=project_identity, - ) + if existing_record_manifest is None: + existing_record_manifest = _format_existing_record_manifest( + context_db_path=context_db_path, + project_identity=project_identity, + ) source_time_text = str(session_started_at or "").strip() or "unknown" prompt = ( "Read the trace, write exactly one episode record, and write only the strongest " @@ -342,69 +649,3 @@ def _build_run_instruction( + (f"\n\n{existing_record_manifest}" if existing_record_manifest else "") ) return prompt - - -def _scratchpad(observations: list[dict[str, Any]], deps: ContextDeps) -> str: - """Render prior actions for the next BAML decision.""" - if not observations: - return "No prior actions." 
- return "\n\n".join( - format_observation(observation, deps) for observation in observations[-20:] - ) - - -def _runtime_dashboard(deps: ContextDeps, observations: list[dict[str, Any]]) -> str: - """Render the same context-pressure and notes dashboards as the extract agent.""" - scratchpad_chars = sum( - len(format_observation(observation, deps)) for observation in observations - ) - approx_tokens = math.ceil(scratchpad_chars * _TOKENS_PER_CHAR) - pct = approx_tokens / MODEL_CONTEXT_TOKEN_LIMIT - pressure = _classify_context_pressure(pct) - deps.last_context_tokens = approx_tokens - deps.last_context_fill_ratio = pct - context_summary = ( - f"CONTEXT: {approx_tokens}/{MODEL_CONTEXT_TOKEN_LIMIT} ({pct:.0%}) [{pressure}]" - ) - return context_summary + "\n" + _notes_dashboard(deps) - - -def _notes_dashboard(deps: ContextDeps) -> str: - """Render the notes and trace-coverage dashboard used between model turns.""" - findings = deps.notes - if not findings: - summary = "NOTES: 0 findings" - if deps.findings_checked: - summary += " (checkpoint recorded)" - else: - counts = Counter(finding.level for finding in findings) - durable_findings = [ - finding for finding in findings if finding.level in DURABLE_FINDING_LEVELS - ] - theme_source = durable_findings or findings - themes = Counter(finding.theme for finding in theme_source) - durable = sum(counts.get(level, 0) for level in DURABLE_FINDING_LEVELS) - implementation = sum( - counts.get(level, 0) for level in IMPLEMENTATION_FINDING_LEVELS - ) - top_themes = ", ".join( - f"{theme}({count})" for theme, count in themes.most_common(5) - ) - summary = ( - f"NOTES: {len(findings)} findings ({durable} durable, {implementation} implementation) " - f"across {len(themes)} theme(s)" - ) - if top_themes: - summary += f"\nTop themes: {top_themes}" - if deps.read_ranges: - next_uncovered = _first_uncovered_offset( - deps.read_ranges, - int(deps.trace_total_lines), - ) - covered_chunks = len({(int(start), int(end)) for start, end in deps.read_ranges}) - summary += ( - f"\nTrace reads: {covered_chunks} chunk(s)" - f"\nNext unread offset: {next_uncovered if next_uncovered is not None else 'none'}" - f"\nPruned offsets: {sorted(deps.pruned_offsets) if deps.pruned_offsets else 'none'}" - ) - return summary diff --git a/baml_agents/baml_extract_agent/tool_bridge.py b/baml_agents/baml_extract_agent/tool_bridge.py index 0eb8004..30b71a0 100644 --- a/baml_agents/baml_extract_agent/tool_bridge.py +++ b/baml_agents/baml_extract_agent/tool_bridge.py @@ -1,30 +1,33 @@ -"""Bridge BAML-selected actions to Lerim's existing extraction tools.""" +"""Bridge synthesized BAML records to Lerim's canonical extraction tools.""" from __future__ import annotations from dataclasses import dataclass -import inspect import json from pathlib import Path -from typing import Any, Callable +import textwrap +from typing import Any from pydantic_ai import ModelRetry, RunContext from pydantic_ai.models.test import TestModel from pydantic_ai.usage import RunUsage from lerim.agents import tools as extract_tools -from lerim.agents.toolsets import EXTRACT_TOOLS from lerim.agents.tools import ContextDeps -from lerim.agents.tools import TRACE_MAX_LINES_PER_READ, _first_uncovered_offset from lerim.context import ContextStore, ProjectIdentity - - -TOOL_NAMES = tuple(tool.__name__ for tool in EXTRACT_TOOLS) +from lerim.context.spec import ( + MAX_DURABLE_BODY_CHARS, + MAX_EPISODE_BODY_CHARS, + MAX_EPISODE_OUTCOMES_CHARS, + MAX_EPISODE_USER_INTENT_CHARS, + MAX_EPISODE_WHAT_HAPPENED_CHARS, + 
MAX_RECORD_TITLE_CHARS, +) @dataclass(frozen=True) class ToolObservation: - """Observed result after dispatching one ReAct action.""" + """Observed result after dispatching one persistence action.""" action: str ok: bool @@ -39,21 +42,6 @@ def build_tool_context(deps: ContextDeps) -> RunContext[ContextDeps]: return RunContext(deps=deps, model=TestModel(), usage=RunUsage()) -def tool_manifest() -> str: - """Render the live Lerim extraction tool signatures for the BAML prompt.""" - lines: list[str] = [] - for name in TOOL_NAMES: - func = getattr(extract_tools, name) - signature = _public_signature(func) - doc = inspect.getdoc(func) or "" - first_line = doc.splitlines()[0] if doc else "" - lines.append(f"- {name}{signature}: {first_line}") - lines.append( - "- final_result(completion_summary: str): Finish after exactly one episode exists." - ) - return "\n".join(lines) - - def count_current_session_episodes(deps: ContextDeps) -> int: """Count current-session episode records in the canonical context store.""" store = ContextStore(deps.context_db_path) @@ -70,6 +58,70 @@ def count_current_session_episodes(deps: ContextDeps) -> int: return int(rows.get("count") or 0) +def persist_synthesized_extraction( + synthesized: Any, + ctx: RunContext[ContextDeps], +) -> tuple[list[dict[str, Any]], bool, str]: + """Persist synthesized episode and durable records through the real tools.""" + payload = _tool_payload(synthesized) + completion_summary = str(payload.get("completion_summary") or "").strip() + episode = _prepare_episode(payload.get("episode") or {}, completion_summary) + durable_records = [ + record + for record in (_tool_payload(item) for item in payload.get("durable_records") or []) + if _is_persistable_durable_record(record) + ] + + observations: list[dict[str, Any]] = [] + for index, record in enumerate([episode, *durable_records]): + default_status = "archived" if record.get("kind") == "episode" else "active" + args = _with_defaults(record, {"status": default_status}) + try: + content = _save_context(ctx, args) + observation = ToolObservation( + action="save_context", + ok=True, + content=content, + args=args, + ) + except ModelRetry as exc: + observation = ToolObservation( + action="save_context", + ok=False, + content=f"Tool retry needed: {exc}", + args=args, + ) + except Exception as exc: + observation = ToolObservation( + action="save_context", + ok=False, + content=f"Tool error: {type(exc).__name__}: {exc}", + args=args, + ) + observations.append(observation_to_state(observation)) + if index == 0 and not observation.ok: + break + + episode_count = count_current_session_episodes(ctx.deps) + done = episode_count == 1 + if not completion_summary: + completion_summary = "Extraction completed." + final_observation = ToolObservation( + action="final_result", + ok=done, + content=( + completion_summary + if done + else f"final_result refused: expected exactly one episode record, found {episode_count}." 
+ ), + args={}, + done=done, + completion_summary=completion_summary if done else "", + ) + observations.append(observation_to_state(final_observation)) + return observations, done, completion_summary if done else "" + + def prepare_context_deps( *, context_db_path: Path, @@ -105,90 +157,6 @@ def prepare_context_deps( ) -def execute_step( - step: Any, - ctx: RunContext[ContextDeps], -) -> ToolObservation: - """Dispatch one BAML-selected step to the matching Lerim tool.""" - action = _action_name(getattr(step, "action", "")) - forced_read = _read_next_uncovered_chunk(action, ctx) - if forced_read is not None: - return forced_read - - if action == "final_result": - summary = _final_summary(step) - episode_count = count_current_session_episodes(ctx.deps) - if episode_count != 1: - return ToolObservation( - action=action, - ok=False, - content=( - "final_result refused: expected exactly one episode record " - f"for this session, found {episode_count}." - ), - args={}, - ) - return ToolObservation( - action=action, - ok=True, - content=summary, - args={}, - done=True, - completion_summary=summary, - ) - - args = _args_for_action(step, action) - if args is None: - return ToolObservation( - action=action, - ok=False, - content=f"Missing argument object for action {action}.", - args={}, - ) - - try: - content = _dispatch_tool(action, ctx, args) - except ModelRetry as exc: - content = f"Tool retry needed: {exc}" - return ToolObservation(action=action, ok=False, content=content, args=args) - except Exception as exc: - content = f"Tool error: {type(exc).__name__}: {exc}" - return ToolObservation(action=action, ok=False, content=content, args=args) - return ToolObservation(action=action, ok=True, content=content, args=args) - - -def _read_next_uncovered_chunk( - action: str, - ctx: RunContext[ContextDeps], -) -> ToolObservation | None: - """Force full trace coverage before model-directed non-read actions.""" - if action == "read_trace" or ctx.deps.trace_path is None: - return None - try: - total_lines = sum( - 1 for _ in ctx.deps.trace_path.open("r", encoding="utf-8") - ) - except OSError: - return None - next_offset = _first_uncovered_offset(ctx.deps.read_ranges, total_lines) - if next_offset is None: - return None - args = { - "start_line": next_offset + 1, - "line_count": TRACE_MAX_LINES_PER_READ, - } - try: - content = _read_trace(ctx, args) - except Exception as exc: - return ToolObservation( - action="read_trace", - ok=False, - content=f"Forced trace read failed: {type(exc).__name__}: {exc}", - args=args, - ) - return ToolObservation(action="read_trace", ok=True, content=content, args=args) - - def observation_to_state(observation: ToolObservation) -> dict[str, Any]: """Convert a tool observation into serializable graph state.""" return { @@ -201,64 +169,50 @@ def observation_to_state(observation: ToolObservation) -> dict[str, Any]: } -def format_observation(observation: dict[str, Any], deps: ContextDeps) -> str: - """Format a tool result as compact scratchpad text for the next BAML call.""" - action = str(observation.get("action") or "") - status = "ok" if bool(observation.get("ok")) else "error" - content = _pruned_content(observation, deps) - return f"Action: {action}\nStatus: {status}\nObservation:\n{content}" - - -def _dispatch_tool( - action: str, - ctx: RunContext[ContextDeps], - args: dict[str, Any], -) -> str: - """Call the raw Lerim tool function for one normalized action.""" - handlers: dict[str, Callable[[RunContext[ContextDeps], dict[str, Any]], str]] = { - "read_trace": 
_read_trace, - "search_context": _search_context, - "get_context": _get_context, - "save_context": _save_context, - "revise_context": _revise_context, - "note_trace_findings": _note_trace_findings, - "prune_trace_reads": _prune_trace_reads, - } - handler = handlers.get(action) - if handler is None: - return f"Unknown action: {action}" - return handler(ctx, args) - - -def _read_trace(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: - """Call read_trace with defaulted numeric arguments.""" - return extract_tools.read_trace( - ctx, - start_line=int(args.get("start_line") or 1), - line_count=int(args.get("line_count") or extract_tools.TRACE_MAX_LINES_PER_READ), +def _prepare_episode(value: Any, completion_summary: str) -> dict[str, Any]: + """Normalize a synthesized episode draft into a valid save_context payload.""" + episode = _tool_payload(value) + episode["kind"] = "episode" + if not str(episode.get("title") or "").strip(): + episode["title"] = _episode_title_from_payload(episode, completion_summary) + if not str(episode.get("user_intent") or "").strip(): + episode["user_intent"] = "Extract context from the source trace." + if not str(episode.get("what_happened") or "").strip(): + fallback = ( + str(episode.get("body") or "").strip() + or completion_summary + or "The trace was scanned and summarized for context extraction." + ) + episode["what_happened"] = fallback + if not str(episode.get("body") or "").strip(): + episode["body"] = _episode_body_from_structured_fields(episode) + episode["title"] = _compact_text(episode.get("title"), MAX_RECORD_TITLE_CHARS) + episode["user_intent"] = _compact_text( + episode.get("user_intent"), + MAX_EPISODE_USER_INTENT_CHARS, ) - - -def _search_context(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: - """Call search_context with only its supported arguments.""" - return extract_tools.search_context( - ctx, - query=str(args.get("query") or ""), - kind=args.get("kind"), - status=args.get("status"), - valid_at=args.get("valid_at"), - include_archived=bool(args.get("include_archived") or False), - limit=int(args.get("limit") or 8), + episode["what_happened"] = _compact_text( + episode.get("what_happened"), + MAX_EPISODE_WHAT_HAPPENED_CHARS, ) - - -def _get_context(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: - """Call get_context with BAML-provided record IDs.""" - return extract_tools.get_context( - ctx, - record_ids=list(args.get("record_ids") or []), - include_versions=bool(args.get("include_versions") or False), - detail=str(args.get("detail") or "detailed"), + episode["outcomes"] = _compact_optional_text( + episode.get("outcomes"), + MAX_EPISODE_OUTCOMES_CHARS, + ) + episode["body"] = _compact_text(episode.get("body"), MAX_EPISODE_BODY_CHARS) + return episode + + +def _is_persistable_durable_record(record: dict[str, Any]) -> bool: + """Return whether a synthesized durable record is complete enough to save.""" + kind = str(record.get("kind") or "").strip().lower() + if not kind or kind == "episode": + return False + record["title"] = _compact_text(record.get("title"), MAX_RECORD_TITLE_CHARS) + record["body"] = _compact_text(record.get("body"), MAX_DURABLE_BODY_CHARS) + return bool( + str(record.get("title") or "").strip() + and str(record.get("body") or "").strip() ) @@ -267,103 +221,13 @@ def _save_context(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: return extract_tools.save_context(ctx, **_with_defaults(args, {"status": "active"})) -def _revise_context(ctx: RunContext[ContextDeps], args: dict[str, 
Any]) -> str: - """Call revise_context with a complete replacement payload.""" - return extract_tools.revise_context(ctx, **args) - - -def _note_trace_findings(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: - """Call note_trace_findings, allowing the no-findings checkpoint form.""" - if not any(args.get(name) for name in ("theme", "line", "quote")): - return extract_tools.note_trace_findings(ctx) - return extract_tools.note_trace_findings( - ctx, - theme=str(args.get("theme") or ""), - line=args.get("line") or 0, - quote=str(args.get("quote") or ""), - level=str(args.get("level") or "implementation"), - ) - - -def _prune_trace_reads(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str: - """Call prune_trace_reads with the start-line list.""" - return extract_tools.prune_trace_reads( - ctx, - start_lines=[int(value) for value in args.get("start_lines") or []], - ) - - -def _pruned_content(observation: dict[str, Any], deps: ContextDeps) -> str: - """Return a read_trace stub when that chunk has been pruned.""" - action = str(observation.get("action") or "") - if action != "read_trace": - return str(observation.get("content") or "") - args = observation.get("args") if isinstance(observation.get("args"), dict) else {} - offset = max(0, int(args.get("start_line") or 1) - 1) - if offset in deps.pruned_offsets: - return "[pruned]" - return str(observation.get("content") or "") - - -def _args_for_action(step: Any, action: str) -> dict[str, Any] | None: - """Return the BAML argument object matching an action.""" - field_name = action - payload = getattr(step, field_name, None) - if payload is None: - return None - if hasattr(payload, "model_dump"): - return _coerce_tool_value(payload.model_dump(exclude_none=True)) - if isinstance(payload, dict): - return _coerce_tool_value( - {key: value for key, value in payload.items() if value is not None} - ) - return _coerce_tool_value( - json.loads(json.dumps(payload, default=lambda value: value.__dict__)) - ) - - -def _action_name(action: Any) -> str: - """Normalize a BAML enum value into a Lerim tool name.""" - raw = str(getattr(action, "value", action) or "").strip() - aliases = { - "READ_TRACE": "read_trace", - "SEARCH_CONTEXT": "search_context", - "GET_CONTEXT": "get_context", - "SAVE_CONTEXT": "save_context", - "REVISE_CONTEXT": "revise_context", - "NOTE_TRACE_FINDINGS": "note_trace_findings", - "PRUNE_TRACE_READS": "prune_trace_reads", - "FINAL_RESULT": "final_result", - } - return aliases.get(raw, raw.lower()) - - -def _final_summary(step: Any) -> str: - """Extract final_result.completion_summary from a generated BAML step.""" - payload = getattr(step, "final_result", None) - if payload is None: - return "" - return str(getattr(payload, "completion_summary", "") or "").strip() - - -def _with_defaults( - args: dict[str, Any], defaults: dict[str, Any] -) -> dict[str, Any]: +def _with_defaults(args: dict[str, Any], defaults: dict[str, Any]) -> dict[str, Any]: """Fill omitted optional tool arguments with Lerim's defaults.""" payload = dict(defaults) payload.update(args) return payload -def _public_signature(func: Callable[..., str]) -> str: - """Return a tool signature without the PydanticAI context parameter.""" - signature = inspect.signature(func) - params = list(signature.parameters.values()) - if params and params[0].name == "ctx": - params = params[1:] - return "(" + ", ".join(str(param) for param in params) + ")" - - def _coerce_tool_value(value: Any) -> Any: """Convert generated BAML enum values into plain JSON-like values.""" 
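+    # Generated BAML enum members carry their wire string in `.value` (per the
+    # generated-client convention assumed here); unwrap that first, then coerce
+    # containers recursively so tool arguments become plain JSON-style values.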
enum_value = getattr(value, "value", None) @@ -374,3 +238,65 @@ def _coerce_tool_value(value: Any) -> Any: if isinstance(value, list): return [_coerce_tool_value(item) for item in value] return value + + +def _tool_payload(value: Any) -> dict[str, Any]: + """Return a plain dict from a generated BAML/Pydantic-ish object.""" + if hasattr(value, "model_dump"): + return _coerce_tool_value(value.model_dump(exclude_none=True)) + if isinstance(value, dict): + return _coerce_tool_value( + {key: item for key, item in value.items() if item is not None} + ) + if value is None: + return {} + return _coerce_tool_value( + json.loads(json.dumps(value, default=lambda item: item.__dict__)) + ) + + +def _episode_body_from_structured_fields(episode: dict[str, Any]) -> str: + """Build an episode body when synthesis provided structured fields only.""" + user_intent = str(episode.get("user_intent") or "").strip() + what_happened = str(episode.get("what_happened") or "").strip() + outcomes = str(episode.get("outcomes") or "").strip() + parts = [] + if user_intent: + parts.append(f"User intent: {user_intent}") + if what_happened: + parts.append(f"What happened: {what_happened}") + if outcomes: + parts.append(f"Outcome: {outcomes}") + return " ".join(parts) or "The session was scanned and summarized for context extraction." + + +def _episode_title_from_payload(episode: dict[str, Any], completion_summary: str) -> str: + """Derive a compact episode title from available episode text.""" + candidates = [ + episode.get("user_intent"), + episode.get("what_happened"), + episode.get("outcomes"), + completion_summary, + episode.get("body"), + ] + for candidate in candidates: + text = str(candidate or "").strip() + if text: + return text[:80].rstrip(" .") or "Extracted session" + return "Extracted session" + + +def _compact_text(value: Any, max_chars: int) -> str: + """Return non-empty text that fits the canonical record field budget.""" + text = " ".join(str(value or "").split()) + if not text: + return "" + if len(text) <= max_chars: + return text + return textwrap.shorten(text, width=max_chars, placeholder="...") + + +def _compact_optional_text(value: Any, max_chars: int) -> str | None: + """Return optional compact text, preserving None for empty values.""" + text = _compact_text(value, max_chars) + return text or None diff --git a/baml_agents/baml_src/extract_react.baml b/baml_agents/baml_src/extract_react.baml index 8d44c9b..fbdc616 100644 --- a/baml_agents/baml_src/extract_react.baml +++ b/baml_agents/baml_src/extract_react.baml @@ -1,47 +1,9 @@ -retry_policy ExtractAgentRetry { - max_retries 1 - strategy { - type exponential_backoff - delay_ms 500 - multiplier 2 - max_delay_ms 8000 - } -} - -client OllamaGemma4E4B { - provider "openai-generic" - retry_policy ExtractAgentRetry - options { - base_url "http://127.0.0.1:11434/v1" - model "gemma4:e4b" - temperature 0.0 - http { - connect_timeout_ms 10000 - time_to_first_token_timeout_ms 120000 - idle_timeout_ms 30000 - request_timeout_ms 300000 - } - } -} - -enum ExtractAction { - READ_TRACE @alias("read_trace") @description("Read the next numbered trace chunk.") - SEARCH_CONTEXT @alias("search_context") @description("Search existing DB-backed context by meaning.") - GET_CONTEXT @alias("get_context") @description("Fetch full context records by record ID before any revision.") - SAVE_CONTEXT @alias("save_context") @description("Create one episode or durable context record.") - REVISE_CONTEXT @alias("revise_context") @description("Revise a fetched context record with a 
complete improved payload.") - NOTE_TRACE_FINDINGS @alias("note_trace_findings") @description("Checkpoint durable or implementation findings from a long trace.") - PRUNE_TRACE_READS @alias("prune_trace_reads") @description("Prune earlier trace chunks after findings are captured.") - FINAL_RESULT @alias("final_result") @description("Finish only after exactly one current-session episode record exists.") -} - enum RecordKind { DECISION @alias("decision") @description("A durable project decision with decision and why fields.") PREFERENCE @alias("preference") @description("A stable user or workflow preference.") CONSTRAINT @alias("constraint") @description("A durable invariant, limit, or must/cannot rule.") FACT @alias("fact") @description("A durable project fact or setup truth.") REFERENCE @alias("reference") @description("A pointer to an external source of truth.") - EPISODE @alias("episode") @description("The mandatory per-session episode record; use this for session summaries.") } enum RecordStatus { @@ -59,862 +21,264 @@ enum FindingLevel { IMPLEMENTATION @alias("implementation") @description("Trace-local implementation evidence or discarded hypothesis.") } -class ReadTraceArgs { - start_line int? - line_count int? -} - -class SearchContextArgs { - query string @description("Natural-language query for the durable meaning, not '*' or an empty browse request.") - kind RecordKind? @description("Optional filter. Use only when the desired record kind is known.") - status RecordStatus? @description("Optional lifecycle filter.") - valid_at string? @description("Optional timestamp for historical lookup.") - include_archived bool? @description("Whether archived records should be included.") - limit int? @description("Maximum hits to return.") +class TraceWindowFinding { + theme string @description("Short stable theme for this finding.") + level FindingLevel @description("Use durable levels for reusable context, implementation for local/noisy evidence.") + line int? @description("1-based supporting line when the window gives one.") + quote string? @description("Short supporting quote from the current window.") + note string @description("Compact semantic finding. Avoid command logs and copied errors.") } -class GetContextArgs { - record_ids string[] @description("Record IDs returned by search_context or list_context. Fetch before any revision.") - include_versions bool? @description("Whether to include prior versions.") - detail "concise" | "detailed"? @description("Use concise unless full version history is needed.") +class TraceWindowScan { + episode_update string? @description("Compact update for the final episode summary. May be omitted when this window adds nothing.") + durable_findings TraceWindowFinding[] @description("Reusable decisions, preferences, constraints, facts, and references found in this window.") + implementation_findings TraceWindowFinding[] @description("Implementation evidence, discarded hypotheses, and local details useful only as support/noise.") + discarded_noise string[] @description("Short descriptions of noisy categories intentionally not saved.") } -class SaveContextArgs { - kind RecordKind @description("Use episode for the mandatory session summary; use durable kinds only for reusable project context.") - title string @description("Short standalone title for the lasting point or episode.") - body string @description("Compact standalone body. Do not include trace-local command logs or copied errors.") - status RecordStatus? 
@description("Use archived for routine episodes with no durable signal; active for reusable durable records.") - valid_from string? @description("Optional validity start timestamp.") - valid_until string? @description("Optional validity end timestamp.") - decision string? @description("Required only for kind=decision; omit for facts, preferences, constraints, references, and episodes.") - why string? @description("Required only for kind=decision when rationale exists; omit when the trace says no durable rationale was supplied.") - alternatives string? @description("Optional decision alternatives. Omit for non-decisions.") - consequences string? @description("Optional direct application guidance for the same durable point.") - user_intent string? @description("Required when kind=episode. Concise statement of what the user wanted in this source session.") - what_happened string? @description("Required when kind=episode. Concise statement of what the session actually did.") - outcomes string? @description("Optional concise episode outcome.") +class EpisodeDraft { + title string? @description("Optional short title for the current-session episode. Runtime derives one if omitted.") + body string? @description("Compact episode body. If omitted, runtime builds it from user_intent and what_happened.") + status RecordStatus? @description("Use archived for routine/no-durable sessions; active only when the episode itself remains useful.") + user_intent string? @description("What the user wanted in this source session. Runtime fills a generic fallback if omitted.") + what_happened string? @description("What the session actually did. Runtime fills a generic fallback if omitted.") + outcomes string? @description("Optional concise outcome.") } -class ReviseContextArgs { - record_id string - reason string @description("Short reason for the revision.") - kind RecordKind @description("Must match the fetched record kind; revise_context cannot change kind.") - title string @description("Complete improved title.") - body string @description("Complete improved body.") - status RecordStatus? @description("Lifecycle status to keep or set.") +class DurableRecordDraft { + kind RecordKind @description("Durable record kind.") + title string @description("Short standalone durable title.") + body string @description("Compact standalone durable body.") + status RecordStatus? @description("Usually active for reusable durable records.") valid_from string? valid_until string? decision string? why string? alternatives string? consequences string? - user_intent string? - what_happened string? - outcomes string? } -class NoteTraceFindingsArgs { - theme string? @description("Short theme label for this finding. Leave empty only for the no-findings checkpoint.") - line int? @description("1-based trace line with supporting evidence. Leave empty only for the no-findings checkpoint.") - quote string? @description("Short evidence quote from the trace. Leave empty only for the no-findings checkpoint.") - level FindingLevel? @description("Durable levels only for reusable context; implementation for trace-local evidence.") +class SynthesizedExtraction { + episode EpisodeDraft @description("Exactly one current-session episode record draft.") + durable_records DurableRecordDraft[] @description("Zero or more durable records.") + completion_summary string? 
@description("Brief summary of extraction work for final_result/reporting.") } -class PruneTraceReadsArgs { - start_lines int[] @description("1-based start_line values from earlier read_trace calls, such as 1, 101, or 201. Never use context token counts.") -} +function ScanTraceWindow( + run_instruction: string, + prior_episode_summary: string, + prior_findings_summary: string, + trace_window: string +) -> TraceWindowScan { + client MiniMaxM27 + prompt #" + {{ _.role("system") }} + You scan one window from a coding-agent trace for Lerim extraction. + Return only structured output. Do not save records and do not plan future tool calls. + Do not include tags, hidden reasoning, markdown, or prose. + The top-level output must include episode_update, durable_findings, implementation_findings, and discarded_noise. + Use an empty string or empty list when a field has no content. + Every durable_findings and implementation_findings item must be an object with: + - theme + - level: one of decision, preference, feedback, reference, constraint, fact, or implementation + - note + - optional line + - optional quote + Never return finding items as plain strings. + Never use confidence labels such as high, medium, or low as finding levels. + + Separate: + - episode_update: what happened in this session window, for the final episode record. + - durable_findings: reusable project/user context only. + - implementation_findings: local evidence, command work, discarded hypotheses, or support. + - discarded_noise: categories of content intentionally ignored. + + Durable signal means a decision, preference, constraint, fact, or reference likely useful beyond this trace. + Implementation detail alone is not durable signal. + A durable finding should help a future agent make a better decision in a new chat. + Save what a future agent would be relieved to already know: stable user preferences, chosen policies, durable constraints, source-of-truth references, and stable project or product truths. + If a detail only helps replay or debug this trace's terminal session, put it in implementation_findings or discarded_noise, not durable_findings. + Raw metrics, comparison tables, config edits, API wiring, package installation, debug commands, file paths, and stack traces are implementation_findings unless the user turns them into a reusable policy, preference, constraint, fact, or source-of-truth reference. + When the same topic has both user-level guidance and technical evidence, the user-level guidance is the durable finding; the technical evidence is implementation support. + For technical blockers, keep durable_findings at the capability, status, and implication level only. Put patch recipes, flags, functions, commands, exact metrics, and local debug steps in implementation_findings. + When a window mixes a durable point with local evidence, put only the reusable point in durable_findings and put the supporting local evidence in implementation_findings. + Use semantic meaning, not exact wording. Do not copy long code, logs, stack traces, or command transcripts. + If the window has no durable signal, return an empty durable_findings list. + If the window adds nothing meaningful to the episode, episode_update may be an empty string. -class FinalResultArgs { - completion_summary string -} + {{ _.role("user") }} + RUN INSTRUCTION: + {{ run_instruction }} -class ExtractAgentStep { - action ExtractAction - rationale string @description("One short sentence explaining why this is the next action. 
Do not include hidden chain-of-thought.") - read_trace ReadTraceArgs? - search_context SearchContextArgs? - get_context GetContextArgs? - save_context SaveContextArgs? - revise_context ReviseContextArgs? - note_trace_findings NoteTraceFindingsArgs? - prune_trace_reads PruneTraceReadsArgs? - final_result FinalResultArgs? + PRIOR EPISODE SUMMARY: + {{ prior_episode_summary }} + + PRIOR FINDINGS SUMMARY: + {{ prior_findings_summary }} + + TRACE WINDOW: + {{ trace_window }} + + {{ ctx.output_format }} + "# } -function DecideNextExtractStep( - runtime_dashboard: string, +function SynthesizeExtractRecords( run_instruction: string, - tool_manifest: string, - scratchpad: string -) -> ExtractAgentStep { - client OllamaGemma4E4B + episode_summary: string, + durable_findings_summary: string, + existing_record_manifest: string +) -> SynthesizedExtraction { + client MiniMaxM27 prompt #" {{ _.role("system") }} - - You are the Lerim extract agent. - Read one coding-agent trace, compress its signal, and write DB-backed context records. - - - - - Create exactly one episode record for the session. - - Create zero or more durable records only when the trace contains durable signal. - - The episode record is mandatory for every session, even if you also create or update durable records. - - Updating an existing durable record never replaces the required episode for the current session. - - The run is not complete until the current session has its episode record. - - Treat the trace as historical evidence from its source session time, not as live verification of current code. - - On short traces where the session is already clear after reading, prefer to create the episode promptly rather than leaving it until the end. - - Episode records must include `user_intent` and `what_happened`; do not put the whole episode only in `body`. - - Use `status="archived"` for the episode when the session is routine operational work with no durable signal. Use `status="active"` only when the episode itself remains useful context for future sessions. - - - - Durable signal means one of: - - decision - - preference - - constraint - - fact - - reference - - Implementation detail alone is not durable signal. - A temporary code-state finding, audit observation, open task, or release-risk report is not durable by itself. Promote it only when the trace establishes a reusable project rule, unresolved constraint, stable dependency, or standing source of truth. - - - + You synthesize final Lerim context records from scanned trace findings. + Return only structured output. + Do not include tags, hidden reasoning, markdown, or prose. + The top-level output must include episode, durable_records, and completion_summary. + Use an empty durable_records list when there is no durable signal. + Every durable_records item must be an object with kind, title, and body. + Optional durable record fields are status, valid_from, valid_until, decision, why, alternatives, and consequences. + Never return durable_records items as plain strings or as generic type/record objects. + + Create exactly one episode record for the current session. + Create durable records only for reusable decisions, preferences, constraints, facts, and references. + The episode says what the session did. Durable records say what future sessions should reuse. + Before creating each durable record, ask: would this change what a future agent does in a new chat? 
+ Prefer the small set of memories a future agent would be relieved to already know over exhaustive coverage of what happened. + Keep durable records compact, standalone, and deduplicated. + Do not create durable records for command logs, patch steps, generic programming knowledge, temporary diagnostics, or local implementation chatter. + If there is no reusable signal, durable_records must be empty and the episode should normally be archived. + Decision records must include decision and why when the rationale is present; use fact when there is no durable why. + Episode records should include user_intent and what_happened when available; the runtime can derive missing episode fields. + Treat the trace as historical evidence, not live verification of the current repo. + + Existing records, if listed, are only duplicate-risk context. Prefer skipping near-duplicates over creating duplicate durable records. + + Quality bar: - Store the reusable rule, decision, invariant, dependency, preference, or external pointer, not the story of the session. - One durable record should hold one durable point. - Direct consequences and application guidance usually stay inside that same record. - Create the minimum number of durable records that preserves distinct durable meanings. Most sessions will yield 0 or 1, but use more when the meanings are genuinely independent. - Classify each durable point into one canonical kind. Prefer the most specific supported kind, and do not duplicate the same point across multiple durable kinds. - Duplicates are worse than gaps. Skip uncertain candidates rather than spraying near-duplicates. - - Never create a second durable record in the same run for the same core claim. If you realize the first draft needs improvement, update or refine that record instead of creating another one. - - `constraint` and `reference` are first-class durable record kinds, not fallback categories. - + - Never create a second durable record in the same run for the same core claim. + - constraint and reference are first-class durable record kinds, not fallback categories. - + What not to save: - patch logs, command sequences, retries, timelines, or meeting-style recaps - code structure, file paths, git history, or storage mechanics by themselves - generic programming knowledge or facts already obvious from the repo - rejected lures, discarded explanations, or implementation-only distractions - - one-run validation findings, approval flow, queue state, DB resets, rebuilds, or - runtime diagnostics by themselves - - - - - Read the trace in chunks until the full trace is covered. Do not start writing while unread trace lines remain. - - Use the findings scratchpad for evidence from chunks you have already read. Notes are summarized back to you on later turns; do not record the same point again unless you learned something new. - - Keep each durable theme and its supporting implementation evidence together. Do not record a rejected lure or discarded explanation as its own durable finding/theme. - - If one apparent finding only applies, routes, or operationalizes another finding, keep them as one durable theme instead of separate durable themes. - - If the trace needs more than one read, call `note_trace_findings` once per useful finding with theme, line, quote, and level before saving or revising context. Call it with no arguments when the full trace has no reusable signal. - - If you read many chunks, prune older read results only after those chunks have already been captured in notes. 
- - Search existing context before creating a durable record whenever the trace suggests an earlier record, duplicate risk, or "same meaning vs new meaning" judgment. - - The injected existing-record manifest is only a shortlist. It is never enough evidence for a revision. - - Fetch full records before any revision, and fetch each plausible target when several nearby records could match. - - Revise only when a fetched record clearly carries the same meaning and needs repair. If the core claim differs, create a new record instead. - - When the trace says an existing durable rule is correct but needs tightening, clarification, or a better why, fetch that record and update it rather than leaving the weaker wording unchanged. - - Avoid cosmetic same-run revisions. Revise a same-run record only to fix a concrete durable-context error or prevent a duplicate. - - - - - The system may inject `CONTEXT:` messages showing approximate context pressure. At soft or hard pressure, prune old trace chunks after their findings are captured. - - The system may inject `NOTES:` messages summarizing findings and trace coverage. Use them as a progress dashboard, not as a replacement for reading unread trace lines. - - The findings scratchpad writes the dashboard for future turns; do not try to reread the dashboard with tools. - - - + - one-run validation findings, approval flow, queue state, DB resets, rebuilds, or runtime diagnostics by themselves + + Selection rules: - First separate findings into durable signal and implementation evidence. + - Prefer user-level guidance, chosen operating policy, and stable project truth over the technical evidence that revealed them. - Synthesize at the theme level. Usually one theme becomes one durable record. - Create multiple durable records only when the trace establishes multiple independent durable points, each with its own support. - Do not store one durable point as both a preference and a decision, a fact and a decision, or any other cross-kind duplicate. - A stable workflow preference is not also a decision unless the trace separately states an explicit project decision with rationale. - A dependency, setup, or environment truth without durable rationale is a fact, not also a decision. - - A failure caused by the current run's temporary validation setup is not itself a - durable environment truth. If the investigation reveals a stable requirement that - future sessions must apply, save that requirement as the durable point and omit - the temporary validation story. + - A failure caused by the current run's temporary validation setup is not itself a durable environment truth. If the investigation reveals a stable requirement that future sessions must apply, save that requirement as the durable point and omit the temporary validation story. - Merge candidates when one only states how to apply the other in local operations, routing, or ownership. - If two candidates share the same core claim, merge them. - If the difference is only evidence framing, symptom wording, or local-vs-CI phrasing around the same durable fact, keep one record and fold the extra context into it. - If one candidate is only the direct application or routing consequence of another, keep it inside the stronger record. - - Storage boundary plus per-component routing is one decision, not two. Keep the boundary as the record and fold the routing guidance into the same title/body. 
- If one candidate only says how different local components should apply the same project rule, keep that guidance inside the main record rather than creating a second durable record. - If one candidate only restates where local project components live or how an internal architecture is applied, keep it inside the stronger decision, fact, or constraint instead of creating a separate reference. - - If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule. The filtering guidance is evidence, not a second record. - - Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered. That is extraction guidance for this run, not project context. + - If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule. + - If a technical blocker is reusable, record only the capability affected, status or source of truth, and implication for future work. + - Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered. - Store durable records only when the lesson is likely reusable beyond this trace. - If a candidate is mainly about this trace's commands, files, or timeline, reject it. - Trace-local instructions about what to ignore in this session are not preferences unless they clearly express a broader standing workflow rule for future sessions. - - If the trace explicitly says the rationale is unknown or says not to invent one, do not create a `decision`; use `fact` instead. - - A stable setup, dependency, or environment requirement without a durable why is a `fact` even if it sounds like the current chosen setup. + - If the trace explicitly says the rationale is unknown or says not to invent one, do not create a decision; use fact instead. + - A stable setup, dependency, or environment requirement without a durable why is a fact even if it sounds like the current chosen setup. - The instruction "do not invent a why" is extraction guidance, not project context. - - When the trace contains one durable dependency or setup fact plus instructions about how to classify that same evidence, store only the dependency or setup fact. Do not turn the classification guidance into a separate `preference`. - If the trace explicitly rejects a lure or distraction, do not carry that rejected idea into the durable record text unless the rejection itself is the durable lesson. - - If this older trace conflicts with newer existing active records, do not create a new active durable record for the older claim. Preserve the historical session in the episode and let the newer active record remain current. - If a long noisy investigation resolves into one source-of-truth boundary, store only that boundary. Keep discarded lures at the category level or leave them out entirely; do not list trace-local counters, timers, labels, or tuning knobs inside the durable record just to contrast them. - When a discarded lure matters as evidence, keep it attached to the main durable theme as implementation context rather than storing it as a second durable theme. - If the episode summary contains clearly reusable decision, preference, constraint, fact, or reference, that point should usually also exist as its own durable record. - - Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode. 
The episode says what happened; the durable record stores what future sessions should reuse. - - Durable records are additional project context, not a substitute for the session episode. Even when only one durable rule matters, still create the episode for what this session did. - + - Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode. + - Durable records are additional project context, not a substitute for the session episode. + - Most traces should produce only a few durable records; create more only when each one would independently change future behavior. - + Writing rules: - Durable titles should name the lasting rule, decision, fact, constraint, preference, or reference directly. - Durable bodies should be compact, neutral, and standalone. - - When a durable decision prohibits or routes a named interface, data path, dependency, provider, or boundary, preserve that named subject in the record instead of replacing it with a broader abstraction. - - Prefer this shape for durable records: - 1. the durable point - 2. why it matters - 3. how to apply it later + - Prefer this shape for durable records: the durable point, why it matters, and how to apply it later. - Do not write durable records as meeting minutes, patch logs, or cleanup commentary. - Do not preserve trace-local commands, negotiation phrasing, or "this is not about X" sentences in final record text. + - Do not mention discarded implementation noise in durable record fields, including consequences. + - Do not include patch recipes, flags, function names, exact metrics, or local debug steps in durable record text. - Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored. - - Do not mention discarded implementation noise in durable record fields, including `consequences`. If details are non-durable, omit them entirely rather than saying they are non-durable. - - When the durable lesson is a source-of-truth rule, write the authoritative rule directly. Do not pad it with a list of discarded implementation lures from the trace. + - When the durable lesson is a source-of-truth rule, write the authoritative rule directly. - If a short contrast is still helpful, keep it abstract, such as "not worker-local state" or "not ephemeral local state". Do not enumerate examples in parentheses or comma-separated lists. - - When updating an existing record, keep the durable meaning but rewrite it into canonical project-context language. - - When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection. Do not imply that a bug, missing capability, or release blocker is current unless the trace itself establishes that it remains unresolved as durable project context. + - When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection. - Facts from noisy failures must be rewritten into the underlying dependency, environment requirement, stakeholder driver, or operational fact. - If a fact still reads like stderr, an exception symptom, or copied command output, rewrite it again before writing. - When the durable lesson is an environment or dependency requirement, do not center the fact on the observed failure symptom. Name the requirement directly and mention the symptom only if it is needed as brief supporting context. 
- If brief supporting context is useful, lead with the requirement and keep the symptom generic. Never include exception class names, quoted error fragments, or copied failure strings in the durable fact. - - If the candidate is mainly "this validation run failed until we changed the setup", - it belongs in the archived episode. If the candidate names a reusable setup or - runtime requirement discovered through that validation, keep the requirement and - drop the failure narrative. + - If the candidate is mainly "this validation run failed until we changed the setup", it belongs in the archived episode. If the candidate names a reusable setup or runtime requirement discovered through that validation, keep the requirement and drop the failure narrative. - When no durable rationale exists, do not spend the fact body explaining that the rationale is absent. Just state the stable dependency, setup requirement, or operational truth directly. - Do not quote or paraphrase trace instructions about how to classify the evidence inside the final fact body. Final fact text should describe the underlying truth, not the extraction rule you followed. - References must answer both "where should future sessions look?" and "when should they consult it?" - - Do not use `reference` for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than "consult this external source next time." - - Keep the episode concise: short title, short body, concise `user_intent`, `what_happened`, and `outcomes`. - - If the session is mostly routine operational work with little future value and no durable record, create the episode with `status="archived"`. - - - - - Stable workflow guidance from the user. Save corrections and confirmed non-obvious working style that should carry into future sessions. - Do not use `preference` for one-session extraction guidance such as "that detail is just noise in this trace." - - - A chosen approach or project rule that future work should follow and that is not obvious from code alone. - If the trace does not support a durable why, do not use `decision`. - - - A durable invariant, limit, or must/cannot rule that future work must respect. - - - A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth. - Use `fact` for stable setup or dependency truths when the trace explicitly says not to invent decision rationale. - - - A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo. - Use `reference` only when the enduring value is where to look later. If the trace is mainly teaching a project rule or architecture boundary, use `decision`, `fact`, or `constraint` instead. - - - - - - - - assistant patches a bug and writes a tidy summary - - user: "The diff is enough. Don't end with a recap every time." - - later turns continue with normal edits, tests, and review comments - - - Create one preference record about keeping replies terse and not appending redundant change recaps. - - - Store the file edit itself, or treat the correction as only a one-session scratch finding when it is clearly stable workflow guidance. 
- - - - - - - early turns discuss local refactors, temporary debug prints, and a flaky test - - midway, several ideas are tried and discarded - - late in the trace the user settles the architecture: durable project context lives in one store; hot runtime/session state lives in another - - the follow-on routing guidance is just how to apply that boundary - - - Create the required episode for the session and one decision record for the storage boundary. Keep the routing guidance inside the same record instead of splitting it into a second record. - - - Store the refactor noise, split one architectural choice into two near-duplicate records such as one decision for the boundary and a second local-use record for which component reads which store, or create a separate durable record whose only message is that the refactors and debug edits were noise. - - - - - - - the user makes one architectural choice, such as keeping durable context and hot operational state in separate stores - - the trace also mentions variable renames, label tweaks, temporary debug prints, and similar low-value cleanups - - the user explicitly says those local edits should not become durable context - - - Create the required episode and one durable record for the architectural choice only. Treat the explicit "those edits are just noise" instruction as extraction guidance for this run, not as its own record. - - - Create a second durable record whose message is that renames, label tweaks, or temporary debug code are non-durable, or let that noise-filtering instruction replace the required episode. - - - - - - - repeated failed commands and partial theories about why a media workflow is broken - - some guesses are ruled out - - the stable conclusion is operational: environments that run this workflow need a specific system dependency installed - - - Create one fact record for the dependency requirement in clean operational language. Lead with the missing dependency or environment requirement, and if you mention the failure at all, keep it generic rather than naming the exact exception class or copied command output. Still create the required episode for this session. - - - Store the raw exception text, center the record on the failure symptom, split one operational lesson into separate local-vs-CI facts, create a second durable record whose message is "do not invent a rationale here," keep the command history or debugging timeline, or write only the fact and skip the episode. - - - - - - - the user states one stable dependency or setup truth - - nearby turns add extraction guidance such as "this is a fact, not a decision" or "do not invent a why beyond the dependency" - - no broader workflow rule for future sessions is established - - - Create the required episode and one fact record for the stable dependency or setup truth only. - - - Create a second durable preference whose whole point is how to classify this trace, or store the meta-instruction instead of the underlying dependency fact. - - - - - - - the trace says image-enabled workflows require a system dependency in the environment - - the user also says not to invent policy rationale beyond that dependency fact - - - Write a fact such as: "Image-enabled workflows require libvips in the environment." Keep the body on the requirement and its effect. - - - Write a fact body such as: "Do not invent a policy reason here" or "No decision rationale was supplied." Those are meta comments about classification, not durable project context. 
- - - - - - - early chunks are noisy and keep circling local counters, timers, labels, and temporary tuning - - the final chunk clarifies that those were distractions - - the real durable lesson is a source-of-truth boundary: authoritative state must live in one persisted place that survives restart and failover - - - Create one durable record for the source-of-truth boundary. Mention restart or failover if it explains why the boundary matters, but keep any contrast abstract, such as "not worker-local state," rather than listing local counters or timers. - - - Write a durable record that carries over the rejected lure by naming worker-local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list. - - - - - - - the assistant starts from a partial repo note - - later the user clarifies that incident ownership and current status are tracked in an external dashboard or ticket system - - future sessions should consult that external system when this class of issue appears - - - Create one reference record that names the external source and when future sessions should consult it. - - - Center the record on local files, or turn it into a warning slogan about what not to trust locally. - - - - - - - run formatter - - fix a small lint complaint - - rerun tests - - confirm green - - no new rule, dependency, preference, or durable fact emerges - - - Create only an archived episode. - - - Invent a durable record from the sequence of routine commands. - - - - - - - the trace points at an earlier record that sounds nearby - - new evidence sharpens part of it, but you still need to decide whether the core claim stayed the same - - there may be more than one plausible existing record - - - Search first, fetch the plausible existing record, then either update it if the meaning matches or create a new record if the core claim is different. In both cases, still create the episode for this session. - - - Update from a shortlist or search preview alone, force an update when the new claim is only adjacent, or skip the episode because you already changed a durable record. - - - - - - - End the run with the `final_result` tool. - - Put the plain-text completion summary in `completion_summary`. - - Before `final_result`, ensure the current session already has exactly one episode record. - - If you have created durable records but no episode yet, stop and create the episode before `final_result`. - - If the episode contains the only copy of a reusable rule, invariant, dependency, source-of-truth pointer, or stable preference, stop and create the corresponding durable record before `final_result`. - - Do not end with free-form assistant text outside `final_result`. - - - - Do not turn filenames, storage mechanics, graph links, or evidence tables into the main record unless the durable rule is specifically about that boundary. - - - - - You are running as a BAML + LangGraph replica of the PydanticAI extract agent. - - You cannot call tools directly in this BAML harness. Return exactly one next tool call as typed JSON. - - Choose only an action whose tool name appears in Available tools. - - If a tool name is not listed in Available tools, it does not exist for this turn. - - Your first non-whitespace character must be `{`. - - Never output `` tags, hidden reasoning text, markdown, or prose before or after the JSON object. - - Return one JSON object only. Do not include markdown, self-corrections, multiple JSON blocks, or a plan that chains future actions. 
- - Choose only the immediate next action. When the production prompt requires an intermediate step before saving or finalizing, take that step first. - - If multiple actions are still required, return only the first missing action and omit all later actions. - - For multi-chunk traces, if `note_trace_findings` is available and no finding checkpoint appears in prior actions, choose `note_trace_findings` before any `save_context`, `revise_context`, or `final_result`; use empty args when the fully read trace has no reusable signal. - - Fill only the argument object for the selected action. - - Treat `final_result` as the PydanticAI structured final output. - - For `save_context kind=episode`, include `user_intent` and `what_happened`. - - For `save_context kind=decision`, include `decision` and `why`. - - For `prune_trace_reads`, use only earlier `read_trace` start_line values. - - After the required episode exists, choose another `save_context` only when a clearly reusable durable signal remains outside the episode; do not save trace-local implementation fixes as durable facts. - - Do not treat an episode record as a durable record. If a reusable decision, preference, constraint, fact, or reference remains only in trace or episode text, save that durable record before `final_result`. - - - - {{ runtime_dashboard }} - + - Do not use reference for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than "consult this external source next time." + - Keep the episode concise: short title, short body, concise user_intent, what_happened, and outcomes. + + Record types: + - preference: Stable workflow guidance from the user. Save corrections and confirmed non-obvious working style that should carry into future sessions. + - decision: A chosen approach or project rule that future work should follow and that is not obvious from code alone. If the trace does not support a durable why, do not use decision. + - constraint: A durable invariant, limit, or must/cannot rule that future work must respect. + - fact: A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth. + - reference: A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo. Use reference only when the enduring value is where to look later. + + Few-shot quality examples: + + Example preference: + - Trace signal: the user corrects the assistant after a small code fix and says not to append redundant recaps. + - Good: create one preference record about keeping replies terse and avoiding redundant change recaps after small diffs. + - Bad: store the file edit itself, or treat the correction as one-session scratch when it is stable workflow guidance. + + Example decision: + - Trace signal: early turns discuss local refactors and flaky tests; late in the trace the user settles one architecture boundary, and follow-on routing guidance only applies that boundary. + - Good: create the required episode and one decision record for the architecture boundary. Keep the routing guidance inside that record. + - Bad: store refactor noise, split one architectural choice into near-duplicate records, or create a separate durable record saying the refactors were noise. + + Example fact: + - Trace signal: repeated failed commands and partial theories eventually resolve to one stable operational requirement. 
+ - Good: create one fact record for the requirement in clean operational language. Lead with the dependency or environment requirement; mention the failure only as brief generic support if needed. + - Bad: store raw errors, command history, rejected theories, exact exception text, or a separate record saying not to invent a why. + + Example late clarification: + - Trace signal: early chunks circle local counters, timers, labels, and tuning; the final chunk clarifies those were distractions and the real durable lesson is a source-of-truth boundary. + - Good: create one durable record for the source-of-truth boundary. Mention restart or failover only if it explains why the boundary matters. + - Bad: write a durable record that carries over rejected local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list. + + Example reference: + - Trace signal: the assistant starts from a partial repo note, then the user clarifies that ownership or status lives in an external dashboard or ticket system. + - Good: create one reference record that names the external source and when future sessions should consult it. + - Bad: center the record on local files, or turn it into a warning slogan about what not to trust locally. + + Example routine: + - Trace signal: formatter, small lint fix, rerun tests, green result, no new rule or durable fact. + - Good: create only an archived episode. + - Bad: invent a durable record from the sequence of routine commands. {{ _.role("user") }} - Run instruction: + RUN INSTRUCTION: {{ run_instruction }} - Available tools: - {{ tool_manifest }} + EXISTING RECORD MANIFEST: + {{ existing_record_manifest }} + + EPISODE SUMMARY: + {{ episode_summary }} - Prior actions and observations: - {{ scratchpad }} + DURABLE FINDINGS: + {{ durable_findings_summary }} {{ ctx.output_format }} "# } - -test InitialStepReadsTrace { - functions [DecideNextExtractStep] - args { - runtime_dashboard #" - CONTEXT: 0/200000 (0%) [normal] - NOTES: 0 findings - "# - run_instruction #" - Read the trace, write exactly one episode record, and write only the - strongest durable records. This trace has 3 lines. Read all chunks before - writing. - "# - tool_manifest #" - - read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk. - - save_context(kind: string, title: string, body: string): Save one context record. - - final_result(completion_summary: string): Finish after exactly one episode exists. - "# - scratchpad "No prior actions." - } - @@assert({{ this.action == "READ_TRACE" }}) -} - -test ContinueUnreadTraceBeforeWrite { - functions [DecideNextExtractStep] - args { - runtime_dashboard #" - CONTEXT: 20000/200000 (10%) [normal] - NOTES: 0 findings - Trace reads: 1 chunk(s) - Next unread offset: 100 - Pruned offsets: none - "# - run_instruction #" - This trace has 220 lines. Read all chunks before writing. - "# - tool_manifest #" - - read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk. - - save_context(kind: string, title: string, body: string): Save one context record. - - final_result(completion_summary: string): Finish after exactly one episode exists. 
- "# - scratchpad #" - Action: read_trace - Status: ok - Observation: - [220 lines, showing 1-100] — 120 more lines, call read_trace(start_line=101, line_count=100) for the next chunk - "# - } - @@assert({{ this.action == "READ_TRACE" }}) -} - -test LongTraceNotesFindingBeforeWrite { - functions [DecideNextExtractStep] - args { - runtime_dashboard #" - CONTEXT: 42000/200000 (21%) [normal] - NOTES: 0 findings - Trace reads: 3 chunk(s) - Next unread offset: none - Pruned offsets: none - "# - run_instruction #" - This trace has 240 lines and is fully read. An explicit durable architecture decision appears at line 188. - "# - tool_manifest #" - - note_trace_findings(theme: string?, line: int?, quote: string?, level: string?): Record one trace finding with line evidence. - - save_context(kind: string, title: string, body: string): Save one context record. - "# - scratchpad #" - Action: read_trace - Status: ok - Observation: - 188 user: We decided that durable context stays in the DB store because runtime queues are separate and temporary. - "# - } - @@assert({{ this.action == "NOTE_TRACE_FINDINGS" }}) - @@assert({{ this.note_trace_findings.level == "DECISION" }}) -} - -test LongTraceNoReusableSignalCheckpoint { - functions [DecideNextExtractStep] - args { - runtime_dashboard #" - CONTEXT: 39000/200000 (20%) [normal] - NOTES: 0 findings - Trace reads: 2 chunk(s) - Next unread offset: none - Pruned offsets: none - "# - run_instruction #" - This long trace is fully read after multiple chunks. No note_trace_findings checkpoint has been called yet. It only formats files and reruns tests; no reusable rule, fact, preference, constraint, decision, or reference appears. - "# - tool_manifest #" - - note_trace_findings(theme: string?, line: int?, quote: string?, level: string?): Record one trace finding with line evidence, or call with no args for none. - - save_context(kind: string, title: string, body: string): Save one context record. - "# - scratchpad "Action: read_trace\nStatus: ok\nObservation:\nAll chunks covered routine formatter/test work." - } - @@assert({{ this.action == "NOTE_TRACE_FINDINGS" }}) -} - -test PruneOlderTraceReadsUnderPressure { - functions [DecideNextExtractStep] - args { - runtime_dashboard #" - CONTEXT: 130000/200000 (65%) [soft] - NOTES: 2 findings (1 durable, 1 implementation) across 1 theme(s) - Trace reads: 3 chunk(s) - Next unread offset: none - Pruned offsets: none - "# - run_instruction "The trace is fully read and findings from the first two chunks have been captured." - tool_manifest #" - - prune_trace_reads(start_lines: int[]): Prune earlier read_trace results after findings are noted. - - save_context(kind: string, title: string, body: string): Save one context record. - "# - scratchpad #" - Action: read_trace - Status: ok - Observation: - [300 lines, showing 1-100] - - Action: read_trace - Status: ok - Observation: - [300 lines, showing 101-200] - - Action: read_trace - Status: ok - Observation: - [300 lines, showing 201-300] - - Action: note_trace_findings - Status: ok - Observation: - Noted 1 finding (total 2 so far). 
- "# - } - @@assert({{ this.action == "PRUNE_TRACE_READS" }}) - @@assert({{ this.prune_trace_reads.start_lines|length > 0 }}) - @@assert({{ this.prune_trace_reads.start_lines[0] == 1 }}) -} - -test RoutineTraceSavesArchivedEpisode { - functions [DecideNextExtractStep] - args { - runtime_dashboard #" - CONTEXT: 1000/200000 (1%) [normal] - NOTES: 0 findings - Trace reads: 1 chunk(s) - Next unread offset: none - Pruned offsets: none - "# - run_instruction "The short trace is fully read. It only ran formatting and confirmed tests passed." - tool_manifest #" - - save_context(kind: string, title: string, body: string, status: string?): Save one context record. - - final_result(completion_summary: string): Finish after exactly one episode exists. - "# - scratchpad "Action: read_trace\nStatus: ok\nObservation:\nformatter ran; tests passed; no durable signal." - } - @@assert({{ this.action == "SAVE_CONTEXT" }}) - @@assert({{ this.save_context.kind == "EPISODE" }}) - @@assert({{ this.save_context.status == "ARCHIVED" }}) - @@assert({{ this.save_context.user_intent != null and this.save_context.user_intent|length > 0 }}) - @@assert({{ this.save_context.what_happened != null and this.save_context.what_happened|length > 0 }}) -} - -test DurablePreferenceSavesPreference { - functions [DecideNextExtractStep] - args { - runtime_dashboard "CONTEXT: 1000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" - run_instruction "The fully read trace includes a stable user correction: keep final replies short and skip redundant recaps. The current-session episode already exists; the durable preference remains unsaved." - tool_manifest "- save_context(kind: string, title: string, body: string): Save one context record." - scratchpad "Action: read_trace\nStatus: ok\nObservation:\nuser: Keep final replies short and skip redundant recaps.\n\nAction: save_context\nStatus: ok\nObservation:\n{\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n\nAction: note_trace_findings\nStatus: ok\nObservation:\nTheme: reply style. Quote: keep final replies short and skip redundant recaps." - } - @@assert({{ this.action == "SAVE_CONTEXT" }}) - @@assert({{ this.save_context.kind == "PREFERENCE" }}) -} - -test DurableConstraintSavesConstraint { - functions [DecideNextExtractStep] - args { - runtime_dashboard "CONTEXT: 1000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" - run_instruction "The fully read trace establishes a durable invariant: future extraction runs must not mutate unfetched context records. The current-session episode already exists; the durable constraint remains unsaved." - tool_manifest "- save_context(kind: string, title: string, body: string): Save one context record." - scratchpad "Action: save_context\nStatus: ok\nObservation:\n{\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n\nAction: note_trace_findings\nStatus: ok\nObservation:\nTheme: mutation invariant. Quote: never revise or archive a context record before fetching it in the current run." 
- } - @@assert({{ this.action == "SAVE_CONTEXT" }}) - @@assert({{ this.save_context.kind == "CONSTRAINT" }}) -} - -test DurableDecisionSavesDecisionWithWhy { - functions [DecideNextExtractStep] - args { - runtime_dashboard "CONTEXT: 1000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" - run_instruction "The fully read trace establishes a durable architecture decision with rationale. The current-session episode already exists; the durable decision remains unsaved." - tool_manifest "- save_context(kind: string, title: string, body: string, decision: string?, why: string?): Save one context record." - scratchpad "Action: save_context\nStatus: ok\nObservation:\n{\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n\nAction: note_trace_findings\nStatus: ok\nObservation:\nTheme: storage boundary. Quote: durable context is DB-backed so sessions can replay it; runtime state stays separate." - } - @@assert({{ this.action == "SAVE_CONTEXT" }}) - @@assert({{ this.save_context.kind == "DECISION" }}) - @@assert({{ this.save_context.decision|length > 0 }}) - @@assert({{ this.save_context.why|length > 0 }}) -} - -test StableDependencySavesFact { - functions [DecideNextExtractStep] - args { - runtime_dashboard "CONTEXT: 1000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" - run_instruction "The fully read trace establishes that image workflows require libvips. It gives no decision rationale. The current-session episode already exists; the durable dependency fact remains unsaved." - tool_manifest "- save_context(kind: string, title: string, body: string): Save one context record." - scratchpad "Action: save_context\nStatus: ok\nObservation:\n{\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n\nAction: note_trace_findings\nStatus: ok\nObservation:\nTheme: dependency. Quote: image workflows require libvips in the environment." - } - @@assert({{ this.action == "SAVE_CONTEXT" }}) - @@assert({{ this.save_context.kind == "FACT" }}) - @@assert({{ not this.save_context.why }}) -} - -test ExternalSourceSavesReference { - functions [DecideNextExtractStep] - args { - runtime_dashboard "CONTEXT: 1000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" - run_instruction "The fully read trace says incident ownership and current status live in the Ops dashboard, which future sessions should consult for incidents. The current-session episode already exists; the durable reference remains unsaved." - tool_manifest "- save_context(kind: string, title: string, body: string): Save one context record." - scratchpad "Action: save_context\nStatus: ok\nObservation:\n{\"ok\": true, \"result\": {\"record\": {\"kind\": \"episode\", \"source_session_id\": \"baml-test-session\"}}}\n\nAction: note_trace_findings\nStatus: ok\nObservation:\nTheme: incident source. Quote: use the Ops dashboard for incident ownership and current status." 
- } - @@assert({{ this.action == "SAVE_CONTEXT" }}) - @@assert({{ this.save_context.kind == "REFERENCE" }}) -} - -test DuplicateRiskSearchesContext { - functions [DecideNextExtractStep] - args { - runtime_dashboard "CONTEXT: 1000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" - run_instruction "Relevant existing durable records are shown below; treat them as a shortlist only and fetch before revision.\n\nRelevant existing durable records:\n- rec_storage | decision | DB context boundary | Durable context lives in the DB store." - tool_manifest #" - - search_context(query: string, kind: string?, status: string?): Search saved context by meaning. - - get_context(record_ids: string[]): Fetch saved context records by ID. - - save_context(kind: string, title: string, body: string): Save one context record. - "# - scratchpad "Action: read_trace\nStatus: ok\nObservation:\nThe new trace repeats the DB context boundary decision with slightly sharper wording." - } - @@assert({{ this.action == "SEARCH_CONTEXT" or this.action == "GET_CONTEXT" }}) - @@assert({{ this.action != "SAVE_CONTEXT" }}) -} - -test SearchHitFetchesBeforeRevision { - functions [DecideNextExtractStep] - args { - runtime_dashboard "CONTEXT: 2000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" - run_instruction "A search_context call found a nearby existing record. The current-session episode already exists. Fetch the full search hit before deciding whether to revise it." - tool_manifest #" - - search_context(query: string): Search saved context by meaning. - - get_context(record_ids: string[]): Fetch full context records by ID before any revision. - - revise_context(record_id: string, reason: string, kind: string, title: string, body: string): Revise an existing context record. - "# - scratchpad #" - Action: save_context - Status: ok - Observation: - {"ok": true, "result": {"record": {"kind": "episode", "source_session_id": "baml-test-session"}}} - - Action: search_context - Status: ok - Observation: - {"count": 1, "hits": [{"record_id": "rec_storage", "kind": "decision", "title": "DB context boundary", "body_preview": "Durable context lives in the DB store."}]} - "# - } - @@assert({{ this.action == "GET_CONTEXT" }}) -} - -test FetchedSameMeaningRevisesContext { - functions [DecideNextExtractStep] - args { - runtime_dashboard "CONTEXT: 2000/200000 (1%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" - run_instruction "The fetched record has the same core decision but needs the rationale tightened." - tool_manifest #" - - revise_context(record_id: string, reason: string, kind: string, title: string, body: string): Revise an existing context record with a complete improved payload. - - save_context(kind: string, title: string, body: string): Save one context record. 
- "# - scratchpad #" - Action: get_context - Status: ok - Observation: - {"count": 1, "records": [{"record_id": "rec_storage", "kind": "decision", "title": "DB context boundary", "body": "Durable context lives in the DB store.", "decision": "Durable context lives in the DB store.", "why": "It must survive sessions."}]} - "# - } - @@assert({{ this.action == "REVISE_CONTEXT" }}) -} - -test FinalBeforeEpisodeSavesEpisode { - functions [DecideNextExtractStep] - args { - runtime_dashboard "CONTEXT: 3000/200000 (2%) [normal]\nNOTES: 1 findings (1 durable, 0 implementation)\nTrace reads: 1 chunk(s)\nNext unread offset: none" - run_instruction "The trace is fully read. One durable fact has already been saved, but no current-session episode exists." - tool_manifest #" - - save_context(kind: string, title: string, body: string): Save one context record. - - final_result(completion_summary: string): Finish after exactly one episode exists. - "# - scratchpad #" - Action: save_context - Status: ok - Observation: - {"ok": true, "result": {"record": {"kind": "fact", "source_session_id": "baml-test-session"}}} - "# - } - @@assert({{ this.action == "SAVE_CONTEXT" }}) - @@assert({{ this.save_context.kind == "EPISODE" }}) - @@assert({{ this.save_context.user_intent != null and this.save_context.user_intent|length > 0 }}) - @@assert({{ this.save_context.what_happened != null and this.save_context.what_happened|length > 0 }}) -} - -test FinalStepAfterEpisodeSave { - functions [DecideNextExtractStep] - args { - runtime_dashboard #" - CONTEXT: 1000/200000 (1%) [normal] - NOTES: 0 findings - Trace reads: 1 chunk(s) - Next unread offset: none - Pruned offsets: none - "# - run_instruction #" - Read the trace, write exactly one episode record, and write only the - strongest durable records. The trace has already been fully read. - "# - tool_manifest #" - - read_trace(start_line: int?, line_count: int?): Read the next numbered trace chunk. - - save_context(kind: string, title: string, body: string): Save one context record. - - final_result(completion_summary: string): Finish after exactly one episode exists. - "# - scratchpad #" - Action: read_trace - Status: ok - Observation: - [3 lines, trace coverage complete] All trace lines have already been read. - - Action: save_context - Status: ok - Observation: - {"ok": true, "result": {"record": {"kind": "episode", "source_session_id": "baml-test-session"}}} - "# - } - @@assert({{ this.action == "FINAL_RESULT" }}) -} - -test EpisodeWithReusableDecisionSavesDecisionBeforeFinal { - functions [DecideNextExtractStep] - args { - runtime_dashboard #" - CONTEXT: 3000/200000 (2%) [normal] - NOTES: 0 findings - Trace reads: 1 chunk(s) - Next unread offset: none - Pruned offsets: none - "# - run_instruction "The trace is fully read. The current-session episode already exists, but a reusable PostgreSQL-over-MySQL project decision remains only in the trace and episode text. Save the durable decision before final_result." - tool_manifest #" - - save_context(kind: string, title: string, body: string, decision: string?, why: string?): Save one context record. - - final_result(completion_summary: string): Finish after exactly one episode exists. - "# - scratchpad #" - Action: read_trace - Status: ok - Observation: - user: Good. Also, we decided to use PostgreSQL instead of MySQL for this project. - assistant: Understood. Decision: PostgreSQL over MySQL. I'll update the connection string and ORM configuration. 
- - Action: save_context - Status: ok - Observation: - {"ok": true, "result": {"record": {"kind": "episode", "title": "Database Configuration Update", "body": "The session switched the project's primary database backend from MySQL to PostgreSQL.", "source_session_id": "baml-test-session"}}} - "# - } - @@assert({{ this.action == "SAVE_CONTEXT" }}) - @@assert({{ this.save_context.kind == "DECISION" }}) - @@assert({{ this.save_context.decision|length > 0 }}) - @@assert({{ this.save_context.why|length > 0 }}) -} - -test ImplementationOnlyAfterEpisodeFinalizes { - functions [DecideNextExtractStep] - args { - runtime_dashboard #" - CONTEXT: 3000/200000 (2%) [normal] - NOTES: 0 findings - Trace reads: 1 chunk(s) - Next unread offset: none - Pruned offsets: none - "# - run_instruction "The trace is fully read. A durable decision and the current-session episode already exist. The only remaining unsaved content is an implementation fix to a local timeout constant." - tool_manifest #" - - save_context(kind: string, title: string, body: string): Save one context record. - - final_result(completion_summary: string): Finish after exactly one episode exists. - "# - scratchpad #" - Action: read_trace - Status: ok - Observation: - The trace fixed a local timeout constant and also established one architecture decision. - - Action: save_context - Status: ok - Observation: - {"ok": true, "result": {"record": {"kind": "decision", "title": "Runtime state remains separate from durable context"}}} - - Action: save_context - Status: ok - Observation: - {"ok": true, "result": {"record": {"kind": "episode", "source_session_id": "baml-test-session"}}} - "# - } - @@assert({{ this.action == "FINAL_RESULT" }}) -} diff --git a/baml_agents/baml_src/extract_react_tests.baml b/baml_agents/baml_src/extract_react_tests.baml new file mode 100644 index 0000000..fffcfc9 --- /dev/null +++ b/baml_agents/baml_src/extract_react_tests.baml @@ -0,0 +1,92 @@ +test ScanTraceWindowCapturesDurableDecision { + functions [ScanTraceWindow] + args { + run_instruction "Extract one episode and any durable project context from the trace." + prior_episode_summary "(none yet)" + prior_findings_summary "(none yet)" + trace_window #" + [8 lines, window 1-8] + 1 user: We should run extraction evals before changing extraction prompts. + 2 assistant: I will update the BAML harness and run the eval. + 3 user: Keep trace-local command logs out of long-term context. + 4 assistant: Implemented the change and ran a smoke test. + "# + } + @@assert({{ this.durable_findings|length > 0 }}) +} + +test ScanTraceWindowSeparatesDurableAndImplementation { + functions [ScanTraceWindow] + args { + run_instruction "Extract reusable context, not command history." + prior_episode_summary "(none yet)" + prior_findings_summary "(none yet)" + trace_window #" + [7 lines, window 1-7] + 1 user: The deployment checklist is the source of truth for release readiness. + 2 assistant: Ran pytest -q and fixed a local fixture path. + 3 assistant: Edited src/example.py and reran the lint command. + 4 user: The command output is just evidence, not memory. + "# + } + @@assert({{ this.durable_findings|length > 0 }}) + @@assert({{ this.implementation_findings|length > 0 }}) +} + +test ScanTraceWindowAllowsNoDurableSignal { + functions [ScanTraceWindow] + args { + run_instruction "Extract durable context only when the trace supports it." + prior_episode_summary "(none yet)" + prior_findings_summary "(none yet)" + trace_window #" + [5 lines, window 1-5] + 1 user: Please format this file. 
+ 2 assistant: Ran the formatter. + 3 assistant: Fixed one lint complaint. + 4 assistant: Tests are green. + "# + } + @@assert({{ this.durable_findings|length == 0 }}) +} + +test SynthesizeExtractRecordsCreatesEpisodeAndDurableRecord { + functions [SynthesizeExtractRecords] + args { + run_instruction "Create exactly one episode and durable records only for reusable project context." + episode_summary "- The session updated an extraction harness and ran a smoke eval." + durable_findings_summary "- preference: eval-first prompt changes: Run the extraction eval before changing extraction prompts. (line 1)" + existing_record_manifest "(none)" + } + @@assert({{ this.episode.user_intent|length > 0 }}) + @@assert({{ this.episode.what_happened|length > 0 }}) + @@assert({{ this.durable_records|length > 0 }}) +} + +test SynthesizeExtractRecordsAllowsNoDurableSignal { + functions [SynthesizeExtractRecords] + args { + run_instruction "Create exactly one episode and no durable records when no reusable context exists." + episode_summary "- The session only formatted code, fixed one lint issue, and reran tests." + durable_findings_summary "(none)" + existing_record_manifest "(none)" + } + @@assert({{ this.episode.user_intent|length > 0 }}) + @@assert({{ this.episode.what_happened|length > 0 }}) + @@assert({{ this.durable_records|length == 0 }}) +} + +test SynthesizeExtractRecordsDeduplicatesCoreClaim { + functions [SynthesizeExtractRecords] + args { + run_instruction "Create compact durable records and merge duplicate meanings." + episode_summary "- The session clarified a runtime state boundary after noisy implementation discussion." + durable_findings_summary #" + - decision: persisted source of truth: Runtime status must live in one persisted store that survives restart. + - decision: runtime status boundary: Use the same persisted status store as the source of truth after restart. + "# + existing_record_manifest "(none)" + } + @@assert({{ this.episode.user_intent|length > 0 }}) + @@assert({{ this.durable_records|length == 1 }}) +} diff --git a/baml_agents/baml_src/models.baml b/baml_agents/baml_src/models.baml new file mode 100644 index 0000000..e5b568d --- /dev/null +++ b/baml_agents/baml_src/models.baml @@ -0,0 +1,43 @@ +retry_policy ExtractAgentRetry { + max_retries 1 + strategy { + type exponential_backoff + delay_ms 500 + multiplier 2 + max_delay_ms 8000 + } +} + +client MiniMaxM27 { + provider "openai-generic" + retry_policy ExtractAgentRetry + options { + base_url "https://api.minimax.io/v1" + api_key env.MINIMAX_API_KEY + model "MiniMax-M2.7" + temperature 0.01 + http { + connect_timeout_ms 10000 + time_to_first_token_timeout_ms 120000 + idle_timeout_ms 30000 + request_timeout_ms 300000 + } + } +} + +client OllamaLocal { + provider "openai-generic" + retry_policy ExtractAgentRetry + options { + base_url "http://127.0.0.1:11434/v1" + api_key "ollama" + model "gemma4:e4b" + temperature 0.0 + http { + connect_timeout_ms 10000 + time_to_first_token_timeout_ms 120000 + idle_timeout_ms 30000 + request_timeout_ms 300000 + } + } +} From 403b57d165e56f9b6d8f771d28ad4036be78b007 Mon Sep 17 00:00:00 2001 From: Isaac Kargar Date: Thu, 14 May 2026 10:22:18 +0300 Subject: [PATCH 3/8] Remove BAML extraction agent files and related components - Deleted run.py, the command-line entry point for the BAML extraction experiment. - Removed tool_bridge.py, which bridged synthesized BAML records into Lerim's extraction tools.
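An aside on the `ExtractAgentRetry` policy in the models.baml hunk above: it allows a single retry with exponential backoff. The sketch below shows the delay schedule those options imply, assuming the runtime computes `delay_ms * multiplier**attempt` and caps the result at `max_delay_ms`; the exact semantics belong to baml-py, so treat this as an illustration, not its implementation.

```python
# Sketch of the backoff schedule implied by ExtractAgentRetry (assumed
# formula: delay_ms * multiplier**attempt, capped at max_delay_ms).
def backoff_schedule(
    max_retries: int = 1,
    delay_ms: int = 500,
    multiplier: float = 2.0,
    max_delay_ms: int = 8000,
) -> list[int]:
    """Return the wait (in ms) before each retry attempt."""
    return [
        min(int(delay_ms * multiplier**attempt), max_delay_ms)
        for attempt in range(max_retries)
    ]

print(backoff_schedule())   # [500] -- one retry after 500 ms
print(backoff_schedule(6))  # [500, 1000, 2000, 4000, 8000, 8000]
```

With `max_retries 1` the schedule is just a single 500 ms wait, so the 8000 ms cap only comes into play if the retry budget is raised.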
- Eliminated extract_react.baml and extract_react_tests.baml, which defined the BAML structure and tests for extraction processes. - Cleared out generators.baml and models.baml, which contained generator configurations and client definitions for the extraction agent. --- baml_agents/.gitignore | 3 - baml_agents/README.md | 53 -- baml_agents/baml_client/__init__.py | 60 -- baml_agents/baml_client/async_client.py | 194 ------ baml_agents/baml_client/config.py | 102 --- baml_agents/baml_client/globals.py | 35 - baml_agents/baml_client/inlinedbaml.py | 22 - baml_agents/baml_client/parser.py | 58 -- baml_agents/baml_client/runtime.py | 361 ---------- baml_agents/baml_client/stream_types.py | 69 -- baml_agents/baml_client/sync_client.py | 204 ------ baml_agents/baml_client/tracing.py | 22 - baml_agents/baml_client/type_builder.py | 523 -------------- baml_agents/baml_client/type_map.py | 41 -- baml_agents/baml_client/types.py | 107 --- baml_agents/baml_client/watchers.py | 44 -- baml_agents/baml_extract_agent/__init__.py | 5 - baml_agents/baml_extract_agent/graph.py | 651 ------------------ baml_agents/baml_extract_agent/run.py | 68 -- baml_agents/baml_extract_agent/tool_bridge.py | 302 -------- baml_agents/baml_src/extract_react.baml | 284 -------- baml_agents/baml_src/extract_react_tests.baml | 92 --- baml_agents/baml_src/generators.baml | 6 - baml_agents/baml_src/models.baml | 43 -- 24 files changed, 3349 deletions(-) delete mode 100644 baml_agents/.gitignore delete mode 100644 baml_agents/README.md delete mode 100644 baml_agents/baml_client/__init__.py delete mode 100644 baml_agents/baml_client/async_client.py delete mode 100644 baml_agents/baml_client/config.py delete mode 100644 baml_agents/baml_client/globals.py delete mode 100644 baml_agents/baml_client/inlinedbaml.py delete mode 100644 baml_agents/baml_client/parser.py delete mode 100644 baml_agents/baml_client/runtime.py delete mode 100644 baml_agents/baml_client/stream_types.py delete mode 100644 baml_agents/baml_client/sync_client.py delete mode 100644 baml_agents/baml_client/tracing.py delete mode 100644 baml_agents/baml_client/type_builder.py delete mode 100644 baml_agents/baml_client/type_map.py delete mode 100644 baml_agents/baml_client/types.py delete mode 100644 baml_agents/baml_client/watchers.py delete mode 100644 baml_agents/baml_extract_agent/__init__.py delete mode 100644 baml_agents/baml_extract_agent/graph.py delete mode 100644 baml_agents/baml_extract_agent/run.py delete mode 100644 baml_agents/baml_extract_agent/tool_bridge.py delete mode 100644 baml_agents/baml_src/extract_react.baml delete mode 100644 baml_agents/baml_src/extract_react_tests.baml delete mode 100644 baml_agents/baml_src/generators.baml delete mode 100644 baml_agents/baml_src/models.baml diff --git a/baml_agents/.gitignore b/baml_agents/.gitignore deleted file mode 100644 index 764244a..0000000 --- a/baml_agents/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -.tmp/ -__pycache__/ -*.pyc diff --git a/baml_agents/README.md b/baml_agents/README.md deleted file mode 100644 index 1367dfc..0000000 --- a/baml_agents/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# Lerim BAML Agents - -Minimal experiment for testing whether a BAML plus LangGraph windowed -extraction harness can replace the PydanticAI extract agent. - -## What This Uses - -- LangGraph owns trace windowing, coverage, synthesis, and persistence. -- BAML makes the two LLM calls: scan one trace window, then synthesize records. 
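To make that two-call shape concrete, here is a minimal sketch of the scan-then-synthesize loop against the generated client whose function signatures appear in the async_client.py hunk further down (assuming the sync client mirrors them). The window size, line-numbering format, and summary strings are illustrative assumptions; the real windowing lived in the deleted `baml_extract_agent/graph.py`.

```python
# Minimal sketch of the windowed scan -> synthesize flow. Assumptions:
# fixed-size windows and naive summary strings; not the LangGraph wiring.
from baml_client import b  # generated BAML sync client

def extract(trace_lines: list[str], run_instruction: str, window: int = 100):
    episode_parts: list[str] = []
    findings: list[str] = []
    for start in range(0, len(trace_lines), window):
        chunk = trace_lines[start:start + window]
        scan = b.ScanTraceWindow(
            run_instruction=run_instruction,
            prior_episode_summary="\n".join(episode_parts) or "(none yet)",
            prior_findings_summary="\n".join(findings) or "(none yet)",
            trace_window="\n".join(
                f"{start + i + 1}\t{line}" for i, line in enumerate(chunk)
            ),
        )
        if scan.episode_update:
            episode_parts.append(f"- {scan.episode_update}")
        findings.extend(f"- {f.theme}: {f.note}" for f in scan.durable_findings)
    # One synthesis call turns the accumulated summaries into final records.
    return b.SynthesizeExtractRecords(
        run_instruction=run_instruction,
        episode_summary="\n".join(episode_parts) or "(none)",
        durable_findings_summary="\n".join(findings) or "(none)",
        existing_record_manifest="(none)",
    )
```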
-- The default benchmark/runtime model is MiniMax M2.7 through BAML's - OpenAI-compatible client. -- Ollama-compatible local models can still be used explicitly with - `--baml-provider ollama`. -- Model clients live in `baml_src/models.baml`; extraction prompts and - functions live in `baml_src/extract_react.baml`; BAML-native tests live in - `baml_src/extract_react_tests.baml`. -- Lerim's existing DB-backed `save_context` tool persists synthesized records. -- The default model can be overridden with `--model` for local and API model - comparisons. - -## Run - -From the `lerim-cli` repo root: - -```bash -uv run --with baml-py==0.222.0 baml-cli generate --from baml_agents/baml_src -PYTHONPATH="baml_agents:src" uv run --with baml-py==0.222.0 --with langgraph==1.2.0 \ - python -m baml_extract_agent.run \ - --trace tests/fixtures/traces/unit/codex_simple.jsonl \ - --context-db baml_agents/.tmp/context.sqlite3 \ - --project-root . -``` - -Local Ollama-compatible model: - -```bash -PYTHONPATH="baml_agents:src" uv run --with baml-py==0.222.0 --with langgraph==1.2.0 \ - python -m baml_extract_agent.run \ - --trace tests/fixtures/traces/unit/codex_simple.jsonl \ - --context-db baml_agents/.tmp/context_ollama.sqlite3 \ - --project-root . \ - --baml-provider ollama \ - --model -``` - -BAML-native tests: - -```bash -MINIMAX_API_KEY=... uv run --with baml-py==0.222.0 baml-cli test --from baml_agents/baml_src --parallel 1 -``` - -The graph writes into the context DB you pass with `--context-db`. Use a scratch -DB while comparing behavior. diff --git a/baml_agents/baml_client/__init__.py b/baml_agents/baml_client/__init__.py deleted file mode 100644 index b279bc3..0000000 --- a/baml_agents/baml_client/__init__.py +++ /dev/null @@ -1,60 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. - -__version__ = "0.222.0" - -try: - from baml_py.safe_import import EnsureBamlPyImport -except ImportError: - raise ImportError(f"""Update to baml-py required. -Version of baml_client generator (see generators.baml): {__version__} - -Please upgrade baml-py to version "{__version__}". - -$ pip install baml-py=={__version__} -$ uv add baml-py=={__version__} - -If nothing else works, please ask for help: - -https://github.com/boundaryml/baml/issues -https://boundaryml.com/discord -""") from None - - -with EnsureBamlPyImport(__version__) as e: - e.raise_if_incompatible_version(__version__) - - from . import types - from . import tracing - from . import stream_types - from . import config - from .config import reset_baml_env_vars - - from .sync_client import b - - from . 
import watchers - - -# FOR LEGACY COMPATIBILITY, expose "partial_types" as an alias for "stream_types" -# WE RECOMMEND USERS TO USE "stream_types" INSTEAD -partial_types = stream_types - -__all__ = [ - "b", - "stream_types", - "partial_types", - "tracing", - "types", - "reset_baml_env_vars", - "config", - "watchers", -] \ No newline at end of file diff --git a/baml_agents/baml_client/async_client.py b/baml_agents/baml_client/async_client.py deleted file mode 100644 index c4ea6c0..0000000 --- a/baml_agents/baml_client/async_client.py +++ /dev/null @@ -1,194 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. - -import typing -import typing_extensions -import baml_py - -from . import stream_types, types, type_builder -from .parser import LlmResponseParser, LlmStreamParser -from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions -from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__ - - -class BamlAsyncClient: - __options: DoNotUseDirectlyCallManager - __stream_client: "BamlStreamClient" - __http_request: "BamlHttpRequestClient" - __http_stream_request: "BamlHttpStreamRequestClient" - __llm_response_parser: LlmResponseParser - __llm_stream_parser: LlmStreamParser - - def __init__(self, options: DoNotUseDirectlyCallManager): - self.__options = options - self.__stream_client = BamlStreamClient(options) - self.__http_request = BamlHttpRequestClient(options) - self.__http_stream_request = BamlHttpStreamRequestClient(options) - self.__llm_response_parser = LlmResponseParser(options) - self.__llm_stream_parser = LlmStreamParser(options) - - def with_options(self, - tb: typing.Optional[type_builder.TypeBuilder] = None, - client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] = None, - client: typing.Optional[str] = None, - collector: typing.Optional[typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]]] = None, - env: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None, - tags: typing.Optional[typing.Dict[str, str]] = None, - on_tick: typing.Optional[typing.Callable[[str, baml_py.baml_py.FunctionLog], None]] = None, - ) -> "BamlAsyncClient": - options: BamlCallOptions = {} - if tb is not None: - options["tb"] = tb - if client_registry is not None: - options["client_registry"] = client_registry - if client is not None: - options["client"] = client - if collector is not None: - options["collector"] = collector - if env is not None: - options["env"] = env - if tags is not None: - options["tags"] = tags - if on_tick is not None: - options["on_tick"] = on_tick - return BamlAsyncClient(self.__options.merge_options(options)) - - @property - def stream(self): - return self.__stream_client - - @property - def request(self): - return self.__http_request - - @property - def stream_request(self): - return self.__http_stream_request - - @property - def parse(self): - return self.__llm_response_parser - - @property - def parse_stream(self): - return self.__llm_stream_parser - - async def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, - 
baml_options: BamlCallOptions = {}, - ) -> types.TraceWindowScan: - # Check if on_tick is provided - if 'on_tick' in baml_options: - # Use streaming internally when on_tick is provided - __stream__ = self.stream.ScanTraceWindow(run_instruction=run_instruction,prior_episode_summary=prior_episode_summary,prior_findings_summary=prior_findings_summary,trace_window=trace_window, - baml_options=baml_options) - return await __stream__.get_final_response() - else: - # Original non-streaming code - __result__ = await self.__options.merge_options(baml_options).call_function_async(function_name="ScanTraceWindow", args={ - "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, - }) - return typing.cast(types.TraceWindowScan, __result__.cast_to(types, types, stream_types, False, __runtime__)) - async def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, - baml_options: BamlCallOptions = {}, - ) -> types.SynthesizedExtraction: - # Check if on_tick is provided - if 'on_tick' in baml_options: - # Use streaming internally when on_tick is provided - __stream__ = self.stream.SynthesizeExtractRecords(run_instruction=run_instruction,episode_summary=episode_summary,durable_findings_summary=durable_findings_summary,existing_record_manifest=existing_record_manifest, - baml_options=baml_options) - return await __stream__.get_final_response() - else: - # Original non-streaming code - __result__ = await self.__options.merge_options(baml_options).call_function_async(function_name="SynthesizeExtractRecords", args={ - "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, - }) - return typing.cast(types.SynthesizedExtraction, __result__.cast_to(types, types, stream_types, False, __runtime__)) - - - -class BamlStreamClient: - __options: DoNotUseDirectlyCallManager - - def __init__(self, options: DoNotUseDirectlyCallManager): - self.__options = options - - def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlStream[stream_types.TraceWindowScan, types.TraceWindowScan]: - __ctx__, __result__ = self.__options.merge_options(baml_options).create_async_stream(function_name="ScanTraceWindow", args={ - "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, - }) - return baml_py.BamlStream[stream_types.TraceWindowScan, types.TraceWindowScan]( - __result__, - lambda x: typing.cast(stream_types.TraceWindowScan, x.cast_to(types, types, stream_types, True, __runtime__)), - lambda x: typing.cast(types.TraceWindowScan, x.cast_to(types, types, stream_types, False, __runtime__)), - __ctx__, - ) - def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlStream[stream_types.SynthesizedExtraction, types.SynthesizedExtraction]: - __ctx__, __result__ = self.__options.merge_options(baml_options).create_async_stream(function_name="SynthesizeExtractRecords", args={ - "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": 
durable_findings_summary,"existing_record_manifest": existing_record_manifest, - }) - return baml_py.BamlStream[stream_types.SynthesizedExtraction, types.SynthesizedExtraction]( - __result__, - lambda x: typing.cast(stream_types.SynthesizedExtraction, x.cast_to(types, types, stream_types, True, __runtime__)), - lambda x: typing.cast(types.SynthesizedExtraction, x.cast_to(types, types, stream_types, False, __runtime__)), - __ctx__, - ) - - -class BamlHttpRequestClient: - __options: DoNotUseDirectlyCallManager - - def __init__(self, options: DoNotUseDirectlyCallManager): - self.__options = options - - async def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.baml_py.HTTPRequest: - __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="ScanTraceWindow", args={ - "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, - }, mode="request") - return __result__ - async def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.baml_py.HTTPRequest: - __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SynthesizeExtractRecords", args={ - "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, - }, mode="request") - return __result__ - - -class BamlHttpStreamRequestClient: - __options: DoNotUseDirectlyCallManager - - def __init__(self, options: DoNotUseDirectlyCallManager): - self.__options = options - - async def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.baml_py.HTTPRequest: - __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="ScanTraceWindow", args={ - "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, - }, mode="stream") - return __result__ - async def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.baml_py.HTTPRequest: - __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SynthesizeExtractRecords", args={ - "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, - }, mode="stream") - return __result__ - - -b = BamlAsyncClient(DoNotUseDirectlyCallManager({})) \ No newline at end of file diff --git a/baml_agents/baml_client/config.py b/baml_agents/baml_client/config.py deleted file mode 100644 index 64b7fff..0000000 --- a/baml_agents/baml_client/config.py +++ /dev/null @@ -1,102 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! 
To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. - -from __future__ import annotations - -import os -import warnings -import typing_extensions -import typing -import functools - -from baml_py.logging import ( - get_log_level as baml_get_log_level, - set_log_level as baml_set_log_level, -) -from .globals import reset_baml_env_vars - -rT = typing_extensions.TypeVar("rT") # return type -pT = typing_extensions.ParamSpec("pT") # parameters type - - -def _deprecated(message: str): - def decorator(func: typing.Callable[pT, rT]) -> typing.Callable[pT, rT]: - """Use this decorator to mark functions as deprecated. - Every time the decorated function runs, it will emit - a "deprecation" warning.""" - - @functools.wraps(func) - def new_func(*args: pT.args, **kwargs: pT.kwargs): - warnings.simplefilter("always", DeprecationWarning) # turn off filter - warnings.warn( - "Call to a deprecated function {}.".format(func.__name__) + message, - category=DeprecationWarning, - stacklevel=2, - ) - warnings.simplefilter("default", DeprecationWarning) # reset filter - return func(*args, **kwargs) - - return new_func - - return decorator - - -@_deprecated("Use os.environ['BAML_LOG'] instead") -def get_log_level(): - """ - Get the log level for the BAML Python client. - """ - return baml_get_log_level() - - -@_deprecated("Use os.environ['BAML_LOG'] instead") -def set_log_level( - level: typing_extensions.Literal["DEBUG", "INFO", "WARN", "ERROR", "OFF"] | str, -): - """ - Set the log level for the BAML Python client - """ - baml_set_log_level(level) - os.environ["BAML_LOG"] = level - - -@_deprecated("Use os.environ['BAML_LOG_JSON_MODE'] instead") -def set_log_json_mode(): - """ - Set the log JSON mode for the BAML Python client. - """ - os.environ["BAML_LOG_JSON_MODE"] = "true" - - -@_deprecated("Use os.environ['BAML_LOG_MAX_CHUNK_LENGTH'] instead") -def set_log_max_chunk_length(): - """ - Set the maximum log chunk length for the BAML Python client. - """ - os.environ["BAML_LOG_MAX_CHUNK_LENGTH"] = "1000" - - -def set_log_max_message_length(*args, **kwargs): - """ - Alias for set_log_max_chunk_length for compatibility with docs. - """ - return set_log_max_chunk_length(*args, **kwargs) - - -__all__ = [ - "set_log_level", - "get_log_level", - "set_log_json_mode", - "reset_baml_env_vars", - "set_log_max_message_length", - "set_log_max_chunk_length", -] diff --git a/baml_agents/baml_client/globals.py b/baml_agents/baml_client/globals.py deleted file mode 100644 index 769e055..0000000 --- a/baml_agents/baml_client/globals.py +++ /dev/null @@ -1,35 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. 
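An aside on the config.py hunk above: its deprecated setters all forward to environment variables named in the deprecation messages. A short sketch of the replacement they recommend (values are illustrative; setting them before importing the generated client matters because globals.py snapshots `os.environ` when it builds the runtime):

```python
# Configure BAML logging via environment variables instead of the
# deprecated config.py setters (variable names from the deprecation
# messages in the deleted config.py).
import os

os.environ["BAML_LOG"] = "WARN"                   # replaces set_log_level()
os.environ["BAML_LOG_JSON_MODE"] = "true"         # replaces set_log_json_mode()
os.environ["BAML_LOG_MAX_CHUNK_LENGTH"] = "1000"  # replaces set_log_max_chunk_length()

# Import the generated client only after the variables are set, since the
# runtime copies os.environ at import time.
from baml_client import b
```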
- -from __future__ import annotations -import os -import warnings - -from baml_py import BamlCtxManager, BamlRuntime -from .inlinedbaml import get_baml_files -from typing import Dict - -DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME = BamlRuntime.from_files( - "baml_src", - get_baml_files(), - os.environ.copy() -) -DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX = BamlCtxManager(DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME) - -def reset_baml_env_vars(env_vars: Dict[str, str]): - warnings.warn( - "reset_baml_env_vars is deprecated and should be removed. Environment variables are now lazily loaded on each function call", - DeprecationWarning, - stacklevel=2 - ) - -__all__ = [] diff --git a/baml_agents/baml_client/inlinedbaml.py b/baml_agents/baml_client/inlinedbaml.py deleted file mode 100644 index afcbbb7..0000000 --- a/baml_agents/baml_client/inlinedbaml.py +++ /dev/null @@ -1,22 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. - -_file_map = { - - "extract_react.baml": "enum RecordKind {\n DECISION @alias(\"decision\") @description(\"A durable project decision with decision and why fields.\")\n PREFERENCE @alias(\"preference\") @description(\"A stable user or workflow preference.\")\n CONSTRAINT @alias(\"constraint\") @description(\"A durable invariant, limit, or must/cannot rule.\")\n FACT @alias(\"fact\") @description(\"A durable project fact or setup truth.\")\n REFERENCE @alias(\"reference\") @description(\"A pointer to an external source of truth.\")\n}\n\nenum RecordStatus {\n ACTIVE @alias(\"active\") @description(\"Current context that future sessions may reuse.\")\n ARCHIVED @alias(\"archived\") @description(\"Historical or routine context that should not be treated as active guidance.\")\n}\n\nenum FindingLevel {\n DECISION @alias(\"decision\") @description(\"Durable decision-level finding.\")\n PREFERENCE @alias(\"preference\") @description(\"Durable user or workflow preference finding.\")\n FEEDBACK @alias(\"feedback\") @description(\"Durable feedback-level finding.\")\n REFERENCE @alias(\"reference\") @description(\"Durable external-source finding.\")\n CONSTRAINT @alias(\"constraint\") @description(\"Durable constraint-level finding.\")\n FACT @alias(\"fact\") @description(\"Durable fact-level finding.\")\n IMPLEMENTATION @alias(\"implementation\") @description(\"Trace-local implementation evidence or discarded hypothesis.\")\n}\n\nclass TraceWindowFinding {\n theme string @description(\"Short stable theme for this finding.\")\n level FindingLevel @description(\"Use durable levels for reusable context, implementation for local/noisy evidence.\")\n line int? @description(\"1-based supporting line when the window gives one.\")\n quote string? @description(\"Short supporting quote from the current window.\")\n note string @description(\"Compact semantic finding. Avoid command logs and copied errors.\")\n}\n\nclass TraceWindowScan {\n episode_update string? @description(\"Compact update for the final episode summary. 
May be omitted when this window adds nothing.\")\n durable_findings TraceWindowFinding[] @description(\"Reusable decisions, preferences, constraints, facts, and references found in this window.\")\n implementation_findings TraceWindowFinding[] @description(\"Implementation evidence, discarded hypotheses, and local details useful only as support/noise.\")\n discarded_noise string[] @description(\"Short descriptions of noisy categories intentionally not saved.\")\n}\n\nclass EpisodeDraft {\n title string? @description(\"Optional short title for the current-session episode. Runtime derives one if omitted.\")\n body string? @description(\"Compact episode body. If omitted, runtime builds it from user_intent and what_happened.\")\n status RecordStatus? @description(\"Use archived for routine/no-durable sessions; active only when the episode itself remains useful.\")\n user_intent string? @description(\"What the user wanted in this source session. Runtime fills a generic fallback if omitted.\")\n what_happened string? @description(\"What the session actually did. Runtime fills a generic fallback if omitted.\")\n outcomes string? @description(\"Optional concise outcome.\")\n}\n\nclass DurableRecordDraft {\n kind RecordKind @description(\"Durable record kind.\")\n title string @description(\"Short standalone durable title.\")\n body string @description(\"Compact standalone durable body.\")\n status RecordStatus? @description(\"Usually active for reusable durable records.\")\n valid_from string?\n valid_until string?\n decision string?\n why string?\n alternatives string?\n consequences string?\n}\n\nclass SynthesizedExtraction {\n episode EpisodeDraft @description(\"Exactly one current-session episode record draft.\")\n durable_records DurableRecordDraft[] @description(\"Zero or more durable records.\")\n completion_summary string? @description(\"Brief summary of extraction work for final_result/reporting.\")\n}\n\nfunction ScanTraceWindow(\n run_instruction: string,\n prior_episode_summary: string,\n prior_findings_summary: string,\n trace_window: string\n) -> TraceWindowScan {\n client MiniMaxM27\n prompt #\"\n {{ _.role(\"system\") }}\n You scan one window from a coding-agent trace for Lerim extraction.\n Return only structured output. 
Do not save records and do not plan future tool calls.\n Do not include tags, hidden reasoning, markdown, or prose.\n The top-level output must include episode_update, durable_findings, implementation_findings, and discarded_noise.\n Use an empty string or empty list when a field has no content.\n Every durable_findings and implementation_findings item must be an object with:\n - theme\n - level: one of decision, preference, feedback, reference, constraint, fact, or implementation\n - note\n - optional line\n - optional quote\n Never return finding items as plain strings.\n Never use confidence labels such as high, medium, or low as finding levels.\n\n Separate:\n - episode_update: what happened in this session window, for the final episode record.\n - durable_findings: reusable project/user context only.\n - implementation_findings: local evidence, command work, discarded hypotheses, or support.\n - discarded_noise: categories of content intentionally ignored.\n\n Durable signal means a decision, preference, constraint, fact, or reference likely useful beyond this trace.\n Implementation detail alone is not durable signal.\n A durable finding should help a future agent make a better decision in a new chat.\n Save what a future agent would be relieved to already know: stable user preferences, chosen policies, durable constraints, source-of-truth references, and stable project or product truths.\n If a detail only helps replay or debug this trace's terminal session, put it in implementation_findings or discarded_noise, not durable_findings.\n Raw metrics, comparison tables, config edits, API wiring, package installation, debug commands, file paths, and stack traces are implementation_findings unless the user turns them into a reusable policy, preference, constraint, fact, or source-of-truth reference.\n When the same topic has both user-level guidance and technical evidence, the user-level guidance is the durable finding; the technical evidence is implementation support.\n For technical blockers, keep durable_findings at the capability, status, and implication level only. Put patch recipes, flags, functions, commands, exact metrics, and local debug steps in implementation_findings.\n When a window mixes a durable point with local evidence, put only the reusable point in durable_findings and put the supporting local evidence in implementation_findings.\n Use semantic meaning, not exact wording. 
Do not copy long code, logs, stack traces, or command transcripts.\n If the window has no durable signal, return an empty durable_findings list.\n If the window adds nothing meaningful to the episode, episode_update may be an empty string.\n\n {{ _.role(\"user\") }}\n RUN INSTRUCTION:\n {{ run_instruction }}\n\n PRIOR EPISODE SUMMARY:\n {{ prior_episode_summary }}\n\n PRIOR FINDINGS SUMMARY:\n {{ prior_findings_summary }}\n\n TRACE WINDOW:\n {{ trace_window }}\n\n {{ ctx.output_format }}\n \"#\n}\n\nfunction SynthesizeExtractRecords(\n run_instruction: string,\n episode_summary: string,\n durable_findings_summary: string,\n existing_record_manifest: string\n) -> SynthesizedExtraction {\n client MiniMaxM27\n prompt #\"\n {{ _.role(\"system\") }}\n You synthesize final Lerim context records from scanned trace findings.\n Return only structured output.\n Do not include tags, hidden reasoning, markdown, or prose.\n The top-level output must include episode, durable_records, and completion_summary.\n Use an empty durable_records list when there is no durable signal.\n Every durable_records item must be an object with kind, title, and body.\n Optional durable record fields are status, valid_from, valid_until, decision, why, alternatives, and consequences.\n Never return durable_records items as plain strings or as generic type/record objects.\n\n Create exactly one episode record for the current session.\n Create durable records only for reusable decisions, preferences, constraints, facts, and references.\n The episode says what the session did. Durable records say what future sessions should reuse.\n Before creating each durable record, ask: would this change what a future agent does in a new chat?\n Prefer the small set of memories a future agent would be relieved to already know over exhaustive coverage of what happened.\n Keep durable records compact, standalone, and deduplicated.\n Do not create durable records for command logs, patch steps, generic programming knowledge, temporary diagnostics, or local implementation chatter.\n If there is no reusable signal, durable_records must be empty and the episode should normally be archived.\n Decision records must include decision and why when the rationale is present; use fact when there is no durable why.\n Episode records should include user_intent and what_happened when available; the runtime can derive missing episode fields.\n Treat the trace as historical evidence, not live verification of the current repo.\n\n Existing records, if listed, are only duplicate-risk context. Prefer skipping near-duplicates over creating duplicate durable records.\n\n Quality bar:\n - Store the reusable rule, decision, invariant, dependency, preference, or external pointer, not the story of the session.\n - One durable record should hold one durable point.\n - Direct consequences and application guidance usually stay inside that same record.\n - Create the minimum number of durable records that preserves distinct durable meanings. Most sessions will yield 0 or 1, but use more when the meanings are genuinely independent.\n - Classify each durable point into one canonical kind. Prefer the most specific supported kind, and do not duplicate the same point across multiple durable kinds.\n - Duplicates are worse than gaps. 
Skip uncertain candidates rather than spraying near-duplicates.\n - Never create a second durable record in the same run for the same core claim.\n - constraint and reference are first-class durable record kinds, not fallback categories.\n\n What not to save:\n - patch logs, command sequences, retries, timelines, or meeting-style recaps\n - code structure, file paths, git history, or storage mechanics by themselves\n - generic programming knowledge or facts already obvious from the repo\n - rejected lures, discarded explanations, or implementation-only distractions\n - one-run validation findings, approval flow, queue state, DB resets, rebuilds, or runtime diagnostics by themselves\n\n Selection rules:\n - First separate findings into durable signal and implementation evidence.\n - Prefer user-level guidance, chosen operating policy, and stable project truth over the technical evidence that revealed them.\n - Synthesize at the theme level. Usually one theme becomes one durable record.\n - Create multiple durable records only when the trace establishes multiple independent durable points, each with its own support.\n - Do not store one durable point as both a preference and a decision, a fact and a decision, or any other cross-kind duplicate.\n - A stable workflow preference is not also a decision unless the trace separately states an explicit project decision with rationale.\n - A dependency, setup, or environment truth without durable rationale is a fact, not also a decision.\n - A failure caused by the current run's temporary validation setup is not itself a durable environment truth. If the investigation reveals a stable requirement that future sessions must apply, save that requirement as the durable point and omit the temporary validation story.\n - Merge candidates when one only states how to apply the other in local operations, routing, or ownership.\n - If two candidates share the same core claim, merge them.\n - If the difference is only evidence framing, symptom wording, or local-vs-CI phrasing around the same durable fact, keep one record and fold the extra context into it.\n - If one candidate is only the direct application or routing consequence of another, keep it inside the stronger record.\n - If one candidate only says how different local components should apply the same project rule, keep that guidance inside the main record rather than creating a second durable record.\n - If one candidate only restates where local project components live or how an internal architecture is applied, keep it inside the stronger decision, fact, or constraint instead of creating a separate reference.\n - If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule.\n - If a technical blocker is reusable, record only the capability affected, status or source of truth, and implication for future work.\n - Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered.\n - Store durable records only when the lesson is likely reusable beyond this trace.\n - If a candidate is mainly about this trace's commands, files, or timeline, reject it.\n - Trace-local instructions about what to ignore in this session are not preferences unless they clearly express a broader standing workflow rule for future sessions.\n - If the trace explicitly says the rationale is unknown or says not to invent one, do not create a decision; use fact instead.\n - A stable setup, dependency, 
or environment requirement without a durable why is a fact even if it sounds like the current chosen setup.\n - The instruction \"do not invent a why\" is extraction guidance, not project context.\n - If the trace explicitly rejects a lure or distraction, do not carry that rejected idea into the durable record text unless the rejection itself is the durable lesson.\n - If a long noisy investigation resolves into one source-of-truth boundary, store only that boundary. Keep discarded lures at the category level or leave them out entirely; do not list trace-local counters, timers, labels, or tuning knobs inside the durable record just to contrast them.\n - When a discarded lure matters as evidence, keep it attached to the main durable theme as implementation context rather than storing it as a second durable theme.\n - If the episode summary contains clearly reusable decision, preference, constraint, fact, or reference, that point should usually also exist as its own durable record.\n - Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode.\n - Durable records are additional project context, not a substitute for the session episode.\n - Most traces should produce only a few durable records; create more only when each one would independently change future behavior.\n\n Writing rules:\n - Durable titles should name the lasting rule, decision, fact, constraint, preference, or reference directly.\n - Durable bodies should be compact, neutral, and standalone.\n - Prefer this shape for durable records: the durable point, why it matters, and how to apply it later.\n - Do not write durable records as meeting minutes, patch logs, or cleanup commentary.\n - Do not preserve trace-local commands, negotiation phrasing, or \"this is not about X\" sentences in final record text.\n - Do not mention discarded implementation noise in durable record fields, including consequences.\n - Do not include patch recipes, flags, function names, exact metrics, or local debug steps in durable record text.\n - Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored.\n - When the durable lesson is a source-of-truth rule, write the authoritative rule directly.\n - If a short contrast is still helpful, keep it abstract, such as \"not worker-local state\" or \"not ephemeral local state\". Do not enumerate examples in parentheses or comma-separated lists.\n - When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection.\n - Facts from noisy failures must be rewritten into the underlying dependency, environment requirement, stakeholder driver, or operational fact.\n - If a fact still reads like stderr, an exception symptom, or copied command output, rewrite it again before writing.\n - When the durable lesson is an environment or dependency requirement, do not center the fact on the observed failure symptom. Name the requirement directly and mention the symptom only if it is needed as brief supporting context.\n - If brief supporting context is useful, lead with the requirement and keep the symptom generic. Never include exception class names, quoted error fragments, or copied failure strings in the durable fact.\n - If the candidate is mainly \"this validation run failed until we changed the setup\", it belongs in the archived episode. 
If the candidate names a reusable setup or runtime requirement discovered through that validation, keep the requirement and drop the failure narrative.\n - When no durable rationale exists, do not spend the fact body explaining that the rationale is absent. Just state the stable dependency, setup requirement, or operational truth directly.\n - Do not quote or paraphrase trace instructions about how to classify the evidence inside the final fact body. Final fact text should describe the underlying truth, not the extraction rule you followed.\n - References must answer both \"where should future sessions look?\" and \"when should they consult it?\"\n - Do not use reference for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than \"consult this external source next time.\"\n - Keep the episode concise: short title, short body, concise user_intent, what_happened, and outcomes.\n\n Record types:\n - preference: Stable workflow guidance from the user. Save corrections and confirmed non-obvious working style that should carry into future sessions.\n - decision: A chosen approach or project rule that future work should follow and that is not obvious from code alone. If the trace does not support a durable why, do not use decision.\n - constraint: A durable invariant, limit, or must/cannot rule that future work must respect.\n - fact: A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth.\n - reference: A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo. Use reference only when the enduring value is where to look later.\n\n Few-shot quality examples:\n\n Example preference:\n - Trace signal: the user corrects the assistant after a small code fix and says not to append redundant recaps.\n - Good: create one preference record about keeping replies terse and avoiding redundant change recaps after small diffs.\n - Bad: store the file edit itself, or treat the correction as one-session scratch when it is stable workflow guidance.\n\n Example decision:\n - Trace signal: early turns discuss local refactors and flaky tests; late in the trace the user settles one architecture boundary, and follow-on routing guidance only applies that boundary.\n - Good: create the required episode and one decision record for the architecture boundary. Keep the routing guidance inside that record.\n - Bad: store refactor noise, split one architectural choice into near-duplicate records, or create a separate durable record saying the refactors were noise.\n\n Example fact:\n - Trace signal: repeated failed commands and partial theories eventually resolve to one stable operational requirement.\n - Good: create one fact record for the requirement in clean operational language. Lead with the dependency or environment requirement; mention the failure only as brief generic support if needed.\n - Bad: store raw errors, command history, rejected theories, exact exception text, or a separate record saying not to invent a why.\n\n Example late clarification:\n - Trace signal: early chunks circle local counters, timers, labels, and tuning; the final chunk clarifies those were distractions and the real durable lesson is a source-of-truth boundary.\n - Good: create one durable record for the source-of-truth boundary. 
Mention restart or failover only if it explains why the boundary matters.\n - Bad: write a durable record that carries over rejected local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list.\n\n Example reference:\n - Trace signal: the assistant starts from a partial repo note, then the user clarifies that ownership or status lives in an external dashboard or ticket system.\n - Good: create one reference record that names the external source and when future sessions should consult it.\n - Bad: center the record on local files, or turn it into a warning slogan about what not to trust locally.\n\n Example routine:\n - Trace signal: formatter, small lint fix, rerun tests, green result, no new rule or durable fact.\n - Good: create only an archived episode.\n - Bad: invent a durable record from the sequence of routine commands.\n\n {{ _.role(\"user\") }}\n RUN INSTRUCTION:\n {{ run_instruction }}\n\n EXISTING RECORD MANIFEST:\n {{ existing_record_manifest }}\n\n EPISODE SUMMARY:\n {{ episode_summary }}\n\n DURABLE FINDINGS:\n {{ durable_findings_summary }}\n\n {{ ctx.output_format }}\n \"#\n}\n", - "extract_react_tests.baml": "test ScanTraceWindowCapturesDurableDecision {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract one episode and any durable project context from the trace.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [8 lines, window 1-8]\n 1\tuser: We should run extraction evals before changing extraction prompts.\n 2\tassistant: I will update the BAML harness and run the eval.\n 3\tuser: Keep trace-local command logs out of long-term context.\n 4\tassistant: Implemented the change and ran a smoke test.\n \"#\n }\n @@assert({{ this.durable_findings|length > 0 }})\n}\n\ntest ScanTraceWindowSeparatesDurableAndImplementation {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract reusable context, not command history.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [7 lines, window 1-7]\n 1\tuser: The deployment checklist is the source of truth for release readiness.\n 2\tassistant: Ran pytest -q and fixed a local fixture path.\n 3\tassistant: Edited src/example.py and reran the lint command.\n 4\tuser: The command output is just evidence, not memory.\n \"#\n }\n @@assert({{ this.durable_findings|length > 0 }})\n @@assert({{ this.implementation_findings|length > 0 }})\n}\n\ntest ScanTraceWindowAllowsNoDurableSignal {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract durable context only when the trace supports it.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [5 lines, window 1-5]\n 1\tuser: Please format this file.\n 2\tassistant: Ran the formatter.\n 3\tassistant: Fixed one lint complaint.\n 4\tassistant: Tests are green.\n \"#\n }\n @@assert({{ this.durable_findings|length == 0 }})\n}\n\ntest SynthesizeExtractRecordsCreatesEpisodeAndDurableRecord {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create exactly one episode and durable records only for reusable project context.\"\n episode_summary \"- The session updated an extraction harness and ran a smoke eval.\"\n durable_findings_summary \"- preference: eval-first prompt changes: Run the extraction eval before changing extraction prompts. 
(line 1)\"\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.episode.user_intent|length > 0 }})\n @@assert({{ this.episode.what_happened|length > 0 }})\n @@assert({{ this.durable_records|length > 0 }})\n}\n\ntest SynthesizeExtractRecordsAllowsNoDurableSignal {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create exactly one episode and no durable records when no reusable context exists.\"\n episode_summary \"- The session only formatted code, fixed one lint issue, and reran tests.\"\n durable_findings_summary \"(none)\"\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.episode.user_intent|length > 0 }})\n @@assert({{ this.episode.what_happened|length > 0 }})\n @@assert({{ this.durable_records|length == 0 }})\n}\n\ntest SynthesizeExtractRecordsDeduplicatesCoreClaim {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create compact durable records and merge duplicate meanings.\"\n episode_summary \"- The session clarified a runtime state boundary after noisy implementation discussion.\"\n durable_findings_summary #\"\n - decision: persisted source of truth: Runtime status must live in one persisted store that survives restart.\n - decision: runtime status boundary: Use the same persisted status store as the source of truth after restart.\n \"#\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.episode.user_intent|length > 0 }})\n @@assert({{ this.durable_records|length == 1 }})\n}\n", - "generators.baml": "generator target {\n output_type \"python/pydantic\"\n output_dir \"../\"\n default_client_mode \"sync\"\n version \"0.222.0\"\n}\n", - "models.baml": "retry_policy ExtractAgentRetry {\n max_retries 1\n strategy {\n type exponential_backoff\n delay_ms 500\n multiplier 2\n max_delay_ms 8000\n }\n}\n\nclient MiniMaxM27 {\n provider \"openai-generic\"\n retry_policy ExtractAgentRetry\n options {\n base_url \"https://api.minimax.io/v1\"\n api_key env.MINIMAX_API_KEY\n model \"MiniMax-M2.7\"\n temperature 0.01\n http {\n connect_timeout_ms 10000\n time_to_first_token_timeout_ms 120000\n idle_timeout_ms 30000\n request_timeout_ms 300000\n }\n }\n}\n\nclient OllamaLocal {\n provider \"openai-generic\"\n retry_policy ExtractAgentRetry\n options {\n base_url \"http://127.0.0.1:11434/v1\"\n api_key \"ollama\"\n model \"gemma4:e4b\"\n temperature 0.0\n http {\n connect_timeout_ms 10000\n time_to_first_token_timeout_ms 120000\n idle_timeout_ms 30000\n request_timeout_ms 300000\n }\n }\n}\n", -} - -def get_baml_files(): - return _file_map \ No newline at end of file diff --git a/baml_agents/baml_client/parser.py b/baml_agents/baml_client/parser.py deleted file mode 100644 index 9cb2f07..0000000 --- a/baml_agents/baml_client/parser.py +++ /dev/null @@ -1,58 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. - -import typing -import typing_extensions - - -from . 
import stream_types, types -from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions - -class LlmResponseParser: - __options: DoNotUseDirectlyCallManager - - def __init__(self, options: DoNotUseDirectlyCallManager): - self.__options = options - - def ScanTraceWindow( - self, llm_response: str, baml_options: BamlCallOptions = {}, - ) -> types.TraceWindowScan: - __result__ = self.__options.merge_options(baml_options).parse_response(function_name="ScanTraceWindow", llm_response=llm_response, mode="request") - return typing.cast(types.TraceWindowScan, __result__) - - def SynthesizeExtractRecords( - self, llm_response: str, baml_options: BamlCallOptions = {}, - ) -> types.SynthesizedExtraction: - __result__ = self.__options.merge_options(baml_options).parse_response(function_name="SynthesizeExtractRecords", llm_response=llm_response, mode="request") - return typing.cast(types.SynthesizedExtraction, __result__) - - - -class LlmStreamParser: - __options: DoNotUseDirectlyCallManager - - def __init__(self, options: DoNotUseDirectlyCallManager): - self.__options = options - - def ScanTraceWindow( - self, llm_response: str, baml_options: BamlCallOptions = {}, - ) -> stream_types.TraceWindowScan: - __result__ = self.__options.merge_options(baml_options).parse_response(function_name="ScanTraceWindow", llm_response=llm_response, mode="stream") - return typing.cast(stream_types.TraceWindowScan, __result__) - - def SynthesizeExtractRecords( - self, llm_response: str, baml_options: BamlCallOptions = {}, - ) -> stream_types.SynthesizedExtraction: - __result__ = self.__options.merge_options(baml_options).parse_response(function_name="SynthesizeExtractRecords", llm_response=llm_response, mode="stream") - return typing.cast(stream_types.SynthesizedExtraction, __result__) - - \ No newline at end of file diff --git a/baml_agents/baml_client/runtime.py b/baml_agents/baml_client/runtime.py deleted file mode 100644 index 27fc3a9..0000000 --- a/baml_agents/baml_client/runtime.py +++ /dev/null @@ -1,361 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. - -import os -import typing -import typing_extensions - -import baml_py - -from . 
import types, stream_types, type_builder -from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__, DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX as __ctx__manager__ - - -class BamlCallOptions(typing.TypedDict, total=False): - tb: typing_extensions.NotRequired[type_builder.TypeBuilder] - client_registry: typing_extensions.NotRequired[baml_py.baml_py.ClientRegistry] - client: typing_extensions.NotRequired[str] - env: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[str]]] - tags: typing_extensions.NotRequired[typing.Dict[str, str]] - collector: typing_extensions.NotRequired[ - typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]] - ] - abort_controller: typing_extensions.NotRequired[baml_py.baml_py.AbortController] - on_tick: typing_extensions.NotRequired[typing.Callable[[str, baml_py.baml_py.FunctionLog], None]] - watchers: typing_extensions.NotRequired[typing.Any] # EventCollector type, will be overridden in generated clients - - -class _ResolvedBamlOptions: - tb: typing.Optional[baml_py.baml_py.TypeBuilder] - client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] - collectors: typing.List[baml_py.baml_py.Collector] - env_vars: typing.Dict[str, str] - tags: typing.Dict[str, str] - abort_controller: typing.Optional[baml_py.baml_py.AbortController] - on_tick: typing.Optional[typing.Callable[[], None]] - watchers: typing.Optional[typing.Any] - - def __init__( - self, - tb: typing.Optional[baml_py.baml_py.TypeBuilder], - client_registry: typing.Optional[baml_py.baml_py.ClientRegistry], - collectors: typing.List[baml_py.baml_py.Collector], - env_vars: typing.Dict[str, str], - tags: typing.Dict[str, str], - abort_controller: typing.Optional[baml_py.baml_py.AbortController], - on_tick: typing.Optional[typing.Callable[[], None]], - watchers: typing.Optional[typing.Any], - ): - self.tb = tb - self.client_registry = client_registry - self.collectors = collectors - self.env_vars = env_vars - self.tags = tags - self.abort_controller = abort_controller - self.on_tick = on_tick - self.watchers = watchers - - - - -class DoNotUseDirectlyCallManager: - def __init__(self, baml_options: BamlCallOptions): - self.__baml_options = baml_options - - def __getstate__(self): - # Return state needed for pickling - return {"baml_options": self.__baml_options} - - def __setstate__(self, state): - # Restore state from pickling - self.__baml_options = state["baml_options"] - - def __resolve(self) -> _ResolvedBamlOptions: - tb = self.__baml_options.get("tb") - if tb is not None: - baml_tb = tb._tb # type: ignore (we know how to use this private attribute) - else: - baml_tb = None - client_registry = self.__baml_options.get("client_registry") - client = self.__baml_options.get("client") - - # If client is provided, it takes precedence (creates/overrides client_registry primary) - if client is not None: - if client_registry is None: - client_registry = baml_py.baml_py.ClientRegistry() - client_registry.set_primary(client) - - collector = self.__baml_options.get("collector") - collectors_as_list = ( - collector - if isinstance(collector, list) - else [collector] if collector is not None else [] - ) - env_vars = os.environ.copy() - for k, v in self.__baml_options.get("env", {}).items(): - if v is not None: - env_vars[k] = v - else: - env_vars.pop(k, None) - - tags = self.__baml_options.get("tags", {}) or {} - - abort_controller = self.__baml_options.get("abort_controller") - - on_tick = self.__baml_options.get("on_tick") - 
if on_tick is not None: - collector = baml_py.baml_py.Collector("on-tick-collector") - collectors_as_list.append(collector) - def on_tick_wrapper(): - log = collector.last - if log is not None: - on_tick("Unknown", log) - else: - on_tick_wrapper = None - - watchers = self.__baml_options.get("watchers") - - return _ResolvedBamlOptions( - baml_tb, - client_registry, - collectors_as_list, - env_vars, - tags, - abort_controller, - on_tick_wrapper, - watchers, - ) - - def merge_options(self, options: BamlCallOptions) -> "DoNotUseDirectlyCallManager": - return DoNotUseDirectlyCallManager({**self.__baml_options, **options}) - - async def call_function_async( - self, *, function_name: str, args: typing.Dict[str, typing.Any] - ) -> baml_py.baml_py.FunctionResult: - resolved_options = self.__resolve() - - # Check if already aborted - if resolved_options.abort_controller is not None and resolved_options.abort_controller.aborted: - raise baml_py.baml_py.BamlAbortError("Operation was aborted") - - return await __runtime__.call_function( - function_name, - args, - # ctx - __ctx__manager__.clone_context(), - # tb - resolved_options.tb, - # cr - resolved_options.client_registry, - # collectors - resolved_options.collectors, - # env_vars - resolved_options.env_vars, - # tags - resolved_options.tags, - # abort_controller - resolved_options.abort_controller, - # watchers - resolved_options.watchers, - ) - - def call_function_sync( - self, *, function_name: str, args: typing.Dict[str, typing.Any] - ) -> baml_py.baml_py.FunctionResult: - resolved_options = self.__resolve() - - # Check if already aborted - if resolved_options.abort_controller is not None and resolved_options.abort_controller.aborted: - raise baml_py.baml_py.BamlAbortError("Operation was aborted") - - ctx = __ctx__manager__.get() - return __runtime__.call_function_sync( - function_name, - args, - # ctx - ctx, - # tb - resolved_options.tb, - # cr - resolved_options.client_registry, - # collectors - resolved_options.collectors, - # env_vars - resolved_options.env_vars, - # tags - resolved_options.tags, - # abort_controller - resolved_options.abort_controller, - # watchers - resolved_options.watchers, - ) - - def create_async_stream( - self, - *, - function_name: str, - args: typing.Dict[str, typing.Any], - ) -> typing.Tuple[baml_py.baml_py.RuntimeContextManager, baml_py.baml_py.FunctionResultStream]: - resolved_options = self.__resolve() - ctx = __ctx__manager__.clone_context() - result = __runtime__.stream_function( - function_name, - args, - # this is always None, we set this later! - # on_event - None, - # ctx - ctx, - # tb - resolved_options.tb, - # cr - resolved_options.client_registry, - # collectors - resolved_options.collectors, - # env_vars - resolved_options.env_vars, - # tags - resolved_options.tags, - # on_tick - resolved_options.on_tick, - # abort_controller - resolved_options.abort_controller, - ) - return ctx, result - - def create_sync_stream( - self, - *, - function_name: str, - args: typing.Dict[str, typing.Any], - ) -> typing.Tuple[baml_py.baml_py.RuntimeContextManager, baml_py.baml_py.SyncFunctionResultStream]: - resolved_options = self.__resolve() - if resolved_options.on_tick is not None: - raise ValueError("on_tick is not supported for sync streams. Please use async streams instead.") - ctx = __ctx__manager__.get() - result = __runtime__.stream_function_sync( - function_name, - args, - # this is always None, we set this later! 
- # on_event - None, - # ctx - ctx, - # tb - resolved_options.tb, - # cr - resolved_options.client_registry, - # collectors - resolved_options.collectors, - # env_vars - resolved_options.env_vars, - # tags - resolved_options.tags, - # on_tick - # always None! sync streams don't support on_tick - None, - # abort_controller - resolved_options.abort_controller, - ) - return ctx, result - - async def create_http_request_async( - self, - *, - function_name: str, - args: typing.Dict[str, typing.Any], - mode: typing_extensions.Literal["stream", "request"], - ) -> baml_py.baml_py.HTTPRequest: - resolved_options = self.__resolve() - return await __runtime__.build_request( - function_name, - args, - # ctx - __ctx__manager__.clone_context(), - # tb - resolved_options.tb, - # cr - resolved_options.client_registry, - # env_vars - resolved_options.env_vars, - # is_stream - mode == "stream", - ) - - def create_http_request_sync( - self, - *, - function_name: str, - args: typing.Dict[str, typing.Any], - mode: typing_extensions.Literal["stream", "request"], - ) -> baml_py.baml_py.HTTPRequest: - resolved_options = self.__resolve() - return __runtime__.build_request_sync( - function_name, - args, - # ctx - __ctx__manager__.get(), - # tb - resolved_options.tb, - # cr - resolved_options.client_registry, - # env_vars - resolved_options.env_vars, - # is_stream - mode == "stream", - ) - - def parse_response(self, *, function_name: str, llm_response: str, mode: typing_extensions.Literal["stream", "request"]) -> typing.Any: - resolved_options = self.__resolve() - return __runtime__.parse_llm_response( - function_name, - llm_response, - # enum_module - types, - # cls_module - types, - # partial_cls_module - stream_types, - # allow_partials - mode == "stream", - # ctx - __ctx__manager__.get(), - # tb - resolved_options.tb, - # cr - resolved_options.client_registry, - # env_vars - resolved_options.env_vars, - ) - - -def disassemble(function: typing.Callable) -> None: - import inspect - from . import b - - if not callable(function): - print(f"disassemble: object {function} is not a Baml function") - return - - is_client_method = False - - for (method_name, _) in inspect.getmembers(b, predicate=inspect.ismethod): - if method_name == function.__name__: - is_client_method = True - break - - if not is_client_method: - print(f"disassemble: function {function.__name__} is not a Baml function") - return - - print(f"----- function {function.__name__} -----") - __runtime__.disassemble(function.__name__) \ No newline at end of file diff --git a/baml_agents/baml_client/stream_types.py b/baml_agents/baml_client/stream_types.py deleted file mode 100644 index 0f2d9cf..0000000 --- a/baml_agents/baml_client/stream_types.py +++ /dev/null @@ -1,69 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. - -import typing -import typing_extensions -from pydantic import BaseModel, ConfigDict, Field - -import baml_py - -from . 
import types - -StreamStateValueT = typing.TypeVar('StreamStateValueT') -class StreamState(BaseModel, typing.Generic[StreamStateValueT]): - value: StreamStateValueT - state: typing_extensions.Literal["Pending", "Incomplete", "Complete"] -# ######################################################################### -# Generated classes (5) -# ######################################################################### - -class DurableRecordDraft(BaseModel): - kind: typing.Optional[types.RecordKind] = Field(default=None, description='Durable record kind.') - title: typing.Optional[str] = Field(default=None, description='Short standalone durable title.') - body: typing.Optional[str] = Field(default=None, description='Compact standalone durable body.') - status: typing.Optional[types.RecordStatus] = Field(default=None, description='Usually active for reusable durable records.') - valid_from: typing.Optional[str] = None - valid_until: typing.Optional[str] = None - decision: typing.Optional[str] = None - why: typing.Optional[str] = None - alternatives: typing.Optional[str] = None - consequences: typing.Optional[str] = None - -class EpisodeDraft(BaseModel): - title: typing.Optional[str] = Field(default=None, description='Optional short title for the current-session episode. Runtime derives one if omitted.') - body: typing.Optional[str] = Field(default=None, description='Compact episode body. If omitted, runtime builds it from user_intent and what_happened.') - status: typing.Optional[types.RecordStatus] = Field(default=None, description='Use archived for routine/no-durable sessions; active only when the episode itself remains useful.') - user_intent: typing.Optional[str] = Field(default=None, description='What the user wanted in this source session. Runtime fills a generic fallback if omitted.') - what_happened: typing.Optional[str] = Field(default=None, description='What the session actually did. Runtime fills a generic fallback if omitted.') - outcomes: typing.Optional[str] = Field(default=None, description='Optional concise outcome.') - -class SynthesizedExtraction(BaseModel): - episode: typing.Optional["EpisodeDraft"] = Field(default=None, description='Exactly one current-session episode record draft.') - durable_records: typing.List["DurableRecordDraft"] = Field(description='Zero or more durable records.') - completion_summary: typing.Optional[str] = Field(default=None, description='Brief summary of extraction work for final_result/reporting.') - -class TraceWindowFinding(BaseModel): - theme: typing.Optional[str] = Field(default=None, description='Short stable theme for this finding.') - level: typing.Optional[types.FindingLevel] = Field(default=None, description='Use durable levels for reusable context, implementation for local/noisy evidence.') - line: typing.Optional[int] = Field(default=None, description='1-based supporting line when the window gives one.') - quote: typing.Optional[str] = Field(default=None, description='Short supporting quote from the current window.') - note: typing.Optional[str] = Field(default=None, description='Compact semantic finding. Avoid command logs and copied errors.') - -class TraceWindowScan(BaseModel): - episode_update: typing.Optional[str] = Field(default=None, description='Compact update for the final episode summary. 
May be omitted when this window adds nothing.') - durable_findings: typing.List["TraceWindowFinding"] = Field(description='Reusable decisions, preferences, constraints, facts, and references found in this window.') - implementation_findings: typing.List["TraceWindowFinding"] = Field(description='Implementation evidence, discarded hypotheses, and local details useful only as support/noise.') - discarded_noise: typing.List[str] = Field(description='Short descriptions of noisy categories intentionally not saved.') - -# ######################################################################### -# Generated type aliases (0) -# ######################################################################### diff --git a/baml_agents/baml_client/sync_client.py b/baml_agents/baml_client/sync_client.py deleted file mode 100644 index e5a104b..0000000 --- a/baml_agents/baml_client/sync_client.py +++ /dev/null @@ -1,204 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. - -import typing -import typing_extensions -import baml_py - -from . import stream_types, types, type_builder -from .parser import LlmResponseParser, LlmStreamParser -from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions -from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__ - -class BamlSyncClient: - __options: DoNotUseDirectlyCallManager - __stream_client: "BamlStreamClient" - __http_request: "BamlHttpRequestClient" - __http_stream_request: "BamlHttpStreamRequestClient" - __llm_response_parser: LlmResponseParser - __llm_stream_parser: LlmStreamParser - - def __init__(self, options: DoNotUseDirectlyCallManager): - self.__options = options - self.__stream_client = BamlStreamClient(options) - self.__http_request = BamlHttpRequestClient(options) - self.__http_stream_request = BamlHttpStreamRequestClient(options) - self.__llm_response_parser = LlmResponseParser(options) - self.__llm_stream_parser = LlmStreamParser(options) - - def __getstate__(self): - # Return state needed for pickling - return {"options": self.__options} - - def __setstate__(self, state): - # Restore state from pickling - self.__options = state["options"] - self.__stream_client = BamlStreamClient(self.__options) - self.__http_request = BamlHttpRequestClient(self.__options) - self.__http_stream_request = BamlHttpStreamRequestClient(self.__options) - self.__llm_response_parser = LlmResponseParser(self.__options) - self.__llm_stream_parser = LlmStreamParser(self.__options) - - def with_options(self, - tb: typing.Optional[type_builder.TypeBuilder] = None, - client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] = None, - client: typing.Optional[str] = None, - collector: typing.Optional[typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]]] = None, - env: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None, - tags: typing.Optional[typing.Dict[str, str]] = None, - on_tick: typing.Optional[typing.Callable[[str, baml_py.baml_py.FunctionLog], None]] = None, - ) -> "BamlSyncClient": - options: BamlCallOptions = {} - if tb is not None: - options["tb"] = tb - if client_registry is not 
None: - options["client_registry"] = client_registry - if client is not None: - options["client"] = client - if collector is not None: - options["collector"] = collector - if env is not None: - options["env"] = env - if tags is not None: - options["tags"] = tags - if on_tick is not None: - options["on_tick"] = on_tick - return BamlSyncClient(self.__options.merge_options(options)) - - @property - def stream(self): - return self.__stream_client - - @property - def request(self): - return self.__http_request - - @property - def stream_request(self): - return self.__http_stream_request - - @property - def parse(self): - return self.__llm_response_parser - - @property - def parse_stream(self): - return self.__llm_stream_parser - - def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, - baml_options: BamlCallOptions = {}, - ) -> types.TraceWindowScan: - # Check if on_tick is provided - if 'on_tick' in baml_options: - __stream__ = self.stream.ScanTraceWindow(run_instruction=run_instruction,prior_episode_summary=prior_episode_summary,prior_findings_summary=prior_findings_summary,trace_window=trace_window, - baml_options=baml_options) - return __stream__.get_final_response() - else: - # Original non-streaming code - __result__ = self.__options.merge_options(baml_options).call_function_sync(function_name="ScanTraceWindow", args={ - "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, - }) - return typing.cast(types.TraceWindowScan, __result__.cast_to(types, types, stream_types, False, __runtime__)) - def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, - baml_options: BamlCallOptions = {}, - ) -> types.SynthesizedExtraction: - # Check if on_tick is provided - if 'on_tick' in baml_options: - __stream__ = self.stream.SynthesizeExtractRecords(run_instruction=run_instruction,episode_summary=episode_summary,durable_findings_summary=durable_findings_summary,existing_record_manifest=existing_record_manifest, - baml_options=baml_options) - return __stream__.get_final_response() - else: - # Original non-streaming code - __result__ = self.__options.merge_options(baml_options).call_function_sync(function_name="SynthesizeExtractRecords", args={ - "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, - }) - return typing.cast(types.SynthesizedExtraction, __result__.cast_to(types, types, stream_types, False, __runtime__)) - - - -class BamlStreamClient: - __options: DoNotUseDirectlyCallManager - - def __init__(self, options: DoNotUseDirectlyCallManager): - self.__options = options - - def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlSyncStream[stream_types.TraceWindowScan, types.TraceWindowScan]: - __ctx__, __result__ = self.__options.merge_options(baml_options).create_sync_stream(function_name="ScanTraceWindow", args={ - "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, - }) - return baml_py.BamlSyncStream[stream_types.TraceWindowScan, types.TraceWindowScan]( - __result__, - lambda x: 
typing.cast(stream_types.TraceWindowScan, x.cast_to(types, types, stream_types, True, __runtime__)), - lambda x: typing.cast(types.TraceWindowScan, x.cast_to(types, types, stream_types, False, __runtime__)), - __ctx__, - ) - def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlSyncStream[stream_types.SynthesizedExtraction, types.SynthesizedExtraction]: - __ctx__, __result__ = self.__options.merge_options(baml_options).create_sync_stream(function_name="SynthesizeExtractRecords", args={ - "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, - }) - return baml_py.BamlSyncStream[stream_types.SynthesizedExtraction, types.SynthesizedExtraction]( - __result__, - lambda x: typing.cast(stream_types.SynthesizedExtraction, x.cast_to(types, types, stream_types, True, __runtime__)), - lambda x: typing.cast(types.SynthesizedExtraction, x.cast_to(types, types, stream_types, False, __runtime__)), - __ctx__, - ) - - -class BamlHttpRequestClient: - __options: DoNotUseDirectlyCallManager - - def __init__(self, options: DoNotUseDirectlyCallManager): - self.__options = options - - def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.baml_py.HTTPRequest: - __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ScanTraceWindow", args={ - "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, - }, mode="request") - return __result__ - def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.baml_py.HTTPRequest: - __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SynthesizeExtractRecords", args={ - "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, - }, mode="request") - return __result__ - - -class BamlHttpStreamRequestClient: - __options: DoNotUseDirectlyCallManager - - def __init__(self, options: DoNotUseDirectlyCallManager): - self.__options = options - - def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.baml_py.HTTPRequest: - __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ScanTraceWindow", args={ - "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, - }, mode="stream") - return __result__ - def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.baml_py.HTTPRequest: - __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SynthesizeExtractRecords", args={ - "run_instruction": run_instruction,"episode_summary": 
episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, - }, mode="stream") - return __result__ - - -b = BamlSyncClient(DoNotUseDirectlyCallManager({})) \ No newline at end of file diff --git a/baml_agents/baml_client/tracing.py b/baml_agents/baml_client/tracing.py deleted file mode 100644 index 0672559..0000000 --- a/baml_agents/baml_client/tracing.py +++ /dev/null @@ -1,22 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. - -from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX - -trace = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.trace_fn -set_tags = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.upsert_tags -def flush(): - DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.flush() -on_log_event = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.on_log_event - - -__all__ = ['trace', 'set_tags', "flush", "on_log_event"] diff --git a/baml_agents/baml_client/type_builder.py b/baml_agents/baml_client/type_builder.py deleted file mode 100644 index 8f43731..0000000 --- a/baml_agents/baml_client/type_builder.py +++ /dev/null @@ -1,523 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. 
- -import typing -from baml_py import type_builder -from baml_py import baml_py -# These are exports, not used here, hence the linter is disabled -from baml_py.baml_py import FieldType, EnumValueBuilder, EnumBuilder, ClassBuilder # noqa: F401 # pylint: disable=unused-import -from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME - -class TypeBuilder(type_builder.TypeBuilder): - def __init__(self): - super().__init__(classes=set( - ["DurableRecordDraft","EpisodeDraft","SynthesizedExtraction","TraceWindowFinding","TraceWindowScan",] - ), enums=set( - ["FindingLevel","RecordKind","RecordStatus",] - ), runtime=DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME) - - # ######################################################################### - # Generated enums 3 - # ######################################################################### - - @property - def FindingLevel(self) -> "FindingLevelViewer": - return FindingLevelViewer(self) - - @property - def RecordKind(self) -> "RecordKindViewer": - return RecordKindViewer(self) - - @property - def RecordStatus(self) -> "RecordStatusViewer": - return RecordStatusViewer(self) - - - # ######################################################################### - # Generated classes 5 - # ######################################################################### - - @property - def DurableRecordDraft(self) -> "DurableRecordDraftViewer": - return DurableRecordDraftViewer(self) - - @property - def EpisodeDraft(self) -> "EpisodeDraftViewer": - return EpisodeDraftViewer(self) - - @property - def SynthesizedExtraction(self) -> "SynthesizedExtractionViewer": - return SynthesizedExtractionViewer(self) - - @property - def TraceWindowFinding(self) -> "TraceWindowFindingViewer": - return TraceWindowFindingViewer(self) - - @property - def TraceWindowScan(self) -> "TraceWindowScanViewer": - return TraceWindowScanViewer(self) - - - -# ######################################################################### -# Generated enums 3 -# ######################################################################### - -class FindingLevelAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.enum("FindingLevel") - self._values: typing.Set[str] = set([ "DECISION", "PREFERENCE", "FEEDBACK", "REFERENCE", "CONSTRAINT", "FACT", "IMPLEMENTATION", ]) - self._vals = FindingLevelValues(self._bldr, self._values) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - - @property - def values(self) -> "FindingLevelValues": - return self._vals - - -class FindingLevelViewer(FindingLevelAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_values(self) -> typing.List[typing.Tuple[str, type_builder.EnumValueViewer]]: - return [(name, type_builder.EnumValueViewer(self._bldr.value(name))) for name in self._values] - - -class FindingLevelValues: - def __init__(self, enum_bldr: baml_py.EnumBuilder, values: typing.Set[str]): - self.__bldr = enum_bldr - self.__values = values # type: ignore (we know how to use this private attribute) # noqa: F821 - - - - @property - def DECISION(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("DECISION")) - - @property - def PREFERENCE(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("PREFERENCE")) - - @property - def FEEDBACK(self) -> type_builder.EnumValueViewer: - return 
type_builder.EnumValueViewer(self.__bldr.value("FEEDBACK")) - - @property - def REFERENCE(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("REFERENCE")) - - @property - def CONSTRAINT(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("CONSTRAINT")) - - @property - def FACT(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("FACT")) - - @property - def IMPLEMENTATION(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("IMPLEMENTATION")) - - - - -class RecordKindAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.enum("RecordKind") - self._values: typing.Set[str] = set([ "DECISION", "PREFERENCE", "CONSTRAINT", "FACT", "REFERENCE", ]) - self._vals = RecordKindValues(self._bldr, self._values) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - - @property - def values(self) -> "RecordKindValues": - return self._vals - - -class RecordKindViewer(RecordKindAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_values(self) -> typing.List[typing.Tuple[str, type_builder.EnumValueViewer]]: - return [(name, type_builder.EnumValueViewer(self._bldr.value(name))) for name in self._values] - - -class RecordKindValues: - def __init__(self, enum_bldr: baml_py.EnumBuilder, values: typing.Set[str]): - self.__bldr = enum_bldr - self.__values = values # type: ignore (we know how to use this private attribute) # noqa: F821 - - - - @property - def DECISION(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("DECISION")) - - @property - def PREFERENCE(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("PREFERENCE")) - - @property - def CONSTRAINT(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("CONSTRAINT")) - - @property - def FACT(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("FACT")) - - @property - def REFERENCE(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("REFERENCE")) - - - - -class RecordStatusAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.enum("RecordStatus") - self._values: typing.Set[str] = set([ "ACTIVE", "ARCHIVED", ]) - self._vals = RecordStatusValues(self._bldr, self._values) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - - @property - def values(self) -> "RecordStatusValues": - return self._vals - - -class RecordStatusViewer(RecordStatusAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_values(self) -> typing.List[typing.Tuple[str, type_builder.EnumValueViewer]]: - return [(name, type_builder.EnumValueViewer(self._bldr.value(name))) for name in self._values] - - -class RecordStatusValues: - def __init__(self, enum_bldr: baml_py.EnumBuilder, values: typing.Set[str]): - self.__bldr = enum_bldr - self.__values = values # type: ignore (we know how to use this private attribute) # noqa: F821 - - - - @property - def ACTIVE(self) -> type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("ACTIVE")) - - @property - def ARCHIVED(self) -> 
type_builder.EnumValueViewer: - return type_builder.EnumValueViewer(self.__bldr.value("ARCHIVED")) - - - - - -# ######################################################################### -# Generated classes 5 -# ######################################################################### - -class DurableRecordDraftAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("DurableRecordDraft") - self._properties: typing.Set[str] = set([ "kind", "title", "body", "status", "valid_from", "valid_until", "decision", "why", "alternatives", "consequences", ]) - self._props = DurableRecordDraftProperties(self._bldr, self._properties) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - - @property - def props(self) -> "DurableRecordDraftProperties": - return self._props - - -class DurableRecordDraftViewer(DurableRecordDraftAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] - - - -class DurableRecordDraftProperties: - def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): - self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 - - - - @property - def kind(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("kind")) - - @property - def title(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("title")) - - @property - def body(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("body")) - - @property - def status(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("status")) - - @property - def valid_from(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("valid_from")) - - @property - def valid_until(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("valid_until")) - - @property - def decision(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("decision")) - - @property - def why(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("why")) - - @property - def alternatives(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("alternatives")) - - @property - def consequences(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("consequences")) - - - - -class EpisodeDraftAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("EpisodeDraft") - self._properties: typing.Set[str] = set([ "title", "body", "status", "user_intent", "what_happened", "outcomes", ]) - self._props = EpisodeDraftProperties(self._bldr, self._properties) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - - @property - def props(self) -> "EpisodeDraftProperties": - return self._props - - -class EpisodeDraftViewer(EpisodeDraftAst): - def 
__init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] - - - -class EpisodeDraftProperties: - def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): - self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 - - - - @property - def title(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("title")) - - @property - def body(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("body")) - - @property - def status(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("status")) - - @property - def user_intent(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("user_intent")) - - @property - def what_happened(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("what_happened")) - - @property - def outcomes(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("outcomes")) - - - - -class SynthesizedExtractionAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("SynthesizedExtraction") - self._properties: typing.Set[str] = set([ "episode", "durable_records", "completion_summary", ]) - self._props = SynthesizedExtractionProperties(self._bldr, self._properties) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - - @property - def props(self) -> "SynthesizedExtractionProperties": - return self._props - - -class SynthesizedExtractionViewer(SynthesizedExtractionAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] - - - -class SynthesizedExtractionProperties: - def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): - self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 - - - - @property - def episode(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("episode")) - - @property - def durable_records(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("durable_records")) - - @property - def completion_summary(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("completion_summary")) - - - - -class TraceWindowFindingAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("TraceWindowFinding") - self._properties: typing.Set[str] = set([ "theme", "level", "line", "quote", "note", ]) - self._props = TraceWindowFindingProperties(self._bldr, self._properties) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - - @property - def props(self) -> 
"TraceWindowFindingProperties": - return self._props - - -class TraceWindowFindingViewer(TraceWindowFindingAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] - - - -class TraceWindowFindingProperties: - def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): - self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 - - - - @property - def theme(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("theme")) - - @property - def level(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("level")) - - @property - def line(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("line")) - - @property - def quote(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("quote")) - - @property - def note(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("note")) - - - - -class TraceWindowScanAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("TraceWindowScan") - self._properties: typing.Set[str] = set([ "episode_update", "durable_findings", "implementation_findings", "discarded_noise", ]) - self._props = TraceWindowScanProperties(self._bldr, self._properties) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - - @property - def props(self) -> "TraceWindowScanProperties": - return self._props - - -class TraceWindowScanViewer(TraceWindowScanAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - - def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] - - - -class TraceWindowScanProperties: - def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): - self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 - - - - @property - def episode_update(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("episode_update")) - - @property - def durable_findings(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("durable_findings")) - - @property - def implementation_findings(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("implementation_findings")) - - @property - def discarded_noise(self) -> type_builder.ClassPropertyViewer: - return type_builder.ClassPropertyViewer(self.__bldr.property("discarded_noise")) - - - diff --git a/baml_agents/baml_client/type_map.py b/baml_agents/baml_client/type_map.py deleted file mode 100644 index 6f67bf0..0000000 --- a/baml_agents/baml_client/type_map.py +++ /dev/null @@ -1,41 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! 
To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. - -from . import types -from . import stream_types - - -type_map = { - - "types.DurableRecordDraft": types.DurableRecordDraft, - "stream_types.DurableRecordDraft": stream_types.DurableRecordDraft, - - "types.EpisodeDraft": types.EpisodeDraft, - "stream_types.EpisodeDraft": stream_types.EpisodeDraft, - - "types.SynthesizedExtraction": types.SynthesizedExtraction, - "stream_types.SynthesizedExtraction": stream_types.SynthesizedExtraction, - - "types.TraceWindowFinding": types.TraceWindowFinding, - "stream_types.TraceWindowFinding": stream_types.TraceWindowFinding, - - "types.TraceWindowScan": types.TraceWindowScan, - "stream_types.TraceWindowScan": stream_types.TraceWindowScan, - - - "types.FindingLevel": types.FindingLevel, - - "types.RecordKind": types.RecordKind, - - "types.RecordStatus": types.RecordStatus, - -} \ No newline at end of file diff --git a/baml_agents/baml_client/types.py b/baml_agents/baml_client/types.py deleted file mode 100644 index 95d7f90..0000000 --- a/baml_agents/baml_client/types.py +++ /dev/null @@ -1,107 +0,0 @@ -# ---------------------------------------------------------------------------- -# -# Welcome to Baml! To use this generated code, please run the following: -# -# $ pip install baml -# -# ---------------------------------------------------------------------------- - -# This file was generated by BAML: please do not edit it. Instead, edit the -# BAML files and re-generate this code using: baml-cli generate -# baml-cli is available with the baml package. 
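The non-streaming models below mirror the `stream_types` variants, with required fields instead of optionals. A small constructed example, assuming only the field names and shapes shown in this file (all values are hypothetical):

```python
# Hypothetical values; only the field names and types come from this module.
from baml_client import types

finding = types.TraceWindowFinding(
    theme="eval-first prompt changes",
    level=types.FindingLevel.PREFERENCE,
    line=1,
    quote="run extraction evals before changing extraction prompts",
    note="Run the extraction eval before changing extraction prompts.",
)
scan = types.TraceWindowScan(
    episode_update="Session updated the extraction harness and ran the eval.",
    durable_findings=[finding],
    implementation_findings=[],
    discarded_noise=["formatter and lint command logs"],
)
assert scan.durable_findings[0].level is types.FindingLevel.PREFERENCE
```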
diff --git a/baml_agents/baml_client/types.py b/baml_agents/baml_client/types.py
deleted file mode 100644
index 95d7f90..0000000
--- a/baml_agents/baml_client/types.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# ----------------------------------------------------------------------------
-#
-# Welcome to Baml! To use this generated code, please run the following:
-#
-# $ pip install baml
-#
-# ----------------------------------------------------------------------------
-
-# This file was generated by BAML: please do not edit it. Instead, edit the
-# BAML files and re-generate this code using: baml-cli generate
-# baml-cli is available with the baml package.
-
-import typing
-import typing_extensions
-from enum import Enum
-
-
-from pydantic import BaseModel, ConfigDict, Field
-
-
-import baml_py
-
-CheckT = typing_extensions.TypeVar('CheckT')
-CheckName = typing_extensions.TypeVar('CheckName', bound=str)
-
-class Check(BaseModel):
-    name: str
-    expression: str
-    status: str
-class Checked(BaseModel, typing.Generic[CheckT, CheckName]):
-    value: CheckT
-    checks: typing.Dict[CheckName, Check]
-
-def get_checks(checks: typing.Dict[CheckName, Check]) -> typing.List[Check]:
-    return list(checks.values())
-
-def all_succeeded(checks: typing.Dict[CheckName, Check]) -> bool:
-    return all(check.status == "succeeded" for check in get_checks(checks))
-# #########################################################################
-# Generated enums (3)
-# #########################################################################
-
-class FindingLevel(str, Enum):
-    DECISION = "DECISION"
-    PREFERENCE = "PREFERENCE"
-    FEEDBACK = "FEEDBACK"
-    REFERENCE = "REFERENCE"
-    CONSTRAINT = "CONSTRAINT"
-    FACT = "FACT"
-    IMPLEMENTATION = "IMPLEMENTATION"
-
-class RecordKind(str, Enum):
-    DECISION = "DECISION"
-    PREFERENCE = "PREFERENCE"
-    CONSTRAINT = "CONSTRAINT"
-    FACT = "FACT"
-    REFERENCE = "REFERENCE"
-
-class RecordStatus(str, Enum):
-    ACTIVE = "ACTIVE"
-    ARCHIVED = "ARCHIVED"
-
-# #########################################################################
-# Generated classes (5)
-# #########################################################################
-
-class DurableRecordDraft(BaseModel):
-    kind: RecordKind = Field(description='Durable record kind.')
-    title: str = Field(description='Short standalone durable title.')
-    body: str = Field(description='Compact standalone durable body.')
-    status: typing.Optional[RecordStatus] = Field(default=None, description='Usually active for reusable durable records.')
-    valid_from: typing.Optional[str] = None
-    valid_until: typing.Optional[str] = None
-    decision: typing.Optional[str] = None
-    why: typing.Optional[str] = None
-    alternatives: typing.Optional[str] = None
-    consequences: typing.Optional[str] = None
-
-class EpisodeDraft(BaseModel):
-    title: typing.Optional[str] = Field(default=None, description='Optional short title for the current-session episode. Runtime derives one if omitted.')
-    body: typing.Optional[str] = Field(default=None, description='Compact episode body. If omitted, runtime builds it from user_intent and what_happened.')
-    status: typing.Optional[RecordStatus] = Field(default=None, description='Use archived for routine/no-durable sessions; active only when the episode itself remains useful.')
-    user_intent: typing.Optional[str] = Field(default=None, description='What the user wanted in this source session. Runtime fills a generic fallback if omitted.')
-    what_happened: typing.Optional[str] = Field(default=None, description='What the session actually did. Runtime fills a generic fallback if omitted.')
-    outcomes: typing.Optional[str] = Field(default=None, description='Optional concise outcome.')
-
-class SynthesizedExtraction(BaseModel):
-    episode: "EpisodeDraft" = Field(description='Exactly one current-session episode record draft.')
-    durable_records: typing.List["DurableRecordDraft"] = Field(description='Zero or more durable records.')
-    completion_summary: typing.Optional[str] = Field(default=None, description='Brief summary of extraction work for final_result/reporting.')
-
-class TraceWindowFinding(BaseModel):
-    theme: str = Field(description='Short stable theme for this finding.')
-    level: FindingLevel = Field(description='Use durable levels for reusable context, implementation for local/noisy evidence.')
-    line: typing.Optional[int] = Field(default=None, description='1-based supporting line when the window gives one.')
-    quote: typing.Optional[str] = Field(default=None, description='Short supporting quote from the current window.')
-    note: str = Field(description='Compact semantic finding. Avoid command logs and copied errors.')
-
-class TraceWindowScan(BaseModel):
-    episode_update: typing.Optional[str] = Field(default=None, description='Compact update for the final episode summary. May be omitted when this window adds nothing.')
-    durable_findings: typing.List["TraceWindowFinding"] = Field(description='Reusable decisions, preferences, constraints, facts, and references found in this window.')
-    implementation_findings: typing.List["TraceWindowFinding"] = Field(description='Implementation evidence, discarded hypotheses, and local details useful only as support/noise.')
-    discarded_noise: typing.List[str] = Field(description='Short descriptions of noisy categories intentionally not saved.')
-
-# #########################################################################
-# Generated type aliases (0)
-# #########################################################################
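`types.py` also carries the generic `Checked` wrapper that BAML uses for `@check` assertions; `all_succeeded` is simply a fold over the per-check statuses. A small hand-built sketch of the intended call pattern (the check itself is invented; in real use these objects come back from a BAML function that declares `@check`):

```python
from baml_client.types import Check, Checked, all_succeeded

# Hand-built stand-ins for what a checked BAML result would contain.
check = Check(name="non_negative", expression="this >= 0", status="succeeded")
wrapped = Checked(value=7, checks={"non_negative": check})

assert all_succeeded(wrapped.checks)  # True only when every check succeeded
```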
diff --git a/baml_agents/baml_client/watchers.py b/baml_agents/baml_client/watchers.py
deleted file mode 100644
index 347146f..0000000
--- a/baml_agents/baml_client/watchers.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# ----------------------------------------------------------------------------
-#
-# Welcome to Baml! To use this generated code, please run the following:
-#
-# $ pip install baml
-#
-# ----------------------------------------------------------------------------
-
-# This file was generated by BAML: please do not edit it. Instead, edit the
-# BAML files and re-generate this code using: baml-cli generate
-# baml-cli is available with the baml package.
-
-from typing import Callable, Any, Protocol, Generic, TypeVar, overload, Literal
-import threading
-
-T = TypeVar("T")
-
-class BlockEvent:
-    def __init__(self, block_label: str, event_type: str):
-        self.block_label = block_label
-        self.event_type = event_type # "enter" | "exit"
-
-class VarEvent(Generic[T]):
-    def __init__(self, variable_name: str, value: T, timestamp: str, function_name: str):
-        self.variable_name = variable_name
-        self.value = value
-        self.timestamp = timestamp
-        self.function_name = function_name
-
-BlockHandler = Callable[[BlockEvent], None]
-VarEventHandler = Callable[[VarEvent[T]], None]
-StreamHandler = Callable[[Any], None] # Stream will be an async iterator
-
-class InternalEventBindings(Protocol):
-    function_name: str
-    block: list[BlockHandler]
-    vars: dict[str, list[VarEventHandler[Any]]]
-    streams: dict[str, list[StreamHandler]]
-    functions: dict[str, "InternalEventBindings"]
-
-class EventCollectorInternal(Protocol):
-    def __handlers__(self) -> InternalEventBindings:
-        ...
-
diff --git a/baml_agents/baml_extract_agent/__init__.py b/baml_agents/baml_extract_agent/__init__.py
deleted file mode 100644
index ed6c524..0000000
--- a/baml_agents/baml_extract_agent/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""Minimal BAML plus LangGraph extraction-agent experiment."""
-
-from baml_extract_agent.graph import run_baml_extraction
-
-__all__ = ["run_baml_extraction"]
diff --git a/baml_agents/baml_extract_agent/graph.py b/baml_agents/baml_extract_agent/graph.py
deleted file mode 100644
index 3edef0d..0000000
--- a/baml_agents/baml_extract_agent/graph.py
+++ /dev/null
@@ -1,651 +0,0 @@
-"""Windowed LangGraph extraction pipeline whose LLM steps are produced by BAML."""
-
-from __future__ import annotations
-
-from datetime import datetime, timezone
-import math
-import operator
-from pathlib import Path
-from typing import Annotated, Any
-from typing_extensions import TypedDict
-
-from baml_py import ClientRegistry
-from langgraph.graph import END, START, StateGraph
-
-from baml_client.sync_client import b
-from lerim.agents.extract import _format_existing_record_manifest
-from lerim.config.settings import get_config
-from lerim.agents.tools import (
-    CONTEXT_SOFT_PRESSURE_PCT,
-    _TOKENS_PER_CHAR,
-    MODEL_CONTEXT_TOKEN_LIMIT,
-    ContextDeps,
-    TRACE_MAX_CHUNK_BYTES,
-    TRACE_MAX_LINE_BYTES,
-    compute_request_budget,
-)
-from lerim.context import ProjectIdentity, resolve_project_identity
-
-from baml_extract_agent.tool_bridge import (
-    build_tool_context,
-    persist_synthesized_extraction,
-    prepare_context_deps,
-)
-
-
-MODEL_NAME = "MiniMax-M2.7"
-BAML_PROVIDER = "minimax"
-OLLAMA_BASE_URL = "http://127.0.0.1:11434/v1"
-MINIMAX_BASE_URL = "https://api.minimax.io/v1"
-MINIMAX_TEMPERATURE_FLOOR = 0.01
-MAX_BAML_MODEL_RETRIES = 3
-BAML_HTTP_CONNECT_TIMEOUT_MS = 10_000
-BAML_HTTP_TIME_TO_FIRST_TOKEN_TIMEOUT_MS = 120_000
-BAML_HTTP_IDLE_TIMEOUT_MS = 30_000
-BAML_HTTP_REQUEST_TIMEOUT_MS = 300_000
-WINDOW_RESERVE_TOKENS = 30_000
-MIN_WINDOW_CHARS = 20_000
-MAX_WINDOW_CHARS = TRACE_MAX_CHUNK_BYTES
-BAML_RECOVERABLE_ERROR_NAMES = {
-    "BamlClientFinishReasonError",
-    "BamlClientHttpError",
-    "BamlTimeoutError",
-    "BamlValidationError",
-}
-
-
-class WindowExtractGraphState(TypedDict, total=False):
-    """State for the windowed BAML extraction pipeline."""
-
-    observations: Annotated[list[dict[str, Any]], operator.add]
-    llm_calls: int
-    next_line: int
-    trace_total_lines: int
-    current_window: dict[str, Any]
-    episode_updates: Annotated[list[str], operator.add]
-    durable_findings: Annotated[list[dict[str, Any]], operator.add]
-    implementation_findings: Annotated[list[dict[str, Any]], operator.add]
-    discarded_noise: Annotated[list[str], operator.add]
-    synthesized: Any
-    done: bool
-    completion_summary: str
-
-
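Each `Annotated[list[...], operator.add]` field above is a LangGraph reducer annotation: when two nodes return partial updates for the same key, the lists are concatenated rather than overwritten, which is what lets every scanned window append its findings to the running state. A toy sketch of the same mechanism outside this pipeline (the two-node `ToyState` graph is invented for illustration):

```python
import operator
from typing import Annotated
from typing_extensions import TypedDict

from langgraph.graph import END, START, StateGraph


class ToyState(TypedDict, total=False):
    notes: Annotated[list[str], operator.add]


def first(state: ToyState) -> dict:
    return {"notes": ["from first node"]}


def second(state: ToyState) -> dict:
    return {"notes": ["from second node"]}


graph = StateGraph(ToyState)
graph.add_node("first", first)
graph.add_node("second", second)
graph.add_edge(START, "first")
graph.add_edge("first", "second")
graph.add_edge("second", END)

# operator.add merges the two partial updates: both notes survive.
result = graph.compile().invoke({"notes": []})
assert result["notes"] == ["from first node", "from second node"]
```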
-def run_baml_extraction(
-    *,
-    trace_path: Path,
-    context_db_path: Path,
-    project_root: Path | None = None,
-    session_id: str = "baml-extract-session",
-    session_started_at: str | None = None,
-    model_name: str = MODEL_NAME,
-    baml_provider: str = BAML_PROVIDER,
-    api_base_url: str | None = None,
-    api_key: str | None = None,
-    temperature: float = 0.0,
-    ollama_base_url: str = OLLAMA_BASE_URL,
-    max_llm_calls: int | None = None,
-    progress: bool = False,
-) -> dict[str, Any]:
-    """Run the minimal BAML plus LangGraph extraction experiment."""
-    resolved_trace_path = trace_path.expanduser().resolve()
-    resolved_context_db_path = context_db_path.expanduser().resolve()
-    identity = resolve_project_identity(project_root.expanduser().resolve() if project_root else Path.cwd())
-    started_at = session_started_at or datetime.now(timezone.utc).isoformat()
-    deps = prepare_context_deps(
-        context_db_path=resolved_context_db_path,
-        project_identity=identity,
-        session_id=session_id,
-        trace_path=resolved_trace_path,
-        session_started_at=started_at,
-        model_name=model_name,
-    )
-    existing_record_manifest = _format_existing_record_manifest(
-        context_db_path=resolved_context_db_path,
-        project_identity=identity,
-    )
-    run_instruction = _build_run_instruction(
-        context_db_path=resolved_context_db_path,
-        project_identity=identity,
-        trace_path=resolved_trace_path,
-        session_started_at=started_at,
-        existing_record_manifest=existing_record_manifest,
-    )
-    graph = build_windowed_extract_graph(
-        deps=deps,
-        trace_path=resolved_trace_path,
-        run_instruction=run_instruction,
-        existing_record_manifest=existing_record_manifest,
-        model_name=model_name,
-        baml_provider=baml_provider,
-        api_base_url=api_base_url,
-        api_key=api_key,
-        temperature=temperature,
-        ollama_base_url=ollama_base_url,
-        max_llm_calls=max_llm_calls or compute_request_budget(resolved_trace_path),
-        progress=progress,
-    )
-    final_state = graph.invoke(
-        {
-            "observations": [],
-            "llm_calls": 0,
-            "next_line": 1,
-            "trace_total_lines": _trace_line_count(resolved_trace_path),
-            "done": False,
-            "completion_summary": "",
-        }
-    )
-    return {
-        "completion_summary": final_state.get("completion_summary", ""),
-        "llm_calls": final_state.get("llm_calls", 0),
-        "observations": final_state.get("observations", []),
-        "done": bool(final_state.get("done")),
-        "context_db_path": str(resolved_context_db_path),
-        "project_id": identity.project_id,
-        "session_id": session_id,
-        "model_name": model_name,
-        "baml_provider": baml_provider,
-    }
-
-
-def build_windowed_extract_graph(
-    *,
-    deps: ContextDeps,
-    trace_path: Path,
-    run_instruction: str,
-    existing_record_manifest: str,
-    model_name: str,
-    baml_provider: str,
-    api_base_url: str | None,
-    api_key: str | None,
-    temperature: float,
-    ollama_base_url: str,
-    max_llm_calls: int,
-    progress: bool = False,
-):
-    """Compile the windowed scan -> synthesize -> persist extraction graph."""
-    runtime_context = build_tool_context(deps)
-    baml_runtime = _baml_client_for_model(
-        model_name=model_name,
-        baml_provider=baml_provider,
-        api_base_url=api_base_url,
-        api_key=api_key,
-        temperature=temperature,
-        ollama_base_url=ollama_base_url,
-    )
-
-    def read_window(state: WindowExtractGraphState) -> dict[str, Any]:
-        """Read the next budgeted trace window into transient state."""
-        total_lines = int(state.get("trace_total_lines") or 0)
-        start_line = int(state.get("next_line") or 1)
-        if start_line > total_lines:
-            return {"current_window": {}}
-        char_budget = _window_char_budget(
-            state=state,
-            run_instruction=run_instruction,
-            existing_record_manifest=existing_record_manifest,
-        )
-        window = _read_trace_window(
-            trace_path=trace_path,
-            start_line=start_line,
-            total_lines=total_lines,
-            char_budget=char_budget,
-        )
-        deps.trace_total_lines = total_lines
-        deps.read_ranges.append((window["start_line"] - 1, window["end_line"]))
-        if progress:
-            print(
-                f" baml window {window['start_line']}-{window['end_line']} "
-                f"chars={len(window['text'])}",
-                flush=True,
-            )
-        return {
-            "current_window": window,
-            "next_line": int(window["end_line"]) + 1,
-            "observations": [
-                {
-                    "action": "read_window",
-                    "ok": True,
-                    "content": window["header"],
-                    "args": {
-                        "start_line": window["start_line"],
-                        "end_line": window["end_line"],
-                        "char_budget": char_budget,
-                    },
-                    "done": False,
-                    "completion_summary": "",
-                }
-            ],
-        }
-
-    def scan_window(state: WindowExtractGraphState) -> dict[str, Any]:
-        """Scan the current window into compact episode/findings state."""
-        llm_calls = int(state.get("llm_calls") or 0)
-        if llm_calls >= max_llm_calls:
-            raise RuntimeError(
-                f"BAML extraction exceeded max_llm_calls={max_llm_calls}."
-            )
-        window = state.get("current_window") or {}
-        if not window.get("text"):
-            return {}
-        if progress:
-            print(f" baml scan {llm_calls + 1}/{max_llm_calls}", flush=True)
-        result, retry_observations, attempts = _call_baml_with_retries(
-            lambda: baml_runtime.ScanTraceWindow(
-                run_instruction=run_instruction,
-                prior_episode_summary=_episode_summary(state),
-                prior_findings_summary=_findings_summary(state),
-                trace_window=str(window["text"]),
-            ),
-            stage="scan_window",
-            progress=progress,
-        )
-        payload = _model_payload(result)
-        episode_update = str(payload.get("episode_update") or "").strip()
-        durable = [_model_payload(item) for item in payload.get("durable_findings") or []]
-        implementation = [
-            _model_payload(item)
-            for item in payload.get("implementation_findings") or []
-        ]
-        noise = [
-            str(item).strip()
-            for item in payload.get("discarded_noise") or []
-            if str(item).strip()
-        ]
-        return {
-            "llm_calls": llm_calls + attempts,
-            "episode_updates": [episode_update] if episode_update else [],
-            "durable_findings": durable,
-            "implementation_findings": implementation,
-            "discarded_noise": noise,
-            "observations": [
-                *retry_observations,
-                {
-                    "action": "scan_window",
-                    "ok": True,
-                    "content": (
-                        f"window={window.get('start_line')}-{window.get('end_line')} "
-                        f"durable={len(durable)} implementation={len(implementation)}"
-                    ),
-                    "args": {
-                        "start_line": window.get("start_line"),
-                        "end_line": window.get("end_line"),
-                    },
-                    "done": False,
-                    "completion_summary": "",
-                },
-            ],
-        }
-
-    def synthesize_records(state: WindowExtractGraphState) -> dict[str, Any]:
-        """Synthesize final episode and durable record candidates."""
-        llm_calls = int(state.get("llm_calls") or 0)
-        if llm_calls >= max_llm_calls:
-            raise RuntimeError(
-                f"BAML extraction exceeded max_llm_calls={max_llm_calls}."
-            )
-        if progress:
-            print(f" baml synth {llm_calls + 1}/{max_llm_calls}", flush=True)
-        result, retry_observations, attempts = _call_baml_with_retries(
-            lambda: baml_runtime.SynthesizeExtractRecords(
-                run_instruction=run_instruction,
-                episode_summary=_episode_summary(state),
-                durable_findings_summary=_durable_findings_summary(state),
-                existing_record_manifest=existing_record_manifest or "(none)",
-            ),
-            stage="synthesize_records",
-            progress=progress,
-        )
-        payload = _model_payload(result)
-        durable_count = len(payload.get("durable_records") or [])
-        return {
-            "llm_calls": llm_calls + attempts,
-            "synthesized": result,
-            "observations": [
-                *retry_observations,
-                {
-                    "action": "synthesize_records",
-                    "ok": True,
-                    "content": f"durable_records={durable_count}",
-                    "args": {},
-                    "done": False,
-                    "completion_summary": "",
-                },
-            ],
-        }
-
-    def persist_records(state: WindowExtractGraphState) -> dict[str, Any]:
-        """Persist synthesized records and finish the graph."""
-        runtime_context.deps.findings_checked = True
-        observations, done, completion_summary = persist_synthesized_extraction(
-            state.get("synthesized"),
-            runtime_context,
-        )
-        if progress:
-            print(f" baml persist done={done}", flush=True)
-        return {
-            "observations": observations,
-            "done": done,
-            "completion_summary": completion_summary,
-        }
-
-    def after_scan(state: WindowExtractGraphState) -> str:
-        """Continue scanning until all trace lines are covered."""
-        next_line = int(state.get("next_line") or 1)
-        total_lines = int(state.get("trace_total_lines") or 0)
-        if next_line <= total_lines:
-            return "read_window"
-        return "synthesize_records"
-
-    graph = StateGraph(WindowExtractGraphState)
-    graph.add_node("read_window", read_window)
-    graph.add_node("scan_window", scan_window)
-    graph.add_node("synthesize_records", synthesize_records)
-    graph.add_node("persist_records", persist_records)
-    graph.add_edge(START, "read_window")
-    graph.add_edge("read_window", "scan_window")
-    graph.add_conditional_edges(
-        "scan_window",
-        after_scan,
-        ["read_window", "synthesize_records"],
-    )
-    graph.add_edge("synthesize_records", "persist_records")
-    graph.add_edge("persist_records", END)
-    return graph.compile()
-
-
-def _trace_line_count(trace_path: Path) -> int:
-    """Return the number of lines in a trace file."""
-    try:
-        return sum(1 for _ in trace_path.open("r", encoding="utf-8"))
-    except OSError:
-        return 0
-
-
-def _window_char_budget(
-    *,
-    state: WindowExtractGraphState,
-    run_instruction: str,
-    existing_record_manifest: str,
-) -> int:
-    """Compute how much raw trace text can fit in the next scan window."""
-    soft_tokens = int(MODEL_CONTEXT_TOKEN_LIMIT * CONTEXT_SOFT_PRESSURE_PCT)
-    state_text = "\n".join(
-        [
-            run_instruction,
-            existing_record_manifest,
-            _episode_summary(state),
-            _durable_findings_summary(state),
-            _implementation_summary(state),
-        ]
-    )
-    state_tokens = math.ceil(len(state_text) * _TOKENS_PER_CHAR)
-    available_tokens = max(
-        MIN_WINDOW_CHARS * _TOKENS_PER_CHAR,
-        soft_tokens - WINDOW_RESERVE_TOKENS - state_tokens,
-    )
-    return min(
-        MAX_WINDOW_CHARS,
-        max(MIN_WINDOW_CHARS, int(available_tokens / _TOKENS_PER_CHAR)),
-    )
-
-
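For intuition, the budget arithmetic above works in token space and converts back to characters at the end. A worked example with invented stand-in constants (the real values are imported from `lerim.agents.tools` and may differ):

```python
import math

# Stand-in values for illustration only; the real constants are imported.
MODEL_CONTEXT_TOKEN_LIMIT = 128_000
CONTEXT_SOFT_PRESSURE_PCT = 0.75
_TOKENS_PER_CHAR = 0.25
WINDOW_RESERVE_TOKENS = 30_000
MIN_WINDOW_CHARS = 20_000

soft_tokens = int(MODEL_CONTEXT_TOKEN_LIMIT * CONTEXT_SOFT_PRESSURE_PCT)  # 96_000
state_tokens = math.ceil(8_000 * _TOKENS_PER_CHAR)  # 2_000 tokens of carried state
available_tokens = max(
    MIN_WINDOW_CHARS * _TOKENS_PER_CHAR,
    soft_tokens - WINDOW_RESERVE_TOKENS - state_tokens,  # 64_000
)
print(int(available_tokens / _TOKENS_PER_CHAR))  # 256_000 chars before the MAX cap
```

As carried state grows across windows, `state_tokens` rises and the window shrinks, floored at `MIN_WINDOW_CHARS` and capped at `MAX_WINDOW_CHARS`.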
-def _read_trace_window(
-    *,
-    trace_path: Path,
-    start_line: int,
-    total_lines: int,
-    char_budget: int,
-) -> dict[str, Any]:
-    """Read as many complete trace lines as fit in the character budget."""
-    numbered: list[str] = []
-    current_chars = 0
-    end_line = start_line - 1
-    with trace_path.open("r", encoding="utf-8") as handle:
-        for line_number, raw_line in enumerate(handle, start=1):
-            if line_number < start_line:
-                continue
-            line = raw_line.rstrip("\n")
-            if len(line) > TRACE_MAX_LINE_BYTES:
-                dropped = len(line) - TRACE_MAX_LINE_BYTES
-                line = (
-                    line[:TRACE_MAX_LINE_BYTES]
-                    + f" ... [truncated {dropped} chars from this line]"
-                )
-            rendered = f"{line_number}\t{line}"
-            if numbered and current_chars + len(rendered) + 1 > char_budget:
-                break
-            numbered.append(rendered)
-            current_chars += len(rendered) + 1
-            end_line = line_number
-            if current_chars >= char_budget:
-                break
-    if not numbered and start_line <= total_lines:
-        numbered.append(f"{start_line}\t")
-        end_line = start_line
-    header = f"[{total_lines} lines, window {start_line}-{end_line}]"
-    if end_line < total_lines:
-        header += f" — next window starts at line {end_line + 1}"
-    return {
-        "start_line": start_line,
-        "end_line": end_line,
-        "header": header,
-        "text": header + "\n" + "\n".join(numbered),
-    }
-
-
-def _call_baml_with_retries(call, *, stage: str, progress: bool) -> tuple[Any, list[dict[str, Any]], int]:
-    """Run one BAML call with graph-visible recoverable retries."""
-    observations: list[dict[str, Any]] = []
-    attempts = 0
-    while True:
-        attempts += 1
-        try:
-            return call(), observations, attempts
-        except Exception as exc:
-            if not _is_recoverable_baml_error(exc) or attempts > MAX_BAML_MODEL_RETRIES:
-                raise
-            if progress:
-                print(f" baml retry {stage} attempt={attempts}", flush=True)
-            observations.append(
-                {
-                    "action": "model_retry",
-                    "ok": False,
-                    "content": _model_retry_observation(exc),
-                    "args": {"stage": stage, "attempt": attempts},
-                    "done": False,
-                    "completion_summary": "",
-                }
-            )
-
-
-def _model_payload(value: Any) -> dict[str, Any]:
-    """Convert generated BAML objects into plain dictionaries."""
-    if hasattr(value, "model_dump"):
-        return _plain_value(value.model_dump(exclude_none=True))
-    if isinstance(value, dict):
-        return _plain_value(
-            {key: item for key, item in value.items() if item is not None}
-        )
-    if value is None:
-        return {}
-    return _plain_value(getattr(value, "__dict__", {}))
-
-
-def _plain_value(value: Any) -> Any:
-    """Convert enum-ish values recursively into JSON-like values."""
-    enum_value = getattr(value, "value", None)
-    if enum_value is not None:
-        return enum_value
-    if isinstance(value, dict):
-        return {key: _plain_value(item) for key, item in value.items()}
-    if isinstance(value, list):
-        return [_plain_value(item) for item in value]
-    return value
-
-
-def _episode_summary(state: WindowExtractGraphState) -> str:
-    """Render compact rolling episode summary."""
-    updates = [item for item in state.get("episode_updates", []) if item]
-    return "\n".join(f"- {item}" for item in updates) or "(none yet)"
-
-
-def _findings_summary(state: WindowExtractGraphState) -> str:
-    """Render all prior findings for the next scan window."""
-    return "\n\n".join(
-        [
-            "Durable findings:\n" + _durable_findings_summary(state),
-            "Implementation/noise findings:\n" + _implementation_summary(state),
-        ]
-    )
-
-
-def _durable_findings_summary(state: WindowExtractGraphState) -> str:
-    """Render durable findings compactly for BAML prompts."""
-    findings = state.get("durable_findings", [])
-    if not findings:
-        return "(none)"
-    return "\n".join(_format_finding(finding) for finding in findings)
-
-
-def _implementation_summary(state: WindowExtractGraphState) -> str:
-    """Render implementation findings and discarded noise compactly."""
-    parts: list[str] = []
-    findings = state.get("implementation_findings", [])
-    if findings:
-        parts.append("\n".join(_format_finding(finding) for finding in findings))
-    noise = state.get("discarded_noise", [])
-    if noise:
-        parts.append("Discarded noise:\n" + "\n".join(f"- {item}" for item in noise))
-    return "\n".join(parts) if parts else "(none)"
-
-
-def _format_finding(finding: dict[str, Any]) -> str:
-    """Render one scan finding as one compact bullet."""
-    level = str(finding.get("level") or "").strip()
-    theme = str(finding.get("theme") or "").strip()
-    note = str(finding.get("note") or "").strip()
-    line = finding.get("line")
-    quote = str(finding.get("quote") or "").strip()
-    prefix = f"- {level}: {theme}" if level or theme else "-"
-    details = note
-    if line:
-        details += f" (line {line})"
-    if quote:
-        details += f" Evidence: {quote}"
-    return f"{prefix}: {details}".strip()
-
-
-def _baml_client_for_model(
-    *,
-    model_name: str,
-    baml_provider: str,
-    api_base_url: str | None,
-    api_key: str | None,
-    temperature: float,
-    ollama_base_url: str,
-):
-    """Return a generated BAML client pointed at the requested runtime model."""
-    normalized_provider = baml_provider.strip().lower()
-    if normalized_provider == "ollama":
-        client_name = "RuntimeOllama"
-        base_url = api_base_url or ollama_base_url
-        resolved_api_key = api_key
-        resolved_temperature = temperature
-    elif normalized_provider == "minimax":
-        client_name = "RuntimeMiniMax"
-        cfg = get_config()
-        base_url = api_base_url or cfg.provider_api_bases.get("minimax") or MINIMAX_BASE_URL
-        resolved_api_key = api_key or cfg.minimax_api_key
-        if not resolved_api_key:
-            raise RuntimeError("missing_api_key:MINIMAX_API_KEY required for BAML MiniMax client")
-        resolved_temperature = max(
-            MINIMAX_TEMPERATURE_FLOOR,
-            min(1.0, float(temperature)),
-        )
-    elif normalized_provider == "openai-generic":
-        client_name = "RuntimeOpenAIGeneric"
-        base_url = api_base_url
-        if not base_url:
-            raise RuntimeError("missing_api_base:openai-generic BAML client requires api_base_url")
-        resolved_api_key = api_key
-        resolved_temperature = temperature
-    else:
-        raise RuntimeError(f"unsupported_baml_provider:{baml_provider}")
-
-    options: dict[str, Any] = {
-        "base_url": base_url,
-        "model": model_name,
-        "temperature": resolved_temperature,
-        "http": {
-            "connect_timeout_ms": BAML_HTTP_CONNECT_TIMEOUT_MS,
-            "time_to_first_token_timeout_ms": BAML_HTTP_TIME_TO_FIRST_TOKEN_TIMEOUT_MS,
-            "idle_timeout_ms": BAML_HTTP_IDLE_TIMEOUT_MS,
-            "request_timeout_ms": BAML_HTTP_REQUEST_TIMEOUT_MS,
-        },
-    }
-    if resolved_api_key:
-        options["api_key"] = resolved_api_key
-
-    registry = ClientRegistry()
-    registry.add_llm_client(
-        name=client_name,
-        provider="openai-generic",
-        options=options,
-        retry_policy="ExtractAgentRetry",
-    )
-    registry.set_primary(client_name)
-    return b.with_options(client_registry=registry)
-
-
-def _is_recoverable_baml_error(exc: Exception) -> bool:
-    """Return whether a BAML model/parsing failure should be retried in graph."""
-    return type(exc).__name__ in BAML_RECOVERABLE_ERROR_NAMES
-
-
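Together, `_call_baml_with_retries` and `_is_recoverable_baml_error` implement retry-by-exception-name: only failures whose type name appears in `BAML_RECOVERABLE_ERROR_NAMES` are swallowed, and each retry surfaces as a `model_retry` observation. A minimal sketch of a caller consuming that contract, assuming this module's `_call_baml_with_retries` is importable; the flaky callable and the stand-in exception class are invented, and the name match works because the helper compares exception class names only:

```python
calls = {"n": 0}


class BamlValidationError(Exception):
    """Stand-in that shares its name with the real BAML exception."""


def flaky() -> str:
    calls["n"] += 1
    if calls["n"] == 1:
        raise BamlValidationError("model returned prose instead of JSON")
    return "ok"


result, retry_notes, attempts = _call_baml_with_retries(
    flaky, stage="scan_window", progress=False
)
assert result == "ok" and attempts == 2
assert retry_notes[0]["action"] == "model_retry"
```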
-def _model_retry_observation(exc: Exception) -> str:
-    """Render a compact model failure note for the next BAML turn."""
-    message = str(exc).replace("\n", " ")[:1200]
-    return (
-        "The previous BAML model call did not produce a valid next action. "
-        "Retry and return exactly one JSON object matching the requested schema. "
-        "Do not include tags, hidden reasoning, markdown, or prose before "
-        f"the JSON. Error: {type(exc).__name__}: {message}"
-    )
-
-
-def _build_run_instruction(
-    *,
-    context_db_path: Path,
-    project_identity: ProjectIdentity,
-    trace_path: Path,
-    session_started_at: str,
-    existing_record_manifest: str | None = None,
-) -> str:
-    """Build the same extraction task framing used by Lerim's current agent."""
-    try:
-        trace_line_count = sum(1 for _ in trace_path.open("r", encoding="utf-8"))
-    except OSError:
-        trace_line_count = 0
-    if existing_record_manifest is None:
-        existing_record_manifest = _format_existing_record_manifest(
-            context_db_path=context_db_path,
-            project_identity=project_identity,
-        )
-    source_time_text = str(session_started_at or "").strip() or "unknown"
-    prompt = (
-        "Read the trace, write exactly one episode record, and write only the strongest "
-        "durable records with non-empty title and body. Store reusable rules and decisions, "
-        "not a polished recap of the meeting. "
-        "Durable records must be positive canonical context: when trace text combines a "
-        "durable point with cleanup/noise/ignore guidance, exclude that guidance entirely "
-        "from the durable record. "
-        f"Source session started_at: {source_time_text}. Treat the trace as evidence from "
-        "that time, not as a fresh verification of the current repository. "
-        f"This trace has {trace_line_count} lines. Read all chunks before writing. "
-        "If the trace needs more than one read to cover it, record findings before any write. "
-        "If relevant existing durable records are shown below, treat them as a shortlist only; "
-        "fetch the full record before any revision."
-        + (f"\n\n{existing_record_manifest}" if existing_record_manifest else "")
-    )
-    return prompt
diff --git a/baml_agents/baml_extract_agent/run.py b/baml_agents/baml_extract_agent/run.py
deleted file mode 100644
index 4e344c5..0000000
--- a/baml_agents/baml_extract_agent/run.py
+++ /dev/null
@@ -1,68 +0,0 @@
-"""Command-line entrypoint for the BAML plus LangGraph extraction experiment."""
-
-from __future__ import annotations
-
-import argparse
-import json
-import os
-from pathlib import Path
-
-from baml_extract_agent.graph import (
-    BAML_PROVIDER,
-    MODEL_NAME,
-    OLLAMA_BASE_URL,
-    run_baml_extraction,
-)
-
-
-def main(argv: list[str] | None = None) -> int:
-    """Parse CLI arguments and run the BAML extraction graph."""
-    parser = argparse.ArgumentParser(
-        description="Run the minimal BAML plus LangGraph Lerim extraction experiment."
-    )
-    parser.add_argument("--trace", required=True, type=Path)
-    parser.add_argument(
-        "--context-db",
-        default=Path(".tmp/baml_agents/context.sqlite3"),
-        type=Path,
-    )
-    parser.add_argument("--project-root", default=Path.cwd(), type=Path)
-    parser.add_argument("--session-id", default="baml-extract-session")
-    parser.add_argument("--session-started-at", default=None)
-    parser.add_argument("--model", default=MODEL_NAME)
-    parser.add_argument(
-        "--baml-provider",
-        default=BAML_PROVIDER,
-        choices=("ollama", "minimax", "openai-generic"),
-    )
-    parser.add_argument("--api-base-url", default=None)
-    parser.add_argument("--api-key-env", default=None)
-    parser.add_argument("--ollama-base-url", default=OLLAMA_BASE_URL)
-    parser.add_argument("--temperature", default=0.0, type=float)
-    parser.add_argument("--max-llm-calls", default=None, type=int)
-    args = parser.parse_args(argv)
-
-    api_key_env = args.api_key_env
-    if api_key_env is None and args.baml_provider == "minimax":
-        api_key_env = "MINIMAX_API_KEY"
-
-    result = run_baml_extraction(
-        trace_path=args.trace,
-        context_db_path=args.context_db,
-        project_root=args.project_root,
-        session_id=args.session_id,
-        session_started_at=args.session_started_at,
-        model_name=args.model,
-        baml_provider=args.baml_provider,
-        api_base_url=args.api_base_url,
-        api_key=os.environ.get(api_key_env) if api_key_env else None,
-        temperature=args.temperature,
-        ollama_base_url=args.ollama_base_url,
-        max_llm_calls=args.max_llm_calls,
-    )
-    print(json.dumps(result, ensure_ascii=True, indent=2))
-    return 0
-
-
-if __name__ == "__main__":
-    raise SystemExit(main())
diff --git a/baml_agents/baml_extract_agent/tool_bridge.py b/baml_agents/baml_extract_agent/tool_bridge.py
deleted file mode 100644
index 30b71a0..0000000
--- a/baml_agents/baml_extract_agent/tool_bridge.py
+++ /dev/null
@@ -1,302 +0,0 @@
-"""Bridge synthesized BAML records to Lerim's canonical extraction tools."""
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-import json
-from pathlib import Path
-import textwrap
-from typing import Any
-
-from pydantic_ai import ModelRetry, RunContext
-from pydantic_ai.models.test import TestModel
-from pydantic_ai.usage import RunUsage
-
-from lerim.agents import tools as extract_tools
-from lerim.agents.tools import ContextDeps
-from lerim.context import ContextStore, ProjectIdentity
-from lerim.context.spec import (
-    MAX_DURABLE_BODY_CHARS,
-    MAX_EPISODE_BODY_CHARS,
-    MAX_EPISODE_OUTCOMES_CHARS,
-    MAX_EPISODE_USER_INTENT_CHARS,
-    MAX_EPISODE_WHAT_HAPPENED_CHARS,
-    MAX_RECORD_TITLE_CHARS,
-)
-
-
-@dataclass(frozen=True)
-class ToolObservation:
-    """Observed result after dispatching one persistence action."""
-
-    action: str
-    ok: bool
-    content: str
-    args: dict[str, Any]
-    done: bool = False
-    completion_summary: str = ""
-
-
-def build_tool_context(deps: ContextDeps) -> RunContext[ContextDeps]:
-    """Build the minimal PydanticAI run context required by Lerim tools."""
-    return RunContext(deps=deps, model=TestModel(), usage=RunUsage())
-
-
-def count_current_session_episodes(deps: ContextDeps) -> int:
-    """Count current-session episode records in the canonical context store."""
-    store = ContextStore(deps.context_db_path)
-    store.initialize()
-    store.register_project(deps.project_identity)
-    rows = store.query(
-        entity="records",
-        mode="count",
-        project_ids=[deps.project_identity.project_id],
-        kind="episode",
-        source_session_id=deps.session_id,
-        include_archived=True,
-    )
-    return int(rows.get("count") or 0)
-
-
-def persist_synthesized_extraction(
-    synthesized: Any,
-    ctx: RunContext[ContextDeps],
-) -> tuple[list[dict[str, Any]], bool, str]:
-    """Persist synthesized episode and durable records through the real tools."""
-    payload = _tool_payload(synthesized)
-    completion_summary = str(payload.get("completion_summary") or "").strip()
-    episode = _prepare_episode(payload.get("episode") or {}, completion_summary)
-    durable_records = [
-        record
-        for record in (_tool_payload(item) for item in payload.get("durable_records") or [])
-        if _is_persistable_durable_record(record)
-    ]
-
-    observations: list[dict[str, Any]] = []
-    for index, record in enumerate([episode, *durable_records]):
-        default_status = "archived" if record.get("kind") == "episode" else "active"
-        args = _with_defaults(record, {"status": default_status})
-        try:
-            content = _save_context(ctx, args)
-            observation = ToolObservation(
-                action="save_context",
-                ok=True,
-                content=content,
-                args=args,
-            )
-        except ModelRetry as exc:
-            observation = ToolObservation(
-                action="save_context",
-                ok=False,
-                content=f"Tool retry needed: {exc}",
-                args=args,
-            )
-        except Exception as exc:
-            observation = ToolObservation(
-                action="save_context",
-                ok=False,
-                content=f"Tool error: {type(exc).__name__}: {exc}",
-                args=args,
-            )
-        observations.append(observation_to_state(observation))
-        if index == 0 and not observation.ok:
-            break
-
-    episode_count = count_current_session_episodes(ctx.deps)
-    done = episode_count == 1
-    if not completion_summary:
-        completion_summary = "Extraction completed."
-    final_observation = ToolObservation(
-        action="final_result",
-        ok=done,
-        content=(
-            completion_summary
-            if done
-            else f"final_result refused: expected exactly one episode record, found {episode_count}."
-        ),
-        args={},
-        done=done,
-        completion_summary=completion_summary if done else "",
-    )
-    observations.append(observation_to_state(final_observation))
-    return observations, done, completion_summary if done else ""
-
-
-def prepare_context_deps(
-    *,
-    context_db_path: Path,
-    project_identity: ProjectIdentity,
-    session_id: str,
-    trace_path: Path,
-    session_started_at: str,
-    model_name: str,
-) -> ContextDeps:
-    """Initialize store provenance and return dependencies for tool calls."""
-    store = ContextStore(context_db_path)
-    store.initialize()
-    store.register_project(project_identity)
-    store.upsert_session(
-        project_id=project_identity.project_id,
-        session_id=session_id,
-        agent_type="baml-langgraph-extract",
-        source_trace_ref=str(trace_path),
-        repo_path=str(project_identity.repo_path),
-        cwd=str(project_identity.repo_path),
-        started_at=session_started_at,
-        model_name=model_name,
-        instructions_text=None,
-        prompt_text=None,
-        metadata={"experiment": "baml_agents"},
-    )
-    return ContextDeps(
-        context_db_path=context_db_path,
-        project_identity=project_identity,
-        session_id=session_id,
-        trace_path=trace_path,
-        session_started_at=session_started_at,
-    )
-
-
-def observation_to_state(observation: ToolObservation) -> dict[str, Any]:
-    """Convert a tool observation into serializable graph state."""
-    return {
-        "action": observation.action,
-        "ok": observation.ok,
-        "content": observation.content,
-        "args": observation.args,
-        "done": observation.done,
-        "completion_summary": observation.completion_summary,
-    }
-
-
-def _prepare_episode(value: Any, completion_summary: str) -> dict[str, Any]:
-    """Normalize a synthesized episode draft into a valid save_context payload."""
-    episode = _tool_payload(value)
-    episode["kind"] = "episode"
-    if not str(episode.get("title") or "").strip():
-        episode["title"] = _episode_title_from_payload(episode, completion_summary)
-    if not str(episode.get("user_intent") or "").strip():
-        episode["user_intent"] = "Extract context from the source trace."
-    if not str(episode.get("what_happened") or "").strip():
-        fallback = (
-            str(episode.get("body") or "").strip()
-            or completion_summary
-            or "The trace was scanned and summarized for context extraction."
-        )
-        episode["what_happened"] = fallback
-    if not str(episode.get("body") or "").strip():
-        episode["body"] = _episode_body_from_structured_fields(episode)
-    episode["title"] = _compact_text(episode.get("title"), MAX_RECORD_TITLE_CHARS)
-    episode["user_intent"] = _compact_text(
-        episode.get("user_intent"),
-        MAX_EPISODE_USER_INTENT_CHARS,
-    )
-    episode["what_happened"] = _compact_text(
-        episode.get("what_happened"),
-        MAX_EPISODE_WHAT_HAPPENED_CHARS,
-    )
-    episode["outcomes"] = _compact_optional_text(
-        episode.get("outcomes"),
-        MAX_EPISODE_OUTCOMES_CHARS,
-    )
-    episode["body"] = _compact_text(episode.get("body"), MAX_EPISODE_BODY_CHARS)
-    return episode
-
-
-def _is_persistable_durable_record(record: dict[str, Any]) -> bool:
-    """Return whether a synthesized durable record is complete enough to save."""
-    kind = str(record.get("kind") or "").strip().lower()
-    if not kind or kind == "episode":
-        return False
-    record["title"] = _compact_text(record.get("title"), MAX_RECORD_TITLE_CHARS)
-    record["body"] = _compact_text(record.get("body"), MAX_DURABLE_BODY_CHARS)
-    return bool(
-        str(record.get("title") or "").strip()
-        and str(record.get("body") or "").strip()
-    )
-
-
-def _save_context(ctx: RunContext[ContextDeps], args: dict[str, Any]) -> str:
-    """Call save_context with a complete record payload."""
-    return extract_tools.save_context(ctx, **_with_defaults(args, {"status": "active"}))
-
-
-def _with_defaults(args: dict[str, Any], defaults: dict[str, Any]) -> dict[str, Any]:
-    """Fill omitted optional tool arguments with Lerim's defaults."""
-    payload = dict(defaults)
-    payload.update(args)
-    return payload
-
-
-def _coerce_tool_value(value: Any) -> Any:
-    """Convert generated BAML enum values into plain JSON-like values."""
-    enum_value = getattr(value, "value", None)
-    if enum_value is not None:
-        return enum_value
-    if isinstance(value, dict):
-        return {key: _coerce_tool_value(item) for key, item in value.items()}
-    if isinstance(value, list):
-        return [_coerce_tool_value(item) for item in value]
-    return value
-
-
-def _tool_payload(value: Any) -> dict[str, Any]:
-    """Return a plain dict from a generated BAML/Pydantic-ish object."""
-    if hasattr(value, "model_dump"):
-        return _coerce_tool_value(value.model_dump(exclude_none=True))
-    if isinstance(value, dict):
-        return _coerce_tool_value(
-            {key: item for key, item in value.items() if item is not None}
-        )
-    if value is None:
-        return {}
-    return _coerce_tool_value(
-        json.loads(json.dumps(value, default=lambda item: item.__dict__))
-    )
-
-
-def _episode_body_from_structured_fields(episode: dict[str, Any]) -> str:
-    """Build an episode body when synthesis provided structured fields only."""
-    user_intent = str(episode.get("user_intent") or "").strip()
-    what_happened = str(episode.get("what_happened") or "").strip()
-    outcomes = str(episode.get("outcomes") or "").strip()
-    parts = []
-    if user_intent:
-        parts.append(f"User intent: {user_intent}")
-    if what_happened:
-        parts.append(f"What happened: {what_happened}")
-    if outcomes:
-        parts.append(f"Outcome: {outcomes}")
-    return " ".join(parts) or "The session was scanned and summarized for context extraction."
-
-
-def _episode_title_from_payload(episode: dict[str, Any], completion_summary: str) -> str:
-    """Derive a compact episode title from available episode text."""
-    candidates = [
-        episode.get("user_intent"),
-        episode.get("what_happened"),
-        episode.get("outcomes"),
-        completion_summary,
-        episode.get("body"),
-    ]
-    for candidate in candidates:
-        text = str(candidate or "").strip()
-        if text:
-            return text[:80].rstrip(" .") or "Extracted session"
-    return "Extracted session"
-
-
-def _compact_text(value: Any, max_chars: int) -> str:
-    """Return non-empty text that fits the canonical record field budget."""
-    text = " ".join(str(value or "").split())
-    if not text:
-        return ""
-    if len(text) <= max_chars:
-        return text
-    return textwrap.shorten(text, width=max_chars, placeholder="...")
-
-
-def _compact_optional_text(value: Any, max_chars: int) -> str | None:
-    """Return optional compact text, preserving None for empty values."""
-    text = _compact_text(value, max_chars)
-    return text or None
diff --git a/baml_agents/baml_src/extract_react.baml b/baml_agents/baml_src/extract_react.baml
deleted file mode 100644
index fbdc616..0000000
--- a/baml_agents/baml_src/extract_react.baml
+++ /dev/null
@@ -1,284 +0,0 @@
-enum RecordKind {
-    DECISION @alias("decision") @description("A durable project decision with decision and why fields.")
-    PREFERENCE @alias("preference") @description("A stable user or workflow preference.")
-    CONSTRAINT @alias("constraint") @description("A durable invariant, limit, or must/cannot rule.")
-    FACT @alias("fact") @description("A durable project fact or setup truth.")
-    REFERENCE @alias("reference") @description("A pointer to an external source of truth.")
-}
-
-enum RecordStatus {
-    ACTIVE @alias("active") @description("Current context that future sessions may reuse.")
-    ARCHIVED @alias("archived") @description("Historical or routine context that should not be treated as active guidance.")
-}
-
-enum FindingLevel {
-    DECISION @alias("decision") @description("Durable decision-level finding.")
-    PREFERENCE @alias("preference") @description("Durable user or workflow preference finding.")
-    FEEDBACK @alias("feedback") @description("Durable feedback-level finding.")
-    REFERENCE @alias("reference") @description("Durable external-source finding.")
-    CONSTRAINT @alias("constraint") @description("Durable constraint-level finding.")
-    FACT @alias("fact") @description("Durable fact-level finding.")
-    IMPLEMENTATION @alias("implementation") @description("Trace-local implementation evidence or discarded hypothesis.")
-}
-
-class TraceWindowFinding {
-    theme string @description("Short stable theme for this finding.")
-    level FindingLevel @description("Use durable levels for reusable context, implementation for local/noisy evidence.")
-    line int? @description("1-based supporting line when the window gives one.")
-    quote string? @description("Short supporting quote from the current window.")
-    note string @description("Compact semantic finding. Avoid command logs and copied errors.")
-}
-
-class TraceWindowScan {
-    episode_update string? @description("Compact update for the final episode summary. May be omitted when this window adds nothing.")
-    durable_findings TraceWindowFinding[] @description("Reusable decisions, preferences, constraints, facts, and references found in this window.")
-    implementation_findings TraceWindowFinding[] @description("Implementation evidence, discarded hypotheses, and local details useful only as support/noise.")
-    discarded_noise string[] @description("Short descriptions of noisy categories intentionally not saved.")
-}
-
-class EpisodeDraft {
-    title string? @description("Optional short title for the current-session episode. Runtime derives one if omitted.")
-    body string? @description("Compact episode body. If omitted, runtime builds it from user_intent and what_happened.")
-    status RecordStatus? @description("Use archived for routine/no-durable sessions; active only when the episode itself remains useful.")
-    user_intent string? @description("What the user wanted in this source session. Runtime fills a generic fallback if omitted.")
-    what_happened string? @description("What the session actually did. Runtime fills a generic fallback if omitted.")
-    outcomes string? @description("Optional concise outcome.")
-}
-
-class DurableRecordDraft {
-    kind RecordKind @description("Durable record kind.")
-    title string @description("Short standalone durable title.")
-    body string @description("Compact standalone durable body.")
-    status RecordStatus? @description("Usually active for reusable durable records.")
-    valid_from string?
-    valid_until string?
-    decision string?
-    why string?
-    alternatives string?
-    consequences string?
-}
-
-class SynthesizedExtraction {
-    episode EpisodeDraft @description("Exactly one current-session episode record draft.")
-    durable_records DurableRecordDraft[] @description("Zero or more durable records.")
-    completion_summary string? @description("Brief summary of extraction work for final_result/reporting.")
-}
-
-function ScanTraceWindow(
-    run_instruction: string,
-    prior_episode_summary: string,
-    prior_findings_summary: string,
-    trace_window: string
-) -> TraceWindowScan {
-    client MiniMaxM27
-    prompt #"
-        {{ _.role("system") }}
-        You scan one window from a coding-agent trace for Lerim extraction.
-        Return only structured output. Do not save records and do not plan future tool calls.
-        Do not include tags, hidden reasoning, markdown, or prose.
-        The top-level output must include episode_update, durable_findings, implementation_findings, and discarded_noise.
-        Use an empty string or empty list when a field has no content.
-        Every durable_findings and implementation_findings item must be an object with:
-        - theme
-        - level: one of decision, preference, feedback, reference, constraint, fact, or implementation
-        - note
-        - optional line
-        - optional quote
-        Never return finding items as plain strings.
-        Never use confidence labels such as high, medium, or low as finding levels.
-
-        Separate:
-        - episode_update: what happened in this session window, for the final episode record.
-        - durable_findings: reusable project/user context only.
-        - implementation_findings: local evidence, command work, discarded hypotheses, or support.
-        - discarded_noise: categories of content intentionally ignored.
-
-        Durable signal means a decision, preference, constraint, fact, or reference likely useful beyond this trace.
-        Implementation detail alone is not durable signal.
-        A durable finding should help a future agent make a better decision in a new chat.
-        Save what a future agent would be relieved to already know: stable user preferences, chosen policies, durable constraints, source-of-truth references, and stable project or product truths.
-        If a detail only helps replay or debug this trace's terminal session, put it in implementation_findings or discarded_noise, not durable_findings.
-        Raw metrics, comparison tables, config edits, API wiring, package installation, debug commands, file paths, and stack traces are implementation_findings unless the user turns them into a reusable policy, preference, constraint, fact, or source-of-truth reference.
-        When the same topic has both user-level guidance and technical evidence, the user-level guidance is the durable finding; the technical evidence is implementation support.
-        For technical blockers, keep durable_findings at the capability, status, and implication level only. Put patch recipes, flags, functions, commands, exact metrics, and local debug steps in implementation_findings.
-        When a window mixes a durable point with local evidence, put only the reusable point in durable_findings and put the supporting local evidence in implementation_findings.
-        Use semantic meaning, not exact wording. Do not copy long code, logs, stack traces, or command transcripts.
-        If the window has no durable signal, return an empty durable_findings list.
-        If the window adds nothing meaningful to the episode, episode_update may be an empty string.
-
-        {{ _.role("user") }}
-        RUN INSTRUCTION:
-        {{ run_instruction }}
-
-        PRIOR EPISODE SUMMARY:
-        {{ prior_episode_summary }}
-
-        PRIOR FINDINGS SUMMARY:
-        {{ prior_findings_summary }}
-
-        TRACE WINDOW:
-        {{ trace_window }}
-
-        {{ ctx.output_format }}
-    "#
-}
-
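From Python, the generated sync client exposes this function as a plain method whose keyword parameters mirror the BAML signature; `graph.py` above calls it exactly this way via `b.with_options(...)`. A stripped-down sketch of a direct call, assuming a configured `MiniMaxM27`-compatible client is available (the window text is invented):

```python
from baml_client.sync_client import b

scan = b.ScanTraceWindow(
    run_instruction="Extract one episode and any durable context.",
    prior_episode_summary="(none yet)",
    prior_findings_summary="(none yet)",
    trace_window="[2 lines, window 1-2]\n1\tuser: Use the checklist.\n2\tassistant: Done.",
)

# The return value is a typed TraceWindowScan, not raw JSON.
for finding in scan.durable_findings:
    print(finding.level, finding.theme, finding.note)
```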
-function SynthesizeExtractRecords(
-    run_instruction: string,
-    episode_summary: string,
-    durable_findings_summary: string,
-    existing_record_manifest: string
-) -> SynthesizedExtraction {
-    client MiniMaxM27
-    prompt #"
-        {{ _.role("system") }}
-        You synthesize final Lerim context records from scanned trace findings.
-        Return only structured output.
-        Do not include tags, hidden reasoning, markdown, or prose.
-        The top-level output must include episode, durable_records, and completion_summary.
-        Use an empty durable_records list when there is no durable signal.
-        Every durable_records item must be an object with kind, title, and body.
-        Optional durable record fields are status, valid_from, valid_until, decision, why, alternatives, and consequences.
-        Never return durable_records items as plain strings or as generic type/record objects.
-
-        Create exactly one episode record for the current session.
-        Create durable records only for reusable decisions, preferences, constraints, facts, and references.
-        The episode says what the session did. Durable records say what future sessions should reuse.
-        Before creating each durable record, ask: would this change what a future agent does in a new chat?
-        Prefer the small set of memories a future agent would be relieved to already know over exhaustive coverage of what happened.
-        Keep durable records compact, standalone, and deduplicated.
-        Do not create durable records for command logs, patch steps, generic programming knowledge, temporary diagnostics, or local implementation chatter.
-        If there is no reusable signal, durable_records must be empty and the episode should normally be archived.
-        Decision records must include decision and why when the rationale is present; use fact when there is no durable why.
-        Episode records should include user_intent and what_happened when available; the runtime can derive missing episode fields.
-        Treat the trace as historical evidence, not live verification of the current repo.
-
-        Existing records, if listed, are only duplicate-risk context. Prefer skipping near-duplicates over creating duplicate durable records.
-
-        Quality bar:
-        - Store the reusable rule, decision, invariant, dependency, preference, or external pointer, not the story of the session.
-        - One durable record should hold one durable point.
-        - Direct consequences and application guidance usually stay inside that same record.
-        - Create the minimum number of durable records that preserves distinct durable meanings. Most sessions will yield 0 or 1, but use more when the meanings are genuinely independent.
-        - Classify each durable point into one canonical kind. Prefer the most specific supported kind, and do not duplicate the same point across multiple durable kinds.
-        - Duplicates are worse than gaps. Skip uncertain candidates rather than spraying near-duplicates.
-        - Never create a second durable record in the same run for the same core claim.
-        - constraint and reference are first-class durable record kinds, not fallback categories.
-
-        What not to save:
-        - patch logs, command sequences, retries, timelines, or meeting-style recaps
-        - code structure, file paths, git history, or storage mechanics by themselves
-        - generic programming knowledge or facts already obvious from the repo
-        - rejected lures, discarded explanations, or implementation-only distractions
-        - one-run validation findings, approval flow, queue state, DB resets, rebuilds, or runtime diagnostics by themselves
-
-        Selection rules:
-        - First separate findings into durable signal and implementation evidence.
-        - Prefer user-level guidance, chosen operating policy, and stable project truth over the technical evidence that revealed them.
-        - Synthesize at the theme level. Usually one theme becomes one durable record.
-        - Create multiple durable records only when the trace establishes multiple independent durable points, each with its own support.
-        - Do not store one durable point as both a preference and a decision, a fact and a decision, or any other cross-kind duplicate.
-        - A stable workflow preference is not also a decision unless the trace separately states an explicit project decision with rationale.
-        - A dependency, setup, or environment truth without durable rationale is a fact, not also a decision.
-        - A failure caused by the current run's temporary validation setup is not itself a durable environment truth. If the investigation reveals a stable requirement that future sessions must apply, save that requirement as the durable point and omit the temporary validation story.
-        - Merge candidates when one only states how to apply the other in local operations, routing, or ownership.
-        - If two candidates share the same core claim, merge them.
-        - If the difference is only evidence framing, symptom wording, or local-vs-CI phrasing around the same durable fact, keep one record and fold the extra context into it.
-        - If one candidate is only the direct application or routing consequence of another, keep it inside the stronger record.
-        - If one candidate only says how different local components should apply the same project rule, keep that guidance inside the main record rather than creating a second durable record.
-        - If one candidate only restates where local project components live or how an internal architecture is applied, keep it inside the stronger decision, fact, or constraint instead of creating a separate reference.
-        - If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule.
-        - If a technical blocker is reusable, record only the capability affected, status or source of truth, and implication for future work.
-        - Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered.
-        - Store durable records only when the lesson is likely reusable beyond this trace.
-        - If a candidate is mainly about this trace's commands, files, or timeline, reject it.
-        - Trace-local instructions about what to ignore in this session are not preferences unless they clearly express a broader standing workflow rule for future sessions.
-        - If the trace explicitly says the rationale is unknown or says not to invent one, do not create a decision; use fact instead.
-        - A stable setup, dependency, or environment requirement without a durable why is a fact even if it sounds like the current chosen setup.
-        - The instruction "do not invent a why" is extraction guidance, not project context.
-        - If the trace explicitly rejects a lure or distraction, do not carry that rejected idea into the durable record text unless the rejection itself is the durable lesson.
-        - If a long noisy investigation resolves into one source-of-truth boundary, store only that boundary. Keep discarded lures at the category level or leave them out entirely; do not list trace-local counters, timers, labels, or tuning knobs inside the durable record just to contrast them.
-        - When a discarded lure matters as evidence, keep it attached to the main durable theme as implementation context rather than storing it as a second durable theme.
-        - If the episode summary contains clearly reusable decision, preference, constraint, fact, or reference, that point should usually also exist as its own durable record.
-        - Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode.
-        - Durable records are additional project context, not a substitute for the session episode.
-        - Most traces should produce only a few durable records; create more only when each one would independently change future behavior.
-
-        Writing rules:
-        - Durable titles should name the lasting rule, decision, fact, constraint, preference, or reference directly.
-        - Durable bodies should be compact, neutral, and standalone.
-        - Prefer this shape for durable records: the durable point, why it matters, and how to apply it later.
-        - Do not write durable records as meeting minutes, patch logs, or cleanup commentary.
-        - Do not preserve trace-local commands, negotiation phrasing, or "this is not about X" sentences in final record text.
-        - Do not mention discarded implementation noise in durable record fields, including consequences.
-        - Do not include patch recipes, flags, function names, exact metrics, or local debug steps in durable record text.
-        - Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored.
-        - When the durable lesson is a source-of-truth rule, write the authoritative rule directly.
-        - If a short contrast is still helpful, keep it abstract, such as "not worker-local state" or "not ephemeral local state". Do not enumerate examples in parentheses or comma-separated lists.
-        - When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection.
-        - Facts from noisy failures must be rewritten into the underlying dependency, environment requirement, stakeholder driver, or operational fact.
-        - If a fact still reads like stderr, an exception symptom, or copied command output, rewrite it again before writing.
-        - When the durable lesson is an environment or dependency requirement, do not center the fact on the observed failure symptom. Name the requirement directly and mention the symptom only if it is needed as brief supporting context.
-        - If brief supporting context is useful, lead with the requirement and keep the symptom generic. Never include exception class names, quoted error fragments, or copied failure strings in the durable fact.
-        - If the candidate is mainly "this validation run failed until we changed the setup", it belongs in the archived episode. If the candidate names a reusable setup or runtime requirement discovered through that validation, keep the requirement and drop the failure narrative.
-        - When no durable rationale exists, do not spend the fact body explaining that the rationale is absent. Just state the stable dependency, setup requirement, or operational truth directly.
-        - Do not quote or paraphrase trace instructions about how to classify the evidence inside the final fact body. Final fact text should describe the underlying truth, not the extraction rule you followed.
-        - References must answer both "where should future sessions look?" and "when should they consult it?"
-        - Do not use reference for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than "consult this external source next time."
-        - Keep the episode concise: short title, short body, concise user_intent, what_happened, and outcomes.
-
-        Record types:
-        - preference: Stable workflow guidance from the user. Save corrections and confirmed non-obvious working style that should carry into future sessions.
-        - decision: A chosen approach or project rule that future work should follow and that is not obvious from code alone. If the trace does not support a durable why, do not use decision.
-        - constraint: A durable invariant, limit, or must/cannot rule that future work must respect.
-        - fact: A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth.
-        - reference: A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo. Use reference only when the enduring value is where to look later.
-
-        Few-shot quality examples:
-
-        Example preference:
-        - Trace signal: the user corrects the assistant after a small code fix and says not to append redundant recaps.
-        - Good: create one preference record about keeping replies terse and avoiding redundant change recaps after small diffs.
-        - Bad: store the file edit itself, or treat the correction as one-session scratch when it is stable workflow guidance.
-
-        Example decision:
-        - Trace signal: early turns discuss local refactors and flaky tests; late in the trace the user settles one architecture boundary, and follow-on routing guidance only applies that boundary.
-        - Good: create the required episode and one decision record for the architecture boundary. Keep the routing guidance inside that record.
-        - Bad: store refactor noise, split one architectural choice into near-duplicate records, or create a separate durable record saying the refactors were noise.
-
-        Example fact:
-        - Trace signal: repeated failed commands and partial theories eventually resolve to one stable operational requirement.
-        - Good: create one fact record for the requirement in clean operational language. Lead with the dependency or environment requirement; mention the failure only as brief generic support if needed.
-        - Bad: store raw errors, command history, rejected theories, exact exception text, or a separate record saying not to invent a why.
-
-        Example late clarification:
-        - Trace signal: early chunks circle local counters, timers, labels, and tuning; the final chunk clarifies those were distractions and the real durable lesson is a source-of-truth boundary.
-        - Good: create one durable record for the source-of-truth boundary. Mention restart or failover only if it explains why the boundary matters.
-        - Bad: write a durable record that carries over rejected local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list.
-
-        Example reference:
-        - Trace signal: the assistant starts from a partial repo note, then the user clarifies that ownership or status lives in an external dashboard or ticket system.
-        - Good: create one reference record that names the external source and when future sessions should consult it.
-        - Bad: center the record on local files, or turn it into a warning slogan about what not to trust locally.
-
-        Example routine:
-        - Trace signal: formatter, small lint fix, rerun tests, green result, no new rule or durable fact.
-        - Good: create only an archived episode.
-        - Bad: invent a durable record from the sequence of routine commands.
-
-        {{ _.role("user") }}
-        RUN INSTRUCTION:
-        {{ run_instruction }}
-
-        EXISTING RECORD MANIFEST:
-        {{ existing_record_manifest }}
-
-        EPISODE SUMMARY:
-        {{ episode_summary }}
-
-        DURABLE FINDINGS:
-        {{ durable_findings_summary }}
-
-        {{ ctx.output_format }}
-    "#
-}
diff --git a/baml_agents/baml_src/extract_react_tests.baml b/baml_agents/baml_src/extract_react_tests.baml
deleted file mode 100644
index fffcfc9..0000000
--- a/baml_agents/baml_src/extract_react_tests.baml
+++ /dev/null
@@ -1,92 +0,0 @@
-test ScanTraceWindowCapturesDurableDecision {
-    functions [ScanTraceWindow]
-    args {
-        run_instruction "Extract one episode and any durable project context from the trace."
-        prior_episode_summary "(none yet)"
-        prior_findings_summary "(none yet)"
-        trace_window #"
-            [8 lines, window 1-8]
-            1 user: We should run extraction evals before changing extraction prompts.
-            2 assistant: I will update the BAML harness and run the eval.
-            3 user: Keep trace-local command logs out of long-term context.
-            4 assistant: Implemented the change and ran a smoke test.
-        "#
-    }
-    @@assert({{ this.durable_findings|length > 0 }})
-}
-
-test ScanTraceWindowSeparatesDurableAndImplementation {
-    functions [ScanTraceWindow]
-    args {
-        run_instruction "Extract reusable context, not command history."
-        prior_episode_summary "(none yet)"
-        prior_findings_summary "(none yet)"
-        trace_window #"
-            [7 lines, window 1-7]
-            1 user: The deployment checklist is the source of truth for release readiness.
-            2 assistant: Ran pytest -q and fixed a local fixture path.
-            3 assistant: Edited src/example.py and reran the lint command.
- 4 user: The command output is just evidence, not memory. - "# - } - @@assert({{ this.durable_findings|length > 0 }}) - @@assert({{ this.implementation_findings|length > 0 }}) -} - -test ScanTraceWindowAllowsNoDurableSignal { - functions [ScanTraceWindow] - args { - run_instruction "Extract durable context only when the trace supports it." - prior_episode_summary "(none yet)" - prior_findings_summary "(none yet)" - trace_window #" - [5 lines, window 1-5] - 1 user: Please format this file. - 2 assistant: Ran the formatter. - 3 assistant: Fixed one lint complaint. - 4 assistant: Tests are green. - "# - } - @@assert({{ this.durable_findings|length == 0 }}) -} - -test SynthesizeExtractRecordsCreatesEpisodeAndDurableRecord { - functions [SynthesizeExtractRecords] - args { - run_instruction "Create exactly one episode and durable records only for reusable project context." - episode_summary "- The session updated an extraction harness and ran a smoke eval." - durable_findings_summary "- preference: eval-first prompt changes: Run the extraction eval before changing extraction prompts. (line 1)" - existing_record_manifest "(none)" - } - @@assert({{ this.episode.user_intent|length > 0 }}) - @@assert({{ this.episode.what_happened|length > 0 }}) - @@assert({{ this.durable_records|length > 0 }}) -} - -test SynthesizeExtractRecordsAllowsNoDurableSignal { - functions [SynthesizeExtractRecords] - args { - run_instruction "Create exactly one episode and no durable records when no reusable context exists." - episode_summary "- The session only formatted code, fixed one lint issue, and reran tests." - durable_findings_summary "(none)" - existing_record_manifest "(none)" - } - @@assert({{ this.episode.user_intent|length > 0 }}) - @@assert({{ this.episode.what_happened|length > 0 }}) - @@assert({{ this.durable_records|length == 0 }}) -} - -test SynthesizeExtractRecordsDeduplicatesCoreClaim { - functions [SynthesizeExtractRecords] - args { - run_instruction "Create compact durable records and merge duplicate meanings." - episode_summary "- The session clarified a runtime state boundary after noisy implementation discussion." - durable_findings_summary #" - - decision: persisted source of truth: Runtime status must live in one persisted store that survives restart. - - decision: runtime status boundary: Use the same persisted status store as the source of truth after restart. 
- "# - existing_record_manifest "(none)" - } - @@assert({{ this.episode.user_intent|length > 0 }}) - @@assert({{ this.durable_records|length == 1 }}) -} diff --git a/baml_agents/baml_src/generators.baml b/baml_agents/baml_src/generators.baml deleted file mode 100644 index 13c903b..0000000 --- a/baml_agents/baml_src/generators.baml +++ /dev/null @@ -1,6 +0,0 @@ -generator target { - output_type "python/pydantic" - output_dir "../" - default_client_mode "sync" - version "0.222.0" -} diff --git a/baml_agents/baml_src/models.baml b/baml_agents/baml_src/models.baml deleted file mode 100644 index e5b568d..0000000 --- a/baml_agents/baml_src/models.baml +++ /dev/null @@ -1,43 +0,0 @@ -retry_policy ExtractAgentRetry { - max_retries 1 - strategy { - type exponential_backoff - delay_ms 500 - multiplier 2 - max_delay_ms 8000 - } -} - -client MiniMaxM27 { - provider "openai-generic" - retry_policy ExtractAgentRetry - options { - base_url "https://api.minimax.io/v1" - api_key env.MINIMAX_API_KEY - model "MiniMax-M2.7" - temperature 0.01 - http { - connect_timeout_ms 10000 - time_to_first_token_timeout_ms 120000 - idle_timeout_ms 30000 - request_timeout_ms 300000 - } - } -} - -client OllamaLocal { - provider "openai-generic" - retry_policy ExtractAgentRetry - options { - base_url "http://127.0.0.1:11434/v1" - api_key "ollama" - model "gemma4:e4b" - temperature 0.0 - http { - connect_timeout_ms 10000 - time_to_first_token_timeout_ms 120000 - idle_timeout_ms 30000 - request_timeout_ms 300000 - } - } -} From 52bc24f45ba2f49243b6fdb2b14dc64f218f0a50 Mon Sep 17 00:00:00 2001 From: Isaac Kargar Date: Thu, 14 May 2026 10:42:53 +0300 Subject: [PATCH 4/8] Refactor Lerim agents to integrate BAML and LangGraph - Updated README to reflect the new architecture using BAML and LangGraph for synchronization and agent execution. - Removed the PydanticAI-only extract agent, replacing it with a new BAML runtime module for agent flows. - Adjusted agent flow documentation and structure to include new directories for BAML source and client files. - Enhanced the organization of agent tools and updated references to align with the new architecture. This commit improves the clarity and functionality of the Lerim agents, ensuring they are better integrated with the latest BAML features. 
--- src/lerim/README.md | 10 +- src/lerim/agents/__init__.py | 2 +- src/lerim/agents/baml_client/__init__.py | 60 ++ src/lerim/agents/baml_client/async_client.py | 194 +++++++ src/lerim/agents/baml_client/config.py | 102 ++++ src/lerim/agents/baml_client/globals.py | 35 ++ src/lerim/agents/baml_client/inlinedbaml.py | 22 + src/lerim/agents/baml_client/parser.py | 58 ++ src/lerim/agents/baml_client/runtime.py | 361 ++++++++++++ src/lerim/agents/baml_client/stream_types.py | 69 +++ src/lerim/agents/baml_client/sync_client.py | 204 +++++++ src/lerim/agents/baml_client/tracing.py | 22 + src/lerim/agents/baml_client/type_builder.py | 523 ++++++++++++++++++ src/lerim/agents/baml_client/type_map.py | 41 ++ src/lerim/agents/baml_client/types.py | 107 ++++ src/lerim/agents/baml_client/watchers.py | 44 ++ src/lerim/agents/baml_runtime.py | 162 ++++++ src/lerim/agents/baml_src/extract_react.baml | 284 ++++++++++ .../agents/baml_src/extract_react_tests.baml | 87 +++ src/lerim/agents/baml_src/generators.baml | 6 + src/lerim/agents/baml_src/models.baml | 43 ++ src/lerim/agents/extract.py | 512 ----------------- src/lerim/agents/extract/__init__.py | 11 + src/lerim/agents/extract/api.py | 192 +++++++ src/lerim/agents/extract/graph.py | 398 +++++++++++++ src/lerim/agents/extract/persistence.py | 390 +++++++++++++ src/lerim/agents/extract/state.py | 24 + src/lerim/agents/extract/types.py | 37 ++ src/lerim/agents/extract/windowing.py | 122 ++++ src/lerim/agents/history_processors.py | 148 ----- src/lerim/agents/tools.py | 440 +-------------- src/lerim/agents/toolsets.py | 30 +- src/lerim/config/default.toml | 7 +- src/lerim/config/providers.py | 3 +- src/lerim/config/settings.py | 7 +- src/lerim/config/tracing.py | 12 +- src/lerim/server/cli.py | 2 +- src/lerim/server/runtime.py | 45 +- src/lerim/skills/cli-reference.md | 4 +- 39 files changed, 3646 insertions(+), 1174 deletions(-) create mode 100644 src/lerim/agents/baml_client/__init__.py create mode 100644 src/lerim/agents/baml_client/async_client.py create mode 100644 src/lerim/agents/baml_client/config.py create mode 100644 src/lerim/agents/baml_client/globals.py create mode 100644 src/lerim/agents/baml_client/inlinedbaml.py create mode 100644 src/lerim/agents/baml_client/parser.py create mode 100644 src/lerim/agents/baml_client/runtime.py create mode 100644 src/lerim/agents/baml_client/stream_types.py create mode 100644 src/lerim/agents/baml_client/sync_client.py create mode 100644 src/lerim/agents/baml_client/tracing.py create mode 100644 src/lerim/agents/baml_client/type_builder.py create mode 100644 src/lerim/agents/baml_client/type_map.py create mode 100644 src/lerim/agents/baml_client/types.py create mode 100644 src/lerim/agents/baml_client/watchers.py create mode 100644 src/lerim/agents/baml_runtime.py create mode 100644 src/lerim/agents/baml_src/extract_react.baml create mode 100644 src/lerim/agents/baml_src/extract_react_tests.baml create mode 100644 src/lerim/agents/baml_src/generators.baml create mode 100644 src/lerim/agents/baml_src/models.baml delete mode 100644 src/lerim/agents/extract.py create mode 100644 src/lerim/agents/extract/__init__.py create mode 100644 src/lerim/agents/extract/api.py create mode 100644 src/lerim/agents/extract/graph.py create mode 100644 src/lerim/agents/extract/persistence.py create mode 100644 src/lerim/agents/extract/state.py create mode 100644 src/lerim/agents/extract/types.py create mode 100644 src/lerim/agents/extract/windowing.py delete mode 100644 src/lerim/agents/history_processors.py diff --git 
a/src/lerim/README.md b/src/lerim/README.md index 0b087e3..51f6a71 100644 --- a/src/lerim/README.md +++ b/src/lerim/README.md @@ -3,13 +3,14 @@ ## Summary This folder contains the Lerim runtime package. -Current architecture is PydanticAI-only for agent execution. +Current architecture uses BAML plus LangGraph for sync extraction, and +PydanticAI for maintain, ask, and working-memory agent execution. Durable Lerim context now lives in the global SQLite store at `~/.lerim/context.sqlite3`. Project identity is used to separate records by repo inside that shared DB. The package is organized by feature boundary: -- `agents/`: agent flows (`extract.py`, `maintain.py`, `ask.py`, `working_memory.py`), semantic context tools (`tools.py`), typed contracts (`contracts.py`) +- `agents/`: agent flows (`extract/`, `maintain.py`, `ask.py`, `working_memory.py`), BAML source/client files (`baml_src/`, `baml_client/`), semantic context tools (`tools.py`), typed contracts (`contracts.py`) - `server/`: CLI (`cli.py`), HTTP API (`httpd.py`), daemon (`daemon.py`), runtime orchestrator (`runtime.py`), Docker/runtime API helpers (`api.py`) - `config/`: config loading (`settings.py`), PydanticAI model builders (`providers.py`), tracing and logging setup - `context/`: global SQLite context store, ONNX embedding provider, `sqlite-vec` index management, and retrieval/write helpers @@ -29,8 +30,9 @@ If you are new to the codebase, read in this order: 4. `working_memory.py` and `agents/working_memory.py` for generated Working Memory. 5. `context/store.py` for the canonical SQLite schema and retrieval/write logic. This is where hybrid search happens: local ONNX embeddings, `sqlite-vec` KNN, SQLite FTS5, and RRF fusion. -6. `agents/tools.py` for the authoritative semantic agent tool surface (`read_trace`, `list_context`, `search_context`, `get_context`, `save_context`, `revise_context`, `archive_context`, `supersede_context`, `count_context`, `note_trace_findings`, `prune_trace_reads`). -7. `agents/extract.py`, `agents/maintain.py`, `agents/ask.py` for PydanticAI agent behavior. +6. `agents/extract/` and `agents/baml_src/` for sync extraction behavior. +7. `agents/tools.py` for the maintain/ask semantic tool surface (`list_context`, `search_context`, `get_context`, `revise_context`, `archive_context`, `supersede_context`, `count_context`). +8. `agents/maintain.py`, `agents/ask.py`, and `agents/working_memory.py` for PydanticAI agent behavior. ## Working Memory flow diff --git a/src/lerim/agents/__init__.py b/src/lerim/agents/__init__.py index 3289380..393eaf5 100644 --- a/src/lerim/agents/__init__.py +++ b/src/lerim/agents/__init__.py @@ -1,4 +1,4 @@ -"""Agent modules: extract, maintain, ask + shared tools (all PydanticAI).""" +"""Agent modules for extract, maintain, ask, and working-memory flows.""" from __future__ import annotations diff --git a/src/lerim/agents/baml_client/__init__.py b/src/lerim/agents/baml_client/__init__.py new file mode 100644 index 0000000..b279bc3 --- /dev/null +++ b/src/lerim/agents/baml_client/__init__.py @@ -0,0 +1,60 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. 
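+# The pinned version below is checked against the installed baml-py by
+# EnsureBamlPyImport, so a generator/runtime version mismatch surfaces at
+# import time rather than as a confusing failure at call time.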
+ +__version__ = "0.222.0" + +try: + from baml_py.safe_import import EnsureBamlPyImport +except ImportError: + raise ImportError(f"""Update to baml-py required. +Version of baml_client generator (see generators.baml): {__version__} + +Please upgrade baml-py to version "{__version__}". + +$ pip install baml-py=={__version__} +$ uv add baml-py=={__version__} + +If nothing else works, please ask for help: + +https://github.com/boundaryml/baml/issues +https://boundaryml.com/discord +""") from None + + +with EnsureBamlPyImport(__version__) as e: + e.raise_if_incompatible_version(__version__) + + from . import types + from . import tracing + from . import stream_types + from . import config + from .config import reset_baml_env_vars + + from .sync_client import b + + from . import watchers + + +# FOR LEGACY COMPATIBILITY, expose "partial_types" as an alias for "stream_types" +# WE RECOMMEND USERS TO USE "stream_types" INSTEAD +partial_types = stream_types + +__all__ = [ + "b", + "stream_types", + "partial_types", + "tracing", + "types", + "reset_baml_env_vars", + "config", + "watchers", +] \ No newline at end of file diff --git a/src/lerim/agents/baml_client/async_client.py b/src/lerim/agents/baml_client/async_client.py new file mode 100644 index 0000000..c4ea6c0 --- /dev/null +++ b/src/lerim/agents/baml_client/async_client.py @@ -0,0 +1,194 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +import typing +import typing_extensions +import baml_py + +from . 
import stream_types, types, type_builder +from .parser import LlmResponseParser, LlmStreamParser +from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions +from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__ + + +class BamlAsyncClient: + __options: DoNotUseDirectlyCallManager + __stream_client: "BamlStreamClient" + __http_request: "BamlHttpRequestClient" + __http_stream_request: "BamlHttpStreamRequestClient" + __llm_response_parser: LlmResponseParser + __llm_stream_parser: LlmStreamParser + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + self.__stream_client = BamlStreamClient(options) + self.__http_request = BamlHttpRequestClient(options) + self.__http_stream_request = BamlHttpStreamRequestClient(options) + self.__llm_response_parser = LlmResponseParser(options) + self.__llm_stream_parser = LlmStreamParser(options) + + def with_options(self, + tb: typing.Optional[type_builder.TypeBuilder] = None, + client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] = None, + client: typing.Optional[str] = None, + collector: typing.Optional[typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]]] = None, + env: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None, + tags: typing.Optional[typing.Dict[str, str]] = None, + on_tick: typing.Optional[typing.Callable[[str, baml_py.baml_py.FunctionLog], None]] = None, + ) -> "BamlAsyncClient": + options: BamlCallOptions = {} + if tb is not None: + options["tb"] = tb + if client_registry is not None: + options["client_registry"] = client_registry + if client is not None: + options["client"] = client + if collector is not None: + options["collector"] = collector + if env is not None: + options["env"] = env + if tags is not None: + options["tags"] = tags + if on_tick is not None: + options["on_tick"] = on_tick + return BamlAsyncClient(self.__options.merge_options(options)) + + @property + def stream(self): + return self.__stream_client + + @property + def request(self): + return self.__http_request + + @property + def stream_request(self): + return self.__http_stream_request + + @property + def parse(self): + return self.__llm_response_parser + + @property + def parse_stream(self): + return self.__llm_stream_parser + + async def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, + baml_options: BamlCallOptions = {}, + ) -> types.TraceWindowScan: + # Check if on_tick is provided + if 'on_tick' in baml_options: + # Use streaming internally when on_tick is provided + __stream__ = self.stream.ScanTraceWindow(run_instruction=run_instruction,prior_episode_summary=prior_episode_summary,prior_findings_summary=prior_findings_summary,trace_window=trace_window, + baml_options=baml_options) + return await __stream__.get_final_response() + else: + # Original non-streaming code + __result__ = await self.__options.merge_options(baml_options).call_function_async(function_name="ScanTraceWindow", args={ + "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, + }) + return typing.cast(types.TraceWindowScan, __result__.cast_to(types, types, stream_types, False, __runtime__)) + async def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, + baml_options: BamlCallOptions = {}, + ) -> 
types.SynthesizedExtraction: + # Check if on_tick is provided + if 'on_tick' in baml_options: + # Use streaming internally when on_tick is provided + __stream__ = self.stream.SynthesizeExtractRecords(run_instruction=run_instruction,episode_summary=episode_summary,durable_findings_summary=durable_findings_summary,existing_record_manifest=existing_record_manifest, + baml_options=baml_options) + return await __stream__.get_final_response() + else: + # Original non-streaming code + __result__ = await self.__options.merge_options(baml_options).call_function_async(function_name="SynthesizeExtractRecords", args={ + "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, + }) + return typing.cast(types.SynthesizedExtraction, __result__.cast_to(types, types, stream_types, False, __runtime__)) + + + +class BamlStreamClient: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlStream[stream_types.TraceWindowScan, types.TraceWindowScan]: + __ctx__, __result__ = self.__options.merge_options(baml_options).create_async_stream(function_name="ScanTraceWindow", args={ + "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, + }) + return baml_py.BamlStream[stream_types.TraceWindowScan, types.TraceWindowScan]( + __result__, + lambda x: typing.cast(stream_types.TraceWindowScan, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.TraceWindowScan, x.cast_to(types, types, stream_types, False, __runtime__)), + __ctx__, + ) + def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlStream[stream_types.SynthesizedExtraction, types.SynthesizedExtraction]: + __ctx__, __result__ = self.__options.merge_options(baml_options).create_async_stream(function_name="SynthesizeExtractRecords", args={ + "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, + }) + return baml_py.BamlStream[stream_types.SynthesizedExtraction, types.SynthesizedExtraction]( + __result__, + lambda x: typing.cast(stream_types.SynthesizedExtraction, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.SynthesizedExtraction, x.cast_to(types, types, stream_types, False, __runtime__)), + __ctx__, + ) + + +class BamlHttpRequestClient: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + async def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="ScanTraceWindow", args={ + "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, + 
}, mode="request") + return __result__ + async def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SynthesizeExtractRecords", args={ + "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, + }, mode="request") + return __result__ + + +class BamlHttpStreamRequestClient: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + async def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="ScanTraceWindow", args={ + "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, + }, mode="stream") + return __result__ + async def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SynthesizeExtractRecords", args={ + "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, + }, mode="stream") + return __result__ + + +b = BamlAsyncClient(DoNotUseDirectlyCallManager({})) \ No newline at end of file diff --git a/src/lerim/agents/baml_client/config.py b/src/lerim/agents/baml_client/config.py new file mode 100644 index 0000000..64b7fff --- /dev/null +++ b/src/lerim/agents/baml_client/config.py @@ -0,0 +1,102 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +from __future__ import annotations + +import os +import warnings +import typing_extensions +import typing +import functools + +from baml_py.logging import ( + get_log_level as baml_get_log_level, + set_log_level as baml_set_log_level, +) +from .globals import reset_baml_env_vars + +rT = typing_extensions.TypeVar("rT") # return type +pT = typing_extensions.ParamSpec("pT") # parameters type + + +def _deprecated(message: str): + def decorator(func: typing.Callable[pT, rT]) -> typing.Callable[pT, rT]: + """Use this decorator to mark functions as deprecated. 
+ Every time the decorated function runs, it will emit + a "deprecation" warning.""" + + @functools.wraps(func) + def new_func(*args: pT.args, **kwargs: pT.kwargs): + warnings.simplefilter("always", DeprecationWarning) # turn off filter + warnings.warn( + "Call to a deprecated function {}.".format(func.__name__) + message, + category=DeprecationWarning, + stacklevel=2, + ) + warnings.simplefilter("default", DeprecationWarning) # reset filter + return func(*args, **kwargs) + + return new_func + + return decorator + + +@_deprecated("Use os.environ['BAML_LOG'] instead") +def get_log_level(): + """ + Get the log level for the BAML Python client. + """ + return baml_get_log_level() + + +@_deprecated("Use os.environ['BAML_LOG'] instead") +def set_log_level( + level: typing_extensions.Literal["DEBUG", "INFO", "WARN", "ERROR", "OFF"] | str, +): + """ + Set the log level for the BAML Python client + """ + baml_set_log_level(level) + os.environ["BAML_LOG"] = level + + +@_deprecated("Use os.environ['BAML_LOG_JSON_MODE'] instead") +def set_log_json_mode(): + """ + Set the log JSON mode for the BAML Python client. + """ + os.environ["BAML_LOG_JSON_MODE"] = "true" + + +@_deprecated("Use os.environ['BAML_LOG_MAX_CHUNK_LENGTH'] instead") +def set_log_max_chunk_length(): + """ + Set the maximum log chunk length for the BAML Python client. + """ + os.environ["BAML_LOG_MAX_CHUNK_LENGTH"] = "1000" + + +def set_log_max_message_length(*args, **kwargs): + """ + Alias for set_log_max_chunk_length for compatibility with docs. + """ + return set_log_max_chunk_length(*args, **kwargs) + + +__all__ = [ + "set_log_level", + "get_log_level", + "set_log_json_mode", + "reset_baml_env_vars", + "set_log_max_message_length", + "set_log_max_chunk_length", +] diff --git a/src/lerim/agents/baml_client/globals.py b/src/lerim/agents/baml_client/globals.py new file mode 100644 index 0000000..769e055 --- /dev/null +++ b/src/lerim/agents/baml_client/globals.py @@ -0,0 +1,35 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +from __future__ import annotations +import os +import warnings + +from baml_py import BamlCtxManager, BamlRuntime +from .inlinedbaml import get_baml_files +from typing import Dict + +DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME = BamlRuntime.from_files( + "baml_src", + get_baml_files(), + os.environ.copy() +) +DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX = BamlCtxManager(DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME) + +def reset_baml_env_vars(env_vars: Dict[str, str]): + warnings.warn( + "reset_baml_env_vars is deprecated and should be removed. Environment variables are now lazily loaded on each function call", + DeprecationWarning, + stacklevel=2 + ) + +__all__ = [] diff --git a/src/lerim/agents/baml_client/inlinedbaml.py b/src/lerim/agents/baml_client/inlinedbaml.py new file mode 100644 index 0000000..7f11456 --- /dev/null +++ b/src/lerim/agents/baml_client/inlinedbaml.py @@ -0,0 +1,22 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! 
To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +_file_map = { + + "extract_react.baml": "enum RecordKind {\n DECISION @alias(\"decision\") @description(\"A durable project decision with decision and why fields.\")\n PREFERENCE @alias(\"preference\") @description(\"A stable user or workflow preference.\")\n CONSTRAINT @alias(\"constraint\") @description(\"A durable invariant, limit, or must/cannot rule.\")\n FACT @alias(\"fact\") @description(\"A durable project fact or setup truth.\")\n REFERENCE @alias(\"reference\") @description(\"A pointer to an external source of truth.\")\n}\n\nenum RecordStatus {\n ACTIVE @alias(\"active\") @description(\"Current context that future sessions may reuse.\")\n ARCHIVED @alias(\"archived\") @description(\"Historical or routine context that should not be treated as active guidance.\")\n}\n\nenum FindingLevel {\n DECISION @alias(\"decision\") @description(\"Durable decision-level finding.\")\n PREFERENCE @alias(\"preference\") @description(\"Durable user or workflow preference finding.\")\n FEEDBACK @alias(\"feedback\") @description(\"Durable feedback-level finding.\")\n REFERENCE @alias(\"reference\") @description(\"Durable external-source finding.\")\n CONSTRAINT @alias(\"constraint\") @description(\"Durable constraint-level finding.\")\n FACT @alias(\"fact\") @description(\"Durable fact-level finding.\")\n IMPLEMENTATION @alias(\"implementation\") @description(\"Trace-local implementation evidence or discarded hypothesis.\")\n}\n\nclass TraceWindowFinding {\n theme string @description(\"Short stable theme for this finding.\")\n level FindingLevel @description(\"Use durable levels for reusable context, implementation for local/noisy evidence.\")\n line int? @description(\"1-based supporting line when the window gives one.\")\n quote string? @description(\"Short supporting quote from the current window.\")\n note string @description(\"Compact semantic finding. Avoid command logs and copied errors.\")\n}\n\nclass TraceWindowScan {\n episode_update string? @description(\"Compact update for the final episode summary. May be omitted when this window adds nothing.\")\n durable_findings TraceWindowFinding[] @description(\"Reusable decisions, preferences, constraints, facts, and references found in this window.\")\n implementation_findings TraceWindowFinding[] @description(\"Implementation evidence, discarded hypotheses, and local details useful only as support/noise.\")\n discarded_noise string[] @description(\"Short descriptions of noisy categories intentionally not saved.\")\n}\n\nclass EpisodeDraft {\n title string? @description(\"Optional short title for the current-session episode. Runtime derives one if omitted.\")\n body string? @description(\"Compact episode body. If omitted, runtime builds it from user_intent and what_happened.\")\n status RecordStatus? @description(\"Use archived for routine/no-durable sessions; active only when the episode itself remains useful.\")\n user_intent string? @description(\"What the user wanted in this source session. Runtime fills a generic fallback if omitted.\")\n what_happened string? @description(\"What the session actually did. Runtime fills a generic fallback if omitted.\")\n outcomes string? 
@description(\"Optional concise outcome.\")\n}\n\nclass DurableRecordDraft {\n kind RecordKind @description(\"Durable record kind.\")\n title string @description(\"Short standalone durable title.\")\n body string @description(\"Compact standalone durable body.\")\n status RecordStatus? @description(\"Usually active for reusable durable records.\")\n valid_from string?\n valid_until string?\n decision string?\n why string?\n alternatives string?\n consequences string?\n}\n\nclass SynthesizedExtraction {\n episode EpisodeDraft @description(\"Exactly one current-session episode record draft.\")\n durable_records DurableRecordDraft[] @description(\"Zero or more durable records.\")\n completion_summary string? @description(\"Brief summary of extraction work for final_result/reporting.\")\n}\n\nfunction ScanTraceWindow(\n run_instruction: string,\n prior_episode_summary: string,\n prior_findings_summary: string,\n trace_window: string\n) -> TraceWindowScan {\n client MiniMaxM27\n prompt #\"\n {{ _.role(\"system\") }}\n You scan one window from a coding-agent trace for Lerim extraction.\n Return only structured output. Do not save records and do not plan future tool calls.\n Do not include tags, hidden reasoning, markdown, or prose.\n The top-level output must include episode_update, durable_findings, implementation_findings, and discarded_noise.\n Use an empty string or empty list when a field has no content.\n Every durable_findings and implementation_findings item must be an object with:\n - theme\n - level: one of decision, preference, feedback, reference, constraint, fact, or implementation\n - note\n - optional line\n - optional quote\n Never return finding items as plain strings.\n Never use confidence labels such as high, medium, or low as finding levels.\n\n Separate:\n - episode_update: what happened in this session window, for the final episode record.\n - durable_findings: reusable project/user context only.\n - implementation_findings: local evidence, command work, discarded hypotheses, or support.\n - discarded_noise: categories of content intentionally ignored.\n\n Durable signal means a decision, preference, constraint, fact, or reference likely useful beyond this trace.\n Implementation detail alone is not durable signal.\n A durable finding should help a future agent make a better decision in a new chat.\n Save what a future agent would be relieved to already know: stable user preferences, chosen policies, durable constraints, source-of-truth references, and stable project or product truths.\n If a detail only helps replay or debug this trace's terminal session, put it in implementation_findings or discarded_noise, not durable_findings.\n Raw metrics, comparison tables, config edits, API wiring, package installation, debug commands, file paths, and stack traces are implementation_findings unless the user turns them into a reusable policy, preference, constraint, fact, or source-of-truth reference.\n When the same topic has both user-level guidance and technical evidence, the user-level guidance is the durable finding; the technical evidence is implementation support.\n For technical blockers, keep durable_findings at the capability, status, and implication level only. 
Put patch recipes, flags, functions, commands, exact metrics, and local debug steps in implementation_findings.\n When a window mixes a durable point with local evidence, put only the reusable point in durable_findings and put the supporting local evidence in implementation_findings.\n Use semantic meaning, not exact wording. Do not copy long code, logs, stack traces, or command transcripts.\n If the window has no durable signal, return an empty durable_findings list.\n If the window adds nothing meaningful to the episode, episode_update may be an empty string.\n\n {{ _.role(\"user\") }}\n RUN INSTRUCTION:\n {{ run_instruction }}\n\n PRIOR EPISODE SUMMARY:\n {{ prior_episode_summary }}\n\n PRIOR FINDINGS SUMMARY:\n {{ prior_findings_summary }}\n\n TRACE WINDOW:\n {{ trace_window }}\n\n {{ ctx.output_format }}\n \"#\n}\n\nfunction SynthesizeExtractRecords(\n run_instruction: string,\n episode_summary: string,\n durable_findings_summary: string,\n existing_record_manifest: string\n) -> SynthesizedExtraction {\n client MiniMaxM27\n prompt #\"\n {{ _.role(\"system\") }}\n You synthesize final Lerim context records from scanned trace findings.\n Return only structured output.\n Do not include tags, hidden reasoning, markdown, or prose.\n The top-level output must include episode, durable_records, and completion_summary.\n Use an empty durable_records list when there is no durable signal.\n Every durable_records item must be an object with kind, title, and body.\n Optional durable record fields are status, valid_from, valid_until, decision, why, alternatives, and consequences.\n Never return durable_records items as plain strings or as generic type/record objects.\n\n Create exactly one episode record for the current session.\n Create durable records only for reusable decisions, preferences, constraints, facts, and references.\n The episode says what the session did. Durable records say what future sessions should reuse.\n Before creating each durable record, ask: would this change what a future agent does in a new chat?\n Prefer the small set of memories a future agent would be relieved to already know over exhaustive coverage of what happened.\n Keep durable records compact, standalone, and deduplicated.\n Do not create durable records for command logs, patch steps, generic programming knowledge, temporary diagnostics, or local implementation chatter.\n If there is no reusable signal, durable_records must be empty and the episode should normally be archived.\n Decision records must include decision and why when the rationale is present; use fact when there is no durable why.\n Episode records should include user_intent and what_happened when available; the runtime can derive missing episode fields.\n Treat the trace as historical evidence, not live verification of the current repo.\n\n Existing records, if listed, are only duplicate-risk context. Prefer skipping near-duplicates over creating duplicate durable records.\n\n Quality bar:\n - Store the reusable rule, decision, invariant, dependency, preference, or external pointer, not the story of the session.\n - One durable record should hold one durable point.\n - Direct consequences and application guidance usually stay inside that same record.\n - Create the minimum number of durable records that preserves distinct durable meanings. Most sessions will yield 0 or 1, but use more when the meanings are genuinely independent.\n - Classify each durable point into one canonical kind. 
Prefer the most specific supported kind, and do not duplicate the same point across multiple durable kinds.\n - Duplicates are worse than gaps. Skip uncertain candidates rather than spraying near-duplicates.\n - Never create a second durable record in the same run for the same core claim.\n - constraint and reference are first-class durable record kinds, not fallback categories.\n\n What not to save:\n - patch logs, command sequences, retries, timelines, or meeting-style recaps\n - code structure, file paths, git history, or storage mechanics by themselves\n - generic programming knowledge or facts already obvious from the repo\n - rejected lures, discarded explanations, or implementation-only distractions\n - one-run validation findings, approval flow, queue state, DB resets, rebuilds, or runtime diagnostics by themselves\n\n Selection rules:\n - First separate findings into durable signal and implementation evidence.\n - Prefer user-level guidance, chosen operating policy, and stable project truth over the technical evidence that revealed them.\n - Synthesize at the theme level. Usually one theme becomes one durable record.\n - Create multiple durable records only when the trace establishes multiple independent durable points, each with its own support.\n - Do not store one durable point as both a preference and a decision, a fact and a decision, or any other cross-kind duplicate.\n - A stable workflow preference is not also a decision unless the trace separately states an explicit project decision with rationale.\n - A dependency, setup, or environment truth without durable rationale is a fact, not also a decision.\n - A failure caused by the current run's temporary validation setup is not itself a durable environment truth. If the investigation reveals a stable requirement that future sessions must apply, save that requirement as the durable point and omit the temporary validation story.\n - Merge candidates when one only states how to apply the other in local operations, routing, or ownership.\n - If two candidates share the same core claim, merge them.\n - If the difference is only evidence framing, symptom wording, or local-vs-CI phrasing around the same durable fact, keep one record and fold the extra context into it.\n - If one candidate is only the direct application or routing consequence of another, keep it inside the stronger record.\n - If one candidate only says how different local components should apply the same project rule, keep that guidance inside the main record rather than creating a second durable record.\n - If one candidate only restates where local project components live or how an internal architecture is applied, keep it inside the stronger decision, fact, or constraint instead of creating a separate reference.\n - If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule.\n - If a technical blocker is reusable, record only the capability affected, status or source of truth, and implication for future work.\n - Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered.\n - Store durable records only when the lesson is likely reusable beyond this trace.\n - If a candidate is mainly about this trace's commands, files, or timeline, reject it.\n - Trace-local instructions about what to ignore in this session are not preferences unless they clearly express a broader standing workflow rule for future sessions.\n - If the trace 
explicitly says the rationale is unknown or says not to invent one, do not create a decision; use fact instead.\n - A stable setup, dependency, or environment requirement without a durable why is a fact even if it sounds like the current chosen setup.\n - The instruction \"do not invent a why\" is extraction guidance, not project context.\n - If the trace explicitly rejects a lure or distraction, do not carry that rejected idea into the durable record text unless the rejection itself is the durable lesson.\n - If a long noisy investigation resolves into one source-of-truth boundary, store only that boundary. Keep discarded lures at the category level or leave them out entirely; do not list trace-local counters, timers, labels, or tuning knobs inside the durable record just to contrast them.\n - When a discarded lure matters as evidence, keep it attached to the main durable theme as implementation context rather than storing it as a second durable theme.\n - If the episode summary contains clearly reusable decision, preference, constraint, fact, or reference, that point should usually also exist as its own durable record.\n - Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode.\n - Durable records are additional project context, not a substitute for the session episode.\n - Most traces should produce only a few durable records; create more only when each one would independently change future behavior.\n\n Writing rules:\n - Durable titles should name the lasting rule, decision, fact, constraint, preference, or reference directly.\n - Durable bodies should be compact, neutral, and standalone.\n - Prefer this shape for durable records: the durable point, why it matters, and how to apply it later.\n - Do not write durable records as meeting minutes, patch logs, or cleanup commentary.\n - Do not preserve trace-local commands, negotiation phrasing, or \"this is not about X\" sentences in final record text.\n - Do not mention discarded implementation noise in durable record fields, including consequences.\n - Do not include patch recipes, flags, function names, exact metrics, or local debug steps in durable record text.\n - Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored.\n - When the durable lesson is a source-of-truth rule, write the authoritative rule directly.\n - If a short contrast is still helpful, keep it abstract, such as \"not worker-local state\" or \"not ephemeral local state\". Do not enumerate examples in parentheses or comma-separated lists.\n - When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection.\n - Facts from noisy failures must be rewritten into the underlying dependency, environment requirement, stakeholder driver, or operational fact.\n - If a fact still reads like stderr, an exception symptom, or copied command output, rewrite it again before writing.\n - When the durable lesson is an environment or dependency requirement, do not center the fact on the observed failure symptom. Name the requirement directly and mention the symptom only if it is needed as brief supporting context.\n - If brief supporting context is useful, lead with the requirement and keep the symptom generic. 
Never include exception class names, quoted error fragments, or copied failure strings in the durable fact.\n - If the candidate is mainly \"this validation run failed until we changed the setup\", it belongs in the archived episode. If the candidate names a reusable setup or runtime requirement discovered through that validation, keep the requirement and drop the failure narrative.\n - When no durable rationale exists, do not spend the fact body explaining that the rationale is absent. Just state the stable dependency, setup requirement, or operational truth directly.\n - Do not quote or paraphrase trace instructions about how to classify the evidence inside the final fact body. Final fact text should describe the underlying truth, not the extraction rule you followed.\n - References must answer both \"where should future sessions look?\" and \"when should they consult it?\"\n - Do not use reference for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than \"consult this external source next time.\"\n - Keep the episode concise: short title, short body, concise user_intent, what_happened, and outcomes.\n\n Record types:\n - preference: Stable workflow guidance from the user. Save corrections and confirmed non-obvious working style that should carry into future sessions.\n - decision: A chosen approach or project rule that future work should follow and that is not obvious from code alone. If the trace does not support a durable why, do not use decision.\n - constraint: A durable invariant, limit, or must/cannot rule that future work must respect.\n - fact: A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth.\n - reference: A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo. Use reference only when the enduring value is where to look later.\n\n Few-shot quality examples:\n\n Example preference:\n - Trace signal: the user corrects the assistant after a small code fix and says not to append redundant recaps.\n - Good: create one preference record about keeping replies terse and avoiding redundant change recaps after small diffs.\n - Bad: store the file edit itself, or treat the correction as one-session scratch when it is stable workflow guidance.\n\n Example decision:\n - Trace signal: early turns discuss local refactors and flaky tests; late in the trace the user settles one architecture boundary, and follow-on routing guidance only applies that boundary.\n - Good: create the required episode and one decision record for the architecture boundary. Keep the routing guidance inside that record.\n - Bad: store refactor noise, split one architectural choice into near-duplicate records, or create a separate durable record saying the refactors were noise.\n\n Example fact:\n - Trace signal: repeated failed commands and partial theories eventually resolve to one stable operational requirement.\n - Good: create one fact record for the requirement in clean operational language. 
Lead with the dependency or environment requirement; mention the failure only as brief generic support if needed.\n - Bad: store raw errors, command history, rejected theories, exact exception text, or a separate record saying not to invent a why.\n\n Example late clarification:\n - Trace signal: early chunks circle local counters, timers, labels, and tuning; the final chunk clarifies those were distractions and the real durable lesson is a source-of-truth boundary.\n - Good: create one durable record for the source-of-truth boundary. Mention restart or failover only if it explains why the boundary matters.\n - Bad: write a durable record that carries over rejected local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list.\n\n Example reference:\n - Trace signal: the assistant starts from a partial repo note, then the user clarifies that ownership or status lives in an external dashboard or ticket system.\n - Good: create one reference record that names the external source and when future sessions should consult it.\n - Bad: center the record on local files, or turn it into a warning slogan about what not to trust locally.\n\n Example routine:\n - Trace signal: formatter, small lint fix, rerun tests, green result, no new rule or durable fact.\n - Good: create only an archived episode.\n - Bad: invent a durable record from the sequence of routine commands.\n\n {{ _.role(\"user\") }}\n RUN INSTRUCTION:\n {{ run_instruction }}\n\n EXISTING RECORD MANIFEST:\n {{ existing_record_manifest }}\n\n EPISODE SUMMARY:\n {{ episode_summary }}\n\n DURABLE FINDINGS:\n {{ durable_findings_summary }}\n\n {{ ctx.output_format }}\n \"#\n}\n", + "extract_react_tests.baml": "test ScanTraceWindowCapturesDurableDecision {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract one episode and any durable project context from the trace.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [8 lines, window 1-8]\n 1\tuser: We should run extraction evals before changing extraction prompts.\n 2\tassistant: I will update the BAML harness and run the eval.\n 3\tuser: Keep trace-local command logs out of long-term context.\n 4\tassistant: Implemented the change and ran a smoke test.\n \"#\n }\n @@assert({{ this.durable_findings|length > 0 }})\n}\n\ntest ScanTraceWindowSeparatesDurableAndImplementation {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract reusable context, not command history.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [7 lines, window 1-7]\n 1\tuser: The deployment checklist is the source of truth for release readiness.\n 2\tassistant: Ran pytest -q and fixed a local fixture path.\n 3\tassistant: Edited src/example.py and reran the lint command.\n 4\tuser: The command output is just evidence, not memory.\n \"#\n }\n @@assert({{ this.durable_findings|length > 0 }})\n @@assert({{ this.implementation_findings|length > 0 }})\n}\n\ntest ScanTraceWindowAllowsNoDurableSignal {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract durable context only when the trace supports it.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [5 lines, window 1-5]\n 1\tuser: Please format this file.\n 2\tassistant: Ran the formatter.\n 3\tassistant: Fixed one lint complaint.\n 4\tassistant: Tests are green.\n \"#\n }\n @@assert({{ this.durable_findings|length == 0 }})\n}\n\ntest 
SynthesizeExtractRecordsCreatesEpisodeAndDurableRecord {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create exactly one episode and durable records only for reusable project context.\"\n episode_summary \"- The session updated an extraction harness and ran a smoke eval.\"\n durable_findings_summary \"- preference: eval-first prompt changes: Run the extraction eval before changing extraction prompts. (line 1)\"\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.durable_records|length > 0 }})\n}\n\ntest SynthesizeExtractRecordsAllowsNoDurableSignal {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create exactly one episode and no durable records when no reusable context exists.\"\n episode_summary \"- The session only formatted code, fixed one lint issue, and reran tests.\"\n durable_findings_summary \"(none)\"\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.durable_records|length == 0 }})\n}\n\ntest SynthesizeExtractRecordsDeduplicatesCoreClaim {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create compact durable records and merge duplicate meanings.\"\n episode_summary \"- The session clarified a runtime state boundary after noisy implementation discussion.\"\n durable_findings_summary #\"\n - decision: persisted source of truth: Runtime status must live in one persisted store that survives restart.\n - decision: runtime status boundary: Use the same persisted status store as the source of truth after restart.\n \"#\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.durable_records|length == 1 }})\n}\n", + "generators.baml": "generator target {\n output_type \"python/pydantic\"\n output_dir \"../\"\n default_client_mode \"sync\"\n version \"0.222.0\"\n}\n", + "models.baml": "retry_policy ExtractAgentRetry {\n max_retries 1\n strategy {\n type exponential_backoff\n delay_ms 500\n multiplier 2\n max_delay_ms 8000\n }\n}\n\nclient MiniMaxM27 {\n provider \"openai-generic\"\n retry_policy ExtractAgentRetry\n options {\n base_url \"https://api.minimax.io/v1\"\n api_key env.MINIMAX_API_KEY\n model \"MiniMax-M2.7\"\n temperature 0.01\n http {\n connect_timeout_ms 10000\n time_to_first_token_timeout_ms 120000\n idle_timeout_ms 30000\n request_timeout_ms 300000\n }\n }\n}\n\nclient OllamaLocal {\n provider \"openai-generic\"\n retry_policy ExtractAgentRetry\n options {\n base_url \"http://127.0.0.1:11434/v1\"\n api_key \"ollama\"\n model \"gemma4:e4b\"\n temperature 0.0\n http {\n connect_timeout_ms 10000\n time_to_first_token_timeout_ms 120000\n idle_timeout_ms 30000\n request_timeout_ms 300000\n }\n }\n}\n", +} + +def get_baml_files(): + return _file_map \ No newline at end of file diff --git a/src/lerim/agents/baml_client/parser.py b/src/lerim/agents/baml_client/parser.py new file mode 100644 index 0000000..9cb2f07 --- /dev/null +++ b/src/lerim/agents/baml_client/parser.py @@ -0,0 +1,58 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +import typing +import typing_extensions + + +from . 
import stream_types, types +from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions + +class LlmResponseParser: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + def ScanTraceWindow( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> types.TraceWindowScan: + __result__ = self.__options.merge_options(baml_options).parse_response(function_name="ScanTraceWindow", llm_response=llm_response, mode="request") + return typing.cast(types.TraceWindowScan, __result__) + + def SynthesizeExtractRecords( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> types.SynthesizedExtraction: + __result__ = self.__options.merge_options(baml_options).parse_response(function_name="SynthesizeExtractRecords", llm_response=llm_response, mode="request") + return typing.cast(types.SynthesizedExtraction, __result__) + + + +class LlmStreamParser: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + def ScanTraceWindow( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> stream_types.TraceWindowScan: + __result__ = self.__options.merge_options(baml_options).parse_response(function_name="ScanTraceWindow", llm_response=llm_response, mode="stream") + return typing.cast(stream_types.TraceWindowScan, __result__) + + def SynthesizeExtractRecords( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> stream_types.SynthesizedExtraction: + __result__ = self.__options.merge_options(baml_options).parse_response(function_name="SynthesizeExtractRecords", llm_response=llm_response, mode="stream") + return typing.cast(stream_types.SynthesizedExtraction, __result__) + + \ No newline at end of file diff --git a/src/lerim/agents/baml_client/runtime.py b/src/lerim/agents/baml_client/runtime.py new file mode 100644 index 0000000..27fc3a9 --- /dev/null +++ b/src/lerim/agents/baml_client/runtime.py @@ -0,0 +1,361 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +import os +import typing +import typing_extensions + +import baml_py + +from . 
import types, stream_types, type_builder +from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__, DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX as __ctx__manager__ + + +class BamlCallOptions(typing.TypedDict, total=False): + tb: typing_extensions.NotRequired[type_builder.TypeBuilder] + client_registry: typing_extensions.NotRequired[baml_py.baml_py.ClientRegistry] + client: typing_extensions.NotRequired[str] + env: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[str]]] + tags: typing_extensions.NotRequired[typing.Dict[str, str]] + collector: typing_extensions.NotRequired[ + typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]] + ] + abort_controller: typing_extensions.NotRequired[baml_py.baml_py.AbortController] + on_tick: typing_extensions.NotRequired[typing.Callable[[str, baml_py.baml_py.FunctionLog], None]] + watchers: typing_extensions.NotRequired[typing.Any] # EventCollector type, will be overridden in generated clients + + +class _ResolvedBamlOptions: + tb: typing.Optional[baml_py.baml_py.TypeBuilder] + client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] + collectors: typing.List[baml_py.baml_py.Collector] + env_vars: typing.Dict[str, str] + tags: typing.Dict[str, str] + abort_controller: typing.Optional[baml_py.baml_py.AbortController] + on_tick: typing.Optional[typing.Callable[[], None]] + watchers: typing.Optional[typing.Any] + + def __init__( + self, + tb: typing.Optional[baml_py.baml_py.TypeBuilder], + client_registry: typing.Optional[baml_py.baml_py.ClientRegistry], + collectors: typing.List[baml_py.baml_py.Collector], + env_vars: typing.Dict[str, str], + tags: typing.Dict[str, str], + abort_controller: typing.Optional[baml_py.baml_py.AbortController], + on_tick: typing.Optional[typing.Callable[[], None]], + watchers: typing.Optional[typing.Any], + ): + self.tb = tb + self.client_registry = client_registry + self.collectors = collectors + self.env_vars = env_vars + self.tags = tags + self.abort_controller = abort_controller + self.on_tick = on_tick + self.watchers = watchers + + + + +class DoNotUseDirectlyCallManager: + def __init__(self, baml_options: BamlCallOptions): + self.__baml_options = baml_options + + def __getstate__(self): + # Return state needed for pickling + return {"baml_options": self.__baml_options} + + def __setstate__(self, state): + # Restore state from pickling + self.__baml_options = state["baml_options"] + + def __resolve(self) -> _ResolvedBamlOptions: + tb = self.__baml_options.get("tb") + if tb is not None: + baml_tb = tb._tb # type: ignore (we know how to use this private attribute) + else: + baml_tb = None + client_registry = self.__baml_options.get("client_registry") + client = self.__baml_options.get("client") + + # If client is provided, it takes precedence (creates/overrides client_registry primary) + if client is not None: + if client_registry is None: + client_registry = baml_py.baml_py.ClientRegistry() + client_registry.set_primary(client) + + collector = self.__baml_options.get("collector") + collectors_as_list = ( + collector + if isinstance(collector, list) + else [collector] if collector is not None else [] + ) + env_vars = os.environ.copy() + for k, v in self.__baml_options.get("env", {}).items(): + if v is not None: + env_vars[k] = v + else: + env_vars.pop(k, None) + + tags = self.__baml_options.get("tags", {}) or {} + + abort_controller = self.__baml_options.get("abort_controller") + + on_tick = self.__baml_options.get("on_tick") + 
if on_tick is not None: + collector = baml_py.baml_py.Collector("on-tick-collector") + collectors_as_list.append(collector) + def on_tick_wrapper(): + log = collector.last + if log is not None: + on_tick("Unknown", log) + else: + on_tick_wrapper = None + + watchers = self.__baml_options.get("watchers") + + return _ResolvedBamlOptions( + baml_tb, + client_registry, + collectors_as_list, + env_vars, + tags, + abort_controller, + on_tick_wrapper, + watchers, + ) + + def merge_options(self, options: BamlCallOptions) -> "DoNotUseDirectlyCallManager": + return DoNotUseDirectlyCallManager({**self.__baml_options, **options}) + + async def call_function_async( + self, *, function_name: str, args: typing.Dict[str, typing.Any] + ) -> baml_py.baml_py.FunctionResult: + resolved_options = self.__resolve() + + # Check if already aborted + if resolved_options.abort_controller is not None and resolved_options.abort_controller.aborted: + raise baml_py.baml_py.BamlAbortError("Operation was aborted") + + return await __runtime__.call_function( + function_name, + args, + # ctx + __ctx__manager__.clone_context(), + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # collectors + resolved_options.collectors, + # env_vars + resolved_options.env_vars, + # tags + resolved_options.tags, + # abort_controller + resolved_options.abort_controller, + # watchers + resolved_options.watchers, + ) + + def call_function_sync( + self, *, function_name: str, args: typing.Dict[str, typing.Any] + ) -> baml_py.baml_py.FunctionResult: + resolved_options = self.__resolve() + + # Check if already aborted + if resolved_options.abort_controller is not None and resolved_options.abort_controller.aborted: + raise baml_py.baml_py.BamlAbortError("Operation was aborted") + + ctx = __ctx__manager__.get() + return __runtime__.call_function_sync( + function_name, + args, + # ctx + ctx, + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # collectors + resolved_options.collectors, + # env_vars + resolved_options.env_vars, + # tags + resolved_options.tags, + # abort_controller + resolved_options.abort_controller, + # watchers + resolved_options.watchers, + ) + + def create_async_stream( + self, + *, + function_name: str, + args: typing.Dict[str, typing.Any], + ) -> typing.Tuple[baml_py.baml_py.RuntimeContextManager, baml_py.baml_py.FunctionResultStream]: + resolved_options = self.__resolve() + ctx = __ctx__manager__.clone_context() + result = __runtime__.stream_function( + function_name, + args, + # this is always None, we set this later! + # on_event + None, + # ctx + ctx, + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # collectors + resolved_options.collectors, + # env_vars + resolved_options.env_vars, + # tags + resolved_options.tags, + # on_tick + resolved_options.on_tick, + # abort_controller + resolved_options.abort_controller, + ) + return ctx, result + + def create_sync_stream( + self, + *, + function_name: str, + args: typing.Dict[str, typing.Any], + ) -> typing.Tuple[baml_py.baml_py.RuntimeContextManager, baml_py.baml_py.SyncFunctionResultStream]: + resolved_options = self.__resolve() + if resolved_options.on_tick is not None: + raise ValueError("on_tick is not supported for sync streams. Please use async streams instead.") + ctx = __ctx__manager__.get() + result = __runtime__.stream_function_sync( + function_name, + args, + # this is always None, we set this later! 
+ # on_event + None, + # ctx + ctx, + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # collectors + resolved_options.collectors, + # env_vars + resolved_options.env_vars, + # tags + resolved_options.tags, + # on_tick + # always None! sync streams don't support on_tick + None, + # abort_controller + resolved_options.abort_controller, + ) + return ctx, result + + async def create_http_request_async( + self, + *, + function_name: str, + args: typing.Dict[str, typing.Any], + mode: typing_extensions.Literal["stream", "request"], + ) -> baml_py.baml_py.HTTPRequest: + resolved_options = self.__resolve() + return await __runtime__.build_request( + function_name, + args, + # ctx + __ctx__manager__.clone_context(), + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # env_vars + resolved_options.env_vars, + # is_stream + mode == "stream", + ) + + def create_http_request_sync( + self, + *, + function_name: str, + args: typing.Dict[str, typing.Any], + mode: typing_extensions.Literal["stream", "request"], + ) -> baml_py.baml_py.HTTPRequest: + resolved_options = self.__resolve() + return __runtime__.build_request_sync( + function_name, + args, + # ctx + __ctx__manager__.get(), + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # env_vars + resolved_options.env_vars, + # is_stream + mode == "stream", + ) + + def parse_response(self, *, function_name: str, llm_response: str, mode: typing_extensions.Literal["stream", "request"]) -> typing.Any: + resolved_options = self.__resolve() + return __runtime__.parse_llm_response( + function_name, + llm_response, + # enum_module + types, + # cls_module + types, + # partial_cls_module + stream_types, + # allow_partials + mode == "stream", + # ctx + __ctx__manager__.get(), + # tb + resolved_options.tb, + # cr + resolved_options.client_registry, + # env_vars + resolved_options.env_vars, + ) + + +def disassemble(function: typing.Callable) -> None: + import inspect + from . import b + + if not callable(function): + print(f"disassemble: object {function} is not a Baml function") + return + + is_client_method = False + + for (method_name, _) in inspect.getmembers(b, predicate=inspect.ismethod): + if method_name == function.__name__: + is_client_method = True + break + + if not is_client_method: + print(f"disassemble: function {function.__name__} is not a Baml function") + return + + print(f"----- function {function.__name__} -----") + __runtime__.disassemble(function.__name__) \ No newline at end of file diff --git a/src/lerim/agents/baml_client/stream_types.py b/src/lerim/agents/baml_client/stream_types.py new file mode 100644 index 0000000..0f2d9cf --- /dev/null +++ b/src/lerim/agents/baml_client/stream_types.py @@ -0,0 +1,69 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +import typing +import typing_extensions +from pydantic import BaseModel, ConfigDict, Field + +import baml_py + +from . 
import types + +StreamStateValueT = typing.TypeVar('StreamStateValueT') +class StreamState(BaseModel, typing.Generic[StreamStateValueT]): + value: StreamStateValueT + state: typing_extensions.Literal["Pending", "Incomplete", "Complete"] +# ######################################################################### +# Generated classes (5) +# ######################################################################### + +class DurableRecordDraft(BaseModel): + kind: typing.Optional[types.RecordKind] = Field(default=None, description='Durable record kind.') + title: typing.Optional[str] = Field(default=None, description='Short standalone durable title.') + body: typing.Optional[str] = Field(default=None, description='Compact standalone durable body.') + status: typing.Optional[types.RecordStatus] = Field(default=None, description='Usually active for reusable durable records.') + valid_from: typing.Optional[str] = None + valid_until: typing.Optional[str] = None + decision: typing.Optional[str] = None + why: typing.Optional[str] = None + alternatives: typing.Optional[str] = None + consequences: typing.Optional[str] = None + +class EpisodeDraft(BaseModel): + title: typing.Optional[str] = Field(default=None, description='Optional short title for the current-session episode. Runtime derives one if omitted.') + body: typing.Optional[str] = Field(default=None, description='Compact episode body. If omitted, runtime builds it from user_intent and what_happened.') + status: typing.Optional[types.RecordStatus] = Field(default=None, description='Use archived for routine/no-durable sessions; active only when the episode itself remains useful.') + user_intent: typing.Optional[str] = Field(default=None, description='What the user wanted in this source session. Runtime fills a generic fallback if omitted.') + what_happened: typing.Optional[str] = Field(default=None, description='What the session actually did. Runtime fills a generic fallback if omitted.') + outcomes: typing.Optional[str] = Field(default=None, description='Optional concise outcome.') + +class SynthesizedExtraction(BaseModel): + episode: typing.Optional["EpisodeDraft"] = Field(default=None, description='Exactly one current-session episode record draft.') + durable_records: typing.List["DurableRecordDraft"] = Field(description='Zero or more durable records.') + completion_summary: typing.Optional[str] = Field(default=None, description='Brief summary of extraction work for final_result/reporting.') + +class TraceWindowFinding(BaseModel): + theme: typing.Optional[str] = Field(default=None, description='Short stable theme for this finding.') + level: typing.Optional[types.FindingLevel] = Field(default=None, description='Use durable levels for reusable context, implementation for local/noisy evidence.') + line: typing.Optional[int] = Field(default=None, description='1-based supporting line when the window gives one.') + quote: typing.Optional[str] = Field(default=None, description='Short supporting quote from the current window.') + note: typing.Optional[str] = Field(default=None, description='Compact semantic finding. Avoid command logs and copied errors.') + +class TraceWindowScan(BaseModel): + episode_update: typing.Optional[str] = Field(default=None, description='Compact update for the final episode summary. 
May be omitted when this window adds nothing.') + durable_findings: typing.List["TraceWindowFinding"] = Field(description='Reusable decisions, preferences, constraints, facts, and references found in this window.') + implementation_findings: typing.List["TraceWindowFinding"] = Field(description='Implementation evidence, discarded hypotheses, and local details useful only as support/noise.') + discarded_noise: typing.List[str] = Field(description='Short descriptions of noisy categories intentionally not saved.') + +# ######################################################################### +# Generated type aliases (0) +# ######################################################################### diff --git a/src/lerim/agents/baml_client/sync_client.py b/src/lerim/agents/baml_client/sync_client.py new file mode 100644 index 0000000..e5a104b --- /dev/null +++ b/src/lerim/agents/baml_client/sync_client.py @@ -0,0 +1,204 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +import typing +import typing_extensions +import baml_py + +from . import stream_types, types, type_builder +from .parser import LlmResponseParser, LlmStreamParser +from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions +from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__ + +class BamlSyncClient: + __options: DoNotUseDirectlyCallManager + __stream_client: "BamlStreamClient" + __http_request: "BamlHttpRequestClient" + __http_stream_request: "BamlHttpStreamRequestClient" + __llm_response_parser: LlmResponseParser + __llm_stream_parser: LlmStreamParser + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + self.__stream_client = BamlStreamClient(options) + self.__http_request = BamlHttpRequestClient(options) + self.__http_stream_request = BamlHttpStreamRequestClient(options) + self.__llm_response_parser = LlmResponseParser(options) + self.__llm_stream_parser = LlmStreamParser(options) + + def __getstate__(self): + # Return state needed for pickling + return {"options": self.__options} + + def __setstate__(self, state): + # Restore state from pickling + self.__options = state["options"] + self.__stream_client = BamlStreamClient(self.__options) + self.__http_request = BamlHttpRequestClient(self.__options) + self.__http_stream_request = BamlHttpStreamRequestClient(self.__options) + self.__llm_response_parser = LlmResponseParser(self.__options) + self.__llm_stream_parser = LlmStreamParser(self.__options) + + def with_options(self, + tb: typing.Optional[type_builder.TypeBuilder] = None, + client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] = None, + client: typing.Optional[str] = None, + collector: typing.Optional[typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]]] = None, + env: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None, + tags: typing.Optional[typing.Dict[str, str]] = None, + on_tick: typing.Optional[typing.Callable[[str, baml_py.baml_py.FunctionLog], None]] = None, + ) -> "BamlSyncClient": + options: BamlCallOptions = {} + if tb is not None: + options["tb"] = tb + if 
client_registry is not None: + options["client_registry"] = client_registry + if client is not None: + options["client"] = client + if collector is not None: + options["collector"] = collector + if env is not None: + options["env"] = env + if tags is not None: + options["tags"] = tags + if on_tick is not None: + options["on_tick"] = on_tick + return BamlSyncClient(self.__options.merge_options(options)) + + @property + def stream(self): + return self.__stream_client + + @property + def request(self): + return self.__http_request + + @property + def stream_request(self): + return self.__http_stream_request + + @property + def parse(self): + return self.__llm_response_parser + + @property + def parse_stream(self): + return self.__llm_stream_parser + + def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, + baml_options: BamlCallOptions = {}, + ) -> types.TraceWindowScan: + # Check if on_tick is provided + if 'on_tick' in baml_options: + __stream__ = self.stream.ScanTraceWindow(run_instruction=run_instruction,prior_episode_summary=prior_episode_summary,prior_findings_summary=prior_findings_summary,trace_window=trace_window, + baml_options=baml_options) + return __stream__.get_final_response() + else: + # Original non-streaming code + __result__ = self.__options.merge_options(baml_options).call_function_sync(function_name="ScanTraceWindow", args={ + "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, + }) + return typing.cast(types.TraceWindowScan, __result__.cast_to(types, types, stream_types, False, __runtime__)) + def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, + baml_options: BamlCallOptions = {}, + ) -> types.SynthesizedExtraction: + # Check if on_tick is provided + if 'on_tick' in baml_options: + __stream__ = self.stream.SynthesizeExtractRecords(run_instruction=run_instruction,episode_summary=episode_summary,durable_findings_summary=durable_findings_summary,existing_record_manifest=existing_record_manifest, + baml_options=baml_options) + return __stream__.get_final_response() + else: + # Original non-streaming code + __result__ = self.__options.merge_options(baml_options).call_function_sync(function_name="SynthesizeExtractRecords", args={ + "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, + }) + return typing.cast(types.SynthesizedExtraction, __result__.cast_to(types, types, stream_types, False, __runtime__)) + + + +class BamlStreamClient: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlSyncStream[stream_types.TraceWindowScan, types.TraceWindowScan]: + __ctx__, __result__ = self.__options.merge_options(baml_options).create_sync_stream(function_name="ScanTraceWindow", args={ + "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, + }) + return baml_py.BamlSyncStream[stream_types.TraceWindowScan, types.TraceWindowScan]( + __result__, + 
lambda x: typing.cast(stream_types.TraceWindowScan, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.TraceWindowScan, x.cast_to(types, types, stream_types, False, __runtime__)), + __ctx__, + ) + def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlSyncStream[stream_types.SynthesizedExtraction, types.SynthesizedExtraction]: + __ctx__, __result__ = self.__options.merge_options(baml_options).create_sync_stream(function_name="SynthesizeExtractRecords", args={ + "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, + }) + return baml_py.BamlSyncStream[stream_types.SynthesizedExtraction, types.SynthesizedExtraction]( + __result__, + lambda x: typing.cast(stream_types.SynthesizedExtraction, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.SynthesizedExtraction, x.cast_to(types, types, stream_types, False, __runtime__)), + __ctx__, + ) + + +class BamlHttpRequestClient: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ScanTraceWindow", args={ + "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, + }, mode="request") + return __result__ + def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SynthesizeExtractRecords", args={ + "run_instruction": run_instruction,"episode_summary": episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, + }, mode="request") + return __result__ + + +class BamlHttpStreamRequestClient: + __options: DoNotUseDirectlyCallManager + + def __init__(self, options: DoNotUseDirectlyCallManager): + self.__options = options + + def ScanTraceWindow(self, run_instruction: str,prior_episode_summary: str,prior_findings_summary: str,trace_window: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ScanTraceWindow", args={ + "run_instruction": run_instruction,"prior_episode_summary": prior_episode_summary,"prior_findings_summary": prior_findings_summary,"trace_window": trace_window, + }, mode="stream") + return __result__ + def SynthesizeExtractRecords(self, run_instruction: str,episode_summary: str,durable_findings_summary: str,existing_record_manifest: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + __result__ = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SynthesizeExtractRecords", args={ + "run_instruction": run_instruction,"episode_summary": 
episode_summary,"durable_findings_summary": durable_findings_summary,"existing_record_manifest": existing_record_manifest, + }, mode="stream") + return __result__ + + +b = BamlSyncClient(DoNotUseDirectlyCallManager({})) \ No newline at end of file diff --git a/src/lerim/agents/baml_client/tracing.py b/src/lerim/agents/baml_client/tracing.py new file mode 100644 index 0000000..0672559 --- /dev/null +++ b/src/lerim/agents/baml_client/tracing.py @@ -0,0 +1,22 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX + +trace = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.trace_fn +set_tags = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.upsert_tags +def flush(): + DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.flush() +on_log_event = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.on_log_event + + +__all__ = ['trace', 'set_tags', "flush", "on_log_event"] diff --git a/src/lerim/agents/baml_client/type_builder.py b/src/lerim/agents/baml_client/type_builder.py new file mode 100644 index 0000000..8f43731 --- /dev/null +++ b/src/lerim/agents/baml_client/type_builder.py @@ -0,0 +1,523 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. 
+ +import typing +from baml_py import type_builder +from baml_py import baml_py +# These are exports, not used here, hence the linter is disabled +from baml_py.baml_py import FieldType, EnumValueBuilder, EnumBuilder, ClassBuilder # noqa: F401 # pylint: disable=unused-import +from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME + +class TypeBuilder(type_builder.TypeBuilder): + def __init__(self): + super().__init__(classes=set( + ["DurableRecordDraft","EpisodeDraft","SynthesizedExtraction","TraceWindowFinding","TraceWindowScan",] + ), enums=set( + ["FindingLevel","RecordKind","RecordStatus",] + ), runtime=DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME) + + # ######################################################################### + # Generated enums 3 + # ######################################################################### + + @property + def FindingLevel(self) -> "FindingLevelViewer": + return FindingLevelViewer(self) + + @property + def RecordKind(self) -> "RecordKindViewer": + return RecordKindViewer(self) + + @property + def RecordStatus(self) -> "RecordStatusViewer": + return RecordStatusViewer(self) + + + # ######################################################################### + # Generated classes 5 + # ######################################################################### + + @property + def DurableRecordDraft(self) -> "DurableRecordDraftViewer": + return DurableRecordDraftViewer(self) + + @property + def EpisodeDraft(self) -> "EpisodeDraftViewer": + return EpisodeDraftViewer(self) + + @property + def SynthesizedExtraction(self) -> "SynthesizedExtractionViewer": + return SynthesizedExtractionViewer(self) + + @property + def TraceWindowFinding(self) -> "TraceWindowFindingViewer": + return TraceWindowFindingViewer(self) + + @property + def TraceWindowScan(self) -> "TraceWindowScanViewer": + return TraceWindowScanViewer(self) + + + +# ######################################################################### +# Generated enums 3 +# ######################################################################### + +class FindingLevelAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.enum("FindingLevel") + self._values: typing.Set[str] = set([ "DECISION", "PREFERENCE", "FEEDBACK", "REFERENCE", "CONSTRAINT", "FACT", "IMPLEMENTATION", ]) + self._vals = FindingLevelValues(self._bldr, self._values) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def values(self) -> "FindingLevelValues": + return self._vals + + +class FindingLevelViewer(FindingLevelAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_values(self) -> typing.List[typing.Tuple[str, type_builder.EnumValueViewer]]: + return [(name, type_builder.EnumValueViewer(self._bldr.value(name))) for name in self._values] + + +class FindingLevelValues: + def __init__(self, enum_bldr: baml_py.EnumBuilder, values: typing.Set[str]): + self.__bldr = enum_bldr + self.__values = values # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def DECISION(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("DECISION")) + + @property + def PREFERENCE(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("PREFERENCE")) + + @property + def FEEDBACK(self) -> type_builder.EnumValueViewer: + return 
type_builder.EnumValueViewer(self.__bldr.value("FEEDBACK")) + + @property + def REFERENCE(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("REFERENCE")) + + @property + def CONSTRAINT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("CONSTRAINT")) + + @property + def FACT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("FACT")) + + @property + def IMPLEMENTATION(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("IMPLEMENTATION")) + + + + +class RecordKindAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.enum("RecordKind") + self._values: typing.Set[str] = set([ "DECISION", "PREFERENCE", "CONSTRAINT", "FACT", "REFERENCE", ]) + self._vals = RecordKindValues(self._bldr, self._values) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def values(self) -> "RecordKindValues": + return self._vals + + +class RecordKindViewer(RecordKindAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_values(self) -> typing.List[typing.Tuple[str, type_builder.EnumValueViewer]]: + return [(name, type_builder.EnumValueViewer(self._bldr.value(name))) for name in self._values] + + +class RecordKindValues: + def __init__(self, enum_bldr: baml_py.EnumBuilder, values: typing.Set[str]): + self.__bldr = enum_bldr + self.__values = values # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def DECISION(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("DECISION")) + + @property + def PREFERENCE(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("PREFERENCE")) + + @property + def CONSTRAINT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("CONSTRAINT")) + + @property + def FACT(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("FACT")) + + @property + def REFERENCE(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("REFERENCE")) + + + + +class RecordStatusAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.enum("RecordStatus") + self._values: typing.Set[str] = set([ "ACTIVE", "ARCHIVED", ]) + self._vals = RecordStatusValues(self._bldr, self._values) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def values(self) -> "RecordStatusValues": + return self._vals + + +class RecordStatusViewer(RecordStatusAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_values(self) -> typing.List[typing.Tuple[str, type_builder.EnumValueViewer]]: + return [(name, type_builder.EnumValueViewer(self._bldr.value(name))) for name in self._values] + + +class RecordStatusValues: + def __init__(self, enum_bldr: baml_py.EnumBuilder, values: typing.Set[str]): + self.__bldr = enum_bldr + self.__values = values # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def ACTIVE(self) -> type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("ACTIVE")) + + @property + def ARCHIVED(self) -> 
type_builder.EnumValueViewer: + return type_builder.EnumValueViewer(self.__bldr.value("ARCHIVED")) + + + + + +# ######################################################################### +# Generated classes 5 +# ######################################################################### + +class DurableRecordDraftAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("DurableRecordDraft") + self._properties: typing.Set[str] = set([ "kind", "title", "body", "status", "valid_from", "valid_until", "decision", "why", "alternatives", "consequences", ]) + self._props = DurableRecordDraftProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "DurableRecordDraftProperties": + return self._props + + +class DurableRecordDraftViewer(DurableRecordDraftAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class DurableRecordDraftProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def kind(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("kind")) + + @property + def title(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("title")) + + @property + def body(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("body")) + + @property + def status(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("status")) + + @property + def valid_from(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("valid_from")) + + @property + def valid_until(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("valid_until")) + + @property + def decision(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("decision")) + + @property + def why(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("why")) + + @property + def alternatives(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("alternatives")) + + @property + def consequences(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("consequences")) + + + + +class EpisodeDraftAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("EpisodeDraft") + self._properties: typing.Set[str] = set([ "title", "body", "status", "user_intent", "what_happened", "outcomes", ]) + self._props = EpisodeDraftProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "EpisodeDraftProperties": + return self._props + + +class EpisodeDraftViewer(EpisodeDraftAst): + def 
__init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class EpisodeDraftProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def title(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("title")) + + @property + def body(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("body")) + + @property + def status(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("status")) + + @property + def user_intent(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("user_intent")) + + @property + def what_happened(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("what_happened")) + + @property + def outcomes(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("outcomes")) + + + + +class SynthesizedExtractionAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("SynthesizedExtraction") + self._properties: typing.Set[str] = set([ "episode", "durable_records", "completion_summary", ]) + self._props = SynthesizedExtractionProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "SynthesizedExtractionProperties": + return self._props + + +class SynthesizedExtractionViewer(SynthesizedExtractionAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class SynthesizedExtractionProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def episode(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("episode")) + + @property + def durable_records(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("durable_records")) + + @property + def completion_summary(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("completion_summary")) + + + + +class TraceWindowFindingAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("TraceWindowFinding") + self._properties: typing.Set[str] = set([ "theme", "level", "line", "quote", "note", ]) + self._props = TraceWindowFindingProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> 
"TraceWindowFindingProperties": + return self._props + + +class TraceWindowFindingViewer(TraceWindowFindingAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class TraceWindowFindingProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def theme(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("theme")) + + @property + def level(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("level")) + + @property + def line(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("line")) + + @property + def quote(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("quote")) + + @property + def note(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("note")) + + + + +class TraceWindowScanAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("TraceWindowScan") + self._properties: typing.Set[str] = set([ "episode_update", "durable_findings", "implementation_findings", "discarded_noise", ]) + self._props = TraceWindowScanProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "TraceWindowScanProperties": + return self._props + + +class TraceWindowScanViewer(TraceWindowScanAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class TraceWindowScanProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def episode_update(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("episode_update")) + + @property + def durable_findings(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("durable_findings")) + + @property + def implementation_findings(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("implementation_findings")) + + @property + def discarded_noise(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("discarded_noise")) + + + diff --git a/src/lerim/agents/baml_client/type_map.py b/src/lerim/agents/baml_client/type_map.py new file mode 100644 index 0000000..6f67bf0 --- /dev/null +++ b/src/lerim/agents/baml_client/type_map.py @@ -0,0 +1,41 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! 
To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. + +from . import types +from . import stream_types + + +type_map = { + + "types.DurableRecordDraft": types.DurableRecordDraft, + "stream_types.DurableRecordDraft": stream_types.DurableRecordDraft, + + "types.EpisodeDraft": types.EpisodeDraft, + "stream_types.EpisodeDraft": stream_types.EpisodeDraft, + + "types.SynthesizedExtraction": types.SynthesizedExtraction, + "stream_types.SynthesizedExtraction": stream_types.SynthesizedExtraction, + + "types.TraceWindowFinding": types.TraceWindowFinding, + "stream_types.TraceWindowFinding": stream_types.TraceWindowFinding, + + "types.TraceWindowScan": types.TraceWindowScan, + "stream_types.TraceWindowScan": stream_types.TraceWindowScan, + + + "types.FindingLevel": types.FindingLevel, + + "types.RecordKind": types.RecordKind, + + "types.RecordStatus": types.RecordStatus, + +} \ No newline at end of file diff --git a/src/lerim/agents/baml_client/types.py b/src/lerim/agents/baml_client/types.py new file mode 100644 index 0000000..95d7f90 --- /dev/null +++ b/src/lerim/agents/baml_client/types.py @@ -0,0 +1,107 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. 
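+# A minimal sketch of consuming these types, assuming a raw model response
+# string is already in hand (for example, captured by sending the request
+# from `b.request` yourself and then parsing with `b.parse`):
+#
+#     from lerim.agents.baml_client.sync_client import b
+#     from lerim.agents.baml_client import types
+#
+#     # `raw_text` is a placeholder for the model output string.
+#     scan: types.TraceWindowScan = b.parse.ScanTraceWindow(llm_response=raw_text)
+#     decisions = [
+#         f for f in scan.durable_findings
+#         if f.level == types.FindingLevel.DECISION
+#     ]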
+ +import typing +import typing_extensions +from enum import Enum + + +from pydantic import BaseModel, ConfigDict, Field + + +import baml_py + +CheckT = typing_extensions.TypeVar('CheckT') +CheckName = typing_extensions.TypeVar('CheckName', bound=str) + +class Check(BaseModel): + name: str + expression: str + status: str +class Checked(BaseModel, typing.Generic[CheckT, CheckName]): + value: CheckT + checks: typing.Dict[CheckName, Check] + +def get_checks(checks: typing.Dict[CheckName, Check]) -> typing.List[Check]: + return list(checks.values()) + +def all_succeeded(checks: typing.Dict[CheckName, Check]) -> bool: + return all(check.status == "succeeded" for check in get_checks(checks)) +# ######################################################################### +# Generated enums (3) +# ######################################################################### + +class FindingLevel(str, Enum): + DECISION = "DECISION" + PREFERENCE = "PREFERENCE" + FEEDBACK = "FEEDBACK" + REFERENCE = "REFERENCE" + CONSTRAINT = "CONSTRAINT" + FACT = "FACT" + IMPLEMENTATION = "IMPLEMENTATION" + +class RecordKind(str, Enum): + DECISION = "DECISION" + PREFERENCE = "PREFERENCE" + CONSTRAINT = "CONSTRAINT" + FACT = "FACT" + REFERENCE = "REFERENCE" + +class RecordStatus(str, Enum): + ACTIVE = "ACTIVE" + ARCHIVED = "ARCHIVED" + +# ######################################################################### +# Generated classes (5) +# ######################################################################### + +class DurableRecordDraft(BaseModel): + kind: RecordKind = Field(description='Durable record kind.') + title: str = Field(description='Short standalone durable title.') + body: str = Field(description='Compact standalone durable body.') + status: typing.Optional[RecordStatus] = Field(default=None, description='Usually active for reusable durable records.') + valid_from: typing.Optional[str] = None + valid_until: typing.Optional[str] = None + decision: typing.Optional[str] = None + why: typing.Optional[str] = None + alternatives: typing.Optional[str] = None + consequences: typing.Optional[str] = None + +class EpisodeDraft(BaseModel): + title: typing.Optional[str] = Field(default=None, description='Optional short title for the current-session episode. Runtime derives one if omitted.') + body: typing.Optional[str] = Field(default=None, description='Compact episode body. If omitted, runtime builds it from user_intent and what_happened.') + status: typing.Optional[RecordStatus] = Field(default=None, description='Use archived for routine/no-durable sessions; active only when the episode itself remains useful.') + user_intent: typing.Optional[str] = Field(default=None, description='What the user wanted in this source session. Runtime fills a generic fallback if omitted.') + what_happened: typing.Optional[str] = Field(default=None, description='What the session actually did. 
Runtime fills a generic fallback if omitted.') + outcomes: typing.Optional[str] = Field(default=None, description='Optional concise outcome.') + +class SynthesizedExtraction(BaseModel): + episode: "EpisodeDraft" = Field(description='Exactly one current-session episode record draft.') + durable_records: typing.List["DurableRecordDraft"] = Field(description='Zero or more durable records.') + completion_summary: typing.Optional[str] = Field(default=None, description='Brief summary of extraction work for final_result/reporting.') + +class TraceWindowFinding(BaseModel): + theme: str = Field(description='Short stable theme for this finding.') + level: FindingLevel = Field(description='Use durable levels for reusable context, implementation for local/noisy evidence.') + line: typing.Optional[int] = Field(default=None, description='1-based supporting line when the window gives one.') + quote: typing.Optional[str] = Field(default=None, description='Short supporting quote from the current window.') + note: str = Field(description='Compact semantic finding. Avoid command logs and copied errors.') + +class TraceWindowScan(BaseModel): + episode_update: typing.Optional[str] = Field(default=None, description='Compact update for the final episode summary. May be omitted when this window adds nothing.') + durable_findings: typing.List["TraceWindowFinding"] = Field(description='Reusable decisions, preferences, constraints, facts, and references found in this window.') + implementation_findings: typing.List["TraceWindowFinding"] = Field(description='Implementation evidence, discarded hypotheses, and local details useful only as support/noise.') + discarded_noise: typing.List[str] = Field(description='Short descriptions of noisy categories intentionally not saved.') + +# ######################################################################### +# Generated type aliases (0) +# ######################################################################### diff --git a/src/lerim/agents/baml_client/watchers.py b/src/lerim/agents/baml_client/watchers.py new file mode 100644 index 0000000..347146f --- /dev/null +++ b/src/lerim/agents/baml_client/watchers.py @@ -0,0 +1,44 @@ +# ---------------------------------------------------------------------------- +# +# Welcome to Baml! To use this generated code, please run the following: +# +# $ pip install baml +# +# ---------------------------------------------------------------------------- + +# This file was generated by BAML: please do not edit it. Instead, edit the +# BAML files and re-generate this code using: baml-cli generate +# baml-cli is available with the baml package. 
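+# A minimal sketch of the handler shapes declared below; the handler names
+# are hypothetical, and the events appear to be delivered through the
+# `watchers` call option declared in runtime.py's BamlCallOptions:
+#
+#     def on_block(event: BlockEvent) -> None:
+#         print(event.block_label, event.event_type)  # "enter" | "exit"
+#
+#     def on_var(event: VarEvent[str]) -> None:
+#         print(event.variable_name, event.value, event.timestamp)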
+ +from typing import Callable, Any, Protocol, Generic, TypeVar, overload, Literal +import threading + +T = TypeVar("T") + +class BlockEvent: + def __init__(self, block_label: str, event_type: str): + self.block_label = block_label + self.event_type = event_type # "enter" | "exit" + +class VarEvent(Generic[T]): + def __init__(self, variable_name: str, value: T, timestamp: str, function_name: str): + self.variable_name = variable_name + self.value = value + self.timestamp = timestamp + self.function_name = function_name + +BlockHandler = Callable[[BlockEvent], None] +VarEventHandler = Callable[[VarEvent[T]], None] +StreamHandler = Callable[[Any], None] # Stream will be an async iterator + +class InternalEventBindings(Protocol): + function_name: str + block: list[BlockHandler] + vars: dict[str, list[VarEventHandler[Any]]] + streams: dict[str, list[StreamHandler]] + functions: dict[str, "InternalEventBindings"] + +class EventCollectorInternal(Protocol): + def __handlers__(self) -> InternalEventBindings: + ... + diff --git a/src/lerim/agents/baml_runtime.py b/src/lerim/agents/baml_runtime.py new file mode 100644 index 0000000..c87466b --- /dev/null +++ b/src/lerim/agents/baml_runtime.py @@ -0,0 +1,162 @@ +"""Shared BAML runtime construction for Lerim agents.""" + +from __future__ import annotations + +from baml_py import ClientRegistry + +from lerim.agents.baml_client.sync_client import b +from lerim.config.providers import MINIMAX_TEMPERATURE_FLOOR, normalize_model_name +from lerim.config.settings import Config, RoleConfig, get_config + +BAML_HTTP_CONNECT_TIMEOUT_MS = 10_000 +BAML_HTTP_TIME_TO_FIRST_TOKEN_TIMEOUT_MS = 120_000 +BAML_HTTP_IDLE_TIMEOUT_MS = 30_000 +BAML_HTTP_REQUEST_TIMEOUT_MS = 300_000 + +_API_KEY_ATTRS = { + "minimax": "minimax_api_key", + "opencode_go": "opencode_api_key", + "zai": "zai_api_key", + "openai": "openai_api_key", + "openrouter": "openrouter_api_key", +} +_API_KEY_ENVS = { + "minimax": "MINIMAX_API_KEY", + "opencode_go": "OPENCODE_API_KEY", + "zai": "ZAI_API_KEY", + "openai": "OPENAI_API_KEY", + "openrouter": "OPENROUTER_API_KEY", +} +_LOCAL_PROVIDERS = {"ollama", "mlx"} + + +def build_baml_client_for_role( + *, + config: Config | None = None, + role: RoleConfig | None = None, + provider: str | None = None, + model_name: str | None = None, + api_base_url: str | None = None, + api_key: str | None = None, + temperature: float | None = None, +): + """Build a generated BAML client for one configured Lerim agent role.""" + cfg = config or get_config() + role_cfg = role or cfg.agent_role + resolved_provider = (provider or role_cfg.provider).strip().lower() + resolved_model = normalize_model_name( + resolved_provider, + (model_name or role_cfg.model).strip(), + ) + resolved_base_url = _resolve_base_url( + cfg, + role_cfg=role_cfg, + provider=resolved_provider, + override=api_base_url, + ) + resolved_api_key = _resolve_api_key( + cfg, + provider=resolved_provider, + override=api_key, + ) + resolved_temperature = _resolve_temperature( + provider=resolved_provider, + value=role_cfg.temperature if temperature is None else temperature, + ) + + registry = ClientRegistry() + registry.add_llm_client( + name="RuntimeAgentModel", + provider="openai-generic", + options={ + "base_url": resolved_base_url, + "api_key": resolved_api_key, + "model": resolved_model, + "temperature": resolved_temperature, + "http": { + "connect_timeout_ms": BAML_HTTP_CONNECT_TIMEOUT_MS, + "time_to_first_token_timeout_ms": BAML_HTTP_TIME_TO_FIRST_TOKEN_TIMEOUT_MS, + "idle_timeout_ms": 
BAML_HTTP_IDLE_TIMEOUT_MS, + "request_timeout_ms": BAML_HTTP_REQUEST_TIMEOUT_MS, + }, + }, + retry_policy="ExtractAgentRetry", + ) + registry.set_primary("RuntimeAgentModel") + return b.with_options(client_registry=registry) + + +def model_label( + *, + config: Config | None = None, + provider: str | None = None, + model_name: str | None = None, +) -> str: + """Return the effective provider/model label for observability.""" + cfg = config or get_config() + role_cfg = cfg.agent_role + resolved_provider = (provider or role_cfg.provider).strip().lower() + resolved_model = normalize_model_name( + resolved_provider, + (model_name or role_cfg.model).strip(), + ) + return f"{resolved_provider}/{resolved_model}" + + +def _resolve_base_url( + config: Config, + *, + role_cfg: RoleConfig, + provider: str, + override: str | None, +) -> str: + """Resolve the OpenAI-compatible base URL used by BAML.""" + role_base_url = role_cfg.api_base.strip() if provider == role_cfg.provider else "" + base_url = ( + (override or "").strip() + or role_base_url + or config.provider_api_bases.get(provider, "") + ) + if not base_url: + raise RuntimeError( + f"missing_api_base:no default base URL configured for provider={provider}" + ) + if provider in _LOCAL_PROVIDERS and not base_url.rstrip("/").endswith("/v1"): + return f"{base_url.rstrip('/')}/v1" + return base_url.rstrip("/") + + +def _resolve_api_key( + config: Config, + *, + provider: str, + override: str | None, +) -> str: + """Resolve the API key value expected by BAML's OpenAI-compatible client.""" + if override: + return override + attr = _API_KEY_ATTRS.get(provider) + if attr is None: + return provider + value = getattr(config, attr, None) + if not value: + env_name = _API_KEY_ENVS.get(provider, attr.removesuffix("_api_key").upper() + "_API_KEY") + raise RuntimeError(f"missing_api_key:{env_name} required for provider={provider}") + return str(value) + + +def _resolve_temperature(*, provider: str, value: float) -> float: + """Normalize model temperature for provider quirks.""" + temperature = float(value) + if provider == "minimax": + return max(MINIMAX_TEMPERATURE_FLOOR, min(1.0, temperature)) + return temperature + + +def _self_check() -> None: + """Run a small import-time construction check without network calls.""" + assert isinstance(_API_KEY_ATTRS, dict) + + +if __name__ == "__main__": + _self_check() diff --git a/src/lerim/agents/baml_src/extract_react.baml b/src/lerim/agents/baml_src/extract_react.baml new file mode 100644 index 0000000..fbdc616 --- /dev/null +++ b/src/lerim/agents/baml_src/extract_react.baml @@ -0,0 +1,284 @@ +enum RecordKind { + DECISION @alias("decision") @description("A durable project decision with decision and why fields.") + PREFERENCE @alias("preference") @description("A stable user or workflow preference.") + CONSTRAINT @alias("constraint") @description("A durable invariant, limit, or must/cannot rule.") + FACT @alias("fact") @description("A durable project fact or setup truth.") + REFERENCE @alias("reference") @description("A pointer to an external source of truth.") +} + +enum RecordStatus { + ACTIVE @alias("active") @description("Current context that future sessions may reuse.") + ARCHIVED @alias("archived") @description("Historical or routine context that should not be treated as active guidance.") +} + +enum FindingLevel { + DECISION @alias("decision") @description("Durable decision-level finding.") + PREFERENCE @alias("preference") @description("Durable user or workflow preference finding.") + FEEDBACK @alias("feedback") 
@description("Durable feedback-level finding.") + REFERENCE @alias("reference") @description("Durable external-source finding.") + CONSTRAINT @alias("constraint") @description("Durable constraint-level finding.") + FACT @alias("fact") @description("Durable fact-level finding.") + IMPLEMENTATION @alias("implementation") @description("Trace-local implementation evidence or discarded hypothesis.") +} + +class TraceWindowFinding { + theme string @description("Short stable theme for this finding.") + level FindingLevel @description("Use durable levels for reusable context, implementation for local/noisy evidence.") + line int? @description("1-based supporting line when the window gives one.") + quote string? @description("Short supporting quote from the current window.") + note string @description("Compact semantic finding. Avoid command logs and copied errors.") +} + +class TraceWindowScan { + episode_update string? @description("Compact update for the final episode summary. May be omitted when this window adds nothing.") + durable_findings TraceWindowFinding[] @description("Reusable decisions, preferences, constraints, facts, and references found in this window.") + implementation_findings TraceWindowFinding[] @description("Implementation evidence, discarded hypotheses, and local details useful only as support/noise.") + discarded_noise string[] @description("Short descriptions of noisy categories intentionally not saved.") +} + +class EpisodeDraft { + title string? @description("Optional short title for the current-session episode. Runtime derives one if omitted.") + body string? @description("Compact episode body. If omitted, runtime builds it from user_intent and what_happened.") + status RecordStatus? @description("Use archived for routine/no-durable sessions; active only when the episode itself remains useful.") + user_intent string? @description("What the user wanted in this source session. Runtime fills a generic fallback if omitted.") + what_happened string? @description("What the session actually did. Runtime fills a generic fallback if omitted.") + outcomes string? @description("Optional concise outcome.") +} + +class DurableRecordDraft { + kind RecordKind @description("Durable record kind.") + title string @description("Short standalone durable title.") + body string @description("Compact standalone durable body.") + status RecordStatus? @description("Usually active for reusable durable records.") + valid_from string? + valid_until string? + decision string? + why string? + alternatives string? + consequences string? +} + +class SynthesizedExtraction { + episode EpisodeDraft @description("Exactly one current-session episode record draft.") + durable_records DurableRecordDraft[] @description("Zero or more durable records.") + completion_summary string? @description("Brief summary of extraction work for final_result/reporting.") +} + +function ScanTraceWindow( + run_instruction: string, + prior_episode_summary: string, + prior_findings_summary: string, + trace_window: string +) -> TraceWindowScan { + client MiniMaxM27 + prompt #" + {{ _.role("system") }} + You scan one window from a coding-agent trace for Lerim extraction. + Return only structured output. Do not save records and do not plan future tool calls. + Do not include tags, hidden reasoning, markdown, or prose. + The top-level output must include episode_update, durable_findings, implementation_findings, and discarded_noise. + Use an empty string or empty list when a field has no content. 
+ Every durable_findings and implementation_findings item must be an object with: + - theme + - level: one of decision, preference, feedback, reference, constraint, fact, or implementation + - note + - optional line + - optional quote + Never return finding items as plain strings. + Never use confidence labels such as high, medium, or low as finding levels. + + Separate: + - episode_update: what happened in this session window, for the final episode record. + - durable_findings: reusable project/user context only. + - implementation_findings: local evidence, command work, discarded hypotheses, or support. + - discarded_noise: categories of content intentionally ignored. + + Durable signal means a decision, preference, constraint, fact, or reference likely useful beyond this trace. + Implementation detail alone is not durable signal. + A durable finding should help a future agent make a better decision in a new chat. + Save what a future agent would be relieved to already know: stable user preferences, chosen policies, durable constraints, source-of-truth references, and stable project or product truths. + If a detail only helps replay or debug this trace's terminal session, put it in implementation_findings or discarded_noise, not durable_findings. + Raw metrics, comparison tables, config edits, API wiring, package installation, debug commands, file paths, and stack traces are implementation_findings unless the user turns them into a reusable policy, preference, constraint, fact, or source-of-truth reference. + When the same topic has both user-level guidance and technical evidence, the user-level guidance is the durable finding; the technical evidence is implementation support. + For technical blockers, keep durable_findings at the capability, status, and implication level only. Put patch recipes, flags, functions, commands, exact metrics, and local debug steps in implementation_findings. + When a window mixes a durable point with local evidence, put only the reusable point in durable_findings and put the supporting local evidence in implementation_findings. + Use semantic meaning, not exact wording. Do not copy long code, logs, stack traces, or command transcripts. + If the window has no durable signal, return an empty durable_findings list. + If the window adds nothing meaningful to the episode, episode_update may be an empty string. + + {{ _.role("user") }} + RUN INSTRUCTION: + {{ run_instruction }} + + PRIOR EPISODE SUMMARY: + {{ prior_episode_summary }} + + PRIOR FINDINGS SUMMARY: + {{ prior_findings_summary }} + + TRACE WINDOW: + {{ trace_window }} + + {{ ctx.output_format }} + "# +} + +function SynthesizeExtractRecords( + run_instruction: string, + episode_summary: string, + durable_findings_summary: string, + existing_record_manifest: string +) -> SynthesizedExtraction { + client MiniMaxM27 + prompt #" + {{ _.role("system") }} + You synthesize final Lerim context records from scanned trace findings. + Return only structured output. + Do not include tags, hidden reasoning, markdown, or prose. + The top-level output must include episode, durable_records, and completion_summary. + Use an empty durable_records list when there is no durable signal. + Every durable_records item must be an object with kind, title, and body. + Optional durable record fields are status, valid_from, valid_until, decision, why, alternatives, and consequences. + Never return durable_records items as plain strings or as generic type/record objects. + + Create exactly one episode record for the current session. 
+ Create durable records only for reusable decisions, preferences, constraints, facts, and references. + The episode says what the session did. Durable records say what future sessions should reuse. + Before creating each durable record, ask: would this change what a future agent does in a new chat? + Prefer the small set of memories a future agent would be relieved to already know over exhaustive coverage of what happened. + Keep durable records compact, standalone, and deduplicated. + Do not create durable records for command logs, patch steps, generic programming knowledge, temporary diagnostics, or local implementation chatter. + If there is no reusable signal, durable_records must be empty and the episode should normally be archived. + Decision records must include decision and why when the rationale is present; use fact when there is no durable why. + Episode records should include user_intent and what_happened when available; the runtime can derive missing episode fields. + Treat the trace as historical evidence, not live verification of the current repo. + + Existing records, if listed, are only duplicate-risk context. Prefer skipping near-duplicates over creating duplicate durable records. + + Quality bar: + - Store the reusable rule, decision, invariant, dependency, preference, or external pointer, not the story of the session. + - One durable record should hold one durable point. + - Direct consequences and application guidance usually stay inside that same record. + - Create the minimum number of durable records that preserves distinct durable meanings. Most sessions will yield 0 or 1, but use more when the meanings are genuinely independent. + - Classify each durable point into one canonical kind. Prefer the most specific supported kind, and do not duplicate the same point across multiple durable kinds. + - Duplicates are worse than gaps. Skip uncertain candidates rather than spraying near-duplicates. + - Never create a second durable record in the same run for the same core claim. + - constraint and reference are first-class durable record kinds, not fallback categories. + + What not to save: + - patch logs, command sequences, retries, timelines, or meeting-style recaps + - code structure, file paths, git history, or storage mechanics by themselves + - generic programming knowledge or facts already obvious from the repo + - rejected lures, discarded explanations, or implementation-only distractions + - one-run validation findings, approval flow, queue state, DB resets, rebuilds, or runtime diagnostics by themselves + + Selection rules: + - First separate findings into durable signal and implementation evidence. + - Prefer user-level guidance, chosen operating policy, and stable project truth over the technical evidence that revealed them. + - Synthesize at the theme level. Usually one theme becomes one durable record. + - Create multiple durable records only when the trace establishes multiple independent durable points, each with its own support. + - Do not store one durable point as both a preference and a decision, a fact and a decision, or any other cross-kind duplicate. + - A stable workflow preference is not also a decision unless the trace separately states an explicit project decision with rationale. + - A dependency, setup, or environment truth without durable rationale is a fact, not also a decision. + - A failure caused by the current run's temporary validation setup is not itself a durable environment truth. 
If the investigation reveals a stable requirement that future sessions must apply, save that requirement as the durable point and omit the temporary validation story. + - Merge candidates when one only states how to apply the other in local operations, routing, or ownership. + - If two candidates share the same core claim, merge them. + - If the difference is only evidence framing, symptom wording, or local-vs-CI phrasing around the same durable fact, keep one record and fold the extra context into it. + - If one candidate is only the direct application or routing consequence of another, keep it inside the stronger record. + - If one candidate only says how different local components should apply the same project rule, keep that guidance inside the main record rather than creating a second durable record. + - If one candidate only restates where local project components live or how an internal architecture is applied, keep it inside the stronger decision, fact, or constraint instead of creating a separate reference. + - If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule. + - If a technical blocker is reusable, record only the capability affected, status or source of truth, and implication for future work. + - Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered. + - Store durable records only when the lesson is likely reusable beyond this trace. + - If a candidate is mainly about this trace's commands, files, or timeline, reject it. + - Trace-local instructions about what to ignore in this session are not preferences unless they clearly express a broader standing workflow rule for future sessions. + - If the trace explicitly says the rationale is unknown or says not to invent one, do not create a decision; use fact instead. + - A stable setup, dependency, or environment requirement without a durable why is a fact even if it sounds like the current chosen setup. + - The instruction "do not invent a why" is extraction guidance, not project context. + - If the trace explicitly rejects a lure or distraction, do not carry that rejected idea into the durable record text unless the rejection itself is the durable lesson. + - If a long noisy investigation resolves into one source-of-truth boundary, store only that boundary. Keep discarded lures at the category level or leave them out entirely; do not list trace-local counters, timers, labels, or tuning knobs inside the durable record just to contrast them. + - When a discarded lure matters as evidence, keep it attached to the main durable theme as implementation context rather than storing it as a second durable theme. + - If the episode summary contains clearly reusable decision, preference, constraint, fact, or reference, that point should usually also exist as its own durable record. + - Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode. + - Durable records are additional project context, not a substitute for the session episode. + - Most traces should produce only a few durable records; create more only when each one would independently change future behavior. + + Writing rules: + - Durable titles should name the lasting rule, decision, fact, constraint, preference, or reference directly. + - Durable bodies should be compact, neutral, and standalone. 
+ - Prefer this shape for durable records: the durable point, why it matters, and how to apply it later. + - Do not write durable records as meeting minutes, patch logs, or cleanup commentary. + - Do not preserve trace-local commands, negotiation phrasing, or "this is not about X" sentences in final record text. + - Do not mention discarded implementation noise in durable record fields, including consequences. + - Do not include patch recipes, flags, function names, exact metrics, or local debug steps in durable record text. + - Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored. + - When the durable lesson is a source-of-truth rule, write the authoritative rule directly. + - If a short contrast is still helpful, keep it abstract, such as "not worker-local state" or "not ephemeral local state". Do not enumerate examples in parentheses or comma-separated lists. + - When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection. + - Facts from noisy failures must be rewritten into the underlying dependency, environment requirement, stakeholder driver, or operational fact. + - If a fact still reads like stderr, an exception symptom, or copied command output, rewrite it again before writing. + - When the durable lesson is an environment or dependency requirement, do not center the fact on the observed failure symptom. Name the requirement directly and mention the symptom only if it is needed as brief supporting context. + - If brief supporting context is useful, lead with the requirement and keep the symptom generic. Never include exception class names, quoted error fragments, or copied failure strings in the durable fact. + - If the candidate is mainly "this validation run failed until we changed the setup", it belongs in the archived episode. If the candidate names a reusable setup or runtime requirement discovered through that validation, keep the requirement and drop the failure narrative. + - When no durable rationale exists, do not spend the fact body explaining that the rationale is absent. Just state the stable dependency, setup requirement, or operational truth directly. + - Do not quote or paraphrase trace instructions about how to classify the evidence inside the final fact body. Final fact text should describe the underlying truth, not the extraction rule you followed. + - References must answer both "where should future sessions look?" and "when should they consult it?" + - Do not use reference for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than "consult this external source next time." + - Keep the episode concise: short title, short body, concise user_intent, what_happened, and outcomes. + + Record types: + - preference: Stable workflow guidance from the user. Save corrections and confirmed non-obvious working style that should carry into future sessions. + - decision: A chosen approach or project rule that future work should follow and that is not obvious from code alone. If the trace does not support a durable why, do not use decision. + - constraint: A durable invariant, limit, or must/cannot rule that future work must respect. + - fact: A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth. 
+ - reference: A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo. Use reference only when the enduring value is where to look later. + + Few-shot quality examples: + + Example preference: + - Trace signal: the user corrects the assistant after a small code fix and says not to append redundant recaps. + - Good: create one preference record about keeping replies terse and avoiding redundant change recaps after small diffs. + - Bad: store the file edit itself, or treat the correction as one-session scratch when it is stable workflow guidance. + + Example decision: + - Trace signal: early turns discuss local refactors and flaky tests; late in the trace the user settles one architecture boundary, and follow-on routing guidance only applies that boundary. + - Good: create the required episode and one decision record for the architecture boundary. Keep the routing guidance inside that record. + - Bad: store refactor noise, split one architectural choice into near-duplicate records, or create a separate durable record saying the refactors were noise. + + Example fact: + - Trace signal: repeated failed commands and partial theories eventually resolve to one stable operational requirement. + - Good: create one fact record for the requirement in clean operational language. Lead with the dependency or environment requirement; mention the failure only as brief generic support if needed. + - Bad: store raw errors, command history, rejected theories, exact exception text, or a separate record saying not to invent a why. + + Example late clarification: + - Trace signal: early chunks circle local counters, timers, labels, and tuning; the final chunk clarifies those were distractions and the real durable lesson is a source-of-truth boundary. + - Good: create one durable record for the source-of-truth boundary. Mention restart or failover only if it explains why the boundary matters. + - Bad: write a durable record that carries over rejected local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list. + + Example reference: + - Trace signal: the assistant starts from a partial repo note, then the user clarifies that ownership or status lives in an external dashboard or ticket system. + - Good: create one reference record that names the external source and when future sessions should consult it. + - Bad: center the record on local files, or turn it into a warning slogan about what not to trust locally. + + Example routine: + - Trace signal: formatter, small lint fix, rerun tests, green result, no new rule or durable fact. + - Good: create only an archived episode. + - Bad: invent a durable record from the sequence of routine commands. + + {{ _.role("user") }} + RUN INSTRUCTION: + {{ run_instruction }} + + EXISTING RECORD MANIFEST: + {{ existing_record_manifest }} + + EPISODE SUMMARY: + {{ episode_summary }} + + DURABLE FINDINGS: + {{ durable_findings_summary }} + + {{ ctx.output_format }} + "# +} diff --git a/src/lerim/agents/baml_src/extract_react_tests.baml b/src/lerim/agents/baml_src/extract_react_tests.baml new file mode 100644 index 0000000..77cb85e --- /dev/null +++ b/src/lerim/agents/baml_src/extract_react_tests.baml @@ -0,0 +1,87 @@ +test ScanTraceWindowCapturesDurableDecision { + functions [ScanTraceWindow] + args { + run_instruction "Extract one episode and any durable project context from the trace." 
+ prior_episode_summary "(none yet)" + prior_findings_summary "(none yet)" + trace_window #" + [8 lines, window 1-8] + 1 user: We should run extraction evals before changing extraction prompts. + 2 assistant: I will update the BAML harness and run the eval. + 3 user: Keep trace-local command logs out of long-term context. + 4 assistant: Implemented the change and ran a smoke test. + "# + } + @@assert({{ this.durable_findings|length > 0 }}) +} + +test ScanTraceWindowSeparatesDurableAndImplementation { + functions [ScanTraceWindow] + args { + run_instruction "Extract reusable context, not command history." + prior_episode_summary "(none yet)" + prior_findings_summary "(none yet)" + trace_window #" + [7 lines, window 1-7] + 1 user: The deployment checklist is the source of truth for release readiness. + 2 assistant: Ran pytest -q and fixed a local fixture path. + 3 assistant: Edited src/example.py and reran the lint command. + 4 user: The command output is just evidence, not memory. + "# + } + @@assert({{ this.durable_findings|length > 0 }}) + @@assert({{ this.implementation_findings|length > 0 }}) +} + +test ScanTraceWindowAllowsNoDurableSignal { + functions [ScanTraceWindow] + args { + run_instruction "Extract durable context only when the trace supports it." + prior_episode_summary "(none yet)" + prior_findings_summary "(none yet)" + trace_window #" + [5 lines, window 1-5] + 1 user: Please format this file. + 2 assistant: Ran the formatter. + 3 assistant: Fixed one lint complaint. + 4 assistant: Tests are green. + "# + } + @@assert({{ this.durable_findings|length == 0 }}) +} + +test SynthesizeExtractRecordsCreatesEpisodeAndDurableRecord { + functions [SynthesizeExtractRecords] + args { + run_instruction "Create exactly one episode and durable records only for reusable project context." + episode_summary "- The session updated an extraction harness and ran a smoke eval." + durable_findings_summary "- preference: eval-first prompt changes: Run the extraction eval before changing extraction prompts. (line 1)" + existing_record_manifest "(none)" + } + @@assert({{ this.durable_records|length > 0 }}) +} + +test SynthesizeExtractRecordsAllowsNoDurableSignal { + functions [SynthesizeExtractRecords] + args { + run_instruction "Create exactly one episode and no durable records when no reusable context exists." + episode_summary "- The session only formatted code, fixed one lint issue, and reran tests." + durable_findings_summary "(none)" + existing_record_manifest "(none)" + } + @@assert({{ this.durable_records|length == 0 }}) +} + +test SynthesizeExtractRecordsDeduplicatesCoreClaim { + functions [SynthesizeExtractRecords] + args { + run_instruction "Create compact durable records and merge duplicate meanings." + episode_summary "- The session clarified a runtime state boundary after noisy implementation discussion." + durable_findings_summary #" + - decision: persisted source of truth: Runtime status must live in one persisted store that survives restart. + - decision: runtime status boundary: Use the same persisted status store as the source of truth after restart. 
+ "# + existing_record_manifest "(none)" + } + @@assert({{ this.durable_records|length == 1 }}) +} diff --git a/src/lerim/agents/baml_src/generators.baml b/src/lerim/agents/baml_src/generators.baml new file mode 100644 index 0000000..13c903b --- /dev/null +++ b/src/lerim/agents/baml_src/generators.baml @@ -0,0 +1,6 @@ +generator target { + output_type "python/pydantic" + output_dir "../" + default_client_mode "sync" + version "0.222.0" +} diff --git a/src/lerim/agents/baml_src/models.baml b/src/lerim/agents/baml_src/models.baml new file mode 100644 index 0000000..e5b568d --- /dev/null +++ b/src/lerim/agents/baml_src/models.baml @@ -0,0 +1,43 @@ +retry_policy ExtractAgentRetry { + max_retries 1 + strategy { + type exponential_backoff + delay_ms 500 + multiplier 2 + max_delay_ms 8000 + } +} + +client MiniMaxM27 { + provider "openai-generic" + retry_policy ExtractAgentRetry + options { + base_url "https://api.minimax.io/v1" + api_key env.MINIMAX_API_KEY + model "MiniMax-M2.7" + temperature 0.01 + http { + connect_timeout_ms 10000 + time_to_first_token_timeout_ms 120000 + idle_timeout_ms 30000 + request_timeout_ms 300000 + } + } +} + +client OllamaLocal { + provider "openai-generic" + retry_policy ExtractAgentRetry + options { + base_url "http://127.0.0.1:11434/v1" + api_key "ollama" + model "gemma4:e4b" + temperature 0.0 + http { + connect_timeout_ms 10000 + time_to_first_token_timeout_ms 120000 + idle_timeout_ms 30000 + request_timeout_ms 300000 + } + } +} diff --git a/src/lerim/agents/extract.py b/src/lerim/agents/extract.py deleted file mode 100644 index b135993..0000000 --- a/src/lerim/agents/extract.py +++ /dev/null @@ -1,512 +0,0 @@ -"""PydanticAI extract agent for the DB-only Lerim context system.""" - -from __future__ import annotations - -from pathlib import Path - -from pydantic import BaseModel, Field -from pydantic_ai import Agent, ModelRetry, RunContext -from pydantic_ai.models import Model -from pydantic_ai.usage import UsageLimits - -from lerim.agents.history_processors import ( - context_pressure_injector, - notes_state_injector, - prune_history_processor, -) -from lerim.agents.model_settings import LOW_VARIANCE_AGENT_MODEL_SETTINGS -from lerim.agents.mlflow_observability import handle_mlflow_event_stream, mlflow_span -from lerim.agents.toolsets import EXTRACT_TOOLS -from lerim.agents.tools import ( - ContextDeps, - compute_request_budget, -) -from lerim.context import ContextStore, DURABLE_RECORD_KINDS, format_durable_record_kinds -from lerim.context.project_identity import ProjectIdentity - - -_DURABLE_SIGNAL_BULLETS = "\n".join( - f"- {kind}" for kind in DURABLE_RECORD_KINDS -) -_DURABLE_KIND_TEXT = format_durable_record_kinds() - - -SYSTEM_PROMPT = """\ - -You are the Lerim extract agent. -Read one coding-agent trace, compress its signal, and write DB-backed context records. - - - -- Create exactly one episode record for the session. -- Create zero or more durable records only when the trace contains durable signal. -- The episode record is mandatory for every session, even if you also create or update durable records. -- Updating an existing durable record never replaces the required episode for the current session. -- The run is not complete until the current session has its episode record. -- Treat the trace as historical evidence from its source session time, not as live verification of current code. -- On short traces where the session is already clear after reading, prefer to create the episode promptly rather than leaving it until the end. 
-- Episode records must include `user_intent` and `what_happened`; do not put the whole episode only in `body`. -- Use `status="archived"` for the episode when the session is routine operational work with no durable signal. Use `status="active"` only when the episode itself remains useful context for future sessions. - - - -Durable signal means one of: -{durable_signal_bullets} - -Implementation detail alone is not durable signal. -A temporary code-state finding, audit observation, open task, or release-risk report is not durable by itself. Promote it only when the trace establishes a reusable project rule, unresolved constraint, stable dependency, or standing source of truth. - - - -- Store the reusable rule, decision, invariant, dependency, preference, or external pointer, not the story of the session. -- One durable record should hold one durable point. -- Direct consequences and application guidance usually stay inside that same record. -- Create the minimum number of durable records that preserves distinct durable meanings. Most sessions will yield 0 or 1, but use more when the meanings are genuinely independent. -- Classify each durable point into one canonical kind. Prefer the most specific supported kind, and do not duplicate the same point across multiple durable kinds. -- Duplicates are worse than gaps. Skip uncertain candidates rather than spraying near-duplicates. -- Never create a second durable record in the same run for the same core claim. If you realize the first draft needs improvement, update or refine that record instead of creating another one. -- `constraint` and `reference` are first-class durable record kinds, not fallback categories. - - - -- patch logs, command sequences, retries, timelines, or meeting-style recaps -- code structure, file paths, git history, or storage mechanics by themselves -- generic programming knowledge or facts already obvious from the repo -- rejected lures, discarded explanations, or implementation-only distractions -- one-run validation findings, approval flow, queue state, DB resets, rebuilds, or - runtime diagnostics by themselves - - - -- Read the trace in chunks until the full trace is covered. Do not start writing while unread trace lines remain. -- Use the findings scratchpad for evidence from chunks you have already read. Notes are summarized back to you on later turns; do not record the same point again unless you learned something new. -- Keep each durable theme and its supporting implementation evidence together. Do not record a rejected lure or discarded explanation as its own durable finding/theme. -- If one apparent finding only applies, routes, or operationalizes another finding, keep them as one durable theme instead of separate durable themes. -- If the trace needs more than one read, call `note_trace_findings` once per useful finding with theme, line, quote, and level before saving or revising context. Call it with no arguments when the full trace has no reusable signal. -- If you read many chunks, prune older read results only after those chunks have already been captured in notes. -- Search existing context before creating a durable record whenever the trace suggests an earlier record, duplicate risk, or "same meaning vs new meaning" judgment. -- The injected existing-record manifest is only a shortlist. It is never enough evidence for a revision. -- Fetch full records before any revision, and fetch each plausible target when several nearby records could match. 
-- Revise only when a fetched record clearly carries the same meaning and needs repair. If the core claim differs, create a new record instead. -- When the trace says an existing durable rule is correct but needs tightening, clarification, or a better why, fetch that record and update it rather than leaving the weaker wording unchanged. -- Avoid cosmetic same-run revisions. Revise a same-run record only to fix a concrete durable-context error or prevent a duplicate. - - - -- The system may inject `CONTEXT:` messages showing approximate context pressure. At soft or hard pressure, prune old trace chunks after their findings are captured. -- The system may inject `NOTES:` messages summarizing findings and trace coverage. Use them as a progress dashboard, not as a replacement for reading unread trace lines. -- The findings scratchpad writes the dashboard for future turns; do not try to reread the dashboard with tools. - - - -- First separate findings into durable signal and implementation evidence. -- Synthesize at the theme level. Usually one theme becomes one durable record. -- Create multiple durable records only when the trace establishes multiple independent durable points, each with its own support. -- Do not store one durable point as both a preference and a decision, a fact and a decision, or any other cross-kind duplicate. -- A stable workflow preference is not also a decision unless the trace separately states an explicit project decision with rationale. -- A dependency, setup, or environment truth without durable rationale is a fact, not also a decision. -- A failure caused by the current run's temporary validation setup is not itself a - durable environment truth. If the investigation reveals a stable requirement that - future sessions must apply, save that requirement as the durable point and omit - the temporary validation story. -- Merge candidates when one only states how to apply the other in local operations, routing, or ownership. -- If two candidates share the same core claim, merge them. -- If the difference is only evidence framing, symptom wording, or local-vs-CI phrasing around the same durable fact, keep one record and fold the extra context into it. -- If one candidate is only the direct application or routing consequence of another, keep it inside the stronger record. -- Storage boundary plus per-component routing is one decision, not two. Keep the boundary as the record and fold the routing guidance into the same title/body. -- If one candidate only says how different local components should apply the same project rule, keep that guidance inside the main record rather than creating a second durable record. -- If one candidate only restates where local project components live or how an internal architecture is applied, keep it inside the stronger decision, fact, or constraint instead of creating a separate reference. -- If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule. The filtering guidance is evidence, not a second record. -- Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered. That is extraction guidance for this run, not project context. -- Store durable records only when the lesson is likely reusable beyond this trace. -- If a candidate is mainly about this trace's commands, files, or timeline, reject it. 
-- Trace-local instructions about what to ignore in this session are not preferences unless they clearly express a broader standing workflow rule for future sessions. -- If the trace explicitly says the rationale is unknown or says not to invent one, do not create a `decision`; use `fact` instead. -- A stable setup, dependency, or environment requirement without a durable why is a `fact` even if it sounds like the current chosen setup. -- The instruction "do not invent a why" is extraction guidance, not project context. -- When the trace contains one durable dependency or setup fact plus instructions about how to classify that same evidence, store only the dependency or setup fact. Do not turn the classification guidance into a separate `preference`. -- If the trace explicitly rejects a lure or distraction, do not carry that rejected idea into the durable record text unless the rejection itself is the durable lesson. -- If this older trace conflicts with newer existing active records, do not create a new active durable record for the older claim. Preserve the historical session in the episode and let the newer active record remain current. -- If a long noisy investigation resolves into one source-of-truth boundary, store only that boundary. Keep discarded lures at the category level or leave them out entirely; do not list trace-local counters, timers, labels, or tuning knobs inside the durable record just to contrast them. -- When a discarded lure matters as evidence, keep it attached to the main durable theme as implementation context rather than storing it as a second durable theme. -- If the episode summary contains clearly reusable {durable_kind_text}, that point should usually also exist as its own durable record. -- Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode. The episode says what happened; the durable record stores what future sessions should reuse. -- Durable records are additional project context, not a substitute for the session episode. Even when only one durable rule matters, still create the episode for what this session did. - - - -- Durable titles should name the lasting rule, decision, fact, constraint, preference, or reference directly. -- Durable bodies should be compact, neutral, and standalone. -- When a durable decision prohibits or routes a named interface, data path, dependency, provider, or boundary, preserve that named subject in the record instead of replacing it with a broader abstraction. -- Prefer this shape for durable records: - 1. the durable point - 2. why it matters - 3. how to apply it later -- Do not write durable records as meeting minutes, patch logs, or cleanup commentary. -- Do not preserve trace-local commands, negotiation phrasing, or "this is not about X" sentences in final record text. -- Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored. -- Do not mention discarded implementation noise in durable record fields, including `consequences`. If details are non-durable, omit them entirely rather than saying they are non-durable. -- When the durable lesson is a source-of-truth rule, write the authoritative rule directly. Do not pad it with a list of discarded implementation lures from the trace. -- If a short contrast is still helpful, keep it abstract, such as "not worker-local state" or "not ephemeral local state". 
Do not enumerate examples in parentheses or comma-separated lists. -- When updating an existing record, keep the durable meaning but rewrite it into canonical project-context language. -- When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection. Do not imply that a bug, missing capability, or release blocker is current unless the trace itself establishes that it remains unresolved as durable project context. -- Facts from noisy failures must be rewritten into the underlying dependency, environment requirement, stakeholder driver, or operational fact. -- If a fact still reads like stderr, an exception symptom, or copied command output, rewrite it again before writing. -- When the durable lesson is an environment or dependency requirement, do not center the fact on the observed failure symptom. Name the requirement directly and mention the symptom only if it is needed as brief supporting context. -- If brief supporting context is useful, lead with the requirement and keep the symptom generic. Never include exception class names, quoted error fragments, or copied failure strings in the durable fact. -- If the candidate is mainly "this validation run failed until we changed the setup", - it belongs in the archived episode. If the candidate names a reusable setup or - runtime requirement discovered through that validation, keep the requirement and - drop the failure narrative. -- When no durable rationale exists, do not spend the fact body explaining that the rationale is absent. Just state the stable dependency, setup requirement, or operational truth directly. -- Do not quote or paraphrase trace instructions about how to classify the evidence inside the final fact body. Final fact text should describe the underlying truth, not the extraction rule you followed. -- References must answer both "where should future sessions look?" and "when should they consult it?" -- Do not use `reference` for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than "consult this external source next time." -- Keep the episode concise: short title, short body, concise `user_intent`, `what_happened`, and `outcomes`. -- If the session is mostly routine operational work with little future value and no durable record, create the episode with `status="archived"`. - - - - -Stable workflow guidance from the user. Save corrections and confirmed non-obvious working style that should carry into future sessions. -Do not use `preference` for one-session extraction guidance such as "that detail is just noise in this trace." - - -A chosen approach or project rule that future work should follow and that is not obvious from code alone. -If the trace does not support a durable why, do not use `decision`. - - -A durable invariant, limit, or must/cannot rule that future work must respect. - - -A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth. -Use `fact` for stable setup or dependency truths when the trace explicitly says not to invent decision rationale. - - -A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo. -Use `reference` only when the enduring value is where to look later. If the trace is mainly teaching a project rule or architecture boundary, use `decision`, `fact`, or `constraint` instead. 
- - - - - - -- assistant patches a bug and writes a tidy summary -- user: "The diff is enough. Don't end with a recap every time." -- later turns continue with normal edits, tests, and review comments - - -Create one preference record about keeping replies terse and not appending redundant change recaps. - - -Store the file edit itself, or treat the correction as only a one-session scratch finding when it is clearly stable workflow guidance. - - - - - -- early turns discuss local refactors, temporary debug prints, and a flaky test -- midway, several ideas are tried and discarded -- late in the trace the user settles the architecture: durable project context lives in one store; hot runtime/session state lives in another -- the follow-on routing guidance is just how to apply that boundary - - -Create the required episode for the session and one decision record for the storage boundary. Keep the routing guidance inside the same record instead of splitting it into a second record. - - -Store the refactor noise, split one architectural choice into two near-duplicate records such as one decision for the boundary and a second local-use record for which component reads which store, or create a separate durable record whose only message is that the refactors and debug edits were noise. - - - - - -- the user makes one architectural choice, such as keeping durable context and hot operational state in separate stores -- the trace also mentions variable renames, label tweaks, temporary debug prints, and similar low-value cleanups -- the user explicitly says those local edits should not become durable context - - -Create the required episode and one durable record for the architectural choice only. Treat the explicit "those edits are just noise" instruction as extraction guidance for this run, not as its own record. - - -Create a second durable record whose message is that renames, label tweaks, or temporary debug code are non-durable, or let that noise-filtering instruction replace the required episode. - - - - - -- repeated failed commands and partial theories about why a media workflow is broken -- some guesses are ruled out -- the stable conclusion is operational: environments that run this workflow need a specific system dependency installed - - -Create one fact record for the dependency requirement in clean operational language. Lead with the missing dependency or environment requirement, and if you mention the failure at all, keep it generic rather than naming the exact exception class or copied command output. Still create the required episode for this session. - - -Store the raw exception text, center the record on the failure symptom, split one operational lesson into separate local-vs-CI facts, create a second durable record whose message is "do not invent a rationale here," keep the command history or debugging timeline, or write only the fact and skip the episode. - - - - - -- the user states one stable dependency or setup truth -- nearby turns add extraction guidance such as "this is a fact, not a decision" or "do not invent a why beyond the dependency" -- no broader workflow rule for future sessions is established - - -Create the required episode and one fact record for the stable dependency or setup truth only. - - -Create a second durable preference whose whole point is how to classify this trace, or store the meta-instruction instead of the underlying dependency fact. 
- - - - - -- the trace says image-enabled workflows require a system dependency in the environment -- the user also says not to invent policy rationale beyond that dependency fact - - -Write a fact such as: "Image-enabled workflows require libvips in the environment." Keep the body on the requirement and its effect. - - -Write a fact body such as: "Do not invent a policy reason here" or "No decision rationale was supplied." Those are meta comments about classification, not durable project context. - - - - - -- early chunks are noisy and keep circling local counters, timers, labels, and temporary tuning -- the final chunk clarifies that those were distractions -- the real durable lesson is a source-of-truth boundary: authoritative state must live in one persisted place that survives restart and failover - - -Create one durable record for the source-of-truth boundary. Mention restart or failover if it explains why the boundary matters, but keep any contrast abstract, such as "not worker-local state," rather than listing local counters or timers. - - -Write a durable record that carries over the rejected lure by naming worker-local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list. - - - - - -- the assistant starts from a partial repo note -- later the user clarifies that incident ownership and current status are tracked in an external dashboard or ticket system -- future sessions should consult that external system when this class of issue appears - - -Create one reference record that names the external source and when future sessions should consult it. - - -Center the record on local files, or turn it into a warning slogan about what not to trust locally. - - - - - -- run formatter -- fix a small lint complaint -- rerun tests -- confirm green -- no new rule, dependency, preference, or durable fact emerges - - -Create only an archived episode. - - -Invent a durable record from the sequence of routine commands. - - - - - -- the trace points at an earlier record that sounds nearby -- new evidence sharpens part of it, but you still need to decide whether the core claim stayed the same -- there may be more than one plausible existing record - - -Search first, fetch the plausible existing record, then either update it if the meaning matches or create a new record if the core claim is different. In both cases, still create the episode for this session. - - -Update from a shortlist or search preview alone, force an update when the new claim is only adjacent, or skip the episode because you already changed a durable record. - - - - - -- End the run with the `final_result` tool. -- Put the plain-text completion summary in `completion_summary`. -- Before `final_result`, ensure the current session already has exactly one episode record. -- If you have created durable records but no episode yet, stop and create the episode before `final_result`. -- If the episode contains the only copy of a reusable rule, invariant, dependency, source-of-truth pointer, or stable preference, stop and create the corresponding durable record before `final_result`. -- Do not end with free-form assistant text outside `final_result`. - - - -Do not turn filenames, storage mechanics, graph links, or evidence tables into the main record unless the durable rule is specifically about that boundary. 
- -""".format( - durable_signal_bullets=_DURABLE_SIGNAL_BULLETS, - durable_kind_text=_DURABLE_KIND_TEXT, -) - - -class ExtractionResult(BaseModel): - """Structured output for the extract flow.""" - - completion_summary: str = Field(description="Short plain-text completion summary") - - -def build_extract_agent(model: Model) -> Agent[ContextDeps, ExtractionResult]: - """Build the extract agent with semantic DB tools.""" - agent = Agent( - model, - deps_type=ContextDeps, - output_type=ExtractionResult, - system_prompt=SYSTEM_PROMPT, - tools=EXTRACT_TOOLS, - model_settings=LOW_VARIANCE_AGENT_MODEL_SETTINGS, - history_processors=[ - context_pressure_injector, - notes_state_injector, - prune_history_processor, - ], - retries=5, - output_retries=4, - ) - - # Keep final validation structural. Semantic durable-signal quality belongs - # in the prompt and integration/eval cases, not keyword scans over prose. - @agent.output_validator - def _require_session_episode( - ctx: RunContext[ContextDeps], data: ExtractionResult - ) -> ExtractionResult: - store = ContextStore(ctx.deps.context_db_path) - store.initialize() - store.register_project(ctx.deps.project_identity) - rows = store.query( - entity="records", - mode="count", - project_ids=[ctx.deps.project_identity.project_id], - kind="episode", - source_session_id=ctx.deps.session_id, - include_archived=True, - ) - episode_count = int(rows.get("count") or 0) - if episode_count != 1: - raise ModelRetry( - "The run is not complete yet. Create exactly one episode record for the current session before final_result." - ) - return data - - return agent - - -def _format_existing_record_manifest( - *, - context_db_path: Path, - project_identity: ProjectIdentity, - limit: int = 5, -) -> str: - """Build a compact manifest of recent active durable records for create-vs-update decisions.""" - store = ContextStore(context_db_path) - store.initialize() - store.register_project(project_identity) - rows = store.query( - entity="records", - mode="list", - project_ids=[project_identity.project_id], - status="active", - order_by="updated_at", - limit=max(1, limit * 2), - include_total=False, - )["rows"] - durable_rows = [row for row in rows if str(row.get("kind") or "") != "episode"][:limit] - if not durable_rows: - return "" - - def _shorten(text: str, max_chars: int = 140) -> str: - value = " ".join((text or "").split()) - if len(value) <= max_chars: - return value - return value[: max_chars - 3].rstrip() + "..." 
- - lines = ["Relevant existing durable records:"] - for row in durable_rows: - record_id = str(row.get("record_id") or "") - kind = str(row.get("kind") or "") - title = _shorten(str(row.get("title") or "")) - body = _shorten(str(row.get("body") or "")) - lines.append(f"- {record_id} | {kind} | {title} | {body}") - return "\n".join(lines) - - -def run_extraction( - *, - context_db_path: Path, - project_identity: ProjectIdentity, - session_id: str, - trace_path: Path, - model: Model, - run_folder: Path, - session_started_at: str = "", - return_messages: bool = False, -): - """Run the extract agent on one trace.""" - agent = build_extract_agent(model) - try: - trace_line_count = sum(1 for _ in trace_path.open("r", encoding="utf-8")) - except OSError: - trace_line_count = 0 - existing_record_manifest = _format_existing_record_manifest( - context_db_path=context_db_path, - project_identity=project_identity, - ) - deps = ContextDeps( - context_db_path=context_db_path, - project_identity=project_identity, - session_id=session_id, - trace_path=trace_path, - session_started_at=str(session_started_at or "").strip(), - ) - source_time_text = str(session_started_at or "").strip() or "unknown" - prompt = ( - "Read the trace, write exactly one episode record, and write only the strongest " - "durable records with non-empty title and body. Store reusable rules and decisions, " - "not a polished recap of the meeting. " - "Durable records must be positive canonical context: when trace text combines a " - "durable point with cleanup/noise/ignore guidance, exclude that guidance entirely " - "from the durable record. " - f"Source session started_at: {source_time_text}. Treat the trace as evidence from " - "that time, not as a fresh verification of the current repository. " - f"This trace has {trace_line_count} lines. Read all chunks before writing. " - "If the trace needs more than one read to cover it, record findings before any write. " - "If relevant existing durable records are shown below, treat them as a shortlist only; " - "fetch the full record before any revision." 
- + (f"\n\n{existing_record_manifest}" if existing_record_manifest else "") - ) - request_limit = compute_request_budget(trace_path) + 4 - with mlflow_span( - "lerim.agent.extract", - span_type="AGENT", - attributes={"lerim.agent_name": "extract"}, - inputs={ - "trace_path": str(trace_path), - "trace_line_count": trace_line_count, - "request_limit": request_limit, - }, - ): - result = agent.run_sync( - prompt, - deps=deps, - usage_limits=UsageLimits(request_limit=request_limit), - event_stream_handler=handle_mlflow_event_stream, - ) - if return_messages: - return result.output, list(result.all_messages()) - return result.output - - -if __name__ == "__main__": - """Run a tiny constructor smoke check.""" - assert SYSTEM_PROMPT - print("extract agent: self-test passed") diff --git a/src/lerim/agents/extract/__init__.py b/src/lerim/agents/extract/__init__.py new file mode 100644 index 0000000..5f254bf --- /dev/null +++ b/src/lerim/agents/extract/__init__.py @@ -0,0 +1,11 @@ +"""BAML and LangGraph extract agent public API.""" + +from lerim.agents.extract.api import run_extraction +from lerim.agents.extract.types import ExtractionEvent, ExtractionResult, ExtractionRunDetails + +__all__ = [ + "ExtractionEvent", + "ExtractionResult", + "ExtractionRunDetails", + "run_extraction", +] diff --git a/src/lerim/agents/extract/api.py b/src/lerim/agents/extract/api.py new file mode 100644 index 0000000..6ce902c --- /dev/null +++ b/src/lerim/agents/extract/api.py @@ -0,0 +1,192 @@ +"""Production extract-agent API.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from pathlib import Path +from typing import overload + +from lerim.agents.baml_runtime import model_label +from lerim.agents.extract.graph import run_windowed_extract_graph +from lerim.agents.extract.persistence import ( + PersistenceContext, + format_existing_record_manifest, + prepare_context_store, +) +from lerim.agents.extract.types import ( + ExtractionEvent, + ExtractionResult, + ExtractionRunDetails, +) +from lerim.agents.extract.windowing import trace_line_count +from lerim.agents.mlflow_observability import mlflow_span +from lerim.config.settings import Config, get_config +from lerim.context import ProjectIdentity + + +@overload +def run_extraction( + *, + context_db_path: Path, + project_identity: ProjectIdentity, + session_id: str, + trace_path: Path, + config: Config | None = None, + session_started_at: str = "", + return_details: bool = False, + provider: str | None = None, + model_name: str | None = None, + api_base_url: str | None = None, + api_key: str | None = None, + temperature: float | None = None, + max_llm_calls: int | None = None, + progress: bool = False, +) -> ExtractionResult: + ... + + +@overload +def run_extraction( + *, + context_db_path: Path, + project_identity: ProjectIdentity, + session_id: str, + trace_path: Path, + config: Config | None = None, + session_started_at: str = "", + return_details: bool, + provider: str | None = None, + model_name: str | None = None, + api_base_url: str | None = None, + api_key: str | None = None, + temperature: float | None = None, + max_llm_calls: int | None = None, + progress: bool = False, +) -> tuple[ExtractionResult, ExtractionRunDetails]: + ... 
+ + +def run_extraction( + *, + context_db_path: Path, + project_identity: ProjectIdentity, + session_id: str, + trace_path: Path, + config: Config | None = None, + session_started_at: str = "", + return_details: bool = False, + provider: str | None = None, + model_name: str | None = None, + api_base_url: str | None = None, + api_key: str | None = None, + temperature: float | None = None, + max_llm_calls: int | None = None, + progress: bool = False, +) -> ExtractionResult | tuple[ExtractionResult, ExtractionRunDetails]: + """Run the BAML and LangGraph extract agent on one trace.""" + cfg = config or get_config() + resolved_context_db_path = context_db_path.expanduser().resolve() + resolved_trace_path = trace_path.expanduser().resolve() + source_started_at = ( + str(session_started_at or "").strip() + or datetime.now(timezone.utc).isoformat() + ) + effective_model_label = model_label( + config=cfg, + provider=provider, + model_name=model_name, + ) + persistence_context = PersistenceContext( + context_db_path=resolved_context_db_path, + project_identity=project_identity, + session_id=session_id, + trace_path=resolved_trace_path, + session_started_at=source_started_at, + model_name=effective_model_label, + ) + prepare_context_store(persistence_context) + existing_record_manifest = format_existing_record_manifest( + context_db_path=resolved_context_db_path, + project_identity=project_identity, + ) + run_instruction = _build_run_instruction( + context_db_path=resolved_context_db_path, + project_identity=project_identity, + trace_path=resolved_trace_path, + session_started_at=source_started_at, + existing_record_manifest=existing_record_manifest, + ) + line_count = trace_line_count(resolved_trace_path) + with mlflow_span( + "lerim.agent.extract", + span_type="AGENT", + attributes={"lerim.agent_name": "extract"}, + inputs={ + "trace_path": str(resolved_trace_path), + "trace_line_count": line_count, + "model_name": effective_model_label, + }, + ): + final_state = run_windowed_extract_graph( + persistence_context=persistence_context, + config=cfg, + run_instruction=run_instruction, + existing_record_manifest=existing_record_manifest, + provider=provider, + model_name=model_name, + api_base_url=api_base_url, + api_key=api_key, + temperature=temperature, + max_llm_calls=max_llm_calls, + progress=progress, + ) + result = ExtractionResult( + completion_summary=str(final_state.get("completion_summary") or "").strip() + or "Extraction completed." + ) + events = [ + ExtractionEvent.model_validate(item) + for item in final_state.get("observations", []) + ] + details = ExtractionRunDetails( + events=events, + llm_calls=int(final_state.get("llm_calls") or 0), + done=bool(final_state.get("done")), + context_db_path=str(resolved_context_db_path), + project_id=project_identity.project_id, + session_id=session_id, + model_name=effective_model_label, + trace_total_lines=line_count, + ) + if return_details: + return result, details + return result + + +def _build_run_instruction( + *, + context_db_path: Path, + project_identity: ProjectIdentity, + trace_path: Path, + session_started_at: str, + existing_record_manifest: str | None = None, +) -> str: + """Build extraction task framing for the BAML graph.""" + del context_db_path, project_identity + line_count = trace_line_count(trace_path) + source_time_text = str(session_started_at or "").strip() or "unknown" + prompt = ( + "Read the trace, write exactly one episode record, and write only the strongest " + "durable records with non-empty title and body. 
Store reusable rules and decisions, " + "not a polished recap of the meeting. " + "Durable records must be positive canonical context: when trace text combines a " + "durable point with cleanup/noise/ignore guidance, exclude that guidance entirely " + "from the durable record. " + f"Source session started_at: {source_time_text}. Treat the trace as evidence from " + "that time, not as a fresh verification of the current repository. " + f"This trace has {line_count} lines. Read all chunks before writing. " + "If relevant existing durable records are shown below, treat them as duplicate-risk " + "context only; prefer skipping near-duplicates over creating duplicates." + + (f"\n\n{existing_record_manifest}" if existing_record_manifest else "") + ) + return prompt diff --git a/src/lerim/agents/extract/graph.py b/src/lerim/agents/extract/graph.py new file mode 100644 index 0000000..2411d32 --- /dev/null +++ b/src/lerim/agents/extract/graph.py @@ -0,0 +1,398 @@ +"""Windowed LangGraph extraction pipeline backed by BAML.""" + +from __future__ import annotations + +from typing import Any, Callable + +from langgraph.graph import END, START, StateGraph + +from lerim.agents.baml_runtime import build_baml_client_for_role +from lerim.agents.extract.persistence import ( + PersistenceContext, + persist_synthesized_extraction, +) +from lerim.agents.extract.state import WindowExtractGraphState +from lerim.agents.extract.windowing import ( + compute_request_budget, + read_trace_window, + trace_line_count, + window_char_budget, +) +from lerim.config.settings import Config + +MAX_BAML_MODEL_RETRIES = 3 +BAML_RECOVERABLE_ERROR_NAMES = { + "BamlClientFinishReasonError", + "BamlClientHttpError", + "BamlTimeoutError", + "BamlValidationError", +} + + +def run_windowed_extract_graph( + *, + persistence_context: PersistenceContext, + config: Config, + run_instruction: str, + existing_record_manifest: str, + provider: str | None = None, + model_name: str | None = None, + api_base_url: str | None = None, + api_key: str | None = None, + temperature: float | None = None, + max_llm_calls: int | None = None, + progress: bool = False, +) -> dict[str, Any]: + """Run the BAML extraction graph and return its final state.""" + total_lines = trace_line_count(persistence_context.trace_path) + graph = build_windowed_extract_graph( + persistence_context=persistence_context, + config=config, + run_instruction=run_instruction, + existing_record_manifest=existing_record_manifest, + provider=provider, + model_name=model_name, + api_base_url=api_base_url, + api_key=api_key, + temperature=temperature, + max_llm_calls=max_llm_calls + or compute_request_budget(persistence_context.trace_path), + progress=progress, + ) + return graph.invoke( + { + "observations": [], + "llm_calls": 0, + "next_line": 1, + "trace_total_lines": total_lines, + "done": False, + "completion_summary": "", + } + ) + + +def build_windowed_extract_graph( + *, + persistence_context: PersistenceContext, + config: Config, + run_instruction: str, + existing_record_manifest: str, + provider: str | None, + model_name: str | None, + api_base_url: str | None, + api_key: str | None, + temperature: float | None, + max_llm_calls: int, + progress: bool = False, +): + """Compile the windowed scan, synthesize, and persist extraction graph.""" + baml_runtime = build_baml_client_for_role( + config=config, + provider=provider, + model_name=model_name, + api_base_url=api_base_url, + api_key=api_key, + temperature=temperature, + ) + + def read_window(state: WindowExtractGraphState) -> 
dict[str, Any]: + """Read the next budgeted trace window into transient state.""" + total_lines = int(state.get("trace_total_lines") or 0) + start_line = int(state.get("next_line") or 1) + if start_line > total_lines: + return {"current_window": {}} + char_budget = window_char_budget( + state=state, + run_instruction=run_instruction, + existing_record_manifest=existing_record_manifest, + episode_summary=_episode_summary(state), + durable_findings_summary=_durable_findings_summary(state), + implementation_summary=_implementation_summary(state), + ) + window = read_trace_window( + trace_path=persistence_context.trace_path, + start_line=start_line, + total_lines=total_lines, + char_budget=char_budget, + ) + if progress: + print( + f" extract window {window['start_line']}-{window['end_line']} " + f"chars={len(window['text'])}", + flush=True, + ) + return { + "current_window": window, + "next_line": int(window["end_line"]) + 1, + "observations": [ + { + "action": "read_window", + "ok": True, + "content": window["header"], + "args": { + "start_line": window["start_line"], + "end_line": window["end_line"], + "char_budget": char_budget, + }, + "done": False, + "completion_summary": "", + } + ], + } + + def scan_window(state: WindowExtractGraphState) -> dict[str, Any]: + """Scan the current window into compact episode and findings state.""" + llm_calls = int(state.get("llm_calls") or 0) + if llm_calls >= max_llm_calls: + raise RuntimeError( + f"BAML extraction exceeded max_llm_calls={max_llm_calls}." + ) + window = state.get("current_window") or {} + if not window.get("text"): + return {} + if progress: + print(f" extract scan {llm_calls + 1}/{max_llm_calls}", flush=True) + result, retry_observations, attempts = _call_baml_with_retries( + lambda: baml_runtime.ScanTraceWindow( + run_instruction=run_instruction, + prior_episode_summary=_episode_summary(state), + prior_findings_summary=_findings_summary(state), + trace_window=str(window["text"]), + ), + stage="scan_window", + progress=progress, + ) + payload = _model_payload(result) + episode_update = str(payload.get("episode_update") or "").strip() + durable = [_model_payload(item) for item in payload.get("durable_findings") or []] + implementation = [ + _model_payload(item) + for item in payload.get("implementation_findings") or [] + ] + noise = [ + str(item).strip() + for item in payload.get("discarded_noise") or [] + if str(item).strip() + ] + return { + "llm_calls": llm_calls + attempts, + "episode_updates": [episode_update] if episode_update else [], + "durable_findings": durable, + "implementation_findings": implementation, + "discarded_noise": noise, + "observations": [ + *retry_observations, + { + "action": "scan_window", + "ok": True, + "content": ( + f"window={window.get('start_line')}-{window.get('end_line')} " + f"durable={len(durable)} implementation={len(implementation)}" + ), + "args": { + "start_line": window.get("start_line"), + "end_line": window.get("end_line"), + }, + "done": False, + "completion_summary": "", + }, + ], + } + + def synthesize_records(state: WindowExtractGraphState) -> dict[str, Any]: + """Synthesize final episode and durable record candidates.""" + llm_calls = int(state.get("llm_calls") or 0) + if llm_calls >= max_llm_calls: + raise RuntimeError( + f"BAML extraction exceeded max_llm_calls={max_llm_calls}." 
+ ) + if progress: + print(f" extract synth {llm_calls + 1}/{max_llm_calls}", flush=True) + result, retry_observations, attempts = _call_baml_with_retries( + lambda: baml_runtime.SynthesizeExtractRecords( + run_instruction=run_instruction, + episode_summary=_episode_summary(state), + durable_findings_summary=_durable_findings_summary(state), + existing_record_manifest=existing_record_manifest or "(none)", + ), + stage="synthesize_records", + progress=progress, + ) + payload = _model_payload(result) + durable_count = len(payload.get("durable_records") or []) + return { + "llm_calls": llm_calls + attempts, + "synthesized": result, + "observations": [ + *retry_observations, + { + "action": "synthesize_records", + "ok": True, + "content": f"durable_records={durable_count}", + "args": {}, + "done": False, + "completion_summary": "", + }, + ], + } + + def persist_records(state: WindowExtractGraphState) -> dict[str, Any]: + """Persist synthesized records and finish the graph.""" + observations, done, completion_summary = persist_synthesized_extraction( + state.get("synthesized"), + persistence_context, + ) + if progress: + print(f" extract persist done={done}", flush=True) + return { + "observations": observations, + "done": done, + "completion_summary": completion_summary, + } + + def after_scan(state: WindowExtractGraphState) -> str: + """Continue scanning until all trace lines are covered.""" + next_line = int(state.get("next_line") or 1) + total_lines = int(state.get("trace_total_lines") or 0) + if next_line <= total_lines: + return "read_window" + return "synthesize_records" + + graph = StateGraph(WindowExtractGraphState) + graph.add_node("read_window", read_window) + graph.add_node("scan_window", scan_window) + graph.add_node("synthesize_records", synthesize_records) + graph.add_node("persist_records", persist_records) + graph.add_edge(START, "read_window") + graph.add_edge("read_window", "scan_window") + graph.add_conditional_edges( + "scan_window", + after_scan, + ["read_window", "synthesize_records"], + ) + graph.add_edge("synthesize_records", "persist_records") + graph.add_edge("persist_records", END) + return graph.compile() + + +def _call_baml_with_retries( + call: Callable[[], Any], + *, + stage: str, + progress: bool, +) -> tuple[Any, list[dict[str, Any]], int]: + """Run one BAML call with graph-visible recoverable retries.""" + observations: list[dict[str, Any]] = [] + attempts = 0 + while True: + attempts += 1 + try: + return call(), observations, attempts + except Exception as exc: + if not _is_recoverable_baml_error(exc) or attempts > MAX_BAML_MODEL_RETRIES: + raise + if progress: + print(f" extract retry {stage} attempt={attempts}", flush=True) + observations.append( + { + "action": "model_retry", + "ok": False, + "content": _model_retry_observation(exc), + "args": {"stage": stage, "attempt": attempts}, + "done": False, + "completion_summary": "", + } + ) + + +def _model_payload(value: Any) -> dict[str, Any]: + """Convert generated BAML objects into plain dictionaries.""" + if hasattr(value, "model_dump"): + return _plain_value(value.model_dump(exclude_none=True)) + if isinstance(value, dict): + return _plain_value( + {key: item for key, item in value.items() if item is not None} + ) + if value is None: + return {} + return _plain_value(getattr(value, "__dict__", {})) + + +def _plain_value(value: Any) -> Any: + """Convert enum-ish values recursively into JSON-like values.""" + enum_value = getattr(value, "value", None) + if enum_value is not None: + return enum_value + if 
isinstance(value, dict): + return {key: _plain_value(item) for key, item in value.items()} + if isinstance(value, list): + return [_plain_value(item) for item in value] + return value + + +def _episode_summary(state: WindowExtractGraphState) -> str: + """Render compact rolling episode summary.""" + updates = [item for item in state.get("episode_updates", []) if item] + return "\n".join(f"- {item}" for item in updates) or "(none yet)" + + +def _findings_summary(state: WindowExtractGraphState) -> str: + """Render all prior findings for the next scan window.""" + return "\n\n".join( + [ + "Durable findings:\n" + _durable_findings_summary(state), + "Implementation/noise findings:\n" + _implementation_summary(state), + ] + ) + + +def _durable_findings_summary(state: WindowExtractGraphState) -> str: + """Render durable findings compactly for BAML prompts.""" + findings = state.get("durable_findings", []) + if not findings: + return "(none)" + return "\n".join(_format_finding(finding) for finding in findings) + + +def _implementation_summary(state: WindowExtractGraphState) -> str: + """Render implementation findings and discarded noise compactly.""" + parts: list[str] = [] + findings = state.get("implementation_findings", []) + if findings: + parts.append("\n".join(_format_finding(finding) for finding in findings)) + noise = state.get("discarded_noise", []) + if noise: + parts.append("Discarded noise:\n" + "\n".join(f"- {item}" for item in noise)) + return "\n".join(parts) if parts else "(none)" + + +def _format_finding(finding: dict[str, Any]) -> str: + """Render one scan finding as one compact bullet.""" + level = str(finding.get("level") or "").strip() + theme = str(finding.get("theme") or "").strip() + note = str(finding.get("note") or "").strip() + line = finding.get("line") + quote = str(finding.get("quote") or "").strip() + prefix = f"- {level}: {theme}" if level or theme else "-" + details = note + if line: + details += f" (line {line})" + if quote: + details += f" Evidence: {quote}" + return f"{prefix}: {details}".strip() + + +def _is_recoverable_baml_error(exc: Exception) -> bool: + """Return whether a BAML model/parsing failure should be retried.""" + return type(exc).__name__ in BAML_RECOVERABLE_ERROR_NAMES + + +def _model_retry_observation(exc: Exception) -> str: + """Render a compact model failure note.""" + message = str(exc).replace("\n", " ")[:1200] + return ( + "The previous BAML model call did not produce a valid next action. " + "Retry and return exactly one JSON object matching the requested schema. " + "Do not include tags, hidden reasoning, markdown, or prose before " + f"the JSON. 
Error: {type(exc).__name__}: {message}" + ) diff --git a/src/lerim/agents/extract/persistence.py b/src/lerim/agents/extract/persistence.py new file mode 100644 index 0000000..eb475ae --- /dev/null +++ b/src/lerim/agents/extract/persistence.py @@ -0,0 +1,390 @@ +"""Persistence helpers for synthesized extract records.""" + +from __future__ import annotations + +from dataclasses import dataclass +import json +from pathlib import Path +import textwrap +from typing import Any + +from lerim.context import ContextStore, ProjectIdentity +from lerim.context.spec import ( + DURABLE_RECORD_KINDS, + MAX_DURABLE_BODY_CHARS, + MAX_EPISODE_BODY_CHARS, + MAX_EPISODE_OUTCOMES_CHARS, + MAX_EPISODE_USER_INTENT_CHARS, + MAX_EPISODE_WHAT_HAPPENED_CHARS, + MAX_RECORD_TITLE_CHARS, + normalize_record_kind, + normalize_record_status, +) + + +@dataclass(frozen=True) +class PersistenceContext: + """Context needed to write extracted records.""" + + context_db_path: Path + project_identity: ProjectIdentity + session_id: str + trace_path: Path + session_started_at: str + model_name: str + + +@dataclass(frozen=True) +class PersistenceObservation: + """Observed result after one persistence action.""" + + action: str + ok: bool + content: str + args: dict[str, Any] + done: bool = False + completion_summary: str = "" + + +def prepare_context_store(ctx: PersistenceContext) -> None: + """Initialize store provenance for the extract run.""" + store = ContextStore(ctx.context_db_path) + store.initialize() + store.register_project(ctx.project_identity) + store.upsert_session( + project_id=ctx.project_identity.project_id, + session_id=ctx.session_id, + agent_type="baml-langgraph-extract", + source_trace_ref=str(ctx.trace_path), + repo_path=str(ctx.project_identity.repo_path), + cwd=str(ctx.project_identity.repo_path), + started_at=ctx.session_started_at, + model_name=ctx.model_name, + instructions_text=None, + prompt_text=None, + metadata={}, + ) + + +def format_existing_record_manifest( + *, + context_db_path: Path, + project_identity: ProjectIdentity, + limit: int = 5, +) -> str: + """Build a compact manifest of recent active durable records.""" + store = ContextStore(context_db_path) + store.initialize() + store.register_project(project_identity) + rows = store.query( + entity="records", + mode="list", + project_ids=[project_identity.project_id], + status="active", + order_by="updated_at", + limit=max(1, limit * 2), + include_total=False, + )["rows"] + durable_rows = [ + row for row in rows if str(row.get("kind") or "") != "episode" + ][:limit] + if not durable_rows: + return "" + + lines = ["Relevant existing durable records:"] + for row in durable_rows: + record_id = str(row.get("record_id") or "") + kind = str(row.get("kind") or "") + title = _shorten(str(row.get("title") or "")) + body = _shorten(str(row.get("body") or "")) + lines.append(f"- {record_id} | {kind} | {title} | {body}") + return "\n".join(lines) + + +def count_current_session_episodes(ctx: PersistenceContext) -> int: + """Count current-session episode records in the canonical context store.""" + store = ContextStore(ctx.context_db_path) + store.initialize() + store.register_project(ctx.project_identity) + rows = store.query( + entity="records", + mode="count", + project_ids=[ctx.project_identity.project_id], + kind="episode", + source_session_id=ctx.session_id, + include_archived=True, + ) + return int(rows.get("count") or 0) + + +def persist_synthesized_extraction( + synthesized: Any, + ctx: PersistenceContext, +) -> tuple[list[dict[str, Any]], bool, str]: + 
"""Persist synthesized episode and durable records through ContextStore.""" + payload = _model_payload(synthesized) + completion_summary = str(payload.get("completion_summary") or "").strip() + durable_records = [ + record + for record in ( + _prepare_durable_record(item) + for item in payload.get("durable_records") or [] + ) + if record is not None + ] + episode = _prepare_episode( + payload.get("episode") or {}, + completion_summary, + has_durable_records=bool(durable_records), + ) + + observations: list[dict[str, Any]] = [] + store = ContextStore(ctx.context_db_path) + store.initialize() + store.register_project(ctx.project_identity) + for index, record in enumerate([episode, *durable_records]): + try: + result = store.create_record( + project_id=ctx.project_identity.project_id, + session_id=ctx.session_id, + change_reason="baml_extract", + created_at=ctx.session_started_at or None, + **record, + ) + observation = PersistenceObservation( + action="save_context", + ok=True, + content=json.dumps( + {"ok": True, "result": result}, + ensure_ascii=True, + indent=2, + ), + args=record, + ) + except Exception as exc: + observation = PersistenceObservation( + action="save_context", + ok=False, + content=f"Record write failed: {type(exc).__name__}: {exc}", + args=record, + ) + observations.append(observation_to_state(observation)) + if index == 0 and not observation.ok: + break + + episode_count = count_current_session_episodes(ctx) + done = episode_count == 1 + if not completion_summary: + completion_summary = "Extraction completed." + final_observation = PersistenceObservation( + action="final_result", + ok=done, + content=( + completion_summary + if done + else f"final_result refused: expected exactly one episode record, found {episode_count}." + ), + args={}, + done=done, + completion_summary=completion_summary if done else "", + ) + observations.append(observation_to_state(final_observation)) + return observations, done, completion_summary if done else "" + + +def observation_to_state(observation: PersistenceObservation) -> dict[str, Any]: + """Convert a persistence observation into serializable graph state.""" + return { + "action": observation.action, + "ok": observation.ok, + "content": observation.content, + "args": observation.args, + "done": observation.done, + "completion_summary": observation.completion_summary, + } + + +def _prepare_episode( + value: Any, + completion_summary: str, + *, + has_durable_records: bool, +) -> dict[str, Any]: + """Normalize a synthesized episode draft into a canonical record payload.""" + episode = _model_payload(value) + status = _status_value(episode.get("status")) + if not status: + status = "active" if has_durable_records else "archived" + if not str(episode.get("title") or "").strip(): + episode["title"] = _episode_title_from_payload(episode, completion_summary) + if not str(episode.get("user_intent") or "").strip(): + episode["user_intent"] = "Extract context from the source trace." + if not str(episode.get("what_happened") or "").strip(): + fallback = ( + str(episode.get("body") or "").strip() + or completion_summary + or "The trace was scanned and summarized for context extraction." 
+ ) + episode["what_happened"] = fallback + if not str(episode.get("body") or "").strip(): + episode["body"] = _episode_body_from_structured_fields(episode) + return { + "kind": "episode", + "title": _compact_text(episode.get("title"), MAX_RECORD_TITLE_CHARS), + "body": _compact_text(episode.get("body"), MAX_EPISODE_BODY_CHARS), + "status": status, + "valid_from": _empty_to_none(episode.get("valid_from")), + "valid_until": _empty_to_none(episode.get("valid_until")), + "decision": None, + "why": None, + "alternatives": None, + "consequences": None, + "user_intent": _compact_text( + episode.get("user_intent"), + MAX_EPISODE_USER_INTENT_CHARS, + ), + "what_happened": _compact_text( + episode.get("what_happened"), + MAX_EPISODE_WHAT_HAPPENED_CHARS, + ), + "outcomes": _compact_optional_text( + episode.get("outcomes"), + MAX_EPISODE_OUTCOMES_CHARS, + ), + } + + +def _prepare_durable_record(value: Any) -> dict[str, Any] | None: + """Normalize one durable draft into a canonical record payload.""" + record = _model_payload(value) + kind = normalize_record_kind(_enum_text(record.get("kind"))) + if kind not in DURABLE_RECORD_KINDS: + return None + title = _compact_text(record.get("title"), MAX_RECORD_TITLE_CHARS) + body = _compact_text(record.get("body"), MAX_DURABLE_BODY_CHARS) + if not title or not body: + return None + decision = _empty_to_none(record.get("decision")) + why = _empty_to_none(record.get("why")) + if kind == "decision" and (not decision or not why): + kind = "fact" + decision = None + why = None + return { + "kind": kind, + "title": title, + "body": body, + "status": _status_value(record.get("status")) or "active", + "valid_from": _empty_to_none(record.get("valid_from")), + "valid_until": _empty_to_none(record.get("valid_until")), + "decision": decision if kind == "decision" else None, + "why": why if kind == "decision" else None, + "alternatives": _empty_to_none(record.get("alternatives")) + if kind == "decision" + else None, + "consequences": _empty_to_none(record.get("consequences")) + if kind == "decision" + else None, + "user_intent": None, + "what_happened": None, + "outcomes": None, + } + + +def _model_payload(value: Any) -> dict[str, Any]: + """Return a plain dict from a generated BAML/Pydantic-ish object.""" + if hasattr(value, "model_dump"): + return _coerce_value(value.model_dump(exclude_none=True)) + if isinstance(value, dict): + return _coerce_value( + {key: item for key, item in value.items() if item is not None} + ) + if value is None: + return {} + return _coerce_value( + json.loads(json.dumps(value, default=lambda item: item.__dict__)) + ) + + +def _coerce_value(value: Any) -> Any: + """Convert generated BAML enum values into plain JSON-like values.""" + enum_value = getattr(value, "value", None) + if enum_value is not None: + return enum_value + if isinstance(value, dict): + return {key: _coerce_value(item) for key, item in value.items()} + if isinstance(value, list): + return [_coerce_value(item) for item in value] + return value + + +def _status_value(value: Any) -> str: + """Return a canonical record status or an empty string.""" + text = normalize_record_status(_enum_text(value), default="") + return text if text in {"active", "archived"} else "" + + +def _enum_text(value: Any) -> str: + """Convert BAML enum text into lowercase alias text.""" + return str(value or "").strip().lower() + + +def _episode_body_from_structured_fields(episode: dict[str, Any]) -> str: + """Build an episode body when synthesis provided structured fields only.""" + user_intent = 
str(episode.get("user_intent") or "").strip() + what_happened = str(episode.get("what_happened") or "").strip() + outcomes = str(episode.get("outcomes") or "").strip() + parts = [] + if user_intent: + parts.append(f"User intent: {user_intent}") + if what_happened: + parts.append(f"What happened: {what_happened}") + if outcomes: + parts.append(f"Outcome: {outcomes}") + return " ".join(parts) or "The session was scanned and summarized for context extraction." + + +def _episode_title_from_payload(episode: dict[str, Any], completion_summary: str) -> str: + """Derive a compact episode title from available episode text.""" + candidates = [ + episode.get("user_intent"), + episode.get("what_happened"), + episode.get("outcomes"), + completion_summary, + episode.get("body"), + ] + for candidate in candidates: + text = str(candidate or "").strip() + if text: + return text[:80].rstrip(" .") or "Extracted session" + return "Extracted session" + + +def _compact_text(value: Any, max_chars: int) -> str: + """Return non-empty text that fits the canonical record field budget.""" + text = " ".join(str(value or "").split()) + if not text: + return "" + if len(text) <= max_chars: + return text + return textwrap.shorten(text, width=max_chars, placeholder="...") + + +def _compact_optional_text(value: Any, max_chars: int) -> str | None: + """Return optional compact text, preserving None for empty values.""" + text = _compact_text(value, max_chars) + return text or None + + +def _empty_to_none(value: Any) -> str | None: + """Convert blank values to None.""" + text = " ".join(str(value or "").split()) + return text or None + + +def _shorten(text: str, max_chars: int = 140) -> str: + """Shorten one manifest field.""" + value = " ".join((text or "").split()) + if len(value) <= max_chars: + return value + return value[: max_chars - 3].rstrip() + "..." 
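One subtle rule in `_prepare_durable_record` above deserves a note: a draft marked `decision` that lacks either the decision text or its rationale is demoted to a plain `fact` rather than dropped. A tiny standalone sketch of just that rule (the real code also normalizes kinds against `lerim.context.spec`):

```python
def demote_incomplete_decision(
    kind: str, decision: str | None, why: str | None
) -> str:
    # A durable decision must carry both what was decided and why;
    # otherwise it is stored as a fact so the signal is not lost.
    if kind == "decision" and not (decision and why):
        return "fact"
    return kind


assert demote_incomplete_decision("decision", "use SQLite", None) == "fact"
assert demote_incomplete_decision("decision", "use SQLite", "zero-ops") == "decision"
assert demote_incomplete_decision("fact", None, None) == "fact"
```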
diff --git a/src/lerim/agents/extract/state.py b/src/lerim/agents/extract/state.py new file mode 100644 index 0000000..a0858dd --- /dev/null +++ b/src/lerim/agents/extract/state.py @@ -0,0 +1,24 @@ +"""LangGraph state for BAML extraction.""" + +from __future__ import annotations + +import operator +from typing import Annotated, Any +from typing_extensions import TypedDict + + +class WindowExtractGraphState(TypedDict, total=False): + """State for the windowed BAML extraction pipeline.""" + + observations: Annotated[list[dict[str, Any]], operator.add] + llm_calls: int + next_line: int + trace_total_lines: int + current_window: dict[str, Any] + episode_updates: Annotated[list[str], operator.add] + durable_findings: Annotated[list[dict[str, Any]], operator.add] + implementation_findings: Annotated[list[dict[str, Any]], operator.add] + discarded_noise: Annotated[list[str], operator.add] + synthesized: Any + done: bool + completion_summary: str diff --git a/src/lerim/agents/extract/types.py b/src/lerim/agents/extract/types.py new file mode 100644 index 0000000..d2654ef --- /dev/null +++ b/src/lerim/agents/extract/types.py @@ -0,0 +1,37 @@ +"""Public types returned by the extract flow.""" + +from __future__ import annotations + +from typing import Any + +from pydantic import BaseModel, Field + + +class ExtractionEvent(BaseModel): + """One structured event emitted by the extract graph.""" + + action: str + ok: bool = True + content: str = "" + args: dict[str, Any] = Field(default_factory=dict) + done: bool = False + completion_summary: str = "" + + +class ExtractionResult(BaseModel): + """Structured output for the extract flow.""" + + completion_summary: str = Field(description="Short plain-text completion summary") + + +class ExtractionRunDetails(BaseModel): + """Structured trace for one extract run.""" + + events: list[ExtractionEvent] = Field(default_factory=list) + llm_calls: int = 0 + done: bool = False + context_db_path: str + project_id: str + session_id: str + model_name: str + trace_total_lines: int = 0 diff --git a/src/lerim/agents/extract/windowing.py b/src/lerim/agents/extract/windowing.py new file mode 100644 index 0000000..7e8be36 --- /dev/null +++ b/src/lerim/agents/extract/windowing.py @@ -0,0 +1,122 @@ +"""Deterministic trace windowing for the extract graph.""" + +from __future__ import annotations + +import math +from pathlib import Path +from typing import Any + +from lerim.agents.extract.state import WindowExtractGraphState + +TRACE_MAX_LINE_BYTES = 5_000 +TRACE_MAX_CHUNK_BYTES = 50_000 +MODEL_CONTEXT_TOKEN_LIMIT = 200_000 +CONTEXT_SOFT_PRESSURE_PCT = 0.60 +WINDOW_RESERVE_TOKENS = 30_000 +MIN_WINDOW_CHARS = 20_000 +MAX_WINDOW_CHARS = TRACE_MAX_CHUNK_BYTES +_TOKENS_PER_CHAR = 0.25 + + +def trace_line_count(trace_path: Path) -> int: + """Return the number of lines in a trace file.""" + try: + return sum(1 for _ in trace_path.open("r", encoding="utf-8")) + except OSError: + return 0 + + +def compute_request_budget(trace_path: Path) -> int: + """Scale extract request budget from trace size.""" + try: + line_count = 0 + estimated_bytes = 0 + with trace_path.open("r", encoding="utf-8") as handle: + for line in handle: + line_count += 1 + estimated_bytes += min( + len(line.rstrip("\n").encode("utf-8")), + TRACE_MAX_LINE_BYTES, + ) + except OSError: + return 50 + line_limited_calls = max(1, math.ceil(estimated_bytes / MAX_WINDOW_CHARS)) + if line_count == 0: + return 50 + overhead = 80 + return max(50, line_limited_calls + overhead) + + +def window_char_budget( + *, + state: 
WindowExtractGraphState, + run_instruction: str, + existing_record_manifest: str, + episode_summary: str, + durable_findings_summary: str, + implementation_summary: str, +) -> int: + """Compute how much raw trace text can fit in the next scan window.""" + soft_tokens = int(MODEL_CONTEXT_TOKEN_LIMIT * CONTEXT_SOFT_PRESSURE_PCT) + state_text = "\n".join( + [ + run_instruction, + existing_record_manifest, + episode_summary, + durable_findings_summary, + implementation_summary, + ] + ) + state_tokens = math.ceil(len(state_text) * _TOKENS_PER_CHAR) + available_tokens = max( + MIN_WINDOW_CHARS * _TOKENS_PER_CHAR, + soft_tokens - WINDOW_RESERVE_TOKENS - state_tokens, + ) + return min( + MAX_WINDOW_CHARS, + max(MIN_WINDOW_CHARS, int(available_tokens / _TOKENS_PER_CHAR)), + ) + + +def read_trace_window( + *, + trace_path: Path, + start_line: int, + total_lines: int, + char_budget: int, +) -> dict[str, Any]: + """Read as many complete trace lines as fit in the character budget.""" + numbered: list[str] = [] + current_chars = 0 + end_line = start_line - 1 + with trace_path.open("r", encoding="utf-8") as handle: + for line_number, raw_line in enumerate(handle, start=1): + if line_number < start_line: + continue + line = raw_line.rstrip("\n") + if len(line) > TRACE_MAX_LINE_BYTES: + dropped = len(line) - TRACE_MAX_LINE_BYTES + line = ( + line[:TRACE_MAX_LINE_BYTES] + + f" ... [truncated {dropped} chars from this line]" + ) + rendered = f"{line_number}\t{line}" + if numbered and current_chars + len(rendered) + 1 > char_budget: + break + numbered.append(rendered) + current_chars += len(rendered) + 1 + end_line = line_number + if current_chars >= char_budget: + break + if not numbered and start_line <= total_lines: + numbered.append(f"{start_line}\t") + end_line = start_line + header = f"[{total_lines} lines, window {start_line}-{end_line}]" + if end_line < total_lines: + header += f" - next window starts at line {end_line + 1}" + return { + "start_line": start_line, + "end_line": end_line, + "header": header, + "text": header + "\n" + "\n".join(numbered), + } diff --git a/src/lerim/agents/history_processors.py b/src/lerim/agents/history_processors.py deleted file mode 100644 index 555c193..0000000 --- a/src/lerim/agents/history_processors.py +++ /dev/null @@ -1,148 +0,0 @@ -"""History processors for extract-agent orchestration. - -These are not tools. They deterministically rewrite or augment the message -history between model turns so the extractor can stay within context limits -while preserving its own intermediate notes. 
-""" - -from __future__ import annotations - -import json -import math -from collections import Counter -from dataclasses import replace - -from pydantic_ai import RunContext -from pydantic_ai.messages import ( - ModelMessage, - ModelRequest, - SystemPromptPart, - ToolCallPart, - ToolReturnPart, -) - -from lerim.agents.tools import ( - MODEL_CONTEXT_TOKEN_LIMIT, - _TOKENS_PER_CHAR, - _classify_context_pressure, - _first_uncovered_offset, - ContextDeps, -) -from lerim.context.spec import DURABLE_FINDING_LEVELS, IMPLEMENTATION_FINDING_LEVELS - -PRUNED_STUB = "[pruned]" - - -def notes_state_injector( - ctx: RunContext[ContextDeps], - history: list[ModelMessage], -) -> list[ModelMessage]: - """Inject a compact notes dashboard into the next model request.""" - findings = ctx.deps.notes - if not findings: - summary = "NOTES: 0 findings" - if ctx.deps.findings_checked: - summary += " (checkpoint recorded)" - else: - counts = Counter(f.level for f in findings) - durable_findings = [f for f in findings if f.level in DURABLE_FINDING_LEVELS] - theme_source = durable_findings or findings - themes = Counter(f.theme for f in theme_source) - durable = sum(counts.get(level, 0) for level in DURABLE_FINDING_LEVELS) - implementation = sum( - counts.get(level, 0) for level in IMPLEMENTATION_FINDING_LEVELS - ) - top_themes = ", ".join( - f"{theme}({count})" for theme, count in themes.most_common(5) - ) - summary = ( - f"NOTES: {len(findings)} findings ({durable} durable, {implementation} implementation) " - f"across {len(themes)} theme(s)" - ) - if top_themes: - summary += f"\nTop themes: {top_themes}" - if ctx.deps.read_ranges: - next_uncovered = _first_uncovered_offset( - ctx.deps.read_ranges, int(ctx.deps.trace_total_lines) - ) - covered_chunks = len( - {(int(start), int(end)) for start, end in ctx.deps.read_ranges} - ) - summary += ( - f"\nTrace reads: {covered_chunks} chunk(s)" - f"\nNext unread offset: {next_uncovered if next_uncovered is not None else 'none'}" - f"\nPruned offsets: {sorted(ctx.deps.pruned_offsets) if ctx.deps.pruned_offsets else 'none'}" - ) - injected = list(history) - injected.append(ModelRequest(parts=[SystemPromptPart(content=summary)])) - return injected - - -def context_pressure_injector( - ctx: RunContext[ContextDeps], - history: list[ModelMessage], -) -> list[ModelMessage]: - """Inject approximate context-pressure information into the next model request.""" - chars = 0 - for message in history: - parts = getattr(message, "parts", []) or [] - for part in parts: - content = getattr(part, "content", None) - if isinstance(content, str): - chars += len(content) - elif content is not None: - chars += len(json.dumps(content, ensure_ascii=True)) - approx_tokens = math.ceil(chars * _TOKENS_PER_CHAR) - pct = approx_tokens / MODEL_CONTEXT_TOKEN_LIMIT - pressure = _classify_context_pressure(pct) - ctx.deps.last_context_tokens = approx_tokens - ctx.deps.last_context_fill_ratio = pct - summary = ( - f"CONTEXT: {approx_tokens}/{MODEL_CONTEXT_TOKEN_LIMIT} ({pct:.0%}) [{pressure}]" - ) - injected = list(history) - injected.append(ModelRequest(parts=[SystemPromptPart(content=summary)])) - return injected - - -def prune_history_processor( - ctx: RunContext[ContextDeps], - history: list[ModelMessage], -) -> list[ModelMessage]: - """Rewrite prior read_trace results to tiny stubs for pruned offsets.""" - if not ctx.deps.pruned_offsets: - return history - pruned = set(ctx.deps.pruned_offsets) - rewritten: list[ModelMessage] = [] - pending_offset: int | None = None - for message in history: - parts = 
getattr(message, "parts", []) or [] - new_parts = [] - for part in parts: - if ( - isinstance(part, ToolCallPart) - and getattr(part, "tool_name", "") == "read_trace" - ): - args = getattr(part, "args", None) - offset = None - if isinstance(args, dict): - try: - offset = max(0, int(args.get("start_line", 1)) - 1) - except Exception: - offset = 0 - pending_offset = offset - new_parts.append(part) - continue - if ( - isinstance(part, ToolReturnPart) - and pending_offset in pruned - and isinstance(part.content, str) - ): - new_parts.append(replace(part, content=PRUNED_STUB)) - pending_offset = None - continue - new_parts.append(part) - if isinstance(part, ToolReturnPart): - pending_offset = None - rewritten.append(replace(message, parts=new_parts)) - return rewritten diff --git a/src/lerim/agents/tools.py b/src/lerim/agents/tools.py index bb9b56f..6cf7d7a 100644 --- a/src/lerim/agents/tools.py +++ b/src/lerim/agents/tools.py @@ -3,68 +3,25 @@ from __future__ import annotations import json -import math from dataclasses import dataclass, field from pathlib import Path from typing import Any, Literal -from pydantic import BaseModel, ConfigDict, Field, field_validator -from pydantic import ValidationError from pydantic_ai import ModelRetry, RunContext from lerim.context import ContextStore, ProjectIdentity from lerim.context.spec import ( - ALLOWED_FINDING_LEVELS, ALLOWED_KINDS, ALLOWED_STATUSES, RECORD_TYPED_FIELDS, - format_allowed_finding_levels, - normalize_finding_level, normalize_record_kind, normalize_record_status, record_validation_message, ) -TRACE_MAX_LINES_PER_READ = 100 -TRACE_MAX_LINE_BYTES = 5_000 -TRACE_MAX_CHUNK_BYTES = 50_000 -MODEL_CONTEXT_TOKEN_LIMIT = 200_000 -CONTEXT_SOFT_PRESSURE_PCT = 0.60 -CONTEXT_HARD_PRESSURE_PCT = 0.80 -_TOKENS_PER_CHAR = 0.25 - DetailLevel = Literal["concise", "detailed"] -class TraceFinding(BaseModel): - """Structured extract finding captured during trace scanning.""" - - model_config = ConfigDict(extra="forbid") - - theme: str = Field(description="Short theme label for the finding.") - line: int = Field(ge=1, description="1-based trace line with supporting evidence.") - quote: str = Field(description="Short verbatim evidence snippet from the trace.") - level: str = Field( - description=( - "Signal level: use durable levels only for reusable project context. " - "Use `implementation` for dead ends, discarded hypotheses, trace-local noise, " - "and supporting evidence that should not become its own durable theme. " - "Allowed levels: " - f"{format_allowed_finding_levels()}." 
- ) - ) - - @field_validator("level") - @classmethod - def validate_level(cls, value: str) -> str: - """Validate finding levels against the shared canonical spec.""" - normalized = normalize_finding_level(value) - if normalized not in ALLOWED_FINDING_LEVELS: - allowed = ", ".join(ALLOWED_FINDING_LEVELS) - raise ValueError(f"level must be one of: {allowed}") - return normalized - - @dataclass class ContextDeps: """Dependencies and per-run state shared across tool calls.""" @@ -73,16 +30,7 @@ class ContextDeps: project_identity: ProjectIdentity session_id: str project_ids: list[str] | None = None - trace_path: Path | None = None - session_started_at: str = "" - trace_total_lines: int = 0 - read_ranges: list[tuple[int, int]] = field(default_factory=list) - notes: list[TraceFinding] = field(default_factory=list) - findings_checked: bool = False - pruned_offsets: set[int] = field(default_factory=set) fetched_context_record_ids: set[str] = field(default_factory=set) - last_context_tokens: int = 0 - last_context_fill_ratio: float = 0.0 def _store(ctx: RunContext[ContextDeps]) -> ContextStore: @@ -93,143 +41,6 @@ def _store(ctx: RunContext[ContextDeps]) -> ContextStore: return store -def _source_session_started_at( - ctx: RunContext[ContextDeps], store: ContextStore -) -> str: - """Return the source session start timestamp for record provenance.""" - explicit = str(ctx.deps.session_started_at or "").strip() - if explicit: - return explicit - session_id = str(ctx.deps.session_id or "").strip() - if not session_id: - return "" - with store.connect() as conn: - row = conn.execute( - """ - SELECT started_at - FROM sessions - WHERE session_id = ? AND project_id = ? - """, - (session_id, ctx.deps.project_identity.project_id), - ).fetchone() - if row is None: - return "" - return str(row["started_at"] or "").strip() - - -def _trace_lines(trace_path: Path) -> list[str]: - """Read the current trace file into a list of lines.""" - return trace_path.read_text(encoding="utf-8").splitlines() - - -def _read_offsets(ctx: RunContext[ContextDeps]) -> list[int]: - """Return unique trace-read offsets in order.""" - return sorted({int(start) for start, _end in ctx.deps.read_ranges}) - - -def _older_read_offsets(ctx: RunContext[ContextDeps]) -> list[int]: - """Return older read offsets, keeping the newest chunk in context.""" - offsets = _read_offsets(ctx) - if len(offsets) <= 1: - return [] - return offsets[:-1] - - -def _classify_context_pressure(fill_ratio: float) -> str: - """Convert current fill ratio into a user-facing pressure label.""" - if fill_ratio >= CONTEXT_HARD_PRESSURE_PCT: - return "hard" - if fill_ratio >= CONTEXT_SOFT_PRESSURE_PCT: - return "soft" - return "normal" - - -def _auto_prune_before_trace_read( - ctx: RunContext[ContextDeps], offset: int -) -> list[int]: - """Prune old trace reads under context pressure before returning more trace.""" - if offset <= 0: - return [] - fill_ratio = float(ctx.deps.last_context_fill_ratio or 0.0) - if fill_ratio < CONTEXT_SOFT_PRESSURE_PCT: - return [] - older_offsets = _older_read_offsets(ctx) - if not older_offsets: - return [] - before = set(ctx.deps.pruned_offsets) - ctx.deps.pruned_offsets.update(older_offsets) - return sorted(ctx.deps.pruned_offsets - before) - - -def read_trace( - ctx: RunContext[ContextDeps], start_line: int = 1, line_count: int = 100 -) -> str: - """Read the next numbered trace chunk from the source session. - - Args: - start_line: 1-based first line to read. 
After scanning starts, - overlapping or out-of-order values advance to the first unread line. - line_count: Maximum lines to return, capped by Lerim. - """ - trace_path = ctx.deps.trace_path - if trace_path is None: - return "Error: no trace path configured" - lines = _trace_lines(trace_path) - total = len(lines) - ctx.deps.trace_total_lines = total - offset = max(0, int(start_line) - 1) - adjusted_from: int | None = None - next_unread = _first_uncovered_offset(ctx.deps.read_ranges, total) - if next_unread is None and ctx.deps.read_ranges: - return ( - f"[{total} lines, trace coverage complete] " - "All trace lines have already been read. Save the episode and any durable records now." - ) - if next_unread is not None and ctx.deps.read_ranges and offset != next_unread: - adjusted_from = offset - offset = next_unread - if offset >= total and total > 0: - raise ModelRetry( - f"read_trace start_line {start_line} is past the end of the trace. " - f"Use a start_line from 1 to {max(1, total)}." - ) - if line_count <= 0 or line_count > TRACE_MAX_LINES_PER_READ: - line_count = TRACE_MAX_LINES_PER_READ - auto_pruned = _auto_prune_before_trace_read(ctx, offset) - chunk = lines[offset : offset + line_count] - safe_chunk: list[str] = [] - running_bytes = 0 - for line in chunk: - if len(line) > TRACE_MAX_LINE_BYTES: - dropped = len(line) - TRACE_MAX_LINE_BYTES - line = ( - line[:TRACE_MAX_LINE_BYTES] - + f" ... [truncated {dropped} chars from this line]" - ) - line_bytes = len(line.encode("utf-8")) - if running_bytes + line_bytes > TRACE_MAX_CHUNK_BYTES: - break - safe_chunk.append(line) - running_bytes += line_bytes - numbered = [ - f"{offset + index + 1}\t{line}" for index, line in enumerate(safe_chunk) - ] - last_line = offset + len(safe_chunk) - ctx.deps.read_ranges.append((int(offset), int(last_line))) - header = f"[{total} lines, showing {offset + 1}-{last_line}]" - if adjusted_from is not None: - header += f" [advanced from requested line {adjusted_from + 1} to first unread line {offset + 1}]" - if auto_pruned: - pruned_lines = ", ".join(str(item + 1) for item in auto_pruned) - header += f" [auto-pruned older read_trace start lines: {pruned_lines}]" - if last_line < total: - header += ( - f" — {total - last_line} more lines, call " - f"read_trace(start_line={last_line + 1}, line_count={TRACE_MAX_LINES_PER_READ}) for the next chunk" - ) - return header + "\n" + "\n".join(numbered) - - def search_context( ctx: RunContext[ContextDeps], query: str, @@ -675,155 +486,6 @@ def _maybe_raise_record_retry(exc: ValueError) -> None: ) from exc -def _trace_line_count(ctx: RunContext[ContextDeps]) -> int: - """Return and cache the current trace line count.""" - trace_path = ctx.deps.trace_path - if trace_path is None: - return 0 - total_lines = int(ctx.deps.trace_total_lines) - if total_lines > 0: - return total_lines - try: - total_lines = sum(1 for _ in trace_path.open("r", encoding="utf-8")) - except OSError: - return 0 - ctx.deps.trace_total_lines = total_lines - return total_lines - - -def _require_trace_ready_for_write( - ctx: RunContext[ContextDeps], changes: dict[str, Any] | None = None -) -> None: - """Require trace coverage and note discipline before extract writes.""" - trace_path = ctx.deps.trace_path - if trace_path is None: - return - total_lines = _trace_line_count(ctx) - if total_lines <= 0: - return - if not ctx.deps.read_ranges: - raise ModelRetry( - "No trace lines have been read yet. 
" - f"Call read_trace(start_line=1, line_count={TRACE_MAX_LINES_PER_READ}) " - "before you create or update records." - ) - next_offset = _first_uncovered_offset(ctx.deps.read_ranges, total_lines) - if next_offset is not None: - raise ModelRetry( - "Unread trace lines remain. " - f"Continue reading with read_trace(start_line={next_offset + 1}, line_count={TRACE_MAX_LINES_PER_READ}) " - "before you create or update records." - ) - is_archived_episode = ( - changes is not None - and changes.get("kind") == "episode" - and changes.get("status") == "archived" - ) - if ( - total_lines > TRACE_MAX_LINES_PER_READ - and not ctx.deps.notes - and not ctx.deps.findings_checked - and not is_archived_episode - ): - raise ModelRetry( - "This trace is longer than one read_trace chunk. " - "Call note_trace_findings once for each strongest durable or implementation finding, " - "or call it with no arguments if the full trace has no reusable signal, then create or update records." - ) - - -def _first_uncovered_offset( - read_ranges: list[tuple[int, int]], total_lines: int -) -> int | None: - """Return the first unread trace offset, or None when coverage is complete.""" - if total_lines <= 0: - return None - merged: list[tuple[int, int]] = [] - for start, end in sorted(read_ranges): - start = max(0, int(start)) - end = max(start, int(end)) - if not merged or start > merged[-1][1]: - merged.append((start, end)) - continue - merged[-1] = (merged[-1][0], max(merged[-1][1], end)) - expected = 0 - for start, end in merged: - if start > expected: - return expected - expected = max(expected, end) - if expected >= total_lines: - return None - if expected < total_lines: - return expected - return None - - -def save_context( - ctx: RunContext[ContextDeps], - kind: str, - title: str, - body: str, - status: str = "active", - valid_from: str | None = None, - valid_until: str | None = None, - decision: str | None = None, - why: str | None = None, - alternatives: str | None = None, - consequences: str | None = None, - user_intent: str | None = None, - what_happened: str | None = None, - outcomes: str | None = None, -) -> str: - """Save one context record. - - For kind="episode", provide both user_intent and what_happened. - For kind="decision", provide both decision and why. 
- """ - changes = _context_changes( - kind=kind, - title=title, - body=body, - status=status, - valid_from=valid_from, - valid_until=valid_until, - decision=decision, - why=why, - alternatives=alternatives, - consequences=consequences, - user_intent=user_intent, - what_happened=what_happened, - outcomes=outcomes, - ) - _require_trace_ready_for_write(ctx, changes) - store = _store(ctx) - project_id = ctx.deps.project_identity.project_id - session_id = ctx.deps.session_id - source_started_at = _source_session_started_at(ctx, store) - try: - result = store.create_record( - project_id=project_id, - session_id=session_id, - kind=changes["kind"], - title=changes["title"], - body=changes["body"], - status=changes["status"], - created_at=source_started_at or None, - valid_from=changes["valid_from"], - valid_until=changes["valid_until"], - decision=changes["decision"], - why=changes["why"], - alternatives=changes["alternatives"], - consequences=changes["consequences"], - user_intent=changes["user_intent"], - what_happened=changes["what_happened"], - outcomes=changes["outcomes"], - ) - except ValueError as exc: - _maybe_raise_record_retry(exc) - raise - return json.dumps({"ok": True, "result": result}, ensure_ascii=True, indent=2) - - def revise_context( ctx: RunContext[ContextDeps], record_id: str, @@ -887,7 +549,6 @@ def revise_context( what_happened=what_happened, outcomes=outcomes, ) - _require_trace_ready_for_write(ctx, changes) if changes["kind"] != existing["kind"]: raise ModelRetry( "revise_context cannot change a record's kind. " @@ -986,102 +647,7 @@ def supersede_context( return json.dumps({"ok": True, "result": result}, ensure_ascii=True, indent=2) -def note_trace_findings( - ctx: RunContext[ContextDeps], - theme: str = "", - line: int | str = 0, - quote: str = "", - level: str = "implementation", -) -> str: - """Record one trace finding with line evidence, or call with no args for none.""" - if not str(theme or "").strip() and not str(quote or "").strip() and not line: - ctx.deps.findings_checked = True - return "No findings recorded; trace findings checkpoint saved." - try: - line_number = int(_clean_scalar(line) or 0) - except (TypeError, ValueError) as exc: - raise ModelRetry("Finding line must be a 1-based trace line number.") from exc - try: - finding = TraceFinding( - theme=str(theme or "").strip(), - line=line_number, - quote=str(quote or "").strip(), - level=str(_clean_scalar(level) or "").strip(), - ) - except ValidationError as exc: - raise ModelRetry( - "Finding must include a valid 1-based line and level. " - f"Allowed levels: {format_allowed_finding_levels()}." - ) from exc - ctx.deps.notes.append(finding) - ctx.deps.findings_checked = True - total = len(ctx.deps.notes) - return f"Noted 1 finding (total {total} so far)." - - -def prune_trace_reads(ctx: RunContext[ContextDeps], start_lines: list[int]) -> str: - """Prune earlier read_trace results after findings are noted. - - Args: - start_lines: 1-based start lines from earlier read_trace calls. - """ - if not start_lines: - return "No trace reads to prune." - read_offsets = set(_read_offsets(ctx)) - requested = {max(0, int(line) - 1) for line in start_lines} - unknown_offsets = sorted(requested - read_offsets) - if unknown_offsets: - known = ", ".join(str(offset + 1) for offset in sorted(read_offsets)) or "none" - bad = ", ".join(str(offset + 1) for offset in unknown_offsets) - raise ModelRetry( - f"Cannot prune unread trace start line(s): {bad}. 
" - f"Only previously read start lines can be pruned; read start lines: {known}." - ) - before = len(ctx.deps.pruned_offsets) - ctx.deps.pruned_offsets.update(requested) - added = len(ctx.deps.pruned_offsets) - before - return f"Pruned {added} new trace read(s); total pruned: {len(ctx.deps.pruned_offsets)}." - - -def compute_request_budget(trace_path: Path) -> int: - """Scale extract request budget from trace size. - - Budget from the actual number of trace reads plus room for notes, pruning, - writes, final validation, and retries. Long traces are expensive in tool - calls; under-budgeting them turns otherwise recoverable sessions into - request-limit failures. - """ - try: - line_count = 0 - estimated_bytes = 0 - with trace_path.open("r", encoding="utf-8") as fh: - for line in fh: - line_count += 1 - estimated_bytes += min( - len(line.rstrip("\n").encode("utf-8")), - TRACE_MAX_LINE_BYTES, - ) - except OSError: - return 50 - read_calls = max(1, math.ceil(line_count / TRACE_MAX_LINES_PER_READ)) - byte_limited_calls = max(1, math.ceil(estimated_bytes / TRACE_MAX_CHUNK_BYTES)) - read_calls = max(read_calls, byte_limited_calls) - if read_calls == 1: - return 50 - prune_cycles = max(0, read_calls - 1) - overhead = 80 - return max(50, read_calls + prune_cycles + overhead) - - if __name__ == "__main__": - """Run a small smoke check for request budget logic.""" - import tempfile - - with tempfile.TemporaryDirectory() as tmp: - trace_path = Path(tmp) / "trace.jsonl" - trace_path.write_text( - "\n".join(f"line {i}" for i in range(240)), encoding="utf-8" - ) - budget = compute_request_budget(trace_path) - assert budget >= 20 - print("agent tools: self-test passed") + """Run a small smoke check for context-tool helpers.""" + assert _normalize_kind("FACT") == "fact" + print("agent tools: self-test passed") diff --git a/src/lerim/agents/toolsets.py b/src/lerim/agents/toolsets.py index 8dd4060..9d8552e 100644 --- a/src/lerim/agents/toolsets.py +++ b/src/lerim/agents/toolsets.py @@ -7,37 +7,20 @@ count_context, get_context, list_context, - note_trace_findings, - prune_trace_reads, - read_trace, revise_context, - save_context, search_context, supersede_context, ) from lerim.agents.mlflow_observability import trace_mlflow_tool -READ_TRACE_TOOL_NAME = "read_trace" SEARCH_CONTEXT_TOOL_NAME = "search_context" GET_CONTEXT_TOOL_NAME = "get_context" -SAVE_CONTEXT_TOOL_NAME = "save_context" REVISE_CONTEXT_TOOL_NAME = "revise_context" -NOTE_TRACE_FINDINGS_TOOL_NAME = "note_trace_findings" -PRUNE_TRACE_READS_TOOL_NAME = "prune_trace_reads" LIST_CONTEXT_TOOL_NAME = "list_context" ARCHIVE_CONTEXT_TOOL_NAME = "archive_context" SUPERSEDE_CONTEXT_TOOL_NAME = "supersede_context" COUNT_CONTEXT_TOOL_NAME = "count_context" -EXTRACT_TOOLS = ( - trace_mlflow_tool(read_trace), - trace_mlflow_tool(search_context), - trace_mlflow_tool(get_context), - trace_mlflow_tool(save_context), - trace_mlflow_tool(revise_context), - trace_mlflow_tool(note_trace_findings), - trace_mlflow_tool(prune_trace_reads), -) MAINTAIN_TOOLS = ( trace_mlflow_tool(list_context), trace_mlflow_tool(search_context), @@ -53,17 +36,6 @@ trace_mlflow_tool(get_context), ) -EXTRACT_TOOL_NAMES = frozenset( - { - READ_TRACE_TOOL_NAME, - SEARCH_CONTEXT_TOOL_NAME, - GET_CONTEXT_TOOL_NAME, - SAVE_CONTEXT_TOOL_NAME, - REVISE_CONTEXT_TOOL_NAME, - NOTE_TRACE_FINDINGS_TOOL_NAME, - PRUNE_TRACE_READS_TOOL_NAME, - } -) MAINTAIN_TOOL_NAMES = frozenset( { LIST_CONTEXT_TOOL_NAME, @@ -82,4 +54,4 @@ GET_CONTEXT_TOOL_NAME, } ) -CURRENT_AGENT_TOOL_NAMES = EXTRACT_TOOL_NAMES | 
MAINTAIN_TOOL_NAMES | ASK_TOOL_NAMES +CURRENT_AGENT_TOOL_NAMES = MAINTAIN_TOOL_NAMES | ASK_TOOL_NAMES diff --git a/src/lerim/config/default.toml b/src/lerim/config/default.toml index c70acce..b1d6512 100644 --- a/src/lerim/config/default.toml +++ b/src/lerim/config/default.toml @@ -27,11 +27,8 @@ model = "MiniMax-M2.7" # package default model for minimax provider api_base = "" # Model names are auto-normalized per provider (e.g. minimax-m2.5 → MiniMax-M2.5 for minimax provider). fallback_models = [] # disabled for now — ensure exact model is used, no silent fallback -# PydanticAI single-pass sync auto-scales its request budget from trace -# size via lerim.agents.tools.compute_request_budget(trace_path). Small -# traces get a 40-turn floor; 2000-line traces get a modest increase; -# pathological inputs clamp at 100. The formula lives in tools.py and is the single -# source of truth — no per-pass limits in config. +# BAML/LangGraph sync reads deterministic trace windows and derives its +# extraction budget from the trace size in lerim.agents.extract.windowing. max_iters_maintain = 50 # max request turns for maintain flow max_iters_ask = 20 # max request turns for ask flow # MiniMax-M2 official sampling preset (from HuggingFace model card): diff --git a/src/lerim/config/providers.py b/src/lerim/config/providers.py index 4e11b0d..8d97575 100644 --- a/src/lerim/config/providers.py +++ b/src/lerim/config/providers.py @@ -359,7 +359,8 @@ def _build_minimax_anthropic_model( MiniMax M2.7 emits proper Anthropic ``tool_use`` blocks via the ``/anthropic`` endpoint. M2.5's tool calling is broken on both - endpoints. This is the production path for the extract agent. + endpoints. This remains the PydanticAI path for maintain, ask, and + working-memory flows. Uses ``AsyncAnthropic(max_retries=5)`` for HTTP-level retries — the Anthropic SDK handles 429/5xx natively. diff --git a/src/lerim/config/settings.py b/src/lerim/config/settings.py index ab52a12..d73663c 100644 --- a/src/lerim/config/settings.py +++ b/src/lerim/config/settings.py @@ -46,10 +46,9 @@ class RoleConfig: top_k: int = 40 max_tokens: int = 32000 parallel_tool_calls: bool = True - # PydanticAI single-pass sync now auto-scales its request_limit from - # trace size via lerim.agents.tools.compute_request_budget(trace_path). - # No static extract-budget field on RoleConfig — the budget is derived - # at run start from the actual trace's line count, clamped [40, 100]. + # BAML/LangGraph sync derives its windowing budget from trace size in + # lerim.agents.extract.windowing. There is no static extract-budget field + # on RoleConfig. # PydanticAI request-turn limits for maintain/ask flows. max_iters_maintain: int = 30 max_iters_ask: int = 30 diff --git a/src/lerim/config/tracing.py b/src/lerim/config/tracing.py index 86403f5..bf2f54e 100644 --- a/src/lerim/config/tracing.py +++ b/src/lerim/config/tracing.py @@ -1,7 +1,7 @@ -"""MLflow tracing for PydanticAI agent observability. +"""MLflow tracing for Lerim agent observability. -Activates MLflow autologging for PydanticAI when ``LERIM_MLFLOW=true`` is set. -All PydanticAI agent/model/tool spans are captured automatically. +Activates MLflow when ``LERIM_MLFLOW=true`` is set. PydanticAI flows are +captured by autologging; the BAML/LangGraph extract flow emits explicit spans. Traces are stored under ``~/.lerim/observability/`` so observability files do not clutter the root of the Lerim home directory. 
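Aside: the tracing docstring above says the BAML/LangGraph extract flow emits explicit spans rather than relying on PydanticAI autologging. A minimal sketch of what one such span could look like, using MLflow's fluent tracing API; the span name, helper, and payload fields are illustrative assumptions, not this repository's actual instrumentation:

```python
# Illustrative only: an explicit MLflow span around one extract-graph step.
# The real instrumentation lives in lerim.agents.mlflow_observability and
# may record different names and fields.
import mlflow


def scan_window_with_span(window_text: str) -> list[dict]:
    # start_span opens a manual span; no autologging is involved here.
    with mlflow.start_span(name="scan_window") as span:
        span.set_inputs({"window_chars": len(window_text)})
        findings: list[dict] = []  # a real step would call the BAML ScanTraceWindow function
        span.set_outputs({"finding_count": len(findings)})
        return findings
```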
@@ -113,7 +113,7 @@ def _ensure_mlflow_schema(tracking_uri: str, db_path: str) -> None: def configure_tracing(config: Config, experiment_name: str = "lerim") -> None: - """Activate MLflow PydanticAI autologging if enabled via env/config. + """Activate MLflow tracing if enabled via env/config. Must be called once at startup before any agent is constructed. @@ -145,7 +145,7 @@ def _activate_mlflow() -> None: _ensure_mlflow_schema(tracking_uri, str(db_path)) _activate_mlflow() logger.info( - "MLflow tracing enabled (PydanticAI autolog) → sqlite:///{} experiment={}", + "MLflow tracing enabled → sqlite:///{} experiment={}", db_path, experiment_name, ) @@ -161,7 +161,7 @@ def _activate_mlflow() -> None: _ensure_mlflow_schema(tracking_uri, str(db_path)) _activate_mlflow() logger.info( - "MLflow tracing enabled (PydanticAI autolog) → sqlite:///{} experiment={}", + "MLflow tracing enabled → sqlite:///{} experiment={}", db_path, experiment_name, ) diff --git a/src/lerim/server/cli.py b/src/lerim/server/cli.py index e902b18..d0e7638 100644 --- a/src/lerim/server/cli.py +++ b/src/lerim/server/cli.py @@ -1635,7 +1635,7 @@ def build_parser() -> argparse.ArgumentParser: formatter_class=_F, help="Index new sessions and extract context records (hot path)", description=( - "Index new sessions and extract context records via PydanticAI.\n\n" + "Index new sessions and extract context records via BAML/LangGraph.\n\n" "Examples:\n" " lerim sync # default 7d window\n" " lerim sync --window 30d # last 30 days\n" diff --git a/src/lerim/server/runtime.py b/src/lerim/server/runtime.py index 9ac2e88..8c01028 100644 --- a/src/lerim/server/runtime.py +++ b/src/lerim/server/runtime.py @@ -1,6 +1,7 @@ -"""Runtime orchestrator for Lerim sync, maintain, and ask (PydanticAI only). +"""Runtime orchestrator for Lerim sync, maintain, and ask. -All three flows run through PydanticAI models and shared retry/fallback logic. +Sync uses the BAML/LangGraph extract harness. Maintain, ask, and working-memory +synthesis still use PydanticAI until those flows are migrated. 
""" from __future__ import annotations @@ -20,7 +21,7 @@ SyncResultContract, WorkingMemoryResultContract, ) -from lerim.agents.extract import ExtractionResult, run_extraction +from lerim.agents.extract import ExtractionRunDetails, run_extraction from lerim.agents.maintain import run_maintain from lerim.agents.mlflow_observability import finish_mlflow_run, lerim_mlflow_run from lerim.agents.working_memory import run_working_memory_synthesis @@ -150,6 +151,12 @@ def _write_agent_trace(path: Path, messages: list[ModelMessage]) -> None: path.write_text(json.dumps(trace_data, indent=2), encoding="utf-8") +def _write_extract_agent_trace(path: Path, details: ExtractionRunDetails) -> None: + """Serialize BAML/LangGraph extract events to a stable JSON artifact.""" + payload = [event.model_dump(mode="json") for event in details.events] + path.write_text(json.dumps(payload, ensure_ascii=True, indent=2), encoding="utf-8") + + def _write_error_artifact(path: Path, exc: Exception) -> None: """Persist a compact, structured failure artifact for a run.""" _write_json_artifact( @@ -397,7 +404,7 @@ def _is_quota_error_pydantic(exc: Exception) -> bool: class LerimRuntime: - """Runtime orchestrator — PydanticAI sync, maintain, and ask.""" + """Runtime orchestrator for sync, maintain, ask, and working memory.""" def __init__( self, @@ -565,21 +572,6 @@ def _sync_inner( {"ts": manifest["started_at"], "event": "started", "run_id": run_id}, ) - def _primary_builder() -> Any: - return build_pydantic_model("agent", config=self.config) - - def _call(model: Any) -> tuple[ExtractionResult, list[ModelMessage]]: - return run_extraction( - context_db_path=self.config.context_db_path, - project_identity=project_identity, - session_id=resolved_session_id, - trace_path=trace_file, - model=model, - run_folder=run_folder, - session_started_at=str(session_meta.get("started_at") or ""), - return_messages=True, - ) - try: with lerim_mlflow_run( enabled=self.config.mlflow_enabled, @@ -591,17 +583,21 @@ def _call(model: Any) -> tuple[ExtractionResult, list[ModelMessage]]: run_folder=run_folder, request_preview=f"sync:{resolved_session_id}", ) as mlflow_run: - result, messages = self._run_with_fallback( - flow="sync", - callable_fn=_call, - model_builders=[_primary_builder], + result, details = run_extraction( + context_db_path=self.config.context_db_path, + project_identity=project_identity, + session_id=resolved_session_id, + trace_path=trace_file, + config=self.config, + session_started_at=str(session_meta.get("started_at") or ""), + return_details=True, ) response_text = (result.completion_summary or "").strip() or "(no response)" _write_text_with_newline(artifact_paths["agent_log"], response_text) try: - _write_agent_trace(artifact_paths["agent_trace"], messages) + _write_extract_agent_trace(artifact_paths["agent_trace"], details) except Exception as exc: logger.warning(f"[sync] Failed to write agent trace: {exc}") artifact_paths["agent_trace"].write_text("[]", encoding="utf-8") @@ -634,6 +630,7 @@ def _call(model: Any) -> tuple[ExtractionResult, list[ModelMessage]]: response_preview=response_text, outputs={ "completion_summary": response_text, + "llm_calls": details.llm_calls, "records_created": manifest["records_created"], "records_updated": manifest["records_updated"], "records_archived": manifest["records_archived"], diff --git a/src/lerim/skills/cli-reference.md b/src/lerim/skills/cli-reference.md index b1d324c..5044c2d 100644 --- a/src/lerim/skills/cli-reference.md +++ b/src/lerim/skills/cli-reference.md @@ -165,7 +165,7 
@@ lerim connect remove claude # disconnect Claude ### `lerim sync` Hot-path: discover new agent sessions from connected platforms, enqueue them, -and run PydanticAI extraction to create context records. +and run BAML/LangGraph extraction to create context records. Requires a running server (`lerim up` or `lerim serve`). **Time window** controls which sessions to scan: @@ -200,7 +200,7 @@ lerim sync --max-sessions 100 # process up to 100 sessions | `--dry-run` | off | Preview mode, no writes | Notes: -- `sync` is the hot path (queue + PydanticAI extraction + lead write). +- `sync` is the hot path (queue + BAML/LangGraph extraction + context write). - Normal backlog sync claims the newest available session per project first. - `--ignore-lock` exists only as a CLI-local debug flag and is intentionally not supported by `/api/sync`; skipping the writer lock risks corruption. - Cold maintenance work is not executed in `sync`. From af6c0bc591f2df38b26cca6ef9fb0a691b8c29b3 Mon Sep 17 00:00:00 2001 From: Isaac Kargar Date: Thu, 14 May 2026 10:43:11 +0300 Subject: [PATCH 5/8] Refactor extraction tools and update expectations - Replaced `read_trace` with `read_window` across test cases and expectations to match the new window-based extraction flow. - Introduced `EXTRACT_EVENT_NAMES` in place of `EXTRACT_TOOL_NAMES` so integration tests assert on extract-graph event names instead of PydanticAI tool calls. - Updated the tests README to describe the extract graph in place of the removed extraction tools. - Reworked the very-long-trace expectations around window reads and scans instead of pruning. This commit keeps the tests and documentation consistent with the window-based extraction flow.
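In concrete terms, the assertion change this patch makes is to collect action names from extract-graph events instead of PydanticAI tool-call parts. A minimal sketch of that pattern, with made-up event dicts; `EXTRACT_EVENT_NAMES` matches the `tests/live_helpers.py` hunk later in this patch, and `event_action_names` mirrors the `tests/integration/extract/helpers.py` change:

```python
# Sketch of the event-name extraction the updated test helpers rely on.
# Each graph event is serialized to a dict carrying an "action" field.
from typing import Any

EXTRACT_EVENT_NAMES = frozenset(
    {"read_window", "scan_window", "synthesize_records", "save_context", "model_retry"}
)


def event_action_names(events: list[dict[str, Any]]) -> list[str]:
    """Collect the ordered action names emitted by the extract graph."""
    return [str(event.get("action") or "") for event in events]


events = [
    {"action": "read_window", "ok": True},
    {"action": "scan_window", "ok": True},
    {"action": "save_context", "ok": True},
]
names = event_action_names(events)
assert set(names) <= EXTRACT_EVENT_NAMES  # the issubset check in the tests
assert names.count("read_window") >= 1    # the *_count_at_least pattern
```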
--- tests/README.md | 2 +- ...ous_search_hits_correct_update_target.yaml | 2 +- .../borderline_non_durable_incident.yaml | 3 +- .../extract/clear_decision_with_noise.yaml | 3 +- .../extract/constraint_extraction.yaml | 2 +- ...cision_without_why_falls_back_to_fact.yaml | 2 +- .../extract/duplicate_existing_record.yaml | 3 +- .../environment_fact_from_noisy_error.yaml | 3 +- .../late_disambiguation_at_end_of_trace.yaml | 6 +- .../extract/long_trace_requires_note.yaml | 10 +- .../extract/multi_record_trace.yaml | 3 +- .../extract/recap_temptation_trace.yaml | 3 +- .../extract/reference_extraction.yaml | 2 +- ...routine_operational_no_durable_record.yaml | 3 +- .../runtime_requirement_from_diagnostics.yaml | 3 +- .../extract/similar_but_new_decision.yaml | 2 +- .../extract/stable_user_preference.yaml | 3 +- ...yaml => very_long_trace_uses_windows.yaml} | 11 +- tests/integration/extract/helpers.py | 24 +- .../extract/test_context_pressure.py | 35 +- tests/integration/extract/test_core.py | 22 +- .../extract/test_dedup_and_abstention.py | 12 +- tests/integration/runtime/test_cases.py | 31 +- tests/integration/scope/test_cases.py | 30 +- tests/live_helpers.py | 13 +- tests/smoke/test_embeddings_indexed.py | 7 +- tests/smoke/test_extract_completes.py | 7 +- tests/unit/agents/conftest.py | 10 - tests/unit/agents/test_agent_build.py | 9 +- tests/unit/agents/test_tools.py | 812 ------------------ tests/unit/config/test_providers.py | 2 +- tests/unit/server/test_runtime.py | 41 +- 32 files changed, 170 insertions(+), 951 deletions(-) rename tests/fixtures/expectations/extract/{very_long_trace_requires_prune.yaml => very_long_trace_uses_windows.yaml} (74%) diff --git a/tests/README.md b/tests/README.md index a2b33b8..1fae2ff 100644 --- a/tests/README.md +++ b/tests/README.md @@ -110,7 +110,7 @@ The current system is: - canonical run artifacts in `~/.lerim/workspace/` - generated Working Memory artifacts in `~/.lerim/workspace/current//WORKING_MEMORY.md` - local semantic retrieval via ONNX embeddings + `sqlite-vec` + FTS5 + RRF -- extract tools: `read_trace`, `search_context`, `get_context`, `save_context`, `revise_context`, `note_trace_findings`, `prune_trace_reads` +- extract graph: deterministic window reads, BAML window scan, BAML record synthesis, context-store persistence - maintain tools: `list_context`, `search_context`, `get_context`, `revise_context`, `archive_context`, `supersede_context` - ask tools: `list_context`, `search_context`, `get_context`, `count_context` diff --git a/tests/fixtures/expectations/extract/ambiguous_search_hits_correct_update_target.yaml b/tests/fixtures/expectations/extract/ambiguous_search_hits_correct_update_target.yaml index e9245c2..4fc5fbc 100644 --- a/tests/fixtures/expectations/extract/ambiguous_search_hits_correct_update_target.yaml +++ b/tests/fixtures/expectations/extract/ambiguous_search_hits_correct_update_target.yaml @@ -10,7 +10,7 @@ expected: unchanged_record_ids: - rec_storage_routing must_use_tools: - - read_trace + - read_window - get_context - revise_context must_not_use_tools: [] diff --git a/tests/fixtures/expectations/extract/borderline_non_durable_incident.yaml b/tests/fixtures/expectations/extract/borderline_non_durable_incident.yaml index 10cc478..be81bc3 100644 --- a/tests/fixtures/expectations/extract/borderline_non_durable_incident.yaml +++ b/tests/fixtures/expectations/extract/borderline_non_durable_incident.yaml @@ -5,11 +5,10 @@ expected: episode_count: 1 durable_count: 0 must_use_tools: - - read_trace + - read_window - save_context 
must_not_use_tools: - get_context - - prune_trace_reads episode_text_must_include_any: - stale - fixture diff --git a/tests/fixtures/expectations/extract/clear_decision_with_noise.yaml b/tests/fixtures/expectations/extract/clear_decision_with_noise.yaml index c27c2e8..0faa235 100644 --- a/tests/fixtures/expectations/extract/clear_decision_with_noise.yaml +++ b/tests/fixtures/expectations/extract/clear_decision_with_noise.yaml @@ -6,10 +6,9 @@ expected: durable_count: 1 decision_count: 1 must_use_tools: - - read_trace + - read_window - save_context must_not_use_tools: - - prune_trace_reads - revise_context decision_text_must_include_any: - separate diff --git a/tests/fixtures/expectations/extract/constraint_extraction.yaml b/tests/fixtures/expectations/extract/constraint_extraction.yaml index bd6b666..39015ac 100644 --- a/tests/fixtures/expectations/extract/constraint_extraction.yaml +++ b/tests/fixtures/expectations/extract/constraint_extraction.yaml @@ -6,7 +6,7 @@ expected: durable_count: 1 constraint_count: 1 must_use_tools: - - read_trace + - read_window - save_context must_not_use_tools: - revise_context diff --git a/tests/fixtures/expectations/extract/decision_without_why_falls_back_to_fact.yaml b/tests/fixtures/expectations/extract/decision_without_why_falls_back_to_fact.yaml index 97f9e89..54e1e28 100644 --- a/tests/fixtures/expectations/extract/decision_without_why_falls_back_to_fact.yaml +++ b/tests/fixtures/expectations/extract/decision_without_why_falls_back_to_fact.yaml @@ -7,7 +7,7 @@ expected: decision_count: 0 fact_count: 1 must_use_tools: - - read_trace + - read_window - save_context must_not_use_tools: - revise_context diff --git a/tests/fixtures/expectations/extract/duplicate_existing_record.yaml b/tests/fixtures/expectations/extract/duplicate_existing_record.yaml index 2f92e4d..e728814 100644 --- a/tests/fixtures/expectations/extract/duplicate_existing_record.yaml +++ b/tests/fixtures/expectations/extract/duplicate_existing_record.yaml @@ -7,12 +7,11 @@ expected: changed_record_count: 2 changed_decision_count: 1 must_use_tools: - - read_trace + - read_window - save_context - get_context - revise_context must_not_use_tools: - - prune_trace_reads updated_decision_text_must_include_all: - separate updated_decision_text_must_include_any: diff --git a/tests/fixtures/expectations/extract/environment_fact_from_noisy_error.yaml b/tests/fixtures/expectations/extract/environment_fact_from_noisy_error.yaml index dbdbea0..6f10722 100644 --- a/tests/fixtures/expectations/extract/environment_fact_from_noisy_error.yaml +++ b/tests/fixtures/expectations/extract/environment_fact_from_noisy_error.yaml @@ -6,10 +6,9 @@ expected: durable_count: 1 fact_count: 1 must_use_tools: - - read_trace + - read_window - save_context must_not_use_tools: - - prune_trace_reads fact_text_must_include_all: - wkhtmltopdf - pdf diff --git a/tests/fixtures/expectations/extract/late_disambiguation_at_end_of_trace.yaml b/tests/fixtures/expectations/extract/late_disambiguation_at_end_of_trace.yaml index 782897f..ee1a4e8 100644 --- a/tests/fixtures/expectations/extract/late_disambiguation_at_end_of_trace.yaml +++ b/tests/fixtures/expectations/extract/late_disambiguation_at_end_of_trace.yaml @@ -8,10 +8,10 @@ expected: - decision - fact - constraint - min_read_trace_calls: 2 + min_read_window_calls: 2 must_use_tools: - - read_trace - - note_trace_findings + - read_window + - scan_window - save_context must_not_use_tools: [] durable_text_must_include_all: diff --git 
a/tests/fixtures/expectations/extract/long_trace_requires_note.yaml b/tests/fixtures/expectations/extract/long_trace_requires_note.yaml index aa9e7b0..e272942 100644 --- a/tests/fixtures/expectations/extract/long_trace_requires_note.yaml +++ b/tests/fixtures/expectations/extract/long_trace_requires_note.yaml @@ -1,15 +1,15 @@ case: long_trace_requires_note -summary: A long trace should require multiple reads and should preserve findings with note_trace_findings before writing. +summary: A long trace should require multiple reads and should preserve findings with scan_window before writing. expected: episode_count: 1 durable_count: 1 decision_count: 1 - read_trace_count_at_least: 2 - note_trace_findings_count_at_least: 1 + read_window_count_at_least: 2 + scan_window_count_at_least: 1 must_use_tools: - - read_trace - - note_trace_findings + - read_window + - scan_window - save_context must_not_use_tools: - revise_context diff --git a/tests/fixtures/expectations/extract/multi_record_trace.yaml b/tests/fixtures/expectations/extract/multi_record_trace.yaml index eda9cbe..99771c9 100644 --- a/tests/fixtures/expectations/extract/multi_record_trace.yaml +++ b/tests/fixtures/expectations/extract/multi_record_trace.yaml @@ -7,11 +7,10 @@ expected: decision_count: 1 fact_count: 1 must_use_tools: - - read_trace + - read_window - save_context must_not_use_tools: - revise_context - - prune_trace_reads decision_text_must_include_all: - raw sql - typed diff --git a/tests/fixtures/expectations/extract/recap_temptation_trace.yaml b/tests/fixtures/expectations/extract/recap_temptation_trace.yaml index bc08d39..a68d8ed 100644 --- a/tests/fixtures/expectations/extract/recap_temptation_trace.yaml +++ b/tests/fixtures/expectations/extract/recap_temptation_trace.yaml @@ -6,11 +6,10 @@ expected: durable_count: 1 decision_count: 1 must_use_tools: - - read_trace + - read_window - save_context must_not_use_tools: - revise_context - - prune_trace_reads decision_text_must_include_all: - append-only - event diff --git a/tests/fixtures/expectations/extract/reference_extraction.yaml b/tests/fixtures/expectations/extract/reference_extraction.yaml index 90c023e..6322490 100644 --- a/tests/fixtures/expectations/extract/reference_extraction.yaml +++ b/tests/fixtures/expectations/extract/reference_extraction.yaml @@ -6,7 +6,7 @@ expected: durable_count: 1 reference_count: 1 must_use_tools: - - read_trace + - read_window - save_context must_not_use_tools: - revise_context diff --git a/tests/fixtures/expectations/extract/routine_operational_no_durable_record.yaml b/tests/fixtures/expectations/extract/routine_operational_no_durable_record.yaml index ffc6b9f..9db1605 100644 --- a/tests/fixtures/expectations/extract/routine_operational_no_durable_record.yaml +++ b/tests/fixtures/expectations/extract/routine_operational_no_durable_record.yaml @@ -5,12 +5,11 @@ expected: episode_count: 1 durable_count: 0 must_use_tools: - - read_trace + - read_window - save_context must_not_use_tools: - revise_context - get_context - - prune_trace_reads episode_text_must_include_any: - routine - cleanup diff --git a/tests/fixtures/expectations/extract/runtime_requirement_from_diagnostics.yaml b/tests/fixtures/expectations/extract/runtime_requirement_from_diagnostics.yaml index 51d1c8d..fdc754c 100644 --- a/tests/fixtures/expectations/extract/runtime_requirement_from_diagnostics.yaml +++ b/tests/fixtures/expectations/extract/runtime_requirement_from_diagnostics.yaml @@ -6,10 +6,9 @@ expected: durable_count: 1 fact_count: 1 must_use_tools: - - 
read_trace + - read_window - save_context must_not_use_tools: - - prune_trace_reads fact_text_must_include_all: - project roots - absolute paths diff --git a/tests/fixtures/expectations/extract/similar_but_new_decision.yaml b/tests/fixtures/expectations/extract/similar_but_new_decision.yaml index 2f90503..81cd3ad 100644 --- a/tests/fixtures/expectations/extract/similar_but_new_decision.yaml +++ b/tests/fixtures/expectations/extract/similar_but_new_decision.yaml @@ -7,7 +7,7 @@ expected: decision_count: 1 created_durable_count: 1 must_use_tools: - - read_trace + - read_window - save_context must_not_use_tools: - revise_context diff --git a/tests/fixtures/expectations/extract/stable_user_preference.yaml b/tests/fixtures/expectations/extract/stable_user_preference.yaml index 46a4c78..2493e22 100644 --- a/tests/fixtures/expectations/extract/stable_user_preference.yaml +++ b/tests/fixtures/expectations/extract/stable_user_preference.yaml @@ -6,10 +6,9 @@ expected: durable_count: 1 preference_count: 1 must_use_tools: - - read_trace + - read_window - save_context must_not_use_tools: - - prune_trace_reads preference_text_must_include_all: - tightly coupled preference_text_must_include_any: diff --git a/tests/fixtures/expectations/extract/very_long_trace_requires_prune.yaml b/tests/fixtures/expectations/extract/very_long_trace_uses_windows.yaml similarity index 74% rename from tests/fixtures/expectations/extract/very_long_trace_requires_prune.yaml rename to tests/fixtures/expectations/extract/very_long_trace_uses_windows.yaml index e540d77..99f7bce 100644 --- a/tests/fixtures/expectations/extract/very_long_trace_requires_prune.yaml +++ b/tests/fixtures/expectations/extract/very_long_trace_uses_windows.yaml @@ -1,4 +1,4 @@ -case: very_long_trace_requires_prune +case: very_long_trace_uses_windows summary: A very long trace should stay compressed before the final writes. 
expected: @@ -8,12 +8,11 @@ expected: - constraint - decision - fact - read_trace_count_at_least: 4 - note_trace_findings_count_at_least: 1 - prune_trace_reads_count_at_least: 0 + read_window_count_at_least: 4 + scan_window_count_at_least: 1 must_use_tools: - - read_trace - - note_trace_findings + - read_window + - scan_window - save_context must_not_use_tools: - revise_context diff --git a/tests/integration/extract/helpers.py b/tests/integration/extract/helpers.py index 4951f2e..21224ec 100644 --- a/tests/integration/extract/helpers.py +++ b/tests/integration/extract/helpers.py @@ -8,16 +8,13 @@ from typing import Any from lerim.agents.extract import ExtractionResult, run_extraction -from lerim.config.providers import build_pydantic_model from lerim.context import ContextStore, resolve_project_identity from tests.conftest import EXTRACT_EXPECTATIONS_DIR, EXTRACT_TRACES_DIR from tests.integration.common_helpers import ( - extract_tool_calls, load_yaml_expectation, retry_on_overload, seed_session, ) -from tests.live_helpers import dump_messages, extract_tool_names @dataclass @@ -39,7 +36,7 @@ def load_extract_expectation(case_name: str) -> dict[str, Any]: return load_yaml_expectation(EXTRACT_EXPECTATIONS_DIR, case_name) -def _build_very_long_prune_trace(trace_path: Path) -> None: +def _build_very_long_window_trace(trace_path: Path) -> None: """Materialize one very long trace that can create real context pressure.""" messages: list[dict[str, str]] = [ { @@ -203,9 +200,9 @@ def _resolve_trace_path(case_name: str, run_folder: Path) -> Path: static_path = EXTRACT_TRACES_DIR / f"{case_name}.jsonl" if static_path.exists(): return static_path - if case_name == "very_long_trace_requires_prune": + if case_name == "very_long_trace_uses_windows": generated = run_folder / f"{case_name}.jsonl" - _build_very_long_prune_trace(generated) + _build_very_long_window_trace(generated) return generated if case_name == "late_disambiguation_at_end_of_trace": generated = run_folder / f"{case_name}.jsonl" @@ -257,16 +254,15 @@ def run_extract_case( source_trace_ref=str(trace_path), ) - model = build_pydantic_model("agent", config=live_config) - result, messages = retry_on_overload( + result, details = retry_on_overload( lambda: run_extraction( context_db_path=live_config.context_db_path, project_identity=identity, session_id=session_id, trace_path=trace_path, - model=model, - run_folder=run_folder, - return_messages=True, + config=live_config, + session_started_at="2026-01-01T00:00:00Z", + return_details=True, ) ) @@ -303,11 +299,11 @@ def run_extract_case( for record_id in changed_record_ids ] - payload = dump_messages(messages) + payload = [event.model_dump(mode="json") for event in details.events] return ExtractCaseOutcome( result=result, - tool_names=extract_tool_names(payload), - tool_calls=extract_tool_calls(payload), + tool_names=[str(event.get("action") or "") for event in payload], + tool_calls=payload, rows=rows, records=[record for record in records if record is not None], changed_version_rows=version_rows, diff --git a/tests/integration/extract/test_context_pressure.py b/tests/integration/extract/test_context_pressure.py index dd2e04f..5fb3bbc 100644 --- a/tests/integration/extract/test_context_pressure.py +++ b/tests/integration/extract/test_context_pressure.py @@ -6,7 +6,7 @@ from tests.integration.extract.helpers import load_extract_expectation, run_extract_case from tests.live_helpers import ( - EXTRACT_TOOL_NAMES, + EXTRACT_EVENT_NAMES, FRAMEWORK_TOOL_NAMES, assert_clean_context_schema, 
assert_quality_metrics, @@ -22,7 +22,7 @@ def test_extract_long_trace_requires_note_before_writing( live_config, live_repo_root, ) -> None: - """Long traces should trigger multi-read extraction with note_trace_findings compression.""" + """Long traces should trigger multi-read extraction with scan_window compression.""" expectation = load_extract_expectation("long_trace_requires_note")["expected"] outcome = run_extract_case( case_name="long_trace_requires_note", @@ -31,15 +31,15 @@ def test_extract_long_trace_requires_note_before_writing( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: assert tool_name not in tool_names - assert tool_names.count("read_trace") >= expectation["read_trace_count_at_least"] + assert tool_names.count("read_window") >= expectation["read_window_count_at_least"] assert ( - tool_names.count("note_trace_findings") - >= expectation["note_trace_findings_count_at_least"] + tool_names.count("scan_window") + >= expectation["scan_window_count_at_least"] ) rows = outcome.rows @@ -100,34 +100,29 @@ def test_extract_long_trace_requires_note_before_writing( @pytest.mark.integration @pytest.mark.llm @pytest.mark.agent -def test_extract_very_long_trace_requires_prune( +def test_extract_very_long_trace_uses_windows( live_config, live_repo_root, ) -> None: """Very long traces should stay compressed while preserving the extracted signal.""" - expectation = load_extract_expectation("very_long_trace_requires_prune")["expected"] + expectation = load_extract_expectation("very_long_trace_uses_windows")["expected"] outcome = run_extract_case( - case_name="very_long_trace_requires_prune", + case_name="very_long_trace_uses_windows", live_config=live_config, live_repo_root=live_repo_root, ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: assert tool_name not in tool_names - assert tool_names.count("read_trace") >= expectation["read_trace_count_at_least"] + assert tool_names.count("read_window") >= expectation["read_window_count_at_least"] assert ( - tool_names.count("note_trace_findings") - >= expectation["note_trace_findings_count_at_least"] + tool_names.count("scan_window") + >= expectation["scan_window_count_at_least"] ) - assert ( - tool_names.count("prune_trace_reads") - >= expectation["prune_trace_reads_count_at_least"] - ) - rows = outcome.rows episode_rows = [row for row in rows if row["kind"] == "episode"] durable_rows = [row for row in rows if row["kind"] != "episode"] @@ -181,10 +176,10 @@ def test_extract_late_disambiguation_at_end_of_trace( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names - assert tool_names.count("read_trace") >= expectation["min_read_trace_calls"] + assert tool_names.count("read_window") >= expectation["min_read_window_calls"] rows = outcome.rows episode_rows = [row for row in rows if row["kind"] == "episode"] 
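The `read_window_count_at_least` assertions above hold because windowing is deterministic: the number of windows is a pure function of trace length, not of model behavior. A sketch under an assumed lines-per-window constant; the real derivation lives in `lerim.agents.extract.windowing`, whose chunk sizes this patch does not show:

```python
# Hypothetical sketch of the deterministic window derivation; the real module
# may also cap windows by byte size, as the removed compute_request_budget did.
import math
from pathlib import Path

LINES_PER_WINDOW = 150  # assumed constant, not taken from this patch


def expected_window_count(trace_path: Path) -> int:
    """Derive a deterministic window count from the trace's line count."""
    try:
        with trace_path.open("r", encoding="utf-8") as fh:
            line_count = sum(1 for _ in fh)
    except OSError:
        return 1
    return max(1, math.ceil(line_count / LINES_PER_WINDOW))


# With the assumed constant, a 600-line trace implies ceil(600 / 150) == 4
# read_window events, the shape of read_window_count_at_least: 4 above.
```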
diff --git a/tests/integration/extract/test_core.py b/tests/integration/extract/test_core.py index 1cc590d..8ce7ff6 100644 --- a/tests/integration/extract/test_core.py +++ b/tests/integration/extract/test_core.py @@ -6,7 +6,7 @@ from tests.integration.extract.helpers import load_extract_expectation, run_extract_case from tests.live_helpers import ( - EXTRACT_TOOL_NAMES, + EXTRACT_EVENT_NAMES, FRAMEWORK_TOOL_NAMES, assert_clean_context_schema, assert_quality_metrics, @@ -31,9 +31,9 @@ def test_extract_clear_decision_ignores_implementation_noise( ) tool_names = outcome.tool_names - assert "read_trace" in tool_names + assert "read_window" in tool_names assert "save_context" in tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: @@ -104,7 +104,7 @@ def test_extract_multi_record_trace_keeps_two_independent_records( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: @@ -189,7 +189,7 @@ def test_extract_recap_temptation_trace_stays_compact_and_durable( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: @@ -260,7 +260,7 @@ def test_extract_stable_user_preference_creates_preference_record( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: @@ -329,7 +329,7 @@ def test_extract_environment_fact_from_noisy_error_creates_fact_record( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: @@ -399,7 +399,7 @@ def test_extract_runtime_requirement_from_diagnostics_creates_fact_record( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: @@ -443,7 +443,7 @@ def test_extract_constraint_trace_creates_constraint_record( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: @@ -484,7 +484,7 @@ def test_extract_reference_trace_creates_reference_record( ) tool_names = outcome.tool_names - assert 
set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: @@ -525,7 +525,7 @@ def test_extract_decision_without_why_falls_back_to_fact( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: diff --git a/tests/integration/extract/test_dedup_and_abstention.py b/tests/integration/extract/test_dedup_and_abstention.py index 252e6da..ea9b73a 100644 --- a/tests/integration/extract/test_dedup_and_abstention.py +++ b/tests/integration/extract/test_dedup_and_abstention.py @@ -6,7 +6,7 @@ from tests.integration.extract.helpers import load_extract_expectation, run_extract_case from tests.live_helpers import ( - EXTRACT_TOOL_NAMES, + EXTRACT_EVENT_NAMES, FRAMEWORK_TOOL_NAMES, assert_clean_context_schema, assert_quality_metrics, @@ -44,7 +44,7 @@ def test_extract_updates_existing_record_instead_of_creating_duplicate( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: @@ -112,7 +112,7 @@ def test_extract_routine_operational_trace_creates_no_durable_record( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: @@ -182,7 +182,7 @@ def test_extract_borderline_non_durable_incident_abstains_from_record( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: @@ -262,7 +262,7 @@ def test_extract_similar_but_new_decision_creates_new_record( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: @@ -322,7 +322,7 @@ def test_extract_ambiguous_search_hits_update_only_true_target( ) tool_names = outcome.tool_names - assert set(tool_names).issubset(EXTRACT_TOOL_NAMES | FRAMEWORK_TOOL_NAMES) + assert set(tool_names).issubset(EXTRACT_EVENT_NAMES | FRAMEWORK_TOOL_NAMES) for tool_name in expectation["must_use_tools"]: assert tool_name in tool_names for tool_name in expectation["must_not_use_tools"]: diff --git a/tests/integration/runtime/test_cases.py b/tests/integration/runtime/test_cases.py index 6136bd3..06a8be2 100644 --- a/tests/integration/runtime/test_cases.py +++ b/tests/integration/runtime/test_cases.py @@ -8,7 +8,7 @@ from types import SimpleNamespace from lerim.agents.ask import AskResult 
-from lerim.agents.extract import ExtractionResult +from lerim.agents.extract import ExtractionEvent, ExtractionResult, ExtractionRunDetails from lerim.context import ContextStore from lerim.working_memory import ( MemoryLine, @@ -40,6 +40,29 @@ def _assert_run_folder_layout( assert len(day.name) == 2 and day.name.isdigit() +def _extract_details(kwargs, *, summary: str) -> ExtractionRunDetails: + """Build graph-style extraction details for sync runtime test doubles.""" + return ExtractionRunDetails( + events=[ + ExtractionEvent( + action="final_result", + ok=True, + content=summary, + args={}, + done=True, + completion_summary=summary, + ) + ], + llm_calls=1, + done=True, + context_db_path=str(kwargs["context_db_path"]), + project_id=kwargs["project_identity"].project_id, + session_id=kwargs["session_id"], + model_name="test-model", + trace_total_lines=1, + ) + + def test_sync_artifact_paths_are_stable_per_flow( monkeypatch, live_config, live_repo_root ): @@ -61,7 +84,7 @@ def test_sync_artifact_paths_are_stable_per_flow( "lerim.server.runtime.run_extraction", lambda **kwargs: ( ExtractionResult(completion_summary="sync complete"), - build_ordered_ask_messages()[:1], + _extract_details(kwargs, summary="sync complete"), ), ) @@ -280,7 +303,7 @@ def _flaky_run_extraction(**kwargs): raise RuntimeError("temporary upstream failure") return ( ExtractionResult(completion_summary="sync recovered"), - build_ordered_ask_messages()[:1], + _extract_details(kwargs, summary="sync recovered"), ) monkeypatch.setattr("lerim.server.runtime.run_extraction", _flaky_run_extraction) @@ -331,7 +354,7 @@ def _fake_run_extraction(**kwargs): ) return ( ExtractionResult(completion_summary="sync wrote initial fact"), - build_ordered_ask_messages()[:1], + _extract_details(kwargs, summary="sync wrote initial fact"), ) def _fake_run_maintain(**kwargs): diff --git a/tests/integration/scope/test_cases.py b/tests/integration/scope/test_cases.py index 453a291..81a654d 100644 --- a/tests/integration/scope/test_cases.py +++ b/tests/integration/scope/test_cases.py @@ -4,7 +4,7 @@ import pytest -from lerim.agents.extract import ExtractionResult +from lerim.agents.extract import ExtractionEvent, ExtractionResult, ExtractionRunDetails from lerim.agents.maintain import run_maintain from lerim.config.providers import build_pydantic_model from lerim.server.api import api_query @@ -19,6 +19,29 @@ ) +def _extract_details(kwargs, *, summary: str) -> ExtractionRunDetails: + """Build graph-style extraction details for sync runtime test doubles.""" + return ExtractionRunDetails( + events=[ + ExtractionEvent( + action="final_result", + ok=True, + content=summary, + args={}, + done=True, + completion_summary=summary, + ) + ], + llm_calls=1, + done=True, + context_db_path=str(kwargs["context_db_path"]), + project_id=kwargs["project_identity"].project_id, + session_id=kwargs["session_id"], + model_name="test-model", + trace_total_lines=1, + ) + + def _seed_ask_scope_records(env: ScopeCaseEnv) -> None: """Seed one distinctive durable decision into each project.""" seed_scope_session( @@ -317,7 +340,10 @@ def _fake_run_extraction(**kwargs): why="Project-scoped extraction should not mutate beta records.", change_reason="scope_extract_update", ) - return (ExtractionResult(completion_summary="scope extract complete"), []) + return ( + ExtractionResult(completion_summary="scope extract complete"), + _extract_details(kwargs, summary="scope extract complete"), + ) monkeypatch.setattr("lerim.server.runtime.run_extraction", _fake_run_extraction) diff 
--git a/tests/live_helpers.py b/tests/live_helpers.py index 34c4f77..aa54cb4 100644 --- a/tests/live_helpers.py +++ b/tests/live_helpers.py @@ -15,7 +15,6 @@ from lerim.agents.toolsets import ( ASK_TOOL_NAMES as ASK_TOOL_NAMES, CURRENT_AGENT_TOOL_NAMES as CURRENT_AGENT_TOOL_NAMES, - EXTRACT_TOOL_NAMES as EXTRACT_TOOL_NAMES, MAINTAIN_TOOL_NAMES as MAINTAIN_TOOL_NAMES, ) from lerim.config.settings import Config, get_config, reload_config @@ -48,6 +47,15 @@ FRAMEWORK_TOOL_NAMES = { "final_result", } +EXTRACT_EVENT_NAMES = frozenset( + { + "read_window", + "scan_window", + "synthesize_records", + "save_context", + "model_retry", + } +) _API_KEY_ATTRS = { "minimax": "minimax_api_key", "openai": "openai_api_key", @@ -133,6 +141,9 @@ def extract_tool_names(payload: list[dict[str, Any]]) -> list[str]: def walk(value: Any) -> None: if isinstance(value, dict): + action = str(value.get("action") or "").strip() + if action: + names.append(action) if value.get("part_kind") == "tool-call": tool_name = str(value.get("tool_name") or "").strip() if tool_name: diff --git a/tests/smoke/test_embeddings_indexed.py b/tests/smoke/test_embeddings_indexed.py index d261d58..7db1cf5 100644 --- a/tests/smoke/test_embeddings_indexed.py +++ b/tests/smoke/test_embeddings_indexed.py @@ -3,7 +3,6 @@ import pytest from lerim.agents.extract import run_extraction -from lerim.config.providers import build_pydantic_model from lerim.context import ContextStore, resolve_project_identity from tests.conftest import EXTRACT_TRACES_DIR @@ -22,8 +21,6 @@ def test_embeddings_indexed(live_config, live_repo_root): store.register_project(identity) session_id = "smoke-embeddings" - run_folder = live_config.global_data_dir / "workspace" / "sync" / session_id - run_folder.mkdir(parents=True, exist_ok=True) trace_path = EXTRACT_TRACES_DIR / "clear_decision_with_noise.jsonl" store.upsert_session( @@ -40,14 +37,12 @@ def test_embeddings_indexed(live_config, live_repo_root): metadata={}, ) - model = build_pydantic_model("agent", config=live_config) result = run_extraction( context_db_path=live_config.context_db_path, project_identity=identity, session_id=session_id, trace_path=trace_path, - model=model, - run_folder=run_folder, + config=live_config, ) assert result is not None, "Extraction returned no result" diff --git a/tests/smoke/test_extract_completes.py b/tests/smoke/test_extract_completes.py index 84902a4..966cf9d 100644 --- a/tests/smoke/test_extract_completes.py +++ b/tests/smoke/test_extract_completes.py @@ -3,7 +3,6 @@ import pytest from lerim.agents.extract import run_extraction -from lerim.config.providers import build_pydantic_model from lerim.context import ContextStore, resolve_project_identity from tests.conftest import EXTRACT_TRACES_DIR @@ -23,8 +22,6 @@ def test_extract_completes(live_config, live_repo_root): store.register_project(identity) session_id = "smoke-extract" - run_folder = live_config.global_data_dir / "workspace" / "sync" / session_id - run_folder.mkdir(parents=True, exist_ok=True) trace_path = EXTRACT_TRACES_DIR / "routine_operational_no_durable_record.jsonl" store.upsert_session( @@ -41,14 +38,12 @@ def test_extract_completes(live_config, live_repo_root): metadata={}, ) - model = build_pydantic_model("agent", config=live_config) result = run_extraction( context_db_path=live_config.context_db_path, project_identity=identity, session_id=session_id, trace_path=trace_path, - model=model, - run_folder=run_folder, + config=live_config, ) assert result is not None, "Extraction returned no result" diff --git 
a/tests/unit/agents/conftest.py b/tests/unit/agents/conftest.py index 25fe0d3..32800c5 100644 --- a/tests/unit/agents/conftest.py +++ b/tests/unit/agents/conftest.py @@ -32,11 +32,6 @@ def deps(tmp_path, project_identity): project_identity=project_identity, session_id="sess_test", project_ids=[project_identity.project_id], - trace_path=None, - trace_total_lines=0, - read_ranges=[], - notes=[], - pruned_offsets=set(), ) @@ -58,11 +53,6 @@ def deps_with_trace(tmp_path, project_identity): project_identity=project_identity, session_id="sess_test", project_ids=[project_identity.project_id], - trace_path=trace_path, - trace_total_lines=10, - read_ranges=[], - notes=[], - pruned_offsets=set(), ) diff --git a/tests/unit/agents/test_agent_build.py b/tests/unit/agents/test_agent_build.py index 919f918..56fb2a4 100644 --- a/tests/unit/agents/test_agent_build.py +++ b/tests/unit/agents/test_agent_build.py @@ -16,7 +16,7 @@ ASK_SYSTEM_PROMPT, AskResult, ) -from lerim.agents.extract import SYSTEM_PROMPT, ExtractionResult +from lerim.agents.extract import ExtractionResult from lerim.agents.maintain import ( MAINTAIN_SYSTEM_PROMPT, MaintainResult, @@ -53,9 +53,10 @@ def test_authoritative_docs_match_current_agent_tool_contract(): class TestBuildExtractAgent: """Tests for extract-agent public contract.""" - def test_system_prompt_non_empty(self): - assert isinstance(SYSTEM_PROMPT, str) - assert len(SYSTEM_PROMPT.strip()) > 0 + def test_baml_source_exists(self): + path = REPO_ROOT / "src" / "lerim" / "agents" / "baml_src" / "extract_react.baml" + assert path.exists() + assert "function ScanTraceWindow" in path.read_text(encoding="utf-8") class TestBuildMaintainAgent: diff --git a/tests/unit/agents/test_tools.py b/tests/unit/agents/test_tools.py index 203a747..98357ed 100644 --- a/tests/unit/agents/test_tools.py +++ b/tests/unit/agents/test_tools.py @@ -3,53 +3,23 @@ from __future__ import annotations import json -import inspect from pathlib import Path from unittest.mock import MagicMock import pytest -from pydantic import ValidationError from pydantic_ai import ModelRetry -from pydantic_ai.messages import ( - ModelRequest, - ModelResponse, - SystemPromptPart, - ToolCallPart, - ToolReturnPart, -) - -from lerim.agents.history_processors import ( - PRUNED_STUB, - context_pressure_injector, - notes_state_injector, - prune_history_processor, -) from lerim.agents.tools import ( - CONTEXT_HARD_PRESSURE_PCT, - CONTEXT_SOFT_PRESSURE_PCT, - MODEL_CONTEXT_TOKEN_LIMIT, - TRACE_MAX_LINE_BYTES, - ContextDeps, - TraceFinding, - _classify_context_pressure, - _first_uncovered_offset, _maybe_raise_record_retry, _normalize_kind, _normalize_status, - _require_trace_ready_for_write, _store, archive_context, - compute_request_budget, count_context, - save_context, get_context, list_context, - note_trace_findings, - prune_trace_reads, search_context, supersede_context, - read_trace, revise_context, ) from lerim.context import ContextStore @@ -221,59 +191,6 @@ def test_retry_message_content(self): _maybe_raise_record_retry(ValueError("title_required")) -# --------------------------------------------------------------------------- -# _first_uncovered_offset -# --------------------------------------------------------------------------- - - -class TestFirstUncoveredOffset: - def test_no_ranges_returns_zero(self): - assert _first_uncovered_offset([], 10) == 0 - - def test_total_zero_returns_none(self): - assert _first_uncovered_offset([], 0) is None - - def test_fully_covered_returns_none(self): - assert _first_uncovered_offset([(0, 
10)], 10) is None - - def test_gap_at_start(self): - assert _first_uncovered_offset([(5, 10)], 10) == 0 - - def test_gap_in_middle(self): - assert _first_uncovered_offset([(0, 3), (7, 10)], 10) == 3 - - def test_truncation_at_end(self): - assert _first_uncovered_offset([(0, 5)], 10) == 5 - - def test_overlapping_ranges_merged(self): - assert _first_uncovered_offset([(0, 4), (3, 8)], 8) is None - - def test_single_range_partial(self): - assert _first_uncovered_offset([(0, 5)], 20) == 5 - - -# --------------------------------------------------------------------------- -# _classify_context_pressure -# --------------------------------------------------------------------------- - - -class TestClassifyContextPressure: - def test_normal_below_soft(self): - assert _classify_context_pressure(0.3) == "normal" - - def test_soft_at_threshold(self): - assert _classify_context_pressure(CONTEXT_SOFT_PRESSURE_PCT) == "soft" - - def test_soft_between(self): - assert _classify_context_pressure(0.7) == "soft" - - def test_hard_at_threshold(self): - assert _classify_context_pressure(CONTEXT_HARD_PRESSURE_PCT) == "hard" - - def test_hard_above(self): - assert _classify_context_pressure(0.95) == "hard" - - # --------------------------------------------------------------------------- # _store # --------------------------------------------------------------------------- @@ -286,255 +203,6 @@ def test_creates_initialized_store(self, deps): assert isinstance(store, ContextStore) -# --------------------------------------------------------------------------- -# compute_request_budget -# --------------------------------------------------------------------------- - - -class TestComputeRequestBudget: - def test_zero_lines(self, tmp_path): - p = tmp_path / "trace.jsonl" - p.write_text("", encoding="utf-8") - assert compute_request_budget(p) == 50 - - def test_200_lines(self, tmp_path): - p = _make_trace(tmp_path / "t.jsonl", 200) - assert compute_request_budget(p) == 83 - - def test_500_lines(self, tmp_path): - p = _make_trace(tmp_path / "t.jsonl", 500) - budget = compute_request_budget(p) - assert budget == 89 - - def test_1000_lines(self, tmp_path): - p = _make_trace(tmp_path / "t.jsonl", 1000) - budget = compute_request_budget(p) - assert budget == 99 - - def test_5000_lines(self, tmp_path): - p = _make_trace(tmp_path / "t.jsonl", 5000) - assert compute_request_budget(p) == 179 - - def test_large_lines_budget_for_byte_limited_chunks(self, tmp_path): - p = tmp_path / "trace.jsonl" - p.write_text("\n".join("x" * 4000 for _ in range(200)), encoding="utf-8") - assert compute_request_budget(p) == 111 - - def test_missing_file(self, tmp_path): - p = tmp_path / "nonexistent.jsonl" - assert compute_request_budget(p) == 50 - - def test_100_lines(self, tmp_path): - p = _make_trace(tmp_path / "t.jsonl", 100) - assert compute_request_budget(p) == 50 - - -# --------------------------------------------------------------------------- -# read_trace -# --------------------------------------------------------------------------- - - -class TestTraceRead: - def test_no_trace_path(self, deps): - ctx = make_run_context(deps) - result = read_trace(ctx) - assert "no trace path" in result - - def test_basic_read(self, deps_with_trace): - ctx = make_run_context(deps_with_trace) - result = read_trace(ctx, start_line=1, line_count=5) - assert "[10 lines" in result - assert "showing 1-5" in result - assert "5 more lines" in result - assert len(deps_with_trace.read_ranges) == 1 - assert deps_with_trace.read_ranges[0] == (0, 5) - - def 
test_read_full(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        result = read_trace(ctx, start_line=1, line_count=100)
-        assert "showing 1-10" in result
-        assert "more lines" not in result
-
-    def test_offset_past_end(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        with pytest.raises(ModelRetry, match="past the end"):
-            read_trace(ctx, start_line=101)
-
-    def test_negative_offset_clamped_to_start(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        result = read_trace(ctx, start_line=-20, line_count=2)
-        assert "showing 1-2" in result
-
-    def test_limit_zero_clamped(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        result = read_trace(ctx, start_line=1, line_count=0)
-        assert "[10 lines" in result
-
-    def test_limit_over_max_clamped(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        result = read_trace(ctx, start_line=1, line_count=999)
-        assert "[10 lines" in result
-
-    def test_line_truncation(self, tmp_path, project_identity):
-        trace_path = tmp_path / "trace.jsonl"
-        long_line = "x" * (TRACE_MAX_LINE_BYTES + 500)
-        trace_path.write_text(long_line, encoding="utf-8")
-        deps = ContextDeps(
-            context_db_path=tmp_path / "context.sqlite3",
-            project_identity=project_identity,
-            session_id="sess_test",
-            trace_path=trace_path,
-            trace_total_lines=0,
-        )
-        ctx = make_run_context(deps)
-        result = read_trace(ctx, start_line=1, line_count=10)
-        assert "truncated" in result
-
-    def test_chunk_byte_truncation(self, tmp_path, project_identity):
-        trace_path = tmp_path / "trace.jsonl"
-        lines = ["x" * 4000 for _ in range(20)]
-        trace_path.write_text("\n".join(lines), encoding="utf-8")
-        deps = ContextDeps(
-            context_db_path=tmp_path / "context.sqlite3",
-            project_identity=project_identity,
-            session_id="sess_test",
-            trace_path=trace_path,
-            trace_total_lines=0,
-        )
-        ctx = make_run_context(deps)
-        result = read_trace(ctx, start_line=1, line_count=20)
-        numbered_lines = [line for line in result.split("\n") if "\t" in line]
-        assert len(numbered_lines) < 20
-
-    def test_read_ranges_tracking(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        read_trace(ctx, start_line=1, line_count=5)
-        read_trace(ctx, start_line=6, line_count=5)
-        assert len(deps_with_trace.read_ranges) == 2
-        assert deps_with_trace.read_ranges[0] == (0, 5)
-        assert deps_with_trace.read_ranges[1] == (5, 10)
-
-    def test_overlapping_read_advances_to_first_unread_line(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        read_trace(ctx, start_line=1, line_count=5)
-
-        result = read_trace(ctx, start_line=3, line_count=3)
-
-        assert "showing 6-8" in result
-        assert "advanced from requested line 3 to first unread line 6" in result
-        assert deps_with_trace.read_ranges[-1] == (5, 8)
-
-    def test_completed_trace_read_returns_done_message(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        read_trace(ctx, start_line=1, line_count=5)
-        read_trace(ctx, start_line=6, line_count=5)
-
-        result = read_trace(ctx, start_line=1, line_count=5)
-
-        assert "trace coverage complete" in result
-        assert len(deps_with_trace.read_ranges) == 2
-
-    def test_read_trace_auto_prunes_under_context_pressure(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        read_trace(ctx, start_line=1, line_count=3)
-        read_trace(ctx, start_line=4, line_count=3)
-        deps_with_trace.last_context_fill_ratio = CONTEXT_SOFT_PRESSURE_PCT
-
-        result = read_trace(ctx, start_line=7, line_count=3)
-
-        assert 0 in deps_with_trace.pruned_offsets
-        assert "auto-pruned older read_trace start lines: 1" in result
-
-
-# ---------------------------------------------------------------------------
-# _require_trace_ready_for_write
-# ---------------------------------------------------------------------------
-
-
-class TestRequireTraceReadyForWrite:
-    def test_no_trace_path_passes(self, deps):
-        ctx = make_run_context(deps)
-        _require_trace_ready_for_write(ctx)
-
-    def test_no_read_ranges_raises_from_trace_start(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        with pytest.raises(ModelRetry, match=r"read_trace\(start_line=1"):
-            _require_trace_ready_for_write(ctx)
-
-    def test_empty_trace_passes_without_read_ranges(self, tmp_path, project_identity):
-        trace_path = tmp_path / "trace.jsonl"
-        trace_path.write_text("", encoding="utf-8")
-        deps = ContextDeps(
-            context_db_path=tmp_path / "context.sqlite3",
-            project_identity=project_identity,
-            session_id="sess_test",
-            trace_path=trace_path,
-            trace_total_lines=0,
-        )
-        ctx = make_run_context(deps)
-        _require_trace_ready_for_write(ctx)
-
-    def test_full_coverage_passes(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        deps_with_trace.read_ranges = [(0, 10)]
-        _require_trace_ready_for_write(ctx)
-
-    def test_partial_coverage_raises(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        deps_with_trace.read_ranges = [(0, 5)]
-        with pytest.raises(ModelRetry, match="Unread trace lines"):
-            _require_trace_ready_for_write(ctx)
-
-    def test_already_has_notes_passes(self, tmp_path, project_identity):
-        trace_path = tmp_path / "trace.jsonl"
-        _make_trace(trace_path, 200)
-        deps = ContextDeps(
-            context_db_path=tmp_path / "context.sqlite3",
-            project_identity=project_identity,
-            session_id="sess_test",
-            trace_path=trace_path,
-            read_ranges=[(0, 200)],
-            notes=[TraceFinding(theme="t", line=1, quote="q", level="fact")],
-        )
-        ctx = make_run_context(deps)
-        _require_trace_ready_for_write(ctx)
-
-    def test_short_trace_passes(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        deps_with_trace.read_ranges = [(0, 10)]
-        _require_trace_ready_for_write(ctx)
-
-    def test_long_trace_no_notes_raises(self, tmp_path, project_identity):
-        trace_path = tmp_path / "trace.jsonl"
-        _make_trace(trace_path, 200)
-        deps = ContextDeps(
-            context_db_path=tmp_path / "context.sqlite3",
-            project_identity=project_identity,
-            session_id="sess_test",
-            trace_path=trace_path,
-            read_ranges=[(0, 200)],
-        )
-        ctx = make_run_context(deps)
-        with pytest.raises(ModelRetry, match="note_trace_findings"):
-            _require_trace_ready_for_write(ctx)
-
-    def test_long_trace_empty_findings_checkpoint_passes(
-        self, tmp_path, project_identity
-    ):
-        trace_path = tmp_path / "trace.jsonl"
-        _make_trace(trace_path, 200)
-        deps = ContextDeps(
-            context_db_path=tmp_path / "context.sqlite3",
-            project_identity=project_identity,
-            session_id="sess_test",
-            trace_path=trace_path,
-            read_ranges=[(0, 200)],
-            findings_checked=True,
-        )
-        ctx = make_run_context(deps)
-        _require_trace_ready_for_write(ctx)
-
-
 # ---------------------------------------------------------------------------
 # search_context
 # ---------------------------------------------------------------------------
@@ -800,222 +468,6 @@ def test_detailed_full_body(self, deps, mock_embeddings):
         assert rec["record_id"] in deps.fetched_context_record_ids
 
 
-# ---------------------------------------------------------------------------
-# save_context
-# ---------------------------------------------------------------------------
-
-
-class TestSaveContext:
-    def test_write_tools_expose_flat_record_fields(self):
-        save_params = inspect.signature(save_context).parameters
-        revise_params = inspect.signature(revise_context).parameters
-
-        assert "context" not in save_params
-        assert "context" not in revise_params
-        for field_name in ("kind", "title", "body"):
-            assert field_name in save_params
-            assert field_name in revise_params
-
-    def test_basic_create(self, deps_with_session, mock_embeddings):
-        ctx = make_run_context(deps_with_session)
-        result = save_context(
-            ctx,
-            **_fact(title="Test fact", body="A test body."),
-        )
-        parsed = json.loads(result)
-        assert parsed["ok"] is True
-
-    def test_create_accepts_json_quoted_scalars(
-        self, deps_with_session, mock_embeddings
-    ):
-        ctx = make_run_context(deps_with_session)
-        result = save_context(
-            ctx,
-            kind='"fact"',
-            title="Quoted fact",
-            body="A quoted body.",
-            status='"active"',
-        )
-        record = json.loads(result)["result"]
-
-        assert record["kind"] == "fact"
-        assert record["title"] == "Quoted fact"
-        assert record["body"] == "A quoted body."
-
-    def test_invalid_kind_raises_retry(self, deps_with_session, mock_embeddings):
-        ctx = make_run_context(deps_with_session)
-        with pytest.raises(ModelRetry, match="Record kind is invalid"):
-            save_context(ctx, kind="note", title="Bad kind", body="Body")
-
-    def test_invalid_status_raises_retry(self, deps_with_session, mock_embeddings):
-        ctx = make_run_context(deps_with_session)
-        with pytest.raises(ModelRetry, match="status must be one of"):
-            save_context(
-                ctx,
-                kind="fact",
-                title="Bad status",
-                body="Body",
-                status="pending",
-            )
-
-    def test_create_anchors_record_time_to_source_session(
-        self, deps_with_session, mock_embeddings
-    ):
-        ctx = make_run_context(deps_with_session)
-        result = save_context(
-            ctx,
-            **_fact(
-                title="Historical source fact",
-                body="This fact was learned from a historical session.",
-            ),
-        )
-        record = json.loads(result)["result"]
-
-        assert record["created_at"] == "2026-01-01T00:00:00+00:00"
-        assert record["updated_at"] == "2026-01-01T00:00:00+00:00"
-        assert record["valid_from"] == "2026-01-01T00:00:00+00:00"
-
-    def test_explicit_valid_from_overrides_source_session_time(
-        self, deps_with_session, mock_embeddings
-    ):
-        ctx = make_run_context(deps_with_session)
-        result = save_context(
-            ctx,
-            **_fact(
-                title="Explicit validity fact",
-                body="This fact became valid at a specific time.",
-                valid_from="2026-02-01T00:00:00+00:00",
-            ),
-        )
-        record = json.loads(result)["result"]
-
-        assert record["created_at"] == "2026-01-01T00:00:00+00:00"
-        assert record["valid_from"] == "2026-02-01T00:00:00+00:00"
-
-    def test_missing_title_raises_retry(self, deps_with_session, mock_embeddings):
-        ctx = make_run_context(deps_with_session)
-        with pytest.raises(ModelRetry, match="non-empty title"):
-            save_context(ctx, **_fact(title="", body="body"))
-
-    def test_missing_body_raises_retry(self, deps_with_session, mock_embeddings):
-        ctx = make_run_context(deps_with_session)
-        with pytest.raises(ModelRetry, match="non-empty body"):
-            save_context(ctx, **_fact(title="title", body=""))
-
-    def test_title_too_long_raises_retry(self, deps_with_session, mock_embeddings):
-        ctx = make_run_context(deps_with_session)
-        with pytest.raises(ModelRetry, match="too long"):
-            save_context(ctx, **_fact(title="x" * 200, body="body"))
-
-    def test_guard_full_trace_coverage(self, deps_with_trace, mock_embeddings):
-        ctx = make_run_context(deps_with_trace)
-        deps_with_trace.read_ranges = [(0, 5)]
-        with pytest.raises(ModelRetry, match="Unread trace lines"):
-            save_context(ctx, **_fact(title="t", body="b"))
-
-    def test_guard_refuses_create_before_any_read_trace(
-        self, deps_with_trace, mock_embeddings
-    ):
-        ctx = make_run_context(deps_with_trace)
-        with pytest.raises(ModelRetry, match=r"read_trace\(start_line=1"):
-            save_context(ctx, **_fact(title="t", body="b"))
-
-    def test_guard_notes_before_long_trace(
-        self, tmp_path, project_identity, mock_embeddings
-    ):
-        trace_path = tmp_path / "trace.jsonl"
-        _make_trace(trace_path, 200)
-        db_path = tmp_path / "context.sqlite3"
-        store = ContextStore(db_path)
-        store.initialize()
-        store.register_project(project_identity)
-        _seed_session(store, project_identity.project_id)
-        deps = ContextDeps(
-            context_db_path=db_path,
-            project_identity=project_identity,
-            session_id="sess_test",
-            trace_path=trace_path,
-            trace_total_lines=200,
-            read_ranges=[(0, 200)],
-        )
-        ctx = make_run_context(deps)
-        with pytest.raises(ModelRetry, match="read_trace chunk"):
-            save_context(ctx, **_fact(title="t", body="b"))
-
-    def test_guard_allows_archived_episode_after_full_long_trace(
-        self, tmp_path, project_identity, mock_embeddings
-    ):
-        trace_path = tmp_path / "trace.jsonl"
-        _make_trace(trace_path, 200)
-        db_path = tmp_path / "context.sqlite3"
-        store = ContextStore(db_path)
-        store.initialize()
-        store.register_project(project_identity)
-        _seed_session(store, project_identity.project_id)
-        deps = ContextDeps(
-            context_db_path=db_path,
-            project_identity=project_identity,
-            session_id="sess_test",
-            trace_path=trace_path,
-            trace_total_lines=200,
-            read_ranges=[(0, 200)],
-        )
-        ctx = make_run_context(deps)
-
-        result = save_context(ctx, **_episode(status="archived"))
-
-        assert json.loads(result)["ok"] is True
-
-    def test_decision_without_why_raises_retry(
-        self, deps_with_session, mock_embeddings
-    ):
-        ctx = make_run_context(deps_with_session)
-        with pytest.raises(ModelRetry, match="both `decision` and `why`"):
-            save_context(
-                ctx,
-                **_decision(
-                    title="A decision",
-                    body="We decided something.",
-                    decision="Use X",
-                    why="",
-                ),
-            )
-
-    def test_duplicate_episode_raises_guided_retry(
-        self, deps_with_session, mock_embeddings
-    ):
-        ctx = make_run_context(deps_with_session)
-        save_context(
-            ctx,
-            **_episode(
-                title="Session title",
-                body="Short session recap.",
-                user_intent="Fix the extractor.",
-                what_happened="Read the trace and stored the main outcomes.",
-            ),
-        )
-
-        with pytest.raises(ModelRetry, match="already has an episode record"):
-            save_context(
-                ctx,
-                **_episode(
-                    title="Another session title",
-                    body="Another short session recap.",
-                    user_intent="Fix the extractor.",
-                    what_happened="Tried to write a second episode.",
-                ),
-            )
-
-    def test_extract_allows_durable_create_before_episode(
-        self, deps_with_session, mock_embeddings
-    ):
-        ctx = make_run_context(deps_with_session)
-
-        result = save_context(ctx, **_fact(title="Fact", body="Reusable fact."))
-
-        assert json.loads(result)["ok"] is True
-
-
 # ---------------------------------------------------------------------------
 # revise_context
 # ---------------------------------------------------------------------------
@@ -1067,53 +519,6 @@ def test_requires_fetch_before_update(self, deps, mock_embeddings):
                 **_fact(title="New title", body="Old body"),
             )
 
-    def test_guard_full_trace_coverage(self, deps_with_trace, mock_embeddings):
-        ctx = make_run_context(deps_with_trace)
-        store = ContextStore(deps_with_trace.context_db_path)
-        store.initialize()
-        store.register_project(deps_with_trace.project_identity)
-        _seed_session(store, deps_with_trace.project_identity.project_id)
-        rec = store.create_record(
-            project_id=deps_with_trace.project_identity.project_id,
-            session_id="sess_test",
-            kind="fact",
-            title="Old title",
-            body="Old body",
-        )
-        _fetch_records(ctx, rec["record_id"])
-        deps_with_trace.read_ranges = [(0, 5)]
-        with pytest.raises(ModelRetry, match="Unread trace lines"):
-            revise_context(
-                ctx,
-                record_id=rec["record_id"],
-                reason="test",
-                **_fact(title="t", body="b"),
-            )
-
-    def test_guard_refuses_update_before_any_read_trace(
-        self, deps_with_trace, mock_embeddings
-    ):
-        ctx = make_run_context(deps_with_trace)
-        store = ContextStore(deps_with_trace.context_db_path)
-        store.initialize()
-        store.register_project(deps_with_trace.project_identity)
-        _seed_session(store, deps_with_trace.project_identity.project_id)
-        rec = store.create_record(
-            project_id=deps_with_trace.project_identity.project_id,
-            session_id="sess_test",
-            kind="fact",
-            title="Old title",
-            body="Old body",
-        )
-        _fetch_records(ctx, rec["record_id"])
-        with pytest.raises(ModelRetry, match=r"read_trace\(start_line=1"):
-            revise_context(
-                ctx,
-                record_id=rec["record_id"],
-                reason="test",
-                **_fact(title="t", body="b"),
-            )
-
     def test_update_does_not_require_episode_ordering(
         self, deps_with_session, mock_embeddings
     ):
@@ -1453,220 +858,3 @@ def test_accepts_quoted_scalars(self, deps, mock_embeddings):
         result = count_context(ctx, kind='"decision"', status='"active"')
         parsed = json.loads(result)
         assert parsed["count"] == 1
-
-
-# ---------------------------------------------------------------------------
-# note_trace_findings
-# ---------------------------------------------------------------------------
-
-
-class TestNote:
-    def test_tool_exposes_flat_finding_fields(self):
-        params = inspect.signature(note_trace_findings).parameters
-
-        assert "findings" not in params
-        for field_name in ("theme", "line", "quote", "level"):
-            assert field_name in params
-
-    def test_finding_line_is_one_based(self):
-        with pytest.raises(ValidationError):
-            TraceFinding(theme="auth", line=0, quote="q", level="decision")
-
-    def test_tool_rejects_zero_line(self, deps):
-        ctx = make_run_context(deps)
-        with pytest.raises(ModelRetry, match="valid 1-based line"):
-            note_trace_findings(ctx, theme="auth", line=0, quote="q", level="decision")
-        assert deps.findings_checked is False
-
-    def test_appends_findings(self, deps):
-        ctx = make_run_context(deps)
-        note_trace_findings(ctx, theme="auth", line=1, quote="use JWT", level="decision")
-        result = note_trace_findings(
-            ctx, theme="db", line=5, quote="use sqlite", level="fact"
-        )
-        assert "1 finding" in result
-        assert "total 2" in result
-        assert len(deps.notes) == 2
-        assert deps.findings_checked is True
-
-    def test_empty_findings(self, deps):
-        ctx = make_run_context(deps)
-        result = note_trace_findings(ctx)
-        assert "No findings" in result
-        assert deps.findings_checked is True
-
-    def test_accumulates_across_calls(self, deps):
-        ctx = make_run_context(deps)
-        note_trace_findings(ctx, theme="a", line=1, quote="q", level="fact")
-        result = note_trace_findings(
-            ctx, theme="b", line=1, quote="q", level="decision"
-        )
-        assert "total 2" in result
-        assert len(deps.notes) == 2
-
-    def test_runtime_only_no_db(self, deps):
-        ctx = make_run_context(deps)
-        note_trace_findings(ctx, theme="a", line=1, quote="q", level="fact")
-        assert len(deps.notes) == 1
-
-
-# ---------------------------------------------------------------------------
-# prune_trace_reads
-# ---------------------------------------------------------------------------
-
-
-class TestPrune:
-    def test_marks_offsets(self, deps):
-        ctx = make_run_context(deps)
-        deps.read_ranges = [(0, 10), (100, 110)]
-        result = prune_trace_reads(ctx, start_lines=[1, 101])
-        assert "2 new" in result
-        assert 0 in deps.pruned_offsets
-        assert 100 in deps.pruned_offsets
-
-    def test_empty_offsets(self, deps):
-        ctx = make_run_context(deps)
-        result = prune_trace_reads(ctx, start_lines=[])
-        assert "No trace reads" in result
-
-    def test_deduplication(self, deps):
-        ctx = make_run_context(deps)
-        deps.read_ranges = [(0, 10), (100, 110), (200, 210)]
-        prune_trace_reads(ctx, start_lines=[1, 101])
-        result = prune_trace_reads(ctx, start_lines=[1, 201])
-        assert "1 new" in result
-        assert len(deps.pruned_offsets) == 3
-
-    def test_rejects_unread_offset(self, deps):
-        ctx = make_run_context(deps)
-        deps.read_ranges = [(0, 10)]
-        with pytest.raises(ModelRetry, match="Cannot prune unread trace start line"):
-            prune_trace_reads(ctx, start_lines=[101])
-
-    def test_runtime_only_no_db(self, deps):
-        ctx = make_run_context(deps)
-        deps.read_ranges = [(5, 15)]
-        prune_trace_reads(ctx, start_lines=[6])
-        assert 5 in deps.pruned_offsets
-
-
-# ---------------------------------------------------------------------------
-# notes_state_injector
-# ---------------------------------------------------------------------------
-
-
-class TestNotesStateInjector:
-    def test_no_findings(self, deps):
-        ctx = make_run_context(deps)
-        history = [ModelRequest(parts=[SystemPromptPart(content="system")])]
-        result = notes_state_injector(ctx, history)
-        assert len(result) == 2
-        assert "0 findings" in result[-1].parts[0].content
-
-    def test_with_findings(self, deps):
-        ctx = make_run_context(deps)
-        deps.notes = [
-            TraceFinding(theme="auth", line=1, quote="q", level="decision"),
-            TraceFinding(theme="db", line=1, quote="q", level="fact"),
-            TraceFinding(theme="api", line=2, quote="q", level="implementation"),
-        ]
-        result = notes_state_injector(ctx, [])
-        content = result[-1].parts[0].content
-        assert "3 findings" in content
-        assert "2 durable" in content
-        assert "1 implementation" in content
-
-    def test_read_trace_info(self, deps_with_trace):
-        ctx = make_run_context(deps_with_trace)
-        deps_with_trace.read_ranges = [(0, 5)]
-        result = notes_state_injector(ctx, [])
-        content = result[-1].parts[0].content
-        assert "Trace reads" in content
-
-    def test_does_not_mutate_original(self, deps):
-        ctx = make_run_context(deps)
-        original = [ModelRequest(parts=[SystemPromptPart(content="system")])]
-        result = notes_state_injector(ctx, original)
-        assert len(original) == 1
-        assert len(result) == 2
-
-
-# ---------------------------------------------------------------------------
-# context_pressure_injector
-# ---------------------------------------------------------------------------
-
-
-class TestContextPressureInjector:
-    def test_empty_history(self, deps):
-        ctx = make_run_context(deps)
-        result = context_pressure_injector(ctx, [])
-        assert len(result) == 1
-        content = result[0].parts[0].content
-        assert "CONTEXT:" in content
-        assert "[normal]" in content
-
-    def test_large_history_soft_pressure(self, deps):
-        ctx = make_run_context(deps)
-        big_content = "x" * int(MODEL_CONTEXT_TOKEN_LIMIT * 0.65 / 0.25)
-        history = [ModelRequest(parts=[SystemPromptPart(content=big_content)])]
-        result = context_pressure_injector(ctx, history)
-        content = result[-1].parts[0].content
-        assert "[soft]" in content
-
-    def test_large_history_hard_pressure(self, deps):
-        ctx = make_run_context(deps)
-        big_content = "x" * int(MODEL_CONTEXT_TOKEN_LIMIT * 0.85 / 0.25)
-        history = [ModelRequest(parts=[SystemPromptPart(content=big_content)])]
-        result = context_pressure_injector(ctx, history)
-        content = result[-1].parts[0].content
-        assert "[hard]" in content
-
-    def test_updates_deps(self, deps):
-        ctx = make_run_context(deps)
-        context_pressure_injector(ctx, [])
-        assert deps.last_context_tokens >= 0
-        assert deps.last_context_fill_ratio >= 0.0
-
-
-# ---------------------------------------------------------------------------
-# prune_history_processor
-# ---------------------------------------------------------------------------
-
-
-class TestPruneHistoryProcessor:
-    def test_no_pruned_offsets_returns_same(self, deps):
-        ctx = make_run_context(deps)
-        history = [ModelRequest(parts=[SystemPromptPart(content="hello")])]
-        result = prune_history_processor(ctx, history)
-        assert result is history
-
-    def test_prunes_matching_read_trace(self, deps):
-        ctx = make_run_context(deps)
-        deps.pruned_offsets = {0}
-        call = ToolCallPart(
-            tool_name="read_trace", args={"start_line": 1, "line_count": 10}
-        )
-        ret = ToolReturnPart(tool_name="read_trace", content="line data here")
-        history: list = [ModelResponse(parts=[call]), ModelRequest(parts=[ret])]
-        result = prune_history_processor(ctx, history)
-        assert result[1].parts[0].content == PRUNED_STUB
-
-    def test_does_not_prune_non_matching(self, deps):
-        ctx = make_run_context(deps)
-        deps.pruned_offsets = {50}
-        call = ToolCallPart(
-            tool_name="read_trace", args={"start_line": 1, "line_count": 10}
-        )
-        ret = ToolReturnPart(tool_name="read_trace", content="line data here")
-        history: list = [ModelResponse(parts=[call]), ModelRequest(parts=[ret])]
-        result = prune_history_processor(ctx, history)
-        assert result[1].parts[0].content == "line data here"
-
-    def test_does_not_prune_other_tools(self, deps):
-        ctx = make_run_context(deps)
-        deps.pruned_offsets = {0}
-        call = ToolCallPart(tool_name="search_context", args={"query": "test"})
-        ret = ToolReturnPart(tool_name="search_context", content="search data")
-        history: list = [ModelResponse(parts=[call]), ModelRequest(parts=[ret])]
-        result = prune_history_processor(ctx, history)
-        assert result[1].parts[0].content == "search data"
diff --git a/tests/unit/config/test_providers.py b/tests/unit/config/test_providers.py
index 9a66556..3680a2f 100644
--- a/tests/unit/config/test_providers.py
+++ b/tests/unit/config/test_providers.py
@@ -1,4 +1,4 @@
-"""Unit tests for provider builders (PydanticAI-only)."""
+"""Unit tests for provider builders used by PydanticAI flows."""
 
 from __future__ import annotations
 
diff --git a/tests/unit/server/test_runtime.py b/tests/unit/server/test_runtime.py
index 92452cd..d958a33 100644
--- a/tests/unit/server/test_runtime.py
+++ b/tests/unit/server/test_runtime.py
@@ -1,4 +1,4 @@
-"""Unit tests for LerimRuntime orchestration (PydanticAI-only)."""
+"""Unit tests for LerimRuntime orchestration."""
 
 from __future__ import annotations
 
@@ -23,7 +23,7 @@
 )
 
 from lerim.agents.ask import AskResult
-from lerim.agents.extract import ExtractionResult
+from lerim.agents.extract import ExtractionEvent, ExtractionResult, ExtractionRunDetails
 from lerim.server.runtime import (
     LerimRuntime,
     _resolve_runtime_roots,
@@ -57,6 +57,27 @@ def _build_runtime(tmp_path, monkeypatch):
     return LerimRuntime(default_cwd=str(tmp_path), config=cfg)
 
 
+def _extract_details(tmp_path) -> ExtractionRunDetails:
+    """Return minimal fake extract details for sync unit tests."""
+    return ExtractionRunDetails(
+        events=[
+            ExtractionEvent(
+                action="read_window",
+                ok=True,
+                content="read",
+                args={},
+            )
+        ],
+        llm_calls=1,
+        done=True,
+        context_db_path=str(tmp_path / "context.sqlite3"),
+        project_id="proj_test",
+        session_id="trace",
+        model_name="test/model",
+        trace_total_lines=1,
+    )
+
+
 class TestHelpers:
     def test_resolve_runtime_roots_defaults(self, tmp_path):
         cfg = make_config(tmp_path)
@@ -237,15 +258,11 @@ def test_sync_happy_path(self, tmp_path, monkeypatch):
         trace = tmp_path / "trace.jsonl"
         trace.write_text('{"role":"user","content":"hello"}\n', encoding="utf-8")
 
-        monkeypatch.setattr(
-            "lerim.server.runtime.build_pydantic_model",
-            lambda *args, **kwargs: "fake-model",
-        )
         monkeypatch.setattr(
             "lerim.server.runtime.run_extraction",
             lambda **kwargs: (
                 ExtractionResult(completion_summary="extracted"),
-                [ModelRequest(parts=[SystemPromptPart(content="extract")])],
+                _extract_details(tmp_path),
             ),
         )
 
@@ -284,10 +301,6 @@ def test_sync_failure_writes_structured_error_artifacts(
         trace = tmp_path / "trace.jsonl"
         trace.write_text('{"role":"user","content":"hello"}\n', encoding="utf-8")
         monkeypatch.setattr(time, "sleep", lambda *_: None)
-        monkeypatch.setattr(
-            "lerim.server.runtime.build_pydantic_model",
-            lambda *args, **kwargs: "fake-model",
-        )
         monkeypatch.setattr(
             "lerim.server.runtime.run_extraction",
             lambda **kwargs: (_ for _ in ()).throw(RuntimeError("broken extract")),
@@ -312,15 +325,11 @@ def test_sync_postprocessing_failure_marks_run_failed(self, tmp_path, monkeypatc
         rt = _build_runtime(tmp_path, monkeypatch)
         trace = tmp_path / "trace.jsonl"
         trace.write_text('{"role":"user","content":"hello"}\n', encoding="utf-8")
-        monkeypatch.setattr(
-            "lerim.server.runtime.build_pydantic_model",
-            lambda *args, **kwargs: "fake-model",
-        )
        monkeypatch.setattr(
             "lerim.server.runtime.run_extraction",
             lambda **kwargs: (
                 ExtractionResult(completion_summary="extracted"),
-                [ModelRequest(parts=[SystemPromptPart(content="extract")])],
+                _extract_details(tmp_path),
             ),
         )
         monkeypatch.setattr(

From dc23b8b365abb61211be339034f24d8d6230ae41 Mon Sep 17 00:00:00 2001
From: Isaac Kargar
Date: Thu, 14 May 2026 10:43:31 +0300
Subject: [PATCH 6/8] Update CHANGELOG for version 0.1.83 and enhance agent functionality

- Added BAML source and client packaging under `src/lerim/agents/` so future
  agents can share the same BAML/LangGraph layout.
- Introduced the production BAML/LangGraph extract package with deterministic
  trace windowing, typed BAML scans, record synthesis, context-store
  persistence, and structured graph events.
- Replaced the legacy PydanticAI extract agent with the BAML/LangGraph
  harness, updating extraction evals, integration tests, documentation, and
  run artifacts accordingly.
- Removed the extract-only trace tools, history processors, and the
  experimental `baml_agents/` sidecar.
- Bumped `pyproject.toml` to 0.1.83 and added the `baml-py` and `langgraph`
  dependencies.
- Updated the documentation to describe the new agent runtime and extraction
  flow.
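For orientation before the diff below: the "deterministic trace windowing" this
patch introduces means the extractor chunks the trace by fixed line windows
before any model call, so the same trace always produces the same windows. A
minimal sketch of that idea follows; the function name `iter_windows` and the
`window_size` parameter are illustrative assumptions, not the package's actual
API, which this patch does not show.

```python
from pathlib import Path
from typing import Iterator


def iter_windows(
    trace_path: Path, window_size: int = 40
) -> Iterator[tuple[int, int, list[str]]]:
    """Yield (first_line, last_line, lines) windows, 1-based and deterministic."""
    lines = trace_path.read_text(encoding="utf-8").splitlines()
    for start in range(0, len(lines), window_size):
        chunk = lines[start : start + window_size]
        # Same trace in, same windows out: chunking involves no model call,
        # so every extraction run scans identical windows in identical order.
        yield start + 1, start + len(chunk), chunk
```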
---
 CHANGELOG.md                   |  13 ++
 README.md                      |  17 +-
 docs/cli/sync.md               |  40 ++--
 docs/concepts/how-it-works.md  |  18 +-
 docs/concepts/sync-maintain.md |   5 +-
 docs/configuration/tracing.md  |  25 ++-
 pyproject.toml                 |  15 +-
 uv.lock                        | 375 ++++++++++++++++++++++++++++++++-
 8 files changed, 452 insertions(+), 56 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 30fd811..a60fbef 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.1.83] - 2026-05-14
+
+### Added
+- Packaged the BAML source and generated client under `src/lerim/agents/` so future agents can share the same BAML/LangGraph layout.
+- Added the production BAML/LangGraph extract package with deterministic trace windowing, typed BAML scans, record synthesis, context-store persistence, and structured graph events.
+
+### Changed
+- Replaced sync extraction with the BAML/LangGraph harness while keeping maintain, ask, and working-memory on PydanticAI.
+- Updated extraction evals, integration tests, docs, and run artifacts to use graph events instead of PydanticAI extract messages.
+
+### Removed
+- Removed the legacy PydanticAI extract agent, extract-only trace tools, history processors, and the experimental `baml_agents/` sidecar.
+
 ## [0.1.81] - 2026-04-29
 
 ### Fixed
diff --git a/README.md b/README.md
index 5caf9bc..e8b772d 100644
--- a/README.md
+++ b/README.md
@@ -222,23 +222,26 @@
 Project separation happens inside the database by `project_id`. There is no
 per-project durable store on disk.
 
-## Agent Tools
+## Agent Runtime
 
-The agent-facing tool contract is intentionally small:
+The sync extractor uses a BAML plus LangGraph graph under
+`src/lerim/agents/`. The graph reads deterministic trace windows, asks BAML
+for typed window scans, synthesizes one final record payload, and persists the
+result to SQLite.
+
+The maintain, ask, and working-memory flows still use PydanticAI with a small
+semantic DB-era tool surface:
 
-- `read_trace`
 - `list_context`
 - `search_context`
 - `get_context`
-- `save_context`
 - `revise_context`
 - `archive_context`
 - `supersede_context`
 - `count_context`
-- `note_trace_findings`
-- `prune_trace_reads`
 
-These are the authoritative runtime tool names. Keeping the surface DB-era and semantic makes the runtime easier to reason about and gives smaller future models a cleaner action space for training.
+Keeping the surface DB-era and semantic makes the runtime easier to reason
+about and gives smaller future models a cleaner action space for training.
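The README text above describes the new extract pipeline as a scan -> synthesize
-> persist loop. A minimal runnable sketch of that loop shape, built with
LangGraph's public `StateGraph` API, follows; the node names, state fields, and
stand-in payloads are illustrative assumptions, not the actual
`src/lerim/agents/` implementation.

```python
from typing import TypedDict

from langgraph.graph import END, START, StateGraph


class ExtractState(TypedDict):
    window_index: int
    total_windows: int
    findings: list[dict]
    records: list[dict]


def scan_window(state: ExtractState) -> dict:
    # Stand-in for the BAML window scan: record a typed finding per window.
    return {
        "window_index": state["window_index"] + 1,
        "findings": state["findings"] + [{"window": state["window_index"]}],
    }


def synthesize(state: ExtractState) -> dict:
    # Stand-in for record synthesis: runs exactly once over all findings.
    return {"records": [{"kind": "episode", "findings": len(state["findings"])}]}


def persist(state: ExtractState) -> dict:
    # Stand-in for context-store persistence into SQLite.
    return {}


def more_windows(state: ExtractState) -> str:
    # Deterministic loop condition: keep scanning until every window is read.
    return "scan" if state["window_index"] < state["total_windows"] else "synthesize"


builder = StateGraph(ExtractState)
builder.add_node("scan", scan_window)
builder.add_node("synthesize", synthesize)
builder.add_node("persist", persist)
builder.add_edge(START, "scan")
builder.add_conditional_edges("scan", more_windows, {"scan": "scan", "synthesize": "synthesize"})
builder.add_edge("synthesize", "persist")
builder.add_edge("persist", END)
graph = builder.compile()

result = graph.invoke(
    {"window_index": 0, "total_windows": 3, "findings": [], "records": []}
)
```

The conditional edge is what makes the loop deterministic: control flow depends
only on the window counter, never on model output, so synthesis fires exactly
once after full coverage.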
 ## Common Commands
 
diff --git a/docs/cli/sync.md b/docs/cli/sync.md
index 018a4bb..d579721 100644
--- a/docs/cli/sync.md
+++ b/docs/cli/sync.md
@@ -24,29 +24,23 @@ lerim sync --agent claude,codex
 ```mermaid
 flowchart TD
     A["Trigger: lerim sync or daemon"] --> B["Discover and queue changed sessions"]
-    B --> C["Extract agent receives one session trace"]
-
-    C --> D["Prompt goal: turn this session into durable project memory"]
-    D --> E["Agent reads trace chunks with read_trace"]
-    E --> F{"Has the agent read enough of the trace?"}
-    F -- "no" --> E
-    F -- "yes" --> G["Agent identifies candidate memories: episode, decisions, preferences, constraints, facts, references"]
-
-    G --> H{"Could this update or duplicate existing memory?"}
-    H -- "yes" --> I["Use search_context/get_context to inspect existing records"]
-    H -- "no" --> J["Prepare new records"]
-
-    I --> K{"Existing record should change?"}
-    K -- "revise" --> L["Use revise_context on fetched record"]
-    K -- "new memory" --> J
-    K -- "no durable value" --> M["Do not write"]
-
-    J --> N["Use save_context for supported durable records"]
-    L --> O["SQLite context DB + record_versions"]
-    N --> O
-    M --> P["Completion summary"]
-    O --> P
-    P --> Q["Sync artifacts: manifest, agent log, trace"]
+    B --> C["Extractor receives one session trace"]
+
+    C --> D["Deterministic graph reads the next trace window"]
+    D --> E["BAML ScanTraceWindow returns typed findings"]
+    E --> F{"More trace windows?"}
+    F -- "yes" --> D
+    F -- "no" --> G["BAML SynthesizeExtractRecords creates one episode and durable candidates"]
+
+    G --> H["Persistence normalizes and validates record drafts"]
+    H --> I{"Durable records present?"}
+    I -- "yes" --> J["Write active durable records"]
+    I -- "no" --> K["Write archived episode only"]
+
+    J --> L["SQLite context DB + record_versions"]
+    K --> L
+    L --> M["Completion summary"]
+    M --> N["Sync artifacts: manifest, graph events, trace"]
 ```
 
 ## Notes
diff --git a/docs/concepts/how-it-works.md b/docs/concepts/how-it-works.md
index 63a9e74..df67c99 100644
--- a/docs/concepts/how-it-works.md
+++ b/docs/concepts/how-it-works.md
@@ -29,27 +29,25 @@ Canonical storage is global:
 
 Projects are scoped by `project_id` inside the database.
 
-## Agent tool surface
+## Agent runtime surface
 
-Lerim does not expose raw SQL or file CRUD to the agent.
+Lerim does not expose raw SQL or file CRUD to agents.
 
-The durable context tools are:
+The sync extractor is a BAML plus LangGraph graph. It reads deterministic trace
+windows, scans each window into typed findings, synthesizes records once, and
+persists them through the context store.
+
+The maintain, ask, and working-memory flows use PydanticAI. Their semantic
+context tools are:
 
-- `read_trace`
 - `list_context`
 - `search_context`
 - `get_context`
-- `save_context`
 - `revise_context`
 - `archive_context`
 - `supersede_context`
 - `count_context`
 
-The extract flow also uses:
-
-- `note_trace_findings`
-- `prune_trace_reads`
-
 Retrieval is hybrid:
 
 - local ONNX embeddings from `mixedbread-ai/mxbai-embed-xsmall-v1`
diff --git a/docs/concepts/sync-maintain.md b/docs/concepts/sync-maintain.md
index bc6dfaa..a2f94ff 100644
--- a/docs/concepts/sync-maintain.md
+++ b/docs/concepts/sync-maintain.md
@@ -7,7 +7,8 @@ clean:
 - **Maintain** (cold path) -- refines existing records offline
 
 Both run automatically in the daemon loop and can also be triggered manually.
-Both use the same PydanticAI runtime and the `[roles.agent]` role model.
+Sync extraction uses the BAML plus LangGraph runtime and the `[roles.agent]`
+role model. Maintain uses the PydanticAI runtime with the same role model.
 
 ---
 
@@ -20,7 +21,7 @@ records:
 2. **Index** -- new sessions are cataloged in `sessions.sqlite3`
 3. **Match to project** -- sessions matching a registered project are enqueued; unmatched sessions are indexed but not extracted
 4. **Compact** -- traces are compacted (tool outputs stripped) and cached
-5. **Extract flow** -- the PydanticAI extraction agent (`[roles.agent]`) reads the trace and uses `read_trace`, `note_trace_findings`, `prune_trace_reads`, `search_context`, `get_context`, `save_context`, and `revise_context` to write one episode record plus a small number of durable records into `~/.lerim/context.sqlite3`
+5. **Extract flow** -- the BAML plus LangGraph extractor (`[roles.agent]`) reads deterministic trace windows, scans typed findings, synthesizes the final payload, and writes one episode record plus a small number of durable records into `~/.lerim/context.sqlite3`
 
 ### Record quality contract
 
diff --git a/docs/configuration/tracing.md b/docs/configuration/tracing.md
index 8873511..a543b87 100644
--- a/docs/configuration/tracing.md
+++ b/docs/configuration/tracing.md
@@ -1,6 +1,6 @@
 # Tracing
 
-Lerim uses [MLflow](https://mlflow.org) for PydanticAI agent observability.
+Lerim uses [MLflow](https://mlflow.org) for agent observability.
 Tracing is opt-in and controlled by `[observability].mlflow_enabled` in
 `~/.lerim/config.toml`. The `LERIM_MLFLOW` environment variable can override it
 for one-off runs.
@@ -9,10 +9,12 @@ for one-off runs.
 
 When tracing is enabled, MLflow records:
 
-- **PydanticAI model calls** -- via `mlflow.pydantic_ai.autolog()`, every language model invocation
-  across sync/maintain/ask flows is captured automatically, including
-  input prompts, outputs, token counts, and latency.
-- **Agent/tool executions** -- tool calls and agent steps are traced as nested spans within each run.
+- **Sync extraction graph** -- the BAML plus LangGraph extractor emits a
+  top-level `lerim.agent.extract` span with trace metadata and model label.
+- **PydanticAI model calls** -- via `mlflow.pydantic_ai.autolog()`, maintain,
+  ask, and working-memory model invocations are captured automatically,
+  including input prompts, outputs, token counts, and latency.
+- **Agent/tool executions** -- tool calls and agent steps are traced as nested spans within each run when the runtime exposes them.
 - **agent_trace.json** -- each sync/maintain run also writes a local
   `agent_trace.json` under the run workspace for a full tool/message history
   (not MLflow-specific).
@@ -104,17 +106,18 @@ Lerim continues writing traces as long as the server is running with
 In the UI, look for:
 
 - **Experiments** -- select the `lerim` experiment.
-- **Traces** -- the primary view for PydanticAI autologging. Expand a trace to
-  see the model/tool span tree.
+- **Traces** -- the primary view for Lerim agent spans. Expand a trace to see
+  the sync graph span or PydanticAI model/tool span tree.
 - **Run id** -- match a local run folder to MLflow by searching for the
   `manifest.json` `run_id` value. It is also stored as `client_request_id` and
   the `lerim.run_id` tag.
-- **Model calls** -- every PydanticAI model request is logged with input prompts,
-  outputs, token counts, and latency.
+- **Model calls** -- PydanticAI model requests are logged with input prompts,
+  outputs, token counts, and latency. Sync extraction model metadata is attached
+  to the BAML/LangGraph extract span.
 - **Spans** -- nested spans show the call hierarchy from the top-level
   orchestration down to individual LM calls and tool invocations.
 
-Classic MLflow **Runs** may be empty for PydanticAI traces. That does not mean
+Classic MLflow **Runs** may be empty for agent traces. That does not mean
 tracing is broken; check the Traces view or verify the SQLite counts below.
 
 !!! tip "Filtering"
@@ -146,7 +149,7 @@ Important files:
 
 - `manifest.json` -- run id, operation, project, session id, artifact paths,
   and status. `mlflow_client_request_id` matches the MLflow trace request id.
 - `events.jsonl` -- compact started/succeeded/failed events for that run.
-- `agent_trace.json` -- serialized PydanticAI messages when available.
+- `agent_trace.json` -- serialized graph events or PydanticAI messages when available.
 - `agent.log` -- short human-readable agent summary on success.
 - `error.json` -- structured error details on failure.
diff --git a/pyproject.toml b/pyproject.toml
index 72a60bb..82a7baa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,13 +4,15 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "lerim"
-version = "0.1.82"
+version = "0.1.83"
 description = "Continual learning layer for coding agents and software projects."
 readme = "README.md"
 requires-python = ">=3.11"
 license = "BUSL-1.1"
 dependencies = [
     "pydantic==2.12.5",
+    "baml-py==0.222.0",
+    "langgraph==1.2.0",
     "pydantic-ai==1.70.0",
     "pydantic-evals==1.70.0",
     "eval_type_backport==0.3.1; python_version < '3.13'",
@@ -62,7 +64,12 @@ where = ["src"]
 include = ["lerim*"]
 
 [tool.setuptools.package-data]
-lerim = ["config/default.toml", "skills/*.md", "server/lerim-seccomp.json"]
+lerim = [
+    "agents/baml_src/*.baml",
+    "config/default.toml",
+    "skills/*.md",
+    "server/lerim-seccomp.json",
+]
 
 [tool.pytest.ini_options]
 pythonpath = ["."]
@@ -84,8 +91,12 @@ select = ["E4", "E7", "E9", "F"]
 extend-select = ["B006"]
 ignore = ["E501"]
 
+[tool.ruff]
+extend-exclude = ["src/lerim/agents/baml_client"]
+
 [tool.vulture]
 paths = ["src/lerim/", "vulture_whitelist.py"]
+exclude = ["src/lerim/agents/baml_client/*"]
 min_confidence = 60
 
 [dependency-groups]
diff --git a/uv.lock b/uv.lock
index 460c3d2..ae8ac44 100644
--- a/uv.lock
+++ b/uv.lock
@@ -430,6 +430,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/21/f8/d02f650c47d05034dcd6f9c8cf94f39598b7a89c00ecda0ecb2911bc27e9/backrefs-6.2-py39-none-any.whl", hash = "sha256:664e33cd88c6840b7625b826ecf2555f32d491800900f5a541f772c485f7cda7", size = 381077, upload-time = "2026-02-16T19:10:13.74Z" },
 ]
 
+[[package]]
+name = "baml-py"
+version = "0.222.0"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/52/ec/9c8958b0e1a661c60900a94e2b8d13e7b4b6d09511837dfa3d59732d4e02/baml_py-0.222.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:1270fa6ceaa2093bec7d4bf7f857d33ce4547849eaca8aa21c5518b498832e25", size = 21808749, upload-time = "2026-04-27T23:29:18.126Z" },
+    { url = "https://files.pythonhosted.org/packages/30/b8/73f2d43dcf1fab0d79fbb91132f3ca184b44891ebb98bd0def431999dbb1/baml_py-0.222.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:bfd00afed25f939d10f99d555de2900930fc462ebb20b8d46e99c1aabadba7e9", size = 20668661, upload-time = "2026-04-27T23:29:20.87Z" },
+    { url =
"https://files.pythonhosted.org/packages/1a/79/f452e52abdea0f971e835c9ca6e95833f8ea1bcbbd9ae65089e32200151c/baml_py-0.222.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:174ef4e713da23d6a9d6dc2d55b6a28a88c5030edf55060dfc42f1c7eccfc7db", size = 24360345, upload-time = "2026-04-27T23:29:23.5Z" }, + { url = "https://files.pythonhosted.org/packages/6c/b2/f5b9437e236c418b22de3e1fc247d72109843391cb52186f7170cbde51c6/baml_py-0.222.0-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:a4a3c0f249e398be48eefc8f4f8dd78a54b0ec73f1e44c23062f549884e921a3", size = 23738386, upload-time = "2026-04-27T23:29:25.822Z" }, + { url = "https://files.pythonhosted.org/packages/e9/08/9696bf70bec78b7a158b1d0dbdc043581170c4d8988aaf9ed7966aae4527/baml_py-0.222.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5e3ac35992dbb7ef8fe60e4f7ad15623fac9fb008255394c436b9514c2fd0287", size = 24109751, upload-time = "2026-04-27T23:29:27.999Z" }, + { url = "https://files.pythonhosted.org/packages/46/05/32053db7478b94d3e98295bcceae5e043d54e7d32eaa3decd6318fbcf7ce/baml_py-0.222.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:84a2af099a78d337bb053fd84fd126bfffc96d0338a356627d63ff4d7a85972b", size = 24614675, upload-time = "2026-04-27T23:29:30.507Z" }, + { url = "https://files.pythonhosted.org/packages/eb/89/ac76b9f6fe2d663b91f05c08bbdb47bbed0c5211d1ed3f188ee4483556e4/baml_py-0.222.0-cp38-abi3-win_amd64.whl", hash = "sha256:d4d769f4c6cd8a4605f549aa519b3d68aa93e0f6d5b7f42172ac16bae95ad820", size = 22008363, upload-time = "2026-04-27T23:29:33.451Z" }, + { url = "https://files.pythonhosted.org/packages/05/62/9cfa9c35e9544ce56a41ca63b5d0cd35d655cb60a8f6ddead408b8c25fc9/baml_py-0.222.0-cp38-abi3-win_arm64.whl", hash = "sha256:c26a7c71268fb18ea5d3f56b2693c55aca18cc422bce630486c3ef20b409daf3", size = 20417604, upload-time = "2026-04-27T23:29:35.704Z" }, +] + [[package]] name = "beartype" version = "0.22.9" @@ -2275,6 +2290,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/9e/038522f50ceb7e74f1f991bf1b699f24b0c2bbe7c390dd36ad69f4582258/json5-0.13.0-py3-none-any.whl", hash = "sha256:9a08e1dd65f6a4d4c6fa82d216cf2477349ec2346a38fd70cc11d2557499fbcc", size = 36163, upload-time = "2026-01-01T19:42:13.962Z" }, ] +[[package]] +name = "jsonpatch" +version = "1.33" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jsonpointer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/78/18813351fe5d63acad16aec57f94ec2b70a09e53ca98145589e185423873/jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c", size = 21699, upload-time = "2023-06-26T12:07:29.144Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/07/02e16ed01e04a374e644b575638ec7987ae846d25ad97bcc9945a3ee4b0e/jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade", size = 12898, upload-time = "2023-06-16T21:01:28.466Z" }, +] + [[package]] name = "jsonpath-python" version = "1.1.5" @@ -2634,6 +2661,114 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0a/dd/8050c947d435c8d4bc94e3252f4d8bb8a76cfb424f043a8680be637a57f1/kiwisolver-1.5.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:59cd8683f575d96df5bb48f6add94afc055012c29e28124fcae2b63661b9efb1", size = 73558, upload-time = "2026-03-09T13:15:52.112Z" }, ] +[[package]] +name = "langchain-core" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jsonpatch" 
}, + { name = "langchain-protocol" }, + { name = "langsmith" }, + { name = "packaging" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "tenacity" }, + { name = "typing-extensions" }, + { name = "uuid-utils" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/59/de/679a53472c25860837e32c0442c962fa86e95317a36460e2c9d5c91b17c2/langchain_core-1.4.0.tar.gz", hash = "sha256:1dc341eed802ed9c117c0df3923c991e5e9e226571e5725c194eeb5bd93d1a7f", size = 920260, upload-time = "2026-05-11T18:42:35.919Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/1a/86c38c27b81913a1c6c12448cab55defb5a1097c7dc9a4cea83f55477a2d/langchain_core-1.4.0-py3-none-any.whl", hash = "sha256:23cbbdb46e38ddd1dd5247e6167e96013eae74bea4c5949c550809970a9e565c", size = 548120, upload-time = "2026-05-11T18:42:33.992Z" }, +] + +[[package]] +name = "langchain-protocol" +version = "0.0.15" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4f/24/9777489d6fbbee64af0c8f96d4f840239c408cf694f3394672807dafc490/langchain_protocol-0.0.15.tar.gz", hash = "sha256:9ab2d11ee73944754f10e037e717098d3a6796f0e58afa9cadda6154e7655ade", size = 5862, upload-time = "2026-05-01T22:30:04.748Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/7a/9c97a7b9cbe4c5dc6a44cdb1545450c28f0c8ce89b9c1f0ee7fbad896263/langchain_protocol-0.0.15-py3-none-any.whl", hash = "sha256:461eb794358f83d5e42635a5797799ffec7b4702314e34edf73ac21e75d3ef79", size = 6982, upload-time = "2026-05-01T22:30:03.877Z" }, +] + +[[package]] +name = "langgraph" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "langgraph-checkpoint" }, + { name = "langgraph-prebuilt" }, + { name = "langgraph-sdk" }, + { name = "pydantic" }, + { name = "xxhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/58/61/d5d25e783035aa307d289b37e082258a6061c0fb4caa4a284f3bf1e87169/langgraph-1.2.0.tar.gz", hash = "sha256:4a9baaf62afc5d5f63144a50095140a34b9aa9b7cea695d25326d564775348e7", size = 690248, upload-time = "2026-05-12T03:46:39.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/e8/e3304ac0015c2bdb04ad9785e4ed65c788855ce7857ce6104dd2f5d322db/langgraph-1.2.0-py3-none-any.whl", hash = "sha256:03fd5895a8d4b70db1ff63ebc3bacead29dd20cd794a8b1a483e7ec9018f7a65", size = 234262, upload-time = "2026-05-12T03:46:37.971Z" }, +] + +[[package]] +name = "langgraph-checkpoint" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "ormsgpack" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/b4/6005c5dd88ad484fe6235d4c43a0d2cee7e91b08ad85a180985c2662df87/langgraph_checkpoint-4.1.0.tar.gz", hash = "sha256:e5bb304e30fc1363ac8fcb5f7dee5ca2185d77fe475b0d01de2c5f91324c2c21", size = 181942, upload-time = "2026-05-12T03:33:49.888Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/93/74/d3be2b41955e20ccd624dba5f6fe9d38dcee385ba470a6e13ed86732fc86/langgraph_checkpoint-4.1.0-py3-none-any.whl", hash = "sha256:8bc2a0466a20c38b865ce6671b42093fd5c041133f32351cae4222e0eeaf7fb5", size = 56047, upload-time = "2026-05-12T03:33:48.548Z" }, +] + +[[package]] +name = "langgraph-prebuilt" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "langgraph-checkpoint" }, +] +sdist = { 
url = "https://files.pythonhosted.org/packages/29/66/ed9b93f56bc17ef22d551892f0ac2b225a97fe0fcf23a511b857f70d590b/langgraph_prebuilt-1.1.0.tar.gz", hash = "sha256:3c579cf6eed2d17f9c157c2d0fcaddcd8688524e7022d3b22b37a3bf4589d528", size = 178833, upload-time = "2026-05-12T03:37:49.332Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/43/3fe1a700b8490ed02679cdbbc8c915eb23a092faf496c9c1118abcd10be3/langgraph_prebuilt-1.1.0-py3-none-any.whl", hash = "sha256:51e311747d755b751d5c6b39b0c1446124d3a7643d2515017e6714b323508fc9", size = 41043, upload-time = "2026-05-12T03:37:48.007Z" }, +] + +[[package]] +name = "langgraph-sdk" +version = "0.3.14" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "orjson" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/f1/134046c20bc4a4a15d410d1d21c9e298a3e9923777b4cc867b8669bc636b/langgraph_sdk-0.3.14.tar.gz", hash = "sha256:acd1674c538e97f3cdaa610f6dd7e34bc9bad30167f0ccc482dcd563325e81f5", size = 198162, upload-time = "2026-05-05T18:40:03.524Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/34/96/1c9f9fbfe756ddd850a2585e7f1949d8ebb97fdaa7a5eff8f45ed1314670/langgraph_sdk-0.3.14-py3-none-any.whl", hash = "sha256:68935bf6f4924eda92617a9e5dfb4f4281197508c648cb9d62ff083907607f9d", size = 97028, upload-time = "2026-05-05T18:40:02.099Z" }, +] + +[[package]] +name = "langsmith" +version = "0.8.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "orjson", marker = "platform_python_implementation != 'PyPy'" }, + { name = "packaging" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "requests-toolbelt" }, + { name = "uuid-utils" }, + { name = "xxhash" }, + { name = "zstandard" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d4/e9/4ceeba766bae47de1a6ecdaa4024d10eff63eed936796b77005742399e8d/langsmith-0.8.4.tar.gz", hash = "sha256:989b387f6ff92ec5f9d14c0edb333e2579590cad5a1ca07042d924b0ec43cd10", size = 4460243, upload-time = "2026-05-13T21:00:59.338Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/94/8b872959ea529ecfbbe2c3f91d9ebf98cb8dbd9e3f7487bc134740d3d235/langsmith-0.8.4-py3-none-any.whl", hash = "sha256:4e334ab223d10129c9943c461d95fa9089523638ea29cd048045a7f99b973f50", size = 398701, upload-time = "2026-05-13T21:00:57.393Z" }, +] + [[package]] name = "lark" version = "1.3.1" @@ -2645,11 +2780,13 @@ wheels = [ [[package]] name = "lerim" -version = "0.1.82" +version = "0.1.83" source = { editable = "." 
} dependencies = [ + { name = "baml-py" }, { name = "eval-type-backport", marker = "python_full_version < '3.13'" }, { name = "huggingface-hub" }, + { name = "langgraph" }, { name = "loguru" }, { name = "mlflow" }, { name = "numpy" }, @@ -2693,8 +2830,10 @@ dev = [ [package.metadata] requires-dist = [ + { name = "baml-py", specifier = "==0.222.0" }, { name = "eval-type-backport", marker = "python_full_version < '3.13'", specifier = "==0.3.1" }, { name = "huggingface-hub", specifier = ">=0.35.0" }, + { name = "langgraph", specifier = "==1.2.0" }, { name = "loguru", specifier = "==0.7.3" }, { name = "mkdocs-material", marker = "extra == 'docs'", specifier = ">=9.6" }, { name = "mkdocstrings", extras = ["python"], marker = "extra == 'docs'", specifier = ">=0.27" }, @@ -4077,6 +4216,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/1c/f2a8d8a1b17514660a614ce5f7aac74b934e69f5abc2700cc7ced882a009/orjson-3.11.7-cp314-cp314-win_arm64.whl", hash = "sha256:4a2e9c5be347b937a2e0203866f12bba36082e89b402ddb9e927d5822e43088d", size = 126038, upload-time = "2026-02-02T15:38:47.703Z" }, ] +[[package]] +name = "ormsgpack" +version = "1.12.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/12/0c/f1761e21486942ab9bb6feaebc610fa074f7c5e496e6962dea5873348077/ormsgpack-1.12.2.tar.gz", hash = "sha256:944a2233640273bee67521795a73cf1e959538e0dfb7ac635505010455e53b33", size = 39031, upload-time = "2026-01-18T20:55:28.023Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/08/8b68f24b18e69d92238aa8f258218e6dfeacf4381d9d07ab8df303f524a9/ormsgpack-1.12.2-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:bd5f4bf04c37888e864f08e740c5a573c4017f6fd6e99fa944c5c935fabf2dd9", size = 378266, upload-time = "2026-01-18T20:55:59.876Z" }, + { url = "https://files.pythonhosted.org/packages/0d/24/29fc13044ecb7c153523ae0a1972269fcd613650d1fa1a9cec1044c6b666/ormsgpack-1.12.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34d5b28b3570e9fed9a5a76528fc7230c3c76333bc214798958e58e9b79cc18a", size = 203035, upload-time = "2026-01-18T20:55:30.59Z" }, + { url = "https://files.pythonhosted.org/packages/ad/c2/00169fb25dd8f9213f5e8a549dfb73e4d592009ebc85fbbcd3e1dcac575b/ormsgpack-1.12.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3708693412c28f3538fb5a65da93787b6bbab3484f6bc6e935bfb77a62400ae5", size = 210539, upload-time = "2026-01-18T20:55:48.569Z" }, + { url = "https://files.pythonhosted.org/packages/1b/33/543627f323ff3c73091f51d6a20db28a1a33531af30873ea90c5ac95a9b5/ormsgpack-1.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43013a3f3e2e902e1d05e72c0f1aeb5bedbb8e09240b51e26792a3c89267e181", size = 212401, upload-time = "2026-01-18T20:56:10.101Z" }, + { url = "https://files.pythonhosted.org/packages/e8/5d/f70e2c3da414f46186659d24745483757bcc9adccb481a6eb93e2b729301/ormsgpack-1.12.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7c8b1667a72cbba74f0ae7ecf3105a5e01304620ed14528b2cb4320679d2869b", size = 387082, upload-time = "2026-01-18T20:56:12.047Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d6/06e8dc920c7903e051f30934d874d4afccc9bb1c09dcaf0bc03a7de4b343/ormsgpack-1.12.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:df6961442140193e517303d0b5d7bc2e20e69a879c2d774316125350c4a76b92", size = 482346, upload-time = "2026-01-18T20:56:05.152Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/c4/f337ac0905eed9c393ef990c54565cd33644918e0a8031fe48c098c71dbf/ormsgpack-1.12.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c6a4c34ddef109647c769d69be65fa1de7a6022b02ad45546a69b3216573eb4a", size = 425181, upload-time = "2026-01-18T20:55:37.83Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/6d5758fabef3babdf4bbbc453738cc7de9cd3334e4c38dd5737e27b85653/ormsgpack-1.12.2-cp311-cp311-win_amd64.whl", hash = "sha256:73670ed0375ecc303858e3613f407628dd1fca18fe6ac57b7b7ce66cc7bb006c", size = 117182, upload-time = "2026-01-18T20:55:31.472Z" }, + { url = "https://files.pythonhosted.org/packages/c4/57/17a15549233c37e7fd054c48fe9207492e06b026dbd872b826a0b5f833b6/ormsgpack-1.12.2-cp311-cp311-win_arm64.whl", hash = "sha256:c2be829954434e33601ae5da328cccce3266b098927ca7a30246a0baec2ce7bd", size = 111464, upload-time = "2026-01-18T20:55:38.811Z" }, + { url = "https://files.pythonhosted.org/packages/4c/36/16c4b1921c308a92cef3bf6663226ae283395aa0ff6e154f925c32e91ff5/ormsgpack-1.12.2-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7a29d09b64b9694b588ff2f80e9826bdceb3a2b91523c5beae1fab27d5c940e7", size = 378618, upload-time = "2026-01-18T20:55:50.835Z" }, + { url = "https://files.pythonhosted.org/packages/c0/68/468de634079615abf66ed13bb5c34ff71da237213f29294363beeeca5306/ormsgpack-1.12.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b39e629fd2e1c5b2f46f99778450b59454d1f901bc507963168985e79f09c5d", size = 203186, upload-time = "2026-01-18T20:56:11.163Z" }, + { url = "https://files.pythonhosted.org/packages/73/a9/d756e01961442688b7939bacd87ce13bfad7d26ce24f910f6028178b2cc8/ormsgpack-1.12.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:958dcb270d30a7cb633a45ee62b9444433fa571a752d2ca484efdac07480876e", size = 210738, upload-time = "2026-01-18T20:56:09.181Z" }, + { url = "https://files.pythonhosted.org/packages/7b/ba/795b1036888542c9113269a3f5690ab53dd2258c6fb17676ac4bd44fcf94/ormsgpack-1.12.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58d379d72b6c5e964851c77cfedfb386e474adee4fd39791c2c5d9efb53505cc", size = 212569, upload-time = "2026-01-18T20:56:06.135Z" }, + { url = "https://files.pythonhosted.org/packages/6c/aa/bff73c57497b9e0cba8837c7e4bcab584b1a6dbc91a5dd5526784a5030c8/ormsgpack-1.12.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8463a3fc5f09832e67bdb0e2fda6d518dc4281b133166146a67f54c08496442e", size = 387166, upload-time = "2026-01-18T20:55:36.738Z" }, + { url = "https://files.pythonhosted.org/packages/d3/cf/f8283cba44bcb7b14f97b6274d449db276b3a86589bdb363169b51bc12de/ormsgpack-1.12.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:eddffb77eff0bad4e67547d67a130604e7e2dfbb7b0cde0796045be4090f35c6", size = 482498, upload-time = "2026-01-18T20:55:29.626Z" }, + { url = "https://files.pythonhosted.org/packages/05/be/71e37b852d723dfcbe952ad04178c030df60d6b78eba26bfd14c9a40575e/ormsgpack-1.12.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fcd55e5f6ba0dbce624942adf9f152062135f991a0126064889f68eb850de0dd", size = 425518, upload-time = "2026-01-18T20:55:49.556Z" }, + { url = "https://files.pythonhosted.org/packages/7a/0c/9803aa883d18c7ef197213cd2cbf73ba76472a11fe100fb7dab2884edf48/ormsgpack-1.12.2-cp312-cp312-win_amd64.whl", hash = "sha256:d024b40828f1dde5654faebd0d824f9cc29ad46891f626272dd5bfd7af2333a4", size = 117462, upload-time = "2026-01-18T20:55:47.726Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/9e/029e898298b2cc662f10d7a15652a53e3b525b1e7f07e21fef8536a09bb8/ormsgpack-1.12.2-cp312-cp312-win_arm64.whl", hash = "sha256:da538c542bac7d1c8f3f2a937863dba36f013108ce63e55745941dda4b75dbb6", size = 111559, upload-time = "2026-01-18T20:55:54.273Z" }, + { url = "https://files.pythonhosted.org/packages/eb/29/bb0eba3288c0449efbb013e9c6f58aea79cf5cb9ee1921f8865f04c1a9d7/ormsgpack-1.12.2-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5ea60cb5f210b1cfbad8c002948d73447508e629ec375acb82910e3efa8ff355", size = 378661, upload-time = "2026-01-18T20:55:57.765Z" }, + { url = "https://files.pythonhosted.org/packages/6e/31/5efa31346affdac489acade2926989e019e8ca98129658a183e3add7af5e/ormsgpack-1.12.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3601f19afdbea273ed70b06495e5794606a8b690a568d6c996a90d7255e51c1", size = 203194, upload-time = "2026-01-18T20:56:08.252Z" }, + { url = "https://files.pythonhosted.org/packages/eb/56/d0087278beef833187e0167f8527235ebe6f6ffc2a143e9de12a98b1ce87/ormsgpack-1.12.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:29a9f17a3dac6054c0dce7925e0f4995c727f7c41859adf9b5572180f640d172", size = 210778, upload-time = "2026-01-18T20:55:17.694Z" }, + { url = "https://files.pythonhosted.org/packages/1c/a2/072343e1413d9443e5a252a8eb591c2d5b1bffbe5e7bfc78c069361b92eb/ormsgpack-1.12.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39c1bd2092880e413902910388be8715f70b9f15f20779d44e673033a6146f2d", size = 212592, upload-time = "2026-01-18T20:55:32.747Z" }, + { url = "https://files.pythonhosted.org/packages/a2/8b/a0da3b98a91d41187a63b02dda14267eefc2a74fcb43cc2701066cf1510e/ormsgpack-1.12.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:50b7249244382209877deedeee838aef1542f3d0fc28b8fe71ca9d7e1896a0d7", size = 387164, upload-time = "2026-01-18T20:55:40.853Z" }, + { url = "https://files.pythonhosted.org/packages/19/bb/6d226bc4cf9fc20d8eb1d976d027a3f7c3491e8f08289a2e76abe96a65f3/ormsgpack-1.12.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:5af04800d844451cf102a59c74a841324868d3f1625c296a06cc655c542a6685", size = 482516, upload-time = "2026-01-18T20:55:42.033Z" }, + { url = "https://files.pythonhosted.org/packages/fb/f1/bb2c7223398543dedb3dbf8bb93aaa737b387de61c5feaad6f908841b782/ormsgpack-1.12.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cec70477d4371cd524534cd16472d8b9cc187e0e3043a8790545a9a9b296c258", size = 425539, upload-time = "2026-01-18T20:55:24.727Z" }, + { url = "https://files.pythonhosted.org/packages/7b/e8/0fb45f57a2ada1fed374f7494c8cd55e2f88ccd0ab0a669aa3468716bf5f/ormsgpack-1.12.2-cp313-cp313-win_amd64.whl", hash = "sha256:21f4276caca5c03a818041d637e4019bc84f9d6ca8baa5ea03e5cc8bf56140e9", size = 117459, upload-time = "2026-01-18T20:55:56.876Z" }, + { url = "https://files.pythonhosted.org/packages/7a/d4/0cfeea1e960d550a131001a7f38a5132c7ae3ebde4c82af1f364ccc5d904/ormsgpack-1.12.2-cp313-cp313-win_arm64.whl", hash = "sha256:baca4b6773d20a82e36d6fd25f341064244f9f86a13dead95dd7d7f996f51709", size = 111577, upload-time = "2026-01-18T20:55:43.605Z" }, + { url = "https://files.pythonhosted.org/packages/94/16/24d18851334be09c25e87f74307c84950f18c324a4d3c0b41dabdbf19c29/ormsgpack-1.12.2-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:bc68dd5915f4acf66ff2010ee47c8906dc1cf07399b16f4089f8c71733f6e36c", size = 378717, upload-time = 
"2026-01-18T20:55:26.164Z" }, + { url = "https://files.pythonhosted.org/packages/b5/a2/88b9b56f83adae8032ac6a6fa7f080c65b3baf9b6b64fd3d37bd202991d4/ormsgpack-1.12.2-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46d084427b4132553940070ad95107266656cb646ea9da4975f85cb1a6676553", size = 203183, upload-time = "2026-01-18T20:55:18.815Z" }, + { url = "https://files.pythonhosted.org/packages/a9/80/43e4555963bf602e5bdc79cbc8debd8b6d5456c00d2504df9775e74b450b/ormsgpack-1.12.2-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c010da16235806cf1d7bc4c96bf286bfa91c686853395a299b3ddb49499a3e13", size = 210814, upload-time = "2026-01-18T20:55:33.973Z" }, + { url = "https://files.pythonhosted.org/packages/78/e1/7cfbf28de8bca6efe7e525b329c31277d1b64ce08dcba723971c241a9d60/ormsgpack-1.12.2-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18867233df592c997154ff942a6503df274b5ac1765215bceba7a231bea2745d", size = 212634, upload-time = "2026-01-18T20:55:28.634Z" }, + { url = "https://files.pythonhosted.org/packages/95/f8/30ae5716e88d792a4e879debee195653c26ddd3964c968594ddef0a3cc7e/ormsgpack-1.12.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b009049086ddc6b8f80c76b3955df1aa22a5fbd7673c525cd63bf91f23122ede", size = 387139, upload-time = "2026-01-18T20:56:02.013Z" }, + { url = "https://files.pythonhosted.org/packages/dc/81/aee5b18a3e3a0e52f718b37ab4b8af6fae0d9d6a65103036a90c2a8ffb5d/ormsgpack-1.12.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:1dcc17d92b6390d4f18f937cf0b99054824a7815818012ddca925d6e01c2e49e", size = 482578, upload-time = "2026-01-18T20:55:35.117Z" }, + { url = "https://files.pythonhosted.org/packages/bd/17/71c9ba472d5d45f7546317f467a5fc941929cd68fb32796ca3d13dcbaec2/ormsgpack-1.12.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f04b5e896d510b07c0ad733d7fce2d44b260c5e6c402d272128f8941984e4285", size = 425539, upload-time = "2026-01-18T20:56:04.009Z" }, + { url = "https://files.pythonhosted.org/packages/2e/a6/ac99cd7fe77e822fed5250ff4b86fa66dd4238937dd178d2299f10b69816/ormsgpack-1.12.2-cp314-cp314-win_amd64.whl", hash = "sha256:ae3aba7eed4ca7cb79fd3436eddd29140f17ea254b91604aa1eb19bfcedb990f", size = 117493, upload-time = "2026-01-18T20:56:07.343Z" }, + { url = "https://files.pythonhosted.org/packages/3a/67/339872846a1ae4592535385a1c1f93614138566d7af094200c9c3b45d1e5/ormsgpack-1.12.2-cp314-cp314-win_arm64.whl", hash = "sha256:118576ea6006893aea811b17429bfc561b4778fad393f5f538c84af70b01260c", size = 111579, upload-time = "2026-01-18T20:55:21.161Z" }, + { url = "https://files.pythonhosted.org/packages/49/c2/6feb972dc87285ad381749d3882d8aecbde9f6ecf908dd717d33d66df095/ormsgpack-1.12.2-cp314-cp314t-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7121b3d355d3858781dc40dafe25a32ff8a8242b9d80c692fd548a4b1f7fd3c8", size = 378721, upload-time = "2026-01-18T20:55:52.12Z" }, + { url = "https://files.pythonhosted.org/packages/a3/9a/900a6b9b413e0f8a471cf07830f9cf65939af039a362204b36bd5b581d8b/ormsgpack-1.12.2-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ee766d2e78251b7a63daf1cddfac36a73562d3ddef68cacfb41b2af64698033", size = 203170, upload-time = "2026-01-18T20:55:44.469Z" }, + { url = "https://files.pythonhosted.org/packages/87/4c/27a95466354606b256f24fad464d7c97ab62bce6cc529dd4673e1179b8fb/ormsgpack-1.12.2-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:292410a7d23de9b40444636b9b8f1e4e4b814af7f1ef476e44887e52a123f09d", size = 212816, upload-time = "2026-01-18T20:55:23.501Z" }, + { url = "https://files.pythonhosted.org/packages/73/cd/29cee6007bddf7a834e6cd6f536754c0535fcb939d384f0f37a38b1cddb8/ormsgpack-1.12.2-cp314-cp314t-win_amd64.whl", hash = "sha256:837dd316584485b72ef451d08dd3e96c4a11d12e4963aedb40e08f89685d8ec2", size = 117232, upload-time = "2026-01-18T20:55:45.448Z" }, +] + [[package]] name = "overrides" version = "7.7.0" @@ -5350,6 +5537,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, +] + [[package]] name = "rfc3339-validator" version = "0.1.4" @@ -6442,6 +6641,106 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] +[[package]] +name = "uuid-utils" +version = "0.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/f6/1856dc5935a947a062fb8fefd8a26e0f9f6694320e7203c7e85bd291dc93/uuid_utils-0.15.0.tar.gz", hash = "sha256:f182733e3d88edd2ceeca292627e2b1d5fa8693abe00b160de5517616ed399ea", size = 42182, upload-time = "2026-05-11T12:07:01.82Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/d8/06ebb55d495ce27a0647942c24fc699b7beab953338fa516029fd31e466f/uuid_utils-0.15.0-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:62fd9267b40d82e2d9d148f560e86436f5b2daa9a1623c329ed0ec7e61fefc4d", size = 564112, upload-time = "2026-05-11T12:08:46.093Z" }, + { url = "https://files.pythonhosted.org/packages/f0/72/6b34c1ee02e50f74bb8d92660b5fae1b87a13ada868c62b8621ec1c7fe5d/uuid_utils-0.15.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:93b30c7bcb148fa23497779ac53dfe34a0de6f53e300f6d585ac759e9e6718ef", size = 289704, upload-time = "2026-05-11T12:06:26.263Z" }, + { url = "https://files.pythonhosted.org/packages/bd/d5/f2b167910bd9043a6a110db4b1d2c0d2c41c5c11bc6e59a945f3955d97d2/uuid_utils-0.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc391e241f9b3d98901c5ada27546ddb49b71f1ad2f9dfe41cd91d6d69d84156", size = 327011, upload-time = "2026-05-11T12:07:14.199Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/fc/057c41b224c330680325b1d3b5f7acda96ebcd0e104bc6bdcb9c2969da35/uuid_utils-0.15.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:072bacb159ded3c2c4c5b1b23191c72cb0906937816561fd6b71e8ab6612394e", size = 333546, upload-time = "2026-05-11T12:07:42.64Z" }, + { url = "https://files.pythonhosted.org/packages/cf/24/297a7c112a312173f0f960f64214db633ba8b22c95cb78f490902072dccd/uuid_utils-0.15.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c97357517e59bf767c7e0d13e9fe02c26f241b4ebf297c7479b100fea277c0b2", size = 447716, upload-time = "2026-05-11T12:06:58.739Z" }, + { url = "https://files.pythonhosted.org/packages/f0/64/e4face9cb91260587b0193bb81ba058f476204a9a7d1ca754d31e414fc92/uuid_utils-0.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8399dd0dcfcb57db99090dae944644ba23151c57497226585f94926af9d93ae7", size = 326500, upload-time = "2026-05-11T12:08:24.14Z" }, + { url = "https://files.pythonhosted.org/packages/c1/2a/d6bf1469889348aedaf65d8a71dbba8c2132539840b866c66a7a6cd7b987/uuid_utils-0.15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d3d1a1ede7d85f80cbad381f8a09467f083b3bd9978f3daa32cc8b6f09cdc3fe", size = 352092, upload-time = "2026-05-11T12:06:53.295Z" }, + { url = "https://files.pythonhosted.org/packages/ad/4d/96970e4597c82eecc24f13bf1892abe299fa3d381d628a4854cd4259591b/uuid_utils-0.15.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51c66955aeee2c284fb8cc5181e64587a63748e9835405de4b88f333f70a06fa", size = 503708, upload-time = "2026-05-11T12:08:13.232Z" }, + { url = "https://files.pythonhosted.org/packages/74/9a/42e593d97980a7819621f79953d0e477b421f2f00d698815ee5fd73643fb/uuid_utils-0.15.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:05777b4e9a15b43707fba9789581bc39803172e7865e7c7932faf3e2f4299a4e", size = 608745, upload-time = "2026-05-11T12:08:06.097Z" }, + { url = "https://files.pythonhosted.org/packages/b3/2f/45377b749ce7e052dbd9b47d29fea3b465aff8bcb486e591d895c119819c/uuid_utils-0.15.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab7a1bf10777953c375e8525bd7070072566c8b247ffc4d3c082dad5f1a66e86", size = 568216, upload-time = "2026-05-11T12:06:27.292Z" }, + { url = "https://files.pythonhosted.org/packages/ab/8a/99104dd3af9609e494a62097328bf4f469797b8b1845258bfea68240b802/uuid_utils-0.15.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c6435d27f2d541506590ea3db6ab92701bad24652678e1b6b2d8e48d8888152b", size = 531565, upload-time = "2026-05-11T12:08:09.985Z" }, + { url = "https://files.pythonhosted.org/packages/db/58/ab984258b5213615a26a08b47b43b28245efa3cd4aeba159c48c8ba9e3af/uuid_utils-0.15.0-cp311-cp311-win32.whl", hash = "sha256:7d06408fb951d187677d1ec5adf9073c873d818704be502e2ece178685a68bbf", size = 169849, upload-time = "2026-05-11T12:06:28.808Z" }, + { url = "https://files.pythonhosted.org/packages/c0/33/c40caf02a33f69a00de04d211ec58ffca191ed16d9a169a0441d0d2e4533/uuid_utils-0.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:a8fb2aad5bb6256324de967bbf86f2227884586c3598a3e14fd5c339d3bfc20f", size = 175939, upload-time = "2026-05-11T12:08:32.376Z" }, + { url = "https://files.pythonhosted.org/packages/a8/76/6b39fc4a9a0f425cb4ccf65ce872c64c12821f105e7e1ef2c02d3c19a403/uuid_utils-0.15.0-cp311-cp311-win_arm64.whl", hash = "sha256:d6b61e5f201535b525956817e3f8a09a90ec5b7d389b5a511b4f985427f23476", size = 174315, upload-time = "2026-05-11T12:07:40.271Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/1d/5869a54e85753078a532958d7fc27dbccb48f10f428498f5a77ae700be28/uuid_utils-0.15.0-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:2e68c9d2927ab3b79892f6f9d857cffdb2043be33044854b05a84634ffdad88b", size = 559609, upload-time = "2026-05-11T12:08:38.493Z" }, + { url = "https://files.pythonhosted.org/packages/f6/83/142a2ea23cca01609587b878c4a471ccec82dfab40e70fc1f463d98a618b/uuid_utils-0.15.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:bceb8aefc5c26ed896f93a36344ff476085f340d051a73074603426ef7588e4d", size = 288304, upload-time = "2026-05-11T12:07:47.94Z" }, + { url = "https://files.pythonhosted.org/packages/b2/78/8c75511cf355e749f9fb71c0a8e228e82b47efd9db1214daecb69db8bd07/uuid_utils-0.15.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bfaab7ec64936ceae273ec195673acbee247d69525a2186159360d46d54819a0", size = 324652, upload-time = "2026-05-11T12:06:24.798Z" }, + { url = "https://files.pythonhosted.org/packages/b9/5b/16c17ebc6af1d1ecf737b14da538d53383969ab805207819383e66ef6a9c/uuid_utils-0.15.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a30412da63cc484bc7e132f4362b4b44ea7dc1ec19ca33378c9bf9f64c5e294d", size = 331281, upload-time = "2026-05-11T12:07:10.91Z" }, + { url = "https://files.pythonhosted.org/packages/80/b5/25e0dd967398bc57fca9265acfa44be8daa8e82f1a7e7bbf7de54ea35ada/uuid_utils-0.15.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98b74c6b46e0082c3b8ec2fbe1eb65376d8caf9ed2c903a457350d56260764c3", size = 444048, upload-time = "2026-05-11T12:06:29.722Z" }, + { url = "https://files.pythonhosted.org/packages/8b/32/a383438d884f1e991b9b76e8da7e72a046ecacdd9f6d59695cd049467fbf/uuid_utils-0.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f4b2f5b10f61ce498736b75c4f9fdb16b564ee92649f2ec41505e2584d86ff3", size = 324658, upload-time = "2026-05-11T12:07:18.763Z" }, + { url = "https://files.pythonhosted.org/packages/4c/4e/72b460c19c036db1d78fd7b2b8e95b98a5c57f2f872ac5abfd1b3766999f/uuid_utils-0.15.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4dbafbb3ee8828d3ef50414e4691e38b1202ce5f80c96a017f12a0821b8c791d", size = 348304, upload-time = "2026-05-11T12:08:42.086Z" }, + { url = "https://files.pythonhosted.org/packages/d4/d1/d0057b927502dcb65cf29b1f374d9da6aa9acc3b2fb06cb061c50cbf8891/uuid_utils-0.15.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:97221ee09f9c97e9e32a5a468afa8b5d1440b65e7a57d4a0c2c9fe0546fc529b", size = 501057, upload-time = "2026-05-11T12:06:31.225Z" }, + { url = "https://files.pythonhosted.org/packages/cb/88/d99699f62030093768a387ebd0414c6918a35d85b54513d795dbf8344a5a/uuid_utils-0.15.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:704c709d1054079a756e7baf0be2e76cb766d3fd2b3b6c71b1b758258c1d24e0", size = 606248, upload-time = "2026-05-11T12:08:14.536Z" }, + { url = "https://files.pythonhosted.org/packages/65/fa/89798bae188dd33e059fa32f33acb2e6188fe27ea24bc95cdfc8454c525f/uuid_utils-0.15.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:bc9cf4c4a7058f06b67b8cf81f228ccd80ba1ef506e875eed33d05ff19e9a32b", size = 564794, upload-time = "2026-05-11T12:08:44.496Z" }, + { url = "https://files.pythonhosted.org/packages/db/2b/c91039a0651a37fbf009f156b9df3aa0d65a6b53aae44192874a341181e0/uuid_utils-0.15.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9e0c1d03e7d245f03d968f1da709e396f37f56495e231a22bd47f94ab6ae8827", size 
= 529717, upload-time = "2026-05-11T12:07:27.839Z" }, + { url = "https://files.pythonhosted.org/packages/68/af/fc4ce13a3c25efb3ad7a50b97e1fef84d544cdd9119f30c116d2318905e3/uuid_utils-0.15.0-cp312-cp312-win32.whl", hash = "sha256:65fff497efacde5edf8627d59663a498f12f38e7eae51a7723dd881b5cf15ec7", size = 168200, upload-time = "2026-05-11T12:07:03.842Z" }, + { url = "https://files.pythonhosted.org/packages/88/74/d1c1ea655d4cd45d351fb216ba80fe3ac12ef8d5a512c2f843449bedfa78/uuid_utils-0.15.0-cp312-cp312-win_amd64.whl", hash = "sha256:19f73783b7ab5a560368702f245bd550cd88e3b64ef33e689aebc67b51d782b3", size = 173974, upload-time = "2026-05-11T12:07:59.863Z" }, + { url = "https://files.pythonhosted.org/packages/6c/41/994a2812629b889116dfcc14d5edb72ca188dfbd7c977042ae718fd121f5/uuid_utils-0.15.0-cp312-cp312-win_arm64.whl", hash = "sha256:151dcf8aafd93d3747e6cac3d2de8173b4e8880b57db815fd51d945cb434afac", size = 172236, upload-time = "2026-05-11T12:06:44.451Z" }, + { url = "https://files.pythonhosted.org/packages/50/a5/27c31c42a66fb11c2cee1b0be77e6bda3363b6920f6e6105c2402596ac09/uuid_utils-0.15.0-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:3334a5fdb5d5241c4f764382f01eeac6f56fc8fddf49924cd78a47e5c86ed329", size = 560586, upload-time = "2026-05-11T12:07:53.856Z" }, + { url = "https://files.pythonhosted.org/packages/3e/89/a6a79248bdb7f46a9edfa1e1d1777bd4ad57e5b278cbb4daaf602f125cc9/uuid_utils-0.15.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7ea97b77218b431c4854f2ccd502819d78d1109188fccabaa005cff61c2ccc81", size = 288804, upload-time = "2026-05-11T12:07:46.957Z" }, + { url = "https://files.pythonhosted.org/packages/02/79/3ddb82178963627693a836f81ab0cdfb2371d73f795a4be4937456e15df9/uuid_utils-0.15.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fb8636100cd521325ac90a9c3ad6d4e6cc39ee39ce78bf757c014aaab79b780", size = 324895, upload-time = "2026-05-11T12:07:51.407Z" }, + { url = "https://files.pythonhosted.org/packages/ad/78/1b8aedb556a20b268ffacf20bea115ce163c5019c3c66768c3a44141317d/uuid_utils-0.15.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:80a23d5728d82666e788810d67f2dd57b209d4e95929d61d978e02d1d7ab27bc", size = 331448, upload-time = "2026-05-11T12:07:43.949Z" }, + { url = "https://files.pythonhosted.org/packages/3d/09/f3b25d35246df2f2c69cc3fce244b77022d02a26f389419a02d214fdc635/uuid_utils-0.15.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b96ffa58744f62dd6dd9c5ed33f8e6232a90e710aeb46758f3776d904352f755", size = 444839, upload-time = "2026-05-11T12:08:25.646Z" }, + { url = "https://files.pythonhosted.org/packages/6d/8d/618c28414bf95c2e555b7ecd7b7fadcd139b191c64213ea8044624ede6b2/uuid_utils-0.15.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04dafe5b74f9b9c27587001f39a256e981619626ddda20d7701d6b0a6c3cad51", size = 323820, upload-time = "2026-05-11T12:08:37.357Z" }, + { url = "https://files.pythonhosted.org/packages/76/e4/9762df18f91e33afcc869058dba0ea4c013c64c08f3866160a827b4daa05/uuid_utils-0.15.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:257335769b12ebd8c1ae809f8d22e5a4b829bdb9c796ce4f5a5f55d8bb76db86", size = 348568, upload-time = "2026-05-11T12:08:01.19Z" }, + { url = "https://files.pythonhosted.org/packages/86/3e/c99202e8aba95b30aaed419d3508da4f9f5c0a19fa3d01c76fab6a8aed34/uuid_utils-0.15.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:deee61ce9447f63e6ec765484b40f77dadac9672fb5c49d5f5586d93df38ae85", size = 501135, upload-time = "2026-05-11T12:07:56.803Z" }, + { url = "https://files.pythonhosted.org/packages/f5/bc/740663747449cc0df8dd0e5523dc0e34d566692902edc7a1665a3327ee6e/uuid_utils-0.15.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:616a3e8f178c69f58d54d015bbb1666c6401ce3d41cc0473e67dfa278b96c8e5", size = 606513, upload-time = "2026-05-11T12:08:30.886Z" }, + { url = "https://files.pythonhosted.org/packages/22/14/6e4b523a90014fab0b55b13ea792d5529abf70f0f8c97fd5b90a5200bbcf/uuid_utils-0.15.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:98bc52c15cf1baf602c965ecc2ed5d798cc8908084098ab6478b53a99b479fa8", size = 565139, upload-time = "2026-05-11T12:06:54.408Z" }, + { url = "https://files.pythonhosted.org/packages/27/d9/ee0c8ce35cc8b0425adc822feec41fdf477d15e3259fb721a711018bb7db/uuid_utils-0.15.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e6c7ed64c69f815cf434384681d64ee5aa574160a8e2d2a9a63088d388cb8ae7", size = 529000, upload-time = "2026-05-11T12:07:15.671Z" }, + { url = "https://files.pythonhosted.org/packages/e6/3b/24ecbbcef49c0b209aea0d8dfbc15855cf8c3d80829f5e9c0513b4c1e499/uuid_utils-0.15.0-cp313-cp313-pyemscripten_2025_0_wasm32.whl", hash = "sha256:50cc685517e6b99be99b127e7f1817fbb65000d8816537852e603a2e3b60ac88", size = 97671, upload-time = "2026-05-11T12:07:31.232Z" }, + { url = "https://files.pythonhosted.org/packages/c1/4d/9ebcbe90c2be622a9aed56f7606ae1ddc4800ecbe8b1cc6b7fbca2cadead/uuid_utils-0.15.0-cp313-cp313-win32.whl", hash = "sha256:805c52f49bdb90a83727c80b97c98769ef68cc16f2a12ef6c41c4533633e8a95", size = 168345, upload-time = "2026-05-11T12:07:08.968Z" }, + { url = "https://files.pythonhosted.org/packages/82/1a/10ce5709825de275b0a4f5c44f1cd0e13474b5a5430ea64567bdbd8dcd5f/uuid_utils-0.15.0-cp313-cp313-win_amd64.whl", hash = "sha256:e1e2f4a8ca70ff617916719eadb1f148cc6eb65a4b2b89f35422bf9d595461aa", size = 174290, upload-time = "2026-05-11T12:07:19.852Z" }, + { url = "https://files.pythonhosted.org/packages/85/b3/a120d672b7c84bcd45210a67a368333179c821dd4d76c73da69aaad5414a/uuid_utils-0.15.0-cp313-cp313-win_arm64.whl", hash = "sha256:5929aa92bf4ccb5456bd40646e3c45219cc8f1d751675af75f681674e7bd0029", size = 172579, upload-time = "2026-05-11T12:07:12.915Z" }, + { url = "https://files.pythonhosted.org/packages/7f/9f/67a1a323db03b872c78cc36ddc3249f756d523ee409a6abdfb6c643c0a59/uuid_utils-0.15.0-cp313-cp313t-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:395ea1e40d6cd22bf6cfd00a3b25764571df783741d7a501f8b7a2d578f1148d", size = 561609, upload-time = "2026-05-11T12:07:45.575Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a5/cc6ed878f6323209a7d497ad345e6eea4c9186af4904f9cd60e5bc9d72e6/uuid_utils-0.15.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:e25b270f98dcf395a434bec704cb503516a71519198634bc827ba87a584387f7", size = 288953, upload-time = "2026-05-11T12:07:17.658Z" }, + { url = "https://files.pythonhosted.org/packages/1f/28/ca25f2e88ff84f4beb3e5310a45508651de389af80c61f172170bde81e19/uuid_utils-0.15.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f3d38354ce3943fd721109c508b27a54147531ae656e675155301dfe25e8367", size = 324198, upload-time = "2026-05-11T12:06:47.487Z" }, + { url = "https://files.pythonhosted.org/packages/0d/9f/0c9e22ccc4cd3e7cccb6d92cf3ccab3c259d04ff4d34a4d22bc6a8f5f9da/uuid_utils-0.15.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:92076407ddb8b752df055378671b8c8bd3c6ffdb3064982190765b1fa685e624", size = 331096, upload-time = "2026-05-11T12:07:05.173Z" }, + { url = "https://files.pythonhosted.org/packages/29/fd/cf820e6af8d4a8bb71a1dd1ea89a895d4186c41ffcd519eddf0b8cd3a126/uuid_utils-0.15.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:53b7c12e9ac48372781e6d409877621d1505955d8b37a505dbadb864f7098e85", size = 444743, upload-time = "2026-05-11T12:06:36.172Z" }, + { url = "https://files.pythonhosted.org/packages/57/1f/c6d31b0cefaa79c42529dde10b8638b541032b2b61e3ca2d77acaa64857f/uuid_utils-0.15.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b4b001172dede7e0681c6e288ac7febf36efa3efcbe92a964ddcef4acdd9f7b", size = 325096, upload-time = "2026-05-11T12:06:37.601Z" }, + { url = "https://files.pythonhosted.org/packages/1e/9a/8354234e8f6b7a128bb10457bfa00b641b4e79fcf48a03958584ab753fd3/uuid_utils-0.15.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b6326c3aff73342b50a39af0301972b671f1da68e6f2d88aaf5b959489b0c0a1", size = 349441, upload-time = "2026-05-11T12:07:07.568Z" }, + { url = "https://files.pythonhosted.org/packages/7c/b0/7abda94d184e0e05f2aced8720f004581502f7072d60642b227c5861980f/uuid_utils-0.15.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f51f8f74f65b1a8f0cecccd2ab8d04c28df82e813e83cd29248c6a0a9cb96b71", size = 500226, upload-time = "2026-05-11T12:08:11.084Z" }, + { url = "https://files.pythonhosted.org/packages/23/44/efbb84e88d2a3adfc883bcfa97e50259ac39f5ba8858e68438bbd8cb1993/uuid_utils-0.15.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:06a76000bd4917526549fedb63c417e1ea8e745388aedc9906d7af079f969668", size = 606411, upload-time = "2026-05-11T12:08:07.212Z" }, + { url = "https://files.pythonhosted.org/packages/d0/b1/43c1121329467590e99a1aa3a81845d0c908ce7319e870cb68334c5803bd/uuid_utils-0.15.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:5d721605af5478415b311b9d2bd7f3cc71d19dc071c7b891dc92221a845150d1", size = 566029, upload-time = "2026-05-11T12:07:29.176Z" }, + { url = "https://files.pythonhosted.org/packages/80/fa/1f105833249b8259e3afec9ef7874da7c8cd80c534a2eb59726aa6b6945f/uuid_utils-0.15.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:fc9a85207269b436255b08f504e3ea185f6f1e4813ffa43c0e658a63af99e7e6", size = 529679, upload-time = "2026-05-11T12:06:57.549Z" }, + { url = "https://files.pythonhosted.org/packages/e5/1d/841ffad2cf8b6050c66de2c9657549cd54b7cbe4e7a807a95dad863ce9bc/uuid_utils-0.15.0-cp313-cp313t-win32.whl", hash = "sha256:be62c176390690b9c28b2cfd5ae8fb1f1d469c76ff85348912904f000d6576fd", size = 167999, upload-time = "2026-05-11T12:06:43.452Z" }, + { url = "https://files.pythonhosted.org/packages/81/9b/1eaa4016c5b2c614d07e4b58a201dfa89e3cf58d8905ba8e4c2b83e4ccba/uuid_utils-0.15.0-cp313-cp313t-win_amd64.whl", hash = "sha256:061a5d6f58e447ff41f13b07da83e0876cb4d9bcd5a83bf547db315abb886c0a", size = 174534, upload-time = "2026-05-11T12:07:50.367Z" }, + { url = "https://files.pythonhosted.org/packages/7b/49/e18fb7681f0d09fc64d2210a5142b5836507e64999dd68971ad8dacd228c/uuid_utils-0.15.0-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:1b48d6ca94783f5d3907717cea6a636e9451d3169d9398b287c81b18857c91b9", size = 561884, upload-time = "2026-05-11T12:06:49.765Z" }, + { url = "https://files.pythonhosted.org/packages/03/08/dd93d490d06e125a45c322175bd161087e4fff2c9f3d2b7b9b91f8d2d349/uuid_utils-0.15.0-cp314-cp314-macosx_10_12_x86_64.whl", 
hash = "sha256:8b44795c09928ba55b15d94c4a2d29e942983eaf77f1bfa008ae596b5f1c72dd", size = 288932, upload-time = "2026-05-11T12:07:23.196Z" }, + { url = "https://files.pythonhosted.org/packages/88/12/df5c29e5acb1bc3122e7ecca15bef68de6287663c0a2a381822008d4cbf5/uuid_utils-0.15.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f76f5654441960425726e377e3ecfaa9e14cde3cc9b2e9f673bbb11daa38e1c3", size = 324611, upload-time = "2026-05-11T12:06:34.691Z" }, + { url = "https://files.pythonhosted.org/packages/62/b7/7c20949ebe7a4e19bf13805ab2f71e667e549e3149502f01e41f695190c6/uuid_utils-0.15.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d4f797414c036c7b7c862d6401da8bcbfd19086eabb41035c468e0ad564d339e", size = 331380, upload-time = "2026-05-11T12:07:16.641Z" }, + { url = "https://files.pythonhosted.org/packages/0d/ed/7d32f0ffa31cc4023e5f2919acb9abb103330c3a338a27c85a2f877a4475/uuid_utils-0.15.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:670f174a447fa478605c48254f1b8f1fd309f1861be9fd469e5639230bc80ab7", size = 443350, upload-time = "2026-05-11T12:07:38.157Z" }, + { url = "https://files.pythonhosted.org/packages/1a/10/76b4da4086bd70924b562de487a2ef647a0fbee1ed7d5e8777664cc4a986/uuid_utils-0.15.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4835b0907466a535b255a27df6cf0d37ea4ab4b69edde53cc350563e8b55442", size = 323637, upload-time = "2026-05-11T12:08:43.227Z" }, + { url = "https://files.pythonhosted.org/packages/eb/ab/3d31222f7536e1f2113ad0719cc76f4c78007ebcd752fc9170f1eebb448f/uuid_utils-0.15.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3070ba33b609299202e7e2ecfcfeb40451591874bcd4a6b268028d0f026bec49", size = 348390, upload-time = "2026-05-11T12:08:34.604Z" }, + { url = "https://files.pythonhosted.org/packages/b1/67/822fc66ac27ecd086f6bdb6eb1d8e0ddc47b353ed60945038e74c67bfc1d/uuid_utils-0.15.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:771f9db3cb3e5e3167beb7892ddcaf5d0440c5eff631f3b61476b607d7e59dab", size = 501144, upload-time = "2026-05-11T12:06:42.473Z" }, + { url = "https://files.pythonhosted.org/packages/c4/f5/5d9758e655cbbe9a1d5b72e17f10fd42afc39b88d1cdd21d6e2532dbfbdf/uuid_utils-0.15.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:c40cb6a68b95787a55d401394178213003dfce1e6e62d1097756a5fb70aae9da", size = 606407, upload-time = "2026-05-11T12:07:41.328Z" }, + { url = "https://files.pythonhosted.org/packages/8f/cc/16c91835db9cb6870b00529db64c3e0f23dc6e39002b86b80d958358e6b2/uuid_utils-0.15.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:031baf2ce4136e98f68845d040683b83a64aac4f52c01830e066bbbe2a9113fc", size = 564984, upload-time = "2026-05-11T12:07:00.738Z" }, + { url = "https://files.pythonhosted.org/packages/fc/5a/84c356b33f13fbc6fccc065f4dd51095526bee3bb939e89a64bc959502a4/uuid_utils-0.15.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ba5bc9191c5636bf2bc33d81166c0b27a71ff1b19ab881a8c80bd70f86578a3d", size = 528947, upload-time = "2026-05-11T12:06:59.716Z" }, + { url = "https://files.pythonhosted.org/packages/40/63/88ee651f506298a08afc32c7a33adc27839fcdce331ae438a50617bcf70c/uuid_utils-0.15.0-cp314-cp314-win32.whl", hash = "sha256:5050efb42112cd2dc37f8eb4efa65188b722dc60ae6e28a52845b5d27f35a85d", size = 168620, upload-time = "2026-05-11T12:06:45.434Z" }, + { url = "https://files.pythonhosted.org/packages/cd/e2/f37cb4a220aab39a627e83d6b9f76705862c5b0db62140f24d38847ab4a5/uuid_utils-0.15.0-cp314-cp314-win_amd64.whl", 
hash = "sha256:743fe546f8910edfd6a650cc4eb9995eb0d9dcfee11d948f5b326702851cb246", size = 173867, upload-time = "2026-05-11T12:08:36.358Z" }, + { url = "https://files.pythonhosted.org/packages/ca/60/c1423514345690162c37c4cc33f6052b81bfa6886f5569ba92bee9fa3302/uuid_utils-0.15.0-cp314-cp314-win_arm64.whl", hash = "sha256:ebacba63d31afbea72e5bf12205413a5f53a2654c9f6302abf8de7cc6697a4d8", size = 172153, upload-time = "2026-05-11T12:08:18.045Z" }, + { url = "https://files.pythonhosted.org/packages/a1/6a/65d401e3ff1f9e79faac5bbc769cab06ca6c454fa492fb8f07fd5c7b2230/uuid_utils-0.15.0-cp314-cp314t-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5c29e29e8d5e9302cd84e4e5fdac38409448893048f42bd73d5e9b64d6eda2e4", size = 562240, upload-time = "2026-05-11T12:06:52.088Z" }, + { url = "https://files.pythonhosted.org/packages/2d/67/974e71d000b99440717b2864eb53f42d4589edcb6267e46100ccdf1a22fc/uuid_utils-0.15.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:a8ab927c4bec80e4b784c5c9af7ce1c74f22b80abc6db2895fe18268255a0060", size = 289149, upload-time = "2026-05-11T12:07:34.581Z" }, + { url = "https://files.pythonhosted.org/packages/51/d4/52a7d5f9f2a4e6f871309e68080921a90f03ccf46b64b9d7dac29ece2bdb/uuid_utils-0.15.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7169dd734319ea95e51894b61ad17e76b7edcf6927669ad3b963818e35e06086", size = 324661, upload-time = "2026-05-11T12:07:06.526Z" }, + { url = "https://files.pythonhosted.org/packages/2e/b7/389c0c5d0d8a04999bbe2a677d3b4bf09d3f3e3298801f27fdd14894d58d/uuid_utils-0.15.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c78462302d81e1d7f7fb0ee14ff7c521e47a27c4d7222a4933c01a431d2a6efc", size = 331568, upload-time = "2026-05-11T12:07:55.457Z" }, + { url = "https://files.pythonhosted.org/packages/20/08/1f1e10d0182afa865c623ed272ddbd7750781b81425f05f4e8cab6be5a78/uuid_utils-0.15.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:270d7f11cfe821d68433103f63058d724c9165c2d1d443559f66cd67352748af", size = 444798, upload-time = "2026-05-11T12:07:39.282Z" }, + { url = "https://files.pythonhosted.org/packages/13/81/1cc1b3b266b7e601571bac85e565a420a0cd47682aaf224aa4a825860283/uuid_utils-0.15.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ea58b9332ce8c04b8eec2c655b8bbd34ae31c06a5baf53f9a9b2324fc7d55a1", size = 324919, upload-time = "2026-05-11T12:08:22.951Z" }, + { url = "https://files.pythonhosted.org/packages/14/59/8a8be072f42618cbfe736c382a75456134771a0eb56101668fbb658be883/uuid_utils-0.15.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a11e885489a12b8fcf71fcfe7e1ae078515574e9a102f0819f189a4d62db301e", size = 349480, upload-time = "2026-05-11T12:08:29.421Z" }, + { url = "https://files.pythonhosted.org/packages/47/e4/66a96cb1d74b402248ba4d24e2eba8ecb4618f88dcfe7d82f1a7c13da297/uuid_utils-0.15.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ad134557819143c37ebd0eecf058accba94664ff4d50ef8bf619a255bdafdcea", size = 500791, upload-time = "2026-05-11T12:07:36.9Z" }, + { url = "https://files.pythonhosted.org/packages/e6/12/09171a3e2f03e18f6b6c86b5a089fc984891293ac8cccb6727a8c6b1bbb2/uuid_utils-0.15.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:60d5d7ef85592cdd555b01be4bc32b30a15854c3de99c5613e2e47299762b044", size = 606626, upload-time = "2026-05-11T12:08:40.874Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/56/8057a4f38b7e93fe51264d7bda3cbb1c1d9c61654368aa71ffec0057c17f/uuid_utils-0.15.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:c0960c0475033bab0dddb13919e627c062d83d17900f22206c59b2942fe03703", size = 566218, upload-time = "2026-05-11T12:08:04.616Z" }, + { url = "https://files.pythonhosted.org/packages/14/f8/65f1273a82fa84c529caaa737bfdd512bbc2c1028d35e342d0aba88a89b2/uuid_utils-0.15.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cb9cc99885b676d0f5ce8e0996b57ba2a53fe3a3f0163c7c9e06151e0232982f", size = 529658, upload-time = "2026-05-11T12:08:21.796Z" }, + { url = "https://files.pythonhosted.org/packages/27/8b/2eea5e55d8d2185527cc37e481a363b77ac893534bdda4b9e277cdd71aa1/uuid_utils-0.15.0-cp314-cp314t-win32.whl", hash = "sha256:30e7340f8b55f552a78d90eab2b2be6f68520c380215ddb7fb70a6d234ce154d", size = 168093, upload-time = "2026-05-11T12:06:33.548Z" }, + { url = "https://files.pythonhosted.org/packages/1e/e5/7524e94c316fc0194c3da1a91e51cce69722520e5fc499c4ece53007a967/uuid_utils-0.15.0-cp314-cp314t-win_amd64.whl", hash = "sha256:5ef6edbb10a4956755614e116aee4b558d75284b52dbedcf5f7505c518eb1011", size = 174063, upload-time = "2026-05-11T12:08:03.414Z" }, + { url = "https://files.pythonhosted.org/packages/d8/64/8be140712e3fa9d8406f0cb61876ce6d02f72067d4f9d31d1bf73e127c01/uuid_utils-0.15.0-cp314-cp314t-win_arm64.whl", hash = "sha256:b3f0e567b5e992b28a50f50e0aeba546a2e2d3e463590eb5543204cb5d0f40b3", size = 171358, upload-time = "2026-05-11T12:07:30.282Z" }, + { url = "https://files.pythonhosted.org/packages/c1/a0/1e4a1833326627a2134fad5fb45ecc00b8638a83a99525e189dfa94b098e/uuid_utils-0.15.0-pp311-pypy311_pp73-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:81b8caec4b40925cbe2f0533af266cd9cd4485d94e48ecbb34663d5941c033aa", size = 566931, upload-time = "2026-05-11T12:07:35.687Z" }, + { url = "https://files.pythonhosted.org/packages/ab/2f/5bf043f87df4bb1fdfa54acad9ab09fee40a3c47bfcf99911c3ba15e1599/uuid_utils-0.15.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e5763f07d99e2237742ebd0155ca18c1c8233de457c9e8e59bdc4d130895d15a", size = 291304, upload-time = "2026-05-11T12:07:58.421Z" }, + { url = "https://files.pythonhosted.org/packages/20/2c/4316821fe2780eee11d277c9a3188b361fb1cabe52255e010b3b521efe68/uuid_utils-0.15.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90be3946ab215e180adb9827a90f9c63b6965af93b116c566f32e280bad6cccd", size = 327798, upload-time = "2026-05-11T12:08:02.35Z" }, + { url = "https://files.pythonhosted.org/packages/3e/59/5340e801865d863aad50cc16e3e5f9e2e14806c12f76474721073b396b52/uuid_utils-0.15.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1a02705b659a2b9874de0e2187f0c64277e14dae3299b392f0c46c762bd1144", size = 334131, upload-time = "2026-05-11T12:07:21.956Z" }, + { url = "https://files.pythonhosted.org/packages/54/d7/0fa1443fbec25d7e8232324f7c9e4ac64390574cf7481608a15bd6eecc0d/uuid_utils-0.15.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39a05db4e66ae5fe39b1d328446cdc560c29073dbe00c7abfea3d7a02dce62a1", size = 448571, upload-time = "2026-05-11T12:08:08.868Z" }, + { url = "https://files.pythonhosted.org/packages/ab/68/a0aedbf39885d7d6d3b3b419a796214fd3e92a7e6a556b336bfee2246fdc/uuid_utils-0.15.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2422feb60039ce88daf02b9885665b060f0d2deb80a3debffaaedc443d9aa673", size = 
327546, upload-time = "2026-05-11T12:06:21.687Z" }, + { url = "https://files.pythonhosted.org/packages/fd/1c/347970e5706f3b7fc1964227493aa98dd43c7348fe2a84a3aeb3f1b9299b/uuid_utils-0.15.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:14481b7c98fbac536783d475b4d4cc7a4a21ec4f1ce794fc66557d3540b0c8b7", size = 354981, upload-time = "2026-05-11T12:07:26.505Z" }, + { url = "https://files.pythonhosted.org/packages/08/13/5e2d92fe7d7b8df48b0c7c0ec714d828863227ee099e17caaa0d6ed23203/uuid_utils-0.15.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:21af6cc771a769e4a8ef9ab245f7ee811a56fbcdd021e1163d845172a9c01e60", size = 176805, upload-time = "2026-05-11T12:07:09.967Z" }, +] + [[package]] name = "uvicorn" version = "0.40.0" @@ -7035,3 +7334,77 @@ sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50e wheels = [ { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, ] + +[[package]] +name = "zstandard" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/aa/3e0508d5a5dd96529cdc5a97011299056e14c6505b678fd58938792794b1/zstandard-0.25.0.tar.gz", hash = "sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b", size = 711513, upload-time = "2025-09-14T22:15:54.002Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/83/c3ca27c363d104980f1c9cee1101cc8ba724ac8c28a033ede6aab89585b1/zstandard-0.25.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:933b65d7680ea337180733cf9e87293cc5500cc0eb3fc8769f4d3c88d724ec5c", size = 795254, upload-time = "2025-09-14T22:16:26.137Z" }, + { url = "https://files.pythonhosted.org/packages/ac/4d/e66465c5411a7cf4866aeadc7d108081d8ceba9bc7abe6b14aa21c671ec3/zstandard-0.25.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3f79487c687b1fc69f19e487cd949bf3aae653d181dfb5fde3bf6d18894706f", size = 640559, upload-time = "2025-09-14T22:16:27.973Z" }, + { url = "https://files.pythonhosted.org/packages/12/56/354fe655905f290d3b147b33fe946b0f27e791e4b50a5f004c802cb3eb7b/zstandard-0.25.0-cp311-cp311-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:0bbc9a0c65ce0eea3c34a691e3c4b6889f5f3909ba4822ab385fab9057099431", size = 5348020, upload-time = "2025-09-14T22:16:29.523Z" }, + { url = "https://files.pythonhosted.org/packages/3b/13/2b7ed68bd85e69a2069bcc72141d378f22cae5a0f3b353a2c8f50ef30c1b/zstandard-0.25.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:01582723b3ccd6939ab7b3a78622c573799d5d8737b534b86d0e06ac18dbde4a", size = 5058126, upload-time = "2025-09-14T22:16:31.811Z" }, + { url = "https://files.pythonhosted.org/packages/c9/dd/fdaf0674f4b10d92cb120ccff58bbb6626bf8368f00ebfd2a41ba4a0dc99/zstandard-0.25.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:5f1ad7bf88535edcf30038f6919abe087f606f62c00a87d7e33e7fc57cb69fcc", size = 5405390, upload-time = "2025-09-14T22:16:33.486Z" }, + { url = "https://files.pythonhosted.org/packages/0f/67/354d1555575bc2490435f90d67ca4dd65238ff2f119f30f72d5cde09c2ad/zstandard-0.25.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:06acb75eebeedb77b69048031282737717a63e71e4ae3f77cc0c3b9508320df6", size = 5452914, upload-time = 
"2025-09-14T22:16:35.277Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1f/e9cfd801a3f9190bf3e759c422bbfd2247db9d7f3d54a56ecde70137791a/zstandard-0.25.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9300d02ea7c6506f00e627e287e0492a5eb0371ec1670ae852fefffa6164b072", size = 5559635, upload-time = "2025-09-14T22:16:37.141Z" }, + { url = "https://files.pythonhosted.org/packages/21/88/5ba550f797ca953a52d708c8e4f380959e7e3280af029e38fbf47b55916e/zstandard-0.25.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bfd06b1c5584b657a2892a6014c2f4c20e0db0208c159148fa78c65f7e0b0277", size = 5048277, upload-time = "2025-09-14T22:16:38.807Z" }, + { url = "https://files.pythonhosted.org/packages/46/c0/ca3e533b4fa03112facbe7fbe7779cb1ebec215688e5df576fe5429172e0/zstandard-0.25.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f373da2c1757bb7f1acaf09369cdc1d51d84131e50d5fa9863982fd626466313", size = 5574377, upload-time = "2025-09-14T22:16:40.523Z" }, + { url = "https://files.pythonhosted.org/packages/12/9b/3fb626390113f272abd0799fd677ea33d5fc3ec185e62e6be534493c4b60/zstandard-0.25.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6c0e5a65158a7946e7a7affa6418878ef97ab66636f13353b8502d7ea03c8097", size = 4961493, upload-time = "2025-09-14T22:16:43.3Z" }, + { url = "https://files.pythonhosted.org/packages/cb/d3/23094a6b6a4b1343b27ae68249daa17ae0651fcfec9ed4de09d14b940285/zstandard-0.25.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c8e167d5adf59476fa3e37bee730890e389410c354771a62e3c076c86f9f7778", size = 5269018, upload-time = "2025-09-14T22:16:45.292Z" }, + { url = "https://files.pythonhosted.org/packages/8c/a7/bb5a0c1c0f3f4b5e9d5b55198e39de91e04ba7c205cc46fcb0f95f0383c1/zstandard-0.25.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:98750a309eb2f020da61e727de7d7ba3c57c97cf6213f6f6277bb7fb42a8e065", size = 5443672, upload-time = "2025-09-14T22:16:47.076Z" }, + { url = "https://files.pythonhosted.org/packages/27/22/503347aa08d073993f25109c36c8d9f029c7d5949198050962cb568dfa5e/zstandard-0.25.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:22a086cff1b6ceca18a8dd6096ec631e430e93a8e70a9ca5efa7561a00f826fa", size = 5822753, upload-time = "2025-09-14T22:16:49.316Z" }, + { url = "https://files.pythonhosted.org/packages/e2/be/94267dc6ee64f0f8ba2b2ae7c7a2df934a816baaa7291db9e1aa77394c3c/zstandard-0.25.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:72d35d7aa0bba323965da807a462b0966c91608ef3a48ba761678cb20ce5d8b7", size = 5366047, upload-time = "2025-09-14T22:16:51.328Z" }, + { url = "https://files.pythonhosted.org/packages/7b/a3/732893eab0a3a7aecff8b99052fecf9f605cf0fb5fb6d0290e36beee47a4/zstandard-0.25.0-cp311-cp311-win32.whl", hash = "sha256:f5aeea11ded7320a84dcdd62a3d95b5186834224a9e55b92ccae35d21a8b63d4", size = 436484, upload-time = "2025-09-14T22:16:55.005Z" }, + { url = "https://files.pythonhosted.org/packages/43/a3/c6155f5c1cce691cb80dfd38627046e50af3ee9ddc5d0b45b9b063bfb8c9/zstandard-0.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:daab68faadb847063d0c56f361a289c4f268706b598afbf9ad113cbe5c38b6b2", size = 506183, upload-time = "2025-09-14T22:16:52.753Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3e/8945ab86a0820cc0e0cdbf38086a92868a9172020fdab8a03ac19662b0e5/zstandard-0.25.0-cp311-cp311-win_arm64.whl", hash = "sha256:22a06c5df3751bb7dc67406f5374734ccee8ed37fc5981bf1ad7041831fa1137", size = 462533, upload-time = "2025-09-14T22:16:53.878Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/fc/f26eb6ef91ae723a03e16eddb198abcfce2bc5a42e224d44cc8b6765e57e/zstandard-0.25.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7b3c3a3ab9daa3eed242d6ecceead93aebbb8f5f84318d82cee643e019c4b73b", size = 795738, upload-time = "2025-09-14T22:16:56.237Z" }, + { url = "https://files.pythonhosted.org/packages/aa/1c/d920d64b22f8dd028a8b90e2d756e431a5d86194caa78e3819c7bf53b4b3/zstandard-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:913cbd31a400febff93b564a23e17c3ed2d56c064006f54efec210d586171c00", size = 640436, upload-time = "2025-09-14T22:16:57.774Z" }, + { url = "https://files.pythonhosted.org/packages/53/6c/288c3f0bd9fcfe9ca41e2c2fbfd17b2097f6af57b62a81161941f09afa76/zstandard-0.25.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:011d388c76b11a0c165374ce660ce2c8efa8e5d87f34996aa80f9c0816698b64", size = 5343019, upload-time = "2025-09-14T22:16:59.302Z" }, + { url = "https://files.pythonhosted.org/packages/1e/15/efef5a2f204a64bdb5571e6161d49f7ef0fffdbca953a615efbec045f60f/zstandard-0.25.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dffecc361d079bb48d7caef5d673c88c8988d3d33fb74ab95b7ee6da42652ea", size = 5063012, upload-time = "2025-09-14T22:17:01.156Z" }, + { url = "https://files.pythonhosted.org/packages/b7/37/a6ce629ffdb43959e92e87ebdaeebb5ac81c944b6a75c9c47e300f85abdf/zstandard-0.25.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:7149623bba7fdf7e7f24312953bcf73cae103db8cae49f8154dd1eadc8a29ecb", size = 5394148, upload-time = "2025-09-14T22:17:03.091Z" }, + { url = "https://files.pythonhosted.org/packages/e3/79/2bf870b3abeb5c070fe2d670a5a8d1057a8270f125ef7676d29ea900f496/zstandard-0.25.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:6a573a35693e03cf1d67799fd01b50ff578515a8aeadd4595d2a7fa9f3ec002a", size = 5451652, upload-time = "2025-09-14T22:17:04.979Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/7be26e610767316c028a2cbedb9a3beabdbe33e2182c373f71a1c0b88f36/zstandard-0.25.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5a56ba0db2d244117ed744dfa8f6f5b366e14148e00de44723413b2f3938a902", size = 5546993, upload-time = "2025-09-14T22:17:06.781Z" }, + { url = "https://files.pythonhosted.org/packages/85/c7/3483ad9ff0662623f3648479b0380d2de5510abf00990468c286c6b04017/zstandard-0.25.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:10ef2a79ab8e2974e2075fb984e5b9806c64134810fac21576f0668e7ea19f8f", size = 5046806, upload-time = "2025-09-14T22:17:08.415Z" }, + { url = "https://files.pythonhosted.org/packages/08/b3/206883dd25b8d1591a1caa44b54c2aad84badccf2f1de9e2d60a446f9a25/zstandard-0.25.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aaf21ba8fb76d102b696781bddaa0954b782536446083ae3fdaa6f16b25a1c4b", size = 5576659, upload-time = "2025-09-14T22:17:10.164Z" }, + { url = "https://files.pythonhosted.org/packages/9d/31/76c0779101453e6c117b0ff22565865c54f48f8bd807df2b00c2c404b8e0/zstandard-0.25.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1869da9571d5e94a85a5e8d57e4e8807b175c9e4a6294e3b66fa4efb074d90f6", size = 4953933, upload-time = "2025-09-14T22:17:11.857Z" }, + { url = "https://files.pythonhosted.org/packages/18/e1/97680c664a1bf9a247a280a053d98e251424af51f1b196c6d52f117c9720/zstandard-0.25.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:809c5bcb2c67cd0ed81e9229d227d4ca28f82d0f778fc5fea624a9def3963f91", size = 
5268008, upload-time = "2025-09-14T22:17:13.627Z" }, + { url = "https://files.pythonhosted.org/packages/1e/73/316e4010de585ac798e154e88fd81bb16afc5c5cb1a72eeb16dd37e8024a/zstandard-0.25.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f27662e4f7dbf9f9c12391cb37b4c4c3cb90ffbd3b1fb9284dadbbb8935fa708", size = 5433517, upload-time = "2025-09-14T22:17:16.103Z" }, + { url = "https://files.pythonhosted.org/packages/5b/60/dd0f8cfa8129c5a0ce3ea6b7f70be5b33d2618013a161e1ff26c2b39787c/zstandard-0.25.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:99c0c846e6e61718715a3c9437ccc625de26593fea60189567f0118dc9db7512", size = 5814292, upload-time = "2025-09-14T22:17:17.827Z" }, + { url = "https://files.pythonhosted.org/packages/fc/5f/75aafd4b9d11b5407b641b8e41a57864097663699f23e9ad4dbb91dc6bfe/zstandard-0.25.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:474d2596a2dbc241a556e965fb76002c1ce655445e4e3bf38e5477d413165ffa", size = 5360237, upload-time = "2025-09-14T22:17:19.954Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8d/0309daffea4fcac7981021dbf21cdb2e3427a9e76bafbcdbdf5392ff99a4/zstandard-0.25.0-cp312-cp312-win32.whl", hash = "sha256:23ebc8f17a03133b4426bcc04aabd68f8236eb78c3760f12783385171b0fd8bd", size = 436922, upload-time = "2025-09-14T22:17:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/79/3b/fa54d9015f945330510cb5d0b0501e8253c127cca7ebe8ba46a965df18c5/zstandard-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffef5a74088f1e09947aecf91011136665152e0b4b359c42be3373897fb39b01", size = 506276, upload-time = "2025-09-14T22:17:21.429Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6b/8b51697e5319b1f9ac71087b0af9a40d8a6288ff8025c36486e0c12abcc4/zstandard-0.25.0-cp312-cp312-win_arm64.whl", hash = "sha256:181eb40e0b6a29b3cd2849f825e0fa34397f649170673d385f3598ae17cca2e9", size = 462679, upload-time = "2025-09-14T22:17:23.147Z" }, + { url = "https://files.pythonhosted.org/packages/35/0b/8df9c4ad06af91d39e94fa96cc010a24ac4ef1378d3efab9223cc8593d40/zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec996f12524f88e151c339688c3897194821d7f03081ab35d31d1e12ec975e94", size = 795735, upload-time = "2025-09-14T22:17:26.042Z" }, + { url = "https://files.pythonhosted.org/packages/3f/06/9ae96a3e5dcfd119377ba33d4c42a7d89da1efabd5cb3e366b156c45ff4d/zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1", size = 640440, upload-time = "2025-09-14T22:17:27.366Z" }, + { url = "https://files.pythonhosted.org/packages/d9/14/933d27204c2bd404229c69f445862454dcc101cd69ef8c6068f15aaec12c/zstandard-0.25.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:e96594a5537722fdfb79951672a2a63aec5ebfb823e7560586f7484819f2a08f", size = 5343070, upload-time = "2025-09-14T22:17:28.896Z" }, + { url = "https://files.pythonhosted.org/packages/6d/db/ddb11011826ed7db9d0e485d13df79b58586bfdec56e5c84a928a9a78c1c/zstandard-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea", size = 5063001, upload-time = "2025-09-14T22:17:31.044Z" }, + { url = "https://files.pythonhosted.org/packages/db/00/87466ea3f99599d02a5238498b87bf84a6348290c19571051839ca943777/zstandard-0.25.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e", size = 5394120, 
upload-time = "2025-09-14T22:17:32.711Z" }, + { url = "https://files.pythonhosted.org/packages/2b/95/fc5531d9c618a679a20ff6c29e2b3ef1d1f4ad66c5e161ae6ff847d102a9/zstandard-0.25.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551", size = 5451230, upload-time = "2025-09-14T22:17:34.41Z" }, + { url = "https://files.pythonhosted.org/packages/63/4b/e3678b4e776db00f9f7b2fe58e547e8928ef32727d7a1ff01dea010f3f13/zstandard-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a", size = 5547173, upload-time = "2025-09-14T22:17:36.084Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d5/ba05ed95c6b8ec30bd468dfeab20589f2cf709b5c940483e31d991f2ca58/zstandard-0.25.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611", size = 5046736, upload-time = "2025-09-14T22:17:37.891Z" }, + { url = "https://files.pythonhosted.org/packages/50/d5/870aa06b3a76c73eced65c044b92286a3c4e00554005ff51962deef28e28/zstandard-0.25.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3", size = 5576368, upload-time = "2025-09-14T22:17:40.206Z" }, + { url = "https://files.pythonhosted.org/packages/5d/35/398dc2ffc89d304d59bc12f0fdd931b4ce455bddf7038a0a67733a25f550/zstandard-0.25.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b", size = 4954022, upload-time = "2025-09-14T22:17:41.879Z" }, + { url = "https://files.pythonhosted.org/packages/9a/5c/36ba1e5507d56d2213202ec2b05e8541734af5f2ce378c5d1ceaf4d88dc4/zstandard-0.25.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1673b7199bbe763365b81a4f3252b8e80f44c9e323fc42940dc8843bfeaf9851", size = 5267889, upload-time = "2025-09-14T22:17:43.577Z" }, + { url = "https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" }, + { url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/69a553d2047f9a2c7347caa225bb3a63b6d7704ad74610cb7823baa08ed7/zstandard-0.25.0-cp313-cp313-win32.whl", hash = "sha256:7030defa83eef3e51ff26f0b7bfb229f0204b66fe18e04359ce3474ac33cbc09", size = 436936, upload-time = "2025-09-14T22:17:52.658Z" }, + { url = "https://files.pythonhosted.org/packages/d9/82/b9c06c870f3bd8767c201f1edbdf9e8dc34be5b0fbc5682c4f80fe948475/zstandard-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:1f830a0dac88719af0ae43b8b2d6aef487d437036468ef3c2ea59c51f9d55fd5", size = 506232, upload-time = "2025-09-14T22:17:50.402Z" }, + { url = 
"https://files.pythonhosted.org/packages/d4/57/60c3c01243bb81d381c9916e2a6d9e149ab8627c0c7d7abb2d73384b3c0c/zstandard-0.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:85304a43f4d513f5464ceb938aa02c1e78c2943b29f44a750b48b25ac999a049", size = 462671, upload-time = "2025-09-14T22:17:51.533Z" }, + { url = "https://files.pythonhosted.org/packages/3d/5c/f8923b595b55fe49e30612987ad8bf053aef555c14f05bb659dd5dbe3e8a/zstandard-0.25.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e29f0cf06974c899b2c188ef7f783607dbef36da4c242eb6c82dcd8b512855e3", size = 795887, upload-time = "2025-09-14T22:17:54.198Z" }, + { url = "https://files.pythonhosted.org/packages/8d/09/d0a2a14fc3439c5f874042dca72a79c70a532090b7ba0003be73fee37ae2/zstandard-0.25.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:05df5136bc5a011f33cd25bc9f506e7426c0c9b3f9954f056831ce68f3b6689f", size = 640658, upload-time = "2025-09-14T22:17:55.423Z" }, + { url = "https://files.pythonhosted.org/packages/5d/7c/8b6b71b1ddd517f68ffb55e10834388d4f793c49c6b83effaaa05785b0b4/zstandard-0.25.0-cp314-cp314-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:f604efd28f239cc21b3adb53eb061e2a205dc164be408e553b41ba2ffe0ca15c", size = 5379849, upload-time = "2025-09-14T22:17:57.372Z" }, + { url = "https://files.pythonhosted.org/packages/a4/86/a48e56320d0a17189ab7a42645387334fba2200e904ee47fc5a26c1fd8ca/zstandard-0.25.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223415140608d0f0da010499eaa8ccdb9af210a543fac54bce15babbcfc78439", size = 5058095, upload-time = "2025-09-14T22:17:59.498Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ad/eb659984ee2c0a779f9d06dbfe45e2dc39d99ff40a319895df2d3d9a48e5/zstandard-0.25.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e54296a283f3ab5a26fc9b8b5d4978ea0532f37b231644f367aa588930aa043", size = 5551751, upload-time = "2025-09-14T22:18:01.618Z" }, + { url = "https://files.pythonhosted.org/packages/61/b3/b637faea43677eb7bd42ab204dfb7053bd5c4582bfe6b1baefa80ac0c47b/zstandard-0.25.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ca54090275939dc8ec5dea2d2afb400e0f83444b2fc24e07df7fdef677110859", size = 6364818, upload-time = "2025-09-14T22:18:03.769Z" }, + { url = "https://files.pythonhosted.org/packages/31/dc/cc50210e11e465c975462439a492516a73300ab8caa8f5e0902544fd748b/zstandard-0.25.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e09bb6252b6476d8d56100e8147b803befa9a12cea144bbe629dd508800d1ad0", size = 5560402, upload-time = "2025-09-14T22:18:05.954Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ae/56523ae9c142f0c08efd5e868a6da613ae76614eca1305259c3bf6a0ed43/zstandard-0.25.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a9ec8c642d1ec73287ae3e726792dd86c96f5681eb8df274a757bf62b750eae7", size = 4955108, upload-time = "2025-09-14T22:18:07.68Z" }, + { url = "https://files.pythonhosted.org/packages/98/cf/c899f2d6df0840d5e384cf4c4121458c72802e8bda19691f3b16619f51e9/zstandard-0.25.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a4089a10e598eae6393756b036e0f419e8c1d60f44a831520f9af41c14216cf2", size = 5269248, upload-time = "2025-09-14T22:18:09.753Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c0/59e912a531d91e1c192d3085fc0f6fb2852753c301a812d856d857ea03c6/zstandard-0.25.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = 
"sha256:f67e8f1a324a900e75b5e28ffb152bcac9fbed1cc7b43f99cd90f395c4375344", size = 5430330, upload-time = "2025-09-14T22:18:11.966Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/7e31db1240de2df22a58e2ea9a93fc6e38cc29353e660c0272b6735d6669/zstandard-0.25.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:9654dbc012d8b06fc3d19cc825af3f7bf8ae242226df5f83936cb39f5fdc846c", size = 5811123, upload-time = "2025-09-14T22:18:13.907Z" }, + { url = "https://files.pythonhosted.org/packages/f6/49/fac46df5ad353d50535e118d6983069df68ca5908d4d65b8c466150a4ff1/zstandard-0.25.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4203ce3b31aec23012d3a4cf4a2ed64d12fea5269c49aed5e4c3611b938e4088", size = 5359591, upload-time = "2025-09-14T22:18:16.465Z" }, + { url = "https://files.pythonhosted.org/packages/c2/38/f249a2050ad1eea0bb364046153942e34abba95dd5520af199aed86fbb49/zstandard-0.25.0-cp314-cp314-win32.whl", hash = "sha256:da469dc041701583e34de852d8634703550348d5822e66a0c827d39b05365b12", size = 444513, upload-time = "2025-09-14T22:18:20.61Z" }, + { url = "https://files.pythonhosted.org/packages/3a/43/241f9615bcf8ba8903b3f0432da069e857fc4fd1783bd26183db53c4804b/zstandard-0.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:c19bcdd826e95671065f8692b5a4aa95c52dc7a02a4c5a0cac46deb879a017a2", size = 516118, upload-time = "2025-09-14T22:18:17.849Z" }, + { url = "https://files.pythonhosted.org/packages/f0/ef/da163ce2450ed4febf6467d77ccb4cd52c4c30ab45624bad26ca0a27260c/zstandard-0.25.0-cp314-cp314-win_arm64.whl", hash = "sha256:d7541afd73985c630bafcd6338d2518ae96060075f9463d7dc14cfb33514383d", size = 476940, upload-time = "2025-09-14T22:18:19.088Z" }, +] From 9078a7bbf3d6bcbfd7556d4222eb6e69f8f47b80 Mon Sep 17 00:00:00 2001 From: Isaac Kargar Date: Thu, 14 May 2026 12:13:35 +0300 Subject: [PATCH 7/8] Harden BAML extract runtime --- CHANGELOG.md | 6 ++ src/lerim/agents/baml_client/inlinedbaml.py | 4 +- src/lerim/agents/baml_src/extract_react.baml | 2 + .../agents/baml_src/extract_react_tests.baml | 32 +++++++ src/lerim/agents/extract/persistence.py | 46 +++++++++ src/lerim/server/api.py | 95 +++++++++++++++---- src/lerim/server/daemon.py | 95 +++++++++++++++---- src/lerim/sessions/catalog.py | 93 +++++++++++++++--- tests/README.md | 5 + tests/unit/agents/extract/test_persistence.py | 80 ++++++++++++++++ tests/unit/server/test_api.py | 49 ++++++++++ tests/unit/server/test_daemon_functions.py | 66 +++++++++++++ .../unit/server/test_daemon_sync_maintain.py | 37 ++++++++ tests/unit/sessions/test_catalog.py | 49 ++++++++++ 14 files changed, 606 insertions(+), 53 deletions(-) create mode 100644 tests/unit/agents/extract/test_persistence.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a60fbef..c455c77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Replaced sync extraction with the BAML/LangGraph harness while keeping maintain, ask, and working-memory on PydanticAI. - Updated extraction evals, integration tests, docs, and run artifacts to use graph events instead of PydanticAI extract messages. +- Tuned extraction prompts to avoid storing incidental personal names unless identity itself is the durable context. + +### Fixed +- Hardened session catalog/API status paths so catalog storage issues degrade status responses instead of crashing status or maintain. +- Made extraction persistence idempotent when a rebuilt session catalog replays a session whose episode already exists. 
+- Improved long-running extraction queue handling so transient SQLite heartbeat write failures and sequential processing do not create false stale-running jobs. ### Removed - Removed the legacy PydanticAI extract agent, extract-only trace tools, history processors, and the experimental `baml_agents/` sidecar. diff --git a/src/lerim/agents/baml_client/inlinedbaml.py b/src/lerim/agents/baml_client/inlinedbaml.py index 7f11456..3030237 100644 --- a/src/lerim/agents/baml_client/inlinedbaml.py +++ b/src/lerim/agents/baml_client/inlinedbaml.py @@ -12,8 +12,8 @@ _file_map = { - "extract_react.baml": "enum RecordKind {\n DECISION @alias(\"decision\") @description(\"A durable project decision with decision and why fields.\")\n PREFERENCE @alias(\"preference\") @description(\"A stable user or workflow preference.\")\n CONSTRAINT @alias(\"constraint\") @description(\"A durable invariant, limit, or must/cannot rule.\")\n FACT @alias(\"fact\") @description(\"A durable project fact or setup truth.\")\n REFERENCE @alias(\"reference\") @description(\"A pointer to an external source of truth.\")\n}\n\nenum RecordStatus {\n ACTIVE @alias(\"active\") @description(\"Current context that future sessions may reuse.\")\n ARCHIVED @alias(\"archived\") @description(\"Historical or routine context that should not be treated as active guidance.\")\n}\n\nenum FindingLevel {\n DECISION @alias(\"decision\") @description(\"Durable decision-level finding.\")\n PREFERENCE @alias(\"preference\") @description(\"Durable user or workflow preference finding.\")\n FEEDBACK @alias(\"feedback\") @description(\"Durable feedback-level finding.\")\n REFERENCE @alias(\"reference\") @description(\"Durable external-source finding.\")\n CONSTRAINT @alias(\"constraint\") @description(\"Durable constraint-level finding.\")\n FACT @alias(\"fact\") @description(\"Durable fact-level finding.\")\n IMPLEMENTATION @alias(\"implementation\") @description(\"Trace-local implementation evidence or discarded hypothesis.\")\n}\n\nclass TraceWindowFinding {\n theme string @description(\"Short stable theme for this finding.\")\n level FindingLevel @description(\"Use durable levels for reusable context, implementation for local/noisy evidence.\")\n line int? @description(\"1-based supporting line when the window gives one.\")\n quote string? @description(\"Short supporting quote from the current window.\")\n note string @description(\"Compact semantic finding. Avoid command logs and copied errors.\")\n}\n\nclass TraceWindowScan {\n episode_update string? @description(\"Compact update for the final episode summary. May be omitted when this window adds nothing.\")\n durable_findings TraceWindowFinding[] @description(\"Reusable decisions, preferences, constraints, facts, and references found in this window.\")\n implementation_findings TraceWindowFinding[] @description(\"Implementation evidence, discarded hypotheses, and local details useful only as support/noise.\")\n discarded_noise string[] @description(\"Short descriptions of noisy categories intentionally not saved.\")\n}\n\nclass EpisodeDraft {\n title string? @description(\"Optional short title for the current-session episode. Runtime derives one if omitted.\")\n body string? @description(\"Compact episode body. If omitted, runtime builds it from user_intent and what_happened.\")\n status RecordStatus? @description(\"Use archived for routine/no-durable sessions; active only when the episode itself remains useful.\")\n user_intent string? @description(\"What the user wanted in this source session. 
Runtime fills a generic fallback if omitted.\")\n what_happened string? @description(\"What the session actually did. Runtime fills a generic fallback if omitted.\")\n outcomes string? @description(\"Optional concise outcome.\")\n}\n\nclass DurableRecordDraft {\n kind RecordKind @description(\"Durable record kind.\")\n title string @description(\"Short standalone durable title.\")\n body string @description(\"Compact standalone durable body.\")\n status RecordStatus? @description(\"Usually active for reusable durable records.\")\n valid_from string?\n valid_until string?\n decision string?\n why string?\n alternatives string?\n consequences string?\n}\n\nclass SynthesizedExtraction {\n episode EpisodeDraft @description(\"Exactly one current-session episode record draft.\")\n durable_records DurableRecordDraft[] @description(\"Zero or more durable records.\")\n completion_summary string? @description(\"Brief summary of extraction work for final_result/reporting.\")\n}\n\nfunction ScanTraceWindow(\n run_instruction: string,\n prior_episode_summary: string,\n prior_findings_summary: string,\n trace_window: string\n) -> TraceWindowScan {\n client MiniMaxM27\n prompt #\"\n {{ _.role(\"system\") }}\n You scan one window from a coding-agent trace for Lerim extraction.\n Return only structured output. Do not save records and do not plan future tool calls.\n Do not include tags, hidden reasoning, markdown, or prose.\n The top-level output must include episode_update, durable_findings, implementation_findings, and discarded_noise.\n Use an empty string or empty list when a field has no content.\n Every durable_findings and implementation_findings item must be an object with:\n - theme\n - level: one of decision, preference, feedback, reference, constraint, fact, or implementation\n - note\n - optional line\n - optional quote\n Never return finding items as plain strings.\n Never use confidence labels such as high, medium, or low as finding levels.\n\n Separate:\n - episode_update: what happened in this session window, for the final episode record.\n - durable_findings: reusable project/user context only.\n - implementation_findings: local evidence, command work, discarded hypotheses, or support.\n - discarded_noise: categories of content intentionally ignored.\n\n Durable signal means a decision, preference, constraint, fact, or reference likely useful beyond this trace.\n Implementation detail alone is not durable signal.\n A durable finding should help a future agent make a better decision in a new chat.\n Save what a future agent would be relieved to already know: stable user preferences, chosen policies, durable constraints, source-of-truth references, and stable project or product truths.\n If a detail only helps replay or debug this trace's terminal session, put it in implementation_findings or discarded_noise, not durable_findings.\n Raw metrics, comparison tables, config edits, API wiring, package installation, debug commands, file paths, and stack traces are implementation_findings unless the user turns them into a reusable policy, preference, constraint, fact, or source-of-truth reference.\n When the same topic has both user-level guidance and technical evidence, the user-level guidance is the durable finding; the technical evidence is implementation support.\n For technical blockers, keep durable_findings at the capability, status, and implication level only. 
Put patch recipes, flags, functions, commands, exact metrics, and local debug steps in implementation_findings.\n When a window mixes a durable point with local evidence, put only the reusable point in durable_findings and put the supporting local evidence in implementation_findings.\n Use semantic meaning, not exact wording. Do not copy long code, logs, stack traces, or command transcripts.\n If the window has no durable signal, return an empty durable_findings list.\n If the window adds nothing meaningful to the episode, episode_update may be an empty string.\n\n {{ _.role(\"user\") }}\n RUN INSTRUCTION:\n {{ run_instruction }}\n\n PRIOR EPISODE SUMMARY:\n {{ prior_episode_summary }}\n\n PRIOR FINDINGS SUMMARY:\n {{ prior_findings_summary }}\n\n TRACE WINDOW:\n {{ trace_window }}\n\n {{ ctx.output_format }}\n \"#\n}\n\nfunction SynthesizeExtractRecords(\n run_instruction: string,\n episode_summary: string,\n durable_findings_summary: string,\n existing_record_manifest: string\n) -> SynthesizedExtraction {\n client MiniMaxM27\n prompt #\"\n {{ _.role(\"system\") }}\n You synthesize final Lerim context records from scanned trace findings.\n Return only structured output.\n Do not include tags, hidden reasoning, markdown, or prose.\n The top-level output must include episode, durable_records, and completion_summary.\n Use an empty durable_records list when there is no durable signal.\n Every durable_records item must be an object with kind, title, and body.\n Optional durable record fields are status, valid_from, valid_until, decision, why, alternatives, and consequences.\n Never return durable_records items as plain strings or as generic type/record objects.\n\n Create exactly one episode record for the current session.\n Create durable records only for reusable decisions, preferences, constraints, facts, and references.\n The episode says what the session did. Durable records say what future sessions should reuse.\n Before creating each durable record, ask: would this change what a future agent does in a new chat?\n Prefer the small set of memories a future agent would be relieved to already know over exhaustive coverage of what happened.\n Keep durable records compact, standalone, and deduplicated.\n Do not create durable records for command logs, patch steps, generic programming knowledge, temporary diagnostics, or local implementation chatter.\n If there is no reusable signal, durable_records must be empty and the episode should normally be archived.\n Decision records must include decision and why when the rationale is present; use fact when there is no durable why.\n Episode records should include user_intent and what_happened when available; the runtime can derive missing episode fields.\n Treat the trace as historical evidence, not live verification of the current repo.\n\n Existing records, if listed, are only duplicate-risk context. Prefer skipping near-duplicates over creating duplicate durable records.\n\n Quality bar:\n - Store the reusable rule, decision, invariant, dependency, preference, or external pointer, not the story of the session.\n - One durable record should hold one durable point.\n - Direct consequences and application guidance usually stay inside that same record.\n - Create the minimum number of durable records that preserves distinct durable meanings. Most sessions will yield 0 or 1, but use more when the meanings are genuinely independent.\n - Classify each durable point into one canonical kind. 
Prefer the most specific supported kind, and do not duplicate the same point across multiple durable kinds.\n - Duplicates are worse than gaps. Skip uncertain candidates rather than spraying near-duplicates.\n - Never create a second durable record in the same run for the same core claim.\n - constraint and reference are first-class durable record kinds, not fallback categories.\n\n What not to save:\n - patch logs, command sequences, retries, timelines, or meeting-style recaps\n - code structure, file paths, git history, or storage mechanics by themselves\n - generic programming knowledge or facts already obvious from the repo\n - rejected lures, discarded explanations, or implementation-only distractions\n - one-run validation findings, approval flow, queue state, DB resets, rebuilds, or runtime diagnostics by themselves\n\n Selection rules:\n - First separate findings into durable signal and implementation evidence.\n - Prefer user-level guidance, chosen operating policy, and stable project truth over the technical evidence that revealed them.\n - Synthesize at the theme level. Usually one theme becomes one durable record.\n - Create multiple durable records only when the trace establishes multiple independent durable points, each with its own support.\n - Do not store one durable point as both a preference and a decision, a fact and a decision, or any other cross-kind duplicate.\n - A stable workflow preference is not also a decision unless the trace separately states an explicit project decision with rationale.\n - A dependency, setup, or environment truth without durable rationale is a fact, not also a decision.\n - A failure caused by the current run's temporary validation setup is not itself a durable environment truth. If the investigation reveals a stable requirement that future sessions must apply, save that requirement as the durable point and omit the temporary validation story.\n - Merge candidates when one only states how to apply the other in local operations, routing, or ownership.\n - If two candidates share the same core claim, merge them.\n - If the difference is only evidence framing, symptom wording, or local-vs-CI phrasing around the same durable fact, keep one record and fold the extra context into it.\n - If one candidate is only the direct application or routing consequence of another, keep it inside the stronger record.\n - If one candidate only says how different local components should apply the same project rule, keep that guidance inside the main record rather than creating a second durable record.\n - If one candidate only restates where local project components live or how an internal architecture is applied, keep it inside the stronger decision, fact, or constraint instead of creating a separate reference.\n - If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule.\n - If a technical blocker is reusable, record only the capability affected, status or source of truth, and implication for future work.\n - Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered.\n - Store durable records only when the lesson is likely reusable beyond this trace.\n - If a candidate is mainly about this trace's commands, files, or timeline, reject it.\n - Trace-local instructions about what to ignore in this session are not preferences unless they clearly express a broader standing workflow rule for future sessions.\n - If the trace 
explicitly says the rationale is unknown or says not to invent one, do not create a decision; use fact instead.\n - A stable setup, dependency, or environment requirement without a durable why is a fact even if it sounds like the current chosen setup.\n - The instruction \"do not invent a why\" is extraction guidance, not project context.\n - If the trace explicitly rejects a lure or distraction, do not carry that rejected idea into the durable record text unless the rejection itself is the durable lesson.\n - If a long noisy investigation resolves into one source-of-truth boundary, store only that boundary. Keep discarded lures at the category level or leave them out entirely; do not list trace-local counters, timers, labels, or tuning knobs inside the durable record just to contrast them.\n - When a discarded lure matters as evidence, keep it attached to the main durable theme as implementation context rather than storing it as a second durable theme.\n - If the episode summary contains clearly reusable decision, preference, constraint, fact, or reference, that point should usually also exist as its own durable record.\n - Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode.\n - Durable records are additional project context, not a substitute for the session episode.\n - Most traces should produce only a few durable records; create more only when each one would independently change future behavior.\n\n Writing rules:\n - Durable titles should name the lasting rule, decision, fact, constraint, preference, or reference directly.\n - Durable bodies should be compact, neutral, and standalone.\n - Prefer this shape for durable records: the durable point, why it matters, and how to apply it later.\n - Do not write durable records as meeting minutes, patch logs, or cleanup commentary.\n - Do not preserve trace-local commands, negotiation phrasing, or \"this is not about X\" sentences in final record text.\n - Do not mention discarded implementation noise in durable record fields, including consequences.\n - Do not include patch recipes, flags, function names, exact metrics, or local debug steps in durable record text.\n - Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored.\n - When the durable lesson is a source-of-truth rule, write the authoritative rule directly.\n - If a short contrast is still helpful, keep it abstract, such as \"not worker-local state\" or \"not ephemeral local state\". Do not enumerate examples in parentheses or comma-separated lists.\n - When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection.\n - Facts from noisy failures must be rewritten into the underlying dependency, environment requirement, stakeholder driver, or operational fact.\n - If a fact still reads like stderr, an exception symptom, or copied command output, rewrite it again before writing.\n - When the durable lesson is an environment or dependency requirement, do not center the fact on the observed failure symptom. Name the requirement directly and mention the symptom only if it is needed as brief supporting context.\n - If brief supporting context is useful, lead with the requirement and keep the symptom generic. 
Never include exception class names, quoted error fragments, or copied failure strings in the durable fact.\n - If the candidate is mainly \"this validation run failed until we changed the setup\", it belongs in the archived episode. If the candidate names a reusable setup or runtime requirement discovered through that validation, keep the requirement and drop the failure narrative.\n - When no durable rationale exists, do not spend the fact body explaining that the rationale is absent. Just state the stable dependency, setup requirement, or operational truth directly.\n - Do not quote or paraphrase trace instructions about how to classify the evidence inside the final fact body. Final fact text should describe the underlying truth, not the extraction rule you followed.\n - References must answer both \"where should future sessions look?\" and \"when should they consult it?\"\n - Do not use reference for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than \"consult this external source next time.\"\n - Keep the episode concise: short title, short body, concise user_intent, what_happened, and outcomes.\n\n Record types:\n - preference: Stable workflow guidance from the user. Save corrections and confirmed non-obvious working style that should carry into future sessions.\n - decision: A chosen approach or project rule that future work should follow and that is not obvious from code alone. If the trace does not support a durable why, do not use decision.\n - constraint: A durable invariant, limit, or must/cannot rule that future work must respect.\n - fact: A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth.\n - reference: A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo. Use reference only when the enduring value is where to look later.\n\n Few-shot quality examples:\n\n Example preference:\n - Trace signal: the user corrects the assistant after a small code fix and says not to append redundant recaps.\n - Good: create one preference record about keeping replies terse and avoiding redundant change recaps after small diffs.\n - Bad: store the file edit itself, or treat the correction as one-session scratch when it is stable workflow guidance.\n\n Example decision:\n - Trace signal: early turns discuss local refactors and flaky tests; late in the trace the user settles one architecture boundary, and follow-on routing guidance only applies that boundary.\n - Good: create the required episode and one decision record for the architecture boundary. Keep the routing guidance inside that record.\n - Bad: store refactor noise, split one architectural choice into near-duplicate records, or create a separate durable record saying the refactors were noise.\n\n Example fact:\n - Trace signal: repeated failed commands and partial theories eventually resolve to one stable operational requirement.\n - Good: create one fact record for the requirement in clean operational language. 
Lead with the dependency or environment requirement; mention the failure only as brief generic support if needed.\n - Bad: store raw errors, command history, rejected theories, exact exception text, or a separate record saying not to invent a why.\n\n Example late clarification:\n - Trace signal: early chunks circle local counters, timers, labels, and tuning; the final chunk clarifies those were distractions and the real durable lesson is a source-of-truth boundary.\n - Good: create one durable record for the source-of-truth boundary. Mention restart or failover only if it explains why the boundary matters.\n - Bad: write a durable record that carries over rejected local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list.\n\n Example reference:\n - Trace signal: the assistant starts from a partial repo note, then the user clarifies that ownership or status lives in an external dashboard or ticket system.\n - Good: create one reference record that names the external source and when future sessions should consult it.\n - Bad: center the record on local files, or turn it into a warning slogan about what not to trust locally.\n\n Example routine:\n - Trace signal: formatter, small lint fix, rerun tests, green result, no new rule or durable fact.\n - Good: create only an archived episode.\n - Bad: invent a durable record from the sequence of routine commands.\n\n {{ _.role(\"user\") }}\n RUN INSTRUCTION:\n {{ run_instruction }}\n\n EXISTING RECORD MANIFEST:\n {{ existing_record_manifest }}\n\n EPISODE SUMMARY:\n {{ episode_summary }}\n\n DURABLE FINDINGS:\n {{ durable_findings_summary }}\n\n {{ ctx.output_format }}\n \"#\n}\n", - "extract_react_tests.baml": "test ScanTraceWindowCapturesDurableDecision {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract one episode and any durable project context from the trace.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [8 lines, window 1-8]\n 1\tuser: We should run extraction evals before changing extraction prompts.\n 2\tassistant: I will update the BAML harness and run the eval.\n 3\tuser: Keep trace-local command logs out of long-term context.\n 4\tassistant: Implemented the change and ran a smoke test.\n \"#\n }\n @@assert({{ this.durable_findings|length > 0 }})\n}\n\ntest ScanTraceWindowSeparatesDurableAndImplementation {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract reusable context, not command history.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [7 lines, window 1-7]\n 1\tuser: The deployment checklist is the source of truth for release readiness.\n 2\tassistant: Ran pytest -q and fixed a local fixture path.\n 3\tassistant: Edited src/example.py and reran the lint command.\n 4\tuser: The command output is just evidence, not memory.\n \"#\n }\n @@assert({{ this.durable_findings|length > 0 }})\n @@assert({{ this.implementation_findings|length > 0 }})\n}\n\ntest ScanTraceWindowAllowsNoDurableSignal {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract durable context only when the trace supports it.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [5 lines, window 1-5]\n 1\tuser: Please format this file.\n 2\tassistant: Ran the formatter.\n 3\tassistant: Fixed one lint complaint.\n 4\tassistant: Tests are green.\n \"#\n }\n @@assert({{ this.durable_findings|length == 0 }})\n}\n\ntest 
SynthesizeExtractRecordsCreatesEpisodeAndDurableRecord {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create exactly one episode and durable records only for reusable project context.\"\n episode_summary \"- The session updated an extraction harness and ran a smoke eval.\"\n durable_findings_summary \"- preference: eval-first prompt changes: Run the extraction eval before changing extraction prompts. (line 1)\"\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.durable_records|length > 0 }})\n}\n\ntest SynthesizeExtractRecordsAllowsNoDurableSignal {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create exactly one episode and no durable records when no reusable context exists.\"\n episode_summary \"- The session only formatted code, fixed one lint issue, and reran tests.\"\n durable_findings_summary \"(none)\"\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.durable_records|length == 0 }})\n}\n\ntest SynthesizeExtractRecordsDeduplicatesCoreClaim {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create compact durable records and merge duplicate meanings.\"\n episode_summary \"- The session clarified a runtime state boundary after noisy implementation discussion.\"\n durable_findings_summary #\"\n - decision: persisted source of truth: Runtime status must live in one persisted store that survives restart.\n - decision: runtime status boundary: Use the same persisted status store as the source of truth after restart.\n \"#\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.durable_records|length == 1 }})\n}\n", + "extract_react.baml": "enum RecordKind {\n DECISION @alias(\"decision\") @description(\"A durable project decision with decision and why fields.\")\n PREFERENCE @alias(\"preference\") @description(\"A stable user or workflow preference.\")\n CONSTRAINT @alias(\"constraint\") @description(\"A durable invariant, limit, or must/cannot rule.\")\n FACT @alias(\"fact\") @description(\"A durable project fact or setup truth.\")\n REFERENCE @alias(\"reference\") @description(\"A pointer to an external source of truth.\")\n}\n\nenum RecordStatus {\n ACTIVE @alias(\"active\") @description(\"Current context that future sessions may reuse.\")\n ARCHIVED @alias(\"archived\") @description(\"Historical or routine context that should not be treated as active guidance.\")\n}\n\nenum FindingLevel {\n DECISION @alias(\"decision\") @description(\"Durable decision-level finding.\")\n PREFERENCE @alias(\"preference\") @description(\"Durable user or workflow preference finding.\")\n FEEDBACK @alias(\"feedback\") @description(\"Durable feedback-level finding.\")\n REFERENCE @alias(\"reference\") @description(\"Durable external-source finding.\")\n CONSTRAINT @alias(\"constraint\") @description(\"Durable constraint-level finding.\")\n FACT @alias(\"fact\") @description(\"Durable fact-level finding.\")\n IMPLEMENTATION @alias(\"implementation\") @description(\"Trace-local implementation evidence or discarded hypothesis.\")\n}\n\nclass TraceWindowFinding {\n theme string @description(\"Short stable theme for this finding.\")\n level FindingLevel @description(\"Use durable levels for reusable context, implementation for local/noisy evidence.\")\n line int? @description(\"1-based supporting line when the window gives one.\")\n quote string? @description(\"Short supporting quote from the current window.\")\n note string @description(\"Compact semantic finding. 
Avoid command logs and copied errors.\")\n}\n\nclass TraceWindowScan {\n episode_update string? @description(\"Compact update for the final episode summary. May be omitted when this window adds nothing.\")\n durable_findings TraceWindowFinding[] @description(\"Reusable decisions, preferences, constraints, facts, and references found in this window.\")\n implementation_findings TraceWindowFinding[] @description(\"Implementation evidence, discarded hypotheses, and local details useful only as support/noise.\")\n discarded_noise string[] @description(\"Short descriptions of noisy categories intentionally not saved.\")\n}\n\nclass EpisodeDraft {\n title string? @description(\"Optional short title for the current-session episode. Runtime derives one if omitted.\")\n body string? @description(\"Compact episode body. If omitted, runtime builds it from user_intent and what_happened.\")\n status RecordStatus? @description(\"Use archived for routine/no-durable sessions; active only when the episode itself remains useful.\")\n user_intent string? @description(\"What the user wanted in this source session. Runtime fills a generic fallback if omitted.\")\n what_happened string? @description(\"What the session actually did. Runtime fills a generic fallback if omitted.\")\n outcomes string? @description(\"Optional concise outcome.\")\n}\n\nclass DurableRecordDraft {\n kind RecordKind @description(\"Durable record kind.\")\n title string @description(\"Short standalone durable title.\")\n body string @description(\"Compact standalone durable body.\")\n status RecordStatus? @description(\"Usually active for reusable durable records.\")\n valid_from string?\n valid_until string?\n decision string?\n why string?\n alternatives string?\n consequences string?\n}\n\nclass SynthesizedExtraction {\n episode EpisodeDraft @description(\"Exactly one current-session episode record draft.\")\n durable_records DurableRecordDraft[] @description(\"Zero or more durable records.\")\n completion_summary string? @description(\"Brief summary of extraction work for final_result/reporting.\")\n}\n\nfunction ScanTraceWindow(\n run_instruction: string,\n prior_episode_summary: string,\n prior_findings_summary: string,\n trace_window: string\n) -> TraceWindowScan {\n client MiniMaxM27\n prompt #\"\n {{ _.role(\"system\") }}\n You scan one window from a coding-agent trace for Lerim extraction.\n Return only structured output. 
Do not save records and do not plan future tool calls.\n Do not include tags, hidden reasoning, markdown, or prose.\n The top-level output must include episode_update, durable_findings, implementation_findings, and discarded_noise.\n Use an empty string or empty list when a field has no content.\n Every durable_findings and implementation_findings item must be an object with:\n - theme\n - level: one of decision, preference, feedback, reference, constraint, fact, or implementation\n - note\n - optional line\n - optional quote\n Never return finding items as plain strings.\n Never use confidence labels such as high, medium, or low as finding levels.\n\n Separate:\n - episode_update: what happened in this session window, for the final episode record.\n - durable_findings: reusable project/user context only.\n - implementation_findings: local evidence, command work, discarded hypotheses, or support.\n - discarded_noise: categories of content intentionally ignored.\n\n Durable signal means a decision, preference, constraint, fact, or reference likely useful beyond this trace.\n Implementation detail alone is not durable signal.\n A durable finding should help a future agent make a better decision in a new chat.\n Save what a future agent would be relieved to already know: stable user preferences, chosen policies, durable constraints, source-of-truth references, and stable project or product truths.\n If a detail only helps replay or debug this trace's terminal session, put it in implementation_findings or discarded_noise, not durable_findings.\n Raw metrics, comparison tables, config edits, API wiring, package installation, debug commands, file paths, and stack traces are implementation_findings unless the user turns them into a reusable policy, preference, constraint, fact, or source-of-truth reference.\n When the same topic has both user-level guidance and technical evidence, the user-level guidance is the durable finding; the technical evidence is implementation support.\n For technical blockers, keep durable_findings at the capability, status, and implication level only. Put patch recipes, flags, functions, commands, exact metrics, and local debug steps in implementation_findings.\n When a window mixes a durable point with local evidence, put only the reusable point in durable_findings and put the supporting local evidence in implementation_findings.\n Use semantic meaning, not exact wording. Do not copy long code, logs, stack traces, or command transcripts.\n Do not carry incidental personal names, direct address labels, or conversational identity markers into episode updates or findings unless the identity itself is the durable context. 
Prefer role-neutral wording such as \"the user\" and \"the assistant\".\n If the window has no durable signal, return an empty durable_findings list.\n If the window adds nothing meaningful to the episode, episode_update may be an empty string.\n\n {{ _.role(\"user\") }}\n RUN INSTRUCTION:\n {{ run_instruction }}\n\n PRIOR EPISODE SUMMARY:\n {{ prior_episode_summary }}\n\n PRIOR FINDINGS SUMMARY:\n {{ prior_findings_summary }}\n\n TRACE WINDOW:\n {{ trace_window }}\n\n {{ ctx.output_format }}\n \"#\n}\n\nfunction SynthesizeExtractRecords(\n run_instruction: string,\n episode_summary: string,\n durable_findings_summary: string,\n existing_record_manifest: string\n) -> SynthesizedExtraction {\n client MiniMaxM27\n prompt #\"\n {{ _.role(\"system\") }}\n You synthesize final Lerim context records from scanned trace findings.\n Return only structured output.\n Do not include tags, hidden reasoning, markdown, or prose.\n The top-level output must include episode, durable_records, and completion_summary.\n Use an empty durable_records list when there is no durable signal.\n Every durable_records item must be an object with kind, title, and body.\n Optional durable record fields are status, valid_from, valid_until, decision, why, alternatives, and consequences.\n Never return durable_records items as plain strings or as generic type/record objects.\n\n Create exactly one episode record for the current session.\n Create durable records only for reusable decisions, preferences, constraints, facts, and references.\n The episode says what the session did. Durable records say what future sessions should reuse.\n Before creating each durable record, ask: would this change what a future agent does in a new chat?\n Prefer the small set of memories a future agent would be relieved to already know over exhaustive coverage of what happened.\n Keep durable records compact, standalone, and deduplicated.\n Do not create durable records for command logs, patch steps, generic programming knowledge, temporary diagnostics, or local implementation chatter.\n If there is no reusable signal, durable_records must be empty and the episode should normally be archived.\n Decision records must include decision and why when the rationale is present; use fact when there is no durable why.\n Episode records should include user_intent and what_happened when available; the runtime can derive missing episode fields.\n Treat the trace as historical evidence, not live verification of the current repo.\n\n Existing records, if listed, are only duplicate-risk context. Prefer skipping near-duplicates over creating duplicate durable records.\n\n Quality bar:\n - Store the reusable rule, decision, invariant, dependency, preference, or external pointer, not the story of the session.\n - One durable record should hold one durable point.\n - Direct consequences and application guidance usually stay inside that same record.\n - Create the minimum number of durable records that preserves distinct durable meanings. Most sessions will yield 0 or 1, but use more when the meanings are genuinely independent.\n - Classify each durable point into one canonical kind. Prefer the most specific supported kind, and do not duplicate the same point across multiple durable kinds.\n - Duplicates are worse than gaps. 
Skip uncertain candidates rather than spraying near-duplicates.\n - Never create a second durable record in the same run for the same core claim.\n - constraint and reference are first-class durable record kinds, not fallback categories.\n\n What not to save:\n - patch logs, command sequences, retries, timelines, or meeting-style recaps\n - code structure, file paths, git history, or storage mechanics by themselves\n - generic programming knowledge or facts already obvious from the repo\n - rejected lures, discarded explanations, or implementation-only distractions\n - one-run validation findings, approval flow, queue state, DB resets, rebuilds, or runtime diagnostics by themselves\n\n Selection rules:\n - First separate findings into durable signal and implementation evidence.\n - Prefer user-level guidance, chosen operating policy, and stable project truth over the technical evidence that revealed them.\n - Synthesize at the theme level. Usually one theme becomes one durable record.\n - Create multiple durable records only when the trace establishes multiple independent durable points, each with its own support.\n - Do not store one durable point as both a preference and a decision, a fact and a decision, or any other cross-kind duplicate.\n - A stable workflow preference is not also a decision unless the trace separately states an explicit project decision with rationale.\n - A dependency, setup, or environment truth without durable rationale is a fact, not also a decision.\n - A failure caused by the current run's temporary validation setup is not itself a durable environment truth. If the investigation reveals a stable requirement that future sessions must apply, save that requirement as the durable point and omit the temporary validation story.\n - Merge candidates when one only states how to apply the other in local operations, routing, or ownership.\n - If two candidates share the same core claim, merge them.\n - If the difference is only evidence framing, symptom wording, or local-vs-CI phrasing around the same durable fact, keep one record and fold the extra context into it.\n - If one candidate is only the direct application or routing consequence of another, keep it inside the stronger record.\n - If one candidate only says how different local components should apply the same project rule, keep that guidance inside the main record rather than creating a second durable record.\n - If one candidate only restates where local project components live or how an internal architecture is applied, keep it inside the stronger decision, fact, or constraint instead of creating a separate reference.\n - If the trace gives one durable rule plus examples of local noise or discarded details, store only the durable rule.\n - If a technical blocker is reusable, record only the capability affected, status or source of truth, and implication for future work.\n - Do not create a durable record whose whole point is that some local details from this trace were noise, low value, or should not be remembered.\n - Store durable records only when the lesson is likely reusable beyond this trace.\n - If a candidate is mainly about this trace's commands, files, or timeline, reject it.\n - Trace-local instructions about what to ignore in this session are not preferences unless they clearly express a broader standing workflow rule for future sessions.\n - If the trace explicitly says the rationale is unknown or says not to invent one, do not create a decision; use fact instead.\n - A stable setup, dependency, 
or environment requirement without a durable why is a fact even if it sounds like the current chosen setup.\n - The instruction \"do not invent a why\" is extraction guidance, not project context.\n - If the trace explicitly rejects a lure or distraction, do not carry that rejected idea into the durable record text unless the rejection itself is the durable lesson.\n - If a long noisy investigation resolves into one source-of-truth boundary, store only that boundary. Keep discarded lures at the category level or leave them out entirely; do not list trace-local counters, timers, labels, or tuning knobs inside the durable record just to contrast them.\n - When a discarded lure matters as evidence, keep it attached to the main durable theme as implementation context rather than storing it as a second durable theme.\n - If the episode summary contains clearly reusable decision, preference, constraint, fact, or reference, that point should usually also exist as its own durable record.\n - Do not leave a clearly reusable rule, invariant, dependency, source-of-truth pointer, or stable preference only inside the episode.\n - Durable records are additional project context, not a substitute for the session episode.\n - Most traces should produce only a few durable records; create more only when each one would independently change future behavior.\n\n Writing rules:\n - Durable titles should name the lasting rule, decision, fact, constraint, preference, or reference directly.\n - Durable bodies should be compact, neutral, and standalone.\n - Prefer this shape for durable records: the durable point, why it matters, and how to apply it later.\n - Do not write durable records as meeting minutes, patch logs, or cleanup commentary.\n - Do not preserve trace-local commands, negotiation phrasing, or \"this is not about X\" sentences in final record text.\n - Do not include personal names, direct address labels, or conversational identity markers in episode or durable record text unless the identity itself is the durable context. Prefer role-neutral wording such as \"the user\" and \"the assistant\".\n - Do not mention discarded implementation noise in durable record fields, including consequences.\n - Do not include patch recipes, flags, function names, exact metrics, or local debug steps in durable record text.\n - Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored.\n - When the durable lesson is a source-of-truth rule, write the authoritative rule directly.\n - If a short contrast is still helpful, keep it abstract, such as \"not worker-local state\" or \"not ephemeral local state\". Do not enumerate examples in parentheses or comma-separated lists.\n - When writing from a historical trace, word durable records as source-backed context, not as freshly verified code inspection.\n - Facts from noisy failures must be rewritten into the underlying dependency, environment requirement, stakeholder driver, or operational fact.\n - If a fact still reads like stderr, an exception symptom, or copied command output, rewrite it again before writing.\n - When the durable lesson is an environment or dependency requirement, do not center the fact on the observed failure symptom. Name the requirement directly and mention the symptom only if it is needed as brief supporting context.\n - If brief supporting context is useful, lead with the requirement and keep the symptom generic. 
Never include exception class names, quoted error fragments, or copied failure strings in the durable fact.\n - If the candidate is mainly \"this validation run failed until we changed the setup\", it belongs in the archived episode. If the candidate names a reusable setup or runtime requirement discovered through that validation, keep the requirement and drop the failure narrative.\n - When no durable rationale exists, do not spend the fact body explaining that the rationale is absent. Just state the stable dependency, setup requirement, or operational truth directly.\n - Do not quote or paraphrase trace instructions about how to classify the evidence inside the final fact body. Final fact text should describe the underlying truth, not the extraction rule you followed.\n - References must answer both \"where should future sessions look?\" and \"when should they consult it?\"\n - Do not use reference for internal file mappings, local storage boundaries, or repo architecture notes when the durable lesson is the project rule itself rather than \"consult this external source next time.\"\n - Keep the episode concise: short title, short body, concise user_intent, what_happened, and outcomes.\n\n Record types:\n - preference: Stable workflow guidance from the user. Save corrections and confirmed non-obvious working style that should carry into future sessions.\n - decision: A chosen approach or project rule that future work should follow and that is not obvious from code alone. If the trace does not support a durable why, do not use decision.\n - constraint: A durable invariant, limit, or must/cannot rule that future work must respect.\n - fact: A durable project fact such as a dependency, environment requirement, stakeholder driver, or other non-obvious truth.\n - reference: A pointer to an external dashboard, document, ticket system, or other source of truth outside the repo. Use reference only when the enduring value is where to look later.\n\n Few-shot quality examples:\n\n Example preference:\n - Trace signal: the user corrects the assistant after a small code fix and says not to append redundant recaps.\n - Good: create one preference record about keeping replies terse and avoiding redundant change recaps after small diffs.\n - Bad: store the file edit itself, or treat the correction as one-session scratch when it is stable workflow guidance.\n\n Example decision:\n - Trace signal: early turns discuss local refactors and flaky tests; late in the trace the user settles one architecture boundary, and follow-on routing guidance only applies that boundary.\n - Good: create the required episode and one decision record for the architecture boundary. Keep the routing guidance inside that record.\n - Bad: store refactor noise, split one architectural choice into near-duplicate records, or create a separate durable record saying the refactors were noise.\n\n Example fact:\n - Trace signal: repeated failed commands and partial theories eventually resolve to one stable operational requirement.\n - Good: create one fact record for the requirement in clean operational language. 
Lead with the dependency or environment requirement; mention the failure only as brief generic support if needed.\n - Bad: store raw errors, command history, rejected theories, exact exception text, or a separate record saying not to invent a why.\n\n Example late clarification:\n - Trace signal: early chunks circle local counters, timers, labels, and tuning; the final chunk clarifies those were distractions and the real durable lesson is a source-of-truth boundary.\n - Good: create one durable record for the source-of-truth boundary. Mention restart or failover only if it explains why the boundary matters.\n - Bad: write a durable record that carries over rejected local counters, attempt counts, backoff knobs, or other trace-local artifacts as a contrast list.\n\n Example reference:\n - Trace signal: the assistant starts from a partial repo note, then the user clarifies that ownership or status lives in an external dashboard or ticket system.\n - Good: create one reference record that names the external source and when future sessions should consult it.\n - Bad: center the record on local files, or turn it into a warning slogan about what not to trust locally.\n\n Example routine:\n - Trace signal: formatter, small lint fix, rerun tests, green result, no new rule or durable fact.\n - Good: create only an archived episode.\n - Bad: invent a durable record from the sequence of routine commands.\n\n {{ _.role(\"user\") }}\n RUN INSTRUCTION:\n {{ run_instruction }}\n\n EXISTING RECORD MANIFEST:\n {{ existing_record_manifest }}\n\n EPISODE SUMMARY:\n {{ episode_summary }}\n\n DURABLE FINDINGS:\n {{ durable_findings_summary }}\n\n {{ ctx.output_format }}\n \"#\n}\n", + "extract_react_tests.baml": "test ScanTraceWindowCapturesDurableDecision {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract one episode and any durable project context from the trace.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [8 lines, window 1-8]\n 1\tuser: We should run extraction evals before changing extraction prompts.\n 2\tassistant: I will update the BAML harness and run the eval.\n 3\tuser: Keep trace-local command logs out of long-term context.\n 4\tassistant: Implemented the change and ran a smoke test.\n \"#\n }\n @@assert({{ this.durable_findings|length > 0 }})\n}\n\ntest ScanTraceWindowSeparatesDurableAndImplementation {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract reusable context, not command history.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [7 lines, window 1-7]\n 1\tuser: The deployment checklist is the source of truth for release readiness.\n 2\tassistant: Ran pytest -q and fixed a local fixture path.\n 3\tassistant: Edited src/example.py and reran the lint command.\n 4\tuser: The command output is just evidence, not memory.\n \"#\n }\n @@assert({{ this.durable_findings|length > 0 }})\n @@assert({{ this.implementation_findings|length > 0 }})\n}\n\ntest ScanTraceWindowAllowsNoDurableSignal {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract durable context only when the trace supports it.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [5 lines, window 1-5]\n 1\tuser: Please format this file.\n 2\tassistant: Ran the formatter.\n 3\tassistant: Fixed one lint complaint.\n 4\tassistant: Tests are green.\n \"#\n }\n @@assert({{ this.durable_findings|length == 0 }})\n}\n\ntest 
ScanTraceWindowAvoidsIncidentalPersonalName {\n functions [ScanTraceWindow]\n args {\n run_instruction \"Extract durable context without preserving incidental personal names.\"\n prior_episode_summary \"(none yet)\"\n prior_findings_summary \"(none yet)\"\n trace_window #\"\n [5 lines, window 1-5]\n 1\tuser: Morgan asked for extraction prompts to avoid storing names unless identity is the point.\n 2\tassistant: I will update the prompt generally.\n 3\tuser: The durable rule is role-neutral memory text, not the person's name.\n \"#\n }\n @@assert({{ \"Morgan\" not in (this.episode_update or \"\") }})\n @@assert({{ \"Morgan\" not in (this.durable_findings|map(attribute=\"note\")|join(\" \")) }})\n}\n\ntest SynthesizeExtractRecordsCreatesEpisodeAndDurableRecord {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create exactly one episode and durable records only for reusable project context.\"\n episode_summary \"- The session updated an extraction harness and ran a smoke eval.\"\n durable_findings_summary \"- preference: eval-first prompt changes: Run the extraction eval before changing extraction prompts. (line 1)\"\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.durable_records|length > 0 }})\n}\n\ntest SynthesizeExtractRecordsAllowsNoDurableSignal {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create exactly one episode and no durable records when no reusable context exists.\"\n episode_summary \"- The session only formatted code, fixed one lint issue, and reran tests.\"\n durable_findings_summary \"(none)\"\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.durable_records|length == 0 }})\n}\n\ntest SynthesizeExtractRecordsDeduplicatesCoreClaim {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create compact durable records and merge duplicate meanings.\"\n episode_summary \"- The session clarified a runtime state boundary after noisy implementation discussion.\"\n durable_findings_summary #\"\n - decision: persisted source of truth: Runtime status must live in one persisted store that survives restart.\n - decision: runtime status boundary: Use the same persisted status store as the source of truth after restart.\n \"#\n existing_record_manifest \"(none)\"\n }\n @@assert({{ this.durable_records|length == 1 }})\n}\n\ntest SynthesizeExtractRecordsAvoidsIncidentalPersonalName {\n functions [SynthesizeExtractRecords]\n args {\n run_instruction \"Create role-neutral episode and durable records.\"\n episode_summary \"Morgan asked the assistant to make future extraction records avoid incidental personal names.\"\n durable_findings_summary \"- preference: role-neutral memory text: Avoid personal names in memory text unless identity itself is the durable context.\"\n existing_record_manifest \"(none)\"\n }\n @@assert({{ \"Morgan\" not in (this.episode.title or \"\") }})\n @@assert({{ \"Morgan\" not in (this.episode.body or \"\") }})\n @@assert({{ \"Morgan\" not in (this.episode.user_intent or \"\") }})\n @@assert({{ \"Morgan\" not in (this.episode.what_happened or \"\") }})\n @@assert({{ \"Morgan\" not in (this.durable_records|map(attribute=\"body\")|join(\" \")) }})\n}\n", "generators.baml": "generator target {\n output_type \"python/pydantic\"\n output_dir \"../\"\n default_client_mode \"sync\"\n version \"0.222.0\"\n}\n", "models.baml": "retry_policy ExtractAgentRetry {\n max_retries 1\n strategy {\n type exponential_backoff\n delay_ms 500\n multiplier 2\n max_delay_ms 8000\n }\n}\n\nclient MiniMaxM27 {\n 
provider \"openai-generic\"\n retry_policy ExtractAgentRetry\n options {\n base_url \"https://api.minimax.io/v1\"\n api_key env.MINIMAX_API_KEY\n model \"MiniMax-M2.7\"\n temperature 0.01\n http {\n connect_timeout_ms 10000\n time_to_first_token_timeout_ms 120000\n idle_timeout_ms 30000\n request_timeout_ms 300000\n }\n }\n}\n\nclient OllamaLocal {\n provider \"openai-generic\"\n retry_policy ExtractAgentRetry\n options {\n base_url \"http://127.0.0.1:11434/v1\"\n api_key \"ollama\"\n model \"gemma4:e4b\"\n temperature 0.0\n http {\n connect_timeout_ms 10000\n time_to_first_token_timeout_ms 120000\n idle_timeout_ms 30000\n request_timeout_ms 300000\n }\n }\n}\n", } diff --git a/src/lerim/agents/baml_src/extract_react.baml b/src/lerim/agents/baml_src/extract_react.baml index fbdc616..c53b6db 100644 --- a/src/lerim/agents/baml_src/extract_react.baml +++ b/src/lerim/agents/baml_src/extract_react.baml @@ -103,6 +103,7 @@ function ScanTraceWindow( For technical blockers, keep durable_findings at the capability, status, and implication level only. Put patch recipes, flags, functions, commands, exact metrics, and local debug steps in implementation_findings. When a window mixes a durable point with local evidence, put only the reusable point in durable_findings and put the supporting local evidence in implementation_findings. Use semantic meaning, not exact wording. Do not copy long code, logs, stack traces, or command transcripts. + Do not carry incidental personal names, direct address labels, or conversational identity markers into episode updates or findings unless the identity itself is the durable context. Prefer role-neutral wording such as "the user" and "the assistant". If the window has no durable signal, return an empty durable_findings list. If the window adds nothing meaningful to the episode, episode_update may be an empty string. @@ -210,6 +211,7 @@ function SynthesizeExtractRecords( - Prefer this shape for durable records: the durable point, why it matters, and how to apply it later. - Do not write durable records as meeting minutes, patch logs, or cleanup commentary. - Do not preserve trace-local commands, negotiation phrasing, or "this is not about X" sentences in final record text. + - Do not include personal names, direct address labels, or conversational identity markers in episode or durable record text unless the identity itself is the durable context. Prefer role-neutral wording such as "the user" and "the assistant". - Do not mention discarded implementation noise in durable record fields, including consequences. - Do not include patch recipes, flags, function names, exact metrics, or local debug steps in durable record text. - Do not write a durable record whose body is mainly a warning that certain local details, cleanups, or implementation noise should be ignored. diff --git a/src/lerim/agents/baml_src/extract_react_tests.baml b/src/lerim/agents/baml_src/extract_react_tests.baml index 77cb85e..d25fa3a 100644 --- a/src/lerim/agents/baml_src/extract_react_tests.baml +++ b/src/lerim/agents/baml_src/extract_react_tests.baml @@ -50,6 +50,23 @@ test ScanTraceWindowAllowsNoDurableSignal { @@assert({{ this.durable_findings|length == 0 }}) } +test ScanTraceWindowAvoidsIncidentalPersonalName { + functions [ScanTraceWindow] + args { + run_instruction "Extract durable context without preserving incidental personal names." 
+ prior_episode_summary "(none yet)" + prior_findings_summary "(none yet)" + trace_window #" + [5 lines, window 1-5] + 1 user: Morgan asked for extraction prompts to avoid storing names unless identity is the point. + 2 assistant: I will update the prompt generally. + 3 user: The durable rule is role-neutral memory text, not the person's name. + "# + } + @@assert({{ "Morgan" not in (this.episode_update or "") }}) + @@assert({{ "Morgan" not in (this.durable_findings|map(attribute="note")|join(" ")) }}) +} + test SynthesizeExtractRecordsCreatesEpisodeAndDurableRecord { functions [SynthesizeExtractRecords] args { @@ -85,3 +102,18 @@ test SynthesizeExtractRecordsDeduplicatesCoreClaim { } @@assert({{ this.durable_records|length == 1 }}) } + +test SynthesizeExtractRecordsAvoidsIncidentalPersonalName { + functions [SynthesizeExtractRecords] + args { + run_instruction "Create role-neutral episode and durable records." + episode_summary "Morgan asked the assistant to make future extraction records avoid incidental personal names." + durable_findings_summary "- preference: role-neutral memory text: Avoid personal names in memory text unless identity itself is the durable context." + existing_record_manifest "(none)" + } + @@assert({{ "Morgan" not in (this.episode.title or "") }}) + @@assert({{ "Morgan" not in (this.episode.body or "") }}) + @@assert({{ "Morgan" not in (this.episode.user_intent or "") }}) + @@assert({{ "Morgan" not in (this.episode.what_happened or "") }}) + @@assert({{ "Morgan" not in (this.durable_records|map(attribute="body")|join(" ")) }}) +} diff --git a/src/lerim/agents/extract/persistence.py b/src/lerim/agents/extract/persistence.py index eb475ae..22d9bf3 100644 --- a/src/lerim/agents/extract/persistence.py +++ b/src/lerim/agents/extract/persistence.py @@ -143,6 +143,7 @@ def persist_synthesized_extraction( store.initialize() store.register_project(ctx.project_identity) for index, record in enumerate([episode, *durable_records]): + skip_remaining_records = False try: result = store.create_record( project_id=ctx.project_identity.project_id, @@ -161,6 +162,17 @@ def persist_synthesized_extraction( ), args=record, ) + except ValueError as exc: + if index == 0 and str(exc) == "duplicate_episode_for_session": + observation = _duplicate_episode_observation(store, ctx, record) + skip_remaining_records = True + else: + observation = PersistenceObservation( + action="save_context", + ok=False, + content=f"Record write failed: {type(exc).__name__}: {exc}", + args=record, + ) except Exception as exc: observation = PersistenceObservation( action="save_context", @@ -169,6 +181,8 @@ def persist_synthesized_extraction( args=record, ) observations.append(observation_to_state(observation)) + if skip_remaining_records: + break if index == 0 and not observation.ok: break @@ -192,6 +206,38 @@ def persist_synthesized_extraction( return observations, done, completion_summary if done else "" +def _duplicate_episode_observation( + store: ContextStore, + ctx: PersistenceContext, + record: dict[str, Any], +) -> PersistenceObservation: + """Return an idempotent observation for an already-extracted session.""" + rows = store.query( + entity="records", + mode="list", + project_ids=[ctx.project_identity.project_id], + kind="episode", + source_session_id=ctx.session_id, + limit=1, + include_archived=True, + ).get("rows") + existing = rows[0] if isinstance(rows, list) and rows else {} + return PersistenceObservation( + action="save_context", + ok=True, + content=json.dumps( + { + "ok": True, + "skipped": 
"duplicate_episode_for_session", + "existing_record_id": str(existing.get("record_id") or ""), + }, + ensure_ascii=True, + indent=2, + ), + args=record, + ) + + def observation_to_state(observation: PersistenceObservation) -> dict[str, Any]: """Convert a persistence observation into serializable graph state.""" return { diff --git a/src/lerim/server/api.py b/src/lerim/server/api.py index 0d4c596..3c0a29a 100644 --- a/src/lerim/server/api.py +++ b/src/lerim/server/api.py @@ -671,7 +671,7 @@ def api_sync( window_start=window_start, window_end=window_end, ) - queue_health = queue_health_snapshot() + queue_health = _safe_queue_health_snapshot() payload: dict[str, Any] = { "code": code, **asdict(summary), @@ -689,7 +689,7 @@ def api_maintain(dry_run: bool = False) -> dict[str, Any]: config = get_config() with ollama_lifecycle(config): code, payload = run_maintain_once(dry_run=dry_run) - queue_health = queue_health_snapshot() + queue_health = _safe_queue_health_snapshot() result: dict[str, Any] = {"code": code, **payload, "queue_health": queue_health} if queue_health.get("degraded"): result["warning"] = "Queue degraded. " + str( @@ -761,6 +761,41 @@ def _public_error_message(raw: Any) -> str: return "Error details hidden" if str(raw or "").strip() else "" +def _empty_queue_counts() -> dict[str, int]: + """Return zero-filled queue counts for unavailable catalog reads.""" + return { + "pending": 0, + "running": 0, + "done": 0, + "failed": 0, + "dead_letter": 0, + } + + +def _catalog_unavailable_health(_exc: sqlite3.Error) -> dict[str, Any]: + """Return degraded queue health when the session catalog cannot be read.""" + return { + "degraded": True, + "stale_running_count": 0, + "dead_letter_count": 0, + "oldest_running_age_seconds": None, + "oldest_dead_letter_age_seconds": None, + "advice": ( + "Session catalog is unavailable; stop Lerim and rebuild " + "the session index." 
+ ), + "error": "Session catalog storage is unavailable.", + } + + +def _safe_queue_health_snapshot() -> dict[str, Any]: + """Return queue health without letting catalog storage abort API responses.""" + try: + return queue_health_snapshot() + except sqlite3.Error as exc: + return _catalog_unavailable_health(exc) + + def _normalize_activity_item(run: dict[str, Any]) -> dict[str, Any]: """Normalize one service_run row into status activity item.""" details = run.get("details") if isinstance(run.get("details"), dict) else {} @@ -1059,30 +1094,50 @@ def api_status( total_records = 0 now = datetime.now(timezone.utc) - latest_sync_raw = latest_service_run("sync") - latest_maintain_raw = latest_service_run("maintain") - queue = count_session_jobs_by_status() - queue_health = queue_health_snapshot() + catalog_error: sqlite3.Error | None = None + try: + latest_sync_raw = latest_service_run("sync") + latest_maintain_raw = latest_service_run("maintain") + queue = count_session_jobs_by_status() + queue_health = queue_health_snapshot() + sessions_indexed_count = count_fts_indexed() + unscoped_by_agent = count_unscoped_sessions_by_agent(projects=config.projects) + except sqlite3.Error as exc: + catalog_error = exc + latest_sync_raw = None + latest_maintain_raw = None + queue = _empty_queue_counts() + queue_health = _catalog_unavailable_health(exc) + sessions_indexed_count = 0 + unscoped_by_agent = {} + latest_sync_details = (latest_sync_raw or {}).get("details") or {} latest_sync_metrics = ( _sync_metrics_from_details(latest_sync_details) if isinstance(latest_sync_details, dict) else {} ) - unscoped_by_agent = count_unscoped_sessions_by_agent(projects=config.projects) selected_project_names = {name for name, _ in selected_projects} platforms = _public_platforms(list_platforms(config.platforms_path)) - recent_activity = ( - _running_activity_rows(selected_projects=selected_projects) - + _recent_activity( - limit=12, - allowed_projects=selected_project_names - if normalized_scope == "project" - else None, - ) - )[:12] + if catalog_error is None: + try: + recent_activity = ( + _running_activity_rows(selected_projects=selected_projects) + + _recent_activity( + limit=12, + allowed_projects=selected_project_names + if normalized_scope == "project" + else None, + ) + )[:12] + except sqlite3.Error as exc: + catalog_error = exc + queue_health = _catalog_unavailable_health(exc) + recent_activity = [] + else: + recent_activity = [] latest_sync = _normalize_latest_run(latest_sync_raw) @@ -1094,9 +1149,15 @@ def api_status( ], "platforms": platforms, "record_count": total_records, - "sessions_indexed_count": count_fts_indexed(), + "sessions_indexed_count": sessions_indexed_count, "queue": queue, "queue_health": queue_health, + "session_catalog": { + "status": "unavailable" if catalog_error else "available", + "error": "Session catalog storage is unavailable." 
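+        # Reuses the fixed message from _catalog_unavailable_health so clients see one stable string.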
+ if catalog_error + else "", + }, "projects": projects_payload, "sync_window_days": config.sync_window_days, "schedule": { diff --git a/src/lerim/server/daemon.py b/src/lerim/server/daemon.py index c0c0b87..f79f634 100644 --- a/src/lerim/server/daemon.py +++ b/src/lerim/server/daemon.py @@ -14,7 +14,7 @@ from typing import Any, Callable from lerim.config.project_scope import match_session_project -from lerim.config.logging import log_file_path +from lerim.config.logging import log_file_path, logger from lerim.config.settings import get_config, reload_config from lerim.server.runtime import LerimRuntime from lerim.sessions.catalog import ( @@ -25,8 +25,10 @@ enqueue_session_job, fail_session_job, fetch_session_doc, + clear_local_running_job, heartbeat_session_job, index_new_sessions, + note_local_running_job, reap_stale_running_jobs, record_service_run, ) @@ -186,11 +188,27 @@ def _retry_backoff_seconds(attempts: int) -> int: def _start_job_heartbeat(run_id: str, interval_seconds: int = 30) -> threading.Event: """Refresh a running queue job lease until the returned event is set.""" stop = threading.Event() - heartbeat_session_job(run_id) + + def _heartbeat_once() -> bool: + try: + ok = heartbeat_session_job(run_id) + if ok: + note_local_running_job(run_id) + return ok + except sqlite3.Error as exc: + note_local_running_job(run_id) + logger.warning( + "session job heartbeat failed | run_id={} error={}", + run_id, + exc, + ) + return True + + _heartbeat_once() def _beat() -> None: while not stop.wait(max(1, int(interval_seconds))): - if not heartbeat_session_job(run_id): + if not _heartbeat_once(): return threading.Thread( @@ -228,14 +246,47 @@ def _record_service_event( details: dict[str, Any], ) -> None: """Record a service run with canonical completed timestamp.""" - record_fn( - job_type=job_type, - status=status, - started_at=started_at, - completed_at=_now_iso(), - trigger=trigger, - details=details, - ) + try: + record_fn( + job_type=job_type, + status=status, + started_at=started_at, + completed_at=_now_iso(), + trigger=trigger, + details=details, + ) + except sqlite3.Error as exc: + logger.warning( + "failed to record {} service event | status={} error={}", + job_type, + status, + exc, + ) + + +def _record_service_start( + record_fn: Callable[..., Any], + *, + job_type: str, + started_at: str, + trigger: str, +) -> None: + """Record the start of a service run without failing the operation.""" + try: + record_fn( + job_type=job_type, + status="started", + started_at=started_at, + completed_at=None, + trigger=trigger, + details=None, + ) + except sqlite3.Error as exc: + logger.warning( + "failed to record {} service start | error={}", + job_type, + exc, + ) def _pid_alive(pid: int | None) -> bool: @@ -798,7 +849,12 @@ def _process_claimed_jobs( projects_metrics: dict[str, dict[str, Any]] = {} events: list[dict[str, Any]] = [] for job in claimed: - result = _process_one_job(job) + run_id = str(job.get("run_id") or "") + note_local_running_job(run_id) + try: + result = _process_one_job(job) + finally: + clear_local_running_job(run_id) project_name = str(result.get("project_name") or "unknown") metric_row = projects_metrics.setdefault(project_name, _new_project_metric()) metric_row["sessions_analyzed"] = ( @@ -905,13 +961,11 @@ def run_sync_once( return EXIT_LOCK_BUSY, _empty_sync_summary() try: - record_service_run( + _record_service_start( + record_service_run, job_type="sync", - status="started", started_at=started, - completed_at=None, trigger=trigger, - details=None, ) config = 
get_config() @@ -993,10 +1047,9 @@ def run_sync_once( skipped = len(target_run_ids) elif not dry_run: # Process up to max_sessions by claiming in a loop. - # Each claim returns at most 1 job per project. Normal backlog - # extraction is newest-first; explicit replay can request - # chronological order directly from the catalog API. - # After processing, claim again to get the next session. + # Claim only one job at a time because extraction is sequential. + # Marking a batch as running before each job has its own heartbeat + # makes waiting jobs look stale during long LLM calls. total_processed = 0 while total_processed < claim_limit: reap_stale_running_jobs( @@ -1004,7 +1057,7 @@ def run_sync_once( retry_backoff_fn=_retry_backoff_seconds, ) claimed = claim_session_jobs( - limit=claim_limit - total_processed, + limit=1, run_ids=[run_id] if run_id else None, claim_order="newest", ) diff --git a/src/lerim/sessions/catalog.py b/src/lerim/sessions/catalog.py index 679647e..1b2966e 100644 --- a/src/lerim/sessions/catalog.py +++ b/src/lerim/sessions/catalog.py @@ -30,6 +30,8 @@ SESSION_JOB_CLAIM_ORDERS = {SESSION_JOB_CLAIM_NEWEST, SESSION_JOB_CLAIM_OLDEST} _DB_INIT_LOCK = threading.Lock() _DB_INITIALIZED_PATH: Path | None = None +_LOCAL_RUNNING_LEASES_LOCK = threading.Lock() +_LOCAL_RUNNING_LEASES: dict[str, datetime] = {} DEFAULT_RUNNING_JOB_LEASE_SECONDS = 2 * 60 @@ -55,6 +57,41 @@ def _iso_now() -> str: return _utc_now().isoformat() +def note_local_running_job(run_id: str) -> None: + """Record that this process is actively processing *run_id*.""" + if not run_id: + return + with _LOCAL_RUNNING_LEASES_LOCK: + _LOCAL_RUNNING_LEASES[run_id] = _utc_now() + + +def clear_local_running_job(run_id: str) -> None: + """Forget this process's active lease for *run_id*.""" + if not run_id: + return + with _LOCAL_RUNNING_LEASES_LOCK: + _LOCAL_RUNNING_LEASES.pop(run_id, None) + + +def _recent_local_running_lease_at( + run_id: str, + *, + now: datetime, + lease_seconds: int, +) -> datetime | None: + """Return this process's fresh lease timestamp for *run_id*, if any.""" + if not run_id: + return None + with _LOCAL_RUNNING_LEASES_LOCK: + lease_at = _LOCAL_RUNNING_LEASES.get(run_id) + if not lease_at: + return None + if now - lease_at <= timedelta(seconds=max(1, int(lease_seconds))): + return lease_at + _LOCAL_RUNNING_LEASES.pop(run_id, None) + return None + + def _to_iso(value: datetime | None) -> str | None: """Convert datetime to UTC-aware ISO string when value is present.""" if value is None: @@ -94,7 +131,8 @@ def _connect() -> sqlite3.Connection: """Open catalog SQLite connection with dictionary row factory.""" path = _db_path() path.parent.mkdir(parents=True, exist_ok=True) - conn = sqlite3.connect(path) + conn = sqlite3.connect(path, timeout=60.0) + conn.execute("PRAGMA busy_timeout = 60000;") conn.row_factory = _dict_row return conn @@ -1061,7 +1099,17 @@ def list_stale_running_jobs( """, (JOB_STATUS_RUNNING, job_type, cutoff, max(1, int(limit))), ).fetchall() - return rows + now = _utc_now() + return [ + row + for row in rows + if _recent_local_running_lease_at( + str(row.get("run_id") or ""), + now=now, + lease_seconds=effective_lease, + ) + is None + ] def reap_stale_running_jobs( @@ -1114,26 +1162,25 @@ def queue_health_snapshot( dead_letter_count = int(counts.get(JOB_STATUS_DEAD_LETTER, 0)) with _connect() as conn: - stale_row = conn.execute( + stale_rows = conn.execute( """ - SELECT COUNT(1) AS total + SELECT run_id, COALESCE(heartbeat_at, claimed_at) AS lease_at FROM session_jobs WHERE status = 
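+            -- first ? binds JOB_STATUS_RUNNING; the second ? below binds the stale lease cutoff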
? AND COALESCE(heartbeat_at, claimed_at) IS NOT NULL AND COALESCE(heartbeat_at, claimed_at) <= ? """, (JOB_STATUS_RUNNING, cutoff), - ).fetchone() - oldest_running_row = conn.execute( + ).fetchall() + running_rows = conn.execute( """ - SELECT COALESCE(heartbeat_at, claimed_at) AS lease_at + SELECT run_id, COALESCE(heartbeat_at, claimed_at) AS lease_at FROM session_jobs WHERE status = ? AND COALESCE(heartbeat_at, claimed_at) IS NOT NULL ORDER BY COALESCE(heartbeat_at, claimed_at) ASC, id ASC - LIMIT 1 """, (JOB_STATUS_RUNNING,), - ).fetchone() + ).fetchall() oldest_dead_row = conn.execute( """ SELECT updated_at @@ -1145,13 +1192,33 @@ def queue_health_snapshot( (JOB_STATUS_DEAD_LETTER,), ).fetchone() - stale_running_count = int((stale_row or {}).get("total") or 0) - oldest_running_at = _parse_iso((oldest_running_row or {}).get("lease_at")) + visible_stale_rows = [ + row + for row in stale_rows + if _recent_local_running_lease_at( + str(row.get("run_id") or ""), + now=now, + lease_seconds=effective_lease, + ) + is None + ] + stale_running_count = len(visible_stale_rows) + running_lease_times: list[datetime] = [] + for row in running_rows: + run_id = str(row.get("run_id") or "") + local_lease_at = _recent_local_running_lease_at( + run_id, + now=now, + lease_seconds=effective_lease, + ) + lease_at = local_lease_at or _parse_iso(row.get("lease_at")) + if lease_at: + running_lease_times.append(lease_at) oldest_dead_at = _parse_iso((oldest_dead_row or {}).get("updated_at")) oldest_running_age_seconds = ( - max(0, int((now - oldest_running_at).total_seconds())) - if oldest_running_at + max(max(0, int((now - lease_at).total_seconds())) for lease_at in running_lease_times) + if running_lease_times else None ) oldest_dead_letter_age_seconds = ( diff --git a/tests/README.md b/tests/README.md index 1fae2ff..c35f072 100644 --- a/tests/README.md +++ b/tests/README.md @@ -84,7 +84,12 @@ Rules: - agent tool tests enforce fetch-before-mutate: `revise_context`, `archive_context`, and `supersede_context` only accept records returned by `get_context` in the same run - agent build tests guard the runtime tool contract against documentation and helper drift - adapter tests cover compact-trace visibility for canonical message fields and structured event messages without keyword heuristics +- extract persistence tests cover idempotent replay when a session episode already exists - session catalog tests cover queue claim availability, content-hash refresh/change detection, and stable pagination ordering +- API/daemon tests cover degraded status reporting when the session catalog is unavailable +- daemon tests cover transient session-job heartbeat write failures +- session catalog tests cover process-local active-job leases that avoid false stale queue health during transient heartbeat write failures +- daemon sync tests cover one-at-a-time job claiming to avoid false stale-running queue state - config tests cover provider client lifecycle, provider-specific model settings, fallback-model parsing, strict config parsing, and SDK log-noise filters - Working Memory tests cover cwd project resolution, freshness counts, markdown citations, CLI local reads, and artifact writes without live LLM calls diff --git a/tests/unit/agents/extract/test_persistence.py b/tests/unit/agents/extract/test_persistence.py new file mode 100644 index 0000000..3bf1f64 --- /dev/null +++ b/tests/unit/agents/extract/test_persistence.py @@ -0,0 +1,80 @@ +"""Tests for BAML extract persistence helpers.""" + +from __future__ import annotations + 
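+# Exercises persist_synthesized_extraction twice to prove replaying a session is a no-op.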
+from pathlib import Path + +from lerim.agents.extract.persistence import ( + PersistenceContext, + persist_synthesized_extraction, + prepare_context_store, +) +from lerim.context import ContextStore, resolve_project_identity + + +def _context(tmp_path: Path) -> PersistenceContext: + """Return one isolated persistence context.""" + return PersistenceContext( + context_db_path=tmp_path / "context.sqlite3", + project_identity=resolve_project_identity(tmp_path), + session_id="session-1", + trace_path=tmp_path / "trace.jsonl", + session_started_at="2026-01-01T00:00:00+00:00", + model_name="test-model", + ) + + +def _synthesized_payload() -> dict: + """Return one synthesized extraction payload.""" + return { + "completion_summary": "Extraction completed.", + "episode": { + "title": "Implement extract replay idempotency", + "body": "The session extracted context from a trace.", + "status": "archived", + "user_intent": "Extract context from the trace.", + "what_happened": "The trace was scanned and persisted.", + "outcomes": "One episode and one durable fact were produced.", + }, + "durable_records": [ + { + "kind": "fact", + "title": "Session catalog rebuild can replay traces", + "body": "Rebuilding the session catalog may replay a trace whose episode is already in the context DB.", + "status": "active", + } + ], + } + + +def test_duplicate_episode_replay_is_idempotent(tmp_path) -> None: + """Replaying an already-extracted session skips cleanly without duplicates.""" + ctx = _context(tmp_path) + prepare_context_store(ctx) + + first_observations, first_done, _summary = persist_synthesized_extraction( + _synthesized_payload(), + ctx, + ) + second_observations, second_done, _summary = persist_synthesized_extraction( + _synthesized_payload(), + ctx, + ) + + store = ContextStore(ctx.context_db_path) + rows = store.query( + entity="records", + mode="list", + project_ids=[ctx.project_identity.project_id], + source_session_id=ctx.session_id, + limit=10, + include_archived=True, + )["rows"] + + assert first_done is True + assert first_observations[0]["ok"] is True + assert second_done is True + assert second_observations[0]["ok"] is True + assert "duplicate_episode_for_session" in second_observations[0]["content"] + assert len(rows) == 2 + assert sorted(row["kind"] for row in rows) == ["episode", "fact"] diff --git a/tests/unit/server/test_api.py b/tests/unit/server/test_api.py index 5f7ce53..1738516 100644 --- a/tests/unit/server/test_api.py +++ b/tests/unit/server/test_api.py @@ -521,6 +521,55 @@ def test_api_status_no_records(monkeypatch, tmp_path) -> None: assert result["record_count"] == 0 +def test_api_status_degrades_when_session_catalog_unavailable( + monkeypatch, tmp_path +) -> None: + """api_status reports catalog storage failure without raising.""" + cfg = make_config(tmp_path) + monkeypatch.setattr(api_mod, "get_config", lambda: cfg) + monkeypatch.setattr(api_mod, "list_platforms", lambda path: []) + + def broken_latest_service_run(_job_type: str) -> None: + """Simulate a malformed session catalog.""" + raise sqlite3.DatabaseError("database disk image is malformed") + + monkeypatch.setattr(api_mod, "latest_service_run", broken_latest_service_run) + + result = api_status() + + assert result["session_catalog"]["status"] == "unavailable" + assert result["session_catalog"]["error"] + assert result["queue"] == api_mod._empty_queue_counts() + assert result["queue_health"]["degraded"] is True + assert result["recent_activity"] == [] + + +def test_api_maintain_degrades_when_queue_health_unavailable( 
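+    # monkeypatch and tmp_path are pytest fixtures; tmp_path backs the throwaway config.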
+ monkeypatch, tmp_path +) -> None: + """api_maintain still returns its payload when queue health cannot read.""" + cfg = make_config(tmp_path) + monkeypatch.setattr(api_mod, "get_config", lambda: cfg) + monkeypatch.setattr( + api_mod, + "run_maintain_once", + lambda dry_run: (0, {"dry_run": dry_run}), + ) + + def broken_queue_health() -> None: + """Simulate a malformed session catalog.""" + raise sqlite3.DatabaseError("database disk image is malformed") + + monkeypatch.setattr(api_mod, "queue_health_snapshot", broken_queue_health) + + result = api_maintain(dry_run=True) + + assert result["code"] == 0 + assert result["dry_run"] is True + assert result["queue_health"]["degraded"] is True + assert result["queue_health"]["error"] + + def test_api_status_scope_skipped_unscoped_from_latest_sync( monkeypatch, tmp_path ) -> None: diff --git a/tests/unit/server/test_daemon_functions.py b/tests/unit/server/test_daemon_functions.py index 4458141..0072e5d 100644 --- a/tests/unit/server/test_daemon_functions.py +++ b/tests/unit/server/test_daemon_functions.py @@ -8,6 +8,8 @@ import json import os +import sqlite3 +import threading import time from dataclasses import replace from datetime import datetime, timedelta, timezone @@ -760,6 +762,29 @@ def test_empty_sync_summary() -> None: assert s.cost_usd == 0.0 +def test_start_job_heartbeat_retries_after_sqlite_error(monkeypatch) -> None: + """A transient heartbeat write error does not kill the heartbeat thread.""" + seen_retry = threading.Event() + calls = {"count": 0} + + def flaky_heartbeat(_run_id: str) -> bool: + """Raise once, then prove the background loop kept running.""" + calls["count"] += 1 + if calls["count"] == 1: + raise sqlite3.OperationalError("disk I/O error") + seen_retry.set() + return False + + monkeypatch.setattr(daemon, "heartbeat_session_job", flaky_heartbeat) + + stop = daemon._start_job_heartbeat("run-heartbeat", interval_seconds=1) + try: + assert seen_retry.wait(timeout=2) + finally: + stop.set() + assert calls["count"] >= 2 + + # --------------------------------------------------------------------------- # _record_service_event # --------------------------------------------------------------------------- @@ -792,6 +817,47 @@ def fake_record(**kwargs): assert captured[0]["details"]["extracted_sessions"] == 3 +def test_record_service_event_suppresses_sqlite_error() -> None: + """Service-run audit failures do not fail the daemon operation.""" + from lerim.server.daemon import _record_service_event + + def broken_record(**_kwargs): + """Raise like a malformed catalog write.""" + raise sqlite3.DatabaseError("database disk image is malformed") + + _record_service_event( + broken_record, + job_type="sync", + status="failed", + started_at="2026-01-01T00:00:00+00:00", + trigger="manual", + details={"error": "catalog unavailable"}, + ) + + +def test_record_service_start_calls_fn() -> None: + """_record_service_start writes a started row with no completion time.""" + from lerim.server.daemon import _record_service_start + + captured: list[dict] = [] + + def fake_record(**kwargs): + """Capture service run recording call.""" + captured.append(kwargs) + + _record_service_start( + fake_record, + job_type="sync", + started_at="2026-01-01T00:00:00+00:00", + trigger="api", + ) + + assert captured[0]["job_type"] == "sync" + assert captured[0]["status"] == "started" + assert captured[0]["completed_at"] is None + assert captured[0]["details"] is None + + # --------------------------------------------------------------------------- # lock_path # 
--------------------------------------------------------------------------- diff --git a/tests/unit/server/test_daemon_sync_maintain.py b/tests/unit/server/test_daemon_sync_maintain.py index 8fd86c4..9e73e6e 100644 --- a/tests/unit/server/test_daemon_sync_maintain.py +++ b/tests/unit/server/test_daemon_sync_maintain.py @@ -113,6 +113,43 @@ def _fail_sync(*_args, **_kwargs): assert latest["status"] == "failed" +def test_sync_claims_one_job_at_a_time(monkeypatch, tmp_path) -> None: + """Sync does not mark a batch running before jobs can receive heartbeats.""" + config_path = write_test_config(tmp_path, projects={"testproj": str(tmp_path)}) + monkeypatch.setenv("LERIM_CONFIG", str(config_path)) + reload_config() + + captured_limits: list[int] = [] + + monkeypatch.setattr(daemon, "record_service_run", lambda **_kwargs: 1) + monkeypatch.setattr(daemon, "index_new_sessions", lambda **_kwargs: []) + monkeypatch.setattr(daemon, "reap_stale_running_jobs", lambda **_kwargs: 0) + + def fake_claim_session_jobs(**kwargs): + """Capture claim size and stop the processing loop.""" + captured_limits.append(int(kwargs["limit"])) + return [] + + monkeypatch.setattr(daemon, "claim_session_jobs", fake_claim_session_jobs) + + code, summary = daemon.run_sync_once( + run_id=None, + agent_filter=None, + no_extract=False, + force=False, + max_sessions=5, + dry_run=False, + ignore_lock=True, + trigger="test", + window_start=None, + window_end=None, + ) + + assert code == daemon.EXIT_OK + assert summary.extracted_sessions == 0 + assert captured_limits == [1] + + def test_sync_force_enqueues_changed_sessions(monkeypatch, tmp_path) -> None: """Changed sessions (hash differs) are force-enqueued so they get re-extracted.""" _setup(tmp_path, monkeypatch) diff --git a/tests/unit/sessions/test_catalog.py b/tests/unit/sessions/test_catalog.py index f35a7c8..2a8349b 100644 --- a/tests/unit/sessions/test_catalog.py +++ b/tests/unit/sessions/test_catalog.py @@ -13,6 +13,7 @@ from lerim.sessions.catalog import ( _connect, claim_session_jobs, + clear_local_running_job, complete_session_job, count_fts_indexed, count_session_jobs_by_status, @@ -28,6 +29,7 @@ list_service_runs, list_sessions_window, list_unscoped_sessions, + note_local_running_job, queue_health_snapshot, reap_stale_running_jobs, record_service_run, @@ -45,6 +47,7 @@ @pytest.fixture(autouse=True) def _reset_init_flag(monkeypatch): monkeypatch.setattr("lerim.sessions.catalog._DB_INITIALIZED_PATH", None) + monkeypatch.setattr("lerim.sessions.catalog._LOCAL_RUNNING_LEASES", {}) def _db(sessions_db: Path) -> sqlite3.Connection: @@ -126,6 +129,13 @@ def test_tables_created(self, sessions_db): assert "service_runs" in tables conn.close() + def test_connect_sets_busy_timeout(self, sessions_db): + """Catalog connections wait for busy writers before failing.""" + with _connect() as conn: + row = conn.execute("PRAGMA busy_timeout").fetchone() + + assert int(row.get("timeout") or 0) == 60000 + def test_fts_virtual_table_created(self, sessions_db): conn = _db(sessions_db) tables = { @@ -1375,6 +1385,45 @@ def test_reap_running_job_uses_heartbeat_when_present(self, sessions_db): ).fetchone() assert row["status"] == "running" + def test_local_running_lease_masks_transient_heartbeat_write_failure( + self, sessions_db + ): + """A live in-process job is not stale just because DB heartbeat writes fail.""" + _seed_and_enqueue( + "alive-local", + repo_path="/tmp/proj-alive-local", + start_time="2026-03-01T10:00:00Z", + ) + claimed = claim_session_jobs(limit=1, run_ids=["alive-local"]) + 
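+        # The UPDATE below backdates the DB lease by two hours to mimic missed heartbeat writes.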
assert len(claimed) == 1 + old = (datetime.now(timezone.utc) - timedelta(hours=2)).isoformat() + with _connect() as conn: + conn.execute( + """ + UPDATE session_jobs + SET claimed_at = ?, heartbeat_at = ?, updated_at = ? + WHERE run_id = ? + """, + (old, old, old, "alive-local"), + ) + conn.commit() + + note_local_running_job("alive-local") + try: + health = queue_health_snapshot(lease_seconds=60) + assert health["degraded"] is False + assert health["stale_running_count"] == 0 + assert reap_stale_running_jobs(lease_seconds=60) == 0 + finally: + clear_local_running_job("alive-local") + + with _connect() as conn: + row = conn.execute( + "SELECT status FROM session_jobs WHERE run_id = ?", + ("alive-local",), + ).fetchone() + assert row["status"] == "running" + def test_reap_stale_running_job_to_dead_letter_when_attempts_exhausted( self, sessions_db ): From 55a2516ac5b91dd6692b2bd9d25a55a1d9ba416a Mon Sep 17 00:00:00 2001 From: Isaac Kargar Date: Thu, 14 May 2026 12:14:46 +0300 Subject: [PATCH 8/8] Fix vulture checks for BAML extract --- pyproject.toml | 2 +- vulture_whitelist.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 82a7baa..792bf62 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,7 +96,7 @@ extend-exclude = ["src/lerim/agents/baml_client"] [tool.vulture] paths = ["src/lerim/", "vulture_whitelist.py"] -exclude = ["src/lerim/agents/baml_client/*"] +exclude = ["*/src/lerim/agents/baml_client/*"] min_confidence = 60 [dependency-groups] diff --git a/vulture_whitelist.py b/vulture_whitelist.py index cf32180..e6c3fa5 100644 --- a/vulture_whitelist.py +++ b/vulture_whitelist.py @@ -46,6 +46,12 @@ model_config # noqa last_context_tokens # noqa metrics_version # noqa +trace_total_lines # noqa +current_window # noqa +episode_updates # noqa +durable_findings # noqa +implementation_findings # noqa +discarded_noise # noqa records_changed_since_generation # noqa latest_run_folder # noqa suggested_action # noqa
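
For readers skimming the series: below is a minimal standalone sketch of the process-local lease pattern that the catalog.py hunks above implement with `note_local_running_job`, `clear_local_running_job`, and `_recent_local_running_lease_at`. The module names, lease window, and demo here are illustrative assumptions rather than Lerim's API; the real helpers additionally feed `list_stale_running_jobs` and `queue_health_snapshot`.

```python
# Standalone sketch (assumed names, not Lerim's API): a worker notes its
# own live jobs in process memory so a health check does not flag them as
# stale when the shared SQLite heartbeat write transiently fails.
import threading
from datetime import datetime, timedelta, timezone

_LEASES_LOCK = threading.Lock()
_LEASES: dict[str, datetime] = {}


def note_running(run_id: str) -> None:
    """Record that this process is actively working on run_id."""
    with _LEASES_LOCK:
        _LEASES[run_id] = datetime.now(timezone.utc)


def clear_running(run_id: str) -> None:
    """Forget the in-memory lease once the job finishes."""
    with _LEASES_LOCK:
        _LEASES.pop(run_id, None)


def is_locally_fresh(run_id: str, lease_seconds: int = 120) -> bool:
    """True when this process touched run_id within the lease window.

    Stale entries are evicted on read, mirroring the catalog helper.
    """
    now = datetime.now(timezone.utc)
    with _LEASES_LOCK:
        lease_at = _LEASES.get(run_id)
        if lease_at is None:
            return False
        if now - lease_at <= timedelta(seconds=max(1, lease_seconds)):
            return True
        _LEASES.pop(run_id, None)
        return False


if __name__ == "__main__":
    note_running("job-1")
    # Even if the DB heartbeat row is hours old, the local lease says alive.
    assert is_locally_fresh("job-1")
    clear_running("job-1")
    assert not is_locally_fresh("job-1")
    print("local lease sketch ok")
```

The in-memory map is authoritative only for the current process, which is the point: a job this process is still running cannot be reported stale locally even when the shared SQLite heartbeat write fails, while other processes continue to rely on the DB lease.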