diff --git a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py index 4ed080eceb..27730fb441 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py @@ -14,6 +14,24 @@ - reconstruct_to_type: for HITL responses where external data (without type markers) needs to be reconstructed to a known type - resolve_type: resolves 'module:class' type keys to Python types + +Security Model +-------------- +The underlying Azure Durable Functions storage (Azure Storage account) is the +trusted persistence layer for serialized checkpoint data. The +``RestrictedUnpickler`` in the core encoding module provides defense-in-depth +type filtering, but checkpoint storage itself must be properly access-controlled: + +- Ensure the Azure Storage account used by Durable Functions is not publicly + writable and uses appropriate RBAC / shared-access policies. +- Never route untrusted user input directly into ``deserialize_value`` without + first calling :func:`strip_pickle_markers` to neutralize injection of + pickle markers into the data path. +- Configure your checkpoint storage with ``allowed_checkpoint_types`` (or call + ``decode_checkpoint_value(..., allowed_types=...)`` directly) to restrict the set of types that can be deserialized. + +See :mod:`agent_framework._workflows._checkpoint_encoding` for the full +security model documentation. 
""" from __future__ import annotations diff --git a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py index dd1fb3d704..c66faae75e 100644 --- a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py +++ b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py @@ -13,6 +13,35 @@ value types (primitives, datetime, uuid, ...), all ``agent_framework`` internal types, and all ``openai.types`` types. Callers can extend the set by passing additional ``"module:qualname"`` strings. + +Security Model +-------------- +Checkpoint storage is treated as a **trusted data source**. The serialization +format uses Python's ``pickle`` module, which can execute arbitrary code during +deserialization. The ``RestrictedUnpickler`` provides a defense-in-depth +allowlist that limits instantiable classes, but it is **not** a security +boundary — certain allowlisted builtins (e.g. ``getattr``) are required for +legitimate object reconstruction (enums, named tuples) and cannot be removed +without breaking compatibility. + +Developers **must** ensure that: + +1. The checkpoint storage backend (file system, Cosmos DB, Azure Blob, Durable + Functions storage) is access-controlled and not writable by untrusted + parties. +2. Data flowing into ``decode_checkpoint_value`` originates exclusively from + the application's own checkpoint storage — never from user-supplied HTTP + requests, message payloads, or other untrusted sources. +3. The ``allowed_types`` parameter is specified whenever possible to restrict + the set of reconstructible types to the minimum required by the application. +4. Untrusted external input is never passed to ``decode_checkpoint_value``. If + external JSON that might contain checkpoint markers must be accepted, it is + sanitized first (for example, with :func:`agent_framework_azurefunctions._serialization.strip_pickle_markers`).
+ +The allowlist is a mitigation that reduces attack surface but does not +eliminate the inherent risks of deserializing untrusted pickle data. Treat +your checkpoint storage with the same access controls you would apply to +application secrets or database credentials. """ from __future__ import annotations