Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion cli/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
from tools.tool_registry import ToolRegistry

from agentic import AssetManager
from config import load_config
from config import (
apply_runtime_environment,
build_runtime_policy,
handle_preflight_issues,
load_config,
run_global_preflight,
)
from database import create_database_strategy
from llm import LLMTraceRecorder, OpenAIProvider, TracingLLMProvider
from runtime import JobManager, RuntimeExecutor
Expand All @@ -24,6 +30,13 @@ def bootstrap_environment(
include_candidate_tools: bool = False,
) -> dict[str, Any]:
cfg = load_config(config_path=config_path, prefix=prefix)
runtime_policy = build_runtime_policy(cfg)
apply_runtime_environment(runtime_policy)
handle_preflight_issues(
run_global_preflight(cfg, runtime_policy),
strict=runtime_policy.strict_preflight,
logger=logger,
)
trace_recorder = LLMTraceRecorder(
env_id=_resolve_env_id(config_path, prefix),
enabled=cfg.llm_tracing.enabled,
Expand Down Expand Up @@ -61,6 +74,7 @@ def bootstrap_environment(

return {
"config": cfg,
"runtime_policy": runtime_policy,
"database": db,
"registry": registry,
"job_manager": job_manager,
Expand Down
5 changes: 5 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
deployment:
network_mode: offline # Options: online | offline
resource_tier: full # Options: light | standard | full


database:
type: local_file # Options: local_file, mock
path: ./test_data
Expand Down
23 changes: 22 additions & 1 deletion config/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,24 @@
from .config_loader import Config, load_config
from .runtime_policy import (
PreflightIssue,
RuntimePolicy,
VALID_NETWORK_MODES,
VALID_RESOURCE_TIERS,
apply_runtime_environment,
build_runtime_policy,
handle_preflight_issues,
run_global_preflight,
)

__all__ = ["Config", "load_config"]
__all__ = [
"Config",
"PreflightIssue",
"RuntimePolicy",
"VALID_NETWORK_MODES",
"VALID_RESOURCE_TIERS",
"apply_runtime_environment",
"build_runtime_policy",
"handle_preflight_issues",
"load_config",
"run_global_preflight",
]
9 changes: 9 additions & 0 deletions config/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ class RuntimeConfig:
timeout_seconds: int = 300


@dataclass
class DeploymentConfig:
    """Settings parsed from the ``deployment`` section of the configuration.

    The fields are stored as plain strings; this class performs no validation
    of the values it receives.
    """

    # Network mode label; "online" when the section omits it.
    network_mode: str = "online"
    # Resource tier label; "light" when the section omits it.
    resource_tier: str = "light"


@dataclass
class AgentConfig:
type: str = "opencode"
Expand Down Expand Up @@ -95,6 +101,7 @@ class Config:
database: DatabaseConfig = field(default_factory=DatabaseConfig)
execution: ExecutionConfig = field(default_factory=ExecutionConfig)
runtime: RuntimeConfig = field(default_factory=RuntimeConfig)
deployment: DeploymentConfig = field(default_factory=DeploymentConfig)
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

让全局config对象能带上新加的部署策略

agent: AgentConfig = field(default_factory=AgentConfig)
tool_llm: ToolLLMConfig = field(default_factory=ToolLLMConfig)
llm_tracing: LLMTracingConfig = field(default_factory=LLMTracingConfig)
Expand All @@ -109,6 +116,7 @@ def from_dict(cls, data: dict[str, Any]) -> "Config":
db_data = data.get("database", {})
exec_data = data.get("execution", {})
runtime_data = data.get("runtime", {})
deployment_data = data.get("deployment", {})
agent_data = _expand_env_placeholders(data.get("agent", {}))
tool_llm_data = _expand_env_placeholders(data.get("tool_llm", {}))
llm_tracing_data = data.get("llm_tracing", {})
Expand All @@ -118,6 +126,7 @@ def from_dict(cls, data: dict[str, Any]) -> "Config":
database=DatabaseConfig(**db_data),
execution=ExecutionConfig(**exec_data),
runtime=RuntimeConfig(**runtime_data),
deployment=DeploymentConfig(**deployment_data),
agent=AgentConfig(**agent_data),
tool_llm=ToolLLMConfig(**tool_llm_data),
llm_tracing=LLMTracingConfig(**llm_tracing_data),
Expand Down
204 changes: 204 additions & 0 deletions config/runtime_policy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
from __future__ import annotations

import ipaddress
import os
from dataclasses import dataclass
from typing import Any
from urllib.parse import urlparse


# Accepted values for deployment.network_mode; run_global_preflight reports an
# error-level issue for anything outside this set.
VALID_NETWORK_MODES = {"online", "offline"}
# Accepted values for deployment.resource_tier; run_global_preflight reports an
# error-level issue for anything outside this set.
VALID_RESOURCE_TIERS = {"light", "standard", "full"}
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

定义合法值集合(set),校验的时候会用到。如果资源配置需要改,这里也要一起改掉。



@dataclass(frozen=True)
class RuntimePolicy:
    """Immutable deployment policy built from a network mode and a resource tier.

    Callers can branch on the boolean convenience properties or compare
    ``network_mode`` / ``resource_tier`` directly.  The properties compare
    against literal mode/tier names, so they have to be kept in step whenever
    the set of supported values changes.
    """

    network_mode: str = "online"
    resource_tier: str = "light"

    def _mode_is(self, expected: str) -> bool:
        """Return True when the configured network mode equals *expected*."""
        return self.network_mode == expected

    def _tier_is(self, expected: str) -> bool:
        """Return True when the configured resource tier equals *expected*."""
        return self.resource_tier == expected

    @property
    def online(self) -> bool:
        """True only for the exact mode string ``"online"``."""
        return self._mode_is("online")

    @property
    def offline(self) -> bool:
        """True only for the exact mode string ``"offline"``."""
        return self._mode_is("offline")

    @property
    def light_resource_tier(self) -> bool:
        """True when the resource tier is ``"light"``."""
        return self._tier_is("light")

    @property
    def standard_resource_tier(self) -> bool:
        """True when the resource tier is ``"standard"``."""
        return self._tier_is("standard")

    @property
    def full_resource_tier(self) -> bool:
        """True when the resource tier is ``"full"``."""
        return self._tier_is("full")

    @property
    def model_policy(self) -> str:
        """Return ``"local_only"`` in offline mode, otherwise ``"allow_download"``."""
        if self.offline:
            return "local_only"
        return "allow_download"

    @property
    def strict_preflight(self) -> bool:
        """Preflight is strict exactly when the policy is offline."""
        return self.offline


@dataclass(frozen=True)
class PreflightIssue:
    """One finding produced by a preflight check.

    This is the shared structure for reporting preflight problems; checks
    related to network_mode / resource_tier are expected to return their
    findings in this form.
    """

    # Severity label; handle_preflight_issues treats "error" as an error and
    # every other value as a warning.
    level: str
    # Short machine-readable identifier, rendered in square brackets.
    code: str
    # Human-readable description of the problem.
    message: str
    # Optional name of the checker that raised the issue.
    checker_name: str | None = None

    def format(self) -> str:
        """Render the issue as ``[code] message`` or ``[code] checker: message``."""
        tag = f"[{self.code}]"
        # The checker name is only shown when it is set and non-empty.
        label = f"{tag} {self.checker_name}:" if self.checker_name else tag
        return f"{label} {self.message}"


def build_runtime_policy(config: Any) -> RuntimePolicy:
    """Build a RuntimePolicy from the ``deployment`` section of *config*.

    Args:
        config: A mapping or attribute-style object holding the configuration.

    Returns:
        A RuntimePolicy whose fields are the stripped, lower-cased string
        values of ``network_mode`` and ``resource_tier``.  Missing or falsy
        values fall back to ``"online"`` and ``"light"`` respectively.  The
        values are not validated here.
    """
    section = _get_section(config, "deployment")

    def _normalized(key: str, fallback: str) -> str:
        # Falsy values (None, "") are replaced by the fallback before coercion.
        raw = _get_value(section, key, fallback)
        return str(raw or fallback).strip().lower()

    return RuntimePolicy(
        network_mode=_normalized("network_mode", "online"),
        resource_tier=_normalized("resource_tier", "light"),
    )


def apply_runtime_environment(policy: RuntimePolicy) -> None:
    """Export offline-mode environment flags when *policy* is offline.

    Does nothing unless ``policy.offline`` is truthy.  Each flag is written
    with ``setdefault``, so a value already present in the process environment
    is left untouched.
    """
    if policy.offline:
        for flag in (
            "HF_HUB_OFFLINE",
            "TRANSFORMERS_OFFLINE",
            "HF_DATASETS_OFFLINE",
            "DATAELF_OFFLINE_MODE",
        ):
            os.environ.setdefault(flag, "1")


def run_global_preflight(config: Any, policy: RuntimePolicy) -> list[PreflightIssue]:
    """Check the deployment policy values and collect any problems found.

    Args:
        config: Loaded configuration.  It is not read at the moment because
            the offline endpoint checks that would use it are disabled.
        policy: The runtime policy whose ``network_mode`` and
            ``resource_tier`` are validated.

    Returns:
        A list of error-level PreflightIssue objects, one per invalid value;
        empty when both values are accepted.
    """
    mode = policy.network_mode
    tier = policy.resource_tier
    found: list[PreflightIssue] = []

    if mode not in VALID_NETWORK_MODES:
        found.append(
            PreflightIssue(
                level="error",
                code="invalid_network_mode",
                message=(
                    f"deployment.network_mode must be one of {sorted(VALID_NETWORK_MODES)}, "
                    f"got {mode!r}."
                ),
            )
        )

    if tier not in VALID_RESOURCE_TIERS:
        found.append(
            PreflightIssue(
                level="error",
                code="invalid_resource_tier",
                message=(
                    f"deployment.resource_tier must be one of {sorted(VALID_RESOURCE_TIERS)}, "
                    f"got {tier!r}."
                ),
            )
        )

    if policy.offline:
        # TODO: (network_mode) Re-enable strict offline endpoint checks after local
        # LLM deployment is ready. Development still uses an external relay LLM.
        # found.extend(_validate_offline_llm_endpoint(config, "agent", required=_agent_requires_llm(config)))
        # found.extend(_validate_offline_llm_endpoint(config, "tool_llm", required=_tool_llm_configured(config)))
        return found

    # TODO: (network_mode) Add best-effort online dependency checks here as tools
    # expose cheap, non-network preflight hooks.
    return found


def handle_preflight_issues(
    issues: list[PreflightIssue],
    *,
    strict: bool,
    logger: Any | None = None,
) -> None:
    """Process preflight results in one place: log warnings, raise on blockers.

    Args:
        issues: Findings to process.  An issue whose ``level`` is ``"error"``
            is an error; any other level counts as a warning.
        strict: When true, warnings block as well as errors.
        logger: Optional object with a ``warning`` method.  Warnings are sent
            to it; errors are not logged, only raised.

    Raises:
        RuntimeError: If any blocking issue exists.  The message lists errors
            first, then (in strict mode) warnings.
    """
    if not issues:
        return

    errors: list[PreflightIssue] = []
    warnings: list[PreflightIssue] = []
    for issue in issues:
        bucket = errors if issue.level == "error" else warnings
        bucket.append(issue)

    # Warnings are reported even when they are about to block in strict mode.
    if logger is not None and hasattr(logger, "warning"):
        for issue in warnings:
            logger.warning(f"Preflight warning: {issue.format()}")

    blocking = errors + warnings if strict else errors
    if not blocking:
        return

    lines = [f" - {issue.format()}" for issue in blocking]
    raise RuntimeError("Preflight failed:\n" + "\n".join(lines))


def _validate_offline_llm_endpoint(
    config: Any,
    section_name: str,
    *,
    required: bool,
) -> list[PreflightIssue]:
    """Check that an LLM section's ``base_url`` is acceptable in offline mode.

    Args:
        config: Loaded configuration (mapping or attribute-style object).
        section_name: Name of the config section holding ``base_url``.
        required: Whether a missing ``base_url`` should be reported.

    Returns:
        A single-element list with an error-level issue when the endpoint is
        missing-but-required or addressed by a public IP; otherwise an empty
        list.
    """
    endpoint = _get_value(_get_section(config, section_name), "base_url", None)

    if endpoint:
        if not _is_public_ip_endpoint(str(endpoint)):
            # TODO: (network_mode) Add optional endpoint allowlisting once the first
            # deployment config needs approved intranet hostnames beyond private IPs.
            return []
        return [
            PreflightIssue(
                level="error",
                code="offline_public_llm_endpoint",
                message=(
                    f"deployment.network_mode=offline cannot use public endpoint "
                    f"{section_name}.base_url={endpoint!r}."
                ),
            )
        ]

    if required:
        return [
            PreflightIssue(
                level="error",
                code="offline_missing_llm_endpoint",
                message=(
                    f"deployment.network_mode=offline requires {section_name}.base_url "
                    "to point to a local or intranet OpenAI-compatible service."
                ),
            )
        ]
    return []


def _agent_requires_llm(config: Any) -> bool:
    """Return True when ``agent.type`` is ``"opencode"`` (also the default when unset)."""
    agent_type = _get_value(_get_section(config, "agent"), "type", "opencode")
    return agent_type == "opencode"


def _tool_llm_configured(config: Any) -> bool:
    """Return True when the ``tool_llm`` section sets a truthy ``model`` or ``base_url``."""
    section = _get_section(config, "tool_llm")
    # any() stops at the first truthy value, so "base_url" is only read when
    # "model" is unset or falsy.
    return any(_get_value(section, key, None) for key in ("model", "base_url"))


def _is_public_ip_endpoint(url: str) -> bool:
    """Return True when *url* addresses its host by a literal, non-private IP.

    Hostnames (including ``localhost``) are never reported as public, because
    they may be intranet DNS names; only IP literals are classified.

    Args:
        url: Endpoint URL.  A missing scheme is tolerated, e.g.
            ``"10.0.0.5:8000/v1"``.

    Returns:
        True if the host is an IP literal that is not private, loopback or
        link-local; False otherwise, including for empty or unparsable input.
    """
    candidate = url.strip()
    try:
        parsed = urlparse(candidate)
        if not parsed.netloc:
            # Without "//" urlparse puts "8.8.8.8:8000/v1" into the path (or
            # scheme), leaving hostname empty.  Re-parse as a network location
            # so scheme-less endpoints are still inspected.
            parsed = urlparse(f"//{candidate}")
        host = parsed.hostname
    except ValueError:
        # urlparse rejects malformed bracketed hosts; such a URL names no
        # public IP, and this check should not crash preflight.
        return False

    if not host or host == "localhost":
        return False

    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        # Hostnames may be intranet DNS names; keep the first version flexible.
        return False

    # An IPv4-mapped IPv6 literal (::ffff:a.b.c.d) must be judged by the IPv4
    # address it carries, otherwise a public address can be classified as
    # private on interpreters that do not unwrap it.
    mapped = getattr(ip, "ipv4_mapped", None)
    if mapped is not None:
        ip = mapped

    return not (ip.is_private or ip.is_loopback or ip.is_link_local)


def _get_section(config: Any, name: str) -> Any:
    """Fetch section *name* from a dict or attribute-style config.

    Returns an empty dict when the key or attribute is absent.  A value that
    is present is returned as-is, even if it is ``None``.
    """
    if not isinstance(config, dict):
        return getattr(config, name, {})
    return config.get(name, {})


def _get_value(section: Any, name: str, default: Any = None) -> Any:
    """Read *name* from a dict or attribute-style section, else return *default*.

    The default is used only when the key or attribute is absent; a stored
    ``None`` is returned unchanged.
    """
    if not isinstance(section, dict):
        return getattr(section, name, default)
    return section.get(name, default)
2 changes: 1 addition & 1 deletion runtime/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def execute(self, job_id: str, pipeline: str) -> dict[str, Any]:
# Save pipeline to file for reference
pipeline_dir = Path("pipelines")
pipeline_dir.mkdir(exist_ok=True)
pipeline_file = pipeline_dir / f"{job_id}.py"
pipeline_file = pipeline_dir / f"{job_id}.dsl"
with open(pipeline_file, "w") as f:
f.write(pipeline)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
min_batch_size: int = _MIN_BATCH_SIZE,
dct_keep_ratio: float = 0.125,
device: str = "auto",
local_files_only: bool = False,
**kwargs,
):
"""
Expand All @@ -45,6 +46,7 @@ def __init__(
min_batch_size: Minimum samples required for clustering.
dct_keep_ratio: Keep top-left ratio for DCT (default 1/8).
device: "auto" | "cuda" | "cpu".
local_files_only: only load local model files; used by offline mode.
"""
super().__init__(**kwargs)
self.victim_config = victim_config or {}
Expand All @@ -54,6 +56,7 @@ def __init__(
self.min_batch_size = min_batch_size
self.dct_keep_ratio = dct_keep_ratio
self.device = device
self.local_files_only = local_files_only
self._victim: Optional[CasualLLMVictim] = None

def check(self, sample: DataSample) -> CheckResult:
Expand Down Expand Up @@ -142,6 +145,7 @@ def _load_victim(self) -> CasualLLMVictim:
config["device"] = "gpu"
elif self.device == "cpu":
config["device"] = "cpu"
config["local_files_only"] = self.local_files_only
self._victim = load_victim(config)
return self._victim

Expand Down
7 changes: 5 additions & 2 deletions tools/security_audit/checker/heuristic/graceful_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def __init__(
model: Optional[str] = "llama",
path: Optional[str] = None,
max_len: Optional[int] = 4096,
local_files_only: bool = False,
**kwargs,
):
if not path:
Expand All @@ -67,16 +68,18 @@ def __init__(
"cuda" if device in {"gpu", "cuda"} and torch.cuda.is_available() else "cpu"
)
self.model_type = model
self.model_config = AutoConfig.from_pretrained(path)
self.local_files_only = local_files_only
self.model_config = AutoConfig.from_pretrained(path, local_files_only=local_files_only)
self.llm = AutoModelForCausalLM.from_pretrained(
path,
config=self.model_config,
trust_remote_code=True,
device_map="auto" if self.device.type == "cuda" else None,
local_files_only=local_files_only,
)
if self.device.type != "cuda":
self.llm = self.llm.to(self.device)
self.tokenizer = AutoTokenizer.from_pretrained(path)
self.tokenizer = AutoTokenizer.from_pretrained(path, local_files_only=local_files_only)
if self.model_type == "llama":
pad_token = self.tokenizer.unk_token or self.tokenizer.eos_token
self.llm.config.pad_token_id = self.tokenizer.convert_tokens_to_ids(pad_token)
Expand Down
Loading