diff --git a/apipod/cli.py b/apipod/cli.py index b2fecaf..936d1ca 100644 --- a/apipod/cli.py +++ b/apipod/cli.py @@ -24,9 +24,12 @@ def input_yes_no(question: str, default: bool = True) -> bool: def select_base_image(manager: DeploymentManager, config_data: dict) -> str: """Interactive base image selection process.""" recommended_image = manager.recommend_image(config_data) - print(f"Detected configuration: Python {config_data.get('python_version')}, " - f"PyTorch: {config_data.get('pytorch')}, TensorFlow: {config_data.get('tensorflow')}, " - f"ONNX: {config_data.get('onnx')}") + print( + f"Detected configuration: profile={config_data.get('profile')}, " + f"Python {config_data.get('python_version')}, " + f"PyTorch: {config_data.get('pytorch')}, TensorFlow: {config_data.get('tensorflow')}, " + f"ONNX: {config_data.get('onnx')}" + ) print(f"Recommended Base Image: {recommended_image}") if input_yes_no("Is this correct?"): diff --git a/apipod/deploy/detectors/IDetector.py b/apipod/deploy/detectors/IDetector.py index 7e3b372..16a3680 100644 --- a/apipod/deploy/detectors/IDetector.py +++ b/apipod/deploy/detectors/IDetector.py @@ -24,7 +24,8 @@ def should_ignore(self, path: str) -> bool: '__pycache__', '.git', '.svn', '.hg', '.DS_Store', 'node_modules', 'venv', 'env', '.env', '.venv', 'build', 'dist', '.pytest_cache', '.mypy_cache', - '.tox', '.coverage', 'htmlcov', '.eggs', '*.egg-info' + '.tox', '.coverage', 'htmlcov', '.eggs', '*.egg-info', + 'apipod-deploy', } # Check if any part of the path matches ignore patterns diff --git a/apipod/deploy/detectors/entrypoint.py b/apipod/deploy/detectors/entrypoint.py index 712afb7..bec4823 100644 --- a/apipod/deploy/detectors/entrypoint.py +++ b/apipod/deploy/detectors/entrypoint.py @@ -11,8 +11,11 @@ def detect(self, target_file: Optional[str] | None = None) -> Dict[str, Any]: result = { "file": None, - "title": "apipod-service", # Default - "found_config": False + "title": "apipod-service", + "found_config": False, + "orchestrator": "local", + "compute": "dedicated", + "provider": "localhost", } # 1. Prioritize user-provided target file @@ -85,17 +88,30 @@ def _scan_file_for_title(self, file_path: str, result: Dict[str, Any]): for node in ast.walk(tree): if isinstance(node, ast.Call): if isinstance(node.func, ast.Name) and node.func.id == "APIPod": + result["found_config"] = True for keyword in node.keywords: + value = self._ast_constant(keyword.value) + if value is None: + continue if keyword.arg == "title": - if isinstance(keyword.value, ast.Constant): # Python 3.8+ - result["title"] = keyword.value.value - result["found_config"] = True - elif isinstance(keyword.value, ast.Str): # Python < 3.8 - result["title"] = keyword.value.s - result["found_config"] = True + result["title"] = value + elif keyword.arg == "orchestrator": + result["orchestrator"] = value + elif keyword.arg == "compute": + result["compute"] = value + elif keyword.arg == "provider": + result["provider"] = value except Exception: pass + @staticmethod + def _ast_constant(node) -> Optional[str]: + if isinstance(node, ast.Constant) and isinstance(node.value, str): + return node.value + if isinstance(node, ast.Str): + return node.s + return None + def _scan_file_for_indicators(self, file_path: str, result: Dict[str, Any]) -> bool: try: with open(file_path, "r", encoding="utf-8") as f: diff --git a/apipod/deploy/detectors/framework.py b/apipod/deploy/detectors/framework.py index 94559e6..3b0a584 100644 --- a/apipod/deploy/detectors/framework.py +++ b/apipod/deploy/detectors/framework.py @@ -1,15 +1,25 @@ +import ast +import json import os import re import toml -import json -from typing import Dict, Any, List +from typing import Any, Dict, List, Optional, Set + +from apipod.deploy.profile import ( + DIFFUSERS_PACKAGES, + ONNX_PACKAGES, + PYTORCH_PACKAGES, + TENSORFLOW_PACKAGES, + TRANSFORMERS_PACKAGES, +) from .IDetector import Detector class FrameworkDetector(Detector): - def detect(self) -> Dict[str, Any]: + def detect(self, entrypoint: Optional[str] = None) -> Dict[str, Any]: print("Scanning for frameworks and models...") - config = { + self._detected_python_version = None + config: Dict[str, Any] = { "pytorch": False, "tensorflow": False, "onnx": False, @@ -17,169 +27,187 @@ def detect(self) -> Dict[str, Any]: "diffusers": False, "cuda": False, "python_version": "3.10", - "model_files": [] + "model_files": [], + "python_dependencies": [], + "entrypoint_imports": [], } - # 1. Check Dependencies (pyproject.toml / requirements.txt) - self._check_dependencies(config) + dep_names = self._gather_dependency_names() + config["python_dependencies"] = sorted(dep_names) + self._apply_dependency_packages(dep_names, config) - # 2. Check Imports if not detected via deps - if not (config["pytorch"] or config["tensorflow"] or config["onnx"] or config["transformers"] or config["diffusers"]): - self._check_imports(config) + entrypoint_imports = self._check_entrypoint_imports(entrypoint) + config["entrypoint_imports"] = sorted(entrypoint_imports) + self._apply_entrypoint_imports(entrypoint_imports, config) - # 3. Scan for model files self._scan_model_files(config) - return config - def _check_dependencies(self, config: Dict[str, Any]): - dependencies = [] + @staticmethod + def _has_any_framework(config: Dict[str, Any]) -> bool: + return any( + config[key] + for key in ("pytorch", "tensorflow", "onnx", "transformers", "diffusers") + ) - # Check pyproject.toml + def _gather_dependency_names(self) -> Set[str]: + names: Set[str] = set() pyproject_path = os.path.join(self.project_root, "pyproject.toml") if os.path.exists(pyproject_path): try: data = toml.load(pyproject_path) - if "project" in data and "dependencies" in data["project"]: - dependencies.extend(data["project"]["dependencies"]) - if "tool" in data and "poetry" in data["tool"] and "dependencies" in data["tool"]["poetry"]: - dependencies.extend(data["tool"]["poetry"]["dependencies"].keys()) - - if "project" in data and "requires-python" in data["project"]: - ver = data["project"]["requires-python"] - match = re.search(r'3\.(\d+)', ver) - if match: - config["python_version"] = f"3.{match.group(1)}" - except Exception as e: - print(f"Warning: Error parsing pyproject.toml: {e}") - - # Check requirements.txt + project = data.get("project", {}) + for dep in project.get("dependencies", []): + names.add(self._extract_package_name(dep)) + ver = project.get("requires-python", "") + match = re.search(r"3\.(\d+)", ver) + if match: + self._detected_python_version = f"3.{match.group(1)}" + from apipod.deploy.profile import POETRY_NON_PACKAGE_KEYS + + poetry_deps = data.get("tool", {}).get("poetry", {}).get("dependencies", {}) + for dep in poetry_deps.keys(): + key = dep.lower() + if key not in POETRY_NON_PACKAGE_KEYS: + names.add(key) + except Exception as exc: + print(f"Warning: Error parsing pyproject.toml: {exc}") + requirements_path = os.path.join(self.project_root, "requirements.txt") if os.path.exists(requirements_path): try: - with open(requirements_path, "r") as f: - dependencies.extend(f.readlines()) - except Exception as e: - print(f"Warning: Error parsing requirements.txt: {e}") - - self._parse_dependencies(dependencies, config) - - def _parse_dependencies(self, dependencies: List[str], config: Dict[str, Any]): - for dep in dependencies: - dep_lower = dep.lower() - - if "torch" in dep_lower: - config["pytorch"] = self._resolve_version(dep) - if "cu1" in dep_lower or "cuda" in dep_lower: + with open(requirements_path, "r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if line and not line.startswith("#"): + names.add(self._extract_package_name(line)) + except Exception as exc: + print(f"Warning: Error parsing requirements.txt: {exc}") + + return names + + _detected_python_version = None + + def _apply_dependency_packages(self, dep_names: Set[str], config: Dict[str, Any]) -> None: + if self._detected_python_version: + config["python_version"] = self._detected_python_version + + for name in dep_names: + if name in PYTORCH_PACKAGES: + config["pytorch"] = True + if "cuda" in name or name.endswith("-gpu"): config["cuda"] = True - if "tensorflow" in dep_lower: - config["tensorflow"] = self._resolve_version(dep) - if "onnx" in dep_lower: - config["onnx"] = self._resolve_version(dep) - if "transformers" in dep_lower: - config["transformers"] = self._resolve_version(dep) - if "diffusers" in dep_lower: - config["diffusers"] = self._resolve_version(dep) - - def _check_imports(self, config: Dict[str, Any]): - for root, _, files in os.walk(self.project_root): - if self.should_ignore(root): - continue - for file in files: - if file.endswith(".py"): - try: - with open(os.path.join(root, file), "r", encoding="utf-8") as f: - content = f.read() - if "torch" in content: - config["pytorch"] = True - if "tensorflow" in content: - config["tensorflow"] = True - if "onnx" in content: - config["onnx"] = True - if "transformers" in content: - config["transformers"] = True - if "diffusers" in content: - config["diffusers"] = True - except Exception: - pass - if any([config["pytorch"], config["tensorflow"], config["onnx"], config["transformers"], config["diffusers"]]): - break - - def _scan_model_files(self, config: Dict[str, Any]): - # Extensions commonly associated with model weights + if name in TENSORFLOW_PACKAGES: + config["tensorflow"] = True + if name in ONNX_PACKAGES: + config["onnx"] = True + if name in TRANSFORMERS_PACKAGES: + config["transformers"] = True + if name in DIFFUSERS_PACKAGES: + config["diffusers"] = True + + for name in dep_names: + if name == "torch": + config["pytorch"] = True + lowered = name + if "cu" in lowered and "torch" in lowered: + config["cuda"] = True + + @staticmethod + def _extract_package_name(dependency: str) -> str: + dependency = dependency.split("#", 1)[0].strip() + name = re.split(r"[\s=<>!~;\[]", dependency, maxsplit=1)[0].strip() + return name.lower().replace("_", "-") + + def _check_entrypoint_imports(self, entrypoint: Optional[str]) -> Set[str]: + """Inspect only the service entrypoint for ML imports (not the whole repo).""" + if not entrypoint: + return set() + + file_path = os.path.join(self.project_root, entrypoint) + if not os.path.isfile(file_path): + return set() + + top_level: Set[str] = set() + try: + with open(file_path, "r", encoding="utf-8") as handle: + tree = ast.parse(handle.read(), filename=entrypoint) + except Exception: + return top_level + + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + top_level.add(alias.name.split(".", 1)[0]) + elif isinstance(node, ast.ImportFrom) and node.module: + top_level.add(node.module.split(".", 1)[0]) + return top_level + + def _apply_entrypoint_imports(self, entrypoint_imports: Set[str], config: Dict[str, Any]) -> None: + mapping = { + "pytorch": {"torch", "torchvision", "torchaudio"}, + "tensorflow": {"tensorflow", "keras"}, + "onnx": {"onnx", "onnxruntime"}, + "transformers": {"transformers"}, + "diffusers": {"diffusers"}, + } + for key, modules in mapping.items(): + if entrypoint_imports & modules: + config[key] = True + + def _scan_model_files(self, config: Dict[str, Any]) -> None: extensions = {".pt", ".pth", ".onnx", ".h5", ".safetensors", ".bin", ".gguf"} - found_files = [] + found_files: List[str] = [] for root, _, files in os.walk(self.project_root): if self.should_ignore(root): continue - for file in files: file_path = os.path.join(root, file) _, ext = os.path.splitext(file) ext = ext.lower() - if ext in extensions: found_files.append(os.path.relpath(file_path, self.project_root)) - elif ext == ".json": - if self._is_model_json(file_path): - found_files.append(os.path.relpath(file_path, self.project_root)) + elif ext == ".json" and self._is_model_json(file_path): + found_files.append(os.path.relpath(file_path, self.project_root)) config["model_files"] = found_files def _is_model_json(self, file_path: str) -> bool: - """ - Heuristic to determine if a JSON file is a model configuration or tokenizer file. - """ filename = os.path.basename(file_path).lower() - # Known model json files - if filename in ["config.json", "tokenizer.json", "tokenizer_config.json", "generation_config.json", "adapter_config.json"]: + if filename in { + "config.json", + "tokenizer.json", + "tokenizer_config.json", + "generation_config.json", + "adapter_config.json", + }: return True - - # Ignore standard project files - if filename in ["package.json", "tsconfig.json", "apipod.json", "pyproject.json", "launch.json", "settings.json"]: + if filename in { + "package.json", + "tsconfig.json", + "apipod.json", + "pyproject.json", + "launch.json", + "settings.json", + }: return False - - # Inspect content for keys common in model configs try: - # Only read the first 4KB to be safe/fast - with open(file_path, "r", encoding="utf-8") as f: - # We read a chunk, but for JSON we need valid syntax. - # If the file is huge, it's probably not a config. - # But if it's model weights stored as JSON (rare), we might want it? - # Generally JSON weights are split or not just pure JSON. - # Let's try to load it if it's small enough (< 1MB) - if os.path.getsize(file_path) > 1024 * 1024: - return False - - content = json.load(f) - if isinstance(content, dict): - keys = content.keys() - # Common HF config keys - model_keys = {"architectures", "model_type", "vocab_size", "hidden_size", "layer_norm_epsilon"} - if any(k in keys for k in model_keys): - return True - # Common Tokenizer keys - if "version" in keys and "truncation" in keys: - return True + if os.path.getsize(file_path) > 1024 * 1024: + return False + with open(file_path, "r", encoding="utf-8") as handle: + content = json.load(handle) + if isinstance(content, dict): + keys = content.keys() + model_keys = { + "architectures", + "model_type", + "vocab_size", + "hidden_size", + "layer_norm_epsilon", + } + if any(key in keys for key in model_keys): + return True except Exception: pass - return False - - def _resolve_version(self, dependency: str) -> str: - # Simple extraction logic reusing previous concepts - if "=" in dependency and not any(op in dependency for op in [">=", "<=", "!=", "==", "~=", ">", "<"]): - # TOML table or simple assignment - match = re.search(r'["\']([^"\']+)["\']', dependency) - return match.group(1) if match else "latest" - - version_operators = ["==", ">=", "<=", "!=", "~=", ">", "<"] - for op in version_operators: - if op in dependency: - parts = dependency.split(op, 1) - if len(parts) == 2: - # Clean up version string - version = re.split(r'[;\s]', parts[1].strip())[0].strip().strip('"\'') - return version - return "latest" diff --git a/apipod/deploy/docker_factory.py b/apipod/deploy/docker_factory.py index 8e4dfda..6f41cf4 100644 --- a/apipod/deploy/docker_factory.py +++ b/apipod/deploy/docker_factory.py @@ -3,16 +3,25 @@ from typing import Any, Dict, List, Optional from jinja2 import Environment, FileSystemLoader +from apipod.deploy.profile import ( + PROFILE_ML_GPU, + PROFILE_SERVERLESS_MINIMAL, + recommend_base_image, +) + + class DockerFactory: """ Encapsulates all Docker-related operations. """ - # Updated to use more robust base images DEFAULT_IMAGES = [ + "python:3.12-slim", + "python:3.11-slim", "python:3.10-slim", - "nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04", # Standard CUDA Runtime + "ghcr.io/astral-sh/uv:python3.12-bookworm-slim", "runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04", + "nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04", ] def __init__( @@ -29,6 +38,7 @@ def __init__( loader=FileSystemLoader(str(self.template_dir)) ) self.docker_template = self.template_env.get_template("docker_template.j2") + self.minimal_template = self.template_env.get_template("docker_template_minimal.j2") self.images = self._load_images(self.template_dir / "docker_images.txt") def _load_images(self, images_path: Path) -> List[str]: @@ -37,54 +47,48 @@ def _load_images(self, images_path: Path) -> List[str]: with images_path.open("r", encoding="utf-8") as f: images = [line.strip() for line in f if line.strip()] if images: - return images + preferred = [ + "ghcr.io/astral-sh/uv:python3.12-bookworm-slim", + "python:3.12-slim", + ] + merged = preferred + [img for img in images if img not in preferred] + return merged except Exception: pass return self.DEFAULT_IMAGES.copy() def recommend_image(self, config: Dict[str, Any]) -> str: - """ - Optimized image recommendation logic. - """ - has_cuda = config.get("cuda", False) - has_pytorch = config.get("pytorch", False) - - # If CUDA is needed, NEVER use python-slim - if has_cuda or has_pytorch or config.get("tensorflow") or config.get("onnx"): - # Priority 1: RunPod specialized PyTorch image - if has_pytorch: - for img in self.images: - if "runpod/pytorch" in img: - return img - return "runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04" - - # Priority 2: Standard NVIDIA CUDA image (solves the 'locate package' error) - return "nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04" - - # Fallback to standard Python for CPU tasks - python_version = str(config.get("python_version") or "3.10") - for img in self.images: - if f"python:{python_version}-slim" in img: - return img - - return "python:3.10-slim" + profile = config.get("profile", PROFILE_ML_GPU) + suggested = recommend_base_image(profile, config.get("python_version", "3.12"), config) + if suggested in self.images: + return suggested + if profile == PROFILE_SERVERLESS_MINIMAL: + version = str(config.get("python_version") or "3.12") + for img in self.images: + if f"python{version}" in img and "astral-sh/uv" in img: + return img + if f"python:{version}-slim" in img: + return img + return suggested def render_dockerfile(self, base_image: str, config: Dict[str, Any]) -> str: + profile = config.get("profile", PROFILE_ML_GPU) + if profile == PROFILE_SERVERLESS_MINIMAL: + return self._render_minimal(base_image, config) + has_requirements = (self.project_root / "requirements.txt").exists() entrypoint = config.get("entrypoint", "main.py") entrypoint_module = ( Path(entrypoint).with_suffix("").as_posix().replace("/", ".").replace("\\", ".") ) - # FIX: Only attempt to install cuDNN if we are NOT using an NVIDIA base image. - # NVIDIA images already have cuDNN, and their apt repos work. - # Debian/Python-slim images will fail to find these packages. is_nvidia_base = "nvidia/cuda" in base_image or "runpod/" in base_image - - should_install_cudnn = ( - bool(config.get("tensorflow") or config.get("onnx")) - and not is_nvidia_base + needs_ml_system_libs = bool( + config.get("pytorch") + or config.get("tensorflow") + or config.get("onnx") ) + should_install_cudnn = needs_ml_system_libs and not is_nvidia_base context = { "base_image": base_image, @@ -92,9 +96,46 @@ def render_dockerfile(self, base_image: str, config: Dict[str, Any]) -> str: "entrypoint_module": entrypoint_module, "install_cudnn": should_install_cudnn, "system_packages": config.get("system_packages", []), + "orchestrator": config.get("orchestrator", "local"), + "compute": config.get("compute", "dedicated"), + "provider": config.get("provider", "localhost"), } return self.docker_template.render(**context) + def _render_minimal(self, base_image: str, config: Dict[str, Any]) -> str: + root = self.project_root + has_uv_lock = (root / "uv.lock").is_file() + has_pyproject = (root / "pyproject.toml").is_file() + has_requirements = (root / "requirements.txt").is_file() + install_project = has_pyproject and self._pyproject_defines_package(root / "pyproject.toml") + + entrypoint = config.get("entrypoint", "main.py") + context = { + "base_image": base_image, + "has_uv_lock": has_uv_lock, + "has_pyproject": has_pyproject, + "has_requirements": has_requirements, + "install_project": install_project, + "entrypoint_script": Path(entrypoint).name, + "orchestrator": config.get("orchestrator", "local"), + "compute": config.get("compute", "serverless"), + "provider": config.get("provider", "runpod"), + } + return self.minimal_template.render(**context) + + @staticmethod + def _pyproject_defines_package(pyproject_path: Path) -> bool: + try: + import toml + + data = toml.load(pyproject_path) + project = data.get("project", {}) + if project.get("name"): + return True + except Exception: + pass + return False + def write_dockerfile(self, content: str, dockerfile_path: Path) -> Path: self.deploy_dir.mkdir(parents=True, exist_ok=True) dockerfile_path = Path(dockerfile_path) @@ -102,8 +143,29 @@ def write_dockerfile(self, content: str, dockerfile_path: Path) -> Path: print(f"Dockerfile created at {dockerfile_path}") return dockerfile_path + def write_project_dockerignore(self) -> Path: + path = self.project_root / ".dockerignore" + lines = [ + "apipod-deploy/", + ".git/", + ".venv/", + "venv/", + "__pycache__/", + "*.pyc", + ".pytest_cache/", + ".mypy_cache/", + "dist/", + "build/", + "*.egg-info/", + "", + ] + if not path.exists(): + path.write_text("\n".join(lines), encoding="utf-8") + print(f"Created {path}") + return path + def build_image(self, tag: str, dockerfile_path: Path, context_dir: Path) -> bool: - # Using context_dir as the build context (project root) + self.write_project_dockerignore() cmd = ["docker", "build", "-t", tag, "-f", str(dockerfile_path), str(Path(context_dir))] print(f"Running: {' '.join(cmd)}") try: @@ -114,4 +176,4 @@ def build_image(self, tag: str, dockerfile_path: Path, context_dir: Path) -> boo print("Error: 'docker' command not found.") except subprocess.CalledProcessError: print("Docker build failed.") - return False \ No newline at end of file + return False diff --git a/apipod/deploy/docker_template_minimal.j2 b/apipod/deploy/docker_template_minimal.j2 new file mode 100644 index 0000000..270b8e8 --- /dev/null +++ b/apipod/deploy/docker_template_minimal.j2 @@ -0,0 +1,33 @@ +FROM {{ base_image }} + +WORKDIR /app + +ENV DEBIAN_FRONTEND=noninteractive +ENV APIPOD_ORCHESTRATOR="{{ orchestrator | default('local') }}" +ENV APIPOD_COMPUTE="{{ compute | default('serverless') }}" +ENV APIPOD_PROVIDER="{{ provider | default('runpod') }}" +ENV APIPOD_HOST=0.0.0.0 +ENV APIPOD_PORT=8000 + +EXPOSE 8000 + +{%- if has_uv_lock %} +COPY pyproject.toml uv.lock ./ +RUN uv sync --frozen --no-dev +{%- elif has_pyproject %} +COPY pyproject.toml ./ +RUN uv sync --frozen --no-dev +{%- elif has_requirements %} +COPY requirements.txt ./ +RUN uv pip install --system --no-cache -r requirements.txt +{%- endif %} + +COPY . . + +{%- if has_pyproject and install_project %} +RUN uv pip install --system --no-cache . +{%- elif not has_uv_lock and not has_requirements and not has_pyproject %} +RUN uv pip install --system --no-cache apipod runpod>=1.7.7 +{%- endif %} + +CMD ["uv", "run", "python", "{{ entrypoint_script }}", "--rp_api_host", "0.0.0.0", "--rp_api_port", "8000"] diff --git a/apipod/deploy/profile.py b/apipod/deploy/profile.py new file mode 100644 index 0000000..f6fc6a8 --- /dev/null +++ b/apipod/deploy/profile.py @@ -0,0 +1,178 @@ +"""Deployment profiles for scan/build (CPU minimal vs ML/GPU).""" + +from __future__ import annotations + +from typing import Any, Dict, FrozenSet, Optional, Set + +PROFILE_SERVERLESS_MINIMAL = "serverless-minimal" +PROFILE_WEB_API = "web-api" +PROFILE_ML_GPU = "ml-gpu" + +PYTORCH_PACKAGES: Set[str] = { + "torch", + "torchvision", + "torchaudio", + "pytorch-lightning", + "lightning", + "accelerate", + "bitsandbytes", + "xformers", + "pytorch3d", +} + +TENSORFLOW_PACKAGES: Set[str] = { + "tensorflow", + "tensorflow-gpu", + "tf-nightly", + "keras", +} + +ONNX_PACKAGES: Set[str] = { + "onnx", + "onnxruntime", + "onnxruntime-gpu", + "onnxmltools", +} + +TRANSFORMERS_PACKAGES: Set[str] = { + "transformers", + "sentence-transformers", + "optimum", +} + +DIFFUSERS_PACKAGES: Set[str] = { + "diffusers", +} + +ML_DIRECT_PACKAGES: FrozenSet[str] = ( + PYTORCH_PACKAGES + | TENSORFLOW_PACKAGES + | ONNX_PACKAGES + | TRANSFORMERS_PACKAGES + | DIFFUSERS_PACKAGES +) + +POETRY_NON_PACKAGE_KEYS: FrozenSet[str] = frozenset( + {"python", "pip", "setuptools", "wheel"} +) + +LIGHTWEIGHT_PACKAGES: Set[str] = { + "apipod", + "runpod", + "uvicorn", + "fastapi", + "httpx", + "pydantic", + "python-multipart", + "starlette", + "requests", + "click", + "tqdm", + "singleton-decorator", + "fastsdk", + "apipod-registry", + "media-toolkit", +} + + +def direct_ml_dependencies(python_deps: Set[str]) -> Set[str]: + """Declared dependencies that are ML frameworks (exact package names only).""" + return {name for name in python_deps if name in ML_DIRECT_PACKAGES} + + +_ENTRYPOINT_TORCH = frozenset({"torch", "torchvision", "torchaudio"}) +_ENTRYPOINT_TF = frozenset({"tensorflow", "keras"}) +_ENTRYPOINT_ONNX = frozenset({"onnx", "onnxruntime"}) +_ENTRYPOINT_TRANSFORMERS = frozenset({"transformers"}) +_ENTRYPOINT_DIFFUSERS = frozenset({"diffusers"}) + + +def reconcile_framework_flags( + *, + python_deps: Set[str], + entrypoint_imports: Set[str], + model_files: list, +) -> Dict[str, bool]: + """ + Framework flags are true only for direct ML dependencies, entrypoint imports, + or on-disk model artifacts — not from scanning the whole repository tree. + """ + direct_ml = direct_ml_dependencies(python_deps) + has_weights = bool(model_files) + + pytorch = bool(direct_ml & PYTORCH_PACKAGES) or bool( + entrypoint_imports & _ENTRYPOINT_TORCH + ) or has_weights + tensorflow = bool(direct_ml & TENSORFLOW_PACKAGES) or bool( + entrypoint_imports & _ENTRYPOINT_TF + ) + onnx = bool(direct_ml & ONNX_PACKAGES) or bool(entrypoint_imports & _ENTRYPOINT_ONNX) + transformers = bool(direct_ml & TRANSFORMERS_PACKAGES) or bool( + entrypoint_imports & _ENTRYPOINT_TRANSFORMERS + ) + diffusers = bool(direct_ml & DIFFUSERS_PACKAGES) or bool( + entrypoint_imports & _ENTRYPOINT_DIFFUSERS + ) + cuda = any("cuda" in name or name.endswith("-gpu") for name in direct_ml) + + return { + "pytorch": pytorch, + "tensorflow": tensorflow, + "onnx": onnx, + "transformers": transformers, + "diffusers": diffusers, + "cuda": cuda, + } + + +def infer_profile( + *, + pytorch: bool, + tensorflow: bool, + onnx: bool, + transformers: bool, + diffusers: bool, + cuda: bool, + compute: Optional[str], + provider: Optional[str], + python_deps: Set[str], + model_files: Optional[list] = None, +) -> str: + """Choose a deployment profile from scan results.""" + has_ml = any([pytorch, tensorflow, onnx, transformers, diffusers, cuda]) + model_files = model_files or [] + direct_ml = direct_ml_dependencies(python_deps) + + if compute == "serverless" and provider == "runpod": + if not has_ml and not model_files and not direct_ml: + return PROFILE_SERVERLESS_MINIMAL + if has_ml or model_files or direct_ml: + return PROFILE_ML_GPU + return PROFILE_SERVERLESS_MINIMAL + + if compute == "serverless" or provider == "runpod": + if not has_ml: + return PROFILE_SERVERLESS_MINIMAL + return PROFILE_ML_GPU + + if has_ml: + return PROFILE_ML_GPU + + if "uvicorn" in python_deps or "fastapi" in python_deps: + return PROFILE_WEB_API + + if python_deps and python_deps.issubset(LIGHTWEIGHT_PACKAGES): + return PROFILE_SERVERLESS_MINIMAL + + return PROFILE_WEB_API + + +def recommend_base_image(profile: str, python_version: str, config: Dict[str, Any]) -> str: + version = str(python_version or "3.12") + if profile == PROFILE_SERVERLESS_MINIMAL: + return f"ghcr.io/astral-sh/uv:python{version}-bookworm-slim" + if profile == PROFILE_ML_GPU: + if config.get("pytorch"): + return "runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04" + return "nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04" + return f"python:{version}-slim" diff --git a/apipod/deploy/scanner.py b/apipod/deploy/scanner.py index 5dbd7fc..db25991 100644 --- a/apipod/deploy/scanner.py +++ b/apipod/deploy/scanner.py @@ -1,7 +1,8 @@ import json +import shutil from dataclasses import asdict, dataclass, field from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Set from apipod.deploy.detectors import ( DependencyDetector, @@ -9,13 +10,18 @@ EntrypointDetector, FrameworkDetector, ) +from apipod.deploy.profile import infer_profile, reconcile_framework_flags @dataclass class DeploymentConfig: entrypoint: str = "main.py" title: str = "apipod-service" + profile: str = "web-api" python_version: str = "3.10" + orchestrator: str = "local" + compute: str = "dedicated" + provider: str = "localhost" pytorch: bool = False tensorflow: bool = False onnx: bool = False @@ -38,10 +44,10 @@ class Scanner: def __init__(self, root_path: Path, config_path: Path): self.root_path = Path(root_path).resolve() self.config_path = Path(config_path) - self.entrypoint_detector = EntrypointDetector(self.root_path) - self.framework_detector = FrameworkDetector(self.root_path) - self.dependency_detector = DependencyDetector(self.root_path) - self.env_detector = EnvDetector(self.root_path) + self.entrypoint_detector = EntrypointDetector(str(self.root_path)) + self.framework_detector = FrameworkDetector(str(self.root_path)) + self.dependency_detector = DependencyDetector(str(self.root_path)) + self.env_detector = EnvDetector(str(self.root_path)) def scan(self, target_file: Optional[str] = None) -> Dict[str, Any]: """ @@ -49,37 +55,86 @@ def scan(self, target_file: Optional[str] = None) -> Dict[str, Any]: If target_file is provided, it forces the entrypoint to that file. """ print("\n--- Starting Project Scan ---\n") - - # Pass the target_file to the entrypoint detector if it supports it - # or override the detection result manually below. + entrypoint_info = self.entrypoint_detector.detect(target_file=target_file) - - framework_info = self.framework_detector.detect() + entrypoint = entrypoint_info.get("file", target_file or "main.py") + framework_info = self.framework_detector.detect(entrypoint=entrypoint) dependency_info = self.dependency_detector.detect() env_info = self.env_detector.detect() + python_deps: Set[str] = set(framework_info.get("python_dependencies", [])) + entrypoint_imports: Set[str] = set(framework_info.get("entrypoint_imports", [])) + model_files: List[str] = framework_info.get("model_files", []) + system_packages: List[str] = [] if dependency_info.get("gcc"): system_packages.append("gcc") if dependency_info.get("libturbojpg"): system_packages.append("libturbojpg") + raw_flags = { + "pytorch": bool(framework_info.get("pytorch")), + "tensorflow": bool(framework_info.get("tensorflow")), + "onnx": bool(framework_info.get("onnx")), + "transformers": bool(framework_info.get("transformers")), + "diffusers": bool(framework_info.get("diffusers")), + "cuda": bool(framework_info.get("cuda")), + } + flags = reconcile_framework_flags( + python_deps=python_deps, + entrypoint_imports=entrypoint_imports, + model_files=model_files, + ) + pytorch = flags["pytorch"] + tensorflow = flags["tensorflow"] + onnx = flags["onnx"] + transformers = flags["transformers"] + diffusers = flags["diffusers"] + cuda = flags["cuda"] + + compute = entrypoint_info.get("compute") + provider = entrypoint_info.get("provider") + profile = infer_profile( + pytorch=pytorch, + tensorflow=tensorflow, + onnx=onnx, + transformers=transformers, + diffusers=diffusers, + cuda=cuda, + compute=compute, + provider=provider, + python_deps=python_deps, + model_files=model_files, + ) + + if flags != raw_flags: + print( + "Adjusted framework flags after verification " + f"(entrypoint imports: {', '.join(sorted(entrypoint_imports)) or 'none'})" + ) + deployment_config = DeploymentConfig( - # Use the target_file if detection didn't already pick it up entrypoint=entrypoint_info.get("file", target_file or "main.py"), title=entrypoint_info.get("title", "apipod-service"), + profile=profile, python_version=framework_info.get("python_version", "3.10"), - pytorch=bool(framework_info.get("pytorch")), - tensorflow=bool(framework_info.get("tensorflow")), - onnx=bool(framework_info.get("onnx")), - transformers=bool(framework_info.get("transformers")), - diffusers=bool(framework_info.get("diffusers")), - cuda=bool(framework_info.get("cuda")), + orchestrator=entrypoint_info.get("orchestrator", "local"), + compute=compute or "dedicated", + provider=provider or "localhost", + pytorch=pytorch, + tensorflow=tensorflow, + onnx=onnx, + transformers=transformers, + diffusers=diffusers, + cuda=cuda, system_packages=system_packages, model_files=framework_info.get("model_files", []), has_env_file=env_info.get("has_env_file", False), ) + print(f"Deployment profile: {profile}") + if python_deps: + print(f"Python dependencies: {', '.join(sorted(python_deps))}") print("\n--- Scan Completed ---\n") return deployment_config.to_dict() @@ -89,9 +144,40 @@ def save_report(self, config: Dict[str, Any]) -> None: with self.config_path.open("w", encoding="utf-8") as f: json.dump(config, f, indent=4) print(f"Configuration saved to {self.config_path}") + self._write_starter_files(config) except Exception as exc: print(f"Error saving configuration: {exc}") + def _write_starter_files(self, config: Dict[str, Any]) -> None: + deploy_dir = self.config_path.parent + readme_dst = deploy_dir / "README.md" + starter = Path(__file__).parent / "starter_README.md" + if starter.is_file() and not readme_dst.exists(): + shutil.copy(starter, readme_dst) + print(f"Starter guide written to {readme_dst}") + + dockerignore = deploy_dir / ".dockerignore" + if not dockerignore.exists(): + dockerignore.write_text( + "\n".join( + [ + "apipod-deploy/", + ".git/", + ".venv/", + "venv/", + "__pycache__/", + "*.pyc", + ".pytest_cache/", + ".mypy_cache/", + "dist/", + "build/", + "*.egg-info/", + "", + ] + ), + encoding="utf-8", + ) + def load_report(self) -> Optional[Dict[str, Any]]: if not self.config_path.exists(): return None diff --git a/apipod/deploy/starter_README.md b/apipod/deploy/starter_README.md new file mode 100644 index 0000000..237e811 --- /dev/null +++ b/apipod/deploy/starter_README.md @@ -0,0 +1,22 @@ +# APIPod deploy starter + +Generated by `socaity scan`. This folder holds deployment artifacts only — keep your app code in the project root (`../`). + +## Layout + +``` +your-project/ ← main.py, pyproject.toml, source +apipod-deploy/ ← apipod.json, Dockerfile (this folder) +``` + +## Next steps + +1. Review `apipod.json` (profile, entrypoint, Python version). +2. Run `socaity build` from the project root to refresh the Dockerfile and optionally build the image. +3. Deploy the image to your provider (e.g. RunPod Serverless). + +## RunPod serverless notes + +- The generated Dockerfile uses `APIPOD_COMPUTE=serverless` and `APIPOD_PROVIDER=runpod`. +- The container runs your entrypoint with `--rp_api_host 0.0.0.0` so the worker API binds correctly. +- Re-scan after changing dependencies: `socaity scan`. diff --git a/apipod/engine/backend/runpod/router.py b/apipod/engine/backend/runpod/router.py index f48e806..e78d55b 100644 --- a/apipod/engine/backend/runpod/router.py +++ b/apipod/engine/backend/runpod/router.py @@ -8,13 +8,19 @@ from apipod.common import constants from apipod.engine.jobs.base_job import JOB_STATUS from apipod.engine.jobs.job_progress import JobProgressRunpod, JobProgress -from apipod.engine.jobs.job_result import JobResultFactory, JobResult +from apipod.engine.jobs.job_result import ( + JobResultFactory, + JobResult, + JobMetrics, + _compute_duration_s, + _job_status_to_public, +) from apipod.engine.base_backend import _BaseBackend from apipod.engine.files.base_file_mixin import _BaseFileHandlingMixin from apipod.engine.backend.runpod.llm_mixin import _RunPodLLMMixin from apipod.engine.utils import normalize_name -from apipod.common.settings import APIPOD_PROVIDER, APIPOD_PORT, DEFAULT_DATE_TIME_FORMAT +from apipod.common.settings import APIPOD_PROVIDER, APIPOD_PORT class SocaityRunpodRouter(_BaseBackend, _BaseFileHandlingMixin, _RunPodLLMMixin): @@ -193,12 +199,8 @@ def _router(self, path, job, **kwargs): # Handle file uploads and conversions route_function = self._handle_file_uploads(route_function) - # Prepare result tracking start_time = datetime.now(timezone.utc) - result = JobResult( - job_id=job["id"], - created_at=start_time.strftime(DEFAULT_DATE_TIME_FORMAT), - ) + result = JobResult(job_id=job["id"], endpoint=path) try: # Execute the function (Sync or Async Handling) @@ -214,17 +216,24 @@ def _router(self, path, job, **kwargs): res = JobResultFactory._serialize_result(res) result.result = res - result.status = JOB_STATUS.FINISHED.value + result.status = _job_status_to_public(JOB_STATUS.FINISHED) except Exception as e: result.error = str(e) - result.status = JOB_STATUS.FAILED.value + result.status = _job_status_to_public(JOB_STATUS.FAILED) print(f"Job {job['id']} failed: {str(e)}") traceback.print_exc() - finally: - result.updated_at = datetime.now(timezone.utc).strftime(DEFAULT_DATE_TIME_FORMAT) - result = result.model_dump_json() - return result + for arg in kwargs.values(): + if isinstance(arg, JobProgressRunpod): + result.progress = float(arg._progress) + result.message = arg._message + break + + inference_time_s = _compute_duration_s(start_time, datetime.now(timezone.utc)) + if inference_time_s is not None: + result.metrics = JobMetrics(inference_time_s=inference_time_s) + + return result.model_dump_json(exclude_none=True) def _execute_route_function(self, route_function, kwargs): """ diff --git a/pyproject.toml b/pyproject.toml index 87bb803..fdd3e7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,3 +33,15 @@ runpod = [ [project.urls] Repository = "https://github.com/SocAIty/apipod" Homepage = "https://www.socaity.ai" + +[tool.setuptools.packages.find] +where = ["."] +include = ["apipod*"] + +[tool.setuptools.package-data] +apipod = [ + "deploy/*.j2", + "deploy/*.txt", + "deploy/starter_README.md", +] + diff --git a/test/test_deploy_profile.py b/test/test_deploy_profile.py new file mode 100644 index 0000000..7d612bd --- /dev/null +++ b/test/test_deploy_profile.py @@ -0,0 +1,96 @@ +from apipod.deploy.profile import PROFILE_ML_GPU, PROFILE_SERVERLESS_MINIMAL, infer_profile + + +def test_infer_serverless_minimal_for_apipod_only(): + profile = infer_profile( + pytorch=False, + tensorflow=False, + onnx=False, + transformers=False, + diffusers=False, + cuda=False, + compute="serverless", + provider="runpod", + python_deps={"apipod", "runpod"}, + ) + assert profile == PROFILE_SERVERLESS_MINIMAL + + +def test_infer_ml_gpu_when_torch_present(): + profile = infer_profile( + pytorch=True, + tensorflow=False, + onnx=False, + transformers=False, + diffusers=False, + cuda=True, + compute="serverless", + provider="runpod", + python_deps={"torch", "runpod"}, + ) + assert profile == PROFILE_ML_GPU + + +def test_tensorboard_does_not_imply_tensorflow(): + from apipod.deploy.detectors.framework import FrameworkDetector + import tempfile + from pathlib import Path + + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pyproject.toml").write_text( + '[project]\nname="x"\nrequires-python=">=3.12"\n' + 'dependencies = ["apipod", "runpod", "tensorboard"]\n' + ) + (root / "main.py").write_text( + 'from apipod import APIPod\napp = APIPod(compute="serverless", provider="runpod")\n' + ) + info = FrameworkDetector(str(root)).detect() + assert info["pytorch"] is False + assert info["tensorflow"] is False + assert info["onnx"] is False + + +def test_reconcile_clears_false_positive_flags(): + from apipod.deploy.profile import reconcile_framework_flags + + flags = reconcile_framework_flags( + python_deps={"apipod", "runpod"}, + entrypoint_imports=set(), + model_files=[], + ) + assert flags == { + "pytorch": False, + "tensorflow": False, + "onnx": False, + "transformers": False, + "diffusers": False, + "cuda": False, + } + + +def test_serverless_runpod_ping_profile(): + from apipod.deploy.profile import PROFILE_SERVERLESS_MINIMAL, infer_profile + + profile = infer_profile( + pytorch=False, + tensorflow=False, + onnx=False, + transformers=False, + diffusers=False, + cuda=False, + compute="serverless", + provider="runpod", + python_deps={"apipod", "runpod"}, + model_files=[], + ) + assert profile == PROFILE_SERVERLESS_MINIMAL + + +if __name__ == "__main__": + test_infer_serverless_minimal_for_apipod_only() + test_infer_ml_gpu_when_torch_present() + test_tensorboard_does_not_imply_tensorflow() + test_reconcile_clears_false_positive_flags() + test_serverless_runpod_ping_profile() + print("ok")