"""Thin sync HTTP wrapper around the clawdforge service.

We deliberately keep this self-contained (just ``requests``) rather than
depending on the sibling Python SDK at ``clients/python/``. Reasons:

- ``clawdforge-mcp`` ships independently and may be ``pip install``'d on a
  host that doesn't have the Python SDK published anywhere reachable.
- The MCP server only needs three endpoints (healthz, run, files) and
  trivial error wrapping — pulling the full SDK is overkill.

Errors from this layer are surfaced as :class:`ForgeError` (and subclasses),
which the MCP server's tool handlers catch and reformat into MCP error
content. We never let a stack trace leak back through the JSON-RPC pipe —
clients show that to the model verbatim and it pollutes the context.
"""

from __future__ import annotations

import os
from pathlib import Path
from typing import Any

import requests

# HTTP timeout (seconds) passed to the ``/healthz`` request.
_HEALTHZ_TIMEOUT_SECS = 10
# Default for ``ForgeClient(http_timeout_margin=...)``: seconds added on top of
# the run timeout to form the HTTP timeout of ``/run`` and ``/files`` calls.
_HTTP_TIMEOUT_MARGIN_SECS = 30
# Default for ``ForgeClient(default_timeout_secs=...)``: the run timeout used
# for the HTTP timeout when a call does not supply its own.
_DEFAULT_RUN_TIMEOUT_SECS = 120

# Upload-side guards. The MCP-specific threat model is "what can a malicious
# LLM-driven client do?" — and the obvious win for an attacker is convincing
# a user to "let me upload this config so I can debug" and walking away with
# `~/.ssh/id_rsa`, `~/.aws/credentials`, etc. We defend in three ways:
#
# 1. ``CLAWDFORGE_UPLOAD_ROOT`` pins an allow-root (default: process cwd).
#    Symlinks and ``..`` traversal both get neutralized via
#    ``Path.resolve(strict=True)`` + ``is_relative_to``.
# 2. ``CLAWDFORGE_UPLOAD_MAX_BYTES`` caps the on-disk size we'll stream
#    (default 100 MiB) so a runaway / malicious request can't pin the
#    forge host on a bottomless file.
# 3. We refuse anything that isn't a regular file post-resolve — no FIFOs,
#    sockets, devices, or directories.
_DEFAULT_UPLOAD_MAX_BYTES = 100 * 1024 * 1024  # 100 MiB

# Upper bound on how much server-supplied error text is copied into an
# exception message. The untruncated payload stays on ``ForgeAPIError.body``.
_ERROR_SUMMARY_MAX_CHARS = 200


class ForgeError(Exception):
    """Base error for the clawdforge HTTP wrapper."""


class ForgeTransportError(ForgeError):
    """Connection / TCP / DNS / TLS failure — no HTTP response."""


class ForgeAPIError(ForgeError):
    """4xx / 5xx response. ``status_code`` and ``body`` are populated.

    Attributes:
        status_code: HTTP status code of the failed response.
        body: Decoded JSON body, the raw response text when the body was not
            JSON, or ``None`` when the response body was empty.
        message: The same text that was passed to ``Exception.__init__``.
    """

    def __init__(
        self,
        message: str,
        *,
        status_code: int,
        body: dict[str, Any] | str | None = None,
    ) -> None:
        super().__init__(message)
        self.status_code = status_code
        self.body = body
        self.message = message


class ForgeAuthError(ForgeAPIError):
    """401 / 403 — bad token or IP not allowed."""


def _resolve_upload_root() -> Path:
    """Return the directory uploads must live under.

    Reads ``CLAWDFORGE_UPLOAD_ROOT``; falls back to the process cwd when the
    variable is unset or empty.

    Raises:
        ValueError: the configured root cannot be resolved or is not a
            directory.
    """
    root_env = os.environ.get("CLAWDFORGE_UPLOAD_ROOT")
    if root_env:
        try:
            root = Path(root_env).resolve(strict=True)
        except (OSError, RuntimeError) as e:
            raise ValueError(
                f"CLAWDFORGE_UPLOAD_ROOT is not a valid directory: {e}"
            ) from e
    else:
        root = Path.cwd().resolve()
    if not root.is_dir():
        raise ValueError(
            f"CLAWDFORGE_UPLOAD_ROOT must be a directory (got {root})"
        )
    return root


def _resolve_upload_max_bytes() -> int:
    """Return the upload size cap in bytes.

    Reads ``CLAWDFORGE_UPLOAD_MAX_BYTES``; falls back to
    ``_DEFAULT_UPLOAD_MAX_BYTES`` when the variable is unset or empty.

    Raises:
        ValueError: the variable is set but is not a positive integer.
    """
    max_bytes_env = os.environ.get("CLAWDFORGE_UPLOAD_MAX_BYTES")
    if not max_bytes_env:
        return _DEFAULT_UPLOAD_MAX_BYTES
    try:
        max_bytes = int(max_bytes_env)
    except ValueError as e:
        raise ValueError(
            "CLAWDFORGE_UPLOAD_MAX_BYTES must be an integer"
        ) from e
    if max_bytes <= 0:
        raise ValueError(
            "CLAWDFORGE_UPLOAD_MAX_BYTES must be a positive integer"
        )
    return max_bytes


def _resolve_upload_path(path: str | os.PathLike[str], root: Path) -> Path:
    """Resolve *path* and check it is a regular file located under *root*.

    Raises:
        ValueError: the path is missing, cannot be resolved, escapes *root*
            after symlink resolution, or is not a regular file.
    """
    # ``resolve(strict=True)`` raises FileNotFoundError if the path (or any
    # intermediate symlink target) is missing — that's the exact behaviour we
    # want, just under one error type.
    try:
        p = Path(path).resolve(strict=True)
    except FileNotFoundError as e:
        raise ValueError(f"file does not exist: {path}") from e
    except (OSError, RuntimeError) as e:
        raise ValueError(f"cannot resolve path: {e}") from e

    # Containment check after symlink resolution. ``is_relative_to`` is the
    # 3.9+ API; we require 3.10+ in pyproject so it's always present.
    if not p.is_relative_to(root):
        raise ValueError(
            f"path is outside CLAWDFORGE_UPLOAD_ROOT ({root}): refusing to upload"
        )

    # Reject FIFOs, sockets, block/char devices, directories. ``is_file`` only
    # returns True for regular files (after symlink resolution). Doing this
    # before ``open`` also avoids blocking on a FIFO with no writer.
    if not p.is_file():
        raise ValueError(f"path is not a regular file: {p}")
    return p


class ForgeClient:
    """Minimal sync client for the three endpoints we expose via MCP.

    One instance per MCP server process. Holds a ``requests.Session`` for
    keep-alive across tool calls. Not thread-safe — but the MCP server is
    asyncio-single-threaded, and we only ever call this from
    ``asyncio.to_thread``, so a single shared instance is fine.

    The client can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`.
    """

    def __init__(
        self,
        *,
        base_url: str,
        token: str,
        default_timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
        http_timeout_margin: int = _HTTP_TIMEOUT_MARGIN_SECS,
        session: requests.Session | None = None,
    ) -> None:
        """Configure the client.

        Args:
            base_url: Service root URL; a trailing ``/`` is stripped.
            token: Bearer token sent on every request.
            default_timeout_secs: Run timeout assumed when a call does not
                pass its own.
            http_timeout_margin: Seconds added to the run timeout to form the
                HTTP timeout.
            session: Optional pre-built session. A session passed in here is
                NOT closed by :meth:`close`; one created here is.

        Raises:
            ValueError: ``base_url`` or ``token`` is empty.
        """
        if not base_url:
            raise ValueError("base_url is required")
        if not token:
            raise ValueError("token is required")
        self.base_url = base_url.rstrip("/")
        self.token = token
        self.default_timeout_secs = default_timeout_secs
        self.http_timeout_margin = http_timeout_margin
        self._session = session or requests.Session()
        self._owns_session = session is None

    def close(self) -> None:
        """Close the underlying session, but only if this client created it."""
        if self._owns_session:
            self._session.close()

    def __enter__(self) -> "ForgeClient":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    # -- internals ---------------------------------------------------------

    def _headers(self) -> dict[str, str]:
        """Return the auth headers attached to every request."""
        return {"Authorization": f"Bearer {self.token}"}

    def _request(
        self,
        method: str,
        path: str,
        *,
        json_body: dict | None = None,
        data: dict | None = None,
        files: dict | None = None,
        timeout: float | tuple[float, float] | None = None,
    ) -> Any:
        """Send one request and return the decoded response body.

        Returns:
            The parsed JSON body; the raw response text when the body is not
            JSON; ``None`` when the body is empty.

        Raises:
            ForgeTransportError: ``requests`` raised before a response arrived.
            ForgeAuthError: the response status was 401 or 403.
            ForgeAPIError: any other status >= 400.
        """
        try:
            resp = self._session.request(
                method,
                f"{self.base_url}{path}",
                headers=self._headers(),
                json=json_body,
                data=data,
                files=files,
                timeout=timeout,
            )
        except requests.RequestException as e:
            raise ForgeTransportError(f"transport: {e}") from e

        try:
            body = resp.json()
        except ValueError:
            body = resp.text or None

        if resp.status_code >= 400:
            # Build a short, bounded summary for the exception message. Both
            # branches are capped so an oversized server error cannot flood
            # whatever displays the message; ``body`` keeps the full payload.
            short = ""
            if isinstance(body, dict):
                detail = body.get("error") or body.get("detail") or ""
                short = str(detail)[:_ERROR_SUMMARY_MAX_CHARS]
            elif isinstance(body, str):
                short = body[:_ERROR_SUMMARY_MAX_CHARS]
            # ``rstrip`` drops the dangling ": " when there is no summary.
            msg = f"{resp.status_code} {resp.reason}: {short}".rstrip(": ")
            if resp.status_code in (401, 403):
                raise ForgeAuthError(msg, status_code=resp.status_code, body=body)
            raise ForgeAPIError(msg, status_code=resp.status_code, body=body)
        return body

    # -- endpoints ---------------------------------------------------------

    def healthz(self) -> dict:
        """GET ``/healthz`` and return its JSON object.

        Raises:
            ForgeError: the response decoded to something other than a dict
                (same check ``run`` and ``upload_file`` apply), plus anything
                ``_request`` raises.
        """
        payload = self._request("GET", "/healthz", timeout=_HEALTHZ_TIMEOUT_SECS)
        if not isinstance(payload, dict):
            raise ForgeError(
                f"unexpected /healthz response type: {type(payload).__name__}"
            )
        return payload

    def run(
        self,
        *,
        prompt: str,
        model: str | None = None,
        system: str | None = None,
        files: list[str] | None = None,
        timeout_secs: int | None = None,
    ) -> dict:
        """POST ``/run`` and return its JSON object.

        Optional arguments are only included in the request body when set
        (``files`` only when non-empty).

        Raises:
            ValueError: ``prompt`` is empty.
            ForgeError: the response was not a JSON object, plus anything
                ``_request`` raises.
        """
        if not prompt:
            raise ValueError("prompt must be non-empty")
        body: dict[str, Any] = {"prompt": prompt}
        if model is not None:
            body["model"] = model
        if system is not None:
            body["system"] = system
        if files:
            body["files"] = list(files)
        if timeout_secs is not None:
            body["timeout_secs"] = timeout_secs

        # ``or`` (not ``is None``): a ``timeout_secs`` of 0 is still forwarded
        # in the body above, but the HTTP timeout falls back to the default.
        effective_run_timeout = timeout_secs or self.default_timeout_secs
        http_timeout = effective_run_timeout + self.http_timeout_margin

        payload = self._request("POST", "/run", json_body=body, timeout=http_timeout)
        if not isinstance(payload, dict):
            raise ForgeError(f"unexpected /run response type: {type(payload).__name__}")
        return payload

    def upload_file(
        self,
        *,
        path: str | os.PathLike[str],
        ttl_secs: int = 3600,
    ) -> dict:
        """POST a local file to ``/files`` and return the JSON object.

        Args:
            path: File to upload. Must resolve to a regular file inside the
                upload root.
            ttl_secs: Sent to the server as the ``ttl_secs`` form field.

        Raises:
            ValueError: bad ``CLAWDFORGE_UPLOAD_ROOT`` /
                ``CLAWDFORGE_UPLOAD_MAX_BYTES`` configuration, or the path is
                missing, outside the root, not a regular file, unreadable, or
                larger than the cap.
            ForgeError: the response was not a JSON object, plus anything
                ``_request`` raises.
        """
        import stat

        # Resolve the configured allow-root and size cap fresh on every call
        # so tests / runtime env-toggles take effect without re-instantiating
        # the client. Both fall back to safe defaults.
        root = _resolve_upload_root()
        max_bytes = _resolve_upload_max_bytes()
        p = _resolve_upload_path(path, root)

        try:
            fh = p.open("rb")
        except OSError as e:
            raise ValueError(f"cannot open {p}: {e}") from e

        with fh:
            # Check type and size on the handle we will actually stream, not
            # on the path: the path could be swapped between a path-based
            # stat and the open. This narrows the window; it does not stop a
            # file from growing while the upload is in flight.
            try:
                info = os.fstat(fh.fileno())
            except OSError as e:
                raise ValueError(f"cannot stat {p}: {e}") from e
            if not stat.S_ISREG(info.st_mode):
                raise ValueError(f"path is not a regular file: {p}")
            if info.st_size > max_bytes:
                raise ValueError(
                    f"file exceeds CLAWDFORGE_UPLOAD_MAX_BYTES "
                    f"({info.st_size} > {max_bytes} bytes)"
                )

            payload = self._request(
                "POST",
                "/files",
                data={"ttl_secs": str(ttl_secs)},
                files={"file": (p.name, fh)},
                timeout=self.default_timeout_secs + self.http_timeout_margin,
            )

        if not isinstance(payload, dict):
            raise ForgeError(
                f"unexpected /files response type: {type(payload).__name__}"
            )
        return payload