HIGH:
- S1: upload_file allow-root + symlink-resolve + size-cap.
  Env: CLAWDFORGE_UPLOAD_ROOT (default cwd), CLAWDFORGE_UPLOAD_MAX_BYTES (default 100MiB).
  README updated with threat-model paragraph.

LOW:
- S2: logger.propagate = False (stdout discipline defense-in-depth)
- S3: catch-all error message no longer echoes str(e) (host paths)
- S4: whitelist healthz/upload tool response fields
- S5: pattern-validate ff_* file tokens in run schema
- C1: strict-bool guard on timeout_secs/ttl_secs
- C2: coerce empty-string model/system to None

Deps:
- requests>=2.32 (CVE-2024-35195)
- urllib3>=2.2.2 (CVE-2024-37891)
- mcp>=1.2.0

Audit: memory/clawdforge-audits/mcp-093021c.md
277 lines
9.7 KiB
Python
277 lines
9.7 KiB
Python
"""Thin sync HTTP wrapper around the clawdforge service.

We deliberately keep this self-contained (just ``requests``) rather than
depending on the sibling Python SDK at ``clients/python/``. Reasons:

- ``clawdforge-mcp`` ships independently and may be ``pip install``'d on a
  host that doesn't have the Python SDK published anywhere reachable.
- The MCP server only needs three endpoints (healthz, run, files) and
  trivial error wrapping — pulling the full SDK is overkill.

Errors from this layer are surfaced as :class:`ForgeError` (and subclasses)
which the MCP server's tool handlers catch and reformat into MCP error
content. We never let a stack trace leak back through the JSON-RPC pipe —
clients show that to the model verbatim and it pollutes the context.
"""
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import requests
|
|
|
|
|
|
_HEALTHZ_TIMEOUT_SECS = 10
|
|
_HTTP_TIMEOUT_MARGIN_SECS = 30
|
|
_DEFAULT_RUN_TIMEOUT_SECS = 120
|
|
|
|
# Upload-side guards. The MCP-specific threat model is "what can a malicious
|
|
# LLM-driven client do?" — and the obvious win for an attacker is convincing
|
|
# a user to "let me upload this config so I can debug" and walking away with
|
|
# `~/.ssh/id_rsa`, `~/.aws/credentials`, etc. We defend in three ways:
|
|
#
|
|
# 1. ``CLAWDFORGE_UPLOAD_ROOT`` pins an allow-root (default: process cwd).
|
|
# Symlinks and ``..`` traversal both get neutralized via
|
|
# ``Path.resolve(strict=True)`` + ``is_relative_to``.
|
|
# 2. ``CLAWDFORGE_UPLOAD_MAX_BYTES`` caps the on-disk size we'll stream
|
|
# (default 100 MiB) so a runaway / malicious request can't pin the
|
|
# forge host on a bottomless file.
|
|
# 3. We refuse anything that isn't a regular file post-resolve — no FIFOs,
|
|
# sockets, devices, or directories.
|
|
_DEFAULT_UPLOAD_MAX_BYTES = 100 * 1024 * 1024 # 100 MiB
|
|
|
|
|
|
class ForgeError(Exception):
    """Root of the exception hierarchy raised by the clawdforge HTTP wrapper."""
|
|
|
|
|
|
class ForgeTransportError(ForgeError):
    """No HTTP response was obtained: connection, TCP, DNS or TLS failure."""
|
|
|
|
|
|
class ForgeAPIError(ForgeError):
    """The service answered with a 4xx / 5xx status.

    Attributes set on every instance:

    - ``message``: the human-readable summary (also the exception's ``str``).
    - ``status_code``: the HTTP status code of the response.
    - ``body``: the response body handed in by the raiser (a dict, a string,
      or ``None``).
    """

    def __init__(
        self,
        message: str,
        *,
        status_code: int,
        body: dict[str, Any] | str | None = None,
    ) -> None:
        # Record the structured details first, then let ``Exception`` store
        # the message in ``args`` as usual.
        self.message = message
        self.status_code = status_code
        self.body = body
        super().__init__(message)
|
|
|
|
|
|
class ForgeAuthError(ForgeAPIError):
    """The service answered 401 / 403: bad token, or IP not allowed."""
|
|
|
|
|
|
class ForgeClient:
    """Minimal sync client for the three endpoints we expose via MCP.

    One instance per MCP server process. Holds a ``requests.Session`` for
    keep-alive across tool calls. Not thread-safe — but the MCP server is
    asyncio-single-threaded, and we only ever call this from
    ``asyncio.to_thread``, so a single shared instance is fine.

    The client can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`.
    """

    def __init__(
        self,
        *,
        base_url: str,
        token: str,
        default_timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
        http_timeout_margin: int = _HTTP_TIMEOUT_MARGIN_SECS,
        session: requests.Session | None = None,
    ) -> None:
        """Configure the client.

        Args:
            base_url: Service root URL; trailing slashes are stripped.
            token: Bearer token sent on every request.
            default_timeout_secs: Run timeout assumed when a call does not
                supply one.
            http_timeout_margin: Seconds added to the run timeout to obtain
                the HTTP-level timeout.
            session: Optional pre-built session. When omitted, a session is
                created here and closed by :meth:`close`.

        Raises:
            ValueError: If ``base_url`` or ``token`` is empty.
        """
        if not base_url:
            raise ValueError("base_url is required")
        if not token:
            raise ValueError("token is required")
        self.base_url = base_url.rstrip("/")
        self.token = token
        self.default_timeout_secs = default_timeout_secs
        self.http_timeout_margin = http_timeout_margin
        # Decide ownership and which session to use with the same ``is None``
        # test, so a caller-supplied session is never silently replaced by
        # one that ``close()`` would then refuse to close.
        self._owns_session = session is None
        self._session = requests.Session() if session is None else session

    def close(self) -> None:
        """Close the underlying session, but only if this client created it."""
        if self._owns_session:
            self._session.close()

    def __enter__(self) -> ForgeClient:
        """Return the client itself for use in a ``with`` statement."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Call :meth:`close` on leaving the ``with`` block."""
        self.close()

    # -- internals ---------------------------------------------------------

    def _headers(self) -> dict[str, str]:
        """Return the headers attached to every request (bearer auth)."""
        return {"Authorization": f"Bearer {self.token}"}

    def _request(
        self,
        method: str,
        path: str,
        *,
        json_body: dict | None = None,
        data: dict | None = None,
        files: dict | None = None,
        timeout: float | tuple[float, float] | None = None,
    ) -> Any:
        """Send one request and return the decoded response body.

        The body is the parsed JSON when the response is valid JSON;
        otherwise the response text, or ``None`` when the text is empty.

        Raises:
            ForgeTransportError: ``requests`` raised before a response was
                obtained.
            ForgeAuthError: The response status is 401 or 403.
            ForgeAPIError: Any other response status >= 400.
        """
        try:
            resp = self._session.request(
                method,
                f"{self.base_url}{path}",
                headers=self._headers(),
                json=json_body,
                data=data,
                files=files,
                timeout=timeout,
            )
        except requests.RequestException as e:
            raise ForgeTransportError(f"transport: {e}") from e

        try:
            body = resp.json()
        except ValueError:
            body = resp.text or None

        if resp.status_code >= 400:
            # Build a short one-line summary: prefer the service's own
            # "error"/"detail" field, else the first 200 chars of raw text.
            short = ""
            if isinstance(body, dict):
                short = body.get("error") or body.get("detail") or ""
            elif isinstance(body, str):
                short = body[:200]
            # Drop the dangling ": " when there is nothing to append.
            msg = f"{resp.status_code} {resp.reason}: {short}".rstrip(": ")
            if resp.status_code in (401, 403):
                raise ForgeAuthError(msg, status_code=resp.status_code, body=body)
            raise ForgeAPIError(msg, status_code=resp.status_code, body=body)

        return body

    @staticmethod
    def _upload_root() -> Path:
        """Return the resolved allow-root for uploads.

        Reads ``CLAWDFORGE_UPLOAD_ROOT``; falls back to the process cwd when
        the variable is unset or empty.

        Raises:
            ValueError: The configured root cannot be resolved or is not a
                directory.
        """
        root_env = os.environ.get("CLAWDFORGE_UPLOAD_ROOT")
        if root_env:
            try:
                root = Path(root_env).resolve(strict=True)
            except (OSError, RuntimeError) as e:
                raise ValueError(
                    f"CLAWDFORGE_UPLOAD_ROOT is not a valid directory: {e}"
                ) from e
        else:
            root = Path.cwd().resolve()
        if not root.is_dir():
            raise ValueError(
                f"CLAWDFORGE_UPLOAD_ROOT must be a directory (got {root})"
            )
        return root

    @staticmethod
    def _upload_max_bytes() -> int:
        """Return the upload size cap in bytes.

        Reads ``CLAWDFORGE_UPLOAD_MAX_BYTES``; falls back to
        ``_DEFAULT_UPLOAD_MAX_BYTES`` when the variable is unset or empty.

        Raises:
            ValueError: The variable is not an integer, or not positive.
        """
        max_bytes_env = os.environ.get("CLAWDFORGE_UPLOAD_MAX_BYTES")
        if not max_bytes_env:
            return _DEFAULT_UPLOAD_MAX_BYTES
        try:
            max_bytes = int(max_bytes_env)
        except ValueError as e:
            raise ValueError(
                "CLAWDFORGE_UPLOAD_MAX_BYTES must be an integer"
            ) from e
        if max_bytes <= 0:
            raise ValueError(
                "CLAWDFORGE_UPLOAD_MAX_BYTES must be a positive integer"
            )
        return max_bytes

    @staticmethod
    def _resolve_upload_path(path: str | os.PathLike[str], root: Path) -> Path:
        """Resolve ``path`` and check it is a regular file inside ``root``.

        Raises:
            ValueError: The path does not exist, cannot be resolved, lies
                outside ``root`` after symlink resolution, or is not a
                regular file.
        """
        # ``resolve(strict=True)`` raises FileNotFoundError if the path (or
        # any intermediate symlink target) is missing — that's the exact
        # behaviour we want, just under one error type.
        try:
            p = Path(path).resolve(strict=True)
        except FileNotFoundError as e:
            raise ValueError(f"file does not exist: {path}") from e
        except (OSError, RuntimeError) as e:
            raise ValueError(f"cannot resolve path: {e}") from e

        # Containment check after symlink resolution. ``is_relative_to`` is
        # the 3.9+ API; we require 3.10+ in pyproject so it's always present.
        if not p.is_relative_to(root):
            raise ValueError(
                f"path is outside CLAWDFORGE_UPLOAD_ROOT ({root}): refusing to upload"
            )

        # Reject FIFOs, sockets, block/char devices, directories. ``is_file``
        # only returns True for regular files (after symlink resolution).
        # Doing this before ``open`` also avoids blocking on a FIFO.
        if not p.is_file():
            raise ValueError(f"path is not a regular file: {p}")
        return p

    # -- endpoints ---------------------------------------------------------

    def healthz(self) -> dict:
        """Call ``GET /healthz`` and return the decoded JSON object.

        Raises:
            ForgeError: The response body is not a JSON object, plus
                anything :meth:`_request` raises.
        """
        payload = self._request("GET", "/healthz", timeout=_HEALTHZ_TIMEOUT_SECS)
        # Same shape guard as ``run`` / ``upload_file``: callers are promised
        # a dict, so never hand back a bare string or ``None``.
        if not isinstance(payload, dict):
            raise ForgeError(
                f"unexpected /healthz response type: {type(payload).__name__}"
            )
        return payload

    def run(
        self,
        *,
        prompt: str,
        model: str | None = None,
        system: str | None = None,
        files: list[str] | None = None,
        timeout_secs: int | None = None,
    ) -> dict:
        """Call ``POST /run`` and return the decoded JSON object.

        Only arguments that were actually supplied are put in the request
        body: ``model``, ``system`` and ``timeout_secs`` when not ``None``,
        ``files`` when non-empty.

        Raises:
            ValueError: ``prompt`` is empty.
            ForgeError: The response body is not a JSON object, plus
                anything :meth:`_request` raises.
        """
        if not prompt:
            raise ValueError("prompt must be non-empty")

        body: dict[str, Any] = {"prompt": prompt}
        if model is not None:
            body["model"] = model
        if system is not None:
            body["system"] = system
        if files:
            body["files"] = list(files)
        if timeout_secs is not None:
            body["timeout_secs"] = timeout_secs

        # ``or`` (not ``is None``) on purpose: a falsy ``timeout_secs`` such
        # as 0 still gets the client default as the HTTP-side budget.
        effective_run_timeout = timeout_secs or self.default_timeout_secs
        http_timeout = effective_run_timeout + self.http_timeout_margin

        payload = self._request("POST", "/run", json_body=body, timeout=http_timeout)
        if not isinstance(payload, dict):
            raise ForgeError(f"unexpected /run response type: {type(payload).__name__}")
        return payload

    def upload_file(
        self,
        *,
        path: str | os.PathLike[str],
        ttl_secs: int = 3600,
    ) -> dict:
        """Upload a local file via ``POST /files`` and return the JSON object.

        The file must resolve to a regular file inside the allow-root and
        must not exceed the configured size cap.

        Args:
            path: Local path of the file to upload.
            ttl_secs: Sent to the service as the ``ttl_secs`` form field.

        Raises:
            ValueError: Invalid environment configuration, or the path fails
                a guard (missing, outside the root, not a regular file,
                unreadable, too large).
            ForgeError: The response body is not a JSON object, plus
                anything :meth:`_request` raises.
        """
        # Resolve the configured allow-root and size cap fresh on every call
        # so tests / runtime env-toggles take effect without re-instantiating
        # the client. Both fall back to safe defaults.
        root = self._upload_root()
        max_bytes = self._upload_max_bytes()
        p = self._resolve_upload_path(path, root)

        try:
            fh = p.open("rb")
        except OSError as e:
            raise ValueError(f"cannot open {p}: {e}") from e

        with fh:
            # Measure the size on the descriptor we are about to stream, not
            # on the path: a path-based stat followed by a separate open can
            # end up describing two different files.
            try:
                size = os.fstat(fh.fileno()).st_size
            except OSError as e:
                raise ValueError(f"cannot stat {p}: {e}") from e
            if size > max_bytes:
                raise ValueError(
                    f"file exceeds CLAWDFORGE_UPLOAD_MAX_BYTES "
                    f"({size} > {max_bytes} bytes)"
                )

            payload = self._request(
                "POST",
                "/files",
                data={"ttl_secs": str(ttl_secs)},
                files={"file": (p.name, fh)},
                timeout=self.default_timeout_secs + self.http_timeout_margin,
            )

        if not isinstance(payload, dict):
            raise ForgeError(
                f"unexpected /files response type: {type(payload).__name__}"
            )
        return payload
|