clawdforge/clients/mcp/src/clawdforge_mcp/client.py
Kayos 104f49c441 clients/mcp: apply audit findings — release-blocker fix on upload (093021c → new)
HIGH:
- S1: upload_file allow-root + symlink-resolve + size-cap. Env: CLAWDFORGE_UPLOAD_ROOT (default cwd), CLAWDFORGE_UPLOAD_MAX_BYTES (default 100MiB). README updated with threat-model paragraph.

LOW:
- S2: logger.propagate = False (stdout discipline defense-in-depth)
- S3: catch-all error message no longer echoes str(e) (host paths)
- S4: whitelist healthz/upload tool response fields
- S5: pattern-validate ff_* file tokens in run schema
- C1: strict-bool guard on timeout_secs/ttl_secs
- C2: coerce empty-string model/system to None

Deps:
- requests>=2.32 (CVE-2024-35195)
- urllib3>=2.2.2 (CVE-2024-37891)
- mcp>=1.2.0

Audit: memory/clawdforge-audits/mcp-093021c.md
2026-04-28 23:10:33 -07:00

277 lines
9.7 KiB
Python

"""Thin sync HTTP wrapper around the clawdforge service.

We deliberately keep this self-contained (just ``requests``) rather than
depending on the sibling Python SDK at ``clients/python/``. Reasons:

- ``clawdforge-mcp`` ships independently and may be ``pip install``'d on a
  host that doesn't have the Python SDK published anywhere reachable.
- The MCP server only needs three endpoints (healthz, run, files) and
  trivial error wrapping — pulling the full SDK is overkill.

Errors from this layer are surfaced as :class:`ForgeError` (and subclasses)
which the MCP server's tool handlers catch and reformat into MCP error
content. We never let a stack trace leak back through the JSON-RPC pipe —
clients show that to the model verbatim and it pollutes the context.
"""
from __future__ import annotations

import os
import stat
from pathlib import Path
from typing import Any

import requests
# HTTP timeout (seconds) passed to ``requests`` for the ``/healthz`` probe.
_HEALTHZ_TIMEOUT_SECS = 10
# Default for ``ForgeClient(http_timeout_margin=...)``: seconds added on top of
# the run timeout to obtain the HTTP timeout of ``/run`` and ``/files`` calls.
_HTTP_TIMEOUT_MARGIN_SECS = 30
# Default for ``ForgeClient(default_timeout_secs=...)``: the run timeout
# (seconds) assumed when a caller does not supply ``timeout_secs``.
_DEFAULT_RUN_TIMEOUT_SECS = 120
# Upload-side guards. The MCP-specific threat model is "what can a malicious
# LLM-driven client do?" — and the obvious win for an attacker is convincing
# a user to "let me upload this config so I can debug" and walking away with
# `~/.ssh/id_rsa`, `~/.aws/credentials`, etc. We defend in three ways:
#
#   1. ``CLAWDFORGE_UPLOAD_ROOT`` pins an allow-root (default: process cwd).
#      Symlinks and ``..`` traversal both get neutralized via
#      ``Path.resolve(strict=True)`` + ``is_relative_to``.
#   2. ``CLAWDFORGE_UPLOAD_MAX_BYTES`` caps the on-disk size we'll stream
#      (default 100 MiB) so a runaway / malicious request can't pin the
#      forge host on a bottomless file.
#   3. We refuse anything that isn't a regular file post-resolve — no FIFOs,
#      sockets, devices, or directories.
#
# Fallback size cap used when ``CLAWDFORGE_UPLOAD_MAX_BYTES`` is unset or empty.
_DEFAULT_UPLOAD_MAX_BYTES = 100 * 1024 * 1024  # 100 MiB
class ForgeError(Exception):
    """Root of the exception hierarchy raised by the clawdforge HTTP wrapper.

    Catch this type to handle every failure this module raises on purpose.
    """
class ForgeTransportError(ForgeError):
    """The request never produced an HTTP response.

    Covers connection, TCP, DNS and TLS level failures.
    """
class ForgeAPIError(ForgeError):
    """The service answered with an HTTP error status (4xx / 5xx).

    The constructor arguments are kept on the instance as ``message``,
    ``status_code`` and ``body`` so handlers can inspect the failure.
    """

    def __init__(
        self,
        message: str,
        *,
        status_code: int,
        body: dict[str, Any] | str | None = None,
    ) -> None:
        """Store the failure details.

        Args:
            message: Human-readable summary; also becomes ``str(exc)``.
            status_code: HTTP status code of the response.
            body: Response body as handed over by the caller (a dict, a
                string, or ``None``).
        """
        super().__init__(message)
        self.message = message
        self.status_code = status_code
        self.body = body
class ForgeAuthError(ForgeAPIError):
    """Authentication / authorization failure (HTTP 401 or 403).

    Typically a bad token or a caller IP that is not allowed.
    """
class ForgeClient:
    """Minimal sync client for the three endpoints we expose via MCP.

    One instance per MCP server process. Holds a ``requests.Session`` for
    keep-alive across tool calls. Not thread-safe — but the MCP server is
    asyncio-single-threaded, and we only ever call this from
    ``asyncio.to_thread``, so a single shared instance is fine.
    """

    def __init__(
        self,
        *,
        base_url: str,
        token: str,
        default_timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
        http_timeout_margin: int = _HTTP_TIMEOUT_MARGIN_SECS,
        session: requests.Session | None = None,
    ) -> None:
        """Configure the client.

        Args:
            base_url: Service root URL; trailing slashes are stripped.
            token: Bearer token sent in the ``Authorization`` header.
            default_timeout_secs: Run timeout assumed when a call does not
                specify one.
            http_timeout_margin: Seconds added to the run timeout to obtain
                the HTTP timeout.
            session: Optional pre-built session. When supplied, the caller
                keeps ownership and :meth:`close` leaves it open.

        Raises:
            ValueError: If ``base_url`` or ``token`` is empty.
        """
        if not base_url:
            raise ValueError("base_url is required")
        if not token:
            raise ValueError("token is required")
        self.base_url = base_url.rstrip("/")
        self.token = token
        self.default_timeout_secs = default_timeout_secs
        self.http_timeout_margin = http_timeout_margin
        self._session = session or requests.Session()
        self._owns_session = session is None

    def close(self) -> None:
        """Close the underlying session, but only if this client created it."""
        if self._owns_session:
            self._session.close()

    # -- internals ---------------------------------------------------------

    def _headers(self) -> dict[str, str]:
        """Return the headers attached to every request (bearer auth)."""
        return {"Authorization": f"Bearer {self.token}"}

    def _request(
        self,
        method: str,
        path: str,
        *,
        json_body: dict | None = None,
        data: dict | None = None,
        files: dict | None = None,
        timeout: float | tuple[float, float] | None = None,
    ) -> Any:
        """Send one request and return the decoded response body.

        Args:
            method: HTTP method.
            path: Path appended verbatim to ``base_url``.
            json_body: Forwarded to ``requests`` as ``json=``.
            data: Forwarded to ``requests`` as ``data=``.
            files: Forwarded to ``requests`` as ``files=``.
            timeout: Forwarded to ``requests`` as ``timeout=``.

        Returns:
            The parsed JSON body, or the raw response text when the body is
            not valid JSON (``None`` when that text is empty).

        Raises:
            ForgeTransportError: ``requests`` raised before a response arrived.
            ForgeAuthError: The response status is 401 or 403.
            ForgeAPIError: Any other response status >= 400.
        """
        try:
            resp = self._session.request(
                method,
                f"{self.base_url}{path}",
                headers=self._headers(),
                json=json_body,
                data=data,
                files=files,
                timeout=timeout,
            )
        except requests.RequestException as e:
            raise ForgeTransportError(f"transport: {e}") from e

        # Non-JSON bodies are kept as text so error pages still yield a message.
        try:
            body = resp.json()
        except ValueError:
            body = resp.text or None

        if resp.status_code >= 400:
            short = ""
            if isinstance(body, dict):
                short = body.get("error") or body.get("detail") or ""
            elif isinstance(body, str):
                # Cap free-form bodies so the error message stays short.
                short = body[:200]
            # Drop the dangling ": " when there is no detail to show.
            msg = f"{resp.status_code} {resp.reason}: {short}".rstrip(": ")
            if resp.status_code in (401, 403):
                raise ForgeAuthError(msg, status_code=resp.status_code, body=body)
            raise ForgeAPIError(msg, status_code=resp.status_code, body=body)
        return body

    @staticmethod
    def _resolve_upload_root() -> Path:
        """Return the resolved allow-root directory for uploads.

        Reads ``CLAWDFORGE_UPLOAD_ROOT``; falls back to the process cwd when
        the variable is unset or empty.

        Raises:
            ValueError: If the configured root cannot be resolved or is not a
                directory.
        """
        root_env = os.environ.get("CLAWDFORGE_UPLOAD_ROOT")
        if root_env:
            try:
                root = Path(root_env).resolve(strict=True)
            except (OSError, RuntimeError) as e:
                raise ValueError(
                    f"CLAWDFORGE_UPLOAD_ROOT is not a valid directory: {e}"
                ) from e
        else:
            root = Path.cwd().resolve()
        if not root.is_dir():
            raise ValueError(
                f"CLAWDFORGE_UPLOAD_ROOT must be a directory (got {root})"
            )
        return root

    @staticmethod
    def _resolve_upload_max_bytes() -> int:
        """Return the upload size cap in bytes.

        Reads ``CLAWDFORGE_UPLOAD_MAX_BYTES``; falls back to
        ``_DEFAULT_UPLOAD_MAX_BYTES`` when the variable is unset or empty.

        Raises:
            ValueError: If the variable is not a positive integer.
        """
        max_bytes_env = os.environ.get("CLAWDFORGE_UPLOAD_MAX_BYTES")
        if not max_bytes_env:
            return _DEFAULT_UPLOAD_MAX_BYTES
        try:
            max_bytes = int(max_bytes_env)
        except ValueError as e:
            raise ValueError(
                "CLAWDFORGE_UPLOAD_MAX_BYTES must be an integer"
            ) from e
        if max_bytes <= 0:
            raise ValueError(
                "CLAWDFORGE_UPLOAD_MAX_BYTES must be a positive integer"
            )
        return max_bytes

    # -- endpoints ---------------------------------------------------------

    def healthz(self) -> dict:
        """GET ``/healthz`` and return the decoded body.

        Uses the fixed ``_HEALTHZ_TIMEOUT_SECS`` HTTP timeout. The body is
        returned as decoded, without a type check.
        """
        return self._request("GET", "/healthz", timeout=_HEALTHZ_TIMEOUT_SECS)

    def run(
        self,
        *,
        prompt: str,
        model: str | None = None,
        system: str | None = None,
        files: list[str] | None = None,
        timeout_secs: int | None = None,
    ) -> dict:
        """POST ``/run`` and return the JSON object the service answers with.

        Args:
            prompt: Required, non-empty prompt text.
            model: Sent as ``model`` when not ``None``.
            system: Sent as ``system`` when not ``None``.
            files: Sent as ``files`` (copied into a list) when non-empty.
            timeout_secs: Sent as ``timeout_secs`` when not ``None``.

        Returns:
            The decoded response object.

        Raises:
            ValueError: If ``prompt`` is empty.
            ForgeError: If the response body is not a JSON object, or any
                error raised by the underlying request.
        """
        if not prompt:
            raise ValueError("prompt must be non-empty")
        body: dict[str, Any] = {"prompt": prompt}
        if model is not None:
            body["model"] = model
        if system is not None:
            body["system"] = system
        if files:
            body["files"] = list(files)
        if timeout_secs is not None:
            body["timeout_secs"] = timeout_secs
        # ``or`` (not ``is None``): a falsy timeout such as 0 is still sent to
        # the service above, but the HTTP budget falls back to the default.
        effective_run_timeout = timeout_secs or self.default_timeout_secs
        http_timeout = effective_run_timeout + self.http_timeout_margin
        payload = self._request("POST", "/run", json_body=body, timeout=http_timeout)
        if not isinstance(payload, dict):
            raise ForgeError(f"unexpected /run response type: {type(payload).__name__}")
        return payload

    def upload_file(
        self,
        *,
        path: str | os.PathLike[str],
        ttl_secs: int = 3600,
    ) -> dict:
        """Validate a local file and POST it to ``/files``.

        The path must resolve (symlinks and ``..`` included) to a regular
        file inside the allow-root and must not exceed the size cap. The
        allow-root and cap are re-read from the environment on every call so
        tests / runtime env-toggles take effect without re-instantiating the
        client.

        Args:
            path: Local file to upload.
            ttl_secs: Sent to the service as the ``ttl_secs`` form field.

        Returns:
            The decoded response object.

        Raises:
            ValueError: On invalid env configuration, or if the file is
                missing, unresolvable, outside the allow-root, not a regular
                file, unreadable, or larger than the cap.
            ForgeError: If the response body is not a JSON object, or any
                error raised by the underlying request.
        """
        root = self._resolve_upload_root()
        max_bytes = self._resolve_upload_max_bytes()

        # ``resolve(strict=True)`` raises FileNotFoundError if the path (or
        # any intermediate symlink target) is missing — that's the exact
        # behaviour we want, just under one error type.
        try:
            p = Path(path).resolve(strict=True)
        except FileNotFoundError as e:
            raise ValueError(f"file does not exist: {path}") from e
        except (OSError, RuntimeError) as e:
            raise ValueError(f"cannot resolve path: {e}") from e

        # Containment check after symlink resolution. ``is_relative_to`` is
        # the 3.9+ API; we require 3.10+ in pyproject so it's always present.
        if not p.is_relative_to(root):
            raise ValueError(
                f"path is outside CLAWDFORGE_UPLOAD_ROOT ({root}): refusing to upload"
            )

        # Cheap pre-open rejection of FIFOs, sockets, devices and
        # directories; also keeps ``open`` from blocking on a FIFO.
        if not p.is_file():
            raise ValueError(f"path is not a regular file: {p}")

        try:
            fh = p.open("rb")
        except OSError as e:
            raise ValueError(f"cannot open {p}: {e}") from e

        with fh:
            # Re-validate on the *opened* descriptor rather than the path, so
            # a file swapped in between the checks above and ``open`` cannot
            # slip past the type / size guards.
            try:
                st = os.fstat(fh.fileno())
            except OSError as e:
                raise ValueError(f"cannot stat {p}: {e}") from e
            if not stat.S_ISREG(st.st_mode):
                raise ValueError(f"path is not a regular file: {p}")
            if st.st_size > max_bytes:
                raise ValueError(
                    f"file exceeds CLAWDFORGE_UPLOAD_MAX_BYTES "
                    f"({st.st_size} > {max_bytes} bytes)"
                )
            payload = self._request(
                "POST",
                "/files",
                data={"ttl_secs": str(ttl_secs)},
                files={"file": (p.name, fh)},
                timeout=self.default_timeout_secs + self.http_timeout_margin,
            )

        if not isinstance(payload, dict):
            raise ForgeError(
                f"unexpected /files response type: {type(payload).__name__}"
            )
        return payload