clawdforge/clients/cpp/src/http.cpp
Kayos 19fe299b3d clients/cpp: apply audit findings — protocol-error guard + libcurl redirect clamp (bae34a7 → next)
HIGH:
- H1: nlohmann::json::exception wrapped as ProtocolError at 5 sites in
  client.cpp via with_protocol_guard helper. Preserves the documented
  clawdforge::Error catch-all base contract; nlohmann types never leak
  into the message (e.what() only).
- H2: libcurl MAXREDIRS=5, REDIR_PROTOCOLS_STR="http,https"
  (CURLOPT_REDIR_PROTOCOLS bitmask fallback for libcurl < 7.85.0),
  UNRESTRICTED_AUTH=0L. Defense-in-depth on top of libcurl's automatic
  bearer strip on cross-host redirects (>=7.64.0).

MEDIUM:
- M1: upload_file resolves the path via std::filesystem::canonical up
  front. Closes broken-symlink, symlink-loop, and TOCTOU-on-target
  classes without a doc burden on callers.
- M2: README "Linking" section documents the public-ABI nlohmann_json
  implication. v0.2 wrapper deferred.
- M3: README "Threat model" section documents the parse-depth concern
  on the result field of /run replies. Runtime guard skipped for v0.1
  per audit recommendation (low yield, complexity).

LOW:
- L1: cxx_std_20 → cxx_std_17 in CMakeLists.txt (no C++20-only
  features in the library source; broader downstream reach). Examples
  and tests still build via designated initializers (g++ accepts these
  in C++17 mode).
- L2: RunResult struct doc clarifies that missing ok/duration_ms
  decode to defaults — opt-out forward-compat.
- L3: Client class doc clarifies that moved-from instances must not
  have any non-special-member methods invoked (UB), with explicit
  callout on base_url() returning an internal reference.

Test-only:
- cpp-httplib 0.15.3 → 0.20.1. Optional backends (OpenSSL / zlib /
  brotli / zstd) forced off to keep the dep graph minimal. Test-only,
  never on the consumer wire path. README "Test deps" section added
  for transparency.

Tests added (12 → 23 cases, 70 → 106 assertions):
- protocol_error on malformed response for healthz, run, upload_file,
  create_token, list_tokens (H1 regression)
- redirect_clamp_test (H2 regression — TransportError after 5+ hops)
- redirect_protocol_clamp (H2 regression — ftp:// Location rejected)
- upload_file_canonicalize: symlink→file works, broken symlink
  rejected, symlink loop rejected, directory rejected (M1 regression)

Verified:
- cmake --build build clean (-Wall -Wextra -Wpedantic -Wshadow
  -Wconversion -Wsign-conversion -Wold-style-cast -Werror)
- ctest --output-on-failure all green (Release)
- ASan + UBSan: 23/23 cases, 106/106 assertions, zero diagnostics

Audit: memory/clawdforge-audits/cpp-bae34a7.md
2026-04-28 23:41:41 -07:00

341 lines
12 KiB
C++

// SPDX-License-Identifier: MIT
#include "http.hpp"
#include <atomic>
#include <cctype>
#include <cstring>
#include <mutex>
#include <stdexcept>
#include <string>
#include <clawdforge/error.hpp>
namespace clawdforge::detail {
namespace {
// Reference-counted global init. libcurl is safe to call from multiple threads
// once `curl_global_init` has run; we just need to make sure it has.
//
// g_global_refs is the number of references currently held by CurlGlobalGuard
// instances: the first reference triggers `curl_global_init`, and dropping the
// last one triggers `curl_global_cleanup`.
std::atomic<int> g_global_refs{0};
// Held around every count transition that may call `curl_global_init` or
// `curl_global_cleanup`, so those two calls never overlap. The guard's copy
// constructor only increments the counter and does not take this lock.
std::mutex g_global_mu;
// libcurl write callback (CURLOPT_WRITEFUNCTION): appends the received chunk
// to the std::string passed as `userdata`.
//
// ptr      - start of the chunk (not NUL-terminated)
// size     - element size as reported by libcurl
// nmemb    - element count; the chunk is size * nmemb bytes long
// userdata - the std::string* registered through CURLOPT_WRITEDATA
//
// Returns the number of bytes consumed. Returning a count different from the
// chunk length tells libcurl to abort the transfer with a write error.
std::size_t write_body_cb(char* ptr, std::size_t size, std::size_t nmemb, void* userdata) {
    const std::size_t n = size * nmemb;
    try {
        auto* out = static_cast<std::string*>(userdata);
        out->append(ptr, n);
    } catch (...) {
        // This function is invoked from libcurl's C code; a C++ exception
        // (std::bad_alloc, std::length_error) must not unwind through those
        // frames. Report the failure through the return value instead so the
        // transfer fails cleanly and the caller sees a transport error.
        return 0;
    }
    return n;
}
// libcurl header callback (CURLOPT_HEADERFUNCTION): parses one raw header
// line and stores it in the HeaderMap passed as `userdata`.
//
// ptr      - start of the header line (not NUL-terminated, usually ends in CRLF)
// size     - element size as reported by libcurl
// nmemb    - element count; the line is size * nmemb bytes long
// userdata - the HeaderMap* registered through CURLOPT_HEADERDATA
//
// Header names are stored lowercased; leading spaces/tabs are stripped from
// the value. A repeated header name overwrites the earlier value.
//
// Returns the number of bytes consumed. Returning a count different from the
// line length tells libcurl to abort the transfer.
std::size_t write_header_cb(char* ptr, std::size_t size, std::size_t nmemb, void* userdata) {
    const std::size_t n = size * nmemb;
    try {
        auto* hdrs = static_cast<HeaderMap*>(userdata);
        std::string line(ptr, n);
        // Strip trailing CRLF.
        while (!line.empty() && (line.back() == '\n' || line.back() == '\r')) {
            line.pop_back();
        }
        // libcurl invokes this callback for every response of the request, not
        // only the final one (redirect hops, proxy CONNECT replies). A status
        // line marks the start of a new response, so drop what the previous
        // response contributed; otherwise e.g. a redirect's `location` would
        // show up in the final header map. Checking this before the colon
        // search also keeps a reason phrase containing ':' from being stored
        // as a header.
        // NOTE(review): assumes HeaderMap is a standard associative container
        // that provides clear() - confirm against http.hpp.
        if (line.rfind("HTTP/", 0) == 0) {
            hdrs->clear();
            return n;
        }
        const auto colon = line.find(':');
        if (colon == std::string::npos) {
            return n;  // blank separator line or anything else without a name
        }
        std::string name = line.substr(0, colon);
        std::string value = line.substr(colon + 1);
        // Trim leading whitespace on the value.
        std::size_t i = 0;
        while (i < value.size() && (value[i] == ' ' || value[i] == '\t')) ++i;
        value.erase(0, i);
        // Lowercase the header name for case-insensitive lookup.
        for (auto& c : name) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        (*hdrs)[std::move(name)] = std::move(value);
        return n;
    } catch (...) {
        // Called from libcurl's C code: an exception (allocation failure while
        // building the strings or inserting into the map) must not unwind
        // through those frames. Signal the error via the return value instead.
        return 0;
    }
}
} // namespace
// Acquires one reference on libcurl's global state. The holder of the first
// reference runs `curl_global_init`; if that fails the reference is rolled
// back, the guard is left inactive, and TransportError is thrown.
CurlGlobalGuard::CurlGlobalGuard() {
    const std::lock_guard<std::mutex> hold(g_global_mu);

    const bool already_initialised = (g_global_refs.fetch_add(1) != 0);
    if (already_initialised) {
        active_ = true;
        return;
    }

    const CURLcode init_rc = curl_global_init(CURL_GLOBAL_DEFAULT);
    if (init_rc == CURLE_OK) {
        active_ = true;
        return;
    }

    // Initialisation failed: give the reference back before reporting.
    g_global_refs.fetch_sub(1);
    active_ = false;
    throw TransportError(std::string{"curl_global_init failed: "} + curl_easy_strerror(init_rc));
}
// Releases this guard's reference, if it holds one. Whoever drops the last
// reference runs `curl_global_cleanup`.
CurlGlobalGuard::~CurlGlobalGuard() {
    if (active_) {
        const std::lock_guard<std::mutex> hold(g_global_mu);
        const int refs_before = g_global_refs.fetch_sub(1);
        if (refs_before == 1) {
            curl_global_cleanup();
        }
    }
}
CurlGlobalGuard::CurlGlobalGuard(const CurlGlobalGuard& other) : active_(other.active_) {
if (active_) {
g_global_refs.fetch_add(1);
}
}
// Copy assignment: releases the reference this guard currently holds (running
// `curl_global_cleanup` if it was the last one), then mirrors `other`, taking
// a fresh reference when `other` is active. Self-assignment is a no-op.
CurlGlobalGuard& CurlGlobalGuard::operator=(const CurlGlobalGuard& other) {
    if (this != &other) {
        if (active_) {
            const std::lock_guard<std::mutex> hold(g_global_mu);
            if (--g_global_refs == 0) {
                curl_global_cleanup();
            }
        }
        active_ = other.active_;
        if (active_) {
            ++g_global_refs;
        }
    }
    return *this;
}
CurlGlobalGuard::CurlGlobalGuard(CurlGlobalGuard&& other) noexcept : active_(other.active_) {
other.active_ = false;
}
// Move assignment: releases the reference this guard currently holds (running
// `curl_global_cleanup` if it was the last one), then takes over the source's
// reference and leaves the source inactive. Self-assignment is a no-op.
CurlGlobalGuard& CurlGlobalGuard::operator=(CurlGlobalGuard&& other) noexcept {
    if (this != &other) {
        if (active_) {
            const std::lock_guard<std::mutex> hold(g_global_mu);
            if (--g_global_refs == 0) {
                curl_global_cleanup();
            }
        }
        active_ = other.active_;
        other.active_ = false;
    }
    return *this;
}
CurlSession::CurlSession(std::chrono::seconds timeout,
std::chrono::seconds connect_timeout,
std::string user_agent,
bool insecure_tls)
: timeout_(timeout),
connect_timeout_(connect_timeout),
user_agent_(std::move(user_agent)),
insecure_tls_(insecure_tls) {
easy_ = curl_easy_init();
if (easy_ == nullptr) {
throw TransportError("curl_easy_init returned null");
}
}
// Destroys the easy handle unless it was handed off by a move.
CurlSession::~CurlSession() {
    if (easy_ == nullptr) {
        return;
    }
    curl_easy_cleanup(easy_);
}
CurlSession::CurlSession(CurlSession&& other) noexcept
: easy_(other.easy_),
timeout_(other.timeout_),
connect_timeout_(other.connect_timeout_),
user_agent_(std::move(other.user_agent_)),
insecure_tls_(other.insecure_tls_) {
other.easy_ = nullptr;
}
// Move assignment: frees the handle this session currently owns, then takes
// over the source's handle and settings. The source is left with a null
// handle. Self-assignment is a no-op.
CurlSession& CurlSession::operator=(CurlSession&& other) noexcept {
    if (this == &other) {
        return *this;
    }

    if (easy_ != nullptr) {
        curl_easy_cleanup(easy_);
    }
    easy_ = other.easy_;
    other.easy_ = nullptr;

    timeout_ = other.timeout_;
    connect_timeout_ = other.connect_timeout_;
    insecure_tls_ = other.insecure_tls_;
    user_agent_ = std::move(other.user_agent_);
    return *this;
}
// Executes `req` on this session's easy handle and returns the response.
//
// The handle is reset and fully reconfigured on every call: redirect clamps,
// timeouts, user agent, body/header sinks, optional TLS-verification bypass,
// then the method-specific payload and the request headers:
//   - "GET":  plain GET.
//   - "POST": multipart upload when `req.file` is set (file part first, then
//             one part per entry of `req.form_fields`), otherwise the raw
//             `req.body`, otherwise an explicit empty body.
//   - other:  sent via CURLOPT_CUSTOMREQUEST, with `req.body` if present.
//
// Returns a Response whose body and headers were filled by write_body_cb /
// write_header_cb and whose status is libcurl's CURLINFO_RESPONSE_CODE.
//
// Throws TransportError when a libcurl allocation (mime handle, mime part,
// header list entry) fails, when a mime part cannot be populated, or when
// `curl_easy_perform` returns anything other than CURLE_OK.
Response CurlSession::perform(const Request& req) {
    // libcurl recommends easy_reset between uses of an easy handle; cheaper
    // than init + cleanup per request and preserves the connection cache.
    curl_easy_reset(easy_);

    Response resp;
    // Declared ahead of the cleanup guard below so the storage is still alive
    // when the guard detaches it from the handle.
    char errbuf[CURL_ERROR_SIZE]{};

    curl_easy_setopt(easy_, CURLOPT_URL, req.url.c_str());
    curl_easy_setopt(easy_, CURLOPT_FOLLOWLOCATION, 1L);
    // Clamp redirects: bound the chain length, only allow http(s) on the
    // redirect path (default also includes ftp/sftp/scp), and explicitly
    // disable cross-host bearer leakage. libcurl >= 7.64.0 already strips
    // header-mode auth on cross-host redirects; UNRESTRICTED_AUTH=0 is
    // defense-in-depth.
    curl_easy_setopt(easy_, CURLOPT_MAXREDIRS, 5L);
    // CURLOPT_REDIR_PROTOCOLS_STR added in 7.85.0; older curl uses the
    // bitmask form. Both spell the same allowlist: http + https only.
#if LIBCURL_VERSION_NUM >= 0x075500 /* 7.85.0 */
    curl_easy_setopt(easy_, CURLOPT_REDIR_PROTOCOLS_STR, "http,https");
#else
    curl_easy_setopt(easy_, CURLOPT_REDIR_PROTOCOLS,
                     static_cast<long>(CURLPROTO_HTTP | CURLPROTO_HTTPS));
#endif
    curl_easy_setopt(easy_, CURLOPT_UNRESTRICTED_AUTH, 0L);
    curl_easy_setopt(easy_, CURLOPT_NOSIGNAL, 1L);  // be thread-friendly
    curl_easy_setopt(easy_, CURLOPT_TIMEOUT, static_cast<long>(timeout_.count()));
    curl_easy_setopt(easy_, CURLOPT_CONNECTTIMEOUT, static_cast<long>(connect_timeout_.count()));
    curl_easy_setopt(easy_, CURLOPT_USERAGENT, user_agent_.c_str());
    curl_easy_setopt(easy_, CURLOPT_WRITEFUNCTION, write_body_cb);
    curl_easy_setopt(easy_, CURLOPT_WRITEDATA, &resp.body);
    curl_easy_setopt(easy_, CURLOPT_HEADERFUNCTION, write_header_cb);
    curl_easy_setopt(easy_, CURLOPT_HEADERDATA, &resp.headers);
    if (insecure_tls_) {
        curl_easy_setopt(easy_, CURLOPT_SSL_VERIFYPEER, 0L);
        curl_easy_setopt(easy_, CURLOPT_SSL_VERIFYHOST, 0L);
    }

    // Method + body / mime ----------------------------------------------------
    struct curl_slist* hdr_list = nullptr;
    curl_mime* mime = nullptr;

    // Idempotent RAII cleanup — runs exactly once whether we exit via return
    // or exception, no matter the path through the body.
    struct CurlReqCleanup {
        CURL* easy;
        struct curl_slist** hdrs;
        curl_mime** mime;
        ~CurlReqCleanup() {
            // The error buffer is a stack array of the enclosing call; libcurl
            // requires it to stay valid for as long as it is registered, so
            // unregister it before the frame goes away.
            curl_easy_setopt(easy, CURLOPT_ERRORBUFFER, static_cast<char*>(nullptr));
            if (*hdrs != nullptr) {
                curl_slist_free_all(*hdrs);
                *hdrs = nullptr;
            }
            if (*mime != nullptr) {
                curl_mime_free(*mime);
                *mime = nullptr;
            }
        }
    } cleanup{easy_, &hdr_list, &mime};

    // curl_slist_append returns null on failure and leaves the existing list
    // untouched. Only overwrite hdr_list on success so the list is neither
    // leaked nor silently replaced by an empty one.
    const auto append_header = [&hdr_list](const char* line) {
        struct curl_slist* grown = curl_slist_append(hdr_list, line);
        if (grown == nullptr) {
            throw TransportError("curl_slist_append returned null");
        }
        hdr_list = grown;
    };
    // Adds a part to the multipart body, failing loudly instead of handing a
    // null part to the curl_mime_* setters.
    const auto add_part = [&mime]() -> curl_mimepart* {
        curl_mimepart* created = curl_mime_addpart(mime);
        if (created == nullptr) {
            throw TransportError("curl_mime_addpart returned null");
        }
        return created;
    };
    // Turns a failed curl_mime_* call into a TransportError: "<what>: <reason>".
    const auto require_ok = [](CURLcode code, const char* what) {
        if (code != CURLE_OK) {
            throw TransportError(std::string{what} + ": " + curl_easy_strerror(code));
        }
    };

    if (req.method == "GET") {
        curl_easy_setopt(easy_, CURLOPT_HTTPGET, 1L);
    } else if (req.method == "POST") {
        curl_easy_setopt(easy_, CURLOPT_POST, 1L);
        if (req.file) {
            mime = curl_mime_init(easy_);
            if (mime == nullptr) {
                throw TransportError("curl_mime_init returned null");
            }
            curl_mimepart* file_part = add_part();
            require_ok(curl_mime_name(file_part, req.file->field_name.c_str()), "curl_mime_name");
            require_ok(curl_mime_filename(file_part, req.file->filename.c_str()),
                       "curl_mime_filename");
            if (!req.file->content_type.empty()) {
                require_ok(curl_mime_type(file_part, req.file->content_type.c_str()),
                           "curl_mime_type");
            }
            require_ok(curl_mime_filedata(file_part, req.file->filesystem_path.c_str()),
                       "curl_mime_filedata");
            for (const auto& [k, v] : req.form_fields) {
                curl_mimepart* field_part = add_part();
                require_ok(curl_mime_name(field_part, k.c_str()), "curl_mime_name");
                require_ok(curl_mime_data(field_part, v.c_str(), v.size()), "curl_mime_data");
            }
            curl_easy_setopt(easy_, CURLOPT_MIMEPOST, mime);
        } else if (req.body) {
            // POSTFIELDS does not copy; `req` outlives the transfer.
            curl_easy_setopt(easy_, CURLOPT_POSTFIELDS, req.body->data());
            curl_easy_setopt(easy_, CURLOPT_POSTFIELDSIZE_LARGE,
                             static_cast<curl_off_t>(req.body->size()));
        } else {
            // POST with empty body — still valid (e.g. action endpoints).
            curl_easy_setopt(easy_, CURLOPT_POSTFIELDS, "");
            curl_easy_setopt(easy_, CURLOPT_POSTFIELDSIZE, 0L);
        }
    } else {
        curl_easy_setopt(easy_, CURLOPT_CUSTOMREQUEST, req.method.c_str());
        if (req.body) {
            curl_easy_setopt(easy_, CURLOPT_POSTFIELDS, req.body->data());
            curl_easy_setopt(easy_, CURLOPT_POSTFIELDSIZE_LARGE,
                             static_cast<curl_off_t>(req.body->size()));
        }
    }

    for (const auto& [name, value] : req.headers) {
        const std::string line = name + ": " + value;
        append_header(line.c_str());
    }
    // Suppress libcurl's auto-Expect:100-continue on POST — needless RTT.
    append_header("Expect:");
    curl_easy_setopt(easy_, CURLOPT_HTTPHEADER, hdr_list);

    curl_easy_setopt(easy_, CURLOPT_ERRORBUFFER, errbuf);
    const CURLcode rc = curl_easy_perform(easy_);
    if (rc != CURLE_OK) {
        // Prefer the detailed message when libcurl wrote one.
        std::string msg = errbuf[0] != '\0' ? errbuf : curl_easy_strerror(rc);
        throw TransportError(std::string{"libcurl: "} + msg);
    }
    long status = 0;
    curl_easy_getinfo(easy_, CURLINFO_RESPONSE_CODE, &status);
    resp.status = status;
    return resp;
}
// Joins `base` and `path` with exactly one '/' between them: every trailing
// '/' of `base` and every leading '/' of `path` is dropped first. The
// separator is always emitted, even when either side ends up empty.
std::string join_url(std::string_view base, std::string_view path) {
    const std::size_t last_kept = base.find_last_not_of('/');
    base = (last_kept == std::string_view::npos) ? std::string_view{}
                                                 : base.substr(0, last_kept + 1);

    const std::size_t first_kept = path.find_first_not_of('/');
    path = (first_kept == std::string_view::npos) ? std::string_view{}
                                                  : path.substr(first_kept);

    std::string joined;
    joined.reserve(base.size() + 1 + path.size());
    joined.append(base);
    joined += '/';
    joined.append(path);
    return joined;
}
// Percent-encodes `in` for use as a single URL path segment. ASCII letters,
// digits and "-", "_", ".", "~" pass through unchanged; every other byte
// (including '/') becomes "%XX" with uppercase hex digits.
std::string url_encode_path(std::string_view in) {
    constexpr char kHexDigits[] = "0123456789ABCDEF";
    const auto is_unreserved = [](unsigned char b) {
        const bool upper = b >= 'A' && b <= 'Z';
        const bool lower = b >= 'a' && b <= 'z';
        const bool digit = b >= '0' && b <= '9';
        return upper || lower || digit || b == '-' || b == '_' || b == '.' || b == '~';
    };

    std::string encoded;
    encoded.reserve(in.size());
    for (const char raw : in) {
        const auto byte = static_cast<unsigned char>(raw);
        if (is_unreserved(byte)) {
            encoded += raw;
            continue;
        }
        const char triplet[3] = {'%', kHexDigits[byte >> 4U], kHexDigits[byte & 0x0FU]};
        encoded.append(triplet, sizeof triplet);
    }
    return encoded;
}
// Returns `s` unchanged when it is at most `max` bytes long; otherwise returns
// a prefix of at most `max` bytes followed by "...[truncated]".
//
// The cut point is moved back so a UTF-8 sequence is not split in the middle
// (best effort). A UTF-8 code point has at most three continuation bytes, so
// the back-off is limited to three steps: if the byte at the cut point is
// still a continuation byte after that, the data is not valid UTF-8 there
// (e.g. a binary payload) and the prefix is cut at exactly `max` bytes rather
// than being shrunk further.
std::string truncate_for_log(std::string_view s, std::size_t max) {
    if (s.size() <= max) return std::string{s};

    const auto is_continuation = [](char ch) {
        return (static_cast<unsigned char>(ch) & 0xC0) == 0x80;
    };
    constexpr std::size_t kMaxContinuationBytes = 3;

    // s.size() > max, so s[cut] is in range for every cut <= max.
    std::size_t cut = max;
    std::size_t stepped_back = 0;
    while (cut > 0 && stepped_back < kMaxContinuationBytes && is_continuation(s[cut])) {
        --cut;
        ++stepped_back;
    }
    if (is_continuation(s[cut])) {
        // No lead byte within reach: not UTF-8, keep as much as allowed.
        cut = max;
    }

    std::string out{s.substr(0, cut)};
    out.append("...[truncated]");
    return out;
}
} // namespace clawdforge::detail