HIGH: - H1: nlohmann::json::exception wrapped as ProtocolError at 5 sites in client.cpp via with_protocol_guard helper. Preserves the documented clawdforge::Error catch-all base contract; nlohmann types never leak into the message (e.what() only). - H2: libcurl MAXREDIRS=5, REDIR_PROTOCOLS_STR="http,https" (CURLOPT_REDIR_PROTOCOLS bitmask fallback for libcurl < 7.85.0), UNRESTRICTED_AUTH=0L. Defense-in-depth on top of libcurl's automatic bearer strip on cross-host redirects (>=7.64.0). MEDIUM: - M1: upload_file resolves the path via std::filesystem::canonical up front. Closes broken-symlink, symlink-loop, and TOCTOU-on-target classes without a doc burden on callers. - M2: README "Linking" section documents the public-ABI nlohmann_json implication. v0.2 wrapper deferred. - M3: README "Threat model" section documents the parse-depth concern on the result field of /run replies. Runtime guard skipped for v0.1 per audit recommendation (low yield, complexity). LOW: - L1: cxx_std_20 → cxx_std_17 in CMakeLists.txt (no C++20-only features in the library source; broader downstream reach). Examples and tests still build via designated initializers (g++ accepts these in C++17 mode). - L2: RunResult struct doc clarifies that missing ok/duration_ms decode to defaults — opt-out forward-compat. - L3: Client class doc clarifies that moved-from instances must not have any non-special-member methods invoked (UB), with explicit callout on base_url() returning an internal reference. Test-only: - cpp-httplib 0.15.3 → 0.20.1. Optional backends (OpenSSL / zlib / brotli / zstd) forced off to keep the dep graph minimal. Test-only, never on the consumer wire path. README "Test deps" section added for transparency. 
Tests added (12 → 23 cases, 70 → 106 assertions): - protocol_error on malformed response for healthz, run, upload_file, create_token, list_tokens (H1 regression) - redirect_clamp_test (H2 regression — TransportError after 5+ hops) - redirect_protocol_clamp (H2 regression — ftp:// Location rejected) - upload_file_canonicalize: symlink→file works, broken symlink rejected, symlink loop rejected, directory rejected (M1 regression) Verified: - cmake --build build clean (-Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wsign-conversion -Wold-style-cast -Werror) - ctest --output-on-failure all green (Release) - ASan + UBSan: 23/23 cases, 106/106 assertions, zero diagnostics Audit: memory/clawdforge-audits/cpp-bae34a7.md
341 lines
12 KiB
C++
341 lines
12 KiB
C++
// SPDX-License-Identifier: MIT
|
|
|
|
#include "http.hpp"
|
|
|
|
#include <atomic>
|
|
#include <cctype>
|
|
#include <cstring>
|
|
#include <mutex>
|
|
#include <stdexcept>
|
|
#include <string>
|
|
|
|
#include <clawdforge/error.hpp>
|
|
|
|
namespace clawdforge::detail {
|
|
|
|
namespace {
|
|
|
|
// Process-wide bookkeeping for libcurl's global state. `g_global_refs` counts
// the live references held by CurlGlobalGuard instances; `g_global_mu`
// serialises the first-init / last-cleanup decisions so that
// `curl_global_init` and `curl_global_cleanup` never race each other.
std::mutex g_global_mu;
std::atomic<int> g_global_refs{0};
|
|
|
|
// CURLOPT_WRITEFUNCTION callback: appends the received chunk to the
// std::string registered through CURLOPT_WRITEDATA.
//
// ptr      - start of the received bytes (not NUL-terminated)
// size     - element size
// nmemb    - number of elements; the chunk is `size * nmemb` bytes long
// userdata - the `std::string*` that accumulates the body
//
// Returns the number of bytes consumed. Returning fewer bytes than were
// offered makes libcurl abort the transfer with a write error, which is how
// an allocation failure is reported: a C++ exception must never unwind
// through libcurl's C stack frames.
std::size_t write_body_cb(char* ptr, std::size_t size, std::size_t nmemb, void* userdata) {
    auto* out = static_cast<std::string*>(userdata);
    const std::size_t n = size * nmemb;
    try {
        out->append(ptr, n);
    } catch (...) {
        // Typically std::bad_alloc / std::length_error on a huge body.
        return 0;
    }
    return n;
}
|
|
|
|
std::size_t write_header_cb(char* ptr, std::size_t size, std::size_t nmemb, void* userdata) {
|
|
auto* hdrs = static_cast<HeaderMap*>(userdata);
|
|
const std::size_t n = size * nmemb;
|
|
std::string line(ptr, n);
|
|
|
|
// Strip trailing CRLF.
|
|
while (!line.empty() && (line.back() == '\n' || line.back() == '\r')) {
|
|
line.pop_back();
|
|
}
|
|
auto colon = line.find(':');
|
|
if (colon == std::string::npos) {
|
|
return n; // status line or blank — skip
|
|
}
|
|
|
|
std::string name = line.substr(0, colon);
|
|
std::string value = line.substr(colon + 1);
|
|
// Trim leading whitespace on the value.
|
|
std::size_t i = 0;
|
|
while (i < value.size() && (value[i] == ' ' || value[i] == '\t')) ++i;
|
|
value.erase(0, i);
|
|
|
|
// Lowercase the header name for case-insensitive lookup.
|
|
for (auto& c : name) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
|
|
|
(*hdrs)[std::move(name)] = std::move(value);
|
|
return n;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Takes one reference on libcurl's global state. The first reference in the
// process runs `curl_global_init`; if that fails the reference is rolled back
// and a TransportError is thrown.
CurlGlobalGuard::CurlGlobalGuard() {
    std::lock_guard<std::mutex> hold(g_global_mu);

    const bool first_user = (g_global_refs.fetch_add(1) == 0);
    if (!first_user) {
        active_ = true;
        return;
    }

    const CURLcode init_rc = curl_global_init(CURL_GLOBAL_DEFAULT);
    if (init_rc == CURLE_OK) {
        active_ = true;
        return;
    }

    // Initialisation failed: undo our increment before reporting.
    g_global_refs.fetch_sub(1);
    active_ = false;
    throw TransportError(std::string{"curl_global_init failed: "} + curl_easy_strerror(init_rc));
}
|
|
|
|
// Releases this guard's reference, if it holds one. Dropping the last
// reference runs `curl_global_cleanup`.
CurlGlobalGuard::~CurlGlobalGuard() {
    if (active_) {
        std::lock_guard<std::mutex> hold(g_global_mu);
        const int refs_before = g_global_refs.fetch_sub(1);
        if (refs_before == 1) {
            curl_global_cleanup();
        }
    }
}
|
|
|
|
// Copying an active guard takes an additional reference; copying an inactive
// guard yields another inactive guard and leaves the count alone.
CurlGlobalGuard::CurlGlobalGuard(const CurlGlobalGuard& other) : active_(other.active_) {
    if (!active_) {
        return;
    }
    g_global_refs.fetch_add(1);
}
|
|
|
|
// Copy assignment: release whatever reference this guard currently holds
// (running `curl_global_cleanup` if it was the last one), then mirror
// `other` — taking a fresh reference when `other` is active.
CurlGlobalGuard& CurlGlobalGuard::operator=(const CurlGlobalGuard& other) {
    if (this != &other) {
        if (active_) {
            std::lock_guard<std::mutex> hold(g_global_mu);
            const int refs_before = g_global_refs.fetch_sub(1);
            if (refs_before == 1) {
                curl_global_cleanup();
            }
        }

        active_ = other.active_;
        if (active_) {
            g_global_refs.fetch_add(1);
        }
    }
    return *this;
}
|
|
|
|
CurlGlobalGuard::CurlGlobalGuard(CurlGlobalGuard&& other) noexcept : active_(other.active_) {
|
|
other.active_ = false;
|
|
}
|
|
|
|
// Move assignment: release the reference this guard currently holds (running
// `curl_global_cleanup` if it was the last one), then adopt the source's
// reference and leave the source inactive.
CurlGlobalGuard& CurlGlobalGuard::operator=(CurlGlobalGuard&& other) noexcept {
    if (this != &other) {
        if (active_) {
            std::lock_guard<std::mutex> hold(g_global_mu);
            const int refs_before = g_global_refs.fetch_sub(1);
            if (refs_before == 1) {
                curl_global_cleanup();
            }
        }

        active_ = other.active_;
        other.active_ = false;
    }
    return *this;
}
|
|
|
|
CurlSession::CurlSession(std::chrono::seconds timeout,
|
|
std::chrono::seconds connect_timeout,
|
|
std::string user_agent,
|
|
bool insecure_tls)
|
|
: timeout_(timeout),
|
|
connect_timeout_(connect_timeout),
|
|
user_agent_(std::move(user_agent)),
|
|
insecure_tls_(insecure_tls) {
|
|
easy_ = curl_easy_init();
|
|
if (easy_ == nullptr) {
|
|
throw TransportError("curl_easy_init returned null");
|
|
}
|
|
}
|
|
|
|
// Releases the easy handle. A moved-from session holds a null handle and has
// nothing to release.
CurlSession::~CurlSession() {
    if (!easy_) {
        return;
    }
    curl_easy_cleanup(easy_);
}
|
|
|
|
CurlSession::CurlSession(CurlSession&& other) noexcept
|
|
: easy_(other.easy_),
|
|
timeout_(other.timeout_),
|
|
connect_timeout_(other.connect_timeout_),
|
|
user_agent_(std::move(other.user_agent_)),
|
|
insecure_tls_(other.insecure_tls_) {
|
|
other.easy_ = nullptr;
|
|
}
|
|
|
|
// Move assignment: releases the handle currently owned (if any), then takes
// over the source's handle and settings. The source is left with a null
// handle. Self-assignment is a no-op.
CurlSession& CurlSession::operator=(CurlSession&& other) noexcept {
    if (this == &other) {
        return *this;
    }

    if (easy_) {
        curl_easy_cleanup(easy_);
    }
    easy_ = other.easy_;
    other.easy_ = nullptr;

    timeout_ = other.timeout_;
    connect_timeout_ = other.connect_timeout_;
    insecure_tls_ = other.insecure_tls_;
    user_agent_ = std::move(other.user_agent_);
    return *this;
}
|
|
|
|
// Executes one HTTP request on this session's easy handle and returns the
// response (status code, body, and headers).
//
// Behaviour:
//   - The handle is reset and fully reconfigured on every call, so no option
//     leaks from one request to the next.
//   - Redirects are followed, clamped to 5 hops and to http/https targets.
//   - "GET" and "POST" use libcurl's native modes; any other method is sent
//     via CURLOPT_CUSTOMREQUEST, with `req.body` attached when present.
//   - A POST with `req.file` is sent as multipart/form-data: the file part
//     first, then one part per entry in `req.form_fields`.
//
// Throws TransportError when the session has no handle (moved-from), when a
// libcurl allocation or mime call fails, or when the transfer itself fails.
// HTTP error statuses are NOT exceptions here; they are returned in
// `Response::status`.
Response CurlSession::perform(const Request& req) {
    if (easy_ == nullptr) {
        throw TransportError("CurlSession::perform called on a session without a handle");
    }

    // libcurl recommends easy_reset between uses of an easy handle; cheaper
    // than init + cleanup per request and preserves the connection cache.
    curl_easy_reset(easy_);

    Response resp;
    struct curl_slist* hdr_list = nullptr;
    curl_mime* mime = nullptr;
    char errbuf[CURL_ERROR_SIZE]{};

    // Idempotent RAII cleanup — runs exactly once whether we exit via return
    // or exception. It also detaches `errbuf` from the handle: libcurl may
    // keep using the error buffer until the option is changed or the handle
    // is cleaned up, and `errbuf` dies with this stack frame.
    struct CurlReqCleanup {
        CURL* easy;
        struct curl_slist** headers;
        curl_mime** form;
        ~CurlReqCleanup() {
            curl_easy_setopt(easy, CURLOPT_ERRORBUFFER, static_cast<char*>(nullptr));
            if (*headers != nullptr) {
                curl_slist_free_all(*headers);
                *headers = nullptr;
            }
            if (*form != nullptr) {
                curl_mime_free(*form);
                *form = nullptr;
            }
        }
    } cleanup{easy_, &hdr_list, &mime};

    // Turns a failing CURLcode from a setup call into a TransportError.
    const auto require_ok = [](CURLcode code, const char* what) {
        if (code != CURLE_OK) {
            throw TransportError(std::string{what} + ": " + curl_easy_strerror(code));
        }
    };

    // Adds a part to `mime`, refusing to continue with a null part.
    const auto add_part = [&mime]() {
        curl_mimepart* created = curl_mime_addpart(mime);
        if (created == nullptr) {
            throw TransportError("curl_mime_addpart returned null");
        }
        return created;
    };

    // Appends one header line. curl_slist_append returns NULL on failure and
    // leaves the existing list untouched, so only adopt a non-null result —
    // otherwise the list would leak and the request would silently go out
    // without its headers.
    const auto append_header = [&hdr_list](const char* header_line) {
        struct curl_slist* grown = curl_slist_append(hdr_list, header_line);
        if (grown == nullptr) {
            throw TransportError("curl_slist_append returned null");
        }
        hdr_list = grown;
    };

    // Common options -----------------------------------------------------------
    curl_easy_setopt(easy_, CURLOPT_URL, req.url.c_str());
    curl_easy_setopt(easy_, CURLOPT_FOLLOWLOCATION, 1L);
    // Clamp redirects: bound the chain length, only allow http(s) on the
    // redirect path (default also includes ftp/sftp/scp), and explicitly
    // disable cross-host bearer leakage. libcurl >= 7.64.0 already strips
    // header-mode auth on cross-host redirects; UNRESTRICTED_AUTH=0 is
    // defense-in-depth.
    curl_easy_setopt(easy_, CURLOPT_MAXREDIRS, 5L);
    // CURLOPT_REDIR_PROTOCOLS_STR added in 7.85.0; older curl uses the
    // bitmask form. Both spell the same allowlist: http + https only.
#if LIBCURL_VERSION_NUM >= 0x075500 /* 7.85.0 */
    curl_easy_setopt(easy_, CURLOPT_REDIR_PROTOCOLS_STR, "http,https");
#else
    curl_easy_setopt(easy_, CURLOPT_REDIR_PROTOCOLS,
                     static_cast<long>(CURLPROTO_HTTP | CURLPROTO_HTTPS));
#endif
    curl_easy_setopt(easy_, CURLOPT_UNRESTRICTED_AUTH, 0L);
    curl_easy_setopt(easy_, CURLOPT_NOSIGNAL, 1L);  // be thread-friendly
    curl_easy_setopt(easy_, CURLOPT_TIMEOUT, static_cast<long>(timeout_.count()));
    curl_easy_setopt(easy_, CURLOPT_CONNECTTIMEOUT, static_cast<long>(connect_timeout_.count()));
    curl_easy_setopt(easy_, CURLOPT_USERAGENT, user_agent_.c_str());
    curl_easy_setopt(easy_, CURLOPT_WRITEFUNCTION, write_body_cb);
    curl_easy_setopt(easy_, CURLOPT_WRITEDATA, &resp.body);
    curl_easy_setopt(easy_, CURLOPT_HEADERFUNCTION, write_header_cb);
    curl_easy_setopt(easy_, CURLOPT_HEADERDATA, &resp.headers);

    if (insecure_tls_) {
        curl_easy_setopt(easy_, CURLOPT_SSL_VERIFYPEER, 0L);
        curl_easy_setopt(easy_, CURLOPT_SSL_VERIFYHOST, 0L);
    }

    // Method + body / mime ----------------------------------------------------
    if (req.method == "GET") {
        curl_easy_setopt(easy_, CURLOPT_HTTPGET, 1L);
    } else if (req.method == "POST") {
        curl_easy_setopt(easy_, CURLOPT_POST, 1L);
        if (req.file) {
            mime = curl_mime_init(easy_);
            if (mime == nullptr) {
                throw TransportError("curl_mime_init returned null");
            }

            curl_mimepart* file_part = add_part();
            require_ok(curl_mime_name(file_part, req.file->field_name.c_str()), "curl_mime_name");
            require_ok(curl_mime_filename(file_part, req.file->filename.c_str()),
                       "curl_mime_filename");
            if (!req.file->content_type.empty()) {
                require_ok(curl_mime_type(file_part, req.file->content_type.c_str()),
                           "curl_mime_type");
            }
            require_ok(curl_mime_filedata(file_part, req.file->filesystem_path.c_str()),
                       "curl_mime_filedata");

            for (const auto& [k, v] : req.form_fields) {
                curl_mimepart* field_part = add_part();
                require_ok(curl_mime_name(field_part, k.c_str()), "curl_mime_name");
                require_ok(curl_mime_data(field_part, v.c_str(), v.size()), "curl_mime_data");
            }
            curl_easy_setopt(easy_, CURLOPT_MIMEPOST, mime);
        } else if (req.body) {
            // POSTFIELDS does not copy; `req` outlives the transfer below.
            curl_easy_setopt(easy_, CURLOPT_POSTFIELDS, req.body->data());
            curl_easy_setopt(easy_, CURLOPT_POSTFIELDSIZE_LARGE,
                             static_cast<curl_off_t>(req.body->size()));
        } else {
            // POST with empty body — still valid (e.g. action endpoints).
            curl_easy_setopt(easy_, CURLOPT_POSTFIELDS, "");
            curl_easy_setopt(easy_, CURLOPT_POSTFIELDSIZE, 0L);
        }
    } else {
        curl_easy_setopt(easy_, CURLOPT_CUSTOMREQUEST, req.method.c_str());
        if (req.body) {
            curl_easy_setopt(easy_, CURLOPT_POSTFIELDS, req.body->data());
            curl_easy_setopt(easy_, CURLOPT_POSTFIELDSIZE_LARGE,
                             static_cast<curl_off_t>(req.body->size()));
        }
    }

    // Request headers ----------------------------------------------------------
    for (const auto& [name, value] : req.headers) {
        const std::string line = name + ": " + value;
        append_header(line.c_str());
    }
    // Suppress libcurl's auto-Expect:100-continue on POST — needless RTT.
    append_header("Expect:");
    curl_easy_setopt(easy_, CURLOPT_HTTPHEADER, hdr_list);

    // Transfer -----------------------------------------------------------------
    curl_easy_setopt(easy_, CURLOPT_ERRORBUFFER, errbuf);

    const CURLcode rc = curl_easy_perform(easy_);
    if (rc != CURLE_OK) {
        // Prefer libcurl's detailed message; fall back to the generic one.
        const std::string msg = errbuf[0] != '\0' ? errbuf : curl_easy_strerror(rc);
        throw TransportError(std::string{"libcurl: "} + msg);
    }

    long status = 0;
    curl_easy_getinfo(easy_, CURLINFO_RESPONSE_CODE, &status);
    resp.status = status;
    return resp;
}
|
|
|
|
// Joins `base` and `path` with exactly one '/' between them: every trailing
// '/' on `base` and every leading '/' on `path` is dropped first. The
// separator is always emitted, so an empty `path` yields "<base>/".
std::string join_url(std::string_view base, std::string_view path) {
    // Keep `base` up to and including its last non-'/' character (nothing if
    // it consists only of slashes).
    const std::size_t last_kept = base.find_last_not_of('/');
    const std::size_t base_len = (last_kept == std::string_view::npos) ? 0 : last_kept + 1;
    base.remove_suffix(base.size() - base_len);

    // Skip `path` forward to its first non-'/' character.
    const std::size_t first_kept = path.find_first_not_of('/');
    path.remove_prefix(first_kept == std::string_view::npos ? path.size() : first_kept);

    std::string joined;
    joined.reserve(base.size() + 1 + path.size());
    joined.append(base);
    joined += '/';
    joined.append(path);
    return joined;
}
|
|
|
|
// Percent-encodes `in` for use as a single URL path segment. ASCII letters,
// digits and the four characters '-', '_', '.', '~' are copied through; every
// other byte (including '/') becomes "%XX" with upper-case hex digits.
std::string url_encode_path(std::string_view in) {
    constexpr char kHexDigits[] = "0123456789ABCDEF";

    const auto is_unreserved = [](unsigned char b) {
        if (b >= '0' && b <= '9') return true;
        if (b >= 'A' && b <= 'Z') return true;
        if (b >= 'a' && b <= 'z') return true;
        switch (b) {
            case '-':
            case '_':
            case '.':
            case '~':
                return true;
            default:
                return false;
        }
    };

    std::string encoded;
    encoded.reserve(in.size());
    for (const char raw : in) {
        const auto byte = static_cast<unsigned char>(raw);
        if (is_unreserved(byte)) {
            encoded += raw;
            continue;
        }
        const char escape[3] = {'%', kHexDigits[byte >> 4U], kHexDigits[byte & 0x0FU]};
        encoded.append(escape, sizeof escape);
    }
    return encoded;
}
|
|
|
|
// Returns `s` unchanged when it is at most `max` bytes long. Otherwise returns
// a prefix of at most `max` bytes followed by "...[truncated]". The cut point
// is moved backwards past UTF-8 continuation bytes (10xxxxxx) so that a
// multi-byte sequence is not split in the middle — best effort only; the
// input is not validated.
std::string truncate_for_log(std::string_view s, std::size_t max) {
    if (max >= s.size()) {
        return std::string{s};
    }

    const auto is_continuation = [](char ch) {
        return (static_cast<unsigned char>(ch) & 0xC0U) == 0x80U;
    };

    // s[keep] is the first byte that gets dropped; it must start a code point.
    std::size_t keep = max;
    for (; keep > 0 && is_continuation(s[keep]); --keep) {
    }

    std::string clipped{s.substr(0, keep)};
    clipped += "...[truncated]";
    return clipped;
}
|
|
|
|
} // namespace clawdforge::detail
|