summaryrefslogtreecommitdiff
path: root/platform/default/src/mbgl/storage/http_file_source.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'platform/default/src/mbgl/storage/http_file_source.cpp')
-rw-r--r--platform/default/src/mbgl/storage/http_file_source.cpp495
1 files changed, 495 insertions, 0 deletions
diff --git a/platform/default/src/mbgl/storage/http_file_source.cpp b/platform/default/src/mbgl/storage/http_file_source.cpp
new file mode 100644
index 0000000000..213b53de98
--- /dev/null
+++ b/platform/default/src/mbgl/storage/http_file_source.cpp
@@ -0,0 +1,495 @@
+#include <mbgl/storage/http_file_source.hpp>
+#include <mbgl/storage/resource.hpp>
+#include <mbgl/storage/response.hpp>
+#include <mbgl/util/logging.hpp>
+
+#include <mbgl/util/util.hpp>
+#include <mbgl/util/optional.hpp>
+#include <mbgl/util/run_loop.hpp>
+#include <mbgl/util/string.hpp>
+#include <mbgl/util/timer.hpp>
+#include <mbgl/util/chrono.hpp>
+#include <mbgl/util/http_header.hpp>
+
+#include <curl/curl.h>
+
+// Dynamically load all cURL functions. Debian-derived systems upgraded the OpenSSL version linked
+// to in https://salsa.debian.org/debian/curl/commit/95c94957bb7e89e36e78b995fed468c42f64d18d
+// They state:
+// Rename libcurl3 to libcurl4, because libcurl exposes an SSL_CTX via
+// CURLOPT_SSL_CTX_FUNCTION, and this object changes incompatibly between
+// openssl 1.0 and openssl 1.1.
+// Since we are not accessing the underlying OpenSSL context, we don't care whether we're linking
+// against libcurl3 or libcurl4; both use the ABI version 4 which hasn't changed since 2006
+// (see https://curl.haxx.se/libcurl/abi.html). In fact, cURL's ABI compatibility is very good as
+// shown on https://abi-laboratory.pro/tracker/timeline/curl/
+// Therefore, we're dynamically loading the cURL symbols we need to avoid linking against versioned
+// symbols.
+#include <dlfcn.h>
+
+namespace curl {
+
+#define CURL_FUNCTIONS \
+ X(global_init) \
+ X(getdate) \
+ X(easy_strerror) \
+ X(easy_init) \
+ X(easy_setopt) \
+ X(easy_cleanup) \
+ X(easy_getinfo) \
+ X(easy_reset) \
+ X(multi_init) \
+ X(multi_add_handle) \
+ X(multi_remove_handle) \
+ X(multi_cleanup) \
+ X(multi_info_read) \
+ X(multi_strerror) \
+ X(multi_socket_action) \
+ X(multi_setopt) \
+ X(share_init) \
+ X(share_cleanup) \
+ X(slist_append) \
+ X(slist_free_all)
+
+#define X(name) static decltype(&curl_ ## name) name = nullptr;
+CURL_FUNCTIONS
+#undef X
+
+static void* handle = nullptr;
+
+static void* load(const char* name) {
+ void* symbol = dlsym(handle, name);
+ if (const char* error = dlerror()) {
+ fprintf(stderr, "Cannot load symbol '%s': %s\n", name, error);
+ dlclose(handle);
+ handle = nullptr;
+ abort();
+ }
+ return symbol;
+}
+
+__attribute__((constructor))
+static void load() {
+ assert(!handle);
+ handle = dlopen("libcurl.so.4", RTLD_LAZY | RTLD_LOCAL);
+ if (!handle) {
+ fprintf(stderr, "Could not open shared library '%s'\n", "libcurl.so.4");
+ abort();
+ }
+
+ #define X(name) name = (decltype(&curl_ ## name))load("curl_" #name);
+ CURL_FUNCTIONS
+ #undef X
+}
+
+__attribute__((constructor))
+static void unload() {
+ if (handle) {
+ dlclose(handle);
+ }
+}
+
+} // namespace curl
+
+
+#include <queue>
+#include <map>
+#include <cassert>
+#include <cstring>
+#include <cstdio>
+
+static void handleError(CURLMcode code) {
+ if (code != CURLM_OK) {
+ throw std::runtime_error(std::string("CURL multi error: ") + curl::multi_strerror(code));
+ }
+}
+
+static void handleError(CURLcode code) {
+ if (code != CURLE_OK) {
+ throw std::runtime_error(std::string("CURL easy error: ") + curl::easy_strerror(code));
+ }
+}
+
+namespace mbgl {
+
+class HTTPFileSource::Impl {
+public:
+ Impl();
+ ~Impl();
+
+ static int handleSocket(CURL *handle, curl_socket_t s, int action, void *userp, void *socketp);
+ static int startTimeout(CURLM *multi, long timeout_ms, void *userp);
+ static void onTimeout(HTTPFileSource::Impl *context);
+
+ void perform(curl_socket_t s, util::RunLoop::Event event);
+ CURL *getHandle();
+ void returnHandle(CURL *handle);
+ void checkMultiInfo();
+
+ // Used as the CURL timer function to periodically check for socket updates.
+ util::Timer timeout;
+
+ // CURL multi handle that we use to request multiple URLs at the same time, without having to
+ // block and spawn threads.
+ CURLM *multi = nullptr;
+
+ // CURL share handles are used for sharing session state (e.g.)
+ CURLSH *share = nullptr;
+
+ // A queue that we use for storing resuable CURL easy handles to avoid creating and destroying
+ // them all the time.
+ std::queue<CURL *> handles;
+};
+
+class HTTPRequest : public AsyncRequest {
+public:
+ HTTPRequest(HTTPFileSource::Impl*, Resource, FileSource::Callback);
+ ~HTTPRequest() override;
+
+ void handleResult(CURLcode code);
+
+private:
+ static size_t headerCallback(char *const buffer, const size_t size, const size_t nmemb, void *userp);
+ static size_t writeCallback(void *const contents, const size_t size, const size_t nmemb, void *userp);
+
+ HTTPFileSource::Impl* context = nullptr;
+ Resource resource;
+ FileSource::Callback callback;
+
+ // Will store the current response.
+ std::shared_ptr<std::string> data;
+ std::unique_ptr<Response> response;
+
+ optional<std::string> retryAfter;
+ optional<std::string> xRateLimitReset;
+
+ CURL *handle = nullptr;
+ curl_slist *headers = nullptr;
+
+ char error[CURL_ERROR_SIZE] = { 0 };
+};
+
+HTTPFileSource::Impl::Impl() {
+ if (curl::global_init(CURL_GLOBAL_ALL)) {
+ throw std::runtime_error("Could not init cURL");
+ }
+
+ share = curl::share_init();
+
+ multi = curl::multi_init();
+ handleError(curl::multi_setopt(multi, CURLMOPT_SOCKETFUNCTION, handleSocket));
+ handleError(curl::multi_setopt(multi, CURLMOPT_SOCKETDATA, this));
+ handleError(curl::multi_setopt(multi, CURLMOPT_TIMERFUNCTION, startTimeout));
+ handleError(curl::multi_setopt(multi, CURLMOPT_TIMERDATA, this));
+}
+
+HTTPFileSource::Impl::~Impl() {
+ while (!handles.empty()) {
+ curl::easy_cleanup(handles.front());
+ handles.pop();
+ }
+
+ curl::multi_cleanup(multi);
+ multi = nullptr;
+
+ curl::share_cleanup(share);
+ share = nullptr;
+
+ timeout.stop();
+}
+
+CURL *HTTPFileSource::Impl::getHandle() {
+ if (!handles.empty()) {
+ auto handle = handles.front();
+ handles.pop();
+ return handle;
+ } else {
+ return curl::easy_init();
+ }
+}
+
+void HTTPFileSource::Impl::returnHandle(CURL *handle) {
+ curl::easy_reset(handle);
+ handles.push(handle);
+}
+
+void HTTPFileSource::Impl::checkMultiInfo() {
+ CURLMsg *message = nullptr;
+ int pending = 0;
+
+ while ((message = curl::multi_info_read(multi, &pending))) {
+ switch (message->msg) {
+ case CURLMSG_DONE: {
+ HTTPRequest *baton = nullptr;
+ curl::easy_getinfo(message->easy_handle, CURLINFO_PRIVATE, (char *)&baton);
+ assert(baton);
+ baton->handleResult(message->data.result);
+ } break;
+
+ default:
+ // This should never happen, because there are no other message types.
+ throw std::runtime_error("CURLMsg returned unknown message type");
+ }
+ }
+}
+
+void HTTPFileSource::Impl::perform(curl_socket_t s, util::RunLoop::Event events) {
+ int flags = 0;
+
+ if (events == util::RunLoop::Event::Read) {
+ flags |= CURL_CSELECT_IN;
+ }
+ if (events == util::RunLoop::Event::Write) {
+ flags |= CURL_CSELECT_OUT;
+ }
+
+
+ int running_handles = 0;
+ curl::multi_socket_action(multi, s, flags, &running_handles);
+ checkMultiInfo();
+}
+
+int HTTPFileSource::Impl::handleSocket(CURL * /* handle */, curl_socket_t s, int action, void *userp,
+ void * /* socketp */) {
+ assert(userp);
+ auto context = reinterpret_cast<Impl *>(userp);
+
+ switch (action) {
+ case CURL_POLL_IN: {
+ using namespace std::placeholders;
+ util::RunLoop::Get()->addWatch(s, util::RunLoop::Event::Read,
+ std::bind(&Impl::perform, context, _1, _2));
+ break;
+ }
+ case CURL_POLL_OUT: {
+ using namespace std::placeholders;
+ util::RunLoop::Get()->addWatch(s, util::RunLoop::Event::Write,
+ std::bind(&Impl::perform, context, _1, _2));
+ break;
+ }
+ case CURL_POLL_REMOVE:
+ util::RunLoop::Get()->removeWatch(s);
+ break;
+ default:
+ throw std::runtime_error("Unhandled CURL socket action");
+ }
+
+ return 0;
+}
+
+void HTTPFileSource::Impl::onTimeout(Impl *context) {
+ int running_handles;
+ CURLMcode error = curl::multi_socket_action(context->multi, CURL_SOCKET_TIMEOUT, 0, &running_handles);
+ if (error != CURLM_OK) {
+ throw std::runtime_error(std::string("CURL multi error: ") + curl::multi_strerror(error));
+ }
+ context->checkMultiInfo();
+}
+
+int HTTPFileSource::Impl::startTimeout(CURLM * /* multi */, long timeout_ms, void *userp) {
+ assert(userp);
+ auto context = reinterpret_cast<Impl *>(userp);
+
+ if (timeout_ms < 0) {
+ // A timeout of 0 ms means that the timer will invoked in the next loop iteration.
+ timeout_ms = 0;
+ }
+
+ context->timeout.stop();
+ context->timeout.start(mbgl::Milliseconds(timeout_ms), Duration::zero(),
+ std::bind(&Impl::onTimeout, context));
+
+ return 0;
+}
+
+HTTPRequest::HTTPRequest(HTTPFileSource::Impl* context_, Resource resource_, FileSource::Callback callback_)
+ : context(context_),
+ resource(std::move(resource_)),
+ callback(std::move(callback_)),
+ handle(context->getHandle()) {
+
+ // If there's already a response, set the correct etags/modified headers to make sure we are
+ // getting a 304 response if possible. This avoids redownloading unchanged data.
+ if (resource.priorEtag) {
+ const std::string header = std::string("If-None-Match: ") + *resource.priorEtag;
+ headers = curl::slist_append(headers, header.c_str());
+ } else if (resource.priorModified) {
+ const std::string time =
+ std::string("If-Modified-Since: ") + util::rfc1123(*resource.priorModified);
+ headers = curl::slist_append(headers, time.c_str());
+ }
+
+ if (headers) {
+ curl::easy_setopt(handle, CURLOPT_HTTPHEADER, headers);
+ }
+
+ handleError(curl::easy_setopt(handle, CURLOPT_PRIVATE, this));
+ handleError(curl::easy_setopt(handle, CURLOPT_ERRORBUFFER, error));
+ handleError(curl::easy_setopt(handle, CURLOPT_CAINFO, "ca-bundle.crt"));
+ handleError(curl::easy_setopt(handle, CURLOPT_FOLLOWLOCATION, 1));
+ handleError(curl::easy_setopt(handle, CURLOPT_URL, resource.url.c_str()));
+ handleError(curl::easy_setopt(handle, CURLOPT_WRITEFUNCTION, writeCallback));
+ handleError(curl::easy_setopt(handle, CURLOPT_WRITEDATA, this));
+ handleError(curl::easy_setopt(handle, CURLOPT_HEADERFUNCTION, headerCallback));
+ handleError(curl::easy_setopt(handle, CURLOPT_HEADERDATA, this));
+#if LIBCURL_VERSION_NUM >= ((7) << 16 | (21) << 8 | 6) // Renamed in 7.21.6
+ handleError(curl::easy_setopt(handle, CURLOPT_ACCEPT_ENCODING, "gzip, deflate"));
+#else
+ handleError(curl::easy_setopt(handle, CURLOPT_ENCODING, "gzip, deflate"));
+#endif
+ handleError(curl::easy_setopt(handle, CURLOPT_USERAGENT, "MapboxGL/1.0"));
+ handleError(curl::easy_setopt(handle, CURLOPT_SHARE, context->share));
+
+ // Start requesting the information.
+ handleError(curl::multi_add_handle(context->multi, handle));
+}
+
+HTTPRequest::~HTTPRequest() {
+ handleError(curl::multi_remove_handle(context->multi, handle));
+ context->returnHandle(handle);
+ handle = nullptr;
+
+ if (headers) {
+ curl::slist_free_all(headers);
+ headers = nullptr;
+ }
+}
+
+// This function is called when we have new data for a request. We just append it to the string
+// containing the previous data.
+size_t HTTPRequest::writeCallback(void *const contents, const size_t size, const size_t nmemb, void *userp) {
+ assert(userp);
+ auto impl = reinterpret_cast<HTTPRequest *>(userp);
+
+ if (!impl->data) {
+ impl->data = std::make_shared<std::string>();
+ }
+
+ impl->data->append((char *)contents, size * nmemb);
+ return size * nmemb;
+}
+
+// Compares the beginning of the (non-zero-terminated!) data buffer with the (zero-terminated!)
+// header string. If the data buffer contains the header string at the beginning, it returns
+// the length of the header string == begin of the value, otherwise it returns npos.
+// The comparison of the header is ASCII-case-insensitive.
+size_t headerMatches(const char *const header, const char *const buffer, const size_t length) {
+ const size_t headerLength = strlen(header);
+ if (length < headerLength) {
+ return std::string::npos;
+ }
+ size_t i = 0;
+ while (i < length && i < headerLength && std::tolower(buffer[i]) == std::tolower(header[i])) {
+ i++;
+ }
+ return i == headerLength ? i : std::string::npos;
+}
+
+size_t HTTPRequest::headerCallback(char *const buffer, const size_t size, const size_t nmemb, void *userp) {
+ assert(userp);
+ auto baton = reinterpret_cast<HTTPRequest *>(userp);
+
+ if (!baton->response) {
+ baton->response = std::make_unique<Response>();
+ }
+
+ const size_t length = size * nmemb;
+ size_t begin = std::string::npos;
+ if ((begin = headerMatches("last-modified: ", buffer, length)) != std::string::npos) {
+ // Always overwrite the modification date; We might already have a value here from the
+ // Date header, but this one is more accurate.
+ const std::string value { buffer + begin, length - begin - 2 }; // remove \r\n
+ baton->response->modified = Timestamp{ Seconds(curl::getdate(value.c_str(), nullptr)) };
+ } else if ((begin = headerMatches("etag: ", buffer, length)) != std::string::npos) {
+ baton->response->etag = std::string(buffer + begin, length - begin - 2); // remove \r\n
+ } else if ((begin = headerMatches("cache-control: ", buffer, length)) != std::string::npos) {
+ const std::string value { buffer + begin, length - begin - 2 }; // remove \r\n
+ const auto cc = http::CacheControl::parse(value.c_str());
+ baton->response->expires = cc.toTimePoint();
+ baton->response->mustRevalidate = cc.mustRevalidate;
+ } else if ((begin = headerMatches("expires: ", buffer, length)) != std::string::npos) {
+ const std::string value { buffer + begin, length - begin - 2 }; // remove \r\n
+ baton->response->expires = Timestamp{ Seconds(curl::getdate(value.c_str(), nullptr)) };
+ } else if ((begin = headerMatches("retry-after: ", buffer, length)) != std::string::npos) {
+ baton->retryAfter = std::string(buffer + begin, length - begin - 2); // remove \r\n
+ } else if ((begin = headerMatches("x-rate-limit-reset: ", buffer, length)) != std::string::npos) {
+ baton->xRateLimitReset = std::string(buffer + begin, length - begin - 2); // remove \r\n
+ }
+
+ return length;
+}
+
+void HTTPRequest::handleResult(CURLcode code) {
+ // Make sure a response object exists in case we haven't got any headers or content.
+ if (!response) {
+ response = std::make_unique<Response>();
+ }
+
+ using Error = Response::Error;
+
+ // Add human-readable error code
+ if (code != CURLE_OK) {
+ switch (code) {
+ case CURLE_COULDNT_RESOLVE_PROXY:
+ case CURLE_COULDNT_RESOLVE_HOST:
+ case CURLE_COULDNT_CONNECT:
+ case CURLE_OPERATION_TIMEDOUT:
+
+ response->error = std::make_unique<Error>(
+ Error::Reason::Connection, std::string{ curl::easy_strerror(code) } + ": " + error);
+ break;
+
+ default:
+ response->error = std::make_unique<Error>(
+ Error::Reason::Other, std::string{ curl::easy_strerror(code) } + ": " + error);
+ break;
+ }
+ } else {
+ long responseCode = 0;
+ curl::easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &responseCode);
+
+ if (responseCode == 200) {
+ if (data) {
+ response->data = std::move(data);
+ } else {
+ response->data = std::make_shared<std::string>();
+ }
+ } else if (responseCode == 204 || (responseCode == 404 && resource.kind == Resource::Kind::Tile)) {
+ response->noContent = true;
+ } else if (responseCode == 304) {
+ response->notModified = true;
+ } else if (responseCode == 404) {
+ response->error =
+ std::make_unique<Error>(Error::Reason::NotFound, "HTTP status code 404");
+ } else if (responseCode == 429) {
+ response->error =
+ std::make_unique<Error>(Error::Reason::RateLimit, "HTTP status code 429",
+ http::parseRetryHeaders(retryAfter, xRateLimitReset));
+ } else if (responseCode >= 500 && responseCode < 600) {
+ response->error =
+ std::make_unique<Error>(Error::Reason::Server, std::string{ "HTTP status code " } +
+ util::toString(responseCode));
+ } else {
+ response->error =
+ std::make_unique<Error>(Error::Reason::Other, std::string{ "HTTP status code " } +
+ util::toString(responseCode));
+ }
+ }
+
+ // Calling `callback` may result in deleting `this`. Copy data to temporaries first.
+ auto callback_ = callback;
+ auto response_ = *response;
+ callback_(response_);
+}
+
+HTTPFileSource::HTTPFileSource()
+ : impl(std::make_unique<Impl>()) {
+}
+
+HTTPFileSource::~HTTPFileSource() = default;
+
+std::unique_ptr<AsyncRequest> HTTPFileSource::request(const Resource& resource, Callback callback) {
+ return std::make_unique<HTTPRequest>(impl.get(), resource, callback);
+}
+
+} // namespace mbgl