summaryrefslogtreecommitdiffstats
path: root/src/raptor_www_curl.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/raptor_www_curl.c')
-rw-r--r--src/raptor_www_curl.c388
1 files changed, 388 insertions, 0 deletions
diff --git a/src/raptor_www_curl.c b/src/raptor_www_curl.c
new file mode 100644
index 0000000..d689f2b
--- /dev/null
+++ b/src/raptor_www_curl.c
@@ -0,0 +1,388 @@
+/* -*- Mode: c; c-basic-offset: 2 -*-
+ *
+ * raptor_www_curl.c - Raptor WWW retrieval via libcurl
+ *
+ * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/
+ * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/
+ *
+ * This package is Free Software and part of Redland http://librdf.org/
+ *
+ * It is licensed under the following three licenses as alternatives:
+ * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
+ * 2. GNU General Public License (GPL) V2 or any newer version
+ * 3. Apache License, V2.0 or any newer version
+ *
+ * You may not use this file except in compliance with at least one of
+ * the above three licenses.
+ *
+ * See LICENSE.html or LICENSE.txt at the top of this package for the
+ * complete terms and further detail along with the license texts for
+ * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
+ *
+ *
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <raptor_config.h>
+#endif
+
+#ifdef RAPTOR_WWW_LIBCURL
+
+#include <stdio.h>
+#include <string.h>
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#include <stdarg.h>
+
+/* Raptor includes */
+#include "raptor2.h"
+#include "raptor_internal.h"
+
+
+/*
+ * raptor_www_curl_update_status:
+ * @www: WWW object
+ *
+ * On the first call for a retrieval that has not failed, record the
+ * final URI from libcurl's effective URL - unless a final URI is
+ * already set - and report it to the final URI handler if one is
+ * registered.  Later calls do nothing beyond bumping the call counter.
+ */
+static void
+raptor_www_curl_update_status(raptor_www* www)
+{
+  char* effective_url;
+
+  if(www->failed)
+    return;
+
+  /* The counter is incremented on every non-failed call; only the call
+   * that finds it at zero carries on
+   */
+  if(www->checked_status++)
+    return;
+
+  /* A final URI already found in the headers by
+   * raptor_www_curl_header_callback() overrides what libcurl found
+   * in the HTTP status line (3xx)
+   */
+  if(www->final_uri)
+    return;
+
+  if(curl_easy_getinfo(www->curl_handle, CURLINFO_EFFECTIVE_URL,
+                       &effective_url) != CURLE_OK)
+    return;
+
+  www->final_uri = raptor_new_uri(www->world,
+                                  (const unsigned char*)effective_url);
+  if(www->final_uri_handler)
+    www->final_uri_handler(www, www->final_uri_userdata, www->final_uri);
+}
+
+
+/*
+ * raptor_www_curl_write_callback:
+ * @ptr: block of received content
+ * @size: size of one item
+ * @nmemb: number of items
+ * @userdata: the #raptor_www object (registered via CURLOPT_WRITEDATA)
+ *
+ * libcurl write function: updates the retrieval status, hands the
+ * content to the write_bytes handler if one is set and adds the block
+ * size to the running byte total.
+ *
+ * Return value: number of bytes consumed (@size * @nmemb), or 0 if the
+ * retrieval has already failed so that libcurl aborts the transfer
+ */
+static size_t
+raptor_www_curl_write_callback(void *ptr, size_t size, size_t nmemb, void *userdata)
+{
+  raptor_www* www = (raptor_www*)userdata;
+  size_t bytes = size * nmemb;
+
+  /* If WWW has been aborted, return nothing so that
+   * libcurl will abort the transfer
+   */
+  if(www->failed)
+    return 0;
+
+  raptor_www_curl_update_status(www);
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2
+  /* bytes is a size_t: convert explicitly so the argument matches %d */
+  RAPTOR_DEBUG2("Got %d bytes\n", RAPTOR_GOOD_CAST(int, bytes));
+#endif
+
+  if(www->write_bytes)
+    www->write_bytes(www, www->write_bytes_userdata, ptr, size, nmemb);
+  www->total_bytes += bytes;
+  return bytes;
+}
+
+
+/*
+ * raptor_www_curl_header_callback:
+ * @ptr: one response header line (handled as a counted buffer; it is
+ *   never assumed to be NUL terminated)
+ * @size: size of one item
+ * @nmemb: number of items
+ * @userdata: the #raptor_www object (registered via CURLOPT_WRITEHEADER)
+ *
+ * libcurl header function: picks out the Content-Type and
+ * Content-Location headers.
+ *
+ * Content-Type: the value is copied into www->type (replacing any
+ * previous value) and passed to the content_type handler if set.
+ *
+ * Content-Location: the value is resolved against www->uri, stored as
+ * www->final_uri (replacing any previous value) and passed to the
+ * final URI handler if set.
+ *
+ * Return value: number of bytes consumed (@size * @nmemb), or 0 to make
+ * libcurl abort the transfer when the retrieval has already failed or
+ * memory for a header value could not be allocated
+ */
+static size_t
+raptor_www_curl_header_callback(void* ptr, size_t size, size_t nmemb,
+                                void *userdata)
+{
+  raptor_www* www = (raptor_www*)userdata;
+  size_t bytes = size * nmemb;
+  char* line = (char*)ptr;
+  size_t line_len = bytes;
+
+  /* If WWW has been aborted, return nothing so that
+   * libcurl will abort the transfer
+   */
+  if(www->failed)
+    return 0;
+
+  /* Strip whatever line terminator is present (\r\n, a bare \n or
+   * none at all) rather than assuming exactly two trailing bytes,
+   * which underflowed the length on short lines
+   */
+  while(line_len > 0 &&
+        (line[line_len - 1] == '\r' || line[line_len - 1] == '\n'))
+    line_len--;
+
+#define CONTENT_TYPE_LEN 14
+  /* Only compare when the line is long enough to hold the prefix so
+   * the comparison never reads beyond the header data
+   */
+  if(line_len >= CONTENT_TYPE_LEN &&
+     !raptor_strncasecmp(line, "Content-Type: ", CONTENT_TYPE_LEN)) {
+    size_t len = line_len - CONTENT_TYPE_LEN;
+    char *type_buffer = RAPTOR_MALLOC(char*, len + 1);
+
+    if(!type_buffer)
+      return 0;
+
+    memcpy(type_buffer, line + CONTENT_TYPE_LEN, len);
+    type_buffer[len] = '\0';
+    if(www->type)
+      RAPTOR_FREE(char*, www->type);
+    www->type = type_buffer;
+    www->free_type = 1;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2
+    RAPTOR_DEBUG3("Got content type header '%s' (%d bytes)\n", type_buffer,
+                  RAPTOR_GOOD_CAST(int, len));
+#endif
+    if(www->content_type)
+      www->content_type(www, www->content_type_userdata, www->type);
+  }
+
+
+#define CONTENT_LOCATION_LEN 18
+  if(line_len >= CONTENT_LOCATION_LEN &&
+     !raptor_strncasecmp(line, "Content-Location: ",
+                         CONTENT_LOCATION_LEN)) {
+    size_t uri_len = line_len - CONTENT_LOCATION_LEN;
+    unsigned char* uri_str;
+
+    /* Work on a NUL terminated private copy instead of temporarily
+     * overwriting a byte in libcurl's buffer
+     */
+    uri_str = RAPTOR_MALLOC(unsigned char*, uri_len + 1);
+    if(!uri_str)
+      return 0;
+
+    memcpy(uri_str, line + CONTENT_LOCATION_LEN, uri_len);
+    uri_str[uri_len] = '\0';
+
+    if(www->final_uri)
+      raptor_free_uri(www->final_uri);
+
+    www->final_uri = raptor_new_uri_relative_to_base_counted(www->world,
+                                                             www->uri,
+                                                             uri_str, uri_len);
+    RAPTOR_FREE(unsigned char*, uri_str);
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2
+    if(www->final_uri)
+      RAPTOR_DEBUG2("Got content location header '%s'\n",
+                    raptor_uri_as_string(www->final_uri));
+#endif
+    if(www->final_uri_handler)
+      www->final_uri_handler(www, www->final_uri_userdata, www->final_uri);
+  }
+
+  return bytes;
+}
+
+
+/*
+ * raptor_www_curl_init:
+ * @www: WWW object
+ *
+ * Prepare the libcurl easy handle of @www: create one if none is set
+ * (remembering that it was created here) and install the content and
+ * header callbacks, redirect following, the error buffer, the
+ * connection timeout and the no-signal option.
+ *
+ * Return value: non-0 on failure
+ */
+int
+raptor_www_curl_init(raptor_www *www)
+{
+  CURLcode res;
+
+#define curl_init_setopt_or_fail(h, k, v) do {        \
+    res = curl_easy_setopt(h, k, v);                  \
+    if(res != CURLE_OK)                               \
+      return 1;                                       \
+  } while(0)
+
+  if(!www->curl_handle) {
+    www->curl_handle = curl_easy_init();
+    if(!www->curl_handle)
+      return 1;
+    www->curl_init_here = 1;
+  }
+
+
+#ifndef CURLOPT_WRITEDATA
+#define CURLOPT_WRITEDATA CURLOPT_FILE
+#endif
+
+  /* send all data to this function */
+  curl_init_setopt_or_fail(www->curl_handle, CURLOPT_WRITEFUNCTION,
+                           raptor_www_curl_write_callback);
+  /* ... using this data pointer */
+  curl_init_setopt_or_fail(www->curl_handle, CURLOPT_WRITEDATA, www);
+
+
+  /* send all headers to this function */
+  curl_init_setopt_or_fail(www->curl_handle, CURLOPT_HEADERFUNCTION,
+                           raptor_www_curl_header_callback);
+  /* ... using this data pointer */
+  curl_init_setopt_or_fail(www->curl_handle, CURLOPT_WRITEHEADER, www);
+
+  /* curl_easy_setopt() is variadic and reads numeric option values as
+   * long, so every such value below is passed as a long explicitly
+   */
+
+  /* Make it follow Location: headers */
+  curl_init_setopt_or_fail(www->curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2
+  curl_init_setopt_or_fail(www->curl_handle, CURLOPT_VERBOSE, 1L);
+#endif
+
+  curl_init_setopt_or_fail(www->curl_handle, CURLOPT_ERRORBUFFER,
+                           www->error_buffer);
+
+  /* Connection timeout in seconds */
+  curl_init_setopt_or_fail(www->curl_handle, CURLOPT_CONNECTTIMEOUT,
+                           (long)www->connection_timeout);
+  curl_init_setopt_or_fail(www->curl_handle, CURLOPT_NOSIGNAL, 1L);
+
+  return 0;
+}
+
+
+/*
+ * raptor_www_curl_free:
+ * @www: WWW object
+ *
+ * Release the libcurl easy handle of @www, but only when it is marked
+ * as having been created here (curl_init_here); otherwise the handle
+ * is left untouched.
+ */
+void
+raptor_www_curl_free(raptor_www *www)
+{
+  /* not marked as created here: leave the handle alone */
+  if(!www->curl_init_here)
+    return;
+
+  /* nothing to release */
+  if(!www->curl_handle)
+    return;
+
+  curl_easy_cleanup(www->curl_handle);
+  www->curl_handle = NULL;
+}
+
+
+/*
+ * raptor_www_curl_fetch:
+ * @www: WWW object
+ *
+ * Retrieve www->uri with libcurl.  Applies the proxy and user agent if
+ * set, builds the request header list (optional Accept header, an
+ * empty "Pragma:" header and optional Cache-Control header), performs
+ * the transfer and, on success, stores the response code in
+ * www->status_code.
+ *
+ * The request header list is released on every path once it has been
+ * created, and is detached from the curl handle before being freed so
+ * the handle never refers to freed memory.
+ *
+ * Return value: non-0 on failure (www->failed is also set)
+ */
+int
+raptor_www_curl_fetch(raptor_www *www)
+{
+  CURLcode res = CURLE_OK;
+  struct curl_slist *slist = NULL;
+  struct curl_slist *appended;
+
+  if(www->proxy) {
+    res = curl_easy_setopt(www->curl_handle, CURLOPT_PROXY, www->proxy);
+    if(res != CURLE_OK) {
+      www->failed = 1;
+      raptor_www_error(www, "Setting proxy to %s failed", www->proxy);
+      return 1;
+    }
+  }
+
+  if(www->user_agent) {
+    res = curl_easy_setopt(www->curl_handle, CURLOPT_USERAGENT, www->user_agent);
+    if(res != CURLE_OK) {
+      www->failed = 1;
+      raptor_www_error(www, "Setting user agent to %s failed", www->user_agent);
+      return 1;
+    }
+  }
+
+  /* curl_slist_append() returns NULL on failure and leaves the existing
+   * list untouched, so each result is checked before replacing slist;
+   * otherwise the list built so far would be lost
+   */
+  if(www->http_accept) {
+    appended = curl_slist_append(slist, (const char*)www->http_accept);
+    if(!appended)
+      goto headers_failed;
+    slist = appended;
+  }
+
+  /* ALWAYS disable curl default "Pragma: no-cache" */
+  appended = curl_slist_append(slist, "Pragma:");
+  if(!appended)
+    goto headers_failed;
+  slist = appended;
+
+  if(www->cache_control) {
+    appended = curl_slist_append(slist, (const char*)www->cache_control);
+    if(!appended)
+      goto headers_failed;
+    slist = appended;
+  }
+
+  res = curl_easy_setopt(www->curl_handle, CURLOPT_HTTPHEADER, slist);
+  if(res != CURLE_OK)
+    goto headers_failed;
+
+  /* specify URL to get */
+  res = curl_easy_setopt(www->curl_handle, CURLOPT_URL,
+                         raptor_uri_as_string(www->uri));
+  if(res != CURLE_OK) {
+    www->failed = 1;
+    raptor_www_error(www, "Setting request URL failed");
+    goto tidy;
+  }
+
+  if(curl_easy_perform(www->curl_handle)) {
+    /* failed */
+    www->failed = 1;
+    raptor_www_error(www, "Resolving URI failed: %s", www->error_buffer);
+  } else {
+    long lstatus;
+
+#ifndef CURLINFO_RESPONSE_CODE
+#define CURLINFO_RESPONSE_CODE CURLINFO_HTTP_CODE
+#endif
+
+    /* Requires pointer to a long */
+    if(curl_easy_getinfo(www->curl_handle, CURLINFO_RESPONSE_CODE, &lstatus) == CURLE_OK)
+      /* CURL status code will always fit in an int */
+      www->status_code = RAPTOR_GOOD_CAST(int, lstatus);
+
+  }
+
+  goto tidy;
+
+  headers_failed:
+  www->failed = 1;
+  raptor_www_error(www, "Setting request http headers failed");
+
+  tidy:
+  /* Detach the header list from the handle before freeing it; the
+   * result is deliberately ignored as this is best-effort cleanup
+   */
+  (void)curl_easy_setopt(www->curl_handle, CURLOPT_HTTPHEADER,
+                         (struct curl_slist*)NULL);
+  /* safe when slist is NULL */
+  curl_slist_free_all(slist);
+
+  return www->failed;
+}
+
+
+/*
+ * raptor_www_curl_set_ssl_cert_options:
+ * @www: WWW object
+ * @cert_filename: SSL client certificate file name (or NULL)
+ * @cert_type: SSL client certificate type (or NULL)
+ * @cert_passphrase: SSL client certificate pass phrase (or NULL)
+ *
+ * Set the SSL client certificate options on the curl handle.  Each
+ * NULL argument leaves the corresponding curl option untouched.  On
+ * the first option that cannot be set, www->failed is set, an error
+ * is reported and the remaining options are not attempted.
+ *
+ * Return value: non-0 on failure
+ */
+int
+raptor_www_curl_set_ssl_cert_options(raptor_www* www,
+                                     const char* cert_filename,
+                                     const char* cert_type,
+                                     const char* cert_passphrase)
+{
+  /* client certificate file name */
+  if(cert_filename &&
+     curl_easy_setopt(www->curl_handle, CURLOPT_SSLCERT,
+                      cert_filename) != CURLE_OK) {
+    www->failed = 1;
+    raptor_www_error(www, "Setting request SSL cert filename to %s failed",
+                     cert_filename);
+    return 1;
+  }
+
+  /* certificate type; curl default is "PEM" */
+  if(cert_type &&
+     curl_easy_setopt(www->curl_handle, CURLOPT_SSLCERTTYPE,
+                      cert_type) != CURLE_OK) {
+    www->failed = 1;
+    raptor_www_error(www, "Setting request SSL cert type to %s failed",
+                     cert_type);
+    return 1;
+  }
+
+  /* pass phrase: libcurl before 7.16.4 only has the option under the
+   * name CURLOPT_SSLKEYPASSWD
+   */
+#if LIBCURL_VERSION_NUM < 0x071004
+#define CURLOPT_KEYPASSWD CURLOPT_SSLKEYPASSWD
+#endif
+  if(cert_passphrase &&
+     curl_easy_setopt(www->curl_handle, CURLOPT_KEYPASSWD,
+                      cert_passphrase) != CURLE_OK) {
+    www->failed = 1;
+    raptor_www_error(www, "Setting request SSL cert pass phrase failed");
+    return 1;
+  }
+
+  return 0;
+}
+
+
+/*
+ * raptor_www_curl_set_ssl_verify_options:
+ * @www: WWW object
+ * @verify_peer: non-0 to verify the SSL peer
+ * @verify_host: non-0 to verify the SSL host
+ *
+ * Set the SSL verification options on the curl handle.  The flags are
+ * normalised (peer: 0 or 1, host: 0 or 2) and passed to libcurl as
+ * long values, which is the type curl_easy_setopt() reads for numeric
+ * options.  On failure www->failed is set and an error is reported.
+ *
+ * Return value: non-0 on failure
+ */
+int
+raptor_www_curl_set_ssl_verify_options(raptor_www* www, int verify_peer,
+                                       int verify_host)
+{
+  CURLcode res;
+
+  if(verify_peer)
+    verify_peer = 1;
+  res = curl_easy_setopt(www->curl_handle, CURLOPT_SSL_VERIFYPEER,
+                         (long)verify_peer);
+  if(res != CURLE_OK) {
+    www->failed = 1;
+    raptor_www_error(www, "Setting request SSL verify peer flag %d failed",
+                     verify_peer);
+    return 1;
+  }
+
+  /* curl 7.28.1 removed the value 1 from being legal:
+   * http://daniel.haxx.se/blog/2012/10/25/libcurl-claimed-to-be-dangerous/
+   *
+   * CURL GIT commit da82f59b697310229ccdf66104d5d65a44dfab98
+   * Sat Oct 27 12:31:39 2012 +0200
+   *
+   * Legal values are:
+   * 0 to disable host verifying
+   * 2 (default) to enable host verifying
+   */
+  if(verify_host)
+    verify_host = 2;
+  res = curl_easy_setopt(www->curl_handle, CURLOPT_SSL_VERIFYHOST,
+                         (long)verify_host);
+  if(res != CURLE_OK) {
+    www->failed = 1;
+    raptor_www_error(www, "Setting request SSL verify host flag %d failed",
+                     verify_host);
+    return 1;
+  }
+
+  return 0;
+}
+
+
+#endif /* RAPTOR_WWW_LIBCURL */