diff options
Diffstat (limited to 'src/raptor_www_curl.c')
-rw-r--r-- | src/raptor_www_curl.c | 388 |
1 files changed, 388 insertions, 0 deletions
diff --git a/src/raptor_www_curl.c b/src/raptor_www_curl.c new file mode 100644 index 0000000..d689f2b --- /dev/null +++ b/src/raptor_www_curl.c @@ -0,0 +1,388 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_www_curl.c - Raptor WWW retrieval via libcurl + * + * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#ifdef RAPTOR_WWW_LIBCURL + +#include <stdio.h> +#include <string.h> +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif +#include <stdarg.h> + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +static void +raptor_www_curl_update_status(raptor_www* www) +{ + char* final_uri; + + if(www->failed) + return; + + if(www->checked_status++) + return; + + if(!www->final_uri) { + /* If not already found in headers by + * raptor_www_curl_header_callback() which overrides what libcurl + * found in HTTP status line (3xx) + */ + + if(curl_easy_getinfo(www->curl_handle, CURLINFO_EFFECTIVE_URL, + &final_uri) == CURLE_OK) { + www->final_uri = raptor_new_uri(www->world, (const unsigned char*)final_uri); + if(www->final_uri_handler) + www->final_uri_handler(www, www->final_uri_userdata, www->final_uri); + } + } + +} + + +static size_t +raptor_www_curl_write_callback(void *ptr, size_t size, size_t nmemb, void *userdata) +{ + raptor_www* www = (raptor_www*)userdata; + size_t bytes = size * nmemb; + + /* If WWW has been aborted, return nothing so that + * libcurl will abort the transfer + */ + if(www->failed) + return 0; + + raptor_www_curl_update_status(www); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + RAPTOR_DEBUG2("Got %d bytes\n", bytes); +#endif + + if(www->write_bytes) + www->write_bytes(www, www->write_bytes_userdata, ptr, size, nmemb); + www->total_bytes += bytes; + return bytes; +} + + +static size_t +raptor_www_curl_header_callback(void* ptr, size_t size, size_t nmemb, + void *userdata) +{ + raptor_www* www = (raptor_www*)userdata; + size_t bytes = size * nmemb; + int c; + + /* If WWW has been aborted, return nothing so that + * libcurl will abort the transfer + */ + if(www->failed) + return 0; + +#define CONTENT_TYPE_LEN 14 + if(!raptor_strncasecmp((char*)ptr, "Content-Type: ", CONTENT_TYPE_LEN)) { + size_t len = bytes - CONTENT_TYPE_LEN - 2; /* for \r\n */ + char *type_buffer = RAPTOR_MALLOC(char*, len + 1); + memcpy(type_buffer, (char*)ptr + 14, len); + type_buffer[len]='\0'; + if(www->type) + RAPTOR_FREE(char*, www->type); + www->type = type_buffer; + www->free_type = 1; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + RAPTOR_DEBUG3("Got content type header '%s' (%d bytes)\n", type_buffer, len); +#endif + if(www->content_type) + www->content_type(www, www->content_type_userdata, www->type); + } + + +#define CONTENT_LOCATION_LEN 18 + if(!raptor_strncasecmp((char*)ptr, "Content-Location: ", + CONTENT_LOCATION_LEN)) { + size_t uri_len = bytes - CONTENT_LOCATION_LEN - 2; /* for \r\n */ + unsigned char* uri_str = (unsigned char*)ptr + CONTENT_LOCATION_LEN; + + if(www->final_uri) + raptor_free_uri(www->final_uri); + + /* Ensure it is NUL terminated */ + c = uri_str[uri_len]; + uri_str[uri_len] = '\0'; + www->final_uri = raptor_new_uri_relative_to_base_counted(www->world, + www->uri, + uri_str, uri_len); + uri_str[uri_len] = RAPTOR_GOOD_CAST(unsigned char, c); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + if(www->final_uri) + RAPTOR_DEBUG2("Got content location header '%s'\n", + raptor_uri_as_string(www->final_uri)); +#endif + if(www->final_uri_handler) + www->final_uri_handler(www, www->final_uri_userdata, www->final_uri); + } + + return bytes; +} + + +/* Return non-0 on failure */ +int +raptor_www_curl_init(raptor_www *www) +{ + CURLcode res; + +#define curl_init_setopt_or_fail(h, k, v) do { \ + res = curl_easy_setopt(h, k, v); \ + if(res != CURLE_OK) \ + return 1; \ + } while(0) + + if(!www->curl_handle) { + www->curl_handle = curl_easy_init(); + www->curl_init_here = 1; + } + + +#ifndef CURLOPT_WRITEDATA +#define CURLOPT_WRITEDATA CURLOPT_FILE +#endif + + /* send all data to this function */ + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_WRITEFUNCTION, + raptor_www_curl_write_callback); + /* ... using this data pointer */ + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_WRITEDATA, www); + + + /* send all headers to this function */ + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_HEADERFUNCTION, + raptor_www_curl_header_callback); + /* ... using this data pointer */ + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_WRITEHEADER, www); + + /* Make it follow Location: headers */ + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_FOLLOWLOCATION, 1); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_VERBOSE, (void*)1); +#endif + + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_ERRORBUFFER, + www->error_buffer); + + /* Connection timeout in seconds */ + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_CONNECTTIMEOUT, + www->connection_timeout); + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_NOSIGNAL, 1); + + return 0; +} + + +void +raptor_www_curl_free(raptor_www *www) +{ + /* only tidy up if we did all the work */ + if(www->curl_init_here && www->curl_handle) { + curl_easy_cleanup(www->curl_handle); + www->curl_handle = NULL; + } +} + + +int +raptor_www_curl_fetch(raptor_www *www) +{ + CURLcode res = CURLE_OK; + struct curl_slist *slist = NULL; + + if(www->proxy) { + res = curl_easy_setopt(www->curl_handle, CURLOPT_PROXY, www->proxy); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting proxy to %s failed", www->proxy); + return 1; + } + } + + if(www->user_agent) { + res = curl_easy_setopt(www->curl_handle, CURLOPT_USERAGENT, www->user_agent); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting user agent to %s failed", www->user_agent); + return 1; + } + } + + if(www->http_accept) + slist = curl_slist_append(slist, (const char*)www->http_accept); + + /* ALWAYS disable curl default "Pragma: no-cache" */ + slist = curl_slist_append(slist, "Pragma:"); + if(www->cache_control) + slist = curl_slist_append(slist, (const char*)www->cache_control); + + if(slist) { + res = curl_easy_setopt(www->curl_handle, CURLOPT_HTTPHEADER, slist); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request http headers failed"); + return 1; + } + } + + /* specify URL to get */ + res = curl_easy_setopt(www->curl_handle, CURLOPT_URL, + raptor_uri_as_string(www->uri)); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request URL failed"); + return 1; + } + + if(curl_easy_perform(www->curl_handle)) { + /* failed */ + www->failed = 1; + raptor_www_error(www, "Resolving URI failed: %s", www->error_buffer); + } else { + long lstatus; + +#ifndef CURLINFO_RESPONSE_CODE +#define CURLINFO_RESPONSE_CODE CURLINFO_HTTP_CODE +#endif + + /* Requires pointer to a long */ + if(curl_easy_getinfo(www->curl_handle, CURLINFO_RESPONSE_CODE, &lstatus) == CURLE_OK) + /* CURL status code will always fit in an int */ + www->status_code = RAPTOR_GOOD_CAST(int, lstatus); + + } + + if(slist) + curl_slist_free_all(slist); + + return www->failed; +} + + +int +raptor_www_curl_set_ssl_cert_options(raptor_www* www, + const char* cert_filename, + const char* cert_type, + const char* cert_passphrase) +{ + CURLcode res; + + /* client certificate file name */ + if(cert_filename) { + res = curl_easy_setopt(www->curl_handle, CURLOPT_SSLCERT, cert_filename); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request SSL cert filename to %s failed", + cert_filename); + return 1; + } + } + + /* curl default is "PEM" */ + if(cert_type) { + res = curl_easy_setopt(www->curl_handle, CURLOPT_SSLCERTTYPE, cert_type); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request SSL cert type to %s failed", + cert_type); + return 1; + } + } + + /* passphrase */ + /* Removed in 7.16.4 */ +#if LIBCURL_VERSION_NUM < 0x071004 +#define CURLOPT_KEYPASSWD CURLOPT_SSLKEYPASSWD +#endif + if(cert_passphrase) { + res = curl_easy_setopt(www->curl_handle, CURLOPT_KEYPASSWD, cert_passphrase); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request SSL cert pass phrase failed"); + return 1; + } + } + + return 0; +} + + +int +raptor_www_curl_set_ssl_verify_options(raptor_www* www, int verify_peer, + int verify_host) +{ + CURLcode res; + + if(verify_peer) + verify_peer = 1; + res = curl_easy_setopt(www->curl_handle, CURLOPT_SSL_VERIFYPEER, verify_peer); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request SSL verify peer flag %d failed", + verify_peer); + return 1; + } + + /* curl 7.28.1 removed the value 1 from being legal: + * http://daniel.haxx.se/blog/2012/10/25/libcurl-claimed-to-be-dangerous/ + * + * CURL GIT commit da82f59b697310229ccdf66104d5d65a44dfab98 + * Sat Oct 27 12:31:39 2012 +0200 + * + * Legal values are: + * 0 to disable host verifying + * 2 (default) to enable host verifyinging + */ + if(verify_host) + verify_host = 2; + res = curl_easy_setopt(www->curl_handle, CURLOPT_SSL_VERIFYHOST, verify_host); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request SSL verify host flag %d failed", + verify_host); + return 1; + } + + return 0; +} + + +#endif /* RAPTOR_WWW_LIBCURL */ |