summaryrefslogtreecommitdiffstats
path: root/src/raptor_www.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 05:40:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 05:40:05 +0000
commit4038ab95a094b363f1748f3dcb51511a1217475d (patch)
tree7f393d66a783f91ddd263c78d681e485cf4f45ca /src/raptor_www.c
parentInitial commit. (diff)
downloadraptor2-4038ab95a094b363f1748f3dcb51511a1217475d.tar.xz
raptor2-4038ab95a094b363f1748f3dcb51511a1217475d.zip
Adding upstream version 2.0.16.upstream/2.0.16upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/raptor_www.c')
-rw-r--r--src/raptor_www.c896
1 files changed, 896 insertions, 0 deletions
diff --git a/src/raptor_www.c b/src/raptor_www.c
new file mode 100644
index 0000000..26c2fa2
--- /dev/null
+++ b/src/raptor_www.c
@@ -0,0 +1,896 @@
+/* -*- Mode: c; c-basic-offset: 2 -*-
+ *
+ * raptor_www.c - Raptor WWW retrieval core
+ *
+ * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/
+ * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/
+ *
+ * This package is Free Software and part of Redland http://librdf.org/
+ *
+ * It is licensed under the following three licenses as alternatives:
+ * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
+ * 2. GNU General Public License (GPL) V2 or any newer version
+ * 3. Apache License, V2.0 or any newer version
+ *
+ * You may not use this file except in compliance with at least one of
+ * the above three licenses.
+ *
+ * See LICENSE.html or LICENSE.txt at the top of this package for the
+ * complete terms and further detail along with the license texts for
+ * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
+ *
+ *
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <raptor_config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+
+/* Raptor includes */
+#include "raptor2.h"
+#include "raptor_internal.h"
+
+
+/* Prototype for the file: URI retrieval helper defined later in this file */
+static int raptor_www_file_fetch(raptor_www* www);
+
+
+
+/*
+ * raptor_www_init:
+ * @world: raptor_world object
+ *
+ * INTERNAL - Initialise the WWW class for a world.
+ *
+ * Does nothing and returns 0 when the world is already marked as
+ * initialised.  Otherwise, in libcurl builds and unless the world's
+ * www_skip_www_init_finish flag is set, curl_global_init() is called.
+ * The world is marked as initialised whatever the result of that call.
+ *
+ * Must be called before creating any #raptor_www object.
+ *
+ * Return value: non-0 on failure
+ **/
+int
+raptor_www_init(raptor_world* world)
+{
+  int status = 0;
+
+  if(!world->www_initialized) {
+#ifdef RAPTOR_WWW_LIBCURL
+    if(!world->www_skip_www_init_finish)
+      status = curl_global_init(CURL_GLOBAL_ALL);
+#endif
+
+    world->www_initialized = 1;
+  }
+
+  return status;
+}
+
+
+/*
+ * raptor_www_finish:
+ * @world: raptor_world object
+ *
+ * INTERNAL - Terminate the WWW class.
+ *
+ * Must be called to clean any resources used by the WWW implementation.
+ * In libcurl builds this calls curl_global_cleanup() unless the world's
+ * www_skip_www_init_finish flag is set; in other builds it has no effect.
+ *
+ **/
+void
+raptor_www_finish(raptor_world* world)
+{
+  /* the application asked to manage global WWW setup/teardown itself */
+  if(world->www_skip_www_init_finish)
+    return;
+
+#ifdef RAPTOR_WWW_LIBCURL
+  curl_global_cleanup();
+#endif
+}
+
+
+/**
+ * raptor_new_www_with_connection:
+ * @world: raptor_world object
+ * @connection: external WWW connection object.
+ *
+ * Constructor - create a new #raptor_www object over an existing WWW connection.
+ *
+ * At present this only works with a libcurl CURL handle object
+ * when raptor is compiled with libcurl support.  Otherwise the
+ * @connection is ignored.  This allows such things as setting
+ * up special flags on the curl handle before passing into the constructor.
+ *
+ * The new object starts with no handlers, no URI filter, no
+ * Cache-Control header and a connection timeout of 10 seconds.
+ *
+ * Return value: a new #raptor_www object or NULL on failure.
+ **/
+raptor_www*
+raptor_new_www_with_connection(raptor_world* world, void *connection)
+{
+  raptor_www* www;
+
+  RAPTOR_CHECK_CONSTRUCTOR_WORLD(world);
+
+  raptor_world_open(world);
+
+  www = RAPTOR_CALLOC(raptor_www*, 1, sizeof(*www));
+  if(!www)
+    return NULL;
+
+  www->world = world;
+  www->type = NULL;
+  www->free_type = 1; /* default is to free content type */
+  www->total_bytes = 0;
+  www->failed = 0;
+  www->status_code = 0;
+  www->write_bytes = NULL;
+  www->content_type = NULL;
+  www->uri_filter = NULL;
+  www->connection_timeout = 10;
+  www->cache_control = NULL;
+
+#ifdef RAPTOR_WWW_LIBCURL
+  www->curl_handle = (CURL*)connection;
+  if(raptor_www_curl_init(www)) {
+    /* Return at once so no later backend hook is handed the freed object */
+    raptor_free_www(www);
+    return NULL;
+  }
+#endif
+#ifdef RAPTOR_WWW_LIBXML
+  raptor_www_libxml_init(www);
+#endif
+#ifdef RAPTOR_WWW_LIBFETCH
+  raptor_www_libfetch_init(www);
+#endif
+
+  return www;
+}
+
+
+/**
+ * raptor_new_www:
+ * @world: raptor_world object
+ *
+ * Constructor - create a new #raptor_www object.
+ *
+ * Equivalent to calling raptor_new_www_with_connection() with a NULL
+ * connection.
+ *
+ * Return value: a new #raptor_www or NULL on failure.
+ **/
+raptor_www*
+raptor_new_www(raptor_world* world)
+{
+  raptor_www* www;
+
+  RAPTOR_CHECK_CONSTRUCTOR_WORLD(world);
+
+  raptor_world_open(world);
+
+  www = raptor_new_www_with_connection(world, NULL);
+
+  return www;
+}
+
+
+/**
+ * raptor_free_www:
+ * @www: WWW object (a NULL pointer is accepted and ignored).
+ *
+ * Destructor - destroy a #raptor_www object.
+ *
+ * Releases the content type (when owned), user agent, Cache-Control,
+ * proxy and Accept strings, the backend state, the request URI and
+ * the final URI, then the object itself.
+ **/
+void
+raptor_free_www(raptor_www* www)
+{
+  /* like free(), destroying nothing is a no-op */
+  if(!www)
+    return;
+
+  /* free context */
+  if(www->type) {
+    if(www->free_type)
+      RAPTOR_FREE(char*, www->type);
+    www->type = NULL;
+  }
+
+  if(www->user_agent) {
+    RAPTOR_FREE(char*, www->user_agent);
+    www->user_agent = NULL;
+  }
+
+  if(www->cache_control) {
+    RAPTOR_FREE(char*, www->cache_control);
+    www->cache_control = NULL;
+  }
+
+  if(www->proxy) {
+    RAPTOR_FREE(char*, www->proxy);
+    www->proxy = NULL;
+  }
+
+  if(www->http_accept) {
+    RAPTOR_FREE(char*, www->http_accept);
+    www->http_accept = NULL;
+  }
+
+  /* backend-specific teardown */
+#ifdef RAPTOR_WWW_LIBCURL
+  raptor_www_curl_free(www);
+#endif
+#ifdef RAPTOR_WWW_LIBXML
+  raptor_www_libxml_free(www);
+#endif
+#ifdef RAPTOR_WWW_LIBFETCH
+  raptor_www_libfetch_free(www);
+#endif
+
+  if(www->uri)
+    raptor_free_uri(www->uri);
+
+  if(www->final_uri)
+    raptor_free_uri(www->final_uri);
+
+  RAPTOR_FREE(www, www);
+}
+
+
+
+/**
+ * raptor_www_set_write_bytes_handler:
+ * @www: WWW object
+ * @handler: bytes handler function
+ * @user_data: bytes handler data
+ *
+ * Set the handler to receive bytes written by the #raptor_www implementation.
+ *
+ * The @user_data pointer is stored alongside the handler and handed
+ * back to it on each call.
+ *
+ **/
+void
+raptor_www_set_write_bytes_handler(raptor_www* www,
+                                   raptor_www_write_bytes_handler handler,
+                                   void *user_data)
+{
+  www->write_bytes_userdata = user_data;
+  www->write_bytes = handler;
+}
+
+
+/**
+ * raptor_www_set_content_type_handler:
+ * @www: WWW object
+ * @handler: content type handler function
+ * @user_data: content type handler data
+ *
+ * Set the handler to receive the HTTP Content-Type header value.
+ *
+ * The handler is called if or when the value is discovered during
+ * retrieval by the raptor_www implementation.  Not all implementations
+ * provide access to this.
+ **/
+void
+raptor_www_set_content_type_handler(raptor_www* www,
+                                    raptor_www_content_type_handler handler,
+                                    void *user_data)
+{
+  www->content_type_userdata = user_data;
+  www->content_type = handler;
+}
+
+
+/**
+ * raptor_www_set_user_agent2:
+ * @www: WWW object
+ * @user_agent: User-Agent string
+ * @user_agent_len: Length of @user_agent string or 0 to count it here.
+ *
+ * Set the user agent value, for HTTP requests typically.
+ *
+ * The string is copied.  A NULL or empty @user_agent clears the value.
+ * Any previously set value is released; if the copy cannot be
+ * allocated the previous value is kept.
+ *
+ * Return value: non-0 on failure
+ **/
+int
+raptor_www_set_user_agent2(raptor_www* www, const char *user_agent,
+                           size_t user_agent_len)
+{
+  char *ua_copy = NULL;
+
+  if(user_agent && *user_agent) {
+    if(user_agent_len == 0)
+      user_agent_len = strlen(user_agent);
+
+    ua_copy = RAPTOR_MALLOC(char*, user_agent_len + 1);
+    if(!ua_copy)
+      return 1;
+
+    /* Copy exactly the requested bytes and terminate explicitly: the
+     * byte after a caller-supplied length need not be a NUL
+     */
+    memcpy(ua_copy, user_agent, user_agent_len);
+    ua_copy[user_agent_len] = '\0';
+  }
+
+  /* release the old value only once the new one is ready */
+  if(www->user_agent)
+    RAPTOR_FREE(char*, www->user_agent);
+
+  www->user_agent = ua_copy;
+
+  return 0;
+}
+
+
+/**
+ * raptor_www_set_user_agent:
+ * @www: WWW object
+ * @user_agent: User-Agent string
+ *
+ * Set the user agent value, for HTTP requests typically.
+ *
+ * @Deprecated: use raptor_www_set_user_agent2() which takes a length
+ * parameter and returns a value to signify failure.
+ *
+ **/
+void
+raptor_www_set_user_agent(raptor_www* www, const char *user_agent)
+{
+  /* a length of 0 makes raptor_www_set_user_agent2() count the string */
+  int ignored = raptor_www_set_user_agent2(www, user_agent, 0);
+
+  /* this legacy entry point has no way to report failure */
+  (void)ignored;
+}
+
+
+/**
+ * raptor_www_set_proxy2:
+ * @www: WWW object
+ * @proxy: proxy string.
+ * @proxy_len: Length of @proxy string or 0 to count it here.
+ *
+ * Set the proxy for the WWW object.
+ *
+ * The @proxy is usually a string of the form http://server.domain:port.
+ * The string is copied and any previously set proxy is released.  A
+ * NULL @proxy is an error and leaves the current proxy untouched, as
+ * does a failure to allocate the copy.
+ *
+ * Return value: non-0 on failure
+ **/
+int
+raptor_www_set_proxy2(raptor_www* www, const char *proxy,
+                      size_t proxy_len)
+{
+  char *proxy_copy;
+
+  if(!proxy)
+    return 1;
+
+  if(proxy_len == 0)
+    proxy_len = strlen(proxy);
+
+  proxy_copy = RAPTOR_MALLOC(char*, proxy_len + 1);
+  if(!proxy_copy)
+    return 1;
+
+  /* Copy exactly the requested bytes and terminate explicitly: the
+   * byte after a caller-supplied length need not be a NUL
+   */
+  memcpy(proxy_copy, proxy, proxy_len);
+  proxy_copy[proxy_len] = '\0';
+
+  /* release the old value only once the new one is ready */
+  if(www->proxy)
+    RAPTOR_FREE(char*, www->proxy);
+
+  www->proxy = proxy_copy;
+
+  return 0;
+}
+
+
+/**
+ * raptor_www_set_proxy:
+ * @www: WWW object
+ * @proxy: proxy string.
+ *
+ * Set the proxy for the WWW object.
+ *
+ * The @proxy is usually a string of the form http://server.domain:port.
+ *
+ * @Deprecated: use raptor_www_set_proxy2() which takes a length
+ * parameter and returns a value to signify failure.
+ *
+ **/
+void
+raptor_www_set_proxy(raptor_www* www, const char *proxy)
+{
+  /* a length of 0 makes raptor_www_set_proxy2() count the string */
+  int ignored = raptor_www_set_proxy2(www, proxy, 0);
+
+  /* this legacy entry point has no way to report failure */
+  (void)ignored;
+}
+
+
+/**
+ * raptor_www_set_http_accept2:
+ * @www: #raptor_www class
+ * @value: Accept: header value or NULL to have an empty one.
+ * @value_len: Length of @value string or 0 to count it here.
+ *
+ * Set HTTP Accept header.
+ *
+ * Stores the full header line: "Accept:" alone when @value is NULL,
+ * otherwise "Accept: " followed by @value.  Any previously set header
+ * is released; if the new one cannot be allocated the previous header
+ * is kept.
+ *
+ * Return value: non-0 on failure
+ **/
+int
+raptor_www_set_http_accept2(raptor_www* www, const char *value,
+                            size_t value_len)
+{
+  char *header;
+  char *p;
+  size_t len = 8; /* strlen("Accept:")+1 */
+
+  if(value) {
+    if(value_len == 0)
+      value_len = strlen(value);
+    len += 1 + value_len; /* " "+value */
+  }
+
+  header = RAPTOR_MALLOC(char*, len);
+  if(!header)
+    return 1;
+
+  /* copy header name */
+  memcpy(header, "Accept:", 7); /* Do not copy NUL */
+  p = header + 7;
+
+  /* copy header value */
+  if(value) {
+    *p++ = ' ';
+    /* Copy exactly value_len bytes: the byte after a caller-supplied
+     * length need not be a NUL
+     */
+    memcpy(p, value, value_len);
+    p += value_len;
+  }
+  *p = '\0';
+
+  /* release the old header only once the new one is complete */
+  if(www->http_accept)
+    RAPTOR_FREE(char*, www->http_accept);
+
+  www->http_accept = header;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+  RAPTOR_DEBUG2("Using Accept header: '%s'\n", www->http_accept);
+#endif
+
+  return 0;
+}
+
+
+/**
+ * raptor_www_set_http_accept:
+ * @www: #raptor_www class
+ * @value: Accept: header value or NULL to have an empty one.
+ *
+ * Set HTTP Accept header.
+ *
+ * @Deprecated: use raptor_www_set_http_accept2() which takes a
+ * length parameter and returns a value to signify failure.
+ *
+ **/
+void
+raptor_www_set_http_accept(raptor_www* www, const char *value)
+{
+  /* a length of 0 makes raptor_www_set_http_accept2() count the string */
+  int ignored = raptor_www_set_http_accept2(www, value, 0);
+
+  /* this legacy entry point has no way to report failure */
+  (void)ignored;
+}
+
+
+/**
+ * raptor_www_set_connection_timeout:
+ * @www: WWW object
+ * @timeout: Timeout in seconds
+ *
+ * Set WWW connection timeout.
+ *
+ * The value is stored as given, without range checking.  A newly
+ * constructed #raptor_www object starts with a timeout of 10.
+ **/
+void
+raptor_www_set_connection_timeout(raptor_www* www, int timeout)
+{
+  www->connection_timeout = timeout;
+}
+
+
+/**
+ * raptor_www_set_http_cache_control:
+ * @www: WWW object
+ * @cache_control: Cache-Control header value (or NULL to disable)
+ *
+ * Set HTTP Cache-Control: header (default none)
+ *
+ * The @cache_control value can be a string to set it, "" to send
+ * a blank header or NULL to not set the header at all.  Any header
+ * set earlier is released first, so after an allocation failure no
+ * header is set.
+ *
+ * Return value: non-0 on failure
+ **/
+int
+raptor_www_set_http_cache_control(raptor_www* www, const char* cache_control)
+{
+  const char* const header="Cache-Control:";
+  const size_t header_len = 14; /* strlen("Cache-Control:") */
+  size_t value_len;
+  size_t total_len;
+  char *line;
+  char *p;
+
+  RAPTOR_ASSERT_RETURN((strlen(header) != header_len), "Cache-Control header length is wrong", 1);
+
+  /* discard whatever was set before */
+  if(www->cache_control) {
+    RAPTOR_FREE(char*, www->cache_control);
+    www->cache_control = NULL;
+  }
+
+  /* NULL: leave the header unset */
+  if(!cache_control)
+    return 0;
+
+  value_len = strlen(cache_control);
+  total_len = header_len + 1 + value_len + 1; /* header+" "+cache_control+"\0" */
+
+  line = RAPTOR_MALLOC(char*, total_len);
+  if(!line)
+    return 1;
+
+  www->cache_control = line;
+
+  /* header name, without its NUL */
+  memcpy(line, header, header_len);
+  p = line + header_len;
+
+  if(value_len > 0) {
+    /* " " then the value together with its NUL */
+    *p++ = ' ';
+    memcpy(p, cache_control, value_len + 1);
+  } else {
+    /* blank header: terminate directly after the name */
+    *p = '\0';
+  }
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+  RAPTOR_DEBUG2("Using Cache-Control header: '%s'\n", www->cache_control);
+#endif
+
+  return 0;
+}
+
+
+/**
+ * raptor_www_set_uri_filter:
+ * @www: WWW object
+ * @filter: URI filter function
+ * @user_data: User data to pass to filter function
+ *
+ * Set URI filter function for WWW retrieval.
+ *
+ * raptor_www_fetch() calls the filter with @user_data and the URI
+ * before retrieving; a non-0 result from the filter stops the fetch
+ * and is returned to the caller of raptor_www_fetch().
+ **/
+void
+raptor_www_set_uri_filter(raptor_www* www,
+                          raptor_uri_filter_func filter,
+                          void *user_data)
+{
+  www->uri_filter_user_data = user_data;
+  www->uri_filter = filter;
+}
+
+
+/**
+ * raptor_www_set_ssl_cert_options:
+ * @www: WWW object
+ * @cert_filename: SSL client certificate file
+ * @cert_type: SSL client certificate type (default is "PEM")
+ * @cert_passphrase: SSL client certificate password
+ *
+ * Set SSL client certificate options (where supported)
+ *
+ * Only libcurl builds forward the options to the backend; every other
+ * build ignores the arguments and returns 1.
+ *
+ * Return value: non-0 when setting options is not supported
+ **/
+int
+raptor_www_set_ssl_cert_options(raptor_www* www,
+                                const char* cert_filename,
+                                const char* cert_type,
+                                const char* cert_passphrase)
+{
+  int rc = 1; /* not supported unless a capable backend is compiled in */
+
+#ifdef RAPTOR_WWW_LIBCURL
+  rc = raptor_www_curl_set_ssl_cert_options(www, cert_filename, cert_type,
+                                            cert_passphrase);
+#endif
+
+  return rc;
+}
+
+
+/**
+ * raptor_www_set_ssl_verify_options:
+ * @www: WWW object
+ * @verify_peer: SSL verify peer - non-0 to verify peer SSL certificate (default)
+ * @verify_host: SSL verify host - 0 none, non-0 to require a CN match (default).
+ *
+ * Set whether SSL verifies the authenticity of the peer's certificate
+ *
+ * These options correspond to setting the curl
+ * CURLOPT_SSL_VERIFYPEER and CURLOPT_SSL_VERIFYHOST options.
+ * Builds without libcurl ignore the arguments and return 1.
+ *
+ * Return value: non-0 on failure
+ **/
+int
+raptor_www_set_ssl_verify_options(raptor_www* www, int verify_peer,
+                                  int verify_host)
+{
+  int rc = 1; /* failure unless a capable backend is compiled in */
+
+#ifdef RAPTOR_WWW_LIBCURL
+  rc = raptor_www_curl_set_ssl_verify_options(www, verify_peer,
+                                              verify_host);
+#endif
+
+  return rc;
+}
+
+
+
+/**
+ * raptor_www_get_connection:
+ * @www: #raptor_www object
+ *
+ * Get WWW library connection object.
+ *
+ * Return the internal WWW connection handle.  For libcurl, this
+ * returns the CURL handle and for libxml the context.  Otherwise
+ * it returns NULL.  When both backends are compiled in, the libcurl
+ * handle takes precedence.
+ *
+ * Return value: connection pointer
+ **/
+void*
+raptor_www_get_connection(raptor_www* www)
+{
+  void* connection = NULL;
+
+#if defined(RAPTOR_WWW_LIBCURL)
+  connection = www->curl_handle;
+#elif defined(RAPTOR_WWW_LIBXML)
+  connection = www->ctxt;
+#endif
+
+  return connection;
+}
+
+
+/**
+ * raptor_www_abort:
+ * @www: WWW object
+ * @reason: abort reason message
+ *
+ * Abort an ongoing raptor WWW operation.
+ *
+ * This is typically used within one of the raptor WWW handlers
+ * when retrieval need no longer continue due to another
+ * processing issue or error.  It marks the WWW object as failed;
+ * the @reason is accepted but not recorded or reported by this
+ * function.
+ **/
+void
+raptor_www_abort(raptor_www* www, const char *reason)
+{
+  (void)reason; /* not used here */
+
+  www->failed = 1;
+}
+
+
+/*
+ * raptor_www_error:
+ * @www: WWW object
+ * @message: printf-style format string
+ * @...: arguments for @message
+ *
+ * Report an error for a WWW object.
+ *
+ * Forwards the formatted message to raptor_log_error_varargs() at
+ * level RAPTOR_LOG_LEVEL_ERROR, using the WWW object's world and
+ * locator.
+ */
+void
+raptor_www_error(raptor_www* www, const char *message, ...)
+{
+  va_list ap;
+
+  va_start(ap, message);
+  raptor_log_error_varargs(www->world, RAPTOR_LOG_LEVEL_ERROR,
+                           &www->locator, message, ap);
+  va_end(ap);
+}
+
+
+/*
+ * raptor_www_file_handle_fetch:
+ * @www: WWW object
+ * @fh: open file handle to read from
+ *
+ * INTERNAL - Read a file handle to the end, passing the content on.
+ *
+ * Reads blocks of up to RAPTOR_WWW_BUFFER_SIZE bytes into the WWW
+ * object's buffer, adds each block's size to total_bytes and passes
+ * the block to the write_bytes handler, if one is set.  Stops at end
+ * of file, on a read error, or once the object is marked as failed
+ * (for example by a handler).  On success the status code is set
+ * to 200.
+ *
+ * Return value: non-0 on failure
+ */
+static int
+raptor_www_file_handle_fetch(raptor_www* www, FILE* fh)
+{
+  while(!feof(fh)) {
+    size_t len = fread(www->buffer, 1, RAPTOR_WWW_BUFFER_SIZE, fh);
+    if(len > 0) {
+      www->total_bytes += len;
+      /* NOTE(review): assumes www->buffer has room for
+       * RAPTOR_WWW_BUFFER_SIZE + 1 bytes - confirm against the struct
+       */
+      www->buffer[len]='\0';
+
+      if(www->write_bytes)
+        www->write_bytes(www, www->write_bytes_userdata, www->buffer, len, 1);
+    }
+
+    /* A read error leaves the end-of-file flag clear, so without this
+     * check the loop would never end
+     */
+    if(ferror(fh)) {
+      raptor_www_error(www, "file read failed");
+      www->failed = 1;
+    }
+
+    if(feof(fh) || www->failed)
+      break;
+  }
+
+  if(!www->failed)
+    www->status_code = 200;
+
+  return www->failed;
+}
+
+
+/*
+ * raptor_www_file_fetch:
+ * @www: WWW object with its uri field already set
+ *
+ * INTERNAL - Retrieve the content of a file: URI from the local filesystem.
+ *
+ * Converts the URI to a filename, refuses directories (where stat()
+ * support is compiled in), opens the file in binary mode and passes
+ * its content on via raptor_www_file_handle_fetch().
+ *
+ * The status code is set in the style of HTTP: 200 at the start, 404
+ * for a directory, and 403 (EACCES) or 404 (any other error) when
+ * the file cannot be opened.
+ *
+ * Return value: non-0 on failure
+ */
+static int
+raptor_www_file_fetch(raptor_www* www)
+{
+  char *filename;
+  FILE *fh;
+  unsigned char *uri_string = raptor_uri_as_string(www->uri);
+#if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
+  struct stat buf;
+#endif
+
+  www->status_code = 200;
+
+  filename = raptor_uri_uri_string_to_filename(uri_string);
+  if(!filename) {
+    raptor_www_error(www, "Not a file: URI");
+    return 1;
+  }
+
+#if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
+  if(!stat(filename, &buf) && S_ISDIR(buf.st_mode)) {
+    raptor_www_error(www, "Cannot read from a directory '%s'", filename);
+    RAPTOR_FREE(char*, filename);
+    www->status_code = 404;
+    return 1;
+  }
+#endif
+
+  fh = fopen(filename, "rb");
+  if(!fh) {
+    /* Save errno at once: the logging and free calls below may change it */
+    int open_errno = errno;
+
+    raptor_www_error(www, "file '%s' open failed - %s",
+                     filename, strerror(open_errno));
+    RAPTOR_FREE(char*, filename);
+    www->status_code = (open_errno == EACCES) ? 403: 404;
+    www->failed = 1;
+
+    return www->failed;
+  }
+
+  raptor_www_file_handle_fetch(www, fh);
+  fclose(fh);
+
+  RAPTOR_FREE(char*, filename);
+
+  return www->failed;
+}
+
+
+/**
+ * raptor_www_fetch:
+ * @www: WWW object
+ * @uri: URI to read from
+ *
+ * Start a WWW content retrieval for the given URI, returning data via the write_bytes handler.
+ *
+ * The WWW object keeps its own retrieval copy of @uri, replacing (and
+ * releasing) the one kept from any earlier fetch; the locator refers
+ * to the caller's @uri.  If a URI filter is set and returns non-0,
+ * nothing is retrieved and the filter's result is returned.  file:
+ * URIs are read from the local filesystem; other URIs go to the
+ * WWW backend that was compiled in.  A retrieval that finishes with
+ * a status code that is set and is not 200 is reported as an error.
+ *
+ * Return value: non-0 on failure.
+ **/
+int
+raptor_www_fetch(raptor_www *www, raptor_uri *uri)
+{
+  int status = 1;
+  raptor_uri* retrieval_uri;
+
+  /* Make the new URI before releasing the old one, then swap, so a
+   * URI from an earlier fetch on this object is not leaked
+   */
+  retrieval_uri = raptor_new_uri_for_retrieval(uri);
+  if(www->uri)
+    raptor_free_uri(www->uri);
+  www->uri = retrieval_uri;
+
+  www->locator.uri = uri;
+  www->locator.line= -1;
+  www->locator.column= -1;
+
+  /* without a retrieval URI there is nothing to fetch */
+  if(!www->uri) {
+    www->failed = 1;
+    return 1;
+  }
+
+  if(www->uri_filter) {
+    int rc = www->uri_filter(www->uri_filter_user_data, uri);
+    if(rc)
+      return rc;
+  }
+
+#ifdef RAPTOR_WWW_NONE
+  status = raptor_www_file_fetch(www);
+#else
+
+  if(raptor_uri_uri_string_is_file_uri(raptor_uri_as_string(www->uri)))
+    status = raptor_www_file_fetch(www);
+  else {
+#ifdef RAPTOR_WWW_LIBCURL
+    status = raptor_www_curl_fetch(www);
+#endif
+
+#ifdef RAPTOR_WWW_LIBXML
+    status = raptor_www_libxml_fetch(www);
+#endif
+
+#ifdef RAPTOR_WWW_LIBFETCH
+    status = raptor_www_libfetch_fetch(www);
+#endif
+  }
+
+#endif
+  /* the transfer worked but the server answered with a non-OK status */
+  if(!status && www->status_code && www->status_code != 200){
+    raptor_www_error(www, "Resolving URI failed with HTTP status %d",
+                     www->status_code);
+    status = 1;
+  }
+
+  www->failed = status;
+
+  return www->failed;
+}
+
+
+/*
+ * raptor_www_fetch_to_string_write_bytes:
+ * @www: WWW object
+ * @userdata: the #raptor_stringbuffer collecting the content
+ * @ptr: block of retrieved bytes
+ * @size: size of one item in @ptr
+ * @nmemb: number of items in @ptr
+ *
+ * INTERNAL - write_bytes handler used by raptor_www_fetch_to_string().
+ *
+ * Appends a copy of the @size * @nmemb bytes to the stringbuffer.  If
+ * a non-empty block cannot be appended the WWW object is marked as
+ * failed, so that truncated content is not handed back as a success.
+ */
+static void
+raptor_www_fetch_to_string_write_bytes(raptor_www* www, void *userdata,
+                                       const void *ptr, size_t size,
+                                       size_t nmemb)
+{
+  raptor_stringbuffer* sb = (raptor_stringbuffer*)userdata;
+  size_t len = size * nmemb;
+  int rc;
+
+  rc = raptor_stringbuffer_append_counted_string(sb, (unsigned char*)ptr,
+                                                 len, 1);
+  if(rc && len)
+    www->failed = 1;
+}
+
+
+/**
+ * raptor_www_fetch_to_string:
+ * @www: raptor_www object
+ * @uri: raptor_uri to retrieve
+ * @string_p: pointer to location to hold string
+ * @length_p: pointer to location to hold length of string (or NULL)
+ * @malloc_handler: pointer to malloc() to use to make string (or NULL)
+ *
+ * Start a WWW content retrieval for the given URI, returning the data in a new string.
+ *
+ * If @malloc_handler is null, raptor will allocate it using its
+ * own memory allocator.  *string_p is set to NULL on failure (and
+ * *length_p to 0 if length_p is not NULL).  A retrieval that yields
+ * no bytes at all is reported as a failure.
+ *
+ * The write_bytes handler set on @www is replaced for the duration
+ * of the call and restored before returning.
+ *
+ * Return value: non-0 on failure
+ **/
+RAPTOR_EXTERN_C
+int
+raptor_www_fetch_to_string(raptor_www *www, raptor_uri *uri,
+                           void **string_p, size_t *length_p,
+                           raptor_data_malloc_handler const malloc_handler)
+{
+  raptor_stringbuffer *sb = NULL;
+  void *str = NULL;
+  raptor_www_write_bytes_handler saved_write_bytes;
+  void *saved_write_bytes_userdata;
+
+  /* without somewhere to store the result there is nothing to do */
+  if(!string_p)
+    return 1;
+
+  /* set the documented failure results before anything can go wrong */
+  *string_p = NULL;
+  if(length_p)
+    *length_p=0;
+
+  sb = raptor_new_stringbuffer();
+  if(!sb)
+    return 1;
+
+  saved_write_bytes = www->write_bytes;
+  saved_write_bytes_userdata = www->write_bytes_userdata;
+  raptor_www_set_write_bytes_handler(www, raptor_www_fetch_to_string_write_bytes, sb);
+
+  if(!raptor_www_fetch(www, uri)) {
+    size_t len = raptor_stringbuffer_length(sb);
+    if(len) {
+      /* a NULL handler is documented to select raptor's own allocator */
+      if(malloc_handler)
+        str = (void*)malloc_handler(len+1);
+      else
+        str = RAPTOR_MALLOC(void*, len+1);
+
+      if(str) {
+        raptor_stringbuffer_copy_to_string(sb, (unsigned char*)str, len+1);
+        *string_p=str;
+        if(length_p)
+          *length_p=len;
+      }
+    }
+  }
+
+  raptor_free_stringbuffer(sb);
+
+  raptor_www_set_write_bytes_handler(www, saved_write_bytes, saved_write_bytes_userdata);
+
+  return (str == NULL);
+}
+
+
+/**
+ * raptor_www_get_final_uri:
+ * @www: #raptor_www object
+ *
+ * Get the WWW final resolved URI.
+ *
+ * This returns the URI used after any protocol redirection.  The
+ * result is made with raptor_uri_copy() from the URI stored in
+ * the WWW object.
+ *
+ * Return value: a new URI or NULL if not known.
+ **/
+raptor_uri*
+raptor_www_get_final_uri(raptor_www* www)
+{
+  if(!www->final_uri)
+    return NULL;
+
+  return raptor_uri_copy(www->final_uri);
+}
+
+
+/**
+ * raptor_www_set_final_uri_handler:
+ * @www: WWW object
+ * @handler: final URI handler function
+ * @user_data: final URI handler data
+ *
+ * Set the handler for the final resolved URI.
+ *
+ * Stores @handler and @user_data on the WWW object.  The handler is
+ * not called from this file; presumably a retrieval implementation
+ * calls it once the final URI (after any protocol redirection) is
+ * known - confirm against the backend implementations.
+ **/
+void
+raptor_www_set_final_uri_handler(raptor_www* www,
+                                 raptor_www_final_uri_handler handler,
+                                 void *user_data)
+{
+  www->final_uri_userdata = user_data;
+  www->final_uri_handler = handler;
+}