summaryrefslogtreecommitdiffstats
path: root/src/raptor_www_libxml.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/raptor_www_libxml.c')
-rw-r--r--src/raptor_www_libxml.c159
1 files changed, 159 insertions, 0 deletions
diff --git a/src/raptor_www_libxml.c b/src/raptor_www_libxml.c
new file mode 100644
index 0000000..e2b9f3e
--- /dev/null
+++ b/src/raptor_www_libxml.c
@@ -0,0 +1,159 @@
+/* -*- Mode: c; c-basic-offset: 2 -*-
+ *
+ * raptor_www_libxml.c - Raptor WWW retrieval via libxml2
+ *
+ * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/
+ * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/
+ *
+ * This package is Free Software and part of Redland http://librdf.org/
+ *
+ * It is licensed under the following three licenses as alternatives:
+ * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
+ * 2. GNU General Public License (GPL) V2 or any newer version
+ * 3. Apache License, V2.0 or any newer version
+ *
+ * You may not use this file except in compliance with at least one of
+ * the above three licenses.
+ *
+ * See LICENSE.html or LICENSE.txt at the top of this package for the
+ * complete terms and further detail along with the license texts for
+ * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
+ *
+ *
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <raptor_config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+
+/* Raptor includes */
+#include "raptor2.h"
+#include "raptor_internal.h"
+
+#ifdef RAPTOR_WWW_LIBXML
+
+/*
+ * raptor_www_libxml_init - prepare a WWW object for the libxml2 backend
+ * @www: WWW object
+ *
+ * Marks @www as having no open HTTP context and calls xmlNanoHTTPInit().
+ */
+void
+raptor_www_libxml_init(raptor_www *www)
+{
+  /* no request has been opened yet */
+  www->ctxt = NULL;
+
+  xmlNanoHTTPInit();
+}
+
+
+/*
+ * raptor_www_libxml_free - shut down the libxml2 backend
+ * @www: WWW object (not read or modified by this function)
+ *
+ * Calls xmlNanoHTTPCleanup().
+ */
+void
+raptor_www_libxml_free(raptor_www *www)
+{
+  (void)www; /* unused */
+
+  xmlNanoHTTPCleanup();
+}
+
+
+/*
+ * raptor_www_libxml_fetch - retrieve www->uri using libxml2 nanohttp
+ * @www: WWW object holding the request settings and callbacks
+ *
+ * Steps:
+ *  1. If www->proxy is set, pass it to xmlNanoHTTPScanProxy().
+ *  2. Build an extra-headers string from whichever of www->http_accept,
+ *     www->user_agent and www->cache_control are set, one per line,
+ *     each terminated by "\r\n".  http_accept and cache_control are
+ *     copied verbatim (no field name is prepended here); user_agent is
+ *     prefixed with "User-Agent: ".
+ *  3. Open the request with xmlNanoHTTPMethod(), which also stores the
+ *     content type in www->type.
+ *  4. Report the content type via www->content_type (if set), then
+ *     release it.
+ *  5. Record the HTTP status in www->status_code and pass the body to
+ *     www->write_bytes (if set) in chunks of up to
+ *     RAPTOR_WWW_BUFFER_SIZE bytes, adding each chunk length to
+ *     www->total_bytes.
+ *
+ * On every exit path taken after the request was opened, the context
+ * is closed and www->ctxt is reset to NULL.
+ *
+ * Return value: non-0 if the header buffer cannot be allocated, if the
+ * request cannot be opened, or if www->failed is set after the content
+ * type callback; otherwise the final value of www->failed.
+ */
+int
+raptor_www_libxml_fetch(raptor_www *www)
+{
+  static const char ua_prefix[] = "User-Agent: ";
+  const size_t ua_prefix_len = sizeof(ua_prefix) - 1;
+  char* headers = NULL;
+
+  if(www->proxy)
+    xmlNanoHTTPScanProxy(www->proxy);
+
+  /* cache_control is part of the test so that it is still sent when it
+   * is the only extra header configured */
+  if(www->http_accept || www->user_agent || www->cache_control) {
+    size_t accept_len = 0;
+    size_t ua_len = 0;
+    size_t cc_len = 0;
+    size_t len = 0;
+    char *p;
+
+    /* first pass: add up the space needed for each header line */
+    if(www->http_accept) {
+      accept_len = strlen(www->http_accept);
+      len += accept_len + 2; /* \r\n */
+    }
+
+    if(www->user_agent) {
+      ua_len = strlen(www->user_agent);
+      len += ua_prefix_len + ua_len + 2; /* prefix + \r\n */
+    }
+
+    if(www->cache_control) {
+      cc_len = strlen(www->cache_control);
+      len += cc_len + 2; /* \r\n */
+    }
+
+    headers = RAPTOR_MALLOC(char*, len + 1); /* +1 for the final NUL */
+    if(!headers)
+      return 1;
+
+    /* second pass: copy the lines in the same order they were sized */
+    p = headers;
+    if(www->http_accept) {
+      memcpy(p, www->http_accept, accept_len);
+      p += accept_len;
+      *p++ = '\r';
+      *p++ = '\n';
+    }
+    if(www->user_agent) {
+      memcpy(p, ua_prefix, ua_prefix_len);
+      p += ua_prefix_len;
+      memcpy(p, www->user_agent, ua_len);
+      p += ua_len;
+      *p++ = '\r';
+      *p++ = '\n';
+    }
+    if(www->cache_control) {
+      memcpy(p, www->cache_control, cc_len);
+      p += cc_len;
+      *p++ = '\r';
+      *p++ = '\n';
+    }
+    *p = '\0';
+  }
+
+  www->ctxt = xmlNanoHTTPMethod((const char*)raptor_uri_as_string(www->uri),
+                                NULL, /* HTTP method (default GET) */
+                                NULL, /* input string */
+                                &www->type,
+                                headers,
+                                0); /* input length - ilen */
+
+  /* the header string is only needed for the call above */
+  if(headers)
+    RAPTOR_FREE(char*, headers);
+
+  if(!www->ctxt)
+    return 1;
+
+  if(www->type) {
+    if(www->content_type) {
+      www->content_type(www, www->content_type_userdata, www->type);
+      if(www->failed) {
+        /* release the content type here too, exactly as the
+         * non-failing path below does, so it is not leaked */
+        xmlFree(www->type);
+        www->type = NULL;
+
+        xmlNanoHTTPClose(www->ctxt);
+        www->ctxt = NULL;
+        return 1;
+      }
+    }
+    xmlFree(www->type);
+    www->type = NULL;
+  }
+
+  www->status_code = xmlNanoHTTPReturnCode(www->ctxt);
+
+  while(1) {
+    int len = xmlNanoHTTPRead(www->ctxt, www->buffer, RAPTOR_WWW_BUFFER_SIZE);
+    if(len < 0)
+      break;
+
+    www->total_bytes += len;
+
+    if(www->write_bytes)
+      www->write_bytes(www, www->write_bytes_userdata, www->buffer, len, 1);
+
+    /* a read that did not fill the buffer ends the transfer, as does
+     * a failure flagged by the write callback */
+    if(len < RAPTOR_WWW_BUFFER_SIZE || www->failed)
+      break;
+  }
+
+  xmlNanoHTTPClose(www->ctxt);
+  /* do not leave a pointer to the closed context behind */
+  www->ctxt = NULL;
+
+  return www->failed;
+}
+
+#endif /* #ifdef RAPTOR_WWW_LIBXML*/