summaryrefslogtreecommitdiffstats
path: root/docs/html/restrict-parser-network-access.html
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 05:40:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 05:40:05 +0000
commit4038ab95a094b363f1748f3dcb51511a1217475d (patch)
tree7f393d66a783f91ddd263c78d681e485cf4f45ca /docs/html/restrict-parser-network-access.html
parentInitial commit. (diff)
downloadraptor2-4038ab95a094b363f1748f3dcb51511a1217475d.tar.xz
raptor2-4038ab95a094b363f1748f3dcb51511a1217475d.zip
Adding upstream version 2.0.16.upstream/2.0.16upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'docs/html/restrict-parser-network-access.html')
-rw-r--r--docs/html/restrict-parser-network-access.html152
1 files changed, 152 insertions, 0 deletions
diff --git a/docs/html/restrict-parser-network-access.html b/docs/html/restrict-parser-network-access.html
new file mode 100644
index 0000000..295d994
--- /dev/null
+++ b/docs/html/restrict-parser-network-access.html
@@ -0,0 +1,152 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<title>Restrict parser network access: Raptor RDF Syntax Library Manual</title>
+<meta name="generator" content="DocBook XSL Stylesheets Vsnapshot">
+<link rel="home" href="index.html" title="Raptor RDF Syntax Library Manual">
+<link rel="up" href="tutorial-parsing.html" title="Parsing syntaxes to RDF Triples">
+<link rel="prev" href="tutorial-parser-content.html" title="Provide syntax content to parse">
+<link rel="next" href="tutorial-parser-static-info.html" title="Querying parser static information">
+<meta name="generator" content="GTK-Doc V1.33.1 (XML mode)">
+<link rel="stylesheet" href="style.css" type="text/css">
+</head>
+<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
+<table class="navigation" id="top" width="100%" summary="Navigation header" cellpadding="2" cellspacing="5"><tr valign="middle">
+<td width="100%" align="left" class="shortcuts"></td>
+<td><a accesskey="h" href="index.html"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>
+<td><a accesskey="u" href="tutorial-parsing.html"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>
+<td><a accesskey="p" href="tutorial-parser-content.html"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>
+<td><a accesskey="n" href="tutorial-parser-static-info.html"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>
+</tr></table>
+<div class="section">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="restrict-parser-network-access"></a>Restrict parser network access</h2></div></div></div>
+<p>
+Parsing can cause network requests to be performed, especially
+if a URI is given as an argument such as with
+<a class="link" href="raptor2-section-parser.html#raptor-parser-parse-uri" title="raptor_parser_parse_uri ()"><code class="function">raptor_parser_parse_uri()</code></a>
+however there may also be indirect requests such as with the
+GRDDL parser that retrieves URIs depending on the results of
+initial parse requests. The URIs requested may not be wanted
+to be fetched or need to be filtered, and this can be done in
+three ways.
+</p>
+<div class="section">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="tutorial-filter-network-with-feature"></a>Filtering parser network requests with option <a class="link" href="raptor2-section-option.html#RAPTOR-OPTION-NO-NET:CAPS"><code class="literal">RAPTOR_OPTION_NO_NET</code></a>
+</h3></div></div></div>
+<p>
+The parser option
+<a class="link" href="raptor2-section-option.html#RAPTOR-OPTION-NO-NET:CAPS"><code class="literal">RAPTOR_OPTION_NO_NET</code></a>
+can be set with
+<a class="link" href="raptor2-section-parser.html#raptor-parser-set-option" title="raptor_parser_set_option ()"><code class="function">raptor_parser_set_option()</code></a>
+and forbids all network requests. There is no customisation with
+this approach, for that see the URI filter in the next section.
+</p>
+<pre class="programlisting">
+ rdf_parser = raptor_new_parser(world, "rdfxml");
+
+ /* Disable internal network requests */
+ raptor_parser_set_option(rdf_parser, RAPTOR_OPTION_NO_NET, NULL, 1);
+</pre>
+</div>
+<div class="section">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="tutorial-filter-network-www-uri-filter"></a>Filtering parser network requests with <a class="link" href="raptor2-section-www.html#raptor-www-set-uri-filter" title="raptor_www_set_uri_filter ()"><code class="function">raptor_www_set_uri_filter()</code></a>
+</h3></div></div></div>
+<p>
+The
+<a class="link" href="raptor2-section-www.html#raptor-www-set-uri-filter" title="raptor_www_set_uri_filter ()"><code class="function">raptor_www_set_uri_filter()</code></a>
+
+allows setting of a filtering function to operate on all URIs
+retrieved by a WWW connection. This connection can be used in
+parsing when operated by hand.
+</p>
+<pre class="programlisting">
+void write_bytes_handler(raptor_www* www, void *user_data,
+ const void *ptr, size_t size, size_t nmemb) {
+{
+ raptor_parser* rdf_parser = (raptor_parser*)user_data;
+
+ raptor_parser_parse_chunk(rdf_parser, (unsigned char*)ptr, size*nmemb, 0);
+}
+
+int uri_filter(void* filter_user_data, raptor_uri* uri) {
+ /* return non-0 to forbid the request */
+}
+
+int main(int argc, char *argv[]) {
+ ...
+
+ rdf_parser = raptor_new_parser(world, "rdfxml");
+ www = raptor_new_www(world);
+
+ /* filter all URI requests */
+ raptor_www_set_uri_filter(www, uri_filter, filter_user_data);
+
+ /* make WWW write bytes to parser */
+ raptor_www_set_write_bytes_handler(www, write_bytes_handler, rdf_parser);
+
+ raptor_parser_parse_start(rdf_parser, uri);
+ raptor_www_fetch(www, uri);
+ /* tell the parser that we are done */
+ raptor_parser_parse_chunk(rdf_parser, NULL, 0, 1);
+
+ raptor_free_www(www);
+ raptor_free_parser(rdf_parser);
+
+ ...
+}
+
+</pre>
+</div>
+<div class="section">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="tutorial-filter-network-parser-uri-filter"></a>Filtering parser network requests with <a class="link" href="raptor2-section-parser.html#raptor-parser-set-uri-filter" title="raptor_parser_set_uri_filter ()"><code class="function">raptor_parser_set_uri_filter()</code></a>
+</h3></div></div></div>
+<p>
+The
+<a class="link" href="raptor2-section-parser.html#raptor-parser-set-uri-filter" title="raptor_parser_set_uri_filter ()"><code class="function">raptor_parser_set_uri_filter()</code></a>
+allows setting of a filtering function to operate on all URIs that
+the parser sees. This operates on the internal raptor_www object
+used inside parsing to retrieve URIs, similar to that described in
+the <a class="link" href="restrict-parser-network-access.html#tutorial-filter-network-www-uri-filter" title="Filtering parser network requests with raptor_www_set_uri_filter()">previous section</a>.
+</p>
+<pre class="programlisting">
+ int uri_filter(void* filter_user_data, raptor_uri* uri) {
+ /* return non-0 to forbid the request */
+ }
+
+ rdf_parser = raptor_new_parser(world, "rdfxml");
+
+ raptor_parser_set_uri_filter(rdf_parser, uri_filter, filter_user_data);
+
+ /* parse content as normal */
+ raptor_parser_parse_uri(rdf_parser, uri, base_uri);
+</pre>
+</div>
+<div class="section">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="tutorial-filter-network-parser-timeout"></a>Setting timeout for parser network requests with option <a class="link" href="raptor2-section-option.html#RAPTOR-OPTION-WWW-TIMEOUT:CAPS"><code class="literal">RAPTOR_OPTION_WWW_TIMEOUT</code></a>
+</h3></div></div></div>
+<p>If the value of option
+<a class="link" href="raptor2-section-option.html#RAPTOR-OPTION-WWW-TIMEOUT:CAPS"><code class="literal">RAPTOR_OPTION_WWW_TIMEOUT</code></a>
+if set to a number &gt;0, it is used as the timeout in seconds
+for retrieving of URIs during parsing (primarily for GRDDL).
+This uses
+<a class="link" href="raptor2-section-www.html#raptor-www-set-connection-timeout" title="raptor_www_set_connection_timeout ()"><code class="function">raptor_www_set_connection_timeout()</code></a>
+internally.
+</p>
+<pre class="programlisting">
+ rdf_parser = raptor_new_parser(world, "grddl");
+
+ /* set internal URI retrieval maximum time to 5 seconds */
+ raptor_parser_set_option(rdf_parser, RAPTOR_OPTION_WWW_TIMEOUT, NULL, 5);
+</pre>
+</div>
+</div>
+<div class="footer">
+<hr>Generated by GTK-Doc V1.33.1</div>
+</body>
+</html> \ No newline at end of file