summaryrefslogtreecommitdiffstats
path: root/src/HtmlParser.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 19:37:08 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 19:37:08 +0000
commitd710a65c8b50bc3d4d0920dc6e865296f42edd5e (patch)
treed3bf9843448af9398b55f49a50a194bbaacd724e /src/HtmlParser.h
parentInitial commit. (diff)
downloadnghttp2-d710a65c8b50bc3d4d0920dc6e865296f42edd5e.tar.xz
nghttp2-d710a65c8b50bc3d4d0920dc6e865296f42edd5e.zip
Adding upstream version 1.59.0.upstream/1.59.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/HtmlParser.h94
1 files changed, 94 insertions, 0 deletions
diff --git a/src/HtmlParser.h b/src/HtmlParser.h
new file mode 100644
index 0000000..1e84688
--- /dev/null
+++ b/src/HtmlParser.h
@@ -0,0 +1,94 @@
+/*
+ * nghttp2 - HTTP/2 C Library
+ *
+ * Copyright (c) 2012 Tatsuhiro Tsujikawa
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef HTML_PARSER_H
+#define HTML_PARSER_H
+
+#include "nghttp2_config.h"
+
+#include <vector>
+#include <string>
+
+#ifdef HAVE_LIBXML2
+
+# include <libxml/HTMLparser.h>
+
+#endif // HAVE_LIBXML2
+
+namespace nghttp2 {
+
+enum ResourceType {
+ REQ_CSS = 1,
+ REQ_JS,
+ REQ_UNBLOCK_JS,
+ REQ_IMG,
+ REQ_OTHERS,
+};
+
+struct ParserData {
+ std::string base_uri;
+ std::vector<std::pair<std::string, ResourceType>> links;
+ // > 0 if we are inside "head" element.
+ int inside_head;
+ ParserData(const std::string &base_uri);
+};
+
+#ifdef HAVE_LIBXML2
+
+class HtmlParser {
+public:
+ HtmlParser(const std::string &base_uri);
+ ~HtmlParser();
+ int parse_chunk(const char *chunk, size_t size, int fin);
+ const std::vector<std::pair<std::string, ResourceType>> &get_links() const;
+ void clear_links();
+
+private:
+ int parse_chunk_internal(const char *chunk, size_t size, int fin);
+
+ std::string base_uri_;
+ htmlParserCtxtPtr parser_ctx_;
+ ParserData parser_data_;
+};
+
+#else // !HAVE_LIBXML2
+
+class HtmlParser {
+public:
+ HtmlParser(const std::string &base_uri) {}
+ int parse_chunk(const char *chunk, size_t size, int fin) { return 0; }
+ const std::vector<std::pair<std::string, ResourceType>> &get_links() const {
+ return links_;
+ }
+ void clear_links() {}
+
+private:
+ std::vector<std::pair<std::string, ResourceType>> links_;
+};
+
+#endif // !HAVE_LIBXML2
+
+} // namespace nghttp2
+
+#endif // HTML_PARSER_H