summary refs log tree commit diff stats
path: root/src/md4cpp.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/md4cpp.cc 303
1 files changed, 303 insertions, 0 deletions
diff --git a/src/md4cpp.cc b/src/md4cpp.cc
new file mode 100644
index 0000000..5edd3d7
--- /dev/null
+++ b/src/md4cpp.cc
@@ -0,0 +1,303 @@
+/**
+ * Copyright (c) 2022, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "md4cpp.hh"
+
+#include "base/is_utf8.hh"
+#include "base/lnav_log.hh"
+#include "emojis-json.h"
+#include "xml-entities-json.h"
+#include "yajlpp/yajlpp_def.hh"
+
+namespace md4cpp {
+
+static const typed_json_path_container<xml_entity> xml_entity_handlers = { // JSON bindings for a single xml_entity value
+ yajlpp::property_handler("characters").for_field(&xml_entity::xe_chars), // the "characters" property is stored in xml_entity::xe_chars
+};
+
+/**
+ * JSON bindings for the top-level object of the XML entities document.
+ * Every property whose name matches the entity pattern is parsed with
+ * xml_entity_handlers into an element of xml_entity_map::xem_entities.
+ */
+static const typed_json_path_container<xml_entity_map> xml_entity_map_handlers
+    = {
+        yajlpp::pattern_property_handler("(?<var_name>\\&\\w+;?)")
+            .with_synopsis("<name>")
+            .with_path_provider<xml_entity_map>(
+                [](xml_entity_map* entity_map,
+                   std::vector<std::string>& names_out) {
+                    // Report the key of every entity held by the map.
+                    for (const auto& entry : entity_map->xem_entities) {
+                        names_out.emplace_back(entry.first);
+                    }
+                })
+            .with_obj_provider<xml_entity, xml_entity_map>(
+                [](const yajlpp_provider_context& ypc,
+                   xml_entity_map* entity_map) {
+                    // The element is selected by the substring that
+                    // ypc.get_substr(0) returns for the matched property.
+                    return &entity_map->xem_entities[ypc.get_substr(0)];
+                })
+            .with_children(xml_entity_handlers),
+};
+
+static const typed_json_path_container<emoji> emoji_handlers = { // JSON bindings for a single emoji record
+ yajlpp::property_handler("emoji").for_field(&emoji::e_value), // the "emoji" property is stored in emoji::e_value
+ yajlpp::property_handler("shortname").for_field(&emoji::e_shortname), // the "shortname" property is stored in emoji::e_shortname
+};
+
+static const typed_json_path_container<emoji_map> emoji_map_handlers = { // JSON bindings for the top-level emoji document
+ yajlpp::property_handler("emojis#") // NOTE(review): the trailing '#' presumably marks an array-valued property -- confirm against yajlpp
+ .for_field(&emoji_map::em_emojis) // parsed values are stored in emoji_map::em_emojis
+ .with_children(emoji_handlers), // children are parsed with emoji_handlers
+};
+
+/**
+ * Parse the xml_entities_json resource into an xml_entity_map using
+ * xml_entity_map_handlers.
+ *
+ * The parser is configured with with_ignore_unused(true).  A failed parse
+ * is only detected through assert().
+ *
+ * @return The map produced by the parser.
+ */
+static xml_entity_map
+load_xml_entity_map()
+{
+    // The interned resource name only needs to be looked up once.
+    static const intern_string_t source_name
+        = intern_string::lookup(xml_entities_json.get_name());
+
+    auto result = xml_entity_map_handlers.parser_for(source_name)
+                      .with_ignore_unused(true)
+                      .of(xml_entities_json.to_string_fragment());
+    assert(result.isOk());
+
+    return result.unwrap();
+}
+
+/**
+ * Access the XML entity map, loading it with load_xml_entity_map() the
+ * first time this function is called.
+ *
+ * @return A reference to the function-local static map.
+ */
+const xml_entity_map&
+get_xml_entity_map()
+{
+    static const xml_entity_map entities = load_xml_entity_map();
+    return entities;
+}
+
+/**
+ * Parse the emojis_json resource into an emoji_map using
+ * emoji_map_handlers and then fill in the shortname index.
+ *
+ * The parser is configured with with_ignore_unused(true).  A failed parse
+ * is only detected through assert().
+ *
+ * @return The populated emoji map.
+ */
+static emoji_map
+load_emoji_map()
+{
+    // The interned resource name only needs to be looked up once.
+    static const intern_string_t source_name
+        = intern_string::lookup(emojis_json.get_name());
+
+    auto result = emoji_map_handlers.parser_for(source_name)
+                      .with_ignore_unused(true)
+                      .of(emojis_json.to_string_fragment());
+    assert(result.isOk());
+
+    auto loaded = result.unwrap();
+
+    // Make every parsed emoji reachable through its shortname.
+    for (auto& entry : loaded.em_emojis) {
+        loaded.em_shortname2emoji.emplace(entry.e_shortname, entry);
+    }
+
+    return loaded;
+}
+
+/**
+ * Access the emoji map, loading it with load_emoji_map() the first time
+ * this function is called.
+ *
+ * @return A reference to the function-local static map.
+ */
+const emoji_map&
+get_emoji_map()
+{
+    static const emoji_map emojis = load_emoji_map();
+    return emojis;
+}
+
+struct parse_userdata { // state handed to the md4c callbacks through their userdata pointer
+ event_handler& pu_handler; // handler that the callbacks forward block/span/text events to
+ std::string pu_error_msg; // error message saved by a callback when the handler returns an error
+};
+
+/**
+ * Translate an md4c block type and its detail pointer into the
+ * event_handler::block value that is passed to the handler.
+ *
+ * @param type The kind of block reported by md4c.
+ * @param detail The type-specific detail structure.  It is only used for
+ *   the block types that are forwarded as a typed detail pointer.
+ * @return The block value for the type, or a default-constructed block if
+ *   the type is not handled by the switch.
+ */
+static event_handler::block
+build_block(MD_BLOCKTYPE type, void* detail)
+{
+    switch (type) {
+        // Blocks that are represented by their md4c detail structure.
+        case MD_BLOCK_UL:
+            return static_cast<MD_BLOCK_UL_DETAIL*>(detail);
+        case MD_BLOCK_OL:
+            return static_cast<MD_BLOCK_OL_DETAIL*>(detail);
+        case MD_BLOCK_LI:
+            return static_cast<MD_BLOCK_LI_DETAIL*>(detail);
+        case MD_BLOCK_H:
+            return static_cast<MD_BLOCK_H_DETAIL*>(detail);
+        case MD_BLOCK_CODE:
+            return static_cast<MD_BLOCK_CODE_DETAIL*>(detail);
+        case MD_BLOCK_TABLE:
+            return static_cast<MD_BLOCK_TABLE_DETAIL*>(detail);
+        case MD_BLOCK_TD:
+            return static_cast<MD_BLOCK_TD_DETAIL*>(detail);
+
+        // Blocks that are represented by an empty tag type.
+        case MD_BLOCK_DOC:
+            return event_handler::block_doc{};
+        case MD_BLOCK_QUOTE:
+            return event_handler::block_quote{};
+        case MD_BLOCK_HR:
+            return event_handler::block_hr{};
+        case MD_BLOCK_HTML:
+            return event_handler::block_html{};
+        case MD_BLOCK_P:
+            return event_handler::block_p{};
+        case MD_BLOCK_THEAD:
+            return event_handler::block_thead{};
+        case MD_BLOCK_TBODY:
+            return event_handler::block_tbody{};
+        case MD_BLOCK_TR:
+            return event_handler::block_tr{};
+        case MD_BLOCK_TH:
+            return event_handler::block_th{};
+    }
+
+    // Reached only for a value that none of the cases above covers.
+    return {};
+}
+
+/**
+ * Translate an md4c span type and its detail pointer into the
+ * event_handler::span value that is passed to the handler.
+ *
+ * @param type The kind of span reported by md4c.
+ * @param detail The type-specific detail structure.  It is only used for
+ *   the span types that are forwarded as a typed detail pointer.
+ * @return The span value for the type, or a default-constructed span if
+ *   the type is not handled by the switch.
+ */
+static event_handler::span
+build_span(MD_SPANTYPE type, void* detail)
+{
+    switch (type) {
+        // Spans that are represented by their md4c detail structure.
+        case MD_SPAN_A:
+            return static_cast<MD_SPAN_A_DETAIL*>(detail);
+        case MD_SPAN_IMG:
+            return static_cast<MD_SPAN_IMG_DETAIL*>(detail);
+
+        // Spans that are represented by an empty tag type.
+        case MD_SPAN_EM:
+            return event_handler::span_em{};
+        case MD_SPAN_STRONG:
+            return event_handler::span_strong{};
+        case MD_SPAN_CODE:
+            return event_handler::span_code{};
+        case MD_SPAN_DEL:
+            return event_handler::span_del{};
+        case MD_SPAN_U:
+            return event_handler::span_u{};
+
+        default:
+            break;
+    }
+
+    // Any other span type yields a default-constructed value.
+    return {};
+}
+
+/**
+ * Callback installed as MD_PARSER::enter_block; forwards the block to
+ * event_handler::enter_block().
+ *
+ * @param type The type of the block.
+ * @param detail The type-specific detail pointer supplied by md4c.
+ * @param userdata Pointer to the parse_userdata for this parse.
+ * @return 0 if the handler succeeded, or 1 after saving the handler's
+ *   error message in the parse_userdata.
+ */
+static int
+md4cpp_enter_block(MD_BLOCKTYPE type, void* detail, void* userdata)
+{
+    auto& state = *static_cast<parse_userdata*>(userdata);
+    auto result = state.pu_handler.enter_block(build_block(type, detail));
+
+    if (!result.isErr()) {
+        return 0;
+    }
+
+    state.pu_error_msg = result.unwrapErr();
+    return 1;
+}
+
+/**
+ * Callback installed as MD_PARSER::leave_block; forwards the block to
+ * event_handler::leave_block().
+ *
+ * @param type The type of the block.
+ * @param detail The type-specific detail pointer supplied by md4c.
+ * @param userdata Pointer to the parse_userdata for this parse.
+ * @return 0 if the handler succeeded, or 1 after saving the handler's
+ *   error message in the parse_userdata.
+ */
+static int
+md4cpp_leave_block(MD_BLOCKTYPE type, void* detail, void* userdata)
+{
+    auto& state = *static_cast<parse_userdata*>(userdata);
+    auto result = state.pu_handler.leave_block(build_block(type, detail));
+
+    if (!result.isErr()) {
+        return 0;
+    }
+
+    state.pu_error_msg = result.unwrapErr();
+    return 1;
+}
+
+/**
+ * Callback installed as MD_PARSER::enter_span; forwards the span to
+ * event_handler::enter_span().
+ *
+ * @param type The type of the span.
+ * @param detail The type-specific detail pointer supplied by md4c.
+ * @param userdata Pointer to the parse_userdata for this parse.
+ * @return 0 if the handler succeeded, or 1 after saving the handler's
+ *   error message in the parse_userdata.
+ */
+static int
+md4cpp_enter_span(MD_SPANTYPE type, void* detail, void* userdata)
+{
+    auto& state = *static_cast<parse_userdata*>(userdata);
+    auto result = state.pu_handler.enter_span(build_span(type, detail));
+
+    if (!result.isErr()) {
+        return 0;
+    }
+
+    state.pu_error_msg = result.unwrapErr();
+    return 1;
+}
+
+/**
+ * Callback installed as MD_PARSER::leave_span; forwards the span to
+ * event_handler::leave_span().
+ *
+ * @param type The type of the span.
+ * @param detail The type-specific detail pointer supplied by md4c.
+ * @param userdata Pointer to the parse_userdata for this parse.
+ * @return 0 if the handler succeeded, or 1 after saving the handler's
+ *   error message in the parse_userdata.
+ */
+static int
+md4cpp_leave_span(MD_SPANTYPE type, void* detail, void* userdata)
+{
+    auto& state = *static_cast<parse_userdata*>(userdata);
+    auto result = state.pu_handler.leave_span(build_span(type, detail));
+
+    if (!result.isErr()) {
+        return 0;
+    }
+
+    state.pu_error_msg = result.unwrapErr();
+    return 1;
+}
+
+/**
+ * Callback installed as MD_PARSER::text; wraps the text in a
+ * string_fragment and forwards it to event_handler::text().
+ *
+ * @param type The type of the text reported by md4c.
+ * @param text Pointer to the first character of the text.
+ * @param size The number of characters in the text.
+ * @param userdata Pointer to the parse_userdata for this parse.
+ * @return 0 if the handler succeeded, or 1 after saving the handler's
+ *   error message in the parse_userdata.
+ */
+static int
+md4cpp_text(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata)
+{
+    auto& state = *static_cast<parse_userdata*>(userdata);
+    auto result = state.pu_handler.text(type, string_fragment(text, 0, size));
+
+    if (!result.isErr()) {
+        return 0;
+    }
+
+    state.pu_error_msg = result.unwrapErr();
+    return 1;
+}
+
+namespace details {
+/**
+ * Parse a markdown document and deliver block, span and text events to
+ * the given handler.
+ *
+ * The input is first scanned with is_utf8() and rejected if the scan
+ * reports an error.  The md4c parser is then run with the GitHub dialect
+ * and underline flags, with the permissive-autolink flags masked out.
+ *
+ * @param sf The markdown text to parse.
+ * @param eh The handler that receives the parse events.
+ * @return Ok on success.  Otherwise an error message: the UTF-8 scan
+ *   error, the message saved by the callback that aborted the parse, or a
+ *   generic message with md_parse()'s return code when no callback saved
+ *   a message.
+ */
+Result<void, std::string>
+parse(const string_fragment& sf, event_handler& eh)
+{
+    const char* utf8_errmsg = nullptr;
+    int utf8_faulty_bytes = 0;
+
+    auto scan_res = is_utf8((unsigned char*) sf.data(),
+                            sf.length(),
+                            &utf8_errmsg,
+                            &utf8_faulty_bytes);
+    if (utf8_errmsg != nullptr) {
+        return Err(
+            fmt::format(FMT_STRING("file has invalid UTF-8 at offset {}: {}"),
+                        scan_res.usr_end,
+                        utf8_errmsg));
+    }
+
+    MD_PARSER parser = {0};
+    auto pu = parse_userdata{eh};
+
+    parser.abi_version = 0;
+    parser.flags = (MD_DIALECT_GITHUB | MD_FLAG_UNDERLINE)
+        & ~(MD_FLAG_PERMISSIVEAUTOLINKS);
+    parser.enter_block = md4cpp_enter_block;
+    parser.leave_block = md4cpp_leave_block;
+    parser.enter_span = md4cpp_enter_span;
+    parser.leave_span = md4cpp_leave_span;
+    parser.text = md4cpp_text;
+
+    auto rc = md_parse(sf.data(), sf.length(), &parser, &pu);
+
+    if (rc == 0) {
+        return Ok();
+    }
+
+    if (pu.pu_error_msg.empty()) {
+        // md_parse() can fail without any callback having saved a message
+        // (for example, on an internal parser error), so make sure the
+        // caller never gets an empty error string.
+        return Err(fmt::format(
+            FMT_STRING("markdown parser failed with code {}"), rc));
+    }
+
+    return Err(pu.pu_error_msg);
+}
+} // namespace details
+
+} // namespace md4cpp