gnome-software/plugins/packagekit/gs-markdown.c

/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
 * vi:set noexpandtab tabstop=8 shiftwidth=8:
 *
 * Copyright (C) 2008 Richard Hughes <richard@hughsie.com>
 * Copyright (C) 2015 Kalev Lember <klember@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "config.h"

#include <string.h>
#include <glib.h>

#include "gs-markdown.h"

/*******************************************************************************
 *
 * This is a simple Markdown parser.
 * It can output to Pango, HTML or plain text. The following limitations are
 * already known, and are deliberate:
 *
 * - No code section support
 * - No ordered list support
 * - No blockquote section support
 * - No image support
 * - No email support
 * - No backslash escapes support
 * - No HTML escaping support
 * - Auto-escapes certain word patterns, like http://
 *
 * It does support the rest of the standard pretty well, although it's not
 * been run against any conformance tests. The parsing is single pass, with
 * a simple enumerated interpreter mode and a single line back-memory.
 *
 ******************************************************************************/

/*
 * GsMarkdownMode:
 *
 * The kind of block the parser is currently accumulating in its pending
 * buffer. The mode decides how the pending text is wrapped when flushed.
 */
typedef enum {
	GS_MARKDOWN_MODE_BLANK,		/* the last line processed was blank */
	GS_MARKDOWN_MODE_RULE,		/* a horizontal rule is pending */
	GS_MARKDOWN_MODE_BULLETT,	/* a bullet list item is pending */
	GS_MARKDOWN_MODE_PARA,		/* paragraph text is pending */
	GS_MARKDOWN_MODE_H1,		/* a level 1 header is pending */
	GS_MARKDOWN_MODE_H2,		/* a level 2 header is pending */
	GS_MARKDOWN_MODE_H3,		/* a level 3 header is pending */
	GS_MARKDOWN_MODE_H4,		/* a level 4 header is pending */
	GS_MARKDOWN_MODE_H5,		/* a level 5 header is pending */
	GS_MARKDOWN_MODE_H6,		/* a level 6 header is pending */
	GS_MARKDOWN_MODE_UNKNOWN	/* nothing parsed yet, or a line could not be added */
} GsMarkdownMode;

/*
 * GsMarkdownTags:
 *
 * The output-specific strings emitted around each kind of element. They are
 * filled in by gs_markdown_set_output_kind() with string literals, so they
 * are never freed. The three link_* members may be %NULL, in which case
 * "[title](url)" links are left untouched.
 */
typedef struct {
	const gchar *em_start;
	const gchar *em_end;
	const gchar *strong_start;
	const gchar *strong_end;
	const gchar *code_start;
	const gchar *code_end;
	const gchar *h1_start;
	const gchar *h1_end;
	const gchar *h2_start;
	const gchar *h2_end;
	const gchar *h3_start;
	const gchar *h3_end;
	const gchar *h4_start;
	const gchar *h4_end;
	const gchar *h5_start;
	const gchar *h5_end;
	const gchar *h6_start;
	const gchar *h6_end;
	const gchar *bullet_start;
	const gchar *bullet_end;
	const gchar *rule;		/* text emitted in place of a horizontal rule line */
	const gchar *link_prefix;	/* emitted before the link URL */
	const gchar *link_mid;		/* emitted between the URL and the title */
	const gchar *link_suffix;	/* emitted after the link title */
} GsMarkdownTags;

/* Instance state of the parser; one parse run at a time uses these fields. */
struct _GsMarkdown {
	GObject			 parent_instance;

	GsMarkdownMode		 mode;		/* kind of block held in @pending */
	GsMarkdownTags		 tags;		/* markup strings for @output */
	GsMarkdownOutputKind	 output;
	gint			 max_lines;	/* line limit; values <= 0 disable it */
	gint			 line_count;	/* paragraphs, bullets and rules emitted so far */
	gboolean		 smart_quoting;	/* turn paired " and ' into typographic quotes */
	gboolean		 escape;	/* run text through g_markup_escape_text() */
	gboolean		 autocode;	/* wrap code-looking words in backticks */
	gboolean		 autolinkify;	/* turn bare URLs into links (Pango output only) */
	GString			*pending;	/* text of the block being accumulated */
	GString			*processed;	/* finished output of the current parse */
};

G_DEFINE_TYPE (GsMarkdown, gs_markdown, G_TYPE_OBJECT)

/*
 * gs_markdown_to_text_line_is_rule:
 * @line: a single line of input
 *
 * Horizontal rules are created by placing three or more hyphens, asterisks,
 * or underscores on a line by themselves. Spaces may appear anywhere on the
 * line and are not counted; any other character disqualifies the line.
 *
 * Returns: %TRUE if @line is a horizontal rule
 **/
static gboolean
gs_markdown_to_text_line_is_rule (const gchar *line)
{
	const gchar *p;
	guint n_marks = 0;

	for (p = line; *p != '\0'; p++) {
		switch (*p) {
		case ' ':
			/* padding, allowed but not counted */
			break;
		case '-':
		case '*':
		case '_':
			n_marks++;
			break;
		default:
			/* anything else means this is ordinary text */
			return FALSE;
		}
	}

	return n_marks >= 3;
}

static gboolean
gs_markdown_to_text_line_is_bullet (const gchar **pline)
{
	const gchar *line = *pline;

	/* skip leading spaces */
	while (g_ascii_isspace (*line))
		line++;

	if (g_str_has_prefix (line, "- ") ||
	    g_str_has_prefix (line, "* ") ||
	    g_str_has_prefix (line, "+ ")) {
		*pline = line + 2;
		return TRUE;
	}

	return FALSE;
}

/*
 * gs_markdown_to_text_line_is_header_x:
 * @pline: (inout): pointer to the line; advanced past the marker on a match
 * @xx: the header level, i.e. the exact number of leading '#' required
 *
 * Matches a line starting with exactly @xx hashes followed by at least one
 * more character. On a match one whitespace character after the hashes is
 * skipped as well. @pline is left untouched when the line does not match.
 *
 * Returns: %TRUE if the line is a header of level @xx
 **/
static gboolean
gs_markdown_to_text_line_is_header_x (const gchar **pline,
				      guint xx)
{
	const gchar *cursor = *pline;
	guint n_hashes;

	/* consume at most @xx leading hashes */
	for (n_hashes = 0; n_hashes < xx && *cursor == '#'; n_hashes++)
		cursor++;

	/* too few hashes */
	if (n_hashes != xx)
		return FALSE;

	/* nothing after the marker, or this is a deeper header level */
	if (*cursor == '\0' || *cursor == '#')
		return FALSE;

	if (g_ascii_isspace (*cursor))
		cursor++;
	*pline = cursor;
	return TRUE;
}

/* Matches a "#" (level 1) header; on success *pline is moved past the marker. */
static gboolean
gs_markdown_to_text_line_is_header1 (const gchar **pline)
{
	return gs_markdown_to_text_line_is_header_x (pline, 1);
}

/* Matches a "##" (level 2) header; on success *pline is moved past the marker. */
static gboolean
gs_markdown_to_text_line_is_header2 (const gchar **pline)
{
	return gs_markdown_to_text_line_is_header_x (pline, 2);
}

/* Matches a "###" (level 3) header; on success *pline is moved past the marker. */
static gboolean
gs_markdown_to_text_line_is_header3 (const gchar **pline)
{
	return gs_markdown_to_text_line_is_header_x (pline, 3);
}

/* Matches a "####" (level 4) header; on success *pline is moved past the marker. */
static gboolean
gs_markdown_to_text_line_is_header4 (const gchar **pline)
{
	return gs_markdown_to_text_line_is_header_x (pline, 4);
}

/* Matches a "#####" (level 5) header; on success *pline is moved past the marker. */
static gboolean
gs_markdown_to_text_line_is_header5 (const gchar **pline)
{
	return gs_markdown_to_text_line_is_header_x (pline, 5);
}

/* Matches a "######" (level 6) header; on success *pline is moved past the marker. */
static gboolean
gs_markdown_to_text_line_is_header6 (const gchar **pline)
{
	return gs_markdown_to_text_line_is_header_x (pline, 6);
}

static gboolean
gs_markdown_to_text_line_is_header1_type2 (const gchar *line)
{
	return g_str_has_prefix (line, "===");
}

static gboolean
gs_markdown_to_text_line_is_header2_type2 (const gchar *line)
{
	return g_str_has_prefix (line, "---");
}

#if 0
static gboolean
gs_markdown_to_text_line_is_code (const gchar *line)
{
	return (g_str_has_prefix (line, "    ") ||
		g_str_has_prefix (line, "\t"));
}

static gboolean
gs_markdown_to_text_line_is_blockquote (const gchar *line)
{
	return (g_str_has_prefix (line, "> "));
}
#endif

/*
 * gs_markdown_to_text_line_is_blank:
 * @line: a single line of input
 *
 * A line is blank when it is empty or holds nothing but spaces and tabs.
 *
 * Returns: %TRUE if @line is blank
 **/
static gboolean
gs_markdown_to_text_line_is_blank (const gchar *line)
{
	/* the line is blank if the leading run of spaces and tabs reaches the end */
	return line[strspn (line, " \t")] == '\0';
}

static gchar *
gs_markdown_replace (const gchar *haystack,
		     const gchar *needle,
		     const gchar *replace)
{
	g_auto(GStrv) split = NULL;
	split = g_strsplit (haystack, needle, -1);
	return g_strjoinv (replace, split);
}

/*
 * gs_markdown_strstr_spaces:
 * @haystack: the text to search
 * @needle: the marker to look for
 *
 * Like strstr(), but skips a match that has a space directly before it and a
 * space in the byte directly after its first character. A match at the very
 * start of @haystack is always accepted.
 *
 * NOTE(review): only the byte at match + 1 is inspected, so for a marker
 * longer than one character that byte is part of the marker itself and the
 * match is never skipped. Confirm whether this is intended.
 *
 * Returns: (transfer none): pointer into @haystack, or %NULL if not found
 **/
static gchar *
gs_markdown_strstr_spaces (const gchar *haystack, const gchar *needle)
{
	const gchar *search_from = haystack;

	for (;;) {
		gchar *match = strstr (search_from, needle);

		if (match == NULL)
			return NULL;

		/* start of the string, always valid */
		if (match == haystack)
			return match;

		/* not surrounded by spaces, so this one counts */
		if (match[-1] != ' ' || match[1] != ' ')
			return match;

		/* free-standing marker, look further along */
		search_from = match + 1;
	}
}

static gchar *
gs_markdown_to_text_line_formatter (const gchar *line,
				    const gchar *formatter,
				    const gchar *left,
				    const gchar *right)
{
	guint len;
	gchar *str1;
	gchar *str2;
	gchar *start = NULL;
	gchar *middle = NULL;
	gchar *end = NULL;
	g_autofree gchar *copy = NULL;

	/* needed to know for shifts */
	len = (guint) strlen (formatter);
	if (len == 0)
		return NULL;

	/* find sections */
	copy = g_strdup (line);
	str1 = gs_markdown_strstr_spaces (copy, formatter);
	if (str1 != NULL) {
		*str1 = '\0';
		str2 = gs_markdown_strstr_spaces (str1+len, formatter);
		if (str2 != NULL) {
			*str2 = '\0';
			middle = str1 + len;
			start = copy;
			end = str2 + len;
		}
	}

	/* if we found, replace and keep looking for the same string */
	if (start != NULL && middle != NULL && end != NULL) {
		g_autofree gchar *temp = NULL;
		temp = g_strdup_printf ("%s%s%s%s%s", start, left, middle, right, end);
		/* recursive */
		return gs_markdown_to_text_line_formatter (temp, formatter, left, right);
	}

	/* not found, keep return as-is */
	return g_strdup (line);
}

/*
 * gs_markdown_to_text_line_format_sections:
 * @self: a #GsMarkdown
 * @line: text that contains no code spans
 *
 * Applies the inline formatting in a fixed order: strong ("**" then "__"),
 * emphasis ("*" then "_"), the " -- " to em-dash replacement and, if
 * enabled, smart quoting of double and then single quotes.
 *
 * Returns: (transfer full): the formatted text
 **/
static gchar *
gs_markdown_to_text_line_format_sections (GsMarkdown *self, const gchar *line)
{
	/* two-character markers must be handled before one-character ones */
	const struct {
		const gchar *marker;
		const gchar *open;
		const gchar *close;
	} emphasis[] = {
		{ "**", self->tags.strong_start, self->tags.strong_end },
		{ "__", self->tags.strong_start, self->tags.strong_end },
		{ "*", self->tags.em_start, self->tags.em_end },
		{ "_", self->tags.em_start, self->tags.em_end },
	};
	gchar *result = g_strdup (line);
	gchar *next;
	guint i;

	/* bold and italic */
	for (i = 0; i < G_N_ELEMENTS (emphasis); i++) {
		next = gs_markdown_to_text_line_formatter (result,
							   emphasis[i].marker,
							   emphasis[i].open,
							   emphasis[i].close);
		g_free (result);
		result = next;
	}

	/* em-dash */
	next = gs_markdown_replace (result, " -- ", " — ");
	g_free (result);
	result = next;

	/* smart quoting */
	if (self->smart_quoting) {
		next = gs_markdown_to_text_line_formatter (result, "\"", "“", "”");
		g_free (result);
		result = next;

		next = gs_markdown_to_text_line_formatter (result, "'", "‘", "’");
		g_free (result);
		result = next;
	}

	return result;
}

/*
 * gs_markdown_to_text_line_format:
 * @self: a #GsMarkdown
 * @line: the text to format
 *
 * Formats @line, leaving the contents of backtick-delimited code spans
 * untouched apart from wrapping them in the code tags.
 *
 * Returns: (transfer full): the formatted text
 **/
static gchar *
gs_markdown_to_text_line_format (GsMarkdown *self, const gchar *line)
{
	g_auto(GStrv) chunks = NULL;
	GString *out;
	guint i;

	/* optimise the trivial case where we don't have any code tags */
	if (strchr (line, '`') == NULL)
		return gs_markdown_to_text_line_format_sections (self, line);

	/* splitting at backticks makes every odd-numbered chunk a code span */
	chunks = g_strsplit (line, "`", -1);
	out = g_string_new ("");
	for (i = 0; chunks[i] != NULL; i++) {
		if (i % 2 == 0) {
			g_autofree gchar *formatted = NULL;

			formatted = gs_markdown_to_text_line_format_sections (self, chunks[i]);
			g_string_append (out, formatted);
		} else {
			/* just append without formatting */
			g_string_append (out, self->tags.code_start);
			g_string_append (out, chunks[i]);
			g_string_append (out, self->tags.code_end);
		}
	}
	return g_string_free (out, FALSE);
}

/*
 * gs_markdown_add_pending:
 * @self: a #GsMarkdown
 * @line: the text to add
 *
 * Appends @line, stripped of leading and trailing whitespace and followed by
 * a single space, to the pending buffer.
 *
 * Returns: %FALSE if the line limit has been reached and nothing was added
 **/
static gboolean
gs_markdown_add_pending (GsMarkdown *self, const gchar *line)
{
	g_autofree gchar *stripped = NULL;
	gboolean has_limit = self->max_lines > 0;

	/* would put us over the limit */
	if (has_limit && self->line_count >= self->max_lines)
		return FALSE;

	/* strip leading and trailing spaces */
	stripped = g_strdup (line);
	g_strstrip (stripped);

	/* the trailing space separates this line from the next one added */
	g_string_append_printf (self->pending, "%s ", stripped);
	return TRUE;
}

/*
 * gs_markdown_add_pending_header:
 * @self: a #GsMarkdown
 * @line: the header text, with the leading '#' marker already removed
 *
 * Adds header text to the pending buffer after removing an optional closing
 * sequence such as the trailing "##" in "## Title ##". Only a trailing run
 * of '#' that makes up the whole text or follows whitespace is removed, so
 * a '#' inside the text ("Bug #12") or attached to a word ("C#") is kept.
 *
 * Returns: %FALSE if the line limit has been reached and nothing was added
 **/
static gboolean
gs_markdown_add_pending_header (GsMarkdown *self, const gchar *line)
{
	g_autofree gchar *copy = NULL;
	gsize end;
	gsize run_start;

	copy = g_strdup (line);

	/* ignore trailing whitespace when looking for the closing sequence */
	end = strlen (copy);
	while (end > 0 && g_ascii_isspace (copy[end - 1]))
		end--;

	/* find where the trailing run of '#' begins */
	run_start = end;
	while (run_start > 0 && copy[run_start - 1] == '#')
		run_start--;

	/* strip trailing # */
	if (run_start < end &&
	    (run_start == 0 || g_ascii_isspace (copy[run_start - 1])))
		copy[run_start] = '\0';

	return gs_markdown_add_pending (self, copy);
}

/*
 * gs_markdown_count_chars_in_word:
 * @text: the word to examine
 * @find: the character to count
 *
 * Returns: the number of times @find occurs in @text
 **/
static guint
gs_markdown_count_chars_in_word (const gchar *text, gchar find)
{
	const gchar *p;
	guint n_found = 0;

	for (p = text; *p != '\0'; p++) {
		if (*p == find)
			n_found++;
	}
	return n_found;
}

static gboolean
gs_markdown_word_is_code (const gchar *text)
{
	/* already code */
	if (g_str_has_prefix (text, "`"))
		return FALSE;
	if (g_str_has_suffix (text, "`"))
		return FALSE;

	/* paths */
	if (g_str_has_prefix (text, "/"))
		return TRUE;

	/* bugzillas */
	if (g_str_has_prefix (text, "#"))
		return TRUE;

	/* patch files */
	if (g_strrstr (text, ".patch") != NULL)
		return TRUE;
	if (g_strrstr (text, ".diff") != NULL)
		return TRUE;

	/* function names */
	if (g_strrstr (text, "()") != NULL)
		return TRUE;

	/* email addresses */
	if (g_strrstr (text, "@") != NULL)
		return TRUE;

	/* compiler defines */
	if (text[0] != '_' &&
	    gs_markdown_count_chars_in_word (text, '_') > 1)
		return TRUE;

	/* nothing special */
	return FALSE;
}

static gchar *
gs_markdown_word_auto_format_code (const gchar *text)
{
	guint i;
	gchar *temp;
	gboolean ret = FALSE;
	g_auto(GStrv) words = NULL;

	/* split sentence up with space */
	words = g_strsplit (text, " ", -1);

	/* search each word */
	for (i = 0; words[i] != NULL; i++) {
		if (gs_markdown_word_is_code (words[i])) {
			temp = g_strdup_printf ("`%s`", words[i]);
			g_free (words[i]);
			words[i] = temp;
			ret = TRUE;
		}
	}

	/* no replacements, so just return a copy */
	if (!ret)
		return g_strdup (text);

	/* join the array back into a string */
	return g_strjoinv (" ", words);
}

static gboolean
gs_markdown_word_is_url (const gchar *text)
{
	if (g_str_has_prefix (text, "http://"))
		return TRUE;
	if (g_str_has_prefix (text, "https://"))
		return TRUE;
	if (g_str_has_prefix (text, "ftp://"))
		return TRUE;
	return FALSE;
}

static gchar *
gs_markdown_word_auto_format_urls (const gchar *text)
{
	guint i;
	gchar *temp;
	gboolean ret = FALSE;
	g_auto(GStrv) words = NULL;

	/* split sentence up with space */
	words = g_strsplit (text, " ", -1);

	/* search each word */
	for (i = 0; words[i] != NULL; i++) {
		if (gs_markdown_word_is_url (words[i])) {
			temp = g_strdup_printf ("<a href=\"%s\">%s</a>",
						words[i], words[i]);
			g_free (words[i]);
			words[i] = temp;
			ret = TRUE;
		}
	}

	/* no replacements, so just return a copy */
	if (!ret)
		return g_strdup (text);

	/* join the array back into a string */
	return g_strjoinv (" ", words);
}

static gchar *
gs_markdown_replace_links (GsMarkdown *self,
			   const gchar *text)
{
	GString *str = g_string_new ("");
	const gchar *start, *mid, *end, *from;

	/* it's: [title](https://....) */
	from = text;
	start = strchr (from, '[');
	while (start != NULL) {
		start += 1;
		mid = strstr (start, "](");
		if (mid != NULL) {
			mid += 2;
			end = strchr (mid, ')');
			if (end != NULL) {
				if (start > from)
					g_string_append_len (str, from, start - from - 1);
				g_string_append (str, self->tags.link_prefix);
				g_string_append_len (str, mid, end - mid);
				g_string_append (str, self->tags.link_mid);
				g_string_append_len (str, start, mid - start - 2);
				g_string_append (str, self->tags.link_suffix);
				from = end + 1;
				start = strchr (from, '[');
			} else {
				break;
			}
		} else {
			break;
		}
	}

	if (*from)
		g_string_append (str, from);

	return g_string_free (str, FALSE);
}

/*
 * gs_markdown_flush_pending:
 * @self: a #GsMarkdown
 *
 * Converts the text in the pending buffer according to the current mode,
 * appends the result to the processed output and empties the pending
 * buffer. Nothing happens while the mode is still unknown.
 *
 * The conversion steps run in this order: Pango character substitution
 * (only when escaping is off), auto-code, markup escaping, auto-linking of
 * bare URLs, "[title](url)" replacement and finally inline formatting.
 * Paragraphs, bullets and rules each add one to the line count; headers do
 * not.
 */
static void
gs_markdown_flush_pending (GsMarkdown *self)
{
	g_autofree gchar *copy = NULL;
	g_autofree gchar *formatted = NULL;
	const gchar *prefix = "";
	const gchar *suffix = "";
	gboolean is_text_block;
	gboolean emit = TRUE;
	gboolean counts_as_line = FALSE;
	gchar *next;

	/* no data yet */
	if (self->mode == GS_MARKDOWN_MODE_UNKNOWN)
		return;

	/* remove trailing spaces */
	while (g_str_has_suffix (self->pending->str, " "))
		g_string_set_size (self->pending, self->pending->len - 1);

	/* without escaping, Pango would choke on these, so swap them out */
	copy = g_strdup (self->pending->str);
	if (!self->escape && self->output == GS_MARKDOWN_OUTPUT_PANGO) {
		g_strdelimit (copy, "<", '(');
		g_strdelimit (copy, ">", ')');
		g_strdelimit (copy, "&", '+');
	}

	is_text_block = (self->mode == GS_MARKDOWN_MODE_PARA ||
			 self->mode == GS_MARKDOWN_MODE_BULLETT);

	/* check words for code */
	if (self->autocode && is_text_block) {
		next = gs_markdown_word_auto_format_code (copy);
		g_free (copy);
		copy = next;
	}

	/* escape; a pending rule holds our own rule tag (e.g. "<hr>"), which
	 * is markup rather than user text and must reach the output intact */
	if (self->escape && self->mode != GS_MARKDOWN_MODE_RULE) {
		next = g_markup_escape_text (copy, -1);
		g_free (copy);
		copy = next;
	}

	/* check words for URLS */
	if (self->autolinkify &&
	    self->output == GS_MARKDOWN_OUTPUT_PANGO &&
	    is_text_block) {
		next = gs_markdown_word_auto_format_urls (copy);
		g_free (copy);
		copy = next;
	}

	/* links are only replaced when the output kind defines all link tags */
	if (self->tags.link_prefix != NULL &&
	    self->tags.link_mid != NULL &&
	    self->tags.link_suffix != NULL) {
		next = gs_markdown_replace_links (self, copy);
		g_free (copy);
		copy = next;
	}

	/* do formatting */
	formatted = gs_markdown_to_text_line_format (self, copy);

	/* pick the wrapping for this kind of block */
	switch (self->mode) {
	case GS_MARKDOWN_MODE_BULLETT:
		prefix = self->tags.bullet_start;
		suffix = self->tags.bullet_end;
		counts_as_line = TRUE;
		break;
	case GS_MARKDOWN_MODE_H1:
		prefix = self->tags.h1_start;
		suffix = self->tags.h1_end;
		break;
	case GS_MARKDOWN_MODE_H2:
		prefix = self->tags.h2_start;
		suffix = self->tags.h2_end;
		break;
	case GS_MARKDOWN_MODE_H3:
		prefix = self->tags.h3_start;
		suffix = self->tags.h3_end;
		break;
	case GS_MARKDOWN_MODE_H4:
		prefix = self->tags.h4_start;
		suffix = self->tags.h4_end;
		break;
	case GS_MARKDOWN_MODE_H5:
		prefix = self->tags.h5_start;
		suffix = self->tags.h5_end;
		break;
	case GS_MARKDOWN_MODE_H6:
		prefix = self->tags.h6_start;
		suffix = self->tags.h6_end;
		break;
	case GS_MARKDOWN_MODE_PARA:
	case GS_MARKDOWN_MODE_RULE:
		counts_as_line = TRUE;
		break;
	default:
		/* blank mode produces no output of its own */
		emit = FALSE;
		break;
	}

	if (emit) {
		g_string_append_printf (self->processed, "%s%s%s\n",
					prefix, formatted, suffix);
		if (counts_as_line)
			self->line_count++;
	}

	/* clear */
	g_string_truncate (self->pending, 0);
}

/*
 * gs_markdown_to_text_line_process:
 * @self: a #GsMarkdown
 * @line: a single line of input
 *
 * Classifies @line and updates the parser state. The checks run in a fixed
 * order: blank line, "===" underline, "---" underline, horizontal rule,
 * bullet, "#" headers of level 1 to 6, and finally paragraph text. Any line
 * that starts a new block first flushes the pending one.
 *
 * NOTE(review): a line starting with "===" or "---" is consumed here even
 * when the previous block is not a paragraph, so such a line never reaches
 * the rule check. Confirm whether that is intended.
 *
 * Returns: %FALSE if the line could not be added because the line limit was
 *   reached; the mode is then reset to unknown
 */
static gboolean
gs_markdown_to_text_line_process (GsMarkdown *self, const gchar *line)
{
	static const struct {
		gboolean	(*matches) (const gchar **pline);
		GsMarkdownMode	  mode;
	} hash_headers[] = {
		{ gs_markdown_to_text_line_is_header1, GS_MARKDOWN_MODE_H1 },
		{ gs_markdown_to_text_line_is_header2, GS_MARKDOWN_MODE_H2 },
		{ gs_markdown_to_text_line_is_header3, GS_MARKDOWN_MODE_H3 },
		{ gs_markdown_to_text_line_is_header4, GS_MARKDOWN_MODE_H4 },
		{ gs_markdown_to_text_line_is_header5, GS_MARKDOWN_MODE_H5 },
		{ gs_markdown_to_text_line_is_header6, GS_MARKDOWN_MODE_H6 },
	};
	gboolean ret = TRUE;
	gboolean is_header = FALSE;
	guint i;

	if (gs_markdown_to_text_line_is_blank (line)) {
		gs_markdown_flush_pending (self);
		/* a new line after a list is the end of list, not a gap */
		if (self->mode != GS_MARKDOWN_MODE_BULLETT)
			ret = gs_markdown_add_pending (self, "\n");
		self->mode = GS_MARKDOWN_MODE_BLANK;
	} else if (gs_markdown_to_text_line_is_header1_type2 (line)) {
		/* the underline promotes the pending paragraph to a header */
		if (self->mode == GS_MARKDOWN_MODE_PARA)
			self->mode = GS_MARKDOWN_MODE_H1;
	} else if (gs_markdown_to_text_line_is_header2_type2 (line)) {
		if (self->mode == GS_MARKDOWN_MODE_PARA)
			self->mode = GS_MARKDOWN_MODE_H2;
	} else if (gs_markdown_to_text_line_is_rule (line)) {
		gs_markdown_flush_pending (self);
		self->mode = GS_MARKDOWN_MODE_RULE;
		ret = gs_markdown_add_pending (self, self->tags.rule);
	} else if (gs_markdown_to_text_line_is_bullet (&line)) {
		gs_markdown_flush_pending (self);
		self->mode = GS_MARKDOWN_MODE_BULLETT;
		ret = gs_markdown_add_pending (self, line);
	} else {
		/* "#" headers, shallowest level first */
		for (i = 0; i < G_N_ELEMENTS (hash_headers) && !is_header; i++) {
			if (!hash_headers[i].matches (&line))
				continue;
			gs_markdown_flush_pending (self);
			self->mode = hash_headers[i].mode;
			ret = gs_markdown_add_pending_header (self, line);
			is_header = TRUE;
		}

		if (!is_header) {
			/* paragraph: start a new one only after a gap */
			if (self->mode == GS_MARKDOWN_MODE_BLANK ||
			    self->mode == GS_MARKDOWN_MODE_UNKNOWN) {
				gs_markdown_flush_pending (self);
				self->mode = GS_MARKDOWN_MODE_PARA;
			}

			/* add to pending */
			ret = gs_markdown_add_pending (self, line);
		}
	}

	/* if we failed to add, we don't know the mode */
	if (!ret)
		self->mode = GS_MARKDOWN_MODE_UNKNOWN;
	return ret;
}

/*
 * gs_markdown_set_output_kind:
 * @self: a #GsMarkdown
 * @output: the output format to produce
 *
 * Stores @output and loads the matching set of markup tags together with
 * the default escaping and auto-linking behaviour for that format. An
 * unknown @output is still stored, but only logs a warning and leaves the
 * tags and flags as they were.
 */
static void
gs_markdown_set_output_kind (GsMarkdown *self, GsMarkdownOutputKind output)
{
	g_return_if_fail (GS_IS_MARKDOWN (self));

	self->output = output;
	switch (output) {
	case GS_MARKDOWN_OUTPUT_PANGO:
		/* PangoMarkup */
		self->tags.em_start = "<i>";
		self->tags.em_end = "</i>";
		self->tags.strong_start = "<b>";
		self->tags.strong_end = "</b>";
		self->tags.code_start = "<tt>";
		self->tags.code_end = "</tt>";
		self->tags.h1_start = "\n<big>";
		self->tags.h1_end = "</big>\n";
		self->tags.h2_start = "\n<b>";
		self->tags.h2_end = "</b>\n";
		self->tags.h3_start = "\n<b>";
		self->tags.h3_end = "</b>\n";
		self->tags.h4_start = "\n<b>";
		self->tags.h4_end = "</b>\n";
		self->tags.h5_start = "\n<b>";
		self->tags.h5_end = "</b>\n";
		self->tags.h6_start = "\n<b>";
		self->tags.h6_end = "</b>\n";
		self->tags.bullet_start = "• ";
		self->tags.bullet_end = "";
		self->tags.rule = "⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯\n";
		self->tags.link_prefix = "<a href=\"";
		self->tags.link_mid = "\">";
		self->tags.link_suffix = "</a>";
		self->escape = TRUE;
		self->autolinkify = TRUE;
		break;
	case GS_MARKDOWN_OUTPUT_HTML:
		/* XHTML */
		self->tags.em_start = "<em>";
		/* must be a closing tag, otherwise the emphasis is never ended */
		self->tags.em_end = "</em>";
		self->tags.strong_start = "<strong>";
		self->tags.strong_end = "</strong>";
		self->tags.code_start = "<code>";
		self->tags.code_end = "</code>";
		self->tags.h1_start = "<h1>";
		self->tags.h1_end = "</h1>";
		self->tags.h2_start = "<h2>";
		self->tags.h2_end = "</h2>";
		self->tags.h3_start = "<h3>";
		self->tags.h3_end = "</h3>";
		self->tags.h4_start = "<h4>";
		self->tags.h4_end = "</h4>";
		self->tags.h5_start = "<h5>";
		self->tags.h5_end = "</h5>";
		self->tags.h6_start = "<h6>";
		self->tags.h6_end = "</h6>";
		self->tags.bullet_start = "<li>";
		self->tags.bullet_end = "</li>";
		self->tags.rule = "<hr>";
		self->tags.link_prefix = "<a href=\"";
		self->tags.link_mid = "\">";
		self->tags.link_suffix = "</a>";
		self->escape = TRUE;
		self->autolinkify = TRUE;
		break;
	case GS_MARKDOWN_OUTPUT_TEXT:
		/* plain text */
		self->tags.em_start = "";
		self->tags.em_end = "";
		self->tags.strong_start = "";
		self->tags.strong_end = "";
		self->tags.code_start = "";
		self->tags.code_end = "";
		self->tags.h1_start = "[";
		self->tags.h1_end = "]";
		self->tags.h2_start = "-";
		self->tags.h2_end = "-";
		self->tags.h3_start = "  ";
		self->tags.h3_end = "  ";
		self->tags.h4_start = "   ";
		self->tags.h4_end = "   ";
		self->tags.h5_start = "    ";
		self->tags.h5_end = "    ";
		self->tags.h6_start = "     ";
		self->tags.h6_end = "     ";
		self->tags.bullet_start = "* ";
		self->tags.bullet_end = "";
		self->tags.rule = " ----- \n";
		/* no link tags: "[title](url)" is left as written */
		self->tags.link_prefix = NULL;
		self->tags.link_mid = NULL;
		self->tags.link_suffix = NULL;
		self->escape = FALSE;
		self->autolinkify = FALSE;
		break;
	default:
		g_warning ("unknown output enum");
		break;
	}
}

/*
 * gs_markdown_set_max_lines:
 * @self: a #GsMarkdown
 * @max_lines: the maximum number of output lines; values of zero or less
 *   disable the limit
 *
 * Sets the number of lines after which no further input is accepted.
 */
void
gs_markdown_set_max_lines (GsMarkdown *self, gint max_lines)
{
	g_return_if_fail (GS_IS_MARKDOWN (self));
	self->max_lines = max_lines;
}

/*
 * gs_markdown_set_smart_quoting:
 * @self: a #GsMarkdown
 * @smart_quoting: whether paired straight quotes are replaced
 *
 * Controls whether paired " and ' characters are converted to typographic
 * opening and closing quotes.
 */
void
gs_markdown_set_smart_quoting (GsMarkdown *self, gboolean smart_quoting)
{
	g_return_if_fail (GS_IS_MARKDOWN (self));
	self->smart_quoting = smart_quoting;
}

/*
 * gs_markdown_set_escape:
 * @self: a #GsMarkdown
 * @escape: whether the text is markup-escaped
 *
 * Controls whether the text is passed through g_markup_escape_text() before
 * formatting.
 */
void
gs_markdown_set_escape (GsMarkdown *self, gboolean escape)
{
	g_return_if_fail (GS_IS_MARKDOWN (self));
	self->escape = escape;
}

/*
 * gs_markdown_set_autocode:
 * @self: a #GsMarkdown
 * @autocode: whether code-looking words are formatted as code
 *
 * Controls whether words in paragraphs and bullets that look like code
 * (paths, function names and the like) are wrapped in backticks.
 */
void
gs_markdown_set_autocode (GsMarkdown *self, gboolean autocode)
{
	g_return_if_fail (GS_IS_MARKDOWN (self));
	self->autocode = autocode;
}

/*
 * gs_markdown_set_autolinkify:
 * @self: a #GsMarkdown
 * @autolinkify: whether bare URLs are turned into links
 *
 * Controls whether words in paragraphs and bullets that start with a URL
 * scheme are turned into links. This only has an effect for Pango output.
 */
void
gs_markdown_set_autolinkify (GsMarkdown *self, gboolean autolinkify)
{
	g_return_if_fail (GS_IS_MARKDOWN (self));
	self->autolinkify = autolinkify;
}

/*
 * gs_markdown_parse:
 * @self: a #GsMarkdown
 * @markdown: the Markdown text to convert
 *
 * Converts @markdown line by line into the output format chosen when @self
 * was created. Parsing stops early once the line limit is reached. Leading
 * and trailing newlines are removed from the result.
 *
 * Returns: (transfer full): the converted text
 */
gchar *
gs_markdown_parse (GsMarkdown *self, const gchar *markdown)
{
	g_auto(GStrv) lines = NULL;
	const gchar *body;
	gchar *result;
	gsize end;
	guint n_lines;
	guint i;

	g_return_val_if_fail (GS_IS_MARKDOWN (self), NULL);

	/* start from a clean state */
	self->mode = GS_MARKDOWN_MODE_UNKNOWN;
	self->line_count = 0;
	g_string_truncate (self->pending, 0);
	g_string_truncate (self->processed, 0);

	/* process each line until one is refused */
	lines = g_strsplit (markdown, "\n", -1);
	n_lines = g_strv_length (lines);
	for (i = 0; i < n_lines; i++) {
		if (!gs_markdown_to_text_line_process (self, lines[i]))
			break;
	}
	gs_markdown_flush_pending (self);

	/* remove trailing \n */
	end = self->processed->len;
	while (end > 0 && self->processed->str[end - 1] == '\n')
		end--;
	g_string_truncate (self->processed, end);

	/* skip leading \n, which can happen with headers in the Pango mode */
	body = self->processed->str;
	body += strspn (body, "\n");

	/* get a copy, then release the working buffers */
	result = g_strdup (body);
	g_string_truncate (self->pending, 0);
	g_string_truncate (self->processed, 0);
	return result;
}

/* GObject finalize handler: frees both working buffers, then chains up. */
static void
gs_markdown_finalize (GObject *object)
{
	GsMarkdown *self;

	g_return_if_fail (GS_IS_MARKDOWN (object));

	self = GS_MARKDOWN (object);

	/* TRUE also frees the character data held by each GString */
	g_string_free (self->pending, TRUE);
	g_string_free (self->processed, TRUE);

	G_OBJECT_CLASS (gs_markdown_parent_class)->finalize (object);
}

/* Class initialiser: installs the finalize handler that frees the buffers. */
static void
gs_markdown_class_init (GsMarkdownClass *klass)
{
	G_OBJECT_CLASS (klass)->finalize = gs_markdown_finalize;
}

/*
 * Instance initialiser: allocates the two working buffers and sets the
 * defaults, i.e. no line limit and all optional processing switched off.
 */
static void
gs_markdown_init (GsMarkdown *self)
{
	/* working buffers */
	self->pending = g_string_new ("");
	self->processed = g_string_new ("");

	/* parser state */
	self->mode = GS_MARKDOWN_MODE_UNKNOWN;

	/* options */
	self->max_lines = -1;
	self->escape = FALSE;
	self->autocode = FALSE;
	self->smart_quoting = FALSE;
}

/*
 * gs_markdown_new:
 * @output: the output format the parser should produce
 *
 * Creates a new parser set up for @output.
 *
 * Returns: (transfer full): a new #GsMarkdown
 */
GsMarkdown *
gs_markdown_new (GsMarkdownOutputKind output)
{
	GsMarkdown *self = g_object_new (GS_TYPE_MARKDOWN, NULL);

	gs_markdown_set_output_kind (self, output);
	return self;
}