1067 lines
25 KiB
C
1067 lines
25 KiB
C
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
|
||
* vi:set noexpandtab tabstop=8 shiftwidth=8:
|
||
*
|
||
* Copyright (C) 2008 Richard Hughes <richard@hughsie.com>
|
||
* Copyright (C) 2015 Kalev Lember <klember@redhat.com>
|
||
*
|
||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||
*/
|
||
|
||
#include "config.h"
|
||
|
||
#include <string.h>
|
||
#include <glib.h>
|
||
|
||
#include "gs-markdown.h"
|
||
|
||
/*******************************************************************************
 *
 * This is a simple Markdown parser.
 * It can output to Pango, HTML or plain text. The following limitations are
 * already known, and are deliberate:
 *
 * - No code section support
 * - No ordered list support
 * - No blockquote section support
 * - No image support
 * - No email support
 * - No backslash escapes support
 * - No HTML escaping support
 * - Auto-escapes certain word patterns, like http://
 *
 * It does support the rest of the standard pretty well, although it has not
 * been run against any conformance tests. The parsing is single pass, with
 * a simple enumerated interpreter mode and a single line of back-memory.
 *
 ******************************************************************************/
|
||
|
||
/* Interpreter state: the kind of block the pending text belongs to. */
typedef enum {
	GS_MARKDOWN_MODE_BLANK,		/* last line seen was blank */
	GS_MARKDOWN_MODE_RULE,		/* horizontal rule */
	GS_MARKDOWN_MODE_BULLETT,	/* unordered list item */
	GS_MARKDOWN_MODE_PARA,		/* ordinary paragraph text */
	GS_MARKDOWN_MODE_H1,		/* heading levels 1 to 6 */
	GS_MARKDOWN_MODE_H2,
	GS_MARKDOWN_MODE_H3,
	GS_MARKDOWN_MODE_H4,
	GS_MARKDOWN_MODE_H5,
	GS_MARKDOWN_MODE_H6,
	GS_MARKDOWN_MODE_UNKNOWN	/* nothing parsed yet, or adding a line failed */
} GsMarkdownMode;
|
||
|
||
typedef struct {
|
||
const gchar *em_start;
|
||
const gchar *em_end;
|
||
const gchar *strong_start;
|
||
const gchar *strong_end;
|
||
const gchar *code_start;
|
||
const gchar *code_end;
|
||
const gchar *h1_start;
|
||
const gchar *h1_end;
|
||
const gchar *h2_start;
|
||
const gchar *h2_end;
|
||
const gchar *h3_start;
|
||
const gchar *h3_end;
|
||
const gchar *h4_start;
|
||
const gchar *h4_end;
|
||
const gchar *h5_start;
|
||
const gchar *h5_end;
|
||
const gchar *h6_start;
|
||
const gchar *h6_end;
|
||
const gchar *bullet_start;
|
||
const gchar *bullet_end;
|
||
const gchar *rule;
|
||
const gchar *link_prefix;
|
||
const gchar *link_mid;
|
||
const gchar *link_suffix;
|
||
} GsMarkdownTags;
|
||
|
||
struct _GsMarkdown {
|
||
GObject parent_instance;
|
||
|
||
GsMarkdownMode mode;
|
||
GsMarkdownTags tags;
|
||
GsMarkdownOutputKind output;
|
||
gint max_lines;
|
||
gint line_count;
|
||
gboolean smart_quoting;
|
||
gboolean escape;
|
||
gboolean autocode;
|
||
gboolean autolinkify;
|
||
GString *pending;
|
||
GString *processed;
|
||
};
|
||
|
||
G_DEFINE_TYPE (GsMarkdown, gs_markdown, G_TYPE_OBJECT)
|
||
|
||
/*
|
||
* gs_markdown_to_text_line_is_rule:
|
||
*
|
||
* Horizontal rules are created by placing three or more hyphens, asterisks,
|
||
* or underscores on a line by themselves.
|
||
* You may use spaces between the hyphens or asterisks.
|
||
**/
|
||
static gboolean
|
||
gs_markdown_to_text_line_is_rule (const gchar *line)
|
||
{
|
||
guint i;
|
||
guint len;
|
||
guint count = 0;
|
||
g_autofree gchar *copy = NULL;
|
||
|
||
len = (guint) strlen (line);
|
||
if (len == 0)
|
||
return FALSE;
|
||
|
||
/* replace non-rule chars with ~ */
|
||
copy = g_strdup (line);
|
||
g_strcanon (copy, "-*_ ", '~');
|
||
for (i = 0; i < len; i++) {
|
||
if (copy[i] == '~')
|
||
return FALSE;
|
||
if (copy[i] != ' ')
|
||
count++;
|
||
}
|
||
|
||
/* if we matched, return true */
|
||
if (count >= 3)
|
||
return TRUE;
|
||
return FALSE;
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_to_text_line_is_bullet (const gchar **pline)
|
||
{
|
||
const gchar *line = *pline;
|
||
|
||
/* skip leading spaces */
|
||
while (g_ascii_isspace (*line))
|
||
line++;
|
||
|
||
if (g_str_has_prefix (line, "- ") ||
|
||
g_str_has_prefix (line, "* ") ||
|
||
g_str_has_prefix (line, "+ ")) {
|
||
*pline = line + 2;
|
||
return TRUE;
|
||
}
|
||
|
||
return FALSE;
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_to_text_line_is_header_x (const gchar **pline,
|
||
guint xx)
|
||
{
|
||
const gchar *line = *pline;
|
||
|
||
while (*line == '#' && xx > 0) {
|
||
line++;
|
||
xx--;
|
||
}
|
||
|
||
if (xx == 0 && *line != '\0' && *line != '#') {
|
||
if (g_ascii_isspace (*line))
|
||
line++;
|
||
*pline = line;
|
||
return TRUE;
|
||
}
|
||
|
||
return FALSE;
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_to_text_line_is_header1 (const gchar **pline)
|
||
{
|
||
return gs_markdown_to_text_line_is_header_x (pline, 1);
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_to_text_line_is_header2 (const gchar **pline)
|
||
{
|
||
return gs_markdown_to_text_line_is_header_x (pline, 2);
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_to_text_line_is_header3 (const gchar **pline)
|
||
{
|
||
return gs_markdown_to_text_line_is_header_x (pline, 3);
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_to_text_line_is_header4 (const gchar **pline)
|
||
{
|
||
return gs_markdown_to_text_line_is_header_x (pline, 4);
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_to_text_line_is_header5 (const gchar **pline)
|
||
{
|
||
return gs_markdown_to_text_line_is_header_x (pline, 5);
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_to_text_line_is_header6 (const gchar **pline)
|
||
{
|
||
return gs_markdown_to_text_line_is_header_x (pline, 6);
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_to_text_line_is_header1_type2 (const gchar *line)
|
||
{
|
||
return g_str_has_prefix (line, "===");
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_to_text_line_is_header2_type2 (const gchar *line)
|
||
{
|
||
return g_str_has_prefix (line, "---");
|
||
}
|
||
|
||
#if 0
/* Disabled: code sections and blockquotes are not supported by this parser. */

/* A code line starts with a space or a tab. */
static gboolean
gs_markdown_to_text_line_is_code (const gchar *line)
{
	if (g_str_has_prefix (line, " "))
		return TRUE;
	return g_str_has_prefix (line, "\t");
}

/* A blockquote line starts with "> ". */
static gboolean
gs_markdown_to_text_line_is_blockquote (const gchar *line)
{
	return g_str_has_prefix (line, "> ");
}
#endif
|
||
|
||
static gboolean
|
||
gs_markdown_to_text_line_is_blank (const gchar *line)
|
||
{
|
||
guint i;
|
||
guint len;
|
||
|
||
/* a line with no characters is blank by definition */
|
||
len = (guint) strlen (line);
|
||
if (len == 0)
|
||
return TRUE;
|
||
|
||
/* find if there are only space chars */
|
||
for (i = 0; i < len; i++) {
|
||
if (line[i] != ' ' && line[i] != '\t')
|
||
return FALSE;
|
||
}
|
||
|
||
/* if we matched, return true */
|
||
return TRUE;
|
||
}
|
||
|
||
static gchar *
|
||
gs_markdown_replace (const gchar *haystack,
|
||
const gchar *needle,
|
||
const gchar *replace)
|
||
{
|
||
g_auto(GStrv) split = NULL;
|
||
split = g_strsplit (haystack, needle, -1);
|
||
return g_strjoinv (replace, split);
|
||
}
|
||
|
||
static gchar *
|
||
gs_markdown_strstr_spaces (const gchar *haystack, const gchar *needle)
|
||
{
|
||
gchar *found;
|
||
const gchar *haystack_new = haystack;
|
||
|
||
retry:
|
||
/* don't find if surrounded by spaces */
|
||
found = strstr (haystack_new, needle);
|
||
if (found == NULL)
|
||
return NULL;
|
||
|
||
/* start of the string, always valid */
|
||
if (found == haystack)
|
||
return found;
|
||
|
||
/* end of the string, always valid */
|
||
if (*(found-1) == ' ' && *(found+1) == ' ') {
|
||
haystack_new = found+1;
|
||
goto retry;
|
||
}
|
||
return found;
|
||
}
|
||
|
||
static gchar *
|
||
gs_markdown_to_text_line_formatter (const gchar *line,
|
||
const gchar *formatter,
|
||
const gchar *left,
|
||
const gchar *right)
|
||
{
|
||
guint len;
|
||
gchar *str1;
|
||
gchar *str2;
|
||
gchar *start = NULL;
|
||
gchar *middle = NULL;
|
||
gchar *end = NULL;
|
||
g_autofree gchar *copy = NULL;
|
||
|
||
/* needed to know for shifts */
|
||
len = (guint) strlen (formatter);
|
||
if (len == 0)
|
||
return NULL;
|
||
|
||
/* find sections */
|
||
copy = g_strdup (line);
|
||
str1 = gs_markdown_strstr_spaces (copy, formatter);
|
||
if (str1 != NULL) {
|
||
*str1 = '\0';
|
||
str2 = gs_markdown_strstr_spaces (str1+len, formatter);
|
||
if (str2 != NULL) {
|
||
*str2 = '\0';
|
||
middle = str1 + len;
|
||
start = copy;
|
||
end = str2 + len;
|
||
}
|
||
}
|
||
|
||
/* if we found, replace and keep looking for the same string */
|
||
if (start != NULL && middle != NULL && end != NULL) {
|
||
g_autofree gchar *temp = NULL;
|
||
temp = g_strdup_printf ("%s%s%s%s%s", start, left, middle, right, end);
|
||
/* recursive */
|
||
return gs_markdown_to_text_line_formatter (temp, formatter, left, right);
|
||
}
|
||
|
||
/* not found, keep return as-is */
|
||
return g_strdup (line);
|
||
}
|
||
|
||
static gchar *
|
||
gs_markdown_to_text_line_format_sections (GsMarkdown *self, const gchar *line)
|
||
{
|
||
gchar *data = g_strdup (line);
|
||
gchar *temp;
|
||
|
||
/* bold1 */
|
||
temp = data;
|
||
data = gs_markdown_to_text_line_formatter (temp, "**",
|
||
self->tags.strong_start,
|
||
self->tags.strong_end);
|
||
g_free (temp);
|
||
|
||
/* bold2 */
|
||
temp = data;
|
||
data = gs_markdown_to_text_line_formatter (temp, "__",
|
||
self->tags.strong_start,
|
||
self->tags.strong_end);
|
||
g_free (temp);
|
||
|
||
/* italic1 */
|
||
temp = data;
|
||
data = gs_markdown_to_text_line_formatter (temp, "*",
|
||
self->tags.em_start,
|
||
self->tags.em_end);
|
||
g_free (temp);
|
||
|
||
/* italic2 */
|
||
temp = data;
|
||
data = gs_markdown_to_text_line_formatter (temp, "_",
|
||
self->tags.em_start,
|
||
self->tags.em_end);
|
||
g_free (temp);
|
||
|
||
/* em-dash */
|
||
temp = data;
|
||
data = gs_markdown_replace (temp, " -- ", " — ");
|
||
g_free (temp);
|
||
|
||
/* smart quoting */
|
||
if (self->smart_quoting) {
|
||
temp = data;
|
||
data = gs_markdown_to_text_line_formatter (temp, "\"", "“", "”");
|
||
g_free (temp);
|
||
|
||
temp = data;
|
||
data = gs_markdown_to_text_line_formatter (temp, "'", "‘", "’");
|
||
g_free (temp);
|
||
}
|
||
|
||
return data;
|
||
}
|
||
|
||
static gchar *
|
||
gs_markdown_to_text_line_format (GsMarkdown *self, const gchar *line)
|
||
{
|
||
GString *string;
|
||
gboolean mode = FALSE;
|
||
gchar *text;
|
||
guint i;
|
||
g_auto(GStrv) codes = NULL;
|
||
|
||
/* optimise the trivial case where we don't have any code tags */
|
||
text = strstr (line, "`");
|
||
if (text == NULL)
|
||
return gs_markdown_to_text_line_format_sections (self, line);
|
||
|
||
/* we want to parse the code sections without formatting */
|
||
codes = g_strsplit (line, "`", -1);
|
||
string = g_string_new ("");
|
||
for (i = 0; codes[i] != NULL; i++) {
|
||
if (!mode) {
|
||
text = gs_markdown_to_text_line_format_sections (self, codes[i]);
|
||
g_string_append (string, text);
|
||
g_free (text);
|
||
mode = TRUE;
|
||
} else {
|
||
/* just append without formatting */
|
||
g_string_append (string, self->tags.code_start);
|
||
g_string_append (string, codes[i]);
|
||
g_string_append (string, self->tags.code_end);
|
||
mode = FALSE;
|
||
}
|
||
}
|
||
return g_string_free (string, FALSE);
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_add_pending (GsMarkdown *self, const gchar *line)
|
||
{
|
||
g_autofree gchar *copy = NULL;
|
||
|
||
/* would put us over the limit */
|
||
if (self->max_lines > 0 && self->line_count >= self->max_lines)
|
||
return FALSE;
|
||
|
||
copy = g_strdup (line);
|
||
|
||
/* strip leading and trailing spaces */
|
||
g_strstrip (copy);
|
||
|
||
/* append */
|
||
g_string_append_printf (self->pending, "%s ", copy);
|
||
return TRUE;
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_add_pending_header (GsMarkdown *self, const gchar *line)
|
||
{
|
||
g_autofree gchar *copy = NULL;
|
||
|
||
/* strip trailing # */
|
||
copy = g_strdup (line);
|
||
g_strdelimit (copy, "#", ' ');
|
||
return gs_markdown_add_pending (self, copy);
|
||
}
|
||
|
||
static guint
|
||
gs_markdown_count_chars_in_word (const gchar *text, gchar find)
|
||
{
|
||
guint i;
|
||
guint len;
|
||
guint count = 0;
|
||
|
||
/* get length */
|
||
len = (guint) strlen (text);
|
||
if (len == 0)
|
||
return 0;
|
||
|
||
/* find matching chars */
|
||
for (i = 0; i < len; i++) {
|
||
if (text[i] == find)
|
||
count++;
|
||
}
|
||
return count;
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_word_is_code (const gchar *text)
|
||
{
|
||
/* already code */
|
||
if (g_str_has_prefix (text, "`"))
|
||
return FALSE;
|
||
if (g_str_has_suffix (text, "`"))
|
||
return FALSE;
|
||
|
||
/* paths */
|
||
if (g_str_has_prefix (text, "/"))
|
||
return TRUE;
|
||
|
||
/* bugzillas */
|
||
if (g_str_has_prefix (text, "#"))
|
||
return TRUE;
|
||
|
||
/* patch files */
|
||
if (g_strrstr (text, ".patch") != NULL)
|
||
return TRUE;
|
||
if (g_strrstr (text, ".diff") != NULL)
|
||
return TRUE;
|
||
|
||
/* function names */
|
||
if (g_strrstr (text, "()") != NULL)
|
||
return TRUE;
|
||
|
||
/* email addresses */
|
||
if (g_strrstr (text, "@") != NULL)
|
||
return TRUE;
|
||
|
||
/* compiler defines */
|
||
if (text[0] != '_' &&
|
||
gs_markdown_count_chars_in_word (text, '_') > 1)
|
||
return TRUE;
|
||
|
||
/* nothing special */
|
||
return FALSE;
|
||
}
|
||
|
||
static gchar *
|
||
gs_markdown_word_auto_format_code (const gchar *text)
|
||
{
|
||
guint i;
|
||
gchar *temp;
|
||
gboolean ret = FALSE;
|
||
g_auto(GStrv) words = NULL;
|
||
|
||
/* split sentence up with space */
|
||
words = g_strsplit (text, " ", -1);
|
||
|
||
/* search each word */
|
||
for (i = 0; words[i] != NULL; i++) {
|
||
if (gs_markdown_word_is_code (words[i])) {
|
||
temp = g_strdup_printf ("`%s`", words[i]);
|
||
g_free (words[i]);
|
||
words[i] = temp;
|
||
ret = TRUE;
|
||
}
|
||
}
|
||
|
||
/* no replacements, so just return a copy */
|
||
if (!ret)
|
||
return g_strdup (text);
|
||
|
||
/* join the array back into a string */
|
||
return g_strjoinv (" ", words);
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_word_is_url (const gchar *text)
|
||
{
|
||
if (g_str_has_prefix (text, "http://"))
|
||
return TRUE;
|
||
if (g_str_has_prefix (text, "https://"))
|
||
return TRUE;
|
||
if (g_str_has_prefix (text, "ftp://"))
|
||
return TRUE;
|
||
return FALSE;
|
||
}
|
||
|
||
static gchar *
|
||
gs_markdown_word_auto_format_urls (const gchar *text)
|
||
{
|
||
guint i;
|
||
gchar *temp;
|
||
gboolean ret = FALSE;
|
||
g_auto(GStrv) words = NULL;
|
||
|
||
/* split sentence up with space */
|
||
words = g_strsplit (text, " ", -1);
|
||
|
||
/* search each word */
|
||
for (i = 0; words[i] != NULL; i++) {
|
||
if (gs_markdown_word_is_url (words[i])) {
|
||
temp = g_strdup_printf ("<a href=\"%s\">%s</a>",
|
||
words[i], words[i]);
|
||
g_free (words[i]);
|
||
words[i] = temp;
|
||
ret = TRUE;
|
||
}
|
||
}
|
||
|
||
/* no replacements, so just return a copy */
|
||
if (!ret)
|
||
return g_strdup (text);
|
||
|
||
/* join the array back into a string */
|
||
return g_strjoinv (" ", words);
|
||
}
|
||
|
||
static gchar *
|
||
gs_markdown_replace_links (GsMarkdown *self,
|
||
const gchar *text)
|
||
{
|
||
GString *str = g_string_new ("");
|
||
const gchar *start, *mid, *end, *from;
|
||
|
||
/* it's: [title](https://....) */
|
||
from = text;
|
||
start = strchr (from, '[');
|
||
while (start != NULL) {
|
||
start += 1;
|
||
mid = strstr (start, "](");
|
||
if (mid != NULL) {
|
||
mid += 2;
|
||
end = strchr (mid, ')');
|
||
if (end != NULL) {
|
||
if (start > from)
|
||
g_string_append_len (str, from, start - from - 1);
|
||
g_string_append (str, self->tags.link_prefix);
|
||
g_string_append_len (str, mid, end - mid);
|
||
g_string_append (str, self->tags.link_mid);
|
||
g_string_append_len (str, start, mid - start - 2);
|
||
g_string_append (str, self->tags.link_suffix);
|
||
from = end + 1;
|
||
start = strchr (from, '[');
|
||
} else {
|
||
break;
|
||
}
|
||
} else {
|
||
break;
|
||
}
|
||
}
|
||
|
||
if (*from)
|
||
g_string_append (str, from);
|
||
|
||
return g_string_free (str, FALSE);
|
||
}
|
||
|
||
static void
|
||
gs_markdown_flush_pending (GsMarkdown *self)
|
||
{
|
||
g_autofree gchar *copy = NULL;
|
||
g_autofree gchar *temp = NULL;
|
||
|
||
/* no data yet */
|
||
if (self->mode == GS_MARKDOWN_MODE_UNKNOWN)
|
||
return;
|
||
|
||
/* remove trailing spaces */
|
||
while (g_str_has_suffix (self->pending->str, " "))
|
||
g_string_set_size (self->pending, self->pending->len - 1);
|
||
|
||
/* pango requires escaping */
|
||
copy = g_strdup (self->pending->str);
|
||
if (!self->escape && self->output == GS_MARKDOWN_OUTPUT_PANGO) {
|
||
g_strdelimit (copy, "<", '(');
|
||
g_strdelimit (copy, ">", ')');
|
||
g_strdelimit (copy, "&", '+');
|
||
}
|
||
|
||
/* check words for code */
|
||
if (self->autocode &&
|
||
(self->mode == GS_MARKDOWN_MODE_PARA ||
|
||
self->mode == GS_MARKDOWN_MODE_BULLETT)) {
|
||
temp = gs_markdown_word_auto_format_code (copy);
|
||
g_free (copy);
|
||
copy = temp;
|
||
}
|
||
|
||
/* escape */
|
||
if (self->escape) {
|
||
temp = g_markup_escape_text (copy, -1);
|
||
g_free (copy);
|
||
copy = temp;
|
||
}
|
||
|
||
/* check words for URLS */
|
||
if (self->autolinkify &&
|
||
self->output == GS_MARKDOWN_OUTPUT_PANGO &&
|
||
(self->mode == GS_MARKDOWN_MODE_PARA ||
|
||
self->mode == GS_MARKDOWN_MODE_BULLETT)) {
|
||
temp = gs_markdown_word_auto_format_urls (copy);
|
||
g_free (copy);
|
||
copy = temp;
|
||
}
|
||
|
||
if (self->tags.link_prefix != NULL &&
|
||
self->tags.link_mid != NULL &&
|
||
self->tags.link_suffix != NULL) {
|
||
temp = gs_markdown_replace_links (self, copy);
|
||
g_free (copy);
|
||
copy = temp;
|
||
}
|
||
|
||
/* do formatting */
|
||
temp = gs_markdown_to_text_line_format (self, copy);
|
||
if (self->mode == GS_MARKDOWN_MODE_BULLETT) {
|
||
g_string_append_printf (self->processed, "%s%s%s\n",
|
||
self->tags.bullet_start,
|
||
temp,
|
||
self->tags.bullet_end);
|
||
self->line_count++;
|
||
} else if (self->mode == GS_MARKDOWN_MODE_H1) {
|
||
g_string_append_printf (self->processed, "%s%s%s\n",
|
||
self->tags.h1_start,
|
||
temp,
|
||
self->tags.h1_end);
|
||
} else if (self->mode == GS_MARKDOWN_MODE_H2) {
|
||
g_string_append_printf (self->processed, "%s%s%s\n",
|
||
self->tags.h2_start,
|
||
temp,
|
||
self->tags.h2_end);
|
||
} else if (self->mode == GS_MARKDOWN_MODE_H3) {
|
||
g_string_append_printf (self->processed, "%s%s%s\n",
|
||
self->tags.h3_start,
|
||
temp,
|
||
self->tags.h3_end);
|
||
} else if (self->mode == GS_MARKDOWN_MODE_H4) {
|
||
g_string_append_printf (self->processed, "%s%s%s\n",
|
||
self->tags.h4_start,
|
||
temp,
|
||
self->tags.h4_end);
|
||
} else if (self->mode == GS_MARKDOWN_MODE_H5) {
|
||
g_string_append_printf (self->processed, "%s%s%s\n",
|
||
self->tags.h5_start,
|
||
temp,
|
||
self->tags.h5_end);
|
||
} else if (self->mode == GS_MARKDOWN_MODE_H6) {
|
||
g_string_append_printf (self->processed, "%s%s%s\n",
|
||
self->tags.h6_start,
|
||
temp,
|
||
self->tags.h6_end);
|
||
} else if (self->mode == GS_MARKDOWN_MODE_PARA ||
|
||
self->mode == GS_MARKDOWN_MODE_RULE) {
|
||
g_string_append_printf (self->processed, "%s\n", temp);
|
||
self->line_count++;
|
||
}
|
||
|
||
/* clear */
|
||
g_string_truncate (self->pending, 0);
|
||
}
|
||
|
||
static gboolean
|
||
gs_markdown_to_text_line_process (GsMarkdown *self, const gchar *line)
|
||
{
|
||
gboolean ret;
|
||
|
||
/* blank */
|
||
ret = gs_markdown_to_text_line_is_blank (line);
|
||
if (ret) {
|
||
gs_markdown_flush_pending (self);
|
||
/* a new line after a list is the end of list, not a gap */
|
||
if (self->mode != GS_MARKDOWN_MODE_BULLETT)
|
||
ret = gs_markdown_add_pending (self, "\n");
|
||
self->mode = GS_MARKDOWN_MODE_BLANK;
|
||
goto out;
|
||
}
|
||
|
||
/* header1_type2 */
|
||
ret = gs_markdown_to_text_line_is_header1_type2 (line);
|
||
if (ret) {
|
||
if (self->mode == GS_MARKDOWN_MODE_PARA)
|
||
self->mode = GS_MARKDOWN_MODE_H1;
|
||
goto out;
|
||
}
|
||
|
||
/* header2_type2 */
|
||
ret = gs_markdown_to_text_line_is_header2_type2 (line);
|
||
if (ret) {
|
||
if (self->mode == GS_MARKDOWN_MODE_PARA)
|
||
self->mode = GS_MARKDOWN_MODE_H2;
|
||
goto out;
|
||
}
|
||
|
||
/* rule */
|
||
ret = gs_markdown_to_text_line_is_rule (line);
|
||
if (ret) {
|
||
gs_markdown_flush_pending (self);
|
||
self->mode = GS_MARKDOWN_MODE_RULE;
|
||
ret = gs_markdown_add_pending (self, self->tags.rule);
|
||
goto out;
|
||
}
|
||
|
||
/* bullet */
|
||
ret = gs_markdown_to_text_line_is_bullet (&line);
|
||
if (ret) {
|
||
gs_markdown_flush_pending (self);
|
||
self->mode = GS_MARKDOWN_MODE_BULLETT;
|
||
ret = gs_markdown_add_pending (self, line);
|
||
goto out;
|
||
}
|
||
|
||
/* header1 */
|
||
ret = gs_markdown_to_text_line_is_header1 (&line);
|
||
if (ret) {
|
||
gs_markdown_flush_pending (self);
|
||
self->mode = GS_MARKDOWN_MODE_H1;
|
||
ret = gs_markdown_add_pending_header (self, line);
|
||
goto out;
|
||
}
|
||
|
||
/* header2 */
|
||
ret = gs_markdown_to_text_line_is_header2 (&line);
|
||
if (ret) {
|
||
gs_markdown_flush_pending (self);
|
||
self->mode = GS_MARKDOWN_MODE_H2;
|
||
ret = gs_markdown_add_pending_header (self, line);
|
||
goto out;
|
||
}
|
||
|
||
/* header3 */
|
||
ret = gs_markdown_to_text_line_is_header3 (&line);
|
||
if (ret) {
|
||
gs_markdown_flush_pending (self);
|
||
self->mode = GS_MARKDOWN_MODE_H3;
|
||
ret = gs_markdown_add_pending_header (self, line);
|
||
goto out;
|
||
}
|
||
|
||
/* header4 */
|
||
ret = gs_markdown_to_text_line_is_header4 (&line);
|
||
if (ret) {
|
||
gs_markdown_flush_pending (self);
|
||
self->mode = GS_MARKDOWN_MODE_H4;
|
||
ret = gs_markdown_add_pending_header (self, line);
|
||
goto out;
|
||
}
|
||
|
||
/* header5 */
|
||
ret = gs_markdown_to_text_line_is_header5 (&line);
|
||
if (ret) {
|
||
gs_markdown_flush_pending (self);
|
||
self->mode = GS_MARKDOWN_MODE_H5;
|
||
ret = gs_markdown_add_pending_header (self, line);
|
||
goto out;
|
||
}
|
||
|
||
/* header6 */
|
||
ret = gs_markdown_to_text_line_is_header6 (&line);
|
||
if (ret) {
|
||
gs_markdown_flush_pending (self);
|
||
self->mode = GS_MARKDOWN_MODE_H6;
|
||
ret = gs_markdown_add_pending_header (self, line);
|
||
goto out;
|
||
}
|
||
|
||
/* paragraph */
|
||
if (self->mode == GS_MARKDOWN_MODE_BLANK ||
|
||
self->mode == GS_MARKDOWN_MODE_UNKNOWN) {
|
||
gs_markdown_flush_pending (self);
|
||
self->mode = GS_MARKDOWN_MODE_PARA;
|
||
}
|
||
|
||
/* add to pending */
|
||
ret = gs_markdown_add_pending (self, line);
|
||
out:
|
||
/* if we failed to add, we don't know the mode */
|
||
if (!ret)
|
||
self->mode = GS_MARKDOWN_MODE_UNKNOWN;
|
||
return ret;
|
||
}
|
||
|
||
static void
|
||
gs_markdown_set_output_kind (GsMarkdown *self, GsMarkdownOutputKind output)
|
||
{
|
||
g_return_if_fail (GS_IS_MARKDOWN (self));
|
||
|
||
self->output = output;
|
||
switch (output) {
|
||
case GS_MARKDOWN_OUTPUT_PANGO:
|
||
/* PangoMarkup */
|
||
self->tags.em_start = "<i>";
|
||
self->tags.em_end = "</i>";
|
||
self->tags.strong_start = "<b>";
|
||
self->tags.strong_end = "</b>";
|
||
self->tags.code_start = "<tt>";
|
||
self->tags.code_end = "</tt>";
|
||
self->tags.h1_start = "\n<big>";
|
||
self->tags.h1_end = "</big>\n";
|
||
self->tags.h2_start = "\n<b>";
|
||
self->tags.h2_end = "</b>\n";
|
||
self->tags.h3_start = "\n<b>";
|
||
self->tags.h3_end = "</b>\n";
|
||
self->tags.h4_start = "\n<b>";
|
||
self->tags.h4_end = "</b>\n";
|
||
self->tags.h5_start = "\n<b>";
|
||
self->tags.h5_end = "</b>\n";
|
||
self->tags.h6_start = "\n<b>";
|
||
self->tags.h6_end = "</b>\n";
|
||
self->tags.bullet_start = "• ";
|
||
self->tags.bullet_end = "";
|
||
self->tags.rule = "⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯\n";
|
||
self->tags.link_prefix = "<a href=\"";
|
||
self->tags.link_mid = "\">";
|
||
self->tags.link_suffix = "</a>";
|
||
self->escape = TRUE;
|
||
self->autolinkify = TRUE;
|
||
break;
|
||
case GS_MARKDOWN_OUTPUT_HTML:
|
||
/* XHTML */
|
||
self->tags.em_start = "<em>";
|
||
self->tags.em_end = "<em>";
|
||
self->tags.strong_start = "<strong>";
|
||
self->tags.strong_end = "</strong>";
|
||
self->tags.code_start = "<code>";
|
||
self->tags.code_end = "</code>";
|
||
self->tags.h1_start = "<h1>";
|
||
self->tags.h1_end = "</h1>";
|
||
self->tags.h2_start = "<h2>";
|
||
self->tags.h2_end = "</h2>";
|
||
self->tags.h3_start = "<h3>";
|
||
self->tags.h3_end = "</h3>";
|
||
self->tags.h4_start = "<h4>";
|
||
self->tags.h4_end = "</h4>";
|
||
self->tags.h5_start = "<h5>";
|
||
self->tags.h5_end = "</h5>";
|
||
self->tags.h6_start = "<h6>";
|
||
self->tags.h6_end = "</h6>";
|
||
self->tags.bullet_start = "<li>";
|
||
self->tags.bullet_end = "</li>";
|
||
self->tags.rule = "<hr>";
|
||
self->tags.link_prefix = "<a href=\"";
|
||
self->tags.link_mid = "\">";
|
||
self->tags.link_suffix = "</a>";
|
||
self->escape = TRUE;
|
||
self->autolinkify = TRUE;
|
||
break;
|
||
case GS_MARKDOWN_OUTPUT_TEXT:
|
||
/* plain text */
|
||
self->tags.em_start = "";
|
||
self->tags.em_end = "";
|
||
self->tags.strong_start = "";
|
||
self->tags.strong_end = "";
|
||
self->tags.code_start = "";
|
||
self->tags.code_end = "";
|
||
self->tags.h1_start = "[";
|
||
self->tags.h1_end = "]";
|
||
self->tags.h2_start = "-";
|
||
self->tags.h2_end = "-";
|
||
self->tags.h3_start = " ";
|
||
self->tags.h3_end = " ";
|
||
self->tags.h4_start = " ";
|
||
self->tags.h4_end = " ";
|
||
self->tags.h5_start = " ";
|
||
self->tags.h5_end = " ";
|
||
self->tags.h6_start = " ";
|
||
self->tags.h6_end = " ";
|
||
self->tags.bullet_start = "* ";
|
||
self->tags.bullet_end = "";
|
||
self->tags.rule = " ----- \n";
|
||
self->tags.link_prefix = NULL;
|
||
self->tags.link_mid = NULL;
|
||
self->tags.link_suffix = NULL;
|
||
self->escape = FALSE;
|
||
self->autolinkify = FALSE;
|
||
break;
|
||
default:
|
||
g_warning ("unknown output enum");
|
||
break;
|
||
}
|
||
}
|
||
|
||
void
|
||
gs_markdown_set_max_lines (GsMarkdown *self, gint max_lines)
|
||
{
|
||
g_return_if_fail (GS_IS_MARKDOWN (self));
|
||
self->max_lines = max_lines;
|
||
}
|
||
|
||
void
|
||
gs_markdown_set_smart_quoting (GsMarkdown *self, gboolean smart_quoting)
|
||
{
|
||
g_return_if_fail (GS_IS_MARKDOWN (self));
|
||
self->smart_quoting = smart_quoting;
|
||
}
|
||
|
||
void
|
||
gs_markdown_set_escape (GsMarkdown *self, gboolean escape)
|
||
{
|
||
g_return_if_fail (GS_IS_MARKDOWN (self));
|
||
self->escape = escape;
|
||
}
|
||
|
||
void
|
||
gs_markdown_set_autocode (GsMarkdown *self, gboolean autocode)
|
||
{
|
||
g_return_if_fail (GS_IS_MARKDOWN (self));
|
||
self->autocode = autocode;
|
||
}
|
||
|
||
void
|
||
gs_markdown_set_autolinkify (GsMarkdown *self, gboolean autolinkify)
|
||
{
|
||
g_return_if_fail (GS_IS_MARKDOWN (self));
|
||
self->autolinkify = autolinkify;
|
||
}
|
||
|
||
gchar *
|
||
gs_markdown_parse (GsMarkdown *self, const gchar *markdown)
|
||
{
|
||
gboolean ret;
|
||
gchar *temp;
|
||
guint i;
|
||
guint len;
|
||
g_auto(GStrv) lines = NULL;
|
||
const gchar *output;
|
||
|
||
g_return_val_if_fail (GS_IS_MARKDOWN (self), NULL);
|
||
|
||
/* process */
|
||
self->mode = GS_MARKDOWN_MODE_UNKNOWN;
|
||
self->line_count = 0;
|
||
g_string_truncate (self->pending, 0);
|
||
g_string_truncate (self->processed, 0);
|
||
lines = g_strsplit (markdown, "\n", -1);
|
||
len = g_strv_length (lines);
|
||
|
||
/* process each line */
|
||
for (i = 0; i < len; i++) {
|
||
ret = gs_markdown_to_text_line_process (self, lines[i]);
|
||
if (!ret)
|
||
break;
|
||
}
|
||
gs_markdown_flush_pending (self);
|
||
|
||
/* remove trailing \n */
|
||
while (self->processed->len > 0 && self->processed->str[self->processed->len - 1] == '\n')
|
||
g_string_set_size (self->processed, self->processed->len - 1);
|
||
|
||
/* skip leading \n, which can happen with headers in the Pango mode */
|
||
output = self->processed->str;
|
||
while (*output == '\n')
|
||
output++;
|
||
|
||
/* get a copy */
|
||
temp = g_strdup (output);
|
||
g_string_truncate (self->pending, 0);
|
||
g_string_truncate (self->processed, 0);
|
||
return temp;
|
||
}
|
||
|
||
static void
|
||
gs_markdown_finalize (GObject *object)
|
||
{
|
||
GsMarkdown *self;
|
||
|
||
g_return_if_fail (GS_IS_MARKDOWN (object));
|
||
|
||
self = GS_MARKDOWN (object);
|
||
|
||
g_string_free (self->pending, TRUE);
|
||
g_string_free (self->processed, TRUE);
|
||
|
||
G_OBJECT_CLASS (gs_markdown_parent_class)->finalize (object);
|
||
}
|
||
|
||
static void
|
||
gs_markdown_class_init (GsMarkdownClass *klass)
|
||
{
|
||
GObjectClass *object_class = G_OBJECT_CLASS (klass);
|
||
object_class->finalize = gs_markdown_finalize;
|
||
}
|
||
|
||
static void
|
||
gs_markdown_init (GsMarkdown *self)
|
||
{
|
||
self->mode = GS_MARKDOWN_MODE_UNKNOWN;
|
||
self->pending = g_string_new ("");
|
||
self->processed = g_string_new ("");
|
||
self->max_lines = -1;
|
||
self->smart_quoting = FALSE;
|
||
self->escape = FALSE;
|
||
self->autocode = FALSE;
|
||
}
|
||
|
||
GsMarkdown *
|
||
gs_markdown_new (GsMarkdownOutputKind output)
|
||
{
|
||
GsMarkdown *self;
|
||
self = g_object_new (GS_TYPE_MARKDOWN, NULL);
|
||
gs_markdown_set_output_kind (self, output);
|
||
return GS_MARKDOWN (self);
|
||
}
|