7 files changed, 724 insertions, 0 deletions
diff --git a/debian/dconv/parser/__init__.py b/debian/dconv/parser/__init__.py
new file mode 100644
index 0000000..82b8522
--- /dev/null
+++ b/debian/dconv/parser/__init__.py
@@ -0,0 +1,81 @@
+__all__ = [  # submodule names pulled in by "from parser import *"
+    'arguments',
+    'example',
+    'keyword',
+    'seealso',
+    'table',
+    'underline'
+]
+
+
+class Parser:  # base class extended by the Parser of each sibling module
+    def __init__(self, pctxt):
+        self.pctxt = pctxt  # parsing context handed in by the caller (presumably a PContext)
+
+    def parse(self, line):
+        return line  # default behaviour: hand the line back untouched
+
+class PContext:
+    """Cursor over the lines of the text being parsed, plus the templates."""
+    def __init__(self, templates=None):
+        self.set_content_list([])
+        self.templates = templates
+
+    def set_content(self, content):
+        self.set_content_list(content.split("\n"))
+
+    def set_content_list(self, content):
+        self.lines = content
+        self.nblines = len(content)
+        self.i, self.stop = 0, False  # rewind the cursor, clear the stop flag
+
+    def get_lines(self):
+        return self.lines
+
+    def eat_lines(self):
+        """Skip the non-blank lines at the cursor; return how many were skipped."""
+        start = self.i
+        while self.has_more_lines() and self.lines[self.i].strip():
+            self.next()
+        return self.i - start
+
+    def eat_empty_lines(self):
+        """Skip the blank lines at the cursor; return how many were skipped."""
+        start = self.i
+        while self.has_more_lines() and not self.lines[self.i].strip():
+            self.next()
+        return self.i - start
+
+    def next(self, count=1):
+        self.i += count
+
+    def has_more_lines(self, offset=0):
+        return self.nblines > self.i + offset
+
+    def get_line(self, offset=0):
+        return self.lines[self.i + offset].rstrip()
+
+
+# Get the indentation of a line
+def get_indent(line):
+    """Return the number of leading space characters of ``line``.
+
+    Only ' ' counts; a tab or any other character ends the indentation.
+    """
+    return len(line) - len(line.lstrip(' '))
+
+
+# Remove unneeded indentation
+def remove_indent(list):
+    """Strip the indentation shared by all non-blank lines, in place.
+
+    ``list`` is modified directly and nothing is returned.  Blank lines
+    are ignored when the shared indentation is computed, but they are
+    shortened like every other entry.
+    """
+    # Indentation depth of every line that carries some text
+    depths = [get_indent(text) for text in list if text.strip()]
+    shared = min(depths) if depths else 0
+    # Realign the content only when there is something to remove
+    if shared > 0:
+        list[:] = [text[shared:] for text in list]
diff --git a/debian/dconv/parser/arguments.py b/debian/dconv/parser/arguments.py
new file mode 100644
index 0000000..096b269
--- /dev/null
+++ b/debian/dconv/parser/arguments.py
@@ -0,0 +1,132 @@
+import sys
+import re
+import parser
+
+'''
+TODO: Allow inner data parsing (this will allow to parse the examples provided in an arguments block)
+'''
+class Parser(parser.Parser):
+    """Detect ``Argument(s) :`` blocks and render them with a template.
+
+    The label line may carry a short description (``Arguments : none``
+    announces that no argument list follows).  Otherwise the following
+    lines indented deeper than the label are collected, stripped of their
+    common indentation and handed to ``parser/arguments.tpl`` together
+    with the label and the description.
+    """
+
+    def __init__(self, pctxt):
+        """Store the parsing context.
+
+        Args:
+            pctxt: context giving access to the document lines and to the
+                template lookup (``pctxt.templates``).
+        """
+        parser.Parser.__init__(self, pctxt)
+
+    def parse(self, line):
+        """Render ``line`` as an arguments block when it holds the label.
+
+        Args:
+            line: the current document line.
+
+        Returns:
+            The rendered template when ``line`` contains ``Argument :`` or
+            ``Arguments :``; otherwise ``line`` unchanged.
+
+        Side effects:
+            On a match the context cursor is moved past the label line,
+            the blank lines after it and every collected argument line,
+            and ``pctxt.stop`` is set to True.
+        """
+        pctxt = self.pctxt
+
+        result = re.search(r'(Arguments? *:)', line)
+        if not result:
+            return line
+
+        label = result.group(0)
+        desc = re.sub(r'.*Arguments? *:', '', line).strip()
+        indent = parser.get_indent(line)
+
+        pctxt.next()
+        pctxt.eat_empty_lines()
+
+        # "Arguments : none" announces that no argument list follows.
+        arglines = [] if desc == "none" else self._collect_block(indent)
+        if arglines:
+            parser.remove_indent(arglines)
+
+        pctxt.stop = True
+
+        template = pctxt.templates.get_template("parser/arguments.tpl")
+        return template.render(
+            pctxt=pctxt,
+            label=label,
+            desc=desc,
+            content=arglines
+        )
+
+    def _collect_block(self, indent):
+        """Consume and return the lines indented deeper than ``indent``.
+
+        Blank lines found between two collected lines are kept as empty
+        strings; blank lines trailing the block are consumed but not
+        returned.
+
+        Args:
+            indent: indentation (in spaces) of the label line.
+
+        Returns:
+            List of the collected lines, still carrying their indentation.
+        """
+        pctxt = self.pctxt
+        collected = []
+        pending_blanks = 0
+        while pctxt.has_more_lines() and parser.get_indent(pctxt.get_line()) > indent:
+            collected.extend([""] * pending_blanks)
+            collected.append(pctxt.get_line())
+            pctxt.next()
+            pending_blanks = pctxt.eat_empty_lines()
+        return collected
+
+'''
+    def parse_args(self, data):
+        args = []
+
+        pctxt = parser.PContext()
+        pctxt.set_content_list(data)
+
+        while pctxt.has_more_lines():
+            line = pctxt.get_line()
+            arg_indent = parser.get_indent(line)
+            argument = re.sub(r' *([^ ]+).*', r'\1', line)
+            if True or argument:
+                arg_desc = []
+                trailing_desc = line.replace(argument, " " * len(self.unescape(argument)), 1)[arg_indent:]
+                if trailing_desc.strip():
+                    arg_desc.append(trailing_desc)
+                pctxt.next()
+                add_empty_lines = 0
+                while pctxt.has_more_lines() and parser.get_indent(pctxt.get_line()) > arg_indent:
+                    for i in range(0, add_empty_lines):
+                        arg_desc.append("")
+                    arg_desc.append(pctxt.get_line()[arg_indent:])
+                    pctxt.next()
+                    add_empty_lines = pctxt.eat_empty_lines()
+
+                parser.remove_indent(arg_desc)
+
+                args.append({
+                    'name': argument,
+                    'desc': arg_desc
+                })
+        return args
+
+    def unescape(self, s):
+        s = s.replace("&lt;", "<")
+        s = s.replace("&gt;", ">")
+        # this has to be last:
+        s = s.replace("&amp;", "&")
+        return s
+'''
diff --git a/debian/dconv/parser/example.py b/debian/dconv/parser/example.py
new file mode 100644
index 0000000..3958992
--- /dev/null
+++ b/debian/dconv/parser/example.py
@@ -0,0 +1,77 @@
+import re
+import parser
+
+# Detect examples blocks
+class Parser(parser.Parser):
+    """Detect ``Example(s) :`` blocks and render them with a template."""
+
+    def __init__(self, pctxt):
+        parser.Parser.__init__(self, pctxt)
+        # Replacement applied to the trailing "#..." part of example lines.
+        template = pctxt.templates.get_template("parser/example/comment.tpl")
+        self.comment = template.render(pctxt=pctxt).strip()
+
+    def parse(self, line):
+        """Render the example block introduced by ``line``.
+
+        Returns the rendered ``parser/example.tpl`` template (the consumed
+        lines are skipped and ``pctxt.stop`` is set), or ``line`` unchanged
+        when it does not start with an ``Example :`` / ``Examples :`` label.
+        """
+        pctxt = self.pctxt
+
+        result = re.search(r'^ *(Examples? *:)(.*)', line)
+        if not result:
+            return line
+
+        label = result.group(1)
+        desc = result.group(2).strip()
+        indent = parser.get_indent(line)
+
+        if desc:
+            # Some examples have a description, and some descriptions span
+            # several lines.  The bound check avoids an IndexError when the
+            # label is the last line of the document.
+            desc_indent = len(line) - len(desc)
+            while (pctxt.has_more_lines(1) and pctxt.get_line(1)
+                   and parser.get_indent(pctxt.get_line(1)) == desc_indent):
+                desc += " " + pctxt.get_line(1).strip()
+                pctxt.next()
+
+        pctxt.next()
+        add_empty_line = pctxt.eat_empty_lines()
+
+        content = []
+        # -1 matches no branch below: nothing follows the label at end of input.
+        body_indent = parser.get_indent(pctxt.get_line()) if pctxt.has_more_lines() else -1
+
+        if body_indent > indent:
+            if desc:
+                desc = desc[0].upper() + desc[1:]
+            add_empty_line = 0
+            while pctxt.has_more_lines() and ((not pctxt.get_line()) or (parser.get_indent(pctxt.get_line()) > indent)):
+                if pctxt.get_line():
+                    content.extend([""] * add_empty_line)
+                    content.append(re.sub(r'(#.*)$', self.comment, pctxt.get_line()))
+                    add_empty_line = 0
+                else:
+                    add_empty_line += 1
+                pctxt.next()
+        elif body_indent == indent:
+            # Simple example that can't have empty lines
+            if add_empty_line and desc:
+                # This means that the example was on the same line as the
+                # 'Example' tag and was not a description
+                content.append(" " * indent + desc)
+                desc = False
+            else:
+                while pctxt.has_more_lines() and (parser.get_indent(pctxt.get_line()) >= indent):
+                    content.append(pctxt.get_line())
+                    pctxt.next()
+                pctxt.eat_empty_lines() # Skip empty remaining lines
+
+        pctxt.stop = True
+        parser.remove_indent(content)
+
+        template = pctxt.templates.get_template("parser/example.tpl")
+        return template.render(pctxt=pctxt, label=label, desc=desc, content=content)
diff --git a/debian/dconv/parser/keyword.py b/debian/dconv/parser/keyword.py
new file mode 100644
index 0000000..f20944f
--- /dev/null
+++ b/debian/dconv/parser/keyword.py
@@ -0,0 +1,142 @@
+import re
+import parser
+from urllib.parse import quote
+
+class Parser(parser.Parser):
+    """Detect keyword definition lines and render them with their anchors."""
+
+    def __init__(self, pctxt):
+        parser.Parser.__init__(self, pctxt)
+        # Raw strings keep regex escapes such as "\-" or "\(" from being
+        # read as (invalid) string escape sequences.
+        self.keywordPattern = re.compile(r'^(%s%s)(%s)' % (
+            r'([a-z][a-z0-9\-\+_\.]*[a-z0-9\-\+_)])', # keyword
+            r'( [a-z0-9\-_]+)*',                  # subkeywords
+            r'(\([^ ]*\))?',   # arg (ex: (<backend>), (<frontend>/<backend>), (<offset1>,<length>[,<offset2>]) ...
+        ))
+
+    def parse(self, line):
+        """Render ``line`` as a keyword definition when it looks like one.
+
+        Returns the anchors and the ``<div class="keyword">`` block for a
+        keyword line (the cursor is moved past the consumed lines and
+        ``pctxt.stop`` is set), an empty string for a ``/* ... */`` comment
+        (which is skipped), and ``line`` unchanged in every other case.
+        """
+        pctxt = self.pctxt
+        keywords = pctxt.keywords
+        chapters = pctxt.chapters
+
+        # Keywords start in the first column; other lines are returned as is.
+        if line == "" or line.startswith(" "):
+            return line
+
+        keyword = False
+        splitKeyword = []
+        parsed = self.keywordPattern.match(line)
+        if parsed is not None:
+            keyword = parsed.group(1)
+            arg     = parsed.group(4)
+            parameters = line[len(keyword) + len(arg):]
+            if (parameters != "" and not re.match(r"^ +((&lt;|\[|\{|/).*|(: [a-z +]+))?(\(deprecated\))?$", parameters)):
+                # Dirty hack
+                # - parameters should only start with the character "<", "[", "{", "/"
+                # - or a colon (":") followed by alpha keywords to identify fetching samples (optionally separated by the character "+")
+                # - or the string "(deprecated)" at the end
+                keyword = False
+            else:
+                splitKeyword = keyword.split(" ")
+            parameters = arg + parameters
+
+        if keyword and (len(splitKeyword) <= 5):
+            chapter = pctxt.details["chapter"]
+            toplevel = pctxt.details["toplevel"]
+            res = []
+            for j in range(0, len(splitKeyword)):
+                subKeyword = " ".join(splitKeyword[0:j + 1])
+                if subKeyword != "no":
+                    if subKeyword not in keywords:
+                        keywords[subKeyword] = set()
+                    keywords[subKeyword].add(chapter)
+                res.append('<a class="anchor" name="%s"></a>' % subKeyword)
+                res.append('<a class="anchor" name="%s-%s"></a>' % (toplevel, subKeyword))
+                res.append('<a class="anchor" name="%s-%s"></a>' % (chapter, subKeyword))
+                res.append('<a class="anchor" name="%s (%s)"></a>' % (subKeyword, chapters[toplevel]['title']))
+                res.append('<a class="anchor" name="%s (%s)"></a>' % (subKeyword, chapters[chapter]['title']))
+
+            # str.replace() leaves the text untouched when the marker is absent
+            parameters = parameters.replace("(deprecated)", '<span class="label label-warning">(deprecated)</span>')
+
+            # Parameters may continue on the next lines; the bound check
+            # avoids an IndexError when the keyword ends the document.
+            while pctxt.has_more_lines(1) and pctxt.get_line(1).startswith("   "):
+                parameters += "\n" + pctxt.get_line(1)
+                pctxt.next()
+
+            parameters = self.colorize(parameters)
+            res.append('<div class="keyword"><b><a class="anchor" name="%s"></a><a href="#%s">%s</a></b>%s</div>' % (keyword, quote("%s-%s" % (chapter, keyword)), keyword, parameters))
+            pctxt.next()
+            pctxt.stop = True
+            return "".join(res)
+
+        if line.startswith("/*"):
+            # Skip comments in the documentation; the bound check keeps an
+            # unterminated comment from reading past the last line.
+            while pctxt.has_more_lines() and not pctxt.get_line().endswith("*/"):
+                pctxt.next()
+            pctxt.next()
+            return ""
+
+        # This is probably not a keyword but a text, ignore it
+        return line
+
+    # Used to colorize keywords parameters
+    # TODO : use CSS styling
+    def colorize(self, text):
+        """Wrap bracketed parts of ``text`` in colored ``<span>`` elements.
+
+        Handles ``[...]``, ``{...}`` and ``&lt;...&gt;``; spans left open are closed.
+        """
+        tags = [
+                [ "["   , "]"   , "#008" ],
+                [ "{"   , "}"   , "#800" ],
+                [ "&lt;", "&gt;", "#080" ],
+        ]
+        colorized = []  # output fragments, joined once at the end
+        heap = []       # stack of the tags currently open
+        pos = 0
+        while pos < len(text):
+            found = False
+            for tag in tags:
+                if text.startswith(tag[0], pos):
+                    # Opening tag
+                    heap.append(tag)
+                    colorized.append('<span style="color: %s">%s' % (tag[2], tag[0]))
+                    pos += len(tag[0])
+                    found = True
+                    break
+                elif text.startswith(tag[1], pos):
+                    # Closing tag
+
+                    # pop opening tags until the corresponding one is found
+                    openingTag = False
+                    while heap and openingTag != tag:
+                        openingTag = heap.pop()
+                        if openingTag != tag:
+                            colorized.append('</span>')
+                    # all intermediate tags are now closed, we can display the tag
+                    colorized.append(tag[1])
+                    # and then close it if it was previously opened
+                    if openingTag == tag:
+                        colorized.append('</span>')
+                    pos += len(tag[1])
+                    found = True
+                    break
+            if not found:
+                colorized.append(text[pos])
+                pos += 1
+        # close all unterminated tags
+        colorized.extend(['</span>'] * len(heap))
+        return "".join(colorized)
+
+
diff --git a/debian/dconv/parser/seealso.py b/debian/dconv/parser/seealso.py
new file mode 100644
index 0000000..bbb53f9
--- /dev/null
+++ b/debian/dconv/parser/seealso.py
@@ -0,0 +1,32 @@
+import re
+import parser
+
+class Parser(parser.Parser):
+    """Detect ``See also :`` lines and render them with a template."""
+
+    def parse(self, line):
+        """Return the rendered "See also" block, or ``line`` if none starts here."""
+        found = re.search(r'(See also *:)', line)
+        if not found:
+            return line
+
+        ctx = self.pctxt
+        base_indent = parser.get_indent(line)
+        text = re.sub(r'.*See also *:', '', line).strip()
+
+        # The description may continue on the following lines, as long as
+        # they are indented at least as much as the label line.
+        while ctx.has_more_lines(1):
+            following = ctx.get_line(1)
+            if parser.get_indent(following) < base_indent:
+                break
+            text += " " + following.strip()
+            ctx.next()
+
+        ctx.eat_empty_lines()
+        ctx.next()
+        ctx.stop = True
+
+        return ctx.templates.get_template("parser/seealso.tpl").render(
+            pctxt=ctx, label=found.group(0), desc=text,
+        )
diff --git a/debian/dconv/parser/table.py b/debian/dconv/parser/table.py
new file mode 100644
index 0000000..e2575b1
--- /dev/null
+++ b/debian/dconv/parser/table.py
@@ -0,0 +1,244 @@
+import re
+import sys
+import parser
+
+class Parser(parser.Parser):
+    """Detect the ASCII tables of the Configuration Manual and render them."""
+    def __init__(self, pctxt):
+        parser.Parser.__init__(self, pctxt)
+        self.table1Pattern = re.compile(r'^ *(-+\+)+-+')
+        self.table2Pattern = re.compile(r'^ *\+(-+\+)+')
+
+    def parse(self, line):
+        """Return the rendered table starting at ``line``, or ``line`` itself."""
+        pctxt = self.pctxt
+
+        if pctxt.context['headers']['subtitle'] != 'Configuration Manual':
+            # Quick exit
+            return line
+        elif pctxt.details['chapter'] == "4":
+            # BUG: the matrix in chapter 4. Proxies is not well displayed, we skip this chapter
+            return line
+
+        if pctxt.has_more_lines(1):
+            nextline = pctxt.get_line(1)
+        else:
+            nextline = ""
+
+        if self.table1Pattern.match(nextline):
+            # activate table rendering only for the Configuration Manual
+            lineSeparator = nextline
+            nbColumns = nextline.count("+") + 1
+            extraColumns = 0
+            print("Entering table mode (%d columns)" % nbColumns, file=sys.stderr)
+            table = []
+            if line.find("|") != -1:
+                row = []
+                while pctxt.has_more_lines():
+                    line = pctxt.get_line()
+                    if pctxt.has_more_lines(1):
+                        nextline = pctxt.get_line(1)
+                    else:
+                        nextline = ""
+                    if line == lineSeparator:
+                        # New row
+                        table.append(row)
+                        row = []
+                        if nextline.find("|") == -1:
+                            break # End of table
+                    else:
+                        # Data
+                        columns = line.split("|")
+                        for j in range(0, len(columns)):
+                            try:
+                                if row[j]:
+                                    row[j] += "<br />"
+                                row[j] += columns[j].strip()
+                            except IndexError:
+                                row.append(columns[j].strip())
+                    pctxt.next()
+            else:
+                row = []
+                headers = nextline
+                while pctxt.has_more_lines():
+                    line = pctxt.get_line()
+                    if pctxt.has_more_lines(1):
+                        nextline = pctxt.get_line(1)
+                    else:
+                        nextline = ""
+
+                    if nextline == "":
+                        if row: table.append(row)
+                        break # End of table
+
+                    if (line != lineSeparator) and not line.startswith("-"):
+                        start = 0
+
+                        if row and not line.startswith(" "):
+                            # Row is complete, parse a new one
+                            table.append(row)
+                            row = []
+
+                        tmprow = []
+                        while start != -1:
+                            end = headers.find("+", start)
+                            if end == -1:
+                                end = len(headers)
+
+                            realend = end
+                            if realend == len(headers):
+                                realend = len(line)
+                            else:
+                                while realend < len(line) and line[realend] != " ":
+                                    realend += 1
+                                    end += 1
+
+                            tmprow.append(line[start:realend])
+
+                            start = end + 1
+                            if start >= len(headers):
+                                start = -1
+                        for j in range(0, nbColumns):
+                            try:
+                                row[j] += tmprow[j].strip()
+                            except IndexError:
+                                row.append(tmprow[j].strip())
+
+                        deprecated = row[0].endswith("(deprecated)")
+                        if deprecated:
+                            row[0] = row[0][: -len("(deprecated)")].rstrip()
+
+                        nooption = row[1].startswith("(*)")
+                        if nooption:
+                            row[1] = row[1][len("(*)"):].strip()
+
+                        if deprecated or nooption:
+                            extraColumns = 1
+                            extra = ""
+                            if deprecated:
+                                extra += '<span class="label label-warning">(deprecated)</span>'
+                            if nooption:
+                                extra += '<span>(*)</span>'
+                            row.append(extra)
+
+                    pctxt.next()
+            print("Leaving table mode", file=sys.stderr)
+            pctxt.next() # skip useless next line
+            pctxt.stop = True
+
+            return self.renderTable(table, nbColumns, pctxt.details["toplevel"])
+        # elif self.table2Pattern.match(line):
+        #    return self.parse_table_format2()
+        elif line.find("May be used in sections") != -1:
+            nextline = pctxt.get_line(1) if pctxt.has_more_lines(1) else ""  # no line may follow
+            rows = []
+            headers = line.split(":")
+            rows.append(headers[1].split("|"))
+            rows.append(nextline.split("|"))
+            table = {
+                    "rows": rows,
+                    "title": headers[0]
+            }
+            pctxt.next(2)  # skip this previous table
+            pctxt.stop = True
+
+            return self.renderTable(table)
+
+        return line
+
+
+    def parse_table_format2(self):
+        """Parse a table made of "|"-delimited rows (not called by parse() for now)."""
+        pctxt = self.pctxt
+        linesep = pctxt.get_line()
+        rows = []
+
+        pctxt.next()
+        maxcols = 0
+        while pctxt.get_line().strip().startswith("|"):
+            row = pctxt.get_line().strip()[1:-1].split("|")
+            rows.append(row)
+            maxcols = max(maxcols, len(row))
+            pctxt.next()
+            if pctxt.get_line() == linesep:
+                # TODO : find a way to define a special style for next row
+                pctxt.next()
+        pctxt.stop = True
+
+        return self.renderTable(rows, maxcols)
+
+    # Render tables detected by the conversion parser
+    def renderTable(self, table, maxColumns = 0, toplevel = None):
+        """Render ``table`` (a list of rows, or a dict with "rows" and "title")."""
+        pctxt  = self.pctxt
+        template = pctxt.templates.get_template("parser/table.tpl")
+
+        # A dict carries a title next to its rows
+        title = None
+        if isinstance(table, dict):
+            title = table["title"]
+            table = table["rows"]
+
+        if not maxColumns:
+            maxColumns = len(table[0])
+
+        rows = []
+
+        # headerLine keeps the rendered first row so that it can be repeated
+        headerLine = ""
+        hasKeywords = False
+        i = 0
+        for row in table:
+            # The first row is rendered with the header template
+
+            if i == 0:
+                row_template = pctxt.templates.get_template("parser/table/header.tpl")
+            else:
+                row_template = pctxt.templates.get_template("parser/table/row.tpl")
+
+            if i > 1 and (i - 1) % 20 == 0 and len(table) > 50:
+                # Repeat headers periodically for long tables
+                rows.append(headerLine)
+
+            j = 0
+            cols = []
+            for column in row:
+                if j >= maxColumns:
+                    break
+
+                tplcol = {}
+
+                data = column.strip()
+                keyword = column
+                if j == 0 and i == 0 and keyword == 'keyword':
+                    hasKeywords = True
+                if j == 0 and i != 0 and hasKeywords:
+                    if keyword.startswith("[no] "):
+                        keyword = keyword[len("[no] "):]
+                    tplcol['toplevel'] = toplevel
+                    tplcol['keyword'] = keyword
+                tplcol['extra'] = []
+                if j == 0 and len(row) > maxColumns:
+                    for k in range(maxColumns, len(row)):
+                        tplcol['extra'].append(row[k])
+                tplcol['data'] = data
+                cols.append(tplcol)
+                j += 1
+            # Render the row from its column descriptions
+
+            line = row_template.render(
+                pctxt=pctxt,
+                columns=cols
+            ).strip()
+            if i == 0:
+                headerLine = line
+
+            rows.append(line)
+
+            i += 1
+
+        return template.render(
+            pctxt=pctxt,
+            title=title,
+            rows=rows,
+        )
diff --git a/debian/dconv/parser/underline.py b/debian/dconv/parser/underline.py
new file mode 100644
index 0000000..3a2350c
--- /dev/null
+++ b/debian/dconv/parser/underline.py
@@ -0,0 +1,16 @@
+import parser
+
+class Parser(parser.Parser):
+    """Detect lines underlined with dashes and render them with a template."""
+    def parse(self, line):
+        ctx = self.pctxt
+        if not line or not ctx.has_more_lines(1):
+            return line
+        # An underline is a row of dashes exactly as long as the line above it.
+        if ctx.get_line(1) != "-" * len(line):
+            return line
+        rendered = ctx.templates.get_template("parser/underline.tpl").render(pctxt=ctx, data=line).strip()
+        ctx.next(2)
+        ctx.eat_empty_lines()
+        ctx.stop = True
+        return rendered