summary | refs | log | tree | commit | diff | stats
path: root/debian/dconv/parser
diff options
context:
space:
mode:
Diffstat (limited to 'debian/dconv/parser')
-rw-r--r-- debian/dconv/parser/__init__.py 81
-rw-r--r-- debian/dconv/parser/arguments.py 132
-rw-r--r-- debian/dconv/parser/example.py 77
-rw-r--r-- debian/dconv/parser/keyword.py 142
-rw-r--r-- debian/dconv/parser/seealso.py 32
-rw-r--r-- debian/dconv/parser/table.py 244
-rw-r--r-- debian/dconv/parser/underline.py 16
7 files changed, 724 insertions, 0 deletions
diff --git a/debian/dconv/parser/__init__.py b/debian/dconv/parser/__init__.py
new file mode 100644
index 0000000..82b8522
--- /dev/null
+++ b/debian/dconv/parser/__init__.py
@@ -0,0 +1,81 @@
+__all__ = [
+ 'arguments',
+ 'example',
+ 'keyword',
+ 'seealso',
+ 'table',
+ 'underline'
+]
+
+
class Parser:
    """Base class of the line parsers.

    It only remembers the parsing context; concrete parsers override
    :meth:`parse`.
    """

    def __init__(self, pctxt):
        # Parsing context handed over by the caller, stored untouched.
        self.pctxt = pctxt

    def parse(self, line):
        """Return *line* unchanged (the base parser performs no parsing)."""
        return line
+
class PContext:
    """Cursor over a list of text lines.

    Attributes set by this class:
      lines     -- the list of lines being walked
      nblines   -- number of entries in ``lines``
      i         -- index of the current line
      stop      -- flag reset to False whenever new content is loaded
      templates -- value given to the constructor (None by default)
    """

    def __init__(self, templates = None):
        self.set_content_list([])
        self.templates = templates

    def set_content(self, content):
        """Load a string, split on "\\n", as the new content."""
        lines = content.split("\n")
        self.set_content_list(lines)

    def set_content_list(self, content):
        """Use *content* (kept by reference) as the lines and rewind."""
        self.lines = content
        self.nblines = len(content)
        self.i = 0
        self.stop = False

    def get_lines(self):
        """Return the list of lines currently loaded."""
        return self.lines

    def _eat(self, want_blank):
        # Advance while the current line's "blankness" equals want_blank
        # and return how many lines were skipped.
        eaten = 0
        while self.has_more_lines():
            is_blank = not self.lines[self.i].strip()
            if is_blank != want_blank:
                break
            eaten += 1
            self.next()
        return eaten

    def eat_lines(self):
        """Skip consecutive non-blank lines; return how many were skipped."""
        return self._eat(False)

    def eat_empty_lines(self):
        """Skip consecutive blank lines; return how many were skipped."""
        return self._eat(True)

    def next(self, count=1):
        """Move the cursor forward by *count* lines (no bounds check)."""
        self.i = self.i + count

    def has_more_lines(self, offset=0):
        """Tell whether the line at cursor + *offset* exists."""
        return self.nblines > self.i + offset

    def get_line(self, offset=0):
        """Return the line at cursor + *offset* without trailing whitespace."""
        target = self.lines[self.i + offset]
        return target.rstrip()
+
+
def get_indent(line):
    """Return the number of leading space characters of *line*.

    Only the space character counts; tabs and other whitespace end the run.
    """
    return len(line) - len(line.lstrip(' '))
+
+
def remove_indent(list):
    """Strip the indentation common to all non-blank entries, in place.

    Blank (empty or whitespace-only) entries are ignored when computing the
    common indentation but are sliced like every other entry. Nothing is
    returned; the given list object itself is modified.
    """
    # Leading-space count of every entry that has visible content.
    widths = [
        len(entry) - len(entry.lstrip(' '))
        for entry in list
        if entry.strip()
    ]
    if not widths:
        return

    common = min(widths)
    if common > 0:
        # Slice assignment keeps the caller's list object.
        list[:] = [entry[common:] for entry in list]
diff --git a/debian/dconv/parser/arguments.py b/debian/dconv/parser/arguments.py
new file mode 100644
index 0000000..096b269
--- /dev/null
+++ b/debian/dconv/parser/arguments.py
@@ -0,0 +1,132 @@
+import sys
+import re
+import parser
+
+'''
+TODO: Allow inner data parsing (this will allow to parse the examples provided in an arguments block)
+'''
class Parser(parser.Parser):
    """Render an ``Argument(s):`` block with the arguments template."""

    def __init__(self, pctxt):
        parser.Parser.__init__(self, pctxt)

    def parse(self, line):
        """Parse an arguments block starting at *line*.

        When *line* contains ``Argument:`` / ``Arguments:``, the following
        lines that are indented deeper than *line* are collected (blank lines
        between them are kept, trailing ones are dropped), their common
        indentation is removed and the result is rendered through
        ``parser/arguments.tpl``. The context cursor is advanced past the
        block and ``pctxt.stop`` is set.

        Returns the rendered template, or *line* unchanged when it does not
        open an arguments block.
        """
        pctxt = self.pctxt

        result = re.search(r'(Arguments? *:)', line)
        if not result:
            return line

        label = result.group(0)
        # Text following the label on the same line.
        desc = re.sub(r'.*Arguments? *:', '', line).strip()
        indent = parser.get_indent(line)

        pctxt.next()
        pctxt.eat_empty_lines()

        arglines = []
        # A description of "none" means the block has no argument lines.
        if desc != "none":
            pending_blank = 0
            while pctxt.has_more_lines() and parser.get_indent(pctxt.get_line()) > indent:
                # Blank lines are only emitted once another argument line
                # follows them, so trailing blanks never reach the output.
                arglines.extend([""] * pending_blank)
                arglines.append(pctxt.get_line())
                pctxt.next()
                pending_blank = pctxt.eat_empty_lines()

        if arglines:
            parser.remove_indent(arglines)

        pctxt.stop = True

        template = pctxt.templates.get_template("parser/arguments.tpl")
        return template.render(
            pctxt=pctxt,
            label=label,
            desc=desc,
            content=arglines
        )
+
+'''
+ def parse_args(self, data):
+ args = []
+
+ pctxt = parser.PContext()
+ pctxt.set_content_list(data)
+
+ while pctxt.has_more_lines():
+ line = pctxt.get_line()
+ arg_indent = parser.get_indent(line)
+ argument = re.sub(r' *([^ ]+).*', r'\1', line)
+ if True or argument:
+ arg_desc = []
+ trailing_desc = line.replace(argument, " " * len(self.unescape(argument)), 1)[arg_indent:]
+ if trailing_desc.strip():
+ arg_desc.append(trailing_desc)
+ pctxt.next()
+ add_empty_lines = 0
+ while pctxt.has_more_lines() and parser.get_indent(pctxt.get_line()) > arg_indent:
+ for i in range(0, add_empty_lines):
+ arg_desc.append("")
+ arg_desc.append(pctxt.get_line()[arg_indent:])
+ pctxt.next()
+ add_empty_lines = pctxt.eat_empty_lines()
+
+ parser.remove_indent(arg_desc)
+
+ args.append({
+ 'name': argument,
+ 'desc': arg_desc
+ })
+ return args
+
+ def unescape(self, s):
+ s = s.replace("&lt;", "<")
+ s = s.replace("&gt;", ">")
+ # this has to be last:
+ s = s.replace("&amp;", "&")
+ return s
+'''
diff --git a/debian/dconv/parser/example.py b/debian/dconv/parser/example.py
new file mode 100644
index 0000000..3958992
--- /dev/null
+++ b/debian/dconv/parser/example.py
@@ -0,0 +1,77 @@
+import re
+import parser
+
# Detect examples blocks
class Parser(parser.Parser):
    """Render an ``Example(s):`` block with the example template."""

    def __init__(self, pctxt):
        parser.Parser.__init__(self, pctxt)
        template = pctxt.templates.get_template("parser/example/comment.tpl")
        # Rendered once; used as the replacement text for "#..." comments
        # found in example lines.
        self.comment = template.render(pctxt=pctxt).strip()

    def parse(self, line):
        """Parse an example block starting at *line*.

        Returns the rendered ``parser/example.tpl`` template when *line*
        starts with ``Example:`` / ``Examples:`` (after optional spaces), or
        *line* unchanged otherwise. On a match the context cursor is moved
        past the block and ``pctxt.stop`` is set.

        Reaching the end of the input while looking at following lines ends
        the block instead of raising IndexError.
        """
        pctxt = self.pctxt

        result = re.search(r'^ *(Examples? *:)(.*)', line)
        if not result:
            return line

        label = result.group(1)
        desc = result.group(2).strip()
        indent = parser.get_indent(line)

        # Some examples have a description
        if desc:
            desc_indent = len(line) - len(desc)
            # And some descriptions span multiple lines, aligned with the
            # first one.
            while (pctxt.has_more_lines(1)
                    and pctxt.get_line(1)
                    and parser.get_indent(pctxt.get_line(1)) == desc_indent):
                desc += " " + pctxt.get_line(1).strip()
                pctxt.next()

        pctxt.next()
        add_empty_line = pctxt.eat_empty_lines()

        content = []

        # -1 never matches either branch below: nothing follows the label.
        if pctxt.has_more_lines():
            next_indent = parser.get_indent(pctxt.get_line())
        else:
            next_indent = -1

        if next_indent > indent:
            if desc:
                desc = desc[0].upper() + desc[1:]
            add_empty_line = 0
            while pctxt.has_more_lines() and (
                    (not pctxt.get_line())
                    or (parser.get_indent(pctxt.get_line()) > indent)):
                current = pctxt.get_line()
                if current:
                    # Blank lines are kept only between two example lines.
                    content.extend([""] * add_empty_line)
                    content.append(re.sub(r'(#.*)$', self.comment, current))
                    add_empty_line = 0
                else:
                    add_empty_line += 1
                pctxt.next()
        elif next_indent == indent:
            # Simple example that can't have empty lines
            if add_empty_line and desc:
                # This means that the example was on the same line as the
                # 'Example' tag and was not a description
                content.append(" " * indent + desc)
                desc = False
            else:
                while pctxt.has_more_lines() and (parser.get_indent(pctxt.get_line()) >= indent):
                    content.append(pctxt.get_line())
                    pctxt.next()
                pctxt.eat_empty_lines()  # Skip empty remaining lines

        pctxt.stop = True

        parser.remove_indent(content)

        template = pctxt.templates.get_template("parser/example.tpl")
        return template.render(
            pctxt=pctxt,
            label=label,
            desc=desc,
            content=content
        )
diff --git a/debian/dconv/parser/keyword.py b/debian/dconv/parser/keyword.py
new file mode 100644
index 0000000..f20944f
--- /dev/null
+++ b/debian/dconv/parser/keyword.py
@@ -0,0 +1,142 @@
+import re
+import parser
+from urllib.parse import quote
+
class Parser(parser.Parser):
    """Detect keyword definition lines and render them with HTML anchors."""

    # Text allowed after a keyword for the line to count as a definition
    # (dirty hack):
    # - parameters should only start with the character "<" (seen here as
    #   "&lt;"), "[", "{" or "/"
    # - or a colon (":") followed by alpha keywords identifying fetching
    #   samples (optionally separated by the character "+")
    # - or the string "(deprecated)" at the end
    _PARAMETERS_RE = re.compile(
        r"^ +((&lt;|\[|\{|/).*|(: [a-z +]+))?(\(deprecated\))?$"
    )

    def __init__(self, pctxt):
        parser.Parser.__init__(self, pctxt)
        # Raw strings: the backslash escapes are meant for the regex engine.
        self.keywordPattern = re.compile(r'^(%s%s)(%s)' % (
            r'([a-z][a-z0-9\-\+_\.]*[a-z0-9\-\+_)])',   # keyword
            r'( [a-z0-9\-_]+)*',                        # subkeywords
            r'(\([^ ]*\))?',   # arg (ex: (<backend>), (<frontend>/<backend>), (<offset1>,<length>[,<offset2>]) ...
        ))

    def parse(self, line):
        """Parse *line* as a possible keyword definition.

        Returns:
          - the anchors plus a ``<div class="keyword">`` element when *line*
            is a keyword definition (cursor advanced, ``pctxt.stop`` set);
          - an empty string when *line* starts a ``/* ... */`` comment, which
            is skipped;
          - *line* unchanged in every other case.
        """
        pctxt = self.pctxt

        # Empty or indented lines never start a keyword definition.
        if line == "" or line.startswith(" "):
            return line

        keyword = False
        splitKeyword = []
        parameters = ""

        parsed = self.keywordPattern.match(line)
        if parsed is not None:
            keyword = parsed.group(1)
            # Group 4 wraps the optional argument, so it is "" (not None)
            # when no argument is present.
            arg = parsed.group(4)
            parameters = line[len(keyword) + len(arg):]
            if parameters != "" and not self._PARAMETERS_RE.match(parameters):
                keyword = False
            else:
                splitKeyword = keyword.split(" ")
            parameters = arg + parameters

        if keyword and len(splitKeyword) <= 5:
            return self._render_keyword(keyword, splitKeyword, parameters)

        if line.startswith("/*"):
            # Skip comments in the documentation; stop at the end of the
            # input if the comment is never closed.
            while pctxt.has_more_lines() and not pctxt.get_line().endswith("*/"):
                pctxt.next()
            pctxt.next()
            return ""

        # This is probably not a keyword but a text, ignore it
        return line

    def _render_keyword(self, keyword, splitKeyword, parameters):
        # Register the keyword prefixes, emit their anchors and the keyword
        # <div>, and consume the parameter lines that follow.
        pctxt = self.pctxt
        keywords = pctxt.keywords
        chapters = pctxt.chapters
        chapter = pctxt.details["chapter"]
        toplevel = pctxt.details["toplevel"]

        res = ""
        for j in range(len(splitKeyword)):
            subKeyword = " ".join(splitKeyword[:j + 1])
            if subKeyword != "no":
                keywords.setdefault(subKeyword, set()).add(chapter)
            res += '<a class="anchor" name="%s"></a>' % subKeyword
            res += '<a class="anchor" name="%s-%s"></a>' % (toplevel, subKeyword)
            res += '<a class="anchor" name="%s-%s"></a>' % (chapter, subKeyword)
            res += '<a class="anchor" name="%s (%s)"></a>' % (subKeyword, chapters[toplevel]['title'])
            res += '<a class="anchor" name="%s (%s)"></a>' % (subKeyword, chapters[chapter]['title'])

        # Only the first line is scanned for the marker: continuation lines
        # are appended after this replacement.
        parameters = parameters.replace("(deprecated)", '<span class="label label-warning">(deprecated)</span>')

        nextline = pctxt.get_line(1) if pctxt.has_more_lines(1) else ""
        while nextline.startswith(" "):
            # Found parameters on the next line
            parameters += "\n" + nextline
            pctxt.next()
            nextline = pctxt.get_line(1) if pctxt.has_more_lines(1) else ""

        parameters = self.colorize(parameters)
        res += '<div class="keyword"><b><a class="anchor" name="%s"></a><a href="#%s">%s</a></b>%s</div>' % (
            keyword, quote("%s-%s" % (chapter, keyword)), keyword, parameters)
        pctxt.next()
        pctxt.stop = True
        return res

    # Used to colorize keywords parameters
    # TODO : use CSS styling
    def colorize(self, text):
        """Wrap bracketed parts of *text* in colored ``<span>`` elements.

        ``[...]``, ``{...}`` and ``&lt;...&gt;`` each get their own color.
        A closing token first closes every span opened after its matching
        opening token; spans still open at the end are closed as well.
        """
        tags = [
            ("[", "]", "#008"),
            ("{", "}", "#800"),
            ("&lt;", "&gt;", "#080"),
        ]
        pieces = []
        heap = []   # stack of currently opened tags
        pos = 0
        while pos < len(text):
            for tag in tags:
                opener, closer, color = tag
                if text.startswith(opener, pos):
                    # Opening tag
                    heap.append(tag)
                    pieces.append('<span style="color: %s">%s' % (color, opener))
                    pos += len(opener)
                    break
                if text.startswith(closer, pos):
                    # Closing tag: pop opening tags until the corresponding
                    # one is found
                    openingTag = False
                    while heap and openingTag != tag:
                        openingTag = heap.pop()
                        if openingTag != tag:
                            pieces.append('</span>')
                    # all intermediate tags are now closed, we can display
                    # the tag
                    pieces.append(closer)
                    # and then close it if it was previously opened
                    if openingTag == tag:
                        pieces.append('</span>')
                    pos += len(closer)
                    break
            else:
                # Plain character
                pieces.append(text[pos])
                pos += 1

        # close all unterminated tags
        pieces.extend('</span>' for _ in heap)
        return "".join(pieces)
+
+
diff --git a/debian/dconv/parser/seealso.py b/debian/dconv/parser/seealso.py
new file mode 100644
index 0000000..bbb53f9
--- /dev/null
+++ b/debian/dconv/parser/seealso.py
@@ -0,0 +1,32 @@
+import re
+import parser
+
class Parser(parser.Parser):
    """Render a ``See also:`` line with the seealso template."""

    def parse(self, line):
        """Return the rendered ``parser/seealso.tpl`` or *line* unchanged.

        On a match, following lines indented at least as much as *line* are
        joined to the description, the cursor is advanced and ``pctxt.stop``
        is set.
        """
        pctxt = self.pctxt

        match = re.search(r'(See also *:)', line)
        if not match:
            return line

        label = match.group(0)
        desc = re.sub(r'.*See also *:', '', line).strip()
        base_indent = parser.get_indent(line)

        # Some descriptions are on multiple lines
        while pctxt.has_more_lines(1):
            following = pctxt.get_line(1)
            if parser.get_indent(following) < base_indent:
                break
            desc += " " + following.strip()
            pctxt.next()

        # NOTE(review): blank lines are eaten before the cursor leaves the
        # current line; the order looks reversed -- confirm before changing.
        pctxt.eat_empty_lines()
        pctxt.next()
        pctxt.stop = True

        template = pctxt.templates.get_template("parser/seealso.tpl")
        return template.render(
            pctxt=pctxt,
            label=label,
            desc=desc,
        )
diff --git a/debian/dconv/parser/table.py b/debian/dconv/parser/table.py
new file mode 100644
index 0000000..e2575b1
--- /dev/null
+++ b/debian/dconv/parser/table.py
@@ -0,0 +1,244 @@
+import re
+import sys
+import parser
+
+class Parser(parser.Parser):  # Detects text tables announced by a separator line and renders them through templates
+ def __init__(self, pctxt):
+ parser.Parser.__init__(self, pctxt)
+ self.table1Pattern = re.compile(r'^ *(-+\+)+-+')  # separator made of dash runs joined by "+", e.g. "---+---"
+ self.table2Pattern = re.compile(r'^ *\+(-+\+)+')  # separator starting with "+"; only referenced by the disabled branch in parse()
+
+ def parse(self, line):  # returns the rendered table, or `line` unchanged when no table starts here
+ global document, keywords, keywordsCount, chapters, keyword_conflicts  # NOTE(review): none of these names is used in this method
+
+ pctxt = self.pctxt
+
+ if pctxt.context['headers']['subtitle'] != 'Configuration Manual':
+ # Quick exit
+ return line
+ elif pctxt.details['chapter'] == "4":
+ # BUG: the matrix in chapter 4. Proxies is not well displayed, we skip this chapter
+ return line
+
+ if pctxt.has_more_lines(1):
+ nextline = pctxt.get_line(1)
+ else:
+ nextline = ""
+
+ if self.table1Pattern.match(nextline):  # the line after the current one is a separator: enter table mode
+ # activate table rendering only for the Configuration Manual
+ lineSeparator = nextline
+ nbColumns = nextline.count("+") + 1  # one more column than "+" characters in the separator
+ extraColumns = 0  # NOTE(review): assigned here and below but never read
+ print("Entering table mode (%d columns)" % nbColumns, file=sys.stderr)
+ table = []
+ if line.find("|") != -1:  # first form: cells are delimited by "|"
+ row = []
+ while pctxt.has_more_lines():
+ line = pctxt.get_line()
+ if pctxt.has_more_lines(1):
+ nextline = pctxt.get_line(1)
+ else:
+ nextline = ""
+ if line == lineSeparator:
+ # New row
+ table.append(row)
+ row = []
+ if nextline.find("|") == -1:
+ break # End of table
+ else:
+ # Data
+ columns = line.split("|")
+ for j in range(0, len(columns)):
+ try:
+ if row[j]:
+ row[j] += "<br />"
+ row[j] += columns[j].strip()
+ except:  # NOTE(review): bare except; presumably meant for the IndexError raised when row[j] does not exist yet
+ row.append(columns[j].strip())
+ pctxt.next()
+ else:  # second form: no "|"; cells are cut at the "+" positions of the separator line
+ row = []
+ headers = nextline  # separator line used to locate column boundaries
+ while pctxt.has_more_lines():
+ line = pctxt.get_line()
+ if pctxt.has_more_lines(1):
+ nextline = pctxt.get_line(1)
+ else:
+ nextline = ""
+
+ if nextline == "":
+ if row: table.append(row)
+ break # End of table
+
+ if (line != lineSeparator) and (line[0] != "-"):  # NOTE(review): line[0] would raise IndexError if line were empty
+ start = 0
+
+ if row and not line.startswith(" "):
+ # Row is complete, parse a new one
+ table.append(row)
+ row = []
+
+ tmprow = []
+ while start != -1:  # walk the separator from one "+" to the next
+ end = headers.find("+", start)
+ if end == -1:
+ end = len(headers)
+
+ realend = end
+ if realend == len(headers):
+ realend = len(line)  # last column: take the rest of the line
+ else:
+ while realend < len(line) and line[realend] != " ":  # extend the cut up to the next space of the line
+ realend += 1
+ end += 1
+
+ tmprow.append(line[start:realend])
+
+ start = end + 1
+ if start >= len(headers):
+ start = -1
+ for j in range(0, nbColumns):
+ try:
+ row[j] += tmprow[j].strip()
+ except:  # NOTE(review): bare except; presumably meant for the IndexError raised when row[j] does not exist yet
+ row.append(tmprow[j].strip())
+
+ deprecated = row[0].endswith("(deprecated)")  # a trailing "(deprecated)" marker is stripped from the first cell
+ if deprecated:
+ row[0] = row[0][: -len("(deprecated)")].rstrip()
+
+ nooption = row[1].startswith("(*)")  # a leading "(*)" marker is stripped from the second cell
+ if nooption:
+ row[1] = row[1][len("(*)"):].strip()
+
+ if deprecated or nooption:
+ extraColumns = 1
+ extra = ""
+ if deprecated:
+ extra += '<span class="label label-warning">(deprecated)</span>'
+ if nooption:
+ extra += '<span>(*)</span>'
+ row.append(extra)  # stripped markers are kept as an additional trailing cell
+
+ pctxt.next()
+ print("Leaving table mode", file=sys.stderr)
+ pctxt.next() # skip useless next line
+ pctxt.stop = True
+
+ return self.renderTable(table, nbColumns, pctxt.details["toplevel"])
+ # elif self.table2Pattern.match(line):
+ # return self.parse_table_format2()
+ elif line.find("May be used in sections") != -1:  # two-line matrix: this line and the next are split on "|"
+ nextline = pctxt.get_line(1)  # NOTE(review): no has_more_lines(1) check before reading the next line
+ rows = []
+ headers = line.split(":")
+ rows.append(headers[1].split("|"))
+ rows.append(nextline.split("|"))
+ table = {
+ "rows": rows,
+ "title": headers[0]
+ }
+ pctxt.next(2) # skip this previous table
+ pctxt.stop = True
+
+ return self.renderTable(table)
+
+ return line
+
+
+ def parse_table_format2(self):  # parses rows starting with "|"; its only visible call site is commented out in parse()
+ pctxt = self.pctxt
+
+ linesep = pctxt.get_line()
+ rows = []
+
+ pctxt.next()
+ maxcols = 0
+ while pctxt.get_line().strip().startswith("|"):
+ row = pctxt.get_line().strip()[1:-1].split("|")  # drop the first and last character, then split the cells
+ rows.append(row)
+ maxcols = max(maxcols, len(row))
+ pctxt.next()
+ if pctxt.get_line() == linesep:
+ # TODO : find a way to define a special style for next row
+ pctxt.next()
+ pctxt.stop = True
+
+ return self.renderTable(rows, maxcols)
+
+ # Render tables detected by the conversion parser
+ def renderTable(self, table, maxColumns = 0, toplevel = None):  # table: list of rows, or dict with "rows" and "title"; returns the rendered parser/table.tpl
+ pctxt = self.pctxt
+ template = pctxt.templates.get_template("parser/table.tpl")
+
+ res = ""  # NOTE(review): never used afterwards
+
+ title = None
+ if isinstance(table, dict):
+ title = table["title"]
+ table = table["rows"]
+
+ if not maxColumns:
+ maxColumns = len(table[0])  # default column count: width of the first row
+
+ rows = []
+
+ mode = "th"  # NOTE(review): assigned here and below but never read
+ headerLine = ""
+ hasKeywords = False
+ i = 0  # row index; row 0 uses the header template
+ for row in table:
+ line = ""
+
+ if i == 0:
+ row_template = pctxt.templates.get_template("parser/table/header.tpl")
+ else:
+ row_template = pctxt.templates.get_template("parser/table/row.tpl")
+
+ if i > 1 and (i - 1) % 20 == 0 and len(table) > 50:
+ # Repeat headers periodically for long tables
+ rows.append(headerLine)
+
+ j = 0  # column index within the row
+ cols = []
+ for column in row:
+ if j >= maxColumns:  # cells beyond maxColumns are not rendered as columns
+ break
+
+ tplcol = {}
+
+ data = column.strip()
+ keyword = column
+ if j == 0 and i == 0 and keyword == 'keyword':  # a first header cell equal to 'keyword' marks a keyword table
+ hasKeywords = True
+ if j == 0 and i != 0 and hasKeywords:
+ if keyword.startswith("[no] "):
+ keyword = keyword[len("[no] "):]
+ tplcol['toplevel'] = toplevel
+ tplcol['keyword'] = keyword
+ tplcol['extra'] = []
+ if j == 0 and len(row) > maxColumns:  # cells past maxColumns are attached to the first cell as 'extra'
+ for k in range(maxColumns, len(row)):
+ tplcol['extra'].append(row[k])
+ tplcol['data'] = data
+ cols.append(tplcol)
+ j += 1
+ mode = "td"
+
+ line = row_template.render(
+ pctxt=pctxt,
+ columns=cols
+ ).strip()
+ if i == 0:
+ headerLine = line  # kept so it can be repeated in long tables
+
+ rows.append(line)
+
+ i += 1
+
+ return template.render(
+ pctxt=pctxt,
+ title=title,
+ rows=rows,
+ )
diff --git a/debian/dconv/parser/underline.py b/debian/dconv/parser/underline.py
new file mode 100644
index 0000000..3a2350c
--- /dev/null
+++ b/debian/dconv/parser/underline.py
@@ -0,0 +1,16 @@
+import parser
+
class Parser(parser.Parser):
    """Detect a line underlined with dashes and render it as a title."""

    # Detect underlines
    def parse(self, line):
        """Return the rendered ``parser/underline.tpl`` or *line* unchanged.

        A line counts as underlined when the next line consists of exactly
        as many "-" characters as *line* has characters. On a match the
        cursor skips both lines and any blank lines that follow, and
        ``pctxt.stop`` is set.
        """
        pctxt = self.pctxt

        if not pctxt.has_more_lines(1):
            return line

        underline = pctxt.get_line(1)
        # For a non-empty line the expected underline is non-empty and starts
        # with "-", so this single comparison covers all original checks.
        if not line or underline != "-" * len(line):
            return line

        template = pctxt.templates.get_template("parser/underline.tpl")
        rendered = template.render(pctxt=pctxt, data=line).strip()
        pctxt.next(2)
        pctxt.eat_empty_lines()
        pctxt.stop = True
        return rendered