diff options
Diffstat (limited to 'debian/dconv/parser')
-rw-r--r-- | debian/dconv/parser/__init__.py | 81 | ||||
-rw-r--r-- | debian/dconv/parser/arguments.py | 132 | ||||
-rw-r--r-- | debian/dconv/parser/example.py | 77 | ||||
-rw-r--r-- | debian/dconv/parser/keyword.py | 142 | ||||
-rw-r--r-- | debian/dconv/parser/seealso.py | 32 | ||||
-rw-r--r-- | debian/dconv/parser/table.py | 244 | ||||
-rw-r--r-- | debian/dconv/parser/underline.py | 16 |
7 files changed, 724 insertions, 0 deletions
# diff --git a/debian/dconv/parser/__init__.py b/debian/dconv/parser/__init__.py new file mode 100644 index 0000000..82b8522 --- /dev/null +++ b/debian/dconv/parser/__init__.py @@ -0,0 +1,81 @@
"""Shared building blocks for the dconv line parsers.

Provides the ``Parser`` base class, the ``PContext`` line cursor that the
parsers advance while consuming input, and two indentation helpers.
"""

__all__ = [
    'arguments',
    'example',
    'keyword',
    'seealso',
    'table',
    'underline'
]


class Parser:
    """Base class of the line parsers; keeps a reference to the context."""

    def __init__(self, pctxt):
        self.pctxt = pctxt

    def parse(self, line):
        """Return *line* unchanged (default, pass-through behaviour)."""
        return line


class PContext:
    """Cursor over a list of text lines, shared between parsers.

    Attributes set by this class:
      lines     -- the list of lines being parsed
      nblines   -- len(lines), cached when the content is set
      i         -- index of the current line
      stop      -- flag reset to False whenever new content is set
      templates -- whatever object was passed to the constructor
    """

    def __init__(self, templates = None):
        self.set_content_list([])
        self.templates = templates

    def set_content(self, content):
        """Load *content* (a string), split on newline characters."""
        self.set_content_list(content.split("\n"))

    def set_content_list(self, content):
        """Load *content* (a list of lines) and rewind the cursor."""
        self.lines = content
        self.nblines = len(self.lines)
        self.i = 0
        self.stop = False

    def get_lines(self):
        """Return the underlying list of lines."""
        return self.lines

    def eat_lines(self):
        """Skip consecutive non-blank lines; return how many were skipped."""
        count = 0
        while self.has_more_lines() and self.lines[self.i].strip():
            count += 1
            self.next()
        return count

    def eat_empty_lines(self):
        """Skip consecutive blank lines; return how many were skipped."""
        count = 0
        while self.has_more_lines() and not self.lines[self.i].strip():
            count += 1
            self.next()
        return count

    def next(self, count=1):
        """Advance the cursor by *count* lines (no bounds check)."""
        self.i += count

    def has_more_lines(self, offset=0):
        """Tell whether the line at ``current + offset`` exists."""
        return self.i + offset < self.nblines

    def get_line(self, offset=0):
        """Return the line at ``current + offset`` without trailing whitespace.

        Raises IndexError when that position is past the end; callers are
        expected to check ``has_more_lines(offset)`` first.
        """
        return self.lines[self.i + offset].rstrip()


def get_indent(line):
    """Return the number of leading space characters of *line*.

    Only ' ' is counted; a tab ends the indentation.
    """
    return len(line) - len(line.lstrip(' '))


def remove_indent(list):
    """Strip the common leading indentation from every line, in place.

    Blank (whitespace-only) lines are ignored when computing the common
    indentation. Nothing is returned; the list passed in is modified.
    """
    # The parameter name shadows the builtin but is kept for compatibility.
    lines = list
    min_indent = min(
        (get_indent(line) for line in lines if line.strip()),
        default=0,
    )
    # Realign the content only when there is something to remove
    if min_indent > 0:
        lines[:] = [line[min_indent:] for line in lines]
# diff --git a/debian/dconv/parser/arguments.py b/debian/dconv/parser/arguments.py new file mode 100644 index 0000000..096b269 ---
# /dev/null +++ b/debian/dconv/parser/arguments.py @@ -0,0 +1,132 @@
import sys
import re
import parser

# TODO: Allow inner data parsing (this will allow to parse the examples
# provided in an arguments block)
# NOTE: a disabled draft of per-argument parsing (parse_args/unescape) used to
# be parked here inside string literals; it had no runtime effect.


class Parser(parser.Parser):
    """Render an "Argument(s) :" block through parser/arguments.tpl."""

    def __init__(self, pctxt):
        parser.Parser.__init__(self, pctxt)

    def parse(self, line):
        """Return the rendered arguments block, or *line* if it is not one.

        When the line contains "Argument :"/"Arguments :", the following
        lines that are indented deeper than it are collected (blank lines
        between them are preserved, trailing ones dropped), de-indented and
        passed to the template. ``pctxt.stop`` is set on a match.
        """
        pctxt = self.pctxt

        result = re.search(r'(Arguments? *:)', line)
        if not result:
            return line

        label = result.group(0)
        desc = re.sub(r'.*Arguments? *:', '', line).strip()
        indent = parser.get_indent(line)

        pctxt.next()
        pctxt.eat_empty_lines()

        arglines = []
        # A description of exactly "none" means there is nothing to collect
        if desc != "none":
            add_empty_lines = 0
            while pctxt.has_more_lines() and (parser.get_indent(pctxt.get_line()) > indent):
                # Re-insert the blank lines skipped before this line
                arglines.extend([""] * add_empty_lines)
                arglines.append(pctxt.get_line())
                pctxt.next()
                add_empty_lines = pctxt.eat_empty_lines()

            if arglines:
                parser.remove_indent(arglines)

        pctxt.stop = True

        template = pctxt.templates.get_template("parser/arguments.tpl")
        return template.render(
            pctxt=pctxt,
            label=label,
            desc=desc,
            content=arglines
        )


# diff --git a/debian/dconv/parser/example.py b/debian/dconv/parser/example.py new file mode 100644 index 0000000..3958992 --- /dev/null +++ b/debian/dconv/parser/example.py @@ -0,0 +1,77 @@
import re
import parser


# Detect examples blocks
class Parser(parser.Parser):
    """Render an "Example(s) :" block through parser/example.tpl."""

    def __init__(self, pctxt):
        parser.Parser.__init__(self, pctxt)
        template = pctxt.templates.get_template("parser/example/comment.tpl")
        # Replacement applied to every "#..." trailing comment of an example
        self.comment = template.render(pctxt=pctxt).strip()

    def parse(self, line):
        """Return the rendered example block, or *line* if it is not one.

        Sets ``pctxt.stop`` on a match. Reaching the end of the input while
        looking ahead no longer raises IndexError: the block is rendered
        with whatever was collected so far.
        """
        pctxt = self.pctxt

        result = re.search(r'^ *(Examples? *:)(.*)', line)
        if not result:
            return line

        label = result.group(1)

        desc_indent = False
        desc = result.group(2).strip()

        # Some examples have a description
        if desc:
            desc_indent = len(line) - len(desc)

        indent = parser.get_indent(line)

        if desc:
            # And some description are on multiple lines
            while (pctxt.has_more_lines(1)
                   and pctxt.get_line(1)
                   and parser.get_indent(pctxt.get_line(1)) == desc_indent):
                desc += " " + pctxt.get_line(1).strip()
                pctxt.next()

        pctxt.next()
        add_empty_line = pctxt.eat_empty_lines()

        content = []

        # -1 never matches either branch below: nothing follows the tag
        if pctxt.has_more_lines():
            current_indent = parser.get_indent(pctxt.get_line())
        else:
            current_indent = -1

        if current_indent > indent:
            if desc:
                desc = desc[0].upper() + desc[1:]
            add_empty_line = 0
            while pctxt.has_more_lines() and ((not pctxt.get_line()) or (parser.get_indent(pctxt.get_line()) > indent)):
                if pctxt.get_line():
                    # Blank lines are only kept between two content lines
                    content.extend([""] * add_empty_line)
                    content.append(re.sub(r'(#.*)$', self.comment, pctxt.get_line()))
                    add_empty_line = 0
                else:
                    add_empty_line += 1
                pctxt.next()
        elif current_indent == indent:
            # Simple example that can't have empty lines
            if add_empty_line and desc:
                # This means that the example was on the same line as the 'Example' tag
                # and was not a description
                content.append(" " * indent + desc)
                desc = False
            else:
                while pctxt.has_more_lines() and (parser.get_indent(pctxt.get_line()) >= indent):
                    content.append(pctxt.get_line())
                    pctxt.next()
                pctxt.eat_empty_lines() # Skip empty remaining lines

        pctxt.stop = True

        parser.remove_indent(content)

        template = pctxt.templates.get_template("parser/example.tpl")
        return template.render(
            pctxt=pctxt,
            label=label,
            desc=desc,
            content=content
        )


# diff --git a/debian/dconv/parser/keyword.py b/debian/dconv/parser/keyword.py new file mode 100644 index 0000000..f20944f --- /dev/null +++ b/debian/dconv/parser/keyword.py @@ -0,0 +1,142 @@
import re
import parser
from urllib.parse import quote


class Parser(parser.Parser):
    """Turn keyword definition lines into anchored, colorized HTML."""

    def __init__(self, pctxt):
        parser.Parser.__init__(self, pctxt)
        # Raw strings: the fragments contain regex escapes such as \- and \(
        # that are not valid Python string escapes.
        self.keywordPattern = re.compile(r'^(%s%s)(%s)' % (
            r'([a-z][a-z0-9\-\+_\.]*[a-z0-9\-\+_)])',   # keyword
            r'( [a-z0-9\-_]+)*',                          # subkeywords
            r'(\([^ ]*\))?',   # arg (ex: (<backend>), (<frontend>/<backend>), (<offset1>,<length>[,<offset2>]) ...
        ))
        # Shape the text following a keyword must have (see parse())
        self.parametersPattern = re.compile(
            r"^ +((<|\[|\{|/).*|(: [a-z +]+))?(\(deprecated\))?$")

    def parse(self, line):
        """Return HTML for a keyword line, "" for a skipped comment, else *line*.

        Reads ``pctxt.keywords``, ``pctxt.chapters`` and ``pctxt.details``;
        records the chapter of each (sub)keyword in ``pctxt.keywords``.
        """
        pctxt = self.pctxt
        keywords = pctxt.keywords
        chapters = pctxt.chapters

        # Keyword definitions start in the first column
        if not line or line.startswith(" "):
            return line

        res = ""
        keyword = False
        splitKeyword = []
        parameters = ""

        parsed = self.keywordPattern.match(line)
        if parsed is not None:
            keyword = parsed.group(1)
            arg = parsed.group(4)
            parameters = line[len(keyword) + len(arg):]
            if parameters != "" and not self.parametersPattern.match(parameters):
                # Dirty hack
                # - parameters should only start with the character "<", "[", "{", "/"
                # - or a colon (":") followed by alpha keywords to identify fetching samples (optionally separated by the character "+")
                # - or the string "(deprecated)" at the end
                keyword = False
            else:
                splitKeyword = keyword.split(" ")

            parameters = arg + parameters

        if keyword and (len(splitKeyword) <= 5):
            toplevel = pctxt.details["toplevel"]
            chapter = pctxt.details["chapter"]
            for j in range(len(splitKeyword)):
                subKeyword = " ".join(splitKeyword[0:j + 1])
                if subKeyword != "no":
                    if subKeyword not in keywords:
                        keywords[subKeyword] = set()
                    keywords[subKeyword].add(chapter)
                res += '<a class="anchor" name="%s"></a>' % subKeyword
                res += '<a class="anchor" name="%s-%s"></a>' % (toplevel, subKeyword)
                res += '<a class="anchor" name="%s-%s"></a>' % (chapter, subKeyword)
                res += '<a class="anchor" name="%s (%s)"></a>' % (subKeyword, chapters[toplevel]['title'])
                res += '<a class="anchor" name="%s (%s)"></a>' % (subKeyword, chapters[chapter]['title'])

            # NOTE(review): prefix/suffix are empty in both branches as the
            # text stands - markup may have been lost when this listing was
            # extracted; confirm against the repository.
            prefix = ""
            suffix = ""
            if "(deprecated)" in parameters:
                parameters = parameters.replace("(deprecated)", '<span class="label label-warning">(deprecated)</span>')

            # NOTE(review): the continuation test is a single leading space
            # as shown here - confirm the intended indentation width.
            nextline = pctxt.get_line(1) if pctxt.has_more_lines(1) else ""
            while nextline.startswith(" "):
                # Found parameters on the next line
                parameters += "\n" + nextline
                pctxt.next()
                nextline = pctxt.get_line(1) if pctxt.has_more_lines(1) else ""

            parameters = self.colorize(parameters)
            res += '<div class="keyword">%s<b><a class="anchor" name="%s"></a><a href="#%s">%s</a></b>%s%s</div>' % (prefix, keyword, quote("%s-%s" % (chapter, keyword)), keyword, parameters, suffix)
            pctxt.next()
            pctxt.stop = True
        elif line.startswith("/*"):
            # Skip comments in the documentation (stop at end of input if
            # the comment is never closed)
            while pctxt.has_more_lines() and not pctxt.get_line().endswith("*/"):
                pctxt.next()
            pctxt.next()
        else:
            # This is probably not a keyword but a text, ignore it
            res += line

        return res

    # Used to colorize keywords parameters
    # TODO : use CSS styling
    def colorize(self, text):
        """Wrap bracketed parts of *text* in colored <span> elements.

        Opening tags push onto a stack. A closing tag pops (and closes)
        everything up to its matching opener; tags still open at the end
        are closed.
        """
        # NOTE(review): the third pair is "<"/">" as shown here; if the input
        # is HTML-escaped upstream these may have been entities - confirm.
        tags = [
            [ "[" , "]" , "#008" ],
            [ "{" , "}" , "#800" ],
            [ "<", ">", "#080" ],
        ]
        parts = []
        heap = []
        pos = 0
        while pos < len(text):
            found = False
            for tag in tags:
                if text.startswith(tag[0], pos):
                    # Opening tag
                    heap.append(tag)
                    parts.append('<span style="color: %s">%s' % (tag[2], tag[0]))
                    pos += len(tag[0])
                    found = True
                    break
                elif text.startswith(tag[1], pos):
                    # Closing tag

                    # pop opening tags until the corresponding one is found
                    openingTag = False
                    while heap and openingTag != tag:
                        openingTag = heap.pop()
                        if openingTag != tag:
                            parts.append('</span>')
                    # all intermediate tags are now closed, we can display the tag
                    parts.append(tag[1])
                    # and then close it if it was previously opened
                    if openingTag == tag:
                        parts.append('</span>')
                    pos += len(tag[1])
                    found = True
                    break
            if not found:
                parts.append(text[pos])
                pos += 1
        # close all unterminated tags
        parts.append('</span>' * len(heap))

        return "".join(parts)


# diff --git a/debian/dconv/parser/seealso.py b/debian/dconv/parser/seealso.py new file mode 100644 index 0000000..bbb53f9 --- /dev/null +++ b/debian/dconv/parser/seealso.py @@ -0,0 +1,32 @@
import re
import parser


class Parser(parser.Parser):
    """Render a "See also :" block through parser/seealso.tpl."""

    def parse(self, line):
        """Return the rendered block, or *line* if it has no "See also :"."""
        pctxt = self.pctxt

        result = re.search(r'(See also *:)', line)
        if not result:
            return line

        label = result.group(0)
        desc = re.sub(r'.*See also *:', '', line).strip()
        indent = parser.get_indent(line)

        # Some descriptions are on multiple lines
        while pctxt.has_more_lines(1) and parser.get_indent(pctxt.get_line(1)) >= indent:
            desc += " " + pctxt.get_line(1).strip()
            pctxt.next()

        # NOTE(review): sibling parsers call next() before eat_empty_lines();
        # the order here is kept as found - confirm it is intended.
        pctxt.eat_empty_lines()
        pctxt.next()
        pctxt.stop = True

        template = pctxt.templates.get_template("parser/seealso.tpl")
        return template.render(
            pctxt=pctxt,
            label=label,
            desc=desc,
        )


# diff --git a/debian/dconv/parser/table.py b/debian/dconv/parser/table.py new file mode 100644 index 0000000..e2575b1 --- /dev/null +++ b/debian/dconv/parser/table.py @@ -0,0 +1,244 @@
import re
import sys
import parser


class Parser(parser.Parser):
    """Detect ASCII tables and render them through parser/table.tpl."""

    def __init__(self, pctxt):
        parser.Parser.__init__(self, pctxt)
        self.table1Pattern = re.compile(r'^ *(-+\+)+-+')
        self.table2Pattern = re.compile(r'^ *\+(-+\+)+')

    def parse(self, line):
        """Return a rendered table when *line* starts one, else *line*.

        Only active when the document subtitle is 'Configuration Manual'
        and the current chapter is not "4". Sets ``pctxt.stop`` on a match.
        """
        pctxt = self.pctxt

        if pctxt.context['headers']['subtitle'] != 'Configuration Manual':
            # Quick exit
            return line
        elif pctxt.details['chapter'] == "4":
            # BUG: the matrix in chapter 4. Proxies is not well displayed, we skip this chapter
            return line

        nextline = pctxt.get_line(1) if pctxt.has_more_lines(1) else ""

        if self.table1Pattern.match(nextline):
            # activate table rendering only for the Configuration Manual
            lineSeparator = nextline
            nbColumns = nextline.count("+") + 1
            print("Entering table mode (%d columns)" % nbColumns, file=sys.stderr)
            if line.find("|") != -1:
                table = self._parse_piped_rows(lineSeparator)
            else:
                table = self._parse_aligned_rows(lineSeparator, nbColumns)
            print("Leaving table mode", file=sys.stderr)
            pctxt.next() # skip useless next line
            pctxt.stop = True

            return self.renderTable(table, nbColumns, pctxt.details["toplevel"])
        # elif self.table2Pattern.match(line):
        #    return self.parse_table_format2()
        elif line.find("May be used in sections") != -1:
            # Two-row table: the text after ":" and the following line
            nextline = pctxt.get_line(1) if pctxt.has_more_lines(1) else ""
            rows = []
            headers = line.split(":")
            rows.append(headers[1].split("|"))
            rows.append(nextline.split("|"))
            table = {
                "rows": rows,
                "title": headers[0]
            }
            pctxt.next(2) # skip this previous table
            pctxt.stop = True

            return self.renderTable(table)

        return line

    def _parse_piped_rows(self, lineSeparator):
        """Collect rows whose cells are separated by "|".

        Lines between two separators are merged cell by cell, joined with
        "<br />". Parsing ends at a separator not followed by a "|" line.
        """
        pctxt = self.pctxt
        table = []
        row = []
        while pctxt.has_more_lines():
            line = pctxt.get_line()
            nextline = pctxt.get_line(1) if pctxt.has_more_lines(1) else ""
            if line == lineSeparator:
                # New row
                table.append(row)
                row = []
                if nextline.find("|") == -1:
                    break # End of table
            else:
                # Data
                for j, column in enumerate(line.split("|")):
                    cell = column.strip()
                    if j < len(row):
                        if row[j]:
                            row[j] += "<br />"
                        row[j] += cell
                    else:
                        row.append(cell)
            pctxt.next()
        return table

    def _parse_aligned_rows(self, lineSeparator, nbColumns):
        """Collect rows whose cells are aligned on the "+" of the separator.

        A line starting with a space continues the current row. Parsing
        ends when the next line is empty.
        """
        pctxt = self.pctxt
        table = []
        row = []
        headers = lineSeparator
        while pctxt.has_more_lines():
            line = pctxt.get_line()
            nextline = pctxt.get_line(1) if pctxt.has_more_lines(1) else ""

            if nextline == "":
                if row:
                    table.append(row)
                break # End of table

            if (line != lineSeparator) and not line.startswith("-"):
                if row and not line.startswith(" "):
                    # Row is complete, parse a new one
                    table.append(row)
                    row = []

                tmprow = self._split_aligned(line, headers)
                for j in range(nbColumns):
                    if j < len(row):
                        row[j] += tmprow[j].strip()
                    else:
                        row.append(tmprow[j].strip())

                deprecated = row[0].endswith("(deprecated)")
                if deprecated:
                    row[0] = row[0][: -len("(deprecated)")].rstrip()

                nooption = row[1].startswith("(*)")
                if nooption:
                    row[1] = row[1][len("(*)"):].strip()

                if deprecated or nooption:
                    # Markers go to an extra trailing cell of the row
                    extra = ""
                    if deprecated:
                        extra += '<span class="label label-warning">(deprecated)</span>'
                    if nooption:
                        extra += '<span>(*)</span>'
                    row.append(extra)

            pctxt.next()
        return table

    @staticmethod
    def _split_aligned(line, headers):
        """Cut *line* into cells at the "+" positions found in *headers*.

        A cell that overflows its column is extended up to the next space,
        and the following column start is shifted accordingly.
        """
        tmprow = []
        start = 0
        while start != -1:
            end = headers.find("+", start)
            if end == -1:
                end = len(headers)

            realend = end
            if realend == len(headers):
                realend = len(line)
            else:
                while realend < len(line) and line[realend] != " ":
                    realend += 1
                    end += 1

            tmprow.append(line[start:realend])

            start = end + 1
            if start >= len(headers):
                start = -1
        return tmprow

    def parse_table_format2(self):
        """Parse a "+---+" bordered table starting at the current line.

        Not called from parse() at the moment (the call is commented out).
        """
        pctxt = self.pctxt

        linesep = pctxt.get_line()
        rows = []

        pctxt.next()
        maxcols = 0
        while pctxt.has_more_lines() and pctxt.get_line().strip().startswith("|"):
            row = pctxt.get_line().strip()[1:-1].split("|")
            rows.append(row)
            maxcols = max(maxcols, len(row))
            pctxt.next()
            if pctxt.has_more_lines() and pctxt.get_line() == linesep:
                # TODO : find a way to define a special style for next row
                pctxt.next()
        pctxt.stop = True

        return self.renderTable(rows, maxcols)

    # Render tables detected by the conversion parser
    def renderTable(self, table, maxColumns = 0, toplevel = None):
        """Render *table* through the table templates and return the result.

        table      -- list of rows (lists of strings), or a dict with the
                      keys "rows" and "title"
        maxColumns -- columns rendered per row; 0 means "as many as the
                      first row has". Cells beyond it are attached to the
                      first column as 'extra'.
        toplevel   -- passed to the template for keyword cells
        """
        pctxt = self.pctxt
        template = pctxt.templates.get_template("parser/table.tpl")

        title = None
        if isinstance(table, dict):
            title = table["title"]
            table = table["rows"]

        if not maxColumns:
            maxColumns = len(table[0]) if table else 0

        rows = []
        headerLine = ""
        hasKeywords = False
        for i, row in enumerate(table):
            if i == 0:
                row_template = pctxt.templates.get_template("parser/table/header.tpl")
            else:
                row_template = pctxt.templates.get_template("parser/table/row.tpl")

            if i > 1 and (i - 1) % 20 == 0 and len(table) > 50:
                # Repeat headers periodically for long tables
                rows.append(headerLine)

            cols = []
            for j, column in enumerate(row[:maxColumns]):
                tplcol = {}

                keyword = column
                if j == 0 and i == 0 and keyword == 'keyword':
                    hasKeywords = True
                if j == 0 and i != 0 and hasKeywords:
                    if keyword.startswith("[no] "):
                        keyword = keyword[len("[no] "):]
                    tplcol['toplevel'] = toplevel
                    tplcol['keyword'] = keyword
                tplcol['extra'] = []
                if j == 0 and len(row) > maxColumns:
                    tplcol['extra'].extend(row[maxColumns:])
                tplcol['data'] = column.strip()
                cols.append(tplcol)

            line = row_template.render(
                pctxt=pctxt,
                columns=cols
            ).strip()
            if i == 0:
                headerLine = line

            rows.append(line)

        return template.render(
            pctxt=pctxt,
            title=title,
            rows=rows,
        )


# diff --git a/debian/dconv/parser/underline.py b/debian/dconv/parser/underline.py new file mode 100644 index 0000000..3a2350c --- /dev/null +++ b/debian/dconv/parser/underline.py @@ -0,0 +1,16 @@
import parser


class Parser(parser.Parser):
    """Render a line underlined with dashes through parser/underline.tpl."""

    # Detect underlines
    def parse(self, line):
        """Return the rendered title when the next line underlines *line*.

        The underline must consist of exactly len(line) dashes. On a match
        both lines and the following blank lines are consumed and
        ``pctxt.stop`` is set; otherwise *line* is returned unchanged.
        """
        pctxt = self.pctxt
        if line and pctxt.has_more_lines(1) and pctxt.get_line(1) == "-" * len(line):
            template = pctxt.templates.get_template("parser/underline.tpl")
            line = template.render(pctxt=pctxt, data=line).strip()
            pctxt.next(2)
            pctxt.eat_empty_lines()
            pctxt.stop = True

        return line