summaryrefslogtreecommitdiffstats
path: root/markdown_it/rules_block/table.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown_it/rules_block/table.py')
-rw-r--r--markdown_it/rules_block/table.py238
1 files changed, 238 insertions, 0 deletions
diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py
new file mode 100644
index 0000000..e3db858
--- /dev/null
+++ b/markdown_it/rules_block/table.py
@@ -0,0 +1,238 @@
+# GFM table, https://github.github.com/gfm/#tables-extension-
+import re
+
+from ..common.utils import charCodeAt, isSpace
+from .state_block import StateBlock
+
+headerLineRe = re.compile(r"^:?-+:?$")
+enclosingPipesRe = re.compile(r"^\||\|$")
+
+
+def getLine(state: StateBlock, line: int):
+ pos = state.bMarks[line] + state.tShift[line]
+ maximum = state.eMarks[line]
+
+ # return state.src.substr(pos, max - pos)
+ return state.src[pos:maximum]
+
+
+def escapedSplit(string):
+ result = []
+ pos = 0
+ max = len(string)
+ isEscaped = False
+ lastPos = 0
+ current = ""
+ ch = charCodeAt(string, pos)
+
+ while pos < max:
+ if ch == 0x7C: # /* | */
+ if not isEscaped:
+ # pipe separating cells, '|'
+ result.append(current + string[lastPos:pos])
+ current = ""
+ lastPos = pos + 1
+ else:
+ # escaped pipe, '\|'
+ current += string[lastPos : pos - 1]
+ lastPos = pos
+
+ isEscaped = ch == 0x5C # /* \ */
+ pos += 1
+
+ ch = charCodeAt(string, pos)
+
+ result.append(current + string[lastPos:])
+
+ return result
+
+
+def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
+ tbodyLines = None
+
+ # should have at least two lines
+ if startLine + 2 > endLine:
+ return False
+
+ nextLine = startLine + 1
+
+ if state.sCount[nextLine] < state.blkIndent:
+ return False
+
+ # if it's indented more than 3 spaces, it should be a code block
+ if state.sCount[nextLine] - state.blkIndent >= 4:
+ return False
+
+ # first character of the second line should be '|', '-', ':',
+ # and no other characters are allowed but spaces;
+ # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp
+
+ pos = state.bMarks[nextLine] + state.tShift[nextLine]
+ if pos >= state.eMarks[nextLine]:
+ return False
+ first_ch = state.srcCharCode[pos]
+ pos += 1
+ if first_ch not in {0x7C, 0x2D, 0x3A}: # not in {"|", "-", ":"}
+ return False
+
+ if pos >= state.eMarks[nextLine]:
+ return False
+ second_ch = state.srcCharCode[pos]
+ pos += 1
+ # not in {"|", "-", ":"} and not space
+ if second_ch not in {0x7C, 0x2D, 0x3A} and not isSpace(second_ch):
+ return False
+
+ # if first character is '-', then second character must not be a space
+ # (due to parsing ambiguity with list)
+ if first_ch == 0x2D and isSpace(second_ch):
+ return False
+
+ while pos < state.eMarks[nextLine]:
+ ch = state.srcCharCode[pos]
+
+ # /* | */ /* - */ /* : */
+ if ch not in {0x7C, 0x2D, 0x3A} and not isSpace(ch):
+ return False
+
+ pos += 1
+
+ lineText = getLine(state, startLine + 1)
+
+ columns = lineText.split("|")
+ aligns = []
+ for i in range(len(columns)):
+ t = columns[i].strip()
+ if not t:
+ # allow empty columns before and after table, but not in between columns;
+ # e.g. allow ` |---| `, disallow ` ---||--- `
+ if i == 0 or i == len(columns) - 1:
+ continue
+ else:
+ return False
+
+ if not headerLineRe.search(t):
+ return False
+ if charCodeAt(t, len(t) - 1) == 0x3A: # /* : */
+ # /* : */
+ aligns.append("center" if charCodeAt(t, 0) == 0x3A else "right")
+ elif charCodeAt(t, 0) == 0x3A: # /* : */
+ aligns.append("left")
+ else:
+ aligns.append("")
+
+ lineText = getLine(state, startLine).strip()
+ if "|" not in lineText:
+ return False
+ if state.sCount[startLine] - state.blkIndent >= 4:
+ return False
+ columns = escapedSplit(lineText)
+ if columns and columns[0] == "":
+ columns.pop(0)
+ if columns and columns[-1] == "":
+ columns.pop()
+
+ # header row will define an amount of columns in the entire table,
+ # and align row should be exactly the same (the rest of the rows can differ)
+ columnCount = len(columns)
+ if columnCount == 0 or columnCount != len(aligns):
+ return False
+
+ if silent:
+ return True
+
+ oldParentType = state.parentType
+ state.parentType = "table"
+
+ # use 'blockquote' lists for termination because it's
+ # the most similar to tables
+ terminatorRules = state.md.block.ruler.getRules("blockquote")
+
+ token = state.push("table_open", "table", 1)
+ token.map = tableLines = [startLine, 0]
+
+ token = state.push("thead_open", "thead", 1)
+ token.map = [startLine, startLine + 1]
+
+ token = state.push("tr_open", "tr", 1)
+ token.map = [startLine, startLine + 1]
+
+ for i in range(len(columns)):
+ token = state.push("th_open", "th", 1)
+ if aligns[i]:
+ token.attrs = {"style": "text-align:" + aligns[i]}
+
+ token = state.push("inline", "", 0)
+ # note in markdown-it this map was removed in v12.0.0 however, we keep it,
+ # since it is helpful to propagate to children tokens
+ token.map = [startLine, startLine + 1]
+ token.content = columns[i].strip()
+ token.children = []
+
+ token = state.push("th_close", "th", -1)
+
+ token = state.push("tr_close", "tr", -1)
+ token = state.push("thead_close", "thead", -1)
+
+ nextLine = startLine + 2
+ while nextLine < endLine:
+ if state.sCount[nextLine] < state.blkIndent:
+ break
+
+ terminate = False
+ for i in range(len(terminatorRules)):
+ if terminatorRules[i](state, nextLine, endLine, True):
+ terminate = True
+ break
+
+ if terminate:
+ break
+ lineText = getLine(state, nextLine).strip()
+ if not lineText:
+ break
+ if state.sCount[nextLine] - state.blkIndent >= 4:
+ break
+ columns = escapedSplit(lineText)
+ if columns and columns[0] == "":
+ columns.pop(0)
+ if columns and columns[-1] == "":
+ columns.pop()
+
+ if nextLine == startLine + 2:
+ token = state.push("tbody_open", "tbody", 1)
+ token.map = tbodyLines = [startLine + 2, 0]
+
+ token = state.push("tr_open", "tr", 1)
+ token.map = [nextLine, nextLine + 1]
+
+ for i in range(columnCount):
+ token = state.push("td_open", "td", 1)
+ if aligns[i]:
+ token.attrs = {"style": "text-align:" + aligns[i]}
+
+ token = state.push("inline", "", 0)
+ # note in markdown-it this map was removed in v12.0.0 however, we keep it,
+ # since it is helpful to propagate to children tokens
+ token.map = [nextLine, nextLine + 1]
+ try:
+ token.content = columns[i].strip() if columns[i] else ""
+ except IndexError:
+ token.content = ""
+ token.children = []
+
+ token = state.push("td_close", "td", -1)
+
+ token = state.push("tr_close", "tr", -1)
+
+ nextLine += 1
+
+ if tbodyLines:
+ token = state.push("tbody_close", "tbody", -1)
+ tbodyLines[1] = nextLine
+
+ token = state.push("table_close", "table", -1)
+
+ tableLines[1] = nextLine
+ state.parentType = oldParentType
+ state.line = nextLine
+ return True