summaryrefslogtreecommitdiffstats
path: root/vendor/pulldown-cmark/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /vendor/pulldown-cmark/src
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/pulldown-cmark/src')
-rw-r--r--vendor/pulldown-cmark/src/entities.rs2158
-rw-r--r--vendor/pulldown-cmark/src/escape.rs368
-rw-r--r--vendor/pulldown-cmark/src/firstpass.rs1927
-rw-r--r--vendor/pulldown-cmark/src/html.rs478
-rw-r--r--vendor/pulldown-cmark/src/lib.rs289
-rw-r--r--vendor/pulldown-cmark/src/linklabel.rs135
-rw-r--r--vendor/pulldown-cmark/src/main.rs123
-rw-r--r--vendor/pulldown-cmark/src/parse.rs1904
-rw-r--r--vendor/pulldown-cmark/src/puncttable.rs351
-rw-r--r--vendor/pulldown-cmark/src/scanners.rs1327
-rw-r--r--vendor/pulldown-cmark/src/strings.rs373
-rw-r--r--vendor/pulldown-cmark/src/tree.rs276
12 files changed, 9709 insertions, 0 deletions
diff --git a/vendor/pulldown-cmark/src/entities.rs b/vendor/pulldown-cmark/src/entities.rs
new file mode 100644
index 000000000..042c9bccc
--- /dev/null
+++ b/vendor/pulldown-cmark/src/entities.rs
@@ -0,0 +1,2158 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+//! Expansions of HTML5 entities
+
+// Autogenerated by mk_entities.py
+
+const ENTITIES: [(&[u8], &str); 2125] = [
+ (b"AElig", "\u{00C6}"),
+ (b"AMP", "\u{0026}"),
+ (b"Aacute", "\u{00C1}"),
+ (b"Abreve", "\u{0102}"),
+ (b"Acirc", "\u{00C2}"),
+ (b"Acy", "\u{0410}"),
+ (b"Afr", "\u{1D504}"),
+ (b"Agrave", "\u{00C0}"),
+ (b"Alpha", "\u{0391}"),
+ (b"Amacr", "\u{0100}"),
+ (b"And", "\u{2A53}"),
+ (b"Aogon", "\u{0104}"),
+ (b"Aopf", "\u{1D538}"),
+ (b"ApplyFunction", "\u{2061}"),
+ (b"Aring", "\u{00C5}"),
+ (b"Ascr", "\u{1D49C}"),
+ (b"Assign", "\u{2254}"),
+ (b"Atilde", "\u{00C3}"),
+ (b"Auml", "\u{00C4}"),
+ (b"Backslash", "\u{2216}"),
+ (b"Barv", "\u{2AE7}"),
+ (b"Barwed", "\u{2306}"),
+ (b"Bcy", "\u{0411}"),
+ (b"Because", "\u{2235}"),
+ (b"Bernoullis", "\u{212C}"),
+ (b"Beta", "\u{0392}"),
+ (b"Bfr", "\u{1D505}"),
+ (b"Bopf", "\u{1D539}"),
+ (b"Breve", "\u{02D8}"),
+ (b"Bscr", "\u{212C}"),
+ (b"Bumpeq", "\u{224E}"),
+ (b"CHcy", "\u{0427}"),
+ (b"COPY", "\u{00A9}"),
+ (b"Cacute", "\u{0106}"),
+ (b"Cap", "\u{22D2}"),
+ (b"CapitalDifferentialD", "\u{2145}"),
+ (b"Cayleys", "\u{212D}"),
+ (b"Ccaron", "\u{010C}"),
+ (b"Ccedil", "\u{00C7}"),
+ (b"Ccirc", "\u{0108}"),
+ (b"Cconint", "\u{2230}"),
+ (b"Cdot", "\u{010A}"),
+ (b"Cedilla", "\u{00B8}"),
+ (b"CenterDot", "\u{00B7}"),
+ (b"Cfr", "\u{212D}"),
+ (b"Chi", "\u{03A7}"),
+ (b"CircleDot", "\u{2299}"),
+ (b"CircleMinus", "\u{2296}"),
+ (b"CirclePlus", "\u{2295}"),
+ (b"CircleTimes", "\u{2297}"),
+ (b"ClockwiseContourIntegral", "\u{2232}"),
+ (b"CloseCurlyDoubleQuote", "\u{201D}"),
+ (b"CloseCurlyQuote", "\u{2019}"),
+ (b"Colon", "\u{2237}"),
+ (b"Colone", "\u{2A74}"),
+ (b"Congruent", "\u{2261}"),
+ (b"Conint", "\u{222F}"),
+ (b"ContourIntegral", "\u{222E}"),
+ (b"Copf", "\u{2102}"),
+ (b"Coproduct", "\u{2210}"),
+ (b"CounterClockwiseContourIntegral", "\u{2233}"),
+ (b"Cross", "\u{2A2F}"),
+ (b"Cscr", "\u{1D49E}"),
+ (b"Cup", "\u{22D3}"),
+ (b"CupCap", "\u{224D}"),
+ (b"DD", "\u{2145}"),
+ (b"DDotrahd", "\u{2911}"),
+ (b"DJcy", "\u{0402}"),
+ (b"DScy", "\u{0405}"),
+ (b"DZcy", "\u{040F}"),
+ (b"Dagger", "\u{2021}"),
+ (b"Darr", "\u{21A1}"),
+ (b"Dashv", "\u{2AE4}"),
+ (b"Dcaron", "\u{010E}"),
+ (b"Dcy", "\u{0414}"),
+ (b"Del", "\u{2207}"),
+ (b"Delta", "\u{0394}"),
+ (b"Dfr", "\u{1D507}"),
+ (b"DiacriticalAcute", "\u{00B4}"),
+ (b"DiacriticalDot", "\u{02D9}"),
+ (b"DiacriticalDoubleAcute", "\u{02DD}"),
+ (b"DiacriticalGrave", "\u{0060}"),
+ (b"DiacriticalTilde", "\u{02DC}"),
+ (b"Diamond", "\u{22C4}"),
+ (b"DifferentialD", "\u{2146}"),
+ (b"Dopf", "\u{1D53B}"),
+ (b"Dot", "\u{00A8}"),
+ (b"DotDot", "\u{20DC}"),
+ (b"DotEqual", "\u{2250}"),
+ (b"DoubleContourIntegral", "\u{222F}"),
+ (b"DoubleDot", "\u{00A8}"),
+ (b"DoubleDownArrow", "\u{21D3}"),
+ (b"DoubleLeftArrow", "\u{21D0}"),
+ (b"DoubleLeftRightArrow", "\u{21D4}"),
+ (b"DoubleLeftTee", "\u{2AE4}"),
+ (b"DoubleLongLeftArrow", "\u{27F8}"),
+ (b"DoubleLongLeftRightArrow", "\u{27FA}"),
+ (b"DoubleLongRightArrow", "\u{27F9}"),
+ (b"DoubleRightArrow", "\u{21D2}"),
+ (b"DoubleRightTee", "\u{22A8}"),
+ (b"DoubleUpArrow", "\u{21D1}"),
+ (b"DoubleUpDownArrow", "\u{21D5}"),
+ (b"DoubleVerticalBar", "\u{2225}"),
+ (b"DownArrow", "\u{2193}"),
+ (b"DownArrowBar", "\u{2913}"),
+ (b"DownArrowUpArrow", "\u{21F5}"),
+ (b"DownBreve", "\u{0311}"),
+ (b"DownLeftRightVector", "\u{2950}"),
+ (b"DownLeftTeeVector", "\u{295E}"),
+ (b"DownLeftVector", "\u{21BD}"),
+ (b"DownLeftVectorBar", "\u{2956}"),
+ (b"DownRightTeeVector", "\u{295F}"),
+ (b"DownRightVector", "\u{21C1}"),
+ (b"DownRightVectorBar", "\u{2957}"),
+ (b"DownTee", "\u{22A4}"),
+ (b"DownTeeArrow", "\u{21A7}"),
+ (b"Downarrow", "\u{21D3}"),
+ (b"Dscr", "\u{1D49F}"),
+ (b"Dstrok", "\u{0110}"),
+ (b"ENG", "\u{014A}"),
+ (b"ETH", "\u{00D0}"),
+ (b"Eacute", "\u{00C9}"),
+ (b"Ecaron", "\u{011A}"),
+ (b"Ecirc", "\u{00CA}"),
+ (b"Ecy", "\u{042D}"),
+ (b"Edot", "\u{0116}"),
+ (b"Efr", "\u{1D508}"),
+ (b"Egrave", "\u{00C8}"),
+ (b"Element", "\u{2208}"),
+ (b"Emacr", "\u{0112}"),
+ (b"EmptySmallSquare", "\u{25FB}"),
+ (b"EmptyVerySmallSquare", "\u{25AB}"),
+ (b"Eogon", "\u{0118}"),
+ (b"Eopf", "\u{1D53C}"),
+ (b"Epsilon", "\u{0395}"),
+ (b"Equal", "\u{2A75}"),
+ (b"EqualTilde", "\u{2242}"),
+ (b"Equilibrium", "\u{21CC}"),
+ (b"Escr", "\u{2130}"),
+ (b"Esim", "\u{2A73}"),
+ (b"Eta", "\u{0397}"),
+ (b"Euml", "\u{00CB}"),
+ (b"Exists", "\u{2203}"),
+ (b"ExponentialE", "\u{2147}"),
+ (b"Fcy", "\u{0424}"),
+ (b"Ffr", "\u{1D509}"),
+ (b"FilledSmallSquare", "\u{25FC}"),
+ (b"FilledVerySmallSquare", "\u{25AA}"),
+ (b"Fopf", "\u{1D53D}"),
+ (b"ForAll", "\u{2200}"),
+ (b"Fouriertrf", "\u{2131}"),
+ (b"Fscr", "\u{2131}"),
+ (b"GJcy", "\u{0403}"),
+ (b"GT", "\u{003E}"),
+ (b"Gamma", "\u{0393}"),
+ (b"Gammad", "\u{03DC}"),
+ (b"Gbreve", "\u{011E}"),
+ (b"Gcedil", "\u{0122}"),
+ (b"Gcirc", "\u{011C}"),
+ (b"Gcy", "\u{0413}"),
+ (b"Gdot", "\u{0120}"),
+ (b"Gfr", "\u{1D50A}"),
+ (b"Gg", "\u{22D9}"),
+ (b"Gopf", "\u{1D53E}"),
+ (b"GreaterEqual", "\u{2265}"),
+ (b"GreaterEqualLess", "\u{22DB}"),
+ (b"GreaterFullEqual", "\u{2267}"),
+ (b"GreaterGreater", "\u{2AA2}"),
+ (b"GreaterLess", "\u{2277}"),
+ (b"GreaterSlantEqual", "\u{2A7E}"),
+ (b"GreaterTilde", "\u{2273}"),
+ (b"Gscr", "\u{1D4A2}"),
+ (b"Gt", "\u{226B}"),
+ (b"HARDcy", "\u{042A}"),
+ (b"Hacek", "\u{02C7}"),
+ (b"Hat", "\u{005E}"),
+ (b"Hcirc", "\u{0124}"),
+ (b"Hfr", "\u{210C}"),
+ (b"HilbertSpace", "\u{210B}"),
+ (b"Hopf", "\u{210D}"),
+ (b"HorizontalLine", "\u{2500}"),
+ (b"Hscr", "\u{210B}"),
+ (b"Hstrok", "\u{0126}"),
+ (b"HumpDownHump", "\u{224E}"),
+ (b"HumpEqual", "\u{224F}"),
+ (b"IEcy", "\u{0415}"),
+ (b"IJlig", "\u{0132}"),
+ (b"IOcy", "\u{0401}"),
+ (b"Iacute", "\u{00CD}"),
+ (b"Icirc", "\u{00CE}"),
+ (b"Icy", "\u{0418}"),
+ (b"Idot", "\u{0130}"),
+ (b"Ifr", "\u{2111}"),
+ (b"Igrave", "\u{00CC}"),
+ (b"Im", "\u{2111}"),
+ (b"Imacr", "\u{012A}"),
+ (b"ImaginaryI", "\u{2148}"),
+ (b"Implies", "\u{21D2}"),
+ (b"Int", "\u{222C}"),
+ (b"Integral", "\u{222B}"),
+ (b"Intersection", "\u{22C2}"),
+ (b"InvisibleComma", "\u{2063}"),
+ (b"InvisibleTimes", "\u{2062}"),
+ (b"Iogon", "\u{012E}"),
+ (b"Iopf", "\u{1D540}"),
+ (b"Iota", "\u{0399}"),
+ (b"Iscr", "\u{2110}"),
+ (b"Itilde", "\u{0128}"),
+ (b"Iukcy", "\u{0406}"),
+ (b"Iuml", "\u{00CF}"),
+ (b"Jcirc", "\u{0134}"),
+ (b"Jcy", "\u{0419}"),
+ (b"Jfr", "\u{1D50D}"),
+ (b"Jopf", "\u{1D541}"),
+ (b"Jscr", "\u{1D4A5}"),
+ (b"Jsercy", "\u{0408}"),
+ (b"Jukcy", "\u{0404}"),
+ (b"KHcy", "\u{0425}"),
+ (b"KJcy", "\u{040C}"),
+ (b"Kappa", "\u{039A}"),
+ (b"Kcedil", "\u{0136}"),
+ (b"Kcy", "\u{041A}"),
+ (b"Kfr", "\u{1D50E}"),
+ (b"Kopf", "\u{1D542}"),
+ (b"Kscr", "\u{1D4A6}"),
+ (b"LJcy", "\u{0409}"),
+ (b"LT", "\u{003C}"),
+ (b"Lacute", "\u{0139}"),
+ (b"Lambda", "\u{039B}"),
+ (b"Lang", "\u{27EA}"),
+ (b"Laplacetrf", "\u{2112}"),
+ (b"Larr", "\u{219E}"),
+ (b"Lcaron", "\u{013D}"),
+ (b"Lcedil", "\u{013B}"),
+ (b"Lcy", "\u{041B}"),
+ (b"LeftAngleBracket", "\u{27E8}"),
+ (b"LeftArrow", "\u{2190}"),
+ (b"LeftArrowBar", "\u{21E4}"),
+ (b"LeftArrowRightArrow", "\u{21C6}"),
+ (b"LeftCeiling", "\u{2308}"),
+ (b"LeftDoubleBracket", "\u{27E6}"),
+ (b"LeftDownTeeVector", "\u{2961}"),
+ (b"LeftDownVector", "\u{21C3}"),
+ (b"LeftDownVectorBar", "\u{2959}"),
+ (b"LeftFloor", "\u{230A}"),
+ (b"LeftRightArrow", "\u{2194}"),
+ (b"LeftRightVector", "\u{294E}"),
+ (b"LeftTee", "\u{22A3}"),
+ (b"LeftTeeArrow", "\u{21A4}"),
+ (b"LeftTeeVector", "\u{295A}"),
+ (b"LeftTriangle", "\u{22B2}"),
+ (b"LeftTriangleBar", "\u{29CF}"),
+ (b"LeftTriangleEqual", "\u{22B4}"),
+ (b"LeftUpDownVector", "\u{2951}"),
+ (b"LeftUpTeeVector", "\u{2960}"),
+ (b"LeftUpVector", "\u{21BF}"),
+ (b"LeftUpVectorBar", "\u{2958}"),
+ (b"LeftVector", "\u{21BC}"),
+ (b"LeftVectorBar", "\u{2952}"),
+ (b"Leftarrow", "\u{21D0}"),
+ (b"Leftrightarrow", "\u{21D4}"),
+ (b"LessEqualGreater", "\u{22DA}"),
+ (b"LessFullEqual", "\u{2266}"),
+ (b"LessGreater", "\u{2276}"),
+ (b"LessLess", "\u{2AA1}"),
+ (b"LessSlantEqual", "\u{2A7D}"),
+ (b"LessTilde", "\u{2272}"),
+ (b"Lfr", "\u{1D50F}"),
+ (b"Ll", "\u{22D8}"),
+ (b"Lleftarrow", "\u{21DA}"),
+ (b"Lmidot", "\u{013F}"),
+ (b"LongLeftArrow", "\u{27F5}"),
+ (b"LongLeftRightArrow", "\u{27F7}"),
+ (b"LongRightArrow", "\u{27F6}"),
+ (b"Longleftarrow", "\u{27F8}"),
+ (b"Longleftrightarrow", "\u{27FA}"),
+ (b"Longrightarrow", "\u{27F9}"),
+ (b"Lopf", "\u{1D543}"),
+ (b"LowerLeftArrow", "\u{2199}"),
+ (b"LowerRightArrow", "\u{2198}"),
+ (b"Lscr", "\u{2112}"),
+ (b"Lsh", "\u{21B0}"),
+ (b"Lstrok", "\u{0141}"),
+ (b"Lt", "\u{226A}"),
+ (b"Map", "\u{2905}"),
+ (b"Mcy", "\u{041C}"),
+ (b"MediumSpace", "\u{205F}"),
+ (b"Mellintrf", "\u{2133}"),
+ (b"Mfr", "\u{1D510}"),
+ (b"MinusPlus", "\u{2213}"),
+ (b"Mopf", "\u{1D544}"),
+ (b"Mscr", "\u{2133}"),
+ (b"Mu", "\u{039C}"),
+ (b"NJcy", "\u{040A}"),
+ (b"Nacute", "\u{0143}"),
+ (b"Ncaron", "\u{0147}"),
+ (b"Ncedil", "\u{0145}"),
+ (b"Ncy", "\u{041D}"),
+ (b"NegativeMediumSpace", "\u{200B}"),
+ (b"NegativeThickSpace", "\u{200B}"),
+ (b"NegativeThinSpace", "\u{200B}"),
+ (b"NegativeVeryThinSpace", "\u{200B}"),
+ (b"NestedGreaterGreater", "\u{226B}"),
+ (b"NestedLessLess", "\u{226A}"),
+ (b"NewLine", "\u{000A}"),
+ (b"Nfr", "\u{1D511}"),
+ (b"NoBreak", "\u{2060}"),
+ (b"NonBreakingSpace", "\u{00A0}"),
+ (b"Nopf", "\u{2115}"),
+ (b"Not", "\u{2AEC}"),
+ (b"NotCongruent", "\u{2262}"),
+ (b"NotCupCap", "\u{226D}"),
+ (b"NotDoubleVerticalBar", "\u{2226}"),
+ (b"NotElement", "\u{2209}"),
+ (b"NotEqual", "\u{2260}"),
+ (b"NotEqualTilde", "\u{2242}\u{0338}"),
+ (b"NotExists", "\u{2204}"),
+ (b"NotGreater", "\u{226F}"),
+ (b"NotGreaterEqual", "\u{2271}"),
+ (b"NotGreaterFullEqual", "\u{2267}\u{0338}"),
+ (b"NotGreaterGreater", "\u{226B}\u{0338}"),
+ (b"NotGreaterLess", "\u{2279}"),
+ (b"NotGreaterSlantEqual", "\u{2A7E}\u{0338}"),
+ (b"NotGreaterTilde", "\u{2275}"),
+ (b"NotHumpDownHump", "\u{224E}\u{0338}"),
+ (b"NotHumpEqual", "\u{224F}\u{0338}"),
+ (b"NotLeftTriangle", "\u{22EA}"),
+ (b"NotLeftTriangleBar", "\u{29CF}\u{0338}"),
+ (b"NotLeftTriangleEqual", "\u{22EC}"),
+ (b"NotLess", "\u{226E}"),
+ (b"NotLessEqual", "\u{2270}"),
+ (b"NotLessGreater", "\u{2278}"),
+ (b"NotLessLess", "\u{226A}\u{0338}"),
+ (b"NotLessSlantEqual", "\u{2A7D}\u{0338}"),
+ (b"NotLessTilde", "\u{2274}"),
+ (b"NotNestedGreaterGreater", "\u{2AA2}\u{0338}"),
+ (b"NotNestedLessLess", "\u{2AA1}\u{0338}"),
+ (b"NotPrecedes", "\u{2280}"),
+ (b"NotPrecedesEqual", "\u{2AAF}\u{0338}"),
+ (b"NotPrecedesSlantEqual", "\u{22E0}"),
+ (b"NotReverseElement", "\u{220C}"),
+ (b"NotRightTriangle", "\u{22EB}"),
+ (b"NotRightTriangleBar", "\u{29D0}\u{0338}"),
+ (b"NotRightTriangleEqual", "\u{22ED}"),
+ (b"NotSquareSubset", "\u{228F}\u{0338}"),
+ (b"NotSquareSubsetEqual", "\u{22E2}"),
+ (b"NotSquareSuperset", "\u{2290}\u{0338}"),
+ (b"NotSquareSupersetEqual", "\u{22E3}"),
+ (b"NotSubset", "\u{2282}\u{20D2}"),
+ (b"NotSubsetEqual", "\u{2288}"),
+ (b"NotSucceeds", "\u{2281}"),
+ (b"NotSucceedsEqual", "\u{2AB0}\u{0338}"),
+ (b"NotSucceedsSlantEqual", "\u{22E1}"),
+ (b"NotSucceedsTilde", "\u{227F}\u{0338}"),
+ (b"NotSuperset", "\u{2283}\u{20D2}"),
+ (b"NotSupersetEqual", "\u{2289}"),
+ (b"NotTilde", "\u{2241}"),
+ (b"NotTildeEqual", "\u{2244}"),
+ (b"NotTildeFullEqual", "\u{2247}"),
+ (b"NotTildeTilde", "\u{2249}"),
+ (b"NotVerticalBar", "\u{2224}"),
+ (b"Nscr", "\u{1D4A9}"),
+ (b"Ntilde", "\u{00D1}"),
+ (b"Nu", "\u{039D}"),
+ (b"OElig", "\u{0152}"),
+ (b"Oacute", "\u{00D3}"),
+ (b"Ocirc", "\u{00D4}"),
+ (b"Ocy", "\u{041E}"),
+ (b"Odblac", "\u{0150}"),
+ (b"Ofr", "\u{1D512}"),
+ (b"Ograve", "\u{00D2}"),
+ (b"Omacr", "\u{014C}"),
+ (b"Omega", "\u{03A9}"),
+ (b"Omicron", "\u{039F}"),
+ (b"Oopf", "\u{1D546}"),
+ (b"OpenCurlyDoubleQuote", "\u{201C}"),
+ (b"OpenCurlyQuote", "\u{2018}"),
+ (b"Or", "\u{2A54}"),
+ (b"Oscr", "\u{1D4AA}"),
+ (b"Oslash", "\u{00D8}"),
+ (b"Otilde", "\u{00D5}"),
+ (b"Otimes", "\u{2A37}"),
+ (b"Ouml", "\u{00D6}"),
+ (b"OverBar", "\u{203E}"),
+ (b"OverBrace", "\u{23DE}"),
+ (b"OverBracket", "\u{23B4}"),
+ (b"OverParenthesis", "\u{23DC}"),
+ (b"PartialD", "\u{2202}"),
+ (b"Pcy", "\u{041F}"),
+ (b"Pfr", "\u{1D513}"),
+ (b"Phi", "\u{03A6}"),
+ (b"Pi", "\u{03A0}"),
+ (b"PlusMinus", "\u{00B1}"),
+ (b"Poincareplane", "\u{210C}"),
+ (b"Popf", "\u{2119}"),
+ (b"Pr", "\u{2ABB}"),
+ (b"Precedes", "\u{227A}"),
+ (b"PrecedesEqual", "\u{2AAF}"),
+ (b"PrecedesSlantEqual", "\u{227C}"),
+ (b"PrecedesTilde", "\u{227E}"),
+ (b"Prime", "\u{2033}"),
+ (b"Product", "\u{220F}"),
+ (b"Proportion", "\u{2237}"),
+ (b"Proportional", "\u{221D}"),
+ (b"Pscr", "\u{1D4AB}"),
+ (b"Psi", "\u{03A8}"),
+ (b"QUOT", "\u{0022}"),
+ (b"Qfr", "\u{1D514}"),
+ (b"Qopf", "\u{211A}"),
+ (b"Qscr", "\u{1D4AC}"),
+ (b"RBarr", "\u{2910}"),
+ (b"REG", "\u{00AE}"),
+ (b"Racute", "\u{0154}"),
+ (b"Rang", "\u{27EB}"),
+ (b"Rarr", "\u{21A0}"),
+ (b"Rarrtl", "\u{2916}"),
+ (b"Rcaron", "\u{0158}"),
+ (b"Rcedil", "\u{0156}"),
+ (b"Rcy", "\u{0420}"),
+ (b"Re", "\u{211C}"),
+ (b"ReverseElement", "\u{220B}"),
+ (b"ReverseEquilibrium", "\u{21CB}"),
+ (b"ReverseUpEquilibrium", "\u{296F}"),
+ (b"Rfr", "\u{211C}"),
+ (b"Rho", "\u{03A1}"),
+ (b"RightAngleBracket", "\u{27E9}"),
+ (b"RightArrow", "\u{2192}"),
+ (b"RightArrowBar", "\u{21E5}"),
+ (b"RightArrowLeftArrow", "\u{21C4}"),
+ (b"RightCeiling", "\u{2309}"),
+ (b"RightDoubleBracket", "\u{27E7}"),
+ (b"RightDownTeeVector", "\u{295D}"),
+ (b"RightDownVector", "\u{21C2}"),
+ (b"RightDownVectorBar", "\u{2955}"),
+ (b"RightFloor", "\u{230B}"),
+ (b"RightTee", "\u{22A2}"),
+ (b"RightTeeArrow", "\u{21A6}"),
+ (b"RightTeeVector", "\u{295B}"),
+ (b"RightTriangle", "\u{22B3}"),
+ (b"RightTriangleBar", "\u{29D0}"),
+ (b"RightTriangleEqual", "\u{22B5}"),
+ (b"RightUpDownVector", "\u{294F}"),
+ (b"RightUpTeeVector", "\u{295C}"),
+ (b"RightUpVector", "\u{21BE}"),
+ (b"RightUpVectorBar", "\u{2954}"),
+ (b"RightVector", "\u{21C0}"),
+ (b"RightVectorBar", "\u{2953}"),
+ (b"Rightarrow", "\u{21D2}"),
+ (b"Ropf", "\u{211D}"),
+ (b"RoundImplies", "\u{2970}"),
+ (b"Rrightarrow", "\u{21DB}"),
+ (b"Rscr", "\u{211B}"),
+ (b"Rsh", "\u{21B1}"),
+ (b"RuleDelayed", "\u{29F4}"),
+ (b"SHCHcy", "\u{0429}"),
+ (b"SHcy", "\u{0428}"),
+ (b"SOFTcy", "\u{042C}"),
+ (b"Sacute", "\u{015A}"),
+ (b"Sc", "\u{2ABC}"),
+ (b"Scaron", "\u{0160}"),
+ (b"Scedil", "\u{015E}"),
+ (b"Scirc", "\u{015C}"),
+ (b"Scy", "\u{0421}"),
+ (b"Sfr", "\u{1D516}"),
+ (b"ShortDownArrow", "\u{2193}"),
+ (b"ShortLeftArrow", "\u{2190}"),
+ (b"ShortRightArrow", "\u{2192}"),
+ (b"ShortUpArrow", "\u{2191}"),
+ (b"Sigma", "\u{03A3}"),
+ (b"SmallCircle", "\u{2218}"),
+ (b"Sopf", "\u{1D54A}"),
+ (b"Sqrt", "\u{221A}"),
+ (b"Square", "\u{25A1}"),
+ (b"SquareIntersection", "\u{2293}"),
+ (b"SquareSubset", "\u{228F}"),
+ (b"SquareSubsetEqual", "\u{2291}"),
+ (b"SquareSuperset", "\u{2290}"),
+ (b"SquareSupersetEqual", "\u{2292}"),
+ (b"SquareUnion", "\u{2294}"),
+ (b"Sscr", "\u{1D4AE}"),
+ (b"Star", "\u{22C6}"),
+ (b"Sub", "\u{22D0}"),
+ (b"Subset", "\u{22D0}"),
+ (b"SubsetEqual", "\u{2286}"),
+ (b"Succeeds", "\u{227B}"),
+ (b"SucceedsEqual", "\u{2AB0}"),
+ (b"SucceedsSlantEqual", "\u{227D}"),
+ (b"SucceedsTilde", "\u{227F}"),
+ (b"SuchThat", "\u{220B}"),
+ (b"Sum", "\u{2211}"),
+ (b"Sup", "\u{22D1}"),
+ (b"Superset", "\u{2283}"),
+ (b"SupersetEqual", "\u{2287}"),
+ (b"Supset", "\u{22D1}"),
+ (b"THORN", "\u{00DE}"),
+ (b"TRADE", "\u{2122}"),
+ (b"TSHcy", "\u{040B}"),
+ (b"TScy", "\u{0426}"),
+ (b"Tab", "\u{0009}"),
+ (b"Tau", "\u{03A4}"),
+ (b"Tcaron", "\u{0164}"),
+ (b"Tcedil", "\u{0162}"),
+ (b"Tcy", "\u{0422}"),
+ (b"Tfr", "\u{1D517}"),
+ (b"Therefore", "\u{2234}"),
+ (b"Theta", "\u{0398}"),
+ (b"ThickSpace", "\u{205F}\u{200A}"),
+ (b"ThinSpace", "\u{2009}"),
+ (b"Tilde", "\u{223C}"),
+ (b"TildeEqual", "\u{2243}"),
+ (b"TildeFullEqual", "\u{2245}"),
+ (b"TildeTilde", "\u{2248}"),
+ (b"Topf", "\u{1D54B}"),
+ (b"TripleDot", "\u{20DB}"),
+ (b"Tscr", "\u{1D4AF}"),
+ (b"Tstrok", "\u{0166}"),
+ (b"Uacute", "\u{00DA}"),
+ (b"Uarr", "\u{219F}"),
+ (b"Uarrocir", "\u{2949}"),
+ (b"Ubrcy", "\u{040E}"),
+ (b"Ubreve", "\u{016C}"),
+ (b"Ucirc", "\u{00DB}"),
+ (b"Ucy", "\u{0423}"),
+ (b"Udblac", "\u{0170}"),
+ (b"Ufr", "\u{1D518}"),
+ (b"Ugrave", "\u{00D9}"),
+ (b"Umacr", "\u{016A}"),
+ (b"UnderBar", "\u{005F}"),
+ (b"UnderBrace", "\u{23DF}"),
+ (b"UnderBracket", "\u{23B5}"),
+ (b"UnderParenthesis", "\u{23DD}"),
+ (b"Union", "\u{22C3}"),
+ (b"UnionPlus", "\u{228E}"),
+ (b"Uogon", "\u{0172}"),
+ (b"Uopf", "\u{1D54C}"),
+ (b"UpArrow", "\u{2191}"),
+ (b"UpArrowBar", "\u{2912}"),
+ (b"UpArrowDownArrow", "\u{21C5}"),
+ (b"UpDownArrow", "\u{2195}"),
+ (b"UpEquilibrium", "\u{296E}"),
+ (b"UpTee", "\u{22A5}"),
+ (b"UpTeeArrow", "\u{21A5}"),
+ (b"Uparrow", "\u{21D1}"),
+ (b"Updownarrow", "\u{21D5}"),
+ (b"UpperLeftArrow", "\u{2196}"),
+ (b"UpperRightArrow", "\u{2197}"),
+ (b"Upsi", "\u{03D2}"),
+ (b"Upsilon", "\u{03A5}"),
+ (b"Uring", "\u{016E}"),
+ (b"Uscr", "\u{1D4B0}"),
+ (b"Utilde", "\u{0168}"),
+ (b"Uuml", "\u{00DC}"),
+ (b"VDash", "\u{22AB}"),
+ (b"Vbar", "\u{2AEB}"),
+ (b"Vcy", "\u{0412}"),
+ (b"Vdash", "\u{22A9}"),
+ (b"Vdashl", "\u{2AE6}"),
+ (b"Vee", "\u{22C1}"),
+ (b"Verbar", "\u{2016}"),
+ (b"Vert", "\u{2016}"),
+ (b"VerticalBar", "\u{2223}"),
+ (b"VerticalLine", "\u{007C}"),
+ (b"VerticalSeparator", "\u{2758}"),
+ (b"VerticalTilde", "\u{2240}"),
+ (b"VeryThinSpace", "\u{200A}"),
+ (b"Vfr", "\u{1D519}"),
+ (b"Vopf", "\u{1D54D}"),
+ (b"Vscr", "\u{1D4B1}"),
+ (b"Vvdash", "\u{22AA}"),
+ (b"Wcirc", "\u{0174}"),
+ (b"Wedge", "\u{22C0}"),
+ (b"Wfr", "\u{1D51A}"),
+ (b"Wopf", "\u{1D54E}"),
+ (b"Wscr", "\u{1D4B2}"),
+ (b"Xfr", "\u{1D51B}"),
+ (b"Xi", "\u{039E}"),
+ (b"Xopf", "\u{1D54F}"),
+ (b"Xscr", "\u{1D4B3}"),
+ (b"YAcy", "\u{042F}"),
+ (b"YIcy", "\u{0407}"),
+ (b"YUcy", "\u{042E}"),
+ (b"Yacute", "\u{00DD}"),
+ (b"Ycirc", "\u{0176}"),
+ (b"Ycy", "\u{042B}"),
+ (b"Yfr", "\u{1D51C}"),
+ (b"Yopf", "\u{1D550}"),
+ (b"Yscr", "\u{1D4B4}"),
+ (b"Yuml", "\u{0178}"),
+ (b"ZHcy", "\u{0416}"),
+ (b"Zacute", "\u{0179}"),
+ (b"Zcaron", "\u{017D}"),
+ (b"Zcy", "\u{0417}"),
+ (b"Zdot", "\u{017B}"),
+ (b"ZeroWidthSpace", "\u{200B}"),
+ (b"Zeta", "\u{0396}"),
+ (b"Zfr", "\u{2128}"),
+ (b"Zopf", "\u{2124}"),
+ (b"Zscr", "\u{1D4B5}"),
+ (b"aacute", "\u{00E1}"),
+ (b"abreve", "\u{0103}"),
+ (b"ac", "\u{223E}"),
+ (b"acE", "\u{223E}\u{0333}"),
+ (b"acd", "\u{223F}"),
+ (b"acirc", "\u{00E2}"),
+ (b"acute", "\u{00B4}"),
+ (b"acy", "\u{0430}"),
+ (b"aelig", "\u{00E6}"),
+ (b"af", "\u{2061}"),
+ (b"afr", "\u{1D51E}"),
+ (b"agrave", "\u{00E0}"),
+ (b"alefsym", "\u{2135}"),
+ (b"aleph", "\u{2135}"),
+ (b"alpha", "\u{03B1}"),
+ (b"amacr", "\u{0101}"),
+ (b"amalg", "\u{2A3F}"),
+ (b"amp", "\u{0026}"),
+ (b"and", "\u{2227}"),
+ (b"andand", "\u{2A55}"),
+ (b"andd", "\u{2A5C}"),
+ (b"andslope", "\u{2A58}"),
+ (b"andv", "\u{2A5A}"),
+ (b"ang", "\u{2220}"),
+ (b"ange", "\u{29A4}"),
+ (b"angle", "\u{2220}"),
+ (b"angmsd", "\u{2221}"),
+ (b"angmsdaa", "\u{29A8}"),
+ (b"angmsdab", "\u{29A9}"),
+ (b"angmsdac", "\u{29AA}"),
+ (b"angmsdad", "\u{29AB}"),
+ (b"angmsdae", "\u{29AC}"),
+ (b"angmsdaf", "\u{29AD}"),
+ (b"angmsdag", "\u{29AE}"),
+ (b"angmsdah", "\u{29AF}"),
+ (b"angrt", "\u{221F}"),
+ (b"angrtvb", "\u{22BE}"),
+ (b"angrtvbd", "\u{299D}"),
+ (b"angsph", "\u{2222}"),
+ (b"angst", "\u{00C5}"),
+ (b"angzarr", "\u{237C}"),
+ (b"aogon", "\u{0105}"),
+ (b"aopf", "\u{1D552}"),
+ (b"ap", "\u{2248}"),
+ (b"apE", "\u{2A70}"),
+ (b"apacir", "\u{2A6F}"),
+ (b"ape", "\u{224A}"),
+ (b"apid", "\u{224B}"),
+ (b"apos", "\u{0027}"),
+ (b"approx", "\u{2248}"),
+ (b"approxeq", "\u{224A}"),
+ (b"aring", "\u{00E5}"),
+ (b"ascr", "\u{1D4B6}"),
+ (b"ast", "\u{002A}"),
+ (b"asymp", "\u{2248}"),
+ (b"asympeq", "\u{224D}"),
+ (b"atilde", "\u{00E3}"),
+ (b"auml", "\u{00E4}"),
+ (b"awconint", "\u{2233}"),
+ (b"awint", "\u{2A11}"),
+ (b"bNot", "\u{2AED}"),
+ (b"backcong", "\u{224C}"),
+ (b"backepsilon", "\u{03F6}"),
+ (b"backprime", "\u{2035}"),
+ (b"backsim", "\u{223D}"),
+ (b"backsimeq", "\u{22CD}"),
+ (b"barvee", "\u{22BD}"),
+ (b"barwed", "\u{2305}"),
+ (b"barwedge", "\u{2305}"),
+ (b"bbrk", "\u{23B5}"),
+ (b"bbrktbrk", "\u{23B6}"),
+ (b"bcong", "\u{224C}"),
+ (b"bcy", "\u{0431}"),
+ (b"bdquo", "\u{201E}"),
+ (b"becaus", "\u{2235}"),
+ (b"because", "\u{2235}"),
+ (b"bemptyv", "\u{29B0}"),
+ (b"bepsi", "\u{03F6}"),
+ (b"bernou", "\u{212C}"),
+ (b"beta", "\u{03B2}"),
+ (b"beth", "\u{2136}"),
+ (b"between", "\u{226C}"),
+ (b"bfr", "\u{1D51F}"),
+ (b"bigcap", "\u{22C2}"),
+ (b"bigcirc", "\u{25EF}"),
+ (b"bigcup", "\u{22C3}"),
+ (b"bigodot", "\u{2A00}"),
+ (b"bigoplus", "\u{2A01}"),
+ (b"bigotimes", "\u{2A02}"),
+ (b"bigsqcup", "\u{2A06}"),
+ (b"bigstar", "\u{2605}"),
+ (b"bigtriangledown", "\u{25BD}"),
+ (b"bigtriangleup", "\u{25B3}"),
+ (b"biguplus", "\u{2A04}"),
+ (b"bigvee", "\u{22C1}"),
+ (b"bigwedge", "\u{22C0}"),
+ (b"bkarow", "\u{290D}"),
+ (b"blacklozenge", "\u{29EB}"),
+ (b"blacksquare", "\u{25AA}"),
+ (b"blacktriangle", "\u{25B4}"),
+ (b"blacktriangledown", "\u{25BE}"),
+ (b"blacktriangleleft", "\u{25C2}"),
+ (b"blacktriangleright", "\u{25B8}"),
+ (b"blank", "\u{2423}"),
+ (b"blk12", "\u{2592}"),
+ (b"blk14", "\u{2591}"),
+ (b"blk34", "\u{2593}"),
+ (b"block", "\u{2588}"),
+ (b"bne", "\u{003D}\u{20E5}"),
+ (b"bnequiv", "\u{2261}\u{20E5}"),
+ (b"bnot", "\u{2310}"),
+ (b"bopf", "\u{1D553}"),
+ (b"bot", "\u{22A5}"),
+ (b"bottom", "\u{22A5}"),
+ (b"bowtie", "\u{22C8}"),
+ (b"boxDL", "\u{2557}"),
+ (b"boxDR", "\u{2554}"),
+ (b"boxDl", "\u{2556}"),
+ (b"boxDr", "\u{2553}"),
+ (b"boxH", "\u{2550}"),
+ (b"boxHD", "\u{2566}"),
+ (b"boxHU", "\u{2569}"),
+ (b"boxHd", "\u{2564}"),
+ (b"boxHu", "\u{2567}"),
+ (b"boxUL", "\u{255D}"),
+ (b"boxUR", "\u{255A}"),
+ (b"boxUl", "\u{255C}"),
+ (b"boxUr", "\u{2559}"),
+ (b"boxV", "\u{2551}"),
+ (b"boxVH", "\u{256C}"),
+ (b"boxVL", "\u{2563}"),
+ (b"boxVR", "\u{2560}"),
+ (b"boxVh", "\u{256B}"),
+ (b"boxVl", "\u{2562}"),
+ (b"boxVr", "\u{255F}"),
+ (b"boxbox", "\u{29C9}"),
+ (b"boxdL", "\u{2555}"),
+ (b"boxdR", "\u{2552}"),
+ (b"boxdl", "\u{2510}"),
+ (b"boxdr", "\u{250C}"),
+ (b"boxh", "\u{2500}"),
+ (b"boxhD", "\u{2565}"),
+ (b"boxhU", "\u{2568}"),
+ (b"boxhd", "\u{252C}"),
+ (b"boxhu", "\u{2534}"),
+ (b"boxminus", "\u{229F}"),
+ (b"boxplus", "\u{229E}"),
+ (b"boxtimes", "\u{22A0}"),
+ (b"boxuL", "\u{255B}"),
+ (b"boxuR", "\u{2558}"),
+ (b"boxul", "\u{2518}"),
+ (b"boxur", "\u{2514}"),
+ (b"boxv", "\u{2502}"),
+ (b"boxvH", "\u{256A}"),
+ (b"boxvL", "\u{2561}"),
+ (b"boxvR", "\u{255E}"),
+ (b"boxvh", "\u{253C}"),
+ (b"boxvl", "\u{2524}"),
+ (b"boxvr", "\u{251C}"),
+ (b"bprime", "\u{2035}"),
+ (b"breve", "\u{02D8}"),
+ (b"brvbar", "\u{00A6}"),
+ (b"bscr", "\u{1D4B7}"),
+ (b"bsemi", "\u{204F}"),
+ (b"bsim", "\u{223D}"),
+ (b"bsime", "\u{22CD}"),
+ (b"bsol", "\u{005C}"),
+ (b"bsolb", "\u{29C5}"),
+ (b"bsolhsub", "\u{27C8}"),
+ (b"bull", "\u{2022}"),
+ (b"bullet", "\u{2022}"),
+ (b"bump", "\u{224E}"),
+ (b"bumpE", "\u{2AAE}"),
+ (b"bumpe", "\u{224F}"),
+ (b"bumpeq", "\u{224F}"),
+ (b"cacute", "\u{0107}"),
+ (b"cap", "\u{2229}"),
+ (b"capand", "\u{2A44}"),
+ (b"capbrcup", "\u{2A49}"),
+ (b"capcap", "\u{2A4B}"),
+ (b"capcup", "\u{2A47}"),
+ (b"capdot", "\u{2A40}"),
+ (b"caps", "\u{2229}\u{FE00}"),
+ (b"caret", "\u{2041}"),
+ (b"caron", "\u{02C7}"),
+ (b"ccaps", "\u{2A4D}"),
+ (b"ccaron", "\u{010D}"),
+ (b"ccedil", "\u{00E7}"),
+ (b"ccirc", "\u{0109}"),
+ (b"ccups", "\u{2A4C}"),
+ (b"ccupssm", "\u{2A50}"),
+ (b"cdot", "\u{010B}"),
+ (b"cedil", "\u{00B8}"),
+ (b"cemptyv", "\u{29B2}"),
+ (b"cent", "\u{00A2}"),
+ (b"centerdot", "\u{00B7}"),
+ (b"cfr", "\u{1D520}"),
+ (b"chcy", "\u{0447}"),
+ (b"check", "\u{2713}"),
+ (b"checkmark", "\u{2713}"),
+ (b"chi", "\u{03C7}"),
+ (b"cir", "\u{25CB}"),
+ (b"cirE", "\u{29C3}"),
+ (b"circ", "\u{02C6}"),
+ (b"circeq", "\u{2257}"),
+ (b"circlearrowleft", "\u{21BA}"),
+ (b"circlearrowright", "\u{21BB}"),
+ (b"circledR", "\u{00AE}"),
+ (b"circledS", "\u{24C8}"),
+ (b"circledast", "\u{229B}"),
+ (b"circledcirc", "\u{229A}"),
+ (b"circleddash", "\u{229D}"),
+ (b"cire", "\u{2257}"),
+ (b"cirfnint", "\u{2A10}"),
+ (b"cirmid", "\u{2AEF}"),
+ (b"cirscir", "\u{29C2}"),
+ (b"clubs", "\u{2663}"),
+ (b"clubsuit", "\u{2663}"),
+ (b"colon", "\u{003A}"),
+ (b"colone", "\u{2254}"),
+ (b"coloneq", "\u{2254}"),
+ (b"comma", "\u{002C}"),
+ (b"commat", "\u{0040}"),
+ (b"comp", "\u{2201}"),
+ (b"compfn", "\u{2218}"),
+ (b"complement", "\u{2201}"),
+ (b"complexes", "\u{2102}"),
+ (b"cong", "\u{2245}"),
+ (b"congdot", "\u{2A6D}"),
+ (b"conint", "\u{222E}"),
+ (b"copf", "\u{1D554}"),
+ (b"coprod", "\u{2210}"),
+ (b"copy", "\u{00A9}"),
+ (b"copysr", "\u{2117}"),
+ (b"crarr", "\u{21B5}"),
+ (b"cross", "\u{2717}"),
+ (b"cscr", "\u{1D4B8}"),
+ (b"csub", "\u{2ACF}"),
+ (b"csube", "\u{2AD1}"),
+ (b"csup", "\u{2AD0}"),
+ (b"csupe", "\u{2AD2}"),
+ (b"ctdot", "\u{22EF}"),
+ (b"cudarrl", "\u{2938}"),
+ (b"cudarrr", "\u{2935}"),
+ (b"cuepr", "\u{22DE}"),
+ (b"cuesc", "\u{22DF}"),
+ (b"cularr", "\u{21B6}"),
+ (b"cularrp", "\u{293D}"),
+ (b"cup", "\u{222A}"),
+ (b"cupbrcap", "\u{2A48}"),
+ (b"cupcap", "\u{2A46}"),
+ (b"cupcup", "\u{2A4A}"),
+ (b"cupdot", "\u{228D}"),
+ (b"cupor", "\u{2A45}"),
+ (b"cups", "\u{222A}\u{FE00}"),
+ (b"curarr", "\u{21B7}"),
+ (b"curarrm", "\u{293C}"),
+ (b"curlyeqprec", "\u{22DE}"),
+ (b"curlyeqsucc", "\u{22DF}"),
+ (b"curlyvee", "\u{22CE}"),
+ (b"curlywedge", "\u{22CF}"),
+ (b"curren", "\u{00A4}"),
+ (b"curvearrowleft", "\u{21B6}"),
+ (b"curvearrowright", "\u{21B7}"),
+ (b"cuvee", "\u{22CE}"),
+ (b"cuwed", "\u{22CF}"),
+ (b"cwconint", "\u{2232}"),
+ (b"cwint", "\u{2231}"),
+ (b"cylcty", "\u{232D}"),
+ (b"dArr", "\u{21D3}"),
+ (b"dHar", "\u{2965}"),
+ (b"dagger", "\u{2020}"),
+ (b"daleth", "\u{2138}"),
+ (b"darr", "\u{2193}"),
+ (b"dash", "\u{2010}"),
+ (b"dashv", "\u{22A3}"),
+ (b"dbkarow", "\u{290F}"),
+ (b"dblac", "\u{02DD}"),
+ (b"dcaron", "\u{010F}"),
+ (b"dcy", "\u{0434}"),
+ (b"dd", "\u{2146}"),
+ (b"ddagger", "\u{2021}"),
+ (b"ddarr", "\u{21CA}"),
+ (b"ddotseq", "\u{2A77}"),
+ (b"deg", "\u{00B0}"),
+ (b"delta", "\u{03B4}"),
+ (b"demptyv", "\u{29B1}"),
+ (b"dfisht", "\u{297F}"),
+ (b"dfr", "\u{1D521}"),
+ (b"dharl", "\u{21C3}"),
+ (b"dharr", "\u{21C2}"),
+ (b"diam", "\u{22C4}"),
+ (b"diamond", "\u{22C4}"),
+ (b"diamondsuit", "\u{2666}"),
+ (b"diams", "\u{2666}"),
+ (b"die", "\u{00A8}"),
+ (b"digamma", "\u{03DD}"),
+ (b"disin", "\u{22F2}"),
+ (b"div", "\u{00F7}"),
+ (b"divide", "\u{00F7}"),
+ (b"divideontimes", "\u{22C7}"),
+ (b"divonx", "\u{22C7}"),
+ (b"djcy", "\u{0452}"),
+ (b"dlcorn", "\u{231E}"),
+ (b"dlcrop", "\u{230D}"),
+ (b"dollar", "\u{0024}"),
+ (b"dopf", "\u{1D555}"),
+ (b"dot", "\u{02D9}"),
+ (b"doteq", "\u{2250}"),
+ (b"doteqdot", "\u{2251}"),
+ (b"dotminus", "\u{2238}"),
+ (b"dotplus", "\u{2214}"),
+ (b"dotsquare", "\u{22A1}"),
+ (b"doublebarwedge", "\u{2306}"),
+ (b"downarrow", "\u{2193}"),
+ (b"downdownarrows", "\u{21CA}"),
+ (b"downharpoonleft", "\u{21C3}"),
+ (b"downharpoonright", "\u{21C2}"),
+ (b"drbkarow", "\u{2910}"),
+ (b"drcorn", "\u{231F}"),
+ (b"drcrop", "\u{230C}"),
+ (b"dscr", "\u{1D4B9}"),
+ (b"dscy", "\u{0455}"),
+ (b"dsol", "\u{29F6}"),
+ (b"dstrok", "\u{0111}"),
+ (b"dtdot", "\u{22F1}"),
+ (b"dtri", "\u{25BF}"),
+ (b"dtrif", "\u{25BE}"),
+ (b"duarr", "\u{21F5}"),
+ (b"duhar", "\u{296F}"),
+ (b"dwangle", "\u{29A6}"),
+ (b"dzcy", "\u{045F}"),
+ (b"dzigrarr", "\u{27FF}"),
+ (b"eDDot", "\u{2A77}"),
+ (b"eDot", "\u{2251}"),
+ (b"eacute", "\u{00E9}"),
+ (b"easter", "\u{2A6E}"),
+ (b"ecaron", "\u{011B}"),
+ (b"ecir", "\u{2256}"),
+ (b"ecirc", "\u{00EA}"),
+ (b"ecolon", "\u{2255}"),
+ (b"ecy", "\u{044D}"),
+ (b"edot", "\u{0117}"),
+ (b"ee", "\u{2147}"),
+ (b"efDot", "\u{2252}"),
+ (b"efr", "\u{1D522}"),
+ (b"eg", "\u{2A9A}"),
+ (b"egrave", "\u{00E8}"),
+ (b"egs", "\u{2A96}"),
+ (b"egsdot", "\u{2A98}"),
+ (b"el", "\u{2A99}"),
+ (b"elinters", "\u{23E7}"),
+ (b"ell", "\u{2113}"),
+ (b"els", "\u{2A95}"),
+ (b"elsdot", "\u{2A97}"),
+ (b"emacr", "\u{0113}"),
+ (b"empty", "\u{2205}"),
+ (b"emptyset", "\u{2205}"),
+ (b"emptyv", "\u{2205}"),
+ (b"emsp", "\u{2003}"),
+ (b"emsp13", "\u{2004}"),
+ (b"emsp14", "\u{2005}"),
+ (b"eng", "\u{014B}"),
+ (b"ensp", "\u{2002}"),
+ (b"eogon", "\u{0119}"),
+ (b"eopf", "\u{1D556}"),
+ (b"epar", "\u{22D5}"),
+ (b"eparsl", "\u{29E3}"),
+ (b"eplus", "\u{2A71}"),
+ (b"epsi", "\u{03B5}"),
+ (b"epsilon", "\u{03B5}"),
+ (b"epsiv", "\u{03F5}"),
+ (b"eqcirc", "\u{2256}"),
+ (b"eqcolon", "\u{2255}"),
+ (b"eqsim", "\u{2242}"),
+ (b"eqslantgtr", "\u{2A96}"),
+ (b"eqslantless", "\u{2A95}"),
+ (b"equals", "\u{003D}"),
+ (b"equest", "\u{225F}"),
+ (b"equiv", "\u{2261}"),
+ (b"equivDD", "\u{2A78}"),
+ (b"eqvparsl", "\u{29E5}"),
+ (b"erDot", "\u{2253}"),
+ (b"erarr", "\u{2971}"),
+ (b"escr", "\u{212F}"),
+ (b"esdot", "\u{2250}"),
+ (b"esim", "\u{2242}"),
+ (b"eta", "\u{03B7}"),
+ (b"eth", "\u{00F0}"),
+ (b"euml", "\u{00EB}"),
+ (b"euro", "\u{20AC}"),
+ (b"excl", "\u{0021}"),
+ (b"exist", "\u{2203}"),
+ (b"expectation", "\u{2130}"),
+ (b"exponentiale", "\u{2147}"),
+ (b"fallingdotseq", "\u{2252}"),
+ (b"fcy", "\u{0444}"),
+ (b"female", "\u{2640}"),
+ (b"ffilig", "\u{FB03}"),
+ (b"fflig", "\u{FB00}"),
+ (b"ffllig", "\u{FB04}"),
+ (b"ffr", "\u{1D523}"),
+ (b"filig", "\u{FB01}"),
+ (b"fjlig", "\u{0066}\u{006A}"),
+ (b"flat", "\u{266D}"),
+ (b"fllig", "\u{FB02}"),
+ (b"fltns", "\u{25B1}"),
+ (b"fnof", "\u{0192}"),
+ (b"fopf", "\u{1D557}"),
+ (b"forall", "\u{2200}"),
+ (b"fork", "\u{22D4}"),
+ (b"forkv", "\u{2AD9}"),
+ (b"fpartint", "\u{2A0D}"),
+ (b"frac12", "\u{00BD}"),
+ (b"frac13", "\u{2153}"),
+ (b"frac14", "\u{00BC}"),
+ (b"frac15", "\u{2155}"),
+ (b"frac16", "\u{2159}"),
+ (b"frac18", "\u{215B}"),
+ (b"frac23", "\u{2154}"),
+ (b"frac25", "\u{2156}"),
+ (b"frac34", "\u{00BE}"),
+ (b"frac35", "\u{2157}"),
+ (b"frac38", "\u{215C}"),
+ (b"frac45", "\u{2158}"),
+ (b"frac56", "\u{215A}"),
+ (b"frac58", "\u{215D}"),
+ (b"frac78", "\u{215E}"),
+ (b"frasl", "\u{2044}"),
+ (b"frown", "\u{2322}"),
+ (b"fscr", "\u{1D4BB}"),
+ (b"gE", "\u{2267}"),
+ (b"gEl", "\u{2A8C}"),
+ (b"gacute", "\u{01F5}"),
+ (b"gamma", "\u{03B3}"),
+ (b"gammad", "\u{03DD}"),
+ (b"gap", "\u{2A86}"),
+ (b"gbreve", "\u{011F}"),
+ (b"gcirc", "\u{011D}"),
+ (b"gcy", "\u{0433}"),
+ (b"gdot", "\u{0121}"),
+ (b"ge", "\u{2265}"),
+ (b"gel", "\u{22DB}"),
+ (b"geq", "\u{2265}"),
+ (b"geqq", "\u{2267}"),
+ (b"geqslant", "\u{2A7E}"),
+ (b"ges", "\u{2A7E}"),
+ (b"gescc", "\u{2AA9}"),
+ (b"gesdot", "\u{2A80}"),
+ (b"gesdoto", "\u{2A82}"),
+ (b"gesdotol", "\u{2A84}"),
+ (b"gesl", "\u{22DB}\u{FE00}"),
+ (b"gesles", "\u{2A94}"),
+ (b"gfr", "\u{1D524}"),
+ (b"gg", "\u{226B}"),
+ (b"ggg", "\u{22D9}"),
+ (b"gimel", "\u{2137}"),
+ (b"gjcy", "\u{0453}"),
+ (b"gl", "\u{2277}"),
+ (b"glE", "\u{2A92}"),
+ (b"gla", "\u{2AA5}"),
+ (b"glj", "\u{2AA4}"),
+ (b"gnE", "\u{2269}"),
+ (b"gnap", "\u{2A8A}"),
+ (b"gnapprox", "\u{2A8A}"),
+ (b"gne", "\u{2A88}"),
+ (b"gneq", "\u{2A88}"),
+ (b"gneqq", "\u{2269}"),
+ (b"gnsim", "\u{22E7}"),
+ (b"gopf", "\u{1D558}"),
+ (b"grave", "\u{0060}"),
+ (b"gscr", "\u{210A}"),
+ (b"gsim", "\u{2273}"),
+ (b"gsime", "\u{2A8E}"),
+ (b"gsiml", "\u{2A90}"),
+ (b"gt", "\u{003E}"),
+ (b"gtcc", "\u{2AA7}"),
+ (b"gtcir", "\u{2A7A}"),
+ (b"gtdot", "\u{22D7}"),
+ (b"gtlPar", "\u{2995}"),
+ (b"gtquest", "\u{2A7C}"),
+ (b"gtrapprox", "\u{2A86}"),
+ (b"gtrarr", "\u{2978}"),
+ (b"gtrdot", "\u{22D7}"),
+ (b"gtreqless", "\u{22DB}"),
+ (b"gtreqqless", "\u{2A8C}"),
+ (b"gtrless", "\u{2277}"),
+ (b"gtrsim", "\u{2273}"),
+ (b"gvertneqq", "\u{2269}\u{FE00}"),
+ (b"gvnE", "\u{2269}\u{FE00}"),
+ (b"hArr", "\u{21D4}"),
+ (b"hairsp", "\u{200A}"),
+ (b"half", "\u{00BD}"),
+ (b"hamilt", "\u{210B}"),
+ (b"hardcy", "\u{044A}"),
+ (b"harr", "\u{2194}"),
+ (b"harrcir", "\u{2948}"),
+ (b"harrw", "\u{21AD}"),
+ (b"hbar", "\u{210F}"),
+ (b"hcirc", "\u{0125}"),
+ (b"hearts", "\u{2665}"),
+ (b"heartsuit", "\u{2665}"),
+ (b"hellip", "\u{2026}"),
+ (b"hercon", "\u{22B9}"),
+ (b"hfr", "\u{1D525}"),
+ (b"hksearow", "\u{2925}"),
+ (b"hkswarow", "\u{2926}"),
+ (b"hoarr", "\u{21FF}"),
+ (b"homtht", "\u{223B}"),
+ (b"hookleftarrow", "\u{21A9}"),
+ (b"hookrightarrow", "\u{21AA}"),
+ (b"hopf", "\u{1D559}"),
+ (b"horbar", "\u{2015}"),
+ (b"hscr", "\u{1D4BD}"),
+ (b"hslash", "\u{210F}"),
+ (b"hstrok", "\u{0127}"),
+ (b"hybull", "\u{2043}"),
+ (b"hyphen", "\u{2010}"),
+ (b"iacute", "\u{00ED}"),
+ (b"ic", "\u{2063}"),
+ (b"icirc", "\u{00EE}"),
+ (b"icy", "\u{0438}"),
+ (b"iecy", "\u{0435}"),
+ (b"iexcl", "\u{00A1}"),
+ (b"iff", "\u{21D4}"),
+ (b"ifr", "\u{1D526}"),
+ (b"igrave", "\u{00EC}"),
+ (b"ii", "\u{2148}"),
+ (b"iiiint", "\u{2A0C}"),
+ (b"iiint", "\u{222D}"),
+ (b"iinfin", "\u{29DC}"),
+ (b"iiota", "\u{2129}"),
+ (b"ijlig", "\u{0133}"),
+ (b"imacr", "\u{012B}"),
+ (b"image", "\u{2111}"),
+ (b"imagline", "\u{2110}"),
+ (b"imagpart", "\u{2111}"),
+ (b"imath", "\u{0131}"),
+ (b"imof", "\u{22B7}"),
+ (b"imped", "\u{01B5}"),
+ (b"in", "\u{2208}"),
+ (b"incare", "\u{2105}"),
+ (b"infin", "\u{221E}"),
+ (b"infintie", "\u{29DD}"),
+ (b"inodot", "\u{0131}"),
+ (b"int", "\u{222B}"),
+ (b"intcal", "\u{22BA}"),
+ (b"integers", "\u{2124}"),
+ (b"intercal", "\u{22BA}"),
+ (b"intlarhk", "\u{2A17}"),
+ (b"intprod", "\u{2A3C}"),
+ (b"iocy", "\u{0451}"),
+ (b"iogon", "\u{012F}"),
+ (b"iopf", "\u{1D55A}"),
+ (b"iota", "\u{03B9}"),
+ (b"iprod", "\u{2A3C}"),
+ (b"iquest", "\u{00BF}"),
+ (b"iscr", "\u{1D4BE}"),
+ (b"isin", "\u{2208}"),
+ (b"isinE", "\u{22F9}"),
+ (b"isindot", "\u{22F5}"),
+ (b"isins", "\u{22F4}"),
+ (b"isinsv", "\u{22F3}"),
+ (b"isinv", "\u{2208}"),
+ (b"it", "\u{2062}"),
+ (b"itilde", "\u{0129}"),
+ (b"iukcy", "\u{0456}"),
+ (b"iuml", "\u{00EF}"),
+ (b"jcirc", "\u{0135}"),
+ (b"jcy", "\u{0439}"),
+ (b"jfr", "\u{1D527}"),
+ (b"jmath", "\u{0237}"),
+ (b"jopf", "\u{1D55B}"),
+ (b"jscr", "\u{1D4BF}"),
+ (b"jsercy", "\u{0458}"),
+ (b"jukcy", "\u{0454}"),
+ (b"kappa", "\u{03BA}"),
+ (b"kappav", "\u{03F0}"),
+ (b"kcedil", "\u{0137}"),
+ (b"kcy", "\u{043A}"),
+ (b"kfr", "\u{1D528}"),
+ (b"kgreen", "\u{0138}"),
+ (b"khcy", "\u{0445}"),
+ (b"kjcy", "\u{045C}"),
+ (b"kopf", "\u{1D55C}"),
+ (b"kscr", "\u{1D4C0}"),
+ (b"lAarr", "\u{21DA}"),
+ (b"lArr", "\u{21D0}"),
+ (b"lAtail", "\u{291B}"),
+ (b"lBarr", "\u{290E}"),
+ (b"lE", "\u{2266}"),
+ (b"lEg", "\u{2A8B}"),
+ (b"lHar", "\u{2962}"),
+ (b"lacute", "\u{013A}"),
+ (b"laemptyv", "\u{29B4}"),
+ (b"lagran", "\u{2112}"),
+ (b"lambda", "\u{03BB}"),
+ (b"lang", "\u{27E8}"),
+ (b"langd", "\u{2991}"),
+ (b"langle", "\u{27E8}"),
+ (b"lap", "\u{2A85}"),
+ (b"laquo", "\u{00AB}"),
+ (b"larr", "\u{2190}"),
+ (b"larrb", "\u{21E4}"),
+ (b"larrbfs", "\u{291F}"),
+ (b"larrfs", "\u{291D}"),
+ (b"larrhk", "\u{21A9}"),
+ (b"larrlp", "\u{21AB}"),
+ (b"larrpl", "\u{2939}"),
+ (b"larrsim", "\u{2973}"),
+ (b"larrtl", "\u{21A2}"),
+ (b"lat", "\u{2AAB}"),
+ (b"latail", "\u{2919}"),
+ (b"late", "\u{2AAD}"),
+ (b"lates", "\u{2AAD}\u{FE00}"),
+ (b"lbarr", "\u{290C}"),
+ (b"lbbrk", "\u{2772}"),
+ (b"lbrace", "\u{007B}"),
+ (b"lbrack", "\u{005B}"),
+ (b"lbrke", "\u{298B}"),
+ (b"lbrksld", "\u{298F}"),
+ (b"lbrkslu", "\u{298D}"),
+ (b"lcaron", "\u{013E}"),
+ (b"lcedil", "\u{013C}"),
+ (b"lceil", "\u{2308}"),
+ (b"lcub", "\u{007B}"),
+ (b"lcy", "\u{043B}"),
+ (b"ldca", "\u{2936}"),
+ (b"ldquo", "\u{201C}"),
+ (b"ldquor", "\u{201E}"),
+ (b"ldrdhar", "\u{2967}"),
+ (b"ldrushar", "\u{294B}"),
+ (b"ldsh", "\u{21B2}"),
+ (b"le", "\u{2264}"),
+ (b"leftarrow", "\u{2190}"),
+ (b"leftarrowtail", "\u{21A2}"),
+ (b"leftharpoondown", "\u{21BD}"),
+ (b"leftharpoonup", "\u{21BC}"),
+ (b"leftleftarrows", "\u{21C7}"),
+ (b"leftrightarrow", "\u{2194}"),
+ (b"leftrightarrows", "\u{21C6}"),
+ (b"leftrightharpoons", "\u{21CB}"),
+ (b"leftrightsquigarrow", "\u{21AD}"),
+ (b"leftthreetimes", "\u{22CB}"),
+ (b"leg", "\u{22DA}"),
+ (b"leq", "\u{2264}"),
+ (b"leqq", "\u{2266}"),
+ (b"leqslant", "\u{2A7D}"),
+ (b"les", "\u{2A7D}"),
+ (b"lescc", "\u{2AA8}"),
+ (b"lesdot", "\u{2A7F}"),
+ (b"lesdoto", "\u{2A81}"),
+ (b"lesdotor", "\u{2A83}"),
+ (b"lesg", "\u{22DA}\u{FE00}"),
+ (b"lesges", "\u{2A93}"),
+ (b"lessapprox", "\u{2A85}"),
+ (b"lessdot", "\u{22D6}"),
+ (b"lesseqgtr", "\u{22DA}"),
+ (b"lesseqqgtr", "\u{2A8B}"),
+ (b"lessgtr", "\u{2276}"),
+ (b"lesssim", "\u{2272}"),
+ (b"lfisht", "\u{297C}"),
+ (b"lfloor", "\u{230A}"),
+ (b"lfr", "\u{1D529}"),
+ (b"lg", "\u{2276}"),
+ (b"lgE", "\u{2A91}"),
+ (b"lhard", "\u{21BD}"),
+ (b"lharu", "\u{21BC}"),
+ (b"lharul", "\u{296A}"),
+ (b"lhblk", "\u{2584}"),
+ (b"ljcy", "\u{0459}"),
+ (b"ll", "\u{226A}"),
+ (b"llarr", "\u{21C7}"),
+ (b"llcorner", "\u{231E}"),
+ (b"llhard", "\u{296B}"),
+ (b"lltri", "\u{25FA}"),
+ (b"lmidot", "\u{0140}"),
+ (b"lmoust", "\u{23B0}"),
+ (b"lmoustache", "\u{23B0}"),
+ (b"lnE", "\u{2268}"),
+ (b"lnap", "\u{2A89}"),
+ (b"lnapprox", "\u{2A89}"),
+ (b"lne", "\u{2A87}"),
+ (b"lneq", "\u{2A87}"),
+ (b"lneqq", "\u{2268}"),
+ (b"lnsim", "\u{22E6}"),
+ (b"loang", "\u{27EC}"),
+ (b"loarr", "\u{21FD}"),
+ (b"lobrk", "\u{27E6}"),
+ (b"longleftarrow", "\u{27F5}"),
+ (b"longleftrightarrow", "\u{27F7}"),
+ (b"longmapsto", "\u{27FC}"),
+ (b"longrightarrow", "\u{27F6}"),
+ (b"looparrowleft", "\u{21AB}"),
+ (b"looparrowright", "\u{21AC}"),
+ (b"lopar", "\u{2985}"),
+ (b"lopf", "\u{1D55D}"),
+ (b"loplus", "\u{2A2D}"),
+ (b"lotimes", "\u{2A34}"),
+ (b"lowast", "\u{2217}"),
+ (b"lowbar", "\u{005F}"),
+ (b"loz", "\u{25CA}"),
+ (b"lozenge", "\u{25CA}"),
+ (b"lozf", "\u{29EB}"),
+ (b"lpar", "\u{0028}"),
+ (b"lparlt", "\u{2993}"),
+ (b"lrarr", "\u{21C6}"),
+ (b"lrcorner", "\u{231F}"),
+ (b"lrhar", "\u{21CB}"),
+ (b"lrhard", "\u{296D}"),
+ (b"lrm", "\u{200E}"),
+ (b"lrtri", "\u{22BF}"),
+ (b"lsaquo", "\u{2039}"),
+ (b"lscr", "\u{1D4C1}"),
+ (b"lsh", "\u{21B0}"),
+ (b"lsim", "\u{2272}"),
+ (b"lsime", "\u{2A8D}"),
+ (b"lsimg", "\u{2A8F}"),
+ (b"lsqb", "\u{005B}"),
+ (b"lsquo", "\u{2018}"),
+ (b"lsquor", "\u{201A}"),
+ (b"lstrok", "\u{0142}"),
+ (b"lt", "\u{003C}"),
+ (b"ltcc", "\u{2AA6}"),
+ (b"ltcir", "\u{2A79}"),
+ (b"ltdot", "\u{22D6}"),
+ (b"lthree", "\u{22CB}"),
+ (b"ltimes", "\u{22C9}"),
+ (b"ltlarr", "\u{2976}"),
+ (b"ltquest", "\u{2A7B}"),
+ (b"ltrPar", "\u{2996}"),
+ (b"ltri", "\u{25C3}"),
+ (b"ltrie", "\u{22B4}"),
+ (b"ltrif", "\u{25C2}"),
+ (b"lurdshar", "\u{294A}"),
+ (b"luruhar", "\u{2966}"),
+ (b"lvertneqq", "\u{2268}\u{FE00}"),
+ (b"lvnE", "\u{2268}\u{FE00}"),
+ (b"mDDot", "\u{223A}"),
+ (b"macr", "\u{00AF}"),
+ (b"male", "\u{2642}"),
+ (b"malt", "\u{2720}"),
+ (b"maltese", "\u{2720}"),
+ (b"map", "\u{21A6}"),
+ (b"mapsto", "\u{21A6}"),
+ (b"mapstodown", "\u{21A7}"),
+ (b"mapstoleft", "\u{21A4}"),
+ (b"mapstoup", "\u{21A5}"),
+ (b"marker", "\u{25AE}"),
+ (b"mcomma", "\u{2A29}"),
+ (b"mcy", "\u{043C}"),
+ (b"mdash", "\u{2014}"),
+ (b"measuredangle", "\u{2221}"),
+ (b"mfr", "\u{1D52A}"),
+ (b"mho", "\u{2127}"),
+ (b"micro", "\u{00B5}"),
+ (b"mid", "\u{2223}"),
+ (b"midast", "\u{002A}"),
+ (b"midcir", "\u{2AF0}"),
+ (b"middot", "\u{00B7}"),
+ (b"minus", "\u{2212}"),
+ (b"minusb", "\u{229F}"),
+ (b"minusd", "\u{2238}"),
+ (b"minusdu", "\u{2A2A}"),
+ (b"mlcp", "\u{2ADB}"),
+ (b"mldr", "\u{2026}"),
+ (b"mnplus", "\u{2213}"),
+ (b"models", "\u{22A7}"),
+ (b"mopf", "\u{1D55E}"),
+ (b"mp", "\u{2213}"),
+ (b"mscr", "\u{1D4C2}"),
+ (b"mstpos", "\u{223E}"),
+ (b"mu", "\u{03BC}"),
+ (b"multimap", "\u{22B8}"),
+ (b"mumap", "\u{22B8}"),
+ (b"nGg", "\u{22D9}\u{0338}"),
+ (b"nGt", "\u{226B}\u{20D2}"),
+ (b"nGtv", "\u{226B}\u{0338}"),
+ (b"nLeftarrow", "\u{21CD}"),
+ (b"nLeftrightarrow", "\u{21CE}"),
+ (b"nLl", "\u{22D8}\u{0338}"),
+ (b"nLt", "\u{226A}\u{20D2}"),
+ (b"nLtv", "\u{226A}\u{0338}"),
+ (b"nRightarrow", "\u{21CF}"),
+ (b"nVDash", "\u{22AF}"),
+ (b"nVdash", "\u{22AE}"),
+ (b"nabla", "\u{2207}"),
+ (b"nacute", "\u{0144}"),
+ (b"nang", "\u{2220}\u{20D2}"),
+ (b"nap", "\u{2249}"),
+ (b"napE", "\u{2A70}\u{0338}"),
+ (b"napid", "\u{224B}\u{0338}"),
+ (b"napos", "\u{0149}"),
+ (b"napprox", "\u{2249}"),
+ (b"natur", "\u{266E}"),
+ (b"natural", "\u{266E}"),
+ (b"naturals", "\u{2115}"),
+ (b"nbsp", "\u{00A0}"),
+ (b"nbump", "\u{224E}\u{0338}"),
+ (b"nbumpe", "\u{224F}\u{0338}"),
+ (b"ncap", "\u{2A43}"),
+ (b"ncaron", "\u{0148}"),
+ (b"ncedil", "\u{0146}"),
+ (b"ncong", "\u{2247}"),
+ (b"ncongdot", "\u{2A6D}\u{0338}"),
+ (b"ncup", "\u{2A42}"),
+ (b"ncy", "\u{043D}"),
+ (b"ndash", "\u{2013}"),
+ (b"ne", "\u{2260}"),
+ (b"neArr", "\u{21D7}"),
+ (b"nearhk", "\u{2924}"),
+ (b"nearr", "\u{2197}"),
+ (b"nearrow", "\u{2197}"),
+ (b"nedot", "\u{2250}\u{0338}"),
+ (b"nequiv", "\u{2262}"),
+ (b"nesear", "\u{2928}"),
+ (b"nesim", "\u{2242}\u{0338}"),
+ (b"nexist", "\u{2204}"),
+ (b"nexists", "\u{2204}"),
+ (b"nfr", "\u{1D52B}"),
+ (b"ngE", "\u{2267}\u{0338}"),
+ (b"nge", "\u{2271}"),
+ (b"ngeq", "\u{2271}"),
+ (b"ngeqq", "\u{2267}\u{0338}"),
+ (b"ngeqslant", "\u{2A7E}\u{0338}"),
+ (b"nges", "\u{2A7E}\u{0338}"),
+ (b"ngsim", "\u{2275}"),
+ (b"ngt", "\u{226F}"),
+ (b"ngtr", "\u{226F}"),
+ (b"nhArr", "\u{21CE}"),
+ (b"nharr", "\u{21AE}"),
+ (b"nhpar", "\u{2AF2}"),
+ (b"ni", "\u{220B}"),
+ (b"nis", "\u{22FC}"),
+ (b"nisd", "\u{22FA}"),
+ (b"niv", "\u{220B}"),
+ (b"njcy", "\u{045A}"),
+ (b"nlArr", "\u{21CD}"),
+ (b"nlE", "\u{2266}\u{0338}"),
+ (b"nlarr", "\u{219A}"),
+ (b"nldr", "\u{2025}"),
+ (b"nle", "\u{2270}"),
+ (b"nleftarrow", "\u{219A}"),
+ (b"nleftrightarrow", "\u{21AE}"),
+ (b"nleq", "\u{2270}"),
+ (b"nleqq", "\u{2266}\u{0338}"),
+ (b"nleqslant", "\u{2A7D}\u{0338}"),
+ (b"nles", "\u{2A7D}\u{0338}"),
+ (b"nless", "\u{226E}"),
+ (b"nlsim", "\u{2274}"),
+ (b"nlt", "\u{226E}"),
+ (b"nltri", "\u{22EA}"),
+ (b"nltrie", "\u{22EC}"),
+ (b"nmid", "\u{2224}"),
+ (b"nopf", "\u{1D55F}"),
+ (b"not", "\u{00AC}"),
+ (b"notin", "\u{2209}"),
+ (b"notinE", "\u{22F9}\u{0338}"),
+ (b"notindot", "\u{22F5}\u{0338}"),
+ (b"notinva", "\u{2209}"),
+ (b"notinvb", "\u{22F7}"),
+ (b"notinvc", "\u{22F6}"),
+ (b"notni", "\u{220C}"),
+ (b"notniva", "\u{220C}"),
+ (b"notnivb", "\u{22FE}"),
+ (b"notnivc", "\u{22FD}"),
+ (b"npar", "\u{2226}"),
+ (b"nparallel", "\u{2226}"),
+ (b"nparsl", "\u{2AFD}\u{20E5}"),
+ (b"npart", "\u{2202}\u{0338}"),
+ (b"npolint", "\u{2A14}"),
+ (b"npr", "\u{2280}"),
+ (b"nprcue", "\u{22E0}"),
+ (b"npre", "\u{2AAF}\u{0338}"),
+ (b"nprec", "\u{2280}"),
+ (b"npreceq", "\u{2AAF}\u{0338}"),
+ (b"nrArr", "\u{21CF}"),
+ (b"nrarr", "\u{219B}"),
+ (b"nrarrc", "\u{2933}\u{0338}"),
+ (b"nrarrw", "\u{219D}\u{0338}"),
+ (b"nrightarrow", "\u{219B}"),
+ (b"nrtri", "\u{22EB}"),
+ (b"nrtrie", "\u{22ED}"),
+ (b"nsc", "\u{2281}"),
+ (b"nsccue", "\u{22E1}"),
+ (b"nsce", "\u{2AB0}\u{0338}"),
+ (b"nscr", "\u{1D4C3}"),
+ (b"nshortmid", "\u{2224}"),
+ (b"nshortparallel", "\u{2226}"),
+ (b"nsim", "\u{2241}"),
+ (b"nsime", "\u{2244}"),
+ (b"nsimeq", "\u{2244}"),
+ (b"nsmid", "\u{2224}"),
+ (b"nspar", "\u{2226}"),
+ (b"nsqsube", "\u{22E2}"),
+ (b"nsqsupe", "\u{22E3}"),
+ (b"nsub", "\u{2284}"),
+ (b"nsubE", "\u{2AC5}\u{0338}"),
+ (b"nsube", "\u{2288}"),
+ (b"nsubset", "\u{2282}\u{20D2}"),
+ (b"nsubseteq", "\u{2288}"),
+ (b"nsubseteqq", "\u{2AC5}\u{0338}"),
+ (b"nsucc", "\u{2281}"),
+ (b"nsucceq", "\u{2AB0}\u{0338}"),
+ (b"nsup", "\u{2285}"),
+ (b"nsupE", "\u{2AC6}\u{0338}"),
+ (b"nsupe", "\u{2289}"),
+ (b"nsupset", "\u{2283}\u{20D2}"),
+ (b"nsupseteq", "\u{2289}"),
+ (b"nsupseteqq", "\u{2AC6}\u{0338}"),
+ (b"ntgl", "\u{2279}"),
+ (b"ntilde", "\u{00F1}"),
+ (b"ntlg", "\u{2278}"),
+ (b"ntriangleleft", "\u{22EA}"),
+ (b"ntrianglelefteq", "\u{22EC}"),
+ (b"ntriangleright", "\u{22EB}"),
+ (b"ntrianglerighteq", "\u{22ED}"),
+ (b"nu", "\u{03BD}"),
+ (b"num", "\u{0023}"),
+ (b"numero", "\u{2116}"),
+ (b"numsp", "\u{2007}"),
+ (b"nvDash", "\u{22AD}"),
+ (b"nvHarr", "\u{2904}"),
+ (b"nvap", "\u{224D}\u{20D2}"),
+ (b"nvdash", "\u{22AC}"),
+ (b"nvge", "\u{2265}\u{20D2}"),
+ (b"nvgt", "\u{003E}\u{20D2}"),
+ (b"nvinfin", "\u{29DE}"),
+ (b"nvlArr", "\u{2902}"),
+ (b"nvle", "\u{2264}\u{20D2}"),
+ (b"nvlt", "\u{003C}\u{20D2}"),
+ (b"nvltrie", "\u{22B4}\u{20D2}"),
+ (b"nvrArr", "\u{2903}"),
+ (b"nvrtrie", "\u{22B5}\u{20D2}"),
+ (b"nvsim", "\u{223C}\u{20D2}"),
+ (b"nwArr", "\u{21D6}"),
+ (b"nwarhk", "\u{2923}"),
+ (b"nwarr", "\u{2196}"),
+ (b"nwarrow", "\u{2196}"),
+ (b"nwnear", "\u{2927}"),
+ (b"oS", "\u{24C8}"),
+ (b"oacute", "\u{00F3}"),
+ (b"oast", "\u{229B}"),
+ (b"ocir", "\u{229A}"),
+ (b"ocirc", "\u{00F4}"),
+ (b"ocy", "\u{043E}"),
+ (b"odash", "\u{229D}"),
+ (b"odblac", "\u{0151}"),
+ (b"odiv", "\u{2A38}"),
+ (b"odot", "\u{2299}"),
+ (b"odsold", "\u{29BC}"),
+ (b"oelig", "\u{0153}"),
+ (b"ofcir", "\u{29BF}"),
+ (b"ofr", "\u{1D52C}"),
+ (b"ogon", "\u{02DB}"),
+ (b"ograve", "\u{00F2}"),
+ (b"ogt", "\u{29C1}"),
+ (b"ohbar", "\u{29B5}"),
+ (b"ohm", "\u{03A9}"),
+ (b"oint", "\u{222E}"),
+ (b"olarr", "\u{21BA}"),
+ (b"olcir", "\u{29BE}"),
+ (b"olcross", "\u{29BB}"),
+ (b"oline", "\u{203E}"),
+ (b"olt", "\u{29C0}"),
+ (b"omacr", "\u{014D}"),
+ (b"omega", "\u{03C9}"),
+ (b"omicron", "\u{03BF}"),
+ (b"omid", "\u{29B6}"),
+ (b"ominus", "\u{2296}"),
+ (b"oopf", "\u{1D560}"),
+ (b"opar", "\u{29B7}"),
+ (b"operp", "\u{29B9}"),
+ (b"oplus", "\u{2295}"),
+ (b"or", "\u{2228}"),
+ (b"orarr", "\u{21BB}"),
+ (b"ord", "\u{2A5D}"),
+ (b"order", "\u{2134}"),
+ (b"orderof", "\u{2134}"),
+ (b"ordf", "\u{00AA}"),
+ (b"ordm", "\u{00BA}"),
+ (b"origof", "\u{22B6}"),
+ (b"oror", "\u{2A56}"),
+ (b"orslope", "\u{2A57}"),
+ (b"orv", "\u{2A5B}"),
+ (b"oscr", "\u{2134}"),
+ (b"oslash", "\u{00F8}"),
+ (b"osol", "\u{2298}"),
+ (b"otilde", "\u{00F5}"),
+ (b"otimes", "\u{2297}"),
+ (b"otimesas", "\u{2A36}"),
+ (b"ouml", "\u{00F6}"),
+ (b"ovbar", "\u{233D}"),
+ (b"par", "\u{2225}"),
+ (b"para", "\u{00B6}"),
+ (b"parallel", "\u{2225}"),
+ (b"parsim", "\u{2AF3}"),
+ (b"parsl", "\u{2AFD}"),
+ (b"part", "\u{2202}"),
+ (b"pcy", "\u{043F}"),
+ (b"percnt", "\u{0025}"),
+ (b"period", "\u{002E}"),
+ (b"permil", "\u{2030}"),
+ (b"perp", "\u{22A5}"),
+ (b"pertenk", "\u{2031}"),
+ (b"pfr", "\u{1D52D}"),
+ (b"phi", "\u{03C6}"),
+ (b"phiv", "\u{03D5}"),
+ (b"phmmat", "\u{2133}"),
+ (b"phone", "\u{260E}"),
+ (b"pi", "\u{03C0}"),
+ (b"pitchfork", "\u{22D4}"),
+ (b"piv", "\u{03D6}"),
+ (b"planck", "\u{210F}"),
+ (b"planckh", "\u{210E}"),
+ (b"plankv", "\u{210F}"),
+ (b"plus", "\u{002B}"),
+ (b"plusacir", "\u{2A23}"),
+ (b"plusb", "\u{229E}"),
+ (b"pluscir", "\u{2A22}"),
+ (b"plusdo", "\u{2214}"),
+ (b"plusdu", "\u{2A25}"),
+ (b"pluse", "\u{2A72}"),
+ (b"plusmn", "\u{00B1}"),
+ (b"plussim", "\u{2A26}"),
+ (b"plustwo", "\u{2A27}"),
+ (b"pm", "\u{00B1}"),
+ (b"pointint", "\u{2A15}"),
+ (b"popf", "\u{1D561}"),
+ (b"pound", "\u{00A3}"),
+ (b"pr", "\u{227A}"),
+ (b"prE", "\u{2AB3}"),
+ (b"prap", "\u{2AB7}"),
+ (b"prcue", "\u{227C}"),
+ (b"pre", "\u{2AAF}"),
+ (b"prec", "\u{227A}"),
+ (b"precapprox", "\u{2AB7}"),
+ (b"preccurlyeq", "\u{227C}"),
+ (b"preceq", "\u{2AAF}"),
+ (b"precnapprox", "\u{2AB9}"),
+ (b"precneqq", "\u{2AB5}"),
+ (b"precnsim", "\u{22E8}"),
+ (b"precsim", "\u{227E}"),
+ (b"prime", "\u{2032}"),
+ (b"primes", "\u{2119}"),
+ (b"prnE", "\u{2AB5}"),
+ (b"prnap", "\u{2AB9}"),
+ (b"prnsim", "\u{22E8}"),
+ (b"prod", "\u{220F}"),
+ (b"profalar", "\u{232E}"),
+ (b"profline", "\u{2312}"),
+ (b"profsurf", "\u{2313}"),
+ (b"prop", "\u{221D}"),
+ (b"propto", "\u{221D}"),
+ (b"prsim", "\u{227E}"),
+ (b"prurel", "\u{22B0}"),
+ (b"pscr", "\u{1D4C5}"),
+ (b"psi", "\u{03C8}"),
+ (b"puncsp", "\u{2008}"),
+ (b"qfr", "\u{1D52E}"),
+ (b"qint", "\u{2A0C}"),
+ (b"qopf", "\u{1D562}"),
+ (b"qprime", "\u{2057}"),
+ (b"qscr", "\u{1D4C6}"),
+ (b"quaternions", "\u{210D}"),
+ (b"quatint", "\u{2A16}"),
+ (b"quest", "\u{003F}"),
+ (b"questeq", "\u{225F}"),
+ (b"quot", "\u{0022}"),
+ (b"rAarr", "\u{21DB}"),
+ (b"rArr", "\u{21D2}"),
+ (b"rAtail", "\u{291C}"),
+ (b"rBarr", "\u{290F}"),
+ (b"rHar", "\u{2964}"),
+ (b"race", "\u{223D}\u{0331}"),
+ (b"racute", "\u{0155}"),
+ (b"radic", "\u{221A}"),
+ (b"raemptyv", "\u{29B3}"),
+ (b"rang", "\u{27E9}"),
+ (b"rangd", "\u{2992}"),
+ (b"range", "\u{29A5}"),
+ (b"rangle", "\u{27E9}"),
+ (b"raquo", "\u{00BB}"),
+ (b"rarr", "\u{2192}"),
+ (b"rarrap", "\u{2975}"),
+ (b"rarrb", "\u{21E5}"),
+ (b"rarrbfs", "\u{2920}"),
+ (b"rarrc", "\u{2933}"),
+ (b"rarrfs", "\u{291E}"),
+ (b"rarrhk", "\u{21AA}"),
+ (b"rarrlp", "\u{21AC}"),
+ (b"rarrpl", "\u{2945}"),
+ (b"rarrsim", "\u{2974}"),
+ (b"rarrtl", "\u{21A3}"),
+ (b"rarrw", "\u{219D}"),
+ (b"ratail", "\u{291A}"),
+ (b"ratio", "\u{2236}"),
+ (b"rationals", "\u{211A}"),
+ (b"rbarr", "\u{290D}"),
+ (b"rbbrk", "\u{2773}"),
+ (b"rbrace", "\u{007D}"),
+ (b"rbrack", "\u{005D}"),
+ (b"rbrke", "\u{298C}"),
+ (b"rbrksld", "\u{298E}"),
+ (b"rbrkslu", "\u{2990}"),
+ (b"rcaron", "\u{0159}"),
+ (b"rcedil", "\u{0157}"),
+ (b"rceil", "\u{2309}"),
+ (b"rcub", "\u{007D}"),
+ (b"rcy", "\u{0440}"),
+ (b"rdca", "\u{2937}"),
+ (b"rdldhar", "\u{2969}"),
+ (b"rdquo", "\u{201D}"),
+ (b"rdquor", "\u{201D}"),
+ (b"rdsh", "\u{21B3}"),
+ (b"real", "\u{211C}"),
+ (b"realine", "\u{211B}"),
+ (b"realpart", "\u{211C}"),
+ (b"reals", "\u{211D}"),
+ (b"rect", "\u{25AD}"),
+ (b"reg", "\u{00AE}"),
+ (b"rfisht", "\u{297D}"),
+ (b"rfloor", "\u{230B}"),
+ (b"rfr", "\u{1D52F}"),
+ (b"rhard", "\u{21C1}"),
+ (b"rharu", "\u{21C0}"),
+ (b"rharul", "\u{296C}"),
+ (b"rho", "\u{03C1}"),
+ (b"rhov", "\u{03F1}"),
+ (b"rightarrow", "\u{2192}"),
+ (b"rightarrowtail", "\u{21A3}"),
+ (b"rightharpoondown", "\u{21C1}"),
+ (b"rightharpoonup", "\u{21C0}"),
+ (b"rightleftarrows", "\u{21C4}"),
+ (b"rightleftharpoons", "\u{21CC}"),
+ (b"rightrightarrows", "\u{21C9}"),
+ (b"rightsquigarrow", "\u{219D}"),
+ (b"rightthreetimes", "\u{22CC}"),
+ (b"ring", "\u{02DA}"),
+ (b"risingdotseq", "\u{2253}"),
+ (b"rlarr", "\u{21C4}"),
+ (b"rlhar", "\u{21CC}"),
+ (b"rlm", "\u{200F}"),
+ (b"rmoust", "\u{23B1}"),
+ (b"rmoustache", "\u{23B1}"),
+ (b"rnmid", "\u{2AEE}"),
+ (b"roang", "\u{27ED}"),
+ (b"roarr", "\u{21FE}"),
+ (b"robrk", "\u{27E7}"),
+ (b"ropar", "\u{2986}"),
+ (b"ropf", "\u{1D563}"),
+ (b"roplus", "\u{2A2E}"),
+ (b"rotimes", "\u{2A35}"),
+ (b"rpar", "\u{0029}"),
+ (b"rpargt", "\u{2994}"),
+ (b"rppolint", "\u{2A12}"),
+ (b"rrarr", "\u{21C9}"),
+ (b"rsaquo", "\u{203A}"),
+ (b"rscr", "\u{1D4C7}"),
+ (b"rsh", "\u{21B1}"),
+ (b"rsqb", "\u{005D}"),
+ (b"rsquo", "\u{2019}"),
+ (b"rsquor", "\u{2019}"),
+ (b"rthree", "\u{22CC}"),
+ (b"rtimes", "\u{22CA}"),
+ (b"rtri", "\u{25B9}"),
+ (b"rtrie", "\u{22B5}"),
+ (b"rtrif", "\u{25B8}"),
+ (b"rtriltri", "\u{29CE}"),
+ (b"ruluhar", "\u{2968}"),
+ (b"rx", "\u{211E}"),
+ (b"sacute", "\u{015B}"),
+ (b"sbquo", "\u{201A}"),
+ (b"sc", "\u{227B}"),
+ (b"scE", "\u{2AB4}"),
+ (b"scap", "\u{2AB8}"),
+ (b"scaron", "\u{0161}"),
+ (b"sccue", "\u{227D}"),
+ (b"sce", "\u{2AB0}"),
+ (b"scedil", "\u{015F}"),
+ (b"scirc", "\u{015D}"),
+ (b"scnE", "\u{2AB6}"),
+ (b"scnap", "\u{2ABA}"),
+ (b"scnsim", "\u{22E9}"),
+ (b"scpolint", "\u{2A13}"),
+ (b"scsim", "\u{227F}"),
+ (b"scy", "\u{0441}"),
+ (b"sdot", "\u{22C5}"),
+ (b"sdotb", "\u{22A1}"),
+ (b"sdote", "\u{2A66}"),
+ (b"seArr", "\u{21D8}"),
+ (b"searhk", "\u{2925}"),
+ (b"searr", "\u{2198}"),
+ (b"searrow", "\u{2198}"),
+ (b"sect", "\u{00A7}"),
+ (b"semi", "\u{003B}"),
+ (b"seswar", "\u{2929}"),
+ (b"setminus", "\u{2216}"),
+ (b"setmn", "\u{2216}"),
+ (b"sext", "\u{2736}"),
+ (b"sfr", "\u{1D530}"),
+ (b"sfrown", "\u{2322}"),
+ (b"sharp", "\u{266F}"),
+ (b"shchcy", "\u{0449}"),
+ (b"shcy", "\u{0448}"),
+ (b"shortmid", "\u{2223}"),
+ (b"shortparallel", "\u{2225}"),
+ (b"shy", "\u{00AD}"),
+ (b"sigma", "\u{03C3}"),
+ (b"sigmaf", "\u{03C2}"),
+ (b"sigmav", "\u{03C2}"),
+ (b"sim", "\u{223C}"),
+ (b"simdot", "\u{2A6A}"),
+ (b"sime", "\u{2243}"),
+ (b"simeq", "\u{2243}"),
+ (b"simg", "\u{2A9E}"),
+ (b"simgE", "\u{2AA0}"),
+ (b"siml", "\u{2A9D}"),
+ (b"simlE", "\u{2A9F}"),
+ (b"simne", "\u{2246}"),
+ (b"simplus", "\u{2A24}"),
+ (b"simrarr", "\u{2972}"),
+ (b"slarr", "\u{2190}"),
+ (b"smallsetminus", "\u{2216}"),
+ (b"smashp", "\u{2A33}"),
+ (b"smeparsl", "\u{29E4}"),
+ (b"smid", "\u{2223}"),
+ (b"smile", "\u{2323}"),
+ (b"smt", "\u{2AAA}"),
+ (b"smte", "\u{2AAC}"),
+ (b"smtes", "\u{2AAC}\u{FE00}"),
+ (b"softcy", "\u{044C}"),
+ (b"sol", "\u{002F}"),
+ (b"solb", "\u{29C4}"),
+ (b"solbar", "\u{233F}"),
+ (b"sopf", "\u{1D564}"),
+ (b"spades", "\u{2660}"),
+ (b"spadesuit", "\u{2660}"),
+ (b"spar", "\u{2225}"),
+ (b"sqcap", "\u{2293}"),
+ (b"sqcaps", "\u{2293}\u{FE00}"),
+ (b"sqcup", "\u{2294}"),
+ (b"sqcups", "\u{2294}\u{FE00}"),
+ (b"sqsub", "\u{228F}"),
+ (b"sqsube", "\u{2291}"),
+ (b"sqsubset", "\u{228F}"),
+ (b"sqsubseteq", "\u{2291}"),
+ (b"sqsup", "\u{2290}"),
+ (b"sqsupe", "\u{2292}"),
+ (b"sqsupset", "\u{2290}"),
+ (b"sqsupseteq", "\u{2292}"),
+ (b"squ", "\u{25A1}"),
+ (b"square", "\u{25A1}"),
+ (b"squarf", "\u{25AA}"),
+ (b"squf", "\u{25AA}"),
+ (b"srarr", "\u{2192}"),
+ (b"sscr", "\u{1D4C8}"),
+ (b"ssetmn", "\u{2216}"),
+ (b"ssmile", "\u{2323}"),
+ (b"sstarf", "\u{22C6}"),
+ (b"star", "\u{2606}"),
+ (b"starf", "\u{2605}"),
+ (b"straightepsilon", "\u{03F5}"),
+ (b"straightphi", "\u{03D5}"),
+ (b"strns", "\u{00AF}"),
+ (b"sub", "\u{2282}"),
+ (b"subE", "\u{2AC5}"),
+ (b"subdot", "\u{2ABD}"),
+ (b"sube", "\u{2286}"),
+ (b"subedot", "\u{2AC3}"),
+ (b"submult", "\u{2AC1}"),
+ (b"subnE", "\u{2ACB}"),
+ (b"subne", "\u{228A}"),
+ (b"subplus", "\u{2ABF}"),
+ (b"subrarr", "\u{2979}"),
+ (b"subset", "\u{2282}"),
+ (b"subseteq", "\u{2286}"),
+ (b"subseteqq", "\u{2AC5}"),
+ (b"subsetneq", "\u{228A}"),
+ (b"subsetneqq", "\u{2ACB}"),
+ (b"subsim", "\u{2AC7}"),
+ (b"subsub", "\u{2AD5}"),
+ (b"subsup", "\u{2AD3}"),
+ (b"succ", "\u{227B}"),
+ (b"succapprox", "\u{2AB8}"),
+ (b"succcurlyeq", "\u{227D}"),
+ (b"succeq", "\u{2AB0}"),
+ (b"succnapprox", "\u{2ABA}"),
+ (b"succneqq", "\u{2AB6}"),
+ (b"succnsim", "\u{22E9}"),
+ (b"succsim", "\u{227F}"),
+ (b"sum", "\u{2211}"),
+ (b"sung", "\u{266A}"),
+ (b"sup", "\u{2283}"),
+ (b"sup1", "\u{00B9}"),
+ (b"sup2", "\u{00B2}"),
+ (b"sup3", "\u{00B3}"),
+ (b"supE", "\u{2AC6}"),
+ (b"supdot", "\u{2ABE}"),
+ (b"supdsub", "\u{2AD8}"),
+ (b"supe", "\u{2287}"),
+ (b"supedot", "\u{2AC4}"),
+ (b"suphsol", "\u{27C9}"),
+ (b"suphsub", "\u{2AD7}"),
+ (b"suplarr", "\u{297B}"),
+ (b"supmult", "\u{2AC2}"),
+ (b"supnE", "\u{2ACC}"),
+ (b"supne", "\u{228B}"),
+ (b"supplus", "\u{2AC0}"),
+ (b"supset", "\u{2283}"),
+ (b"supseteq", "\u{2287}"),
+ (b"supseteqq", "\u{2AC6}"),
+ (b"supsetneq", "\u{228B}"),
+ (b"supsetneqq", "\u{2ACC}"),
+ (b"supsim", "\u{2AC8}"),
+ (b"supsub", "\u{2AD4}"),
+ (b"supsup", "\u{2AD6}"),
+ (b"swArr", "\u{21D9}"),
+ (b"swarhk", "\u{2926}"),
+ (b"swarr", "\u{2199}"),
+ (b"swarrow", "\u{2199}"),
+ (b"swnwar", "\u{292A}"),
+ (b"szlig", "\u{00DF}"),
+ (b"target", "\u{2316}"),
+ (b"tau", "\u{03C4}"),
+ (b"tbrk", "\u{23B4}"),
+ (b"tcaron", "\u{0165}"),
+ (b"tcedil", "\u{0163}"),
+ (b"tcy", "\u{0442}"),
+ (b"tdot", "\u{20DB}"),
+ (b"telrec", "\u{2315}"),
+ (b"tfr", "\u{1D531}"),
+ (b"there4", "\u{2234}"),
+ (b"therefore", "\u{2234}"),
+ (b"theta", "\u{03B8}"),
+ (b"thetasym", "\u{03D1}"),
+ (b"thetav", "\u{03D1}"),
+ (b"thickapprox", "\u{2248}"),
+ (b"thicksim", "\u{223C}"),
+ (b"thinsp", "\u{2009}"),
+ (b"thkap", "\u{2248}"),
+ (b"thksim", "\u{223C}"),
+ (b"thorn", "\u{00FE}"),
+ (b"tilde", "\u{02DC}"),
+ (b"times", "\u{00D7}"),
+ (b"timesb", "\u{22A0}"),
+ (b"timesbar", "\u{2A31}"),
+ (b"timesd", "\u{2A30}"),
+ (b"tint", "\u{222D}"),
+ (b"toea", "\u{2928}"),
+ (b"top", "\u{22A4}"),
+ (b"topbot", "\u{2336}"),
+ (b"topcir", "\u{2AF1}"),
+ (b"topf", "\u{1D565}"),
+ (b"topfork", "\u{2ADA}"),
+ (b"tosa", "\u{2929}"),
+ (b"tprime", "\u{2034}"),
+ (b"trade", "\u{2122}"),
+ (b"triangle", "\u{25B5}"),
+ (b"triangledown", "\u{25BF}"),
+ (b"triangleleft", "\u{25C3}"),
+ (b"trianglelefteq", "\u{22B4}"),
+ (b"triangleq", "\u{225C}"),
+ (b"triangleright", "\u{25B9}"),
+ (b"trianglerighteq", "\u{22B5}"),
+ (b"tridot", "\u{25EC}"),
+ (b"trie", "\u{225C}"),
+ (b"triminus", "\u{2A3A}"),
+ (b"triplus", "\u{2A39}"),
+ (b"trisb", "\u{29CD}"),
+ (b"tritime", "\u{2A3B}"),
+ (b"trpezium", "\u{23E2}"),
+ (b"tscr", "\u{1D4C9}"),
+ (b"tscy", "\u{0446}"),
+ (b"tshcy", "\u{045B}"),
+ (b"tstrok", "\u{0167}"),
+ (b"twixt", "\u{226C}"),
+ (b"twoheadleftarrow", "\u{219E}"),
+ (b"twoheadrightarrow", "\u{21A0}"),
+ (b"uArr", "\u{21D1}"),
+ (b"uHar", "\u{2963}"),
+ (b"uacute", "\u{00FA}"),
+ (b"uarr", "\u{2191}"),
+ (b"ubrcy", "\u{045E}"),
+ (b"ubreve", "\u{016D}"),
+ (b"ucirc", "\u{00FB}"),
+ (b"ucy", "\u{0443}"),
+ (b"udarr", "\u{21C5}"),
+ (b"udblac", "\u{0171}"),
+ (b"udhar", "\u{296E}"),
+ (b"ufisht", "\u{297E}"),
+ (b"ufr", "\u{1D532}"),
+ (b"ugrave", "\u{00F9}"),
+ (b"uharl", "\u{21BF}"),
+ (b"uharr", "\u{21BE}"),
+ (b"uhblk", "\u{2580}"),
+ (b"ulcorn", "\u{231C}"),
+ (b"ulcorner", "\u{231C}"),
+ (b"ulcrop", "\u{230F}"),
+ (b"ultri", "\u{25F8}"),
+ (b"umacr", "\u{016B}"),
+ (b"uml", "\u{00A8}"),
+ (b"uogon", "\u{0173}"),
+ (b"uopf", "\u{1D566}"),
+ (b"uparrow", "\u{2191}"),
+ (b"updownarrow", "\u{2195}"),
+ (b"upharpoonleft", "\u{21BF}"),
+ (b"upharpoonright", "\u{21BE}"),
+ (b"uplus", "\u{228E}"),
+ (b"upsi", "\u{03C5}"),
+ (b"upsih", "\u{03D2}"),
+ (b"upsilon", "\u{03C5}"),
+ (b"upuparrows", "\u{21C8}"),
+ (b"urcorn", "\u{231D}"),
+ (b"urcorner", "\u{231D}"),
+ (b"urcrop", "\u{230E}"),
+ (b"uring", "\u{016F}"),
+ (b"urtri", "\u{25F9}"),
+ (b"uscr", "\u{1D4CA}"),
+ (b"utdot", "\u{22F0}"),
+ (b"utilde", "\u{0169}"),
+ (b"utri", "\u{25B5}"),
+ (b"utrif", "\u{25B4}"),
+ (b"uuarr", "\u{21C8}"),
+ (b"uuml", "\u{00FC}"),
+ (b"uwangle", "\u{29A7}"),
+ (b"vArr", "\u{21D5}"),
+ (b"vBar", "\u{2AE8}"),
+ (b"vBarv", "\u{2AE9}"),
+ (b"vDash", "\u{22A8}"),
+ (b"vangrt", "\u{299C}"),
+ (b"varepsilon", "\u{03F5}"),
+ (b"varkappa", "\u{03F0}"),
+ (b"varnothing", "\u{2205}"),
+ (b"varphi", "\u{03D5}"),
+ (b"varpi", "\u{03D6}"),
+ (b"varpropto", "\u{221D}"),
+ (b"varr", "\u{2195}"),
+ (b"varrho", "\u{03F1}"),
+ (b"varsigma", "\u{03C2}"),
+ (b"varsubsetneq", "\u{228A}\u{FE00}"),
+ (b"varsubsetneqq", "\u{2ACB}\u{FE00}"),
+ (b"varsupsetneq", "\u{228B}\u{FE00}"),
+ (b"varsupsetneqq", "\u{2ACC}\u{FE00}"),
+ (b"vartheta", "\u{03D1}"),
+ (b"vartriangleleft", "\u{22B2}"),
+ (b"vartriangleright", "\u{22B3}"),
+ (b"vcy", "\u{0432}"),
+ (b"vdash", "\u{22A2}"),
+ (b"vee", "\u{2228}"),
+ (b"veebar", "\u{22BB}"),
+ (b"veeeq", "\u{225A}"),
+ (b"vellip", "\u{22EE}"),
+ (b"verbar", "\u{007C}"),
+ (b"vert", "\u{007C}"),
+ (b"vfr", "\u{1D533}"),
+ (b"vltri", "\u{22B2}"),
+ (b"vnsub", "\u{2282}\u{20D2}"),
+ (b"vnsup", "\u{2283}\u{20D2}"),
+ (b"vopf", "\u{1D567}"),
+ (b"vprop", "\u{221D}"),
+ (b"vrtri", "\u{22B3}"),
+ (b"vscr", "\u{1D4CB}"),
+ (b"vsubnE", "\u{2ACB}\u{FE00}"),
+ (b"vsubne", "\u{228A}\u{FE00}"),
+ (b"vsupnE", "\u{2ACC}\u{FE00}"),
+ (b"vsupne", "\u{228B}\u{FE00}"),
+ (b"vzigzag", "\u{299A}"),
+ (b"wcirc", "\u{0175}"),
+ (b"wedbar", "\u{2A5F}"),
+ (b"wedge", "\u{2227}"),
+ (b"wedgeq", "\u{2259}"),
+ (b"weierp", "\u{2118}"),
+ (b"wfr", "\u{1D534}"),
+ (b"wopf", "\u{1D568}"),
+ (b"wp", "\u{2118}"),
+ (b"wr", "\u{2240}"),
+ (b"wreath", "\u{2240}"),
+ (b"wscr", "\u{1D4CC}"),
+ (b"xcap", "\u{22C2}"),
+ (b"xcirc", "\u{25EF}"),
+ (b"xcup", "\u{22C3}"),
+ (b"xdtri", "\u{25BD}"),
+ (b"xfr", "\u{1D535}"),
+ (b"xhArr", "\u{27FA}"),
+ (b"xharr", "\u{27F7}"),
+ (b"xi", "\u{03BE}"),
+ (b"xlArr", "\u{27F8}"),
+ (b"xlarr", "\u{27F5}"),
+ (b"xmap", "\u{27FC}"),
+ (b"xnis", "\u{22FB}"),
+ (b"xodot", "\u{2A00}"),
+ (b"xopf", "\u{1D569}"),
+ (b"xoplus", "\u{2A01}"),
+ (b"xotime", "\u{2A02}"),
+ (b"xrArr", "\u{27F9}"),
+ (b"xrarr", "\u{27F6}"),
+ (b"xscr", "\u{1D4CD}"),
+ (b"xsqcup", "\u{2A06}"),
+ (b"xuplus", "\u{2A04}"),
+ (b"xutri", "\u{25B3}"),
+ (b"xvee", "\u{22C1}"),
+ (b"xwedge", "\u{22C0}"),
+ (b"yacute", "\u{00FD}"),
+ (b"yacy", "\u{044F}"),
+ (b"ycirc", "\u{0177}"),
+ (b"ycy", "\u{044B}"),
+ (b"yen", "\u{00A5}"),
+ (b"yfr", "\u{1D536}"),
+ (b"yicy", "\u{0457}"),
+ (b"yopf", "\u{1D56A}"),
+ (b"yscr", "\u{1D4CE}"),
+ (b"yucy", "\u{044E}"),
+ (b"yuml", "\u{00FF}"),
+ (b"zacute", "\u{017A}"),
+ (b"zcaron", "\u{017E}"),
+ (b"zcy", "\u{0437}"),
+ (b"zdot", "\u{017C}"),
+ (b"zeetrf", "\u{2128}"),
+ (b"zeta", "\u{03B6}"),
+ (b"zfr", "\u{1D537}"),
+ (b"zhcy", "\u{0436}"),
+ (b"zigrarr", "\u{21DD}"),
+ (b"zopf", "\u{1D56B}"),
+ (b"zscr", "\u{1D4CF}"),
+ (b"zwj", "\u{200D}"),
+ (b"zwnj", "\u{200C}"),
+];
+
+pub(crate) fn get_entity(bytes: &[u8]) -> Option<&'static str> { // entity name (no `&`/`;`) -> its Unicode expansion
+    ENTITIES // table is sorted by entity name in byte order, so binary search is valid
+        .binary_search_by_key(&bytes, |&(key, _value)| key)
+        .ok() // unknown entity name -> None
+        .map(|i| ENTITIES[i].1) // return the 'static expansion string for the match
+}
diff --git a/vendor/pulldown-cmark/src/escape.rs b/vendor/pulldown-cmark/src/escape.rs
new file mode 100644
index 000000000..3e5c224e7
--- /dev/null
+++ b/vendor/pulldown-cmark/src/escape.rs
@@ -0,0 +1,368 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+//! Utility functions for HTML escaping. Only useful when building your own
+//! HTML renderer.
+
+use std::fmt::{Arguments, Write as FmtWrite};
+use std::io::{self, ErrorKind, Write};
+use std::str::from_utf8;
+
// Per-ASCII-byte classification for href escaping: 1 = emit verbatim,
// 0 = must be escaped (`&` and `'` get entity escapes, the rest are
// percent-encoded). Bytes >= 0x80 are handled separately by the caller.
#[rustfmt::skip]
static HREF_SAFE: [u8; 128] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
];
+
// Uppercase hex digits used when percent-encoding unsafe href bytes.
static HEX_CHARS: &[u8] = b"0123456789ABCDEF";
// The two href-unsafe bytes that get entity escapes instead of %XX.
static AMP_ESCAPE: &str = "&amp;";
static SINGLE_QUOTE_ESCAPE: &str = "&#x27;";
+
/// This wrapper exists because we can't have both a blanket implementation
/// for all types implementing `Write` and types of the form `&mut W` where
/// `W: StrWrite`. Since we need the latter a lot, we choose to wrap
/// `Write` types.
pub struct WriteWrapper<W>(pub W);
+
/// Trait that allows writing string slices. This is basically an extension
/// of `std::io::Write` in order to include `String`.
pub trait StrWrite {
    /// Writes an entire string slice to the sink.
    fn write_str(&mut self, s: &str) -> io::Result<()>;

    /// Writes formatted output (from `format_args!`) to the sink.
    fn write_fmt(&mut self, args: Arguments) -> io::Result<()>;
}
+
// Forward both operations to the wrapped `io::Write` sink.
impl<W> StrWrite for WriteWrapper<W>
where
    W: Write,
{
    #[inline]
    fn write_str(&mut self, s: &str) -> io::Result<()> {
        self.0.write_all(s.as_bytes())
    }

    #[inline]
    fn write_fmt(&mut self, args: Arguments) -> io::Result<()> {
        self.0.write_fmt(args)
    }
}
+
+impl<'w> StrWrite for String {
+ #[inline]
+ fn write_str(&mut self, s: &str) -> io::Result<()> {
+ self.push_str(s);
+ Ok(())
+ }
+
+ #[inline]
+ fn write_fmt(&mut self, args: Arguments) -> io::Result<()> {
+ // FIXME: translate fmt error to io error?
+ FmtWrite::write_fmt(self, args).map_err(|_| ErrorKind::Other.into())
+ }
+}
+
// Blanket forwarding impl so a `&mut W` can be used anywhere a `StrWrite`
// sink is expected.
impl<W> StrWrite for &'_ mut W
where
    W: StrWrite,
{
    #[inline]
    fn write_str(&mut self, s: &str) -> io::Result<()> {
        (**self).write_str(s)
    }

    #[inline]
    fn write_fmt(&mut self, args: Arguments) -> io::Result<()> {
        (**self).write_fmt(args)
    }
}
+
+/// Writes an href to the buffer, escaping href unsafe bytes.
+pub fn escape_href<W>(mut w: W, s: &str) -> io::Result<()>
+where
+ W: StrWrite,
+{
+ let bytes = s.as_bytes();
+ let mut mark = 0;
+ for i in 0..bytes.len() {
+ let c = bytes[i];
+ if c >= 0x80 || HREF_SAFE[c as usize] == 0 {
+ // character needing escape
+
+ // write partial substring up to mark
+ if mark < i {
+ w.write_str(&s[mark..i])?;
+ }
+ match c {
+ b'&' => {
+ w.write_str(AMP_ESCAPE)?;
+ }
+ b'\'' => {
+ w.write_str(SINGLE_QUOTE_ESCAPE)?;
+ }
+ _ => {
+ let mut buf = [0u8; 3];
+ buf[0] = b'%';
+ buf[1] = HEX_CHARS[((c as usize) >> 4) & 0xF];
+ buf[2] = HEX_CHARS[(c as usize) & 0xF];
+ let escaped = from_utf8(&buf).unwrap();
+ w.write_str(escaped)?;
+ }
+ }
+ mark = i + 1; // all escaped characters are ASCII
+ }
+ }
+ w.write_str(&s[mark..])
+}
+
/// Builds the 256-entry classification table used for HTML escaping: each
/// special byte maps to its (1-based) index into `HTML_ESCAPES`, and every
/// other byte maps to 0 (no escaping needed).
const fn create_html_escape_table() -> [u8; 256] {
    let mut table = [0; 256];
    let special = [b'"', b'&', b'<', b'>'];
    // `while` rather than `for`: iterator-based loops are not usable in
    // `const fn`, but plain indexing is.
    let mut i = 0;
    while i < special.len() {
        table[special[i] as usize] = (i as u8) + 1;
        i += 1;
    }
    table
}

static HTML_ESCAPE_TABLE: [u8; 256] = create_html_escape_table();

static HTML_ESCAPES: [&str; 5] = ["", "&quot;", "&amp;", "&lt;", "&gt;"];
+
/// Writes the given string to the Write sink, replacing special HTML bytes
/// (<, >, &, ") by escape sequences.
///
/// Dispatches at compile time: the SSSE3-accelerated path is used on
/// x86_64 when the `simd` feature is enabled, the scalar path otherwise.
pub fn escape_html<W: StrWrite>(w: W, s: &str) -> io::Result<()> {
    #[cfg(all(target_arch = "x86_64", feature = "simd"))]
    {
        simd::escape_html(w, s)
    }
    #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
    {
        escape_html_scalar(w, s)
    }
}
+
+fn escape_html_scalar<W: StrWrite>(mut w: W, s: &str) -> io::Result<()> {
+ let bytes = s.as_bytes();
+ let mut mark = 0;
+ let mut i = 0;
+ while i < s.len() {
+ match bytes[i..]
+ .iter()
+ .position(|&c| HTML_ESCAPE_TABLE[c as usize] != 0)
+ {
+ Some(pos) => {
+ i += pos;
+ }
+ None => break,
+ }
+ let c = bytes[i];
+ let escape = HTML_ESCAPE_TABLE[c as usize];
+ let escape_seq = HTML_ESCAPES[escape as usize];
+ w.write_str(&s[mark..i])?;
+ w.write_str(escape_seq)?;
+ i += 1;
+ mark = i; // all escaped characters are ASCII
+ }
+ w.write_str(&s[mark..])
+}
+
#[cfg(all(target_arch = "x86_64", feature = "simd"))]
mod simd {
    use super::StrWrite;
    use std::arch::x86_64::*;
    use std::io;
    use std::mem::size_of;

    // 16 bytes: one SSE register's worth of input per iteration.
    const VECTOR_SIZE: usize = size_of::<__m128i>();

    pub(super) fn escape_html<W: StrWrite>(mut w: W, s: &str) -> io::Result<()> {
        // The SIMD accelerated code uses the PSHUFB instruction, which is part
        // of the SSSE3 instruction set. Further, we can only use this code if
        // the buffer is at least one VECTOR_SIZE in length to prevent reading
        // out of bounds. If either of these conditions is not met, we fall back
        // to scalar code.
        if is_x86_feature_detected!("ssse3") && s.len() >= VECTOR_SIZE {
            let bytes = s.as_bytes();
            let mut mark = 0;

            // SAFETY: `s.len() >= VECTOR_SIZE` was checked above, which is the
            // precondition of `foreach_special_simd`; indices passed to the
            // callback are in-bounds positions of ASCII bytes, so the
            // `get_unchecked` slicing stays on char boundaries.
            unsafe {
                foreach_special_simd(bytes, 0, |i| {
                    let escape_ix = *bytes.get_unchecked(i) as usize;
                    let replacement =
                        super::HTML_ESCAPES[super::HTML_ESCAPE_TABLE[escape_ix] as usize];
                    w.write_str(&s.get_unchecked(mark..i))?;
                    mark = i + 1; // all escaped characters are ASCII
                    w.write_str(replacement)
                })?;
                w.write_str(&s.get_unchecked(mark..))
            }
        } else {
            super::escape_html_scalar(w, s)
        }
    }

    /// Creates the lookup table for use in `compute_mask`.
    const fn create_lookup() -> [u8; 16] {
        let mut table = [0; 16];
        table[(b'<' & 0x0f) as usize] = b'<';
        table[(b'>' & 0x0f) as usize] = b'>';
        table[(b'&' & 0x0f) as usize] = b'&';
        table[(b'"' & 0x0f) as usize] = b'"';
        // Entry 0 is a value (127) that can never equal an input byte with a
        // zero low nibble, so such bytes never match in `compute_mask`.
        table[0] = 0b0111_1111;
        table
    }

    #[target_feature(enable = "ssse3")]
    /// Computes a byte mask at given offset in the byte buffer. Its first 16 (least significant)
    /// bits correspond to whether there is an HTML special byte (&, <, ", >) at the 16 bytes
    /// `bytes[offset..]`. For example, the mask `(1 << 3)` states that there is an HTML byte
    /// at `offset + 3`. It is only safe to call this function when
    /// `bytes.len() >= offset + VECTOR_SIZE`.
    unsafe fn compute_mask(bytes: &[u8], offset: usize) -> i32 {
        debug_assert!(bytes.len() >= offset + VECTOR_SIZE);

        let table = create_lookup();
        let lookup = _mm_loadu_si128(table.as_ptr() as *const __m128i);
        let raw_ptr = bytes.as_ptr().offset(offset as isize) as *const __m128i;

        // Load the vector from memory.
        let vector = _mm_loadu_si128(raw_ptr);
        // We take the least significant 4 bits of every byte and use them as indices
        // to map into the lookup vector.
        // Note that shuffle maps bytes with their most significant bit set to lookup[0].
        // Bytes that share their lower nibble with an HTML special byte get mapped to that
        // corresponding special byte. Note that all HTML special bytes have distinct lower
        // nibbles. Other bytes either get mapped to 0 or 127.
        let expected = _mm_shuffle_epi8(lookup, vector);
        // We compare the original vector to the mapped output. Bytes that shared a lower
        // nibble with an HTML special byte match *only* if they are that special byte. Bytes
        // that have either a 0 lower nibble or their most significant bit set were mapped to
        // 127 and will hence never match. All other bytes have non-zero lower nibbles but
        // were mapped to 0 and will therefore also not match.
        let matches = _mm_cmpeq_epi8(expected, vector);

        // Translate matches to a bitmask, where every 1 corresponds to a HTML special character
        // and a 0 is a non-HTML byte.
        _mm_movemask_epi8(matches)
    }

    /// Calls the given function with the index of every byte in the given byteslice
    /// that is either ", &, <, or > and for no other byte.
    /// Make sure to only call this when `bytes.len() >= 16`, undefined behaviour may
    /// occur otherwise.
    #[target_feature(enable = "ssse3")]
    unsafe fn foreach_special_simd<F>(
        bytes: &[u8],
        mut offset: usize,
        mut callback: F,
    ) -> io::Result<()>
    where
        F: FnMut(usize) -> io::Result<()>,
    {
        // The strategy here is to walk the byte buffer in chunks of VECTOR_SIZE (16)
        // bytes at a time starting at the given offset. For each chunk, we compute a
        // bitmask indicating whether the corresponding byte is a HTML special byte.
        // We then iterate over all the 1 bits in this mask and call the callback function
        // with the corresponding index in the buffer.
        // When the number of HTML special bytes in the buffer is relatively low, this
        // allows us to quickly go through the buffer without a lookup and for every
        // single byte.

        debug_assert!(bytes.len() >= VECTOR_SIZE);
        let upperbound = bytes.len() - VECTOR_SIZE;
        while offset < upperbound {
            let mut mask = compute_mask(bytes, offset);
            while mask != 0 {
                let ix = mask.trailing_zeros();
                callback(offset + ix as usize)?;
                // Clear the lowest set bit.
                mask ^= mask & -mask;
            }
            offset += VECTOR_SIZE;
        }

        // Final iteration. We align the read with the end of the slice and
        // shift off the bytes at start we have already scanned.
        let mut mask = compute_mask(bytes, upperbound);
        mask >>= offset - upperbound;
        while mask != 0 {
            let ix = mask.trailing_zeros();
            callback(offset + ix as usize)?;
            mask ^= mask & -mask;
        }
        Ok(())
    }

    #[cfg(test)]
    mod html_scan_tests {
        #[test]
        fn multichunk() {
            let mut vec = Vec::new();
            unsafe {
                super::foreach_special_simd("&aXaaaa.a'aa9a<>aab&".as_bytes(), 0, |ix| {
                    Ok(vec.push(ix))
                })
                .unwrap();
            }
            assert_eq!(vec, vec![0, 14, 15, 19]);
        }

        // only match these bytes, and when we match them, match them VECTOR_SIZE times
        #[test]
        fn only_right_bytes_matched() {
            for b in 0..255u8 {
                let right_byte = b == b'&' || b == b'<' || b == b'>' || b == b'"';
                let vek = vec![b; super::VECTOR_SIZE];
                let mut match_count = 0;
                unsafe {
                    super::foreach_special_simd(&vek, 0, |_| {
                        match_count += 1;
                        Ok(())
                    })
                    .unwrap();
                }
                assert!((match_count > 0) == (match_count == super::VECTOR_SIZE));
                assert_eq!(
                    (match_count == super::VECTOR_SIZE),
                    right_byte,
                    "match_count: {}, byte: {:?}",
                    match_count,
                    b as char
                );
            }
        }
    }
}
+
#[cfg(test)]
mod test {
    pub use super::escape_href;

    // Sanity check: `&` is entity-escaped while safe bytes pass through.
    #[test]
    fn check_href_escape() {
        let mut s = String::new();
        escape_href(&mut s, "&^_").unwrap();
        assert_eq!(s.as_str(), "&amp;^_");
    }
}
diff --git a/vendor/pulldown-cmark/src/firstpass.rs b/vendor/pulldown-cmark/src/firstpass.rs
new file mode 100644
index 000000000..cf3cfbf53
--- /dev/null
+++ b/vendor/pulldown-cmark/src/firstpass.rs
@@ -0,0 +1,1927 @@
+//! The first pass resolves all block structure, generating an AST. Within a block, items
+//! are in a linear chain with potential inline markup identified.
+
+use std::cmp::max;
+use std::ops::Range;
+
+use crate::parse::{scan_containers, Allocations, HeadingAttributes, Item, ItemBody, LinkDef};
+use crate::scanners::*;
+use crate::strings::CowStr;
+use crate::tree::{Tree, TreeIndex};
+use crate::Options;
+use crate::{
+ linklabel::{scan_link_label_rest, LinkLabel},
+ HeadingLevel,
+};
+
+use unicase::UniCase;
+
+/// Runs the first pass, which resolves the block structure of the document,
+/// and returns the resulting tree.
+pub(crate) fn run_first_pass(text: &str, options: Options) -> (Tree<Item>, Allocations) {
+ // This is a very naive heuristic for the number of nodes
+ // we'll need.
+ let start_capacity = max(128, text.len() / 32);
+ let lookup_table = &create_lut(&options);
+ let first_pass = FirstPass {
+ text,
+ tree: Tree::with_capacity(start_capacity),
+ begin_list_item: false,
+ last_line_blank: false,
+ allocs: Allocations::new(),
+ options,
+ lookup_table,
+ };
+ first_pass.run()
+}
+
/// State for the first parsing pass.
struct FirstPass<'a, 'b> {
    /// The source text being parsed.
    text: &'a str,
    /// Block-structure tree built up during the pass.
    tree: Tree<Item>,
    /// Set when a list item was just opened; used to limit how many leading
    /// blank lines an item may contain.
    begin_list_item: bool,
    /// Whether the previously processed line was blank.
    last_line_blank: bool,
    /// Side allocations (refdefs, cow strings, table alignments, ...).
    allocs: Allocations<'a>,
    options: Options,
    /// Special-byte lookup table built from the enabled options.
    lookup_table: &'b LookupTable,
}
+
+impl<'a, 'b> FirstPass<'a, 'b> {
    /// Drives the pass: parses blocks until the input is exhausted, then
    /// closes every container still open and yields the finished tree.
    fn run(mut self) -> (Tree<Item>, Allocations<'a>) {
        let mut ix = 0;
        while ix < self.text.len() {
            ix = self.parse_block(ix);
        }
        // Close all containers remaining on the spine at end of input.
        for _ in 0..self.tree.spine_len() {
            self.pop(ix);
        }
        (self.tree, self.allocs)
    }
+
    /// Parses a single block starting at `start_ix`, dispatching to the
    /// specialized parser for whatever block kind the line scans as.
    ///
    /// Returns offset after block.
    fn parse_block(&mut self, mut start_ix: usize) -> usize {
        let bytes = self.text.as_bytes();
        let mut line_start = LineStart::new(&bytes[start_ix..]);

        // Close any containers that do not continue on this line.
        let i = scan_containers(&self.tree, &mut line_start);
        for _ in i..self.tree.spine_len() {
            self.pop(start_ix);
        }

        if self.options.contains(Options::ENABLE_FOOTNOTES) {
            // finish footnote if it's still open and was preceded by blank line
            if let Some(node_ix) = self.tree.peek_up() {
                if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
                    if self.last_line_blank {
                        self.pop(start_ix);
                    }
                }
            }

            // Footnote definitions of the form
            // [^bar]:
            // * anything really
            let container_start = start_ix + line_start.bytes_scanned();
            if let Some(bytecount) = self.parse_footnote(container_start) {
                start_ix = container_start + bytecount;
                start_ix += scan_blank_line(&bytes[start_ix..]).unwrap_or(0);
                line_start = LineStart::new(&bytes[start_ix..]);
            }
        }

        // Process new containers
        loop {
            let container_start = start_ix + line_start.bytes_scanned();
            if let Some((ch, index, indent)) = line_start.scan_list_marker() {
                let after_marker_index = start_ix + line_start.bytes_scanned();
                self.continue_list(container_start, ch, index);
                self.tree.append(Item {
                    start: container_start,
                    end: after_marker_index, // will get updated later if item not empty
                    body: ItemBody::ListItem(indent),
                });
                self.tree.push();
                // An empty list item: allow one blank line, then return.
                if let Some(n) = scan_blank_line(&bytes[after_marker_index..]) {
                    self.begin_list_item = true;
                    return after_marker_index + n;
                }
                if self.options.contains(Options::ENABLE_TASKLISTS) {
                    if let Some(is_checked) = line_start.scan_task_list_marker() {
                        self.tree.append(Item {
                            start: after_marker_index,
                            end: start_ix + line_start.bytes_scanned(),
                            body: ItemBody::TaskListMarker(is_checked),
                        });
                    }
                }
            } else if line_start.scan_blockquote_marker() {
                self.finish_list(start_ix);
                self.tree.append(Item {
                    start: container_start,
                    end: 0, // will get set later
                    body: ItemBody::BlockQuote,
                });
                self.tree.push();
            } else {
                break;
            }
        }

        let ix = start_ix + line_start.bytes_scanned();

        // A blank line: record it (and possibly close a just-opened list
        // item), then skip it.
        if let Some(n) = scan_blank_line(&bytes[ix..]) {
            if let Some(node_ix) = self.tree.peek_up() {
                match self.tree[node_ix].item.body {
                    ItemBody::BlockQuote => (),
                    _ => {
                        if self.begin_list_item {
                            // A list item can begin with at most one blank line.
                            self.pop(start_ix);
                        }
                        self.last_line_blank = true;
                    }
                }
            }
            return ix + n;
        }

        self.begin_list_item = false;
        self.finish_list(start_ix);

        // Save `remaining_space` here to avoid needing to backtrack `line_start` for HTML blocks
        let remaining_space = line_start.remaining_space();

        let indent = line_start.scan_space_upto(4);
        if indent == 4 {
            let ix = start_ix + line_start.bytes_scanned();
            let remaining_space = line_start.remaining_space();
            return self.parse_indented_code_block(ix, remaining_space);
        }

        let ix = start_ix + line_start.bytes_scanned();

        // HTML Blocks
        if bytes[ix] == b'<' {
            // Types 1-5 are all detected by one function and all end with the same
            // pattern
            if let Some(html_end_tag) = get_html_end_tag(&bytes[(ix + 1)..]) {
                return self.parse_html_block_type_1_to_5(ix, html_end_tag, remaining_space);
            }

            // Detect type 6
            if starts_html_block_type_6(&bytes[(ix + 1)..]) {
                return self.parse_html_block_type_6_or_7(ix, remaining_space);
            }

            // Detect type 7
            if let Some(_html_bytes) = scan_html_type_7(&bytes[ix..]) {
                return self.parse_html_block_type_6_or_7(ix, remaining_space);
            }
        }

        if let Ok(n) = scan_hrule(&bytes[ix..]) {
            return self.parse_hrule(n, ix);
        }

        if let Some(atx_size) = scan_atx_heading(&bytes[ix..]) {
            return self.parse_atx_heading(ix, atx_size);
        }

        // parse refdef
        if let Some((bytecount, label, link_def)) = self.parse_refdef_total(ix) {
            // First definition of a label wins; later duplicates are ignored.
            self.allocs.refdefs.0.entry(label).or_insert(link_def);
            let ix = ix + bytecount;
            // try to read trailing whitespace or it will register as a completely blank line
            // TODO: shouldn't we do this for all block level items?
            return ix + scan_blank_line(&bytes[ix..]).unwrap_or(0);
        }

        if let Some((n, fence_ch)) = scan_code_fence(&bytes[ix..]) {
            return self.parse_fenced_code_block(ix, indent, fence_ch, n);
        }
        self.parse_paragraph(ix)
    }
+
    /// Returns the offset of the first line after the table.
    /// Assumptions: current focus is a table element and the table header
    /// matches the separator line (same number of columns).
    fn parse_table(&mut self, table_cols: usize, head_start: usize, body_start: usize) -> usize {
        // parse header. this shouldn't fail because we made sure the table header is ok
        let (_sep_start, thead_ix) = self.parse_table_row_inner(head_start, table_cols);
        self.tree[thead_ix].item.body = ItemBody::TableHead;

        // parse body: consume rows until one fails to parse.
        let mut ix = body_start;
        while let Some((next_ix, _row_ix)) = self.parse_table_row(ix, table_cols) {
            ix = next_ix;
        }

        self.pop(ix);
        ix
    }
+
    /// Call this when containers are taken care of.
    /// Returns bytes scanned, row_ix
    fn parse_table_row_inner(&mut self, mut ix: usize, row_cells: usize) -> (usize, TreeIndex) {
        let bytes = self.text.as_bytes();
        let mut cells = 0;
        // Remembers the cell at position `row_cells`, so excess cells past it
        // can be dropped afterwards.
        let mut final_cell_ix = None;

        let row_ix = self.tree.append(Item {
            start: ix,
            end: 0, // set at end of this function
            body: ItemBody::TableRow,
        });
        self.tree.push();

        // Scan cells separated by `|` until end of line.
        loop {
            ix += scan_ch(&bytes[ix..], b'|');
            let start_ix = ix;
            ix += scan_whitespace_no_nl(&bytes[ix..]);

            if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
                ix += eol_bytes;
                break;
            }

            let cell_ix = self.tree.append(Item {
                start: start_ix,
                end: ix,
                body: ItemBody::TableCell,
            });
            self.tree.push();
            let (next_ix, _brk) = self.parse_line(ix, None, TableParseMode::Active);

            // Trim trailing whitespace off the last inline item of the cell.
            if let Some(cur_ix) = self.tree.cur() {
                let trailing_whitespace = scan_rev_while(&bytes[..next_ix], is_ascii_whitespace);
                self.tree[cur_ix].item.end -= trailing_whitespace;
            }

            self.tree[cell_ix].item.end = next_ix;
            self.tree.pop();

            ix = next_ix;
            cells += 1;

            if cells == row_cells {
                final_cell_ix = Some(cell_ix);
            }
        }

        // fill empty cells if needed
        // note: this is where GFM and commonmark-extra diverge. we follow
        // GFM here
        for _ in cells..row_cells {
            self.tree.append(Item {
                start: ix,
                end: ix,
                body: ItemBody::TableCell,
            });
        }

        // drop excess cells
        if let Some(cell_ix) = final_cell_ix {
            self.tree[cell_ix].next = None;
        }

        self.pop(ix);

        (ix, row_ix)
    }
+
    /// Returns first offset after the row and the tree index of the row.
    ///
    /// Returns `None` (ending the table) when containers don't continue,
    /// or when the line would interrupt a paragraph.
    fn parse_table_row(&mut self, mut ix: usize, row_cells: usize) -> Option<(usize, TreeIndex)> {
        let bytes = self.text.as_bytes();
        let mut line_start = LineStart::new(&bytes[ix..]);
        let current_container =
            scan_containers(&self.tree, &mut line_start) == self.tree.spine_len();
        if !current_container {
            return None;
        }
        line_start.scan_all_space();
        ix += line_start.bytes_scanned();
        if scan_paragraph_interrupt(&bytes[ix..], current_container) {
            return None;
        }

        let (ix, row_ix) = self.parse_table_row_inner(ix, row_cells);
        Some((ix, row_ix))
    }
+
    /// Returns offset of line start after paragraph.
    ///
    /// Consumes lines until the paragraph is interrupted; may retroactively
    /// turn the node into a table (when tables are enabled) or a setext
    /// heading.
    fn parse_paragraph(&mut self, start_ix: usize) -> usize {
        let node_ix = self.tree.append(Item {
            start: start_ix,
            end: 0, // will get set later
            body: ItemBody::Paragraph,
        });
        self.tree.push();
        let bytes = self.text.as_bytes();

        let mut ix = start_ix;
        loop {
            // Only the first line of a paragraph may start a table.
            let scan_mode = if self.options.contains(Options::ENABLE_TABLES) && ix == start_ix {
                TableParseMode::Scan
            } else {
                TableParseMode::Disabled
            };
            let (next_ix, brk) = self.parse_line(ix, None, scan_mode);

            // break out when we find a table
            if let Some(Item {
                body: ItemBody::Table(alignment_ix),
                ..
            }) = brk
            {
                let table_cols = self.allocs[alignment_ix].len();
                self.tree[node_ix].item.body = ItemBody::Table(alignment_ix);
                // this clears out any stuff we may have appended - but there may
                // be a cleaner way
                self.tree[node_ix].child = None;
                self.tree.pop();
                self.tree.push();
                return self.parse_table(table_cols, ix, next_ix);
            }

            ix = next_ix;
            let mut line_start = LineStart::new(&bytes[ix..]);
            let current_container =
                scan_containers(&self.tree, &mut line_start) == self.tree.spine_len();
            if !line_start.scan_space(4) {
                let ix_new = ix + line_start.bytes_scanned();
                if current_container {
                    // A hard break caused by a trailing backslash does not
                    // count when the paragraph turns into a setext heading;
                    // the backslash is restored as literal text instead.
                    let trailing_backslash_pos = match brk {
                        Some(Item {
                            start,
                            body: ItemBody::HardBreak,
                            ..
                        }) if bytes[start] == b'\\' => Some(start),
                        _ => None,
                    };
                    if let Some(ix_setext) =
                        self.parse_setext_heading(ix_new, node_ix, trailing_backslash_pos.is_some())
                    {
                        if let Some(pos) = trailing_backslash_pos {
                            self.tree.append_text(pos, pos + 1);
                        }
                        ix = ix_setext;
                        break;
                    }
                }
                // first check for non-empty lists, then for other interrupts
                let suffix = &bytes[ix_new..];
                if scan_paragraph_interrupt(suffix, current_container) {
                    break;
                }
            }
            line_start.scan_all_space();
            if line_start.is_at_eol() {
                break;
            }
            ix = next_ix + line_start.bytes_scanned();
            // Append the soft/hard break item before continuing with the
            // next line of the paragraph.
            if let Some(item) = brk {
                self.tree.append(item);
            }
        }

        self.pop(ix);
        ix
    }
+
    /// Returns end ix of setext_heading on success.
    ///
    /// On success the paragraph node at `node_ix` is rewritten into a
    /// heading; trailing whitespace and any trailing attribute block are
    /// stripped from the heading content.
    fn parse_setext_heading(
        &mut self,
        ix: usize,
        node_ix: TreeIndex,
        has_trailing_content: bool,
    ) -> Option<usize> {
        let bytes = self.text.as_bytes();
        let (n, level) = scan_setext_heading(&bytes[ix..])?;
        let mut attrs = None;

        if let Some(cur_ix) = self.tree.cur() {
            let parent_ix = self.tree.peek_up().unwrap();
            let header_start = self.tree[parent_ix].item.start;
            // Note that `self.tree[parent_ix].item.end` might be zero at this point.
            // Use the end position of the current node (i.e. the last known child
            // of the parent) instead.
            let header_end = self.tree[cur_ix].item.end;

            // extract the trailing attribute block
            let (content_end, attrs_) =
                self.extract_and_parse_heading_attribute_block(header_start, header_end);
            attrs = attrs_;

            // strip trailing whitespace
            let new_end = if has_trailing_content {
                content_end
            } else {
                let trailing_ws =
                    scan_rev_while(&bytes[header_start..content_end], is_ascii_whitespace_no_nl);
                content_end - trailing_ws
            };

            if attrs.is_some() {
                // remove trailing block attributes
                self.tree.truncate_siblings(self.text.as_bytes(), new_end);
            }

            if let Some(cur_ix) = self.tree.cur() {
                self.tree[cur_ix].item.end = new_end;
            }
        }

        self.tree[node_ix].item.body = ItemBody::Heading(
            level,
            attrs.map(|attrs| self.allocs.allocate_heading(attrs)),
        );

        Some(ix + n)
    }
+
    /// Parse a line of input, appending text and items to tree.
    ///
    /// Walks the line via the special-byte lookup table, emitting text runs
    /// and "maybe" inline items (code spans, emphasis, links, HTML, ...)
    /// for the second pass to resolve. Also detects hard/soft breaks and —
    /// in `TableParseMode::Scan` — the start of a table.
    ///
    /// Returns: index after line and an item representing the break.
    fn parse_line(
        &mut self,
        start: usize,
        end: Option<usize>,
        mode: TableParseMode,
    ) -> (usize, Option<Item>) {
        let bytes = self.text.as_bytes();
        let bytes = match end {
            Some(end) => &bytes[..end],
            None => bytes,
        };
        let bytes_len = bytes.len();
        // Pipe bookkeeping, used for table detection in `Scan` mode.
        let mut pipes = 0;
        let mut last_pipe_ix = start;
        // Start of the pending run of plain text not yet appended.
        let mut begin_text = start;

        let (final_ix, brk) = iterate_special_bytes(self.lookup_table, bytes, start, |ix, byte| {
            match byte {
                b'\n' | b'\r' => {
                    if let TableParseMode::Active = mode {
                        return LoopInstruction::BreakAtWith(ix, None);
                    }

                    let mut i = ix;
                    let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
                    if mode == TableParseMode::Scan && pipes > 0 {
                        // check if we may be parsing a table
                        let next_line_ix = ix + eol_bytes;
                        let mut line_start = LineStart::new(&bytes[next_line_ix..]);
                        if scan_containers(&self.tree, &mut line_start) == self.tree.spine_len() {
                            let table_head_ix = next_line_ix + line_start.bytes_scanned();
                            let (table_head_bytes, alignment) =
                                scan_table_head(&bytes[table_head_ix..]);

                            if table_head_bytes > 0 {
                                // computing header count from number of pipes
                                let header_count =
                                    count_header_cols(bytes, pipes, start, last_pipe_ix);

                                // make sure they match the number of columns we find in separator line
                                if alignment.len() == header_count {
                                    let alignment_ix = self.allocs.allocate_alignment(alignment);
                                    let end_ix = table_head_ix + table_head_bytes;
                                    return LoopInstruction::BreakAtWith(
                                        end_ix,
                                        Some(Item {
                                            start: i,
                                            end: end_ix, // must update later
                                            body: ItemBody::Table(alignment_ix),
                                        }),
                                    );
                                }
                            }
                        }
                    }

                    let end_ix = ix + eol_bytes;
                    // A line ending in an odd number of backslashes is a
                    // hard break (unless at end of input).
                    let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
                    if trailing_backslashes % 2 == 1 && end_ix < bytes_len {
                        i -= 1;
                        self.tree.append_text(begin_text, i);
                        return LoopInstruction::BreakAtWith(
                            end_ix,
                            Some(Item {
                                start: i,
                                end: end_ix,
                                body: ItemBody::HardBreak,
                            }),
                        );
                    }
                    // Two or more trailing spaces also make a hard break.
                    let trailing_whitespace =
                        scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
                    if trailing_whitespace >= 2 {
                        i -= trailing_whitespace;
                        self.tree.append_text(begin_text, i);
                        return LoopInstruction::BreakAtWith(
                            end_ix,
                            Some(Item {
                                start: i,
                                end: end_ix,
                                body: ItemBody::HardBreak,
                            }),
                        );
                    }

                    self.tree.append_text(begin_text, ix);
                    LoopInstruction::BreakAtWith(
                        end_ix,
                        Some(Item {
                            start: i,
                            end: end_ix,
                            body: ItemBody::SoftBreak,
                        }),
                    )
                }
                b'\\' => {
                    if ix + 1 < bytes_len && is_ascii_punctuation(bytes[ix + 1]) {
                        self.tree.append_text(begin_text, ix);
                        if bytes[ix + 1] == b'`' {
                            // An escaped backtick still delimits a code span
                            // candidate; record the full run.
                            let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], b'`');
                            self.tree.append(Item {
                                start: ix + 1,
                                end: ix + count + 1,
                                body: ItemBody::MaybeCode(count, true),
                            });
                            begin_text = ix + 1 + count;
                            LoopInstruction::ContinueAndSkip(count)
                        } else {
                            begin_text = ix + 1;
                            LoopInstruction::ContinueAndSkip(1)
                        }
                    } else {
                        LoopInstruction::ContinueAndSkip(0)
                    }
                }
                c @ b'*' | c @ b'_' | c @ b'~' => {
                    let string_suffix = &self.text[ix..];
                    let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
                    let can_open = delim_run_can_open(self.text, string_suffix, count, ix);
                    let can_close = delim_run_can_close(self.text, string_suffix, count, ix);
                    // Strikethrough (`~`) only ever uses runs of exactly two.
                    let is_valid_seq = c != b'~' || count == 2;

                    if (can_open || can_close) && is_valid_seq {
                        self.tree.append_text(begin_text, ix);
                        for i in 0..count {
                            self.tree.append(Item {
                                start: ix + i,
                                end: ix + i + 1,
                                body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
                            });
                        }
                        begin_text = ix + count;
                    }
                    LoopInstruction::ContinueAndSkip(count - 1)
                }
                b'`' => {
                    self.tree.append_text(begin_text, ix);
                    let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
                    self.tree.append(Item {
                        start: ix,
                        end: ix + count,
                        body: ItemBody::MaybeCode(count, false),
                    });
                    begin_text = ix + count;
                    LoopInstruction::ContinueAndSkip(count - 1)
                }
                b'<' => {
                    // Note: could detect some non-HTML cases and early escape here, but not
                    // clear that's a win.
                    self.tree.append_text(begin_text, ix);
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeHtml,
                    });
                    begin_text = ix + 1;
                    LoopInstruction::ContinueAndSkip(0)
                }
                b'!' => {
                    if ix + 1 < bytes_len && bytes[ix + 1] == b'[' {
                        self.tree.append_text(begin_text, ix);
                        self.tree.append(Item {
                            start: ix,
                            end: ix + 2,
                            body: ItemBody::MaybeImage,
                        });
                        begin_text = ix + 2;
                        LoopInstruction::ContinueAndSkip(1)
                    } else {
                        LoopInstruction::ContinueAndSkip(0)
                    }
                }
                b'[' => {
                    self.tree.append_text(begin_text, ix);
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeLinkOpen,
                    });
                    begin_text = ix + 1;
                    LoopInstruction::ContinueAndSkip(0)
                }
                b']' => {
                    self.tree.append_text(begin_text, ix);
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeLinkClose(true),
                    });
                    begin_text = ix + 1;
                    LoopInstruction::ContinueAndSkip(0)
                }
                b'&' => match scan_entity(&bytes[ix..]) {
                    // Recognized entity: synthesize its decoded text.
                    (n, Some(value)) => {
                        self.tree.append_text(begin_text, ix);
                        self.tree.append(Item {
                            start: ix,
                            end: ix + n,
                            body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
                        });
                        begin_text = ix + n;
                        LoopInstruction::ContinueAndSkip(n - 1)
                    }
                    _ => LoopInstruction::ContinueAndSkip(0),
                },
                b'|' => {
                    if let TableParseMode::Active = mode {
                        LoopInstruction::BreakAtWith(ix, None)
                    } else {
                        last_pipe_ix = ix;
                        pipes += 1;
                        LoopInstruction::ContinueAndSkip(0)
                    }
                }
                b'.' => {
                    // Smart punctuation: `...` becomes an ellipsis.
                    if ix + 2 < bytes.len() && bytes[ix + 1] == b'.' && bytes[ix + 2] == b'.' {
                        self.tree.append_text(begin_text, ix);
                        self.tree.append(Item {
                            start: ix,
                            end: ix + 3,
                            body: ItemBody::SynthesizeChar('…'),
                        });
                        begin_text = ix + 3;
                        LoopInstruction::ContinueAndSkip(2)
                    } else {
                        LoopInstruction::ContinueAndSkip(0)
                    }
                }
                b'-' => {
                    // Smart punctuation: runs of dashes become en/em dashes.
                    let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'-');
                    if count == 1 {
                        LoopInstruction::ContinueAndSkip(0)
                    } else {
                        let itembody = if count == 2 {
                            ItemBody::SynthesizeChar('–')
                        } else if count == 3 {
                            ItemBody::SynthesizeChar('—')
                        } else {
                            let (ems, ens) = match count % 6 {
                                0 | 3 => (count / 3, 0),
                                2 | 4 => (0, count / 2),
                                1 => (count / 3 - 1, 2),
                                _ => (count / 3, 1),
                            };
                            // – and — are 3 bytes each in utf8
                            let mut buf = String::with_capacity(3 * (ems + ens));
                            for _ in 0..ems {
                                buf.push('—');
                            }
                            for _ in 0..ens {
                                buf.push('–');
                            }
                            ItemBody::SynthesizeText(self.allocs.allocate_cow(buf.into()))
                        };

                        self.tree.append_text(begin_text, ix);
                        self.tree.append(Item {
                            start: ix,
                            end: ix + count,
                            body: itembody,
                        });
                        begin_text = ix + count;
                        LoopInstruction::ContinueAndSkip(count - 1)
                    }
                }
                c @ b'\'' | c @ b'"' => {
                    let string_suffix = &self.text[ix..];
                    let can_open = delim_run_can_open(self.text, string_suffix, 1, ix);
                    let can_close = delim_run_can_close(self.text, string_suffix, 1, ix);

                    self.tree.append_text(begin_text, ix);
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeSmartQuote(c, can_open, can_close),
                    });
                    begin_text = ix + 1;

                    LoopInstruction::ContinueAndSkip(0)
                }
                _ => LoopInstruction::ContinueAndSkip(0),
            }
        });

        if brk.is_none() {
            // need to close text at eof
            self.tree.append_text(begin_text, final_ix);
        }
        (final_ix, brk)
    }
+
    /// When start_ix is at the beginning of an HTML block of type 1 to 5,
    /// this will find the end of the block, adding the block itself to the
    /// tree and also keeping track of the lines of HTML within the block.
    ///
    /// The html_end_tag is the tag that must be found on a line to end the block.
    fn parse_html_block_type_1_to_5(
        &mut self,
        start_ix: usize,
        html_end_tag: &str,
        mut remaining_space: usize,
    ) -> usize {
        let bytes = self.text.as_bytes();
        let mut ix = start_ix;
        loop {
            let line_start_ix = ix;
            ix += scan_nextline(&bytes[ix..]);
            self.append_html_line(remaining_space, line_start_ix, ix);

            // Stop if the enclosing containers don't continue on the next line.
            let mut line_start = LineStart::new(&bytes[ix..]);
            let n_containers = scan_containers(&self.tree, &mut line_start);
            if n_containers < self.tree.spine_len() {
                break;
            }

            // The end tag may appear anywhere on the line just consumed.
            if (&self.text[line_start_ix..ix]).contains(html_end_tag) {
                break;
            }

            let next_line_ix = ix + line_start.bytes_scanned();
            if next_line_ix == self.text.len() {
                break;
            }
            ix = next_line_ix;
            remaining_space = line_start.remaining_space();
        }
        ix
    }
+
+ /// When start_ix is at the beginning of an HTML block of type 6 or 7,
+ /// this will consume lines until there is a blank line and keep track of
+ /// the HTML within the block.
+ ///
+ /// Returns the byte index of the start of the line following the block.
+ fn parse_html_block_type_6_or_7(
+ &mut self,
+ start_ix: usize,
+ mut remaining_space: usize,
+ ) -> usize {
+ let bytes = self.text.as_bytes();
+ let mut ix = start_ix;
+ loop {
+ // Consume one full line and record it as HTML content.
+ let line_start_ix = ix;
+ ix += scan_nextline(&bytes[ix..]);
+ self.append_html_line(remaining_space, line_start_ix, ix);
+
+ // Stop when containers no longer match, or when the remainder of
+ // the line after container markers is already at end-of-line.
+ let mut line_start = LineStart::new(&bytes[ix..]);
+ let n_containers = scan_containers(&self.tree, &mut line_start);
+ if n_containers < self.tree.spine_len() || line_start.is_at_eol() {
+ break;
+ }
+
+ // Types 6 and 7 are terminated by a blank line (or end of input),
+ // which is not part of the block.
+ let next_line_ix = ix + line_start.bytes_scanned();
+ if next_line_ix == self.text.len() || scan_blank_line(&bytes[next_line_ix..]).is_some()
+ {
+ break;
+ }
+ ix = next_line_ix;
+ remaining_space = line_start.remaining_space();
+ }
+ ix
+ }
+
+ /// Parses an indented code block starting at `start_ix`, consuming lines
+ /// while they stay indented (4 spaces beyond containers) or blank, and
+ /// appending their text to the tree. Trailing blank lines are trimmed
+ /// from the block. Returns the index of the start of the next line.
+ fn parse_indented_code_block(&mut self, start_ix: usize, mut remaining_space: usize) -> usize {
+ self.tree.append(Item {
+ start: start_ix,
+ end: 0, // will get set later
+ body: ItemBody::IndentCodeBlock,
+ });
+ self.tree.push();
+ let bytes = self.text.as_bytes();
+ // Track the last child / index produced by a non-blank line so that
+ // trailing blank lines can be dropped after the loop.
+ let mut last_nonblank_child = None;
+ let mut last_nonblank_ix = 0;
+ let mut end_ix = 0;
+ let mut last_line_blank = false;
+
+ let mut ix = start_ix;
+ loop {
+ let line_start_ix = ix;
+ ix += scan_nextline(&bytes[ix..]);
+ self.append_code_text(remaining_space, line_start_ix, ix);
+ // TODO(spec clarification): should we synthesize newline at EOF?
+
+ if !last_line_blank {
+ last_nonblank_child = self.tree.cur();
+ last_nonblank_ix = ix;
+ end_ix = ix;
+ }
+
+ // Continue only while containers match and the next line is either
+ // indented at least 4 spaces or entirely blank.
+ let mut line_start = LineStart::new(&bytes[ix..]);
+ let n_containers = scan_containers(&self.tree, &mut line_start);
+ if n_containers < self.tree.spine_len()
+ || !(line_start.scan_space(4) || line_start.is_at_eol())
+ {
+ break;
+ }
+ let next_line_ix = ix + line_start.bytes_scanned();
+ if next_line_ix == self.text.len() {
+ break;
+ }
+ ix = next_line_ix;
+ remaining_space = line_start.remaining_space();
+ last_line_blank = scan_blank_line(&bytes[ix..]).is_some();
+ }
+
+ // Trim trailing blank lines.
+ if let Some(child) = last_nonblank_child {
+ self.tree[child].next = None;
+ self.tree[child].item.end = last_nonblank_ix;
+ }
+ self.pop(end_ix);
+ ix
+ }
+
+ /// Parses a fenced code block whose opening fence (of `n_fence_char`
+ /// repetitions of `fence_ch`, indented by `indent`) starts at `start_ix`.
+ /// The rest of the fence line becomes the (unescaped, whitespace-trimmed)
+ /// info string. Returns the index of the start of the line after the block.
+ fn parse_fenced_code_block(
+ &mut self,
+ start_ix: usize,
+ indent: usize,
+ fence_ch: u8,
+ n_fence_char: usize,
+ ) -> usize {
+ let bytes = self.text.as_bytes();
+ // Info string: everything after the fence chars up to end of line,
+ // with surrounding whitespace stripped.
+ let mut info_start = start_ix + n_fence_char;
+ info_start += scan_whitespace_no_nl(&bytes[info_start..]);
+ // TODO: info strings are typically very short. wouldn't it be faster
+ // to just do a forward scan here?
+ let mut ix = info_start + scan_nextline(&bytes[info_start..]);
+ let info_end = ix - scan_rev_while(&bytes[info_start..ix], is_ascii_whitespace);
+ let info_string = unescape(&self.text[info_start..info_end]);
+ self.tree.append(Item {
+ start: start_ix,
+ end: 0, // will get set later
+ body: ItemBody::FencedCodeBlock(self.allocs.allocate_cow(info_string)),
+ });
+ self.tree.push();
+ loop {
+ let mut line_start = LineStart::new(&bytes[ix..]);
+ let n_containers = scan_containers(&self.tree, &mut line_start);
+ if n_containers < self.tree.spine_len() {
+ break;
+ }
+ line_start.scan_space(indent);
+ // A closing fence may be indented at most 3 spaces (relative to
+ // the opening fence's indent), hence the 4-space probe on a clone.
+ let mut close_line_start = line_start.clone();
+ if !close_line_start.scan_space(4) {
+ let close_ix = ix + close_line_start.bytes_scanned();
+ if let Some(n) = scan_closing_code_fence(&bytes[close_ix..], fence_ch, n_fence_char)
+ {
+ ix = close_ix + n;
+ break;
+ }
+ }
+ let remaining_space = line_start.remaining_space();
+ ix += line_start.bytes_scanned();
+ let next_ix = ix + scan_nextline(&bytes[ix..]);
+ self.append_code_text(remaining_space, ix, next_ix);
+ ix = next_ix;
+ }
+
+ self.pop(ix);
+
+ // try to read trailing whitespace or it will register as a completely blank line
+ ix + scan_blank_line(&bytes[ix..]).unwrap_or(0)
+ }
+
+ /// Appends one line of code-block text (`start..end`) to the tree,
+ /// synthesizing `remaining_space` leading spaces (space consumed by a
+ /// partially-scanned tab) and normalizing a CRLF line ending to LF.
+ fn append_code_text(&mut self, remaining_space: usize, start: usize, end: usize) {
+ if remaining_space > 0 {
+ let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
+ self.tree.append(Item {
+ start,
+ end: start,
+ body: ItemBody::SynthesizeText(cow_ix),
+ });
+ }
+ // NOTE(review): `end - 2` probes the byte before the trailing '\n' for
+ // '\r'; assumes end >= 2 here — appears guaranteed by callers, confirm.
+ if self.text.as_bytes()[end - 2] == b'\r' {
+ // Normalize CRLF to LF
+ self.tree.append_text(start, end - 2);
+ self.tree.append_text(end - 1, end);
+ } else {
+ self.tree.append_text(start, end);
+ }
+ }
+
+ /// Appends a line of HTML to the tree.
+ ///
+ /// Like `append_code_text`: synthesizes `remaining_space` leading spaces
+ /// and splits a CRLF line ending so only the LF is emitted.
+ fn append_html_line(&mut self, remaining_space: usize, start: usize, end: usize) {
+ if remaining_space > 0 {
+ let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
+ self.tree.append(Item {
+ start,
+ end: start,
+ // TODO: maybe this should synthesize to html rather than text?
+ body: ItemBody::SynthesizeText(cow_ix),
+ });
+ }
+ // NOTE(review): as in append_code_text, assumes end >= 2 when probing
+ // for the '\r' of a CRLF ending — confirm against callers.
+ if self.text.as_bytes()[end - 2] == b'\r' {
+ // Normalize CRLF to LF
+ self.tree.append(Item {
+ start,
+ end: end - 2,
+ body: ItemBody::Html,
+ });
+ self.tree.append(Item {
+ start: end - 1,
+ end,
+ body: ItemBody::Html,
+ });
+ } else {
+ self.tree.append(Item {
+ start,
+ end,
+ body: ItemBody::Html,
+ });
+ }
+ }
+
+ /// Pop a container, setting its end.
+ ///
+ /// If the popped container is a tight list, its paragraph wrappers are
+ /// spliced out immediately (see `surgerize_tight_list`).
+ fn pop(&mut self, ix: usize) {
+ let cur_ix = self.tree.pop().unwrap();
+ self.tree[cur_ix].item.end = ix;
+ if let ItemBody::List(true, _, _) = self.tree[cur_ix].item.body {
+ surgerize_tight_list(&mut self.tree, cur_ix);
+ }
+ }
+
+ /// Close a list if it's open. Also set loose if last line was blank
+ ///
+ /// `ix` becomes the end index of the closed list.
+ fn finish_list(&mut self, ix: usize) {
+ if let Some(node_ix) = self.tree.peek_up() {
+ if let ItemBody::List(_, _, _) = self.tree[node_ix].item.body {
+ self.pop(ix);
+ }
+ }
+ // A blank line before whatever follows makes the *enclosing* list
+ // (now the grandparent after the pop) loose.
+ if self.last_line_blank {
+ if let Some(node_ix) = self.tree.peek_grandparent() {
+ if let ItemBody::List(ref mut is_tight, _, _) = self.tree[node_ix].item.body {
+ *is_tight = false;
+ }
+ }
+ self.last_line_blank = false;
+ }
+ }
+
+ /// Continue an existing list or start a new one if there's not an open
+ /// list that matches.
+ ///
+ /// `ch` is the list marker character and `index` the starting number
+ /// (meaningful for ordered lists). A list only continues when the marker
+ /// character matches; a blank line inside a continued list makes it loose.
+ fn continue_list(&mut self, start: usize, ch: u8, index: u64) {
+ if let Some(node_ix) = self.tree.peek_up() {
+ if let ItemBody::List(ref mut is_tight, existing_ch, _) = self.tree[node_ix].item.body {
+ if existing_ch == ch {
+ if self.last_line_blank {
+ *is_tight = false;
+ self.last_line_blank = false;
+ }
+ return;
+ }
+ }
+ // TODO: this is not the best choice for end; maybe get end from last list item.
+ self.finish_list(start);
+ }
+ self.tree.append(Item {
+ start,
+ end: 0, // will get set later
+ body: ItemBody::List(true, ch, index),
+ });
+ self.tree.push();
+ self.last_line_blank = false;
+ }
+
+ /// Parse a thematic break.
+ ///
+ /// Returns index of start of next line.
+ fn parse_hrule(&mut self, hrule_size: usize, ix: usize) -> usize {
+     // A rule is a leaf node spanning the entire marker run.
+     let end = ix + hrule_size;
+     self.tree.append(Item {
+         start: ix,
+         end,
+         body: ItemBody::Rule,
+     });
+     end
+ }
+
+ /// Parse an ATX heading.
+ ///
+ /// Returns index of start of next line.
+ fn parse_atx_heading(&mut self, start: usize, atx_level: HeadingLevel) -> usize {
+ let mut ix = start;
+ let heading_ix = self.tree.append(Item {
+ start,
+ end: 0, // set later
+ body: ItemBody::default(), // set later
+ });
+ ix += atx_level as usize;
+ // next char is space or eol (guaranteed by scan_atx_heading)
+ let bytes = self.text.as_bytes();
+ // Empty heading (`##` followed by end of line): no content node.
+ if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
+ self.tree[heading_ix].item.end = ix + eol_bytes;
+ self.tree[heading_ix].item.body = ItemBody::Heading(atx_level, None);
+ return ix + eol_bytes;
+ }
+ // skip leading spaces
+ let skip_spaces = scan_whitespace_no_nl(&bytes[ix..]);
+ ix += skip_spaces;
+
+ // now handle the header text
+ let header_start = ix;
+ let header_node_idx = self.tree.push(); // so that we can set the endpoint later
+
+ // trim the trailing attribute block before parsing the entire line, if necessary
+ let (end, content_end, attrs) = if self.options.contains(Options::ENABLE_HEADING_ATTRIBUTES)
+ {
+ // the start of the next line is the end of the header since the
+ // header cannot have line breaks
+ let header_end = header_start + scan_nextline(&bytes[header_start..]);
+ let (content_end, attrs) =
+ self.extract_and_parse_heading_attribute_block(header_start, header_end);
+ self.parse_line(ix, Some(content_end), TableParseMode::Disabled);
+ (header_end, content_end, attrs)
+ } else {
+ ix = self.parse_line(ix, None, TableParseMode::Disabled).0;
+ (ix, ix, None)
+ };
+ self.tree[header_node_idx].item.end = end;
+
+ // remove trailing matter from header text
+ if let Some(cur_ix) = self.tree.cur() {
+ // remove closing of the ATX heading
+ let header_text = &bytes[header_start..content_end];
+ // `limit`: content length with trailing newlines/spaces dropped.
+ let mut limit = header_text
+ .iter()
+ .rposition(|&b| !(b == b'\n' || b == b'\r' || b == b' '))
+ .map_or(0, |i| i + 1);
+ // `closer`: where a trailing run of '#' (the optional closing
+ // sequence) begins; 0 means the whole text is '#'s.
+ let closer = header_text[..limit]
+ .iter()
+ .rposition(|&b| b != b'#')
+ .map_or(0, |i| i + 1);
+ if closer == 0 {
+ limit = closer;
+ } else {
+ // The closing '#' run only counts if preceded by a space.
+ let spaces = scan_rev_while(&header_text[..closer], |b| b == b' ');
+ if spaces > 0 {
+ limit = closer - spaces;
+ }
+ }
+ self.tree[cur_ix].item.end = limit + header_start;
+ }
+
+ self.tree.pop();
+ self.tree[heading_ix].item.body = ItemBody::Heading(
+ atx_level,
+ attrs.map(|attrs| self.allocs.allocate_heading(attrs)),
+ );
+ end
+ }
+
+ /// Tries to parse a footnote definition (`[^label]:`) at `start`.
+ ///
+ /// Returns the number of bytes scanned on success.
+ fn parse_footnote(&mut self, start: usize) -> Option<usize> {
+ let bytes = &self.text.as_bytes()[start..];
+ if !bytes.starts_with(b"[^") {
+ return None;
+ }
+ let (mut i, label) = self.parse_refdef_label(start + 2)?;
+ // Account for the `[^` prefix skipped above; `i` is relative to `bytes`.
+ i += 2;
+ if scan_ch(&bytes[i..], b':') == 0 {
+ return None;
+ }
+ i += 1;
+ // A footnote definition terminates any open list.
+ self.finish_list(start);
+ self.tree.append(Item {
+ start,
+ end: 0, // will get set later
+ // TODO: check whether the label here is strictly necessary
+ body: ItemBody::FootnoteDefinition(self.allocs.allocate_cow(label)),
+ });
+ self.tree.push();
+ Some(i)
+ }
+
+ /// Tries to parse a reference label, which can be interrupted by new blocks.
+ /// On success, returns the number of bytes of the label and the label itself.
+ fn parse_refdef_label(&self, start: usize) -> Option<(usize, CowStr<'a>)> {
+ // The closure decides whether the label may continue onto the next
+ // line: it returns Some(bytes of container markers to skip) if the
+ // line does not start a new block, None to stop the label.
+ scan_link_label_rest(&self.text[start..], &|bytes| {
+ let mut line_start = LineStart::new(bytes);
+ let current_container =
+ scan_containers(&self.tree, &mut line_start) == self.tree.spine_len();
+ let bytes_scanned = line_start.bytes_scanned();
+ let suffix = &bytes[bytes_scanned..];
+ if scan_paragraph_interrupt(suffix, current_container) {
+ None
+ } else {
+ Some(bytes_scanned)
+ }
+ })
+ }
+
+ /// Returns number of bytes scanned, label and definition on success.
+ ///
+ /// Parses a full link reference definition (`[label]: dest "title"`)
+ /// starting at `start`; the label is case-folded via `UniCase`.
+ fn parse_refdef_total(&mut self, start: usize) -> Option<(usize, LinkLabel<'a>, LinkDef<'a>)> {
+ let bytes = &self.text.as_bytes()[start..];
+ if scan_ch(bytes, b'[') == 0 {
+ return None;
+ }
+ let (mut i, label) = self.parse_refdef_label(start + 1)?;
+ // Account for the opening `[`; `i` is relative to `bytes`.
+ i += 1;
+ if scan_ch(&bytes[i..], b':') == 0 {
+ return None;
+ }
+ i += 1;
+ let (bytecount, link_def) = self.scan_refdef(start, start + i)?;
+ Some((bytecount + i, UniCase::new(label), link_def))
+ }
+
+ /// Returns number of bytes and number of newlines
+ ///
+ /// Scans whitespace inside a reference definition starting at `i`.
+ /// At most one newline is allowed (a second one would end the
+ /// definition), and any continuation line must still match the open
+ /// containers.
+ fn scan_refdef_space(&self, bytes: &[u8], mut i: usize) -> Option<(usize, usize)> {
+ let mut newlines = 0;
+ loop {
+ let whitespaces = scan_whitespace_no_nl(&bytes[i..]);
+ i += whitespaces;
+ if let Some(eol_bytes) = scan_eol(&bytes[i..]) {
+ i += eol_bytes;
+ newlines += 1;
+ if newlines > 1 {
+ return None;
+ }
+ } else {
+ break;
+ }
+ let mut line_start = LineStart::new(&bytes[i..]);
+ if self.tree.spine_len() != scan_containers(&self.tree, &mut line_start) {
+ return None;
+ }
+ i += line_start.bytes_scanned();
+ }
+ Some((i, newlines))
+ }
+
+ /// Returns # of bytes and definition.
+ /// Assumes the label of the reference including colon has already been scanned.
+ ///
+ /// `span_start` is the index of the opening `[`; `start` is just past the
+ /// colon. The title is optional: `backup` holds the title-less definition
+ /// that is returned whenever a title fails to parse but the destination
+ /// alone forms a valid definition.
+ fn scan_refdef(&self, span_start: usize, start: usize) -> Option<(usize, LinkDef<'a>)> {
+ let bytes = self.text.as_bytes();
+
+ // whitespace between label and url (including up to one newline)
+ let (mut i, _newlines) = self.scan_refdef_space(bytes, start)?;
+
+ // scan link dest
+ let (dest_length, dest) = scan_link_dest(self.text, i, 1)?;
+ if dest_length == 0 {
+ return None;
+ }
+ let dest = unescape(dest);
+ i += dest_length;
+
+ // no title
+ let mut backup = (
+ i - start,
+ LinkDef {
+ dest,
+ title: None,
+ span: span_start..i,
+ },
+ );
+
+ // scan whitespace between dest and label
+ let (mut i, newlines) =
+ if let Some((new_i, mut newlines)) = self.scan_refdef_space(bytes, i) {
+ // End of input counts as a line break after the destination.
+ if i == self.text.len() {
+ newlines += 1;
+ }
+ // A title must be separated from the destination by whitespace.
+ if new_i == i && newlines == 0 {
+ return None;
+ }
+ if newlines > 1 {
+ return Some(backup);
+ };
+ (new_i, newlines)
+ } else {
+ return Some(backup);
+ };
+
+ // scan title
+ // if this fails but newline == 1, return also a refdef without title
+ if let Some((title_length, title)) = scan_refdef_title(&self.text[i..]) {
+ i += title_length;
+ backup.1.span = span_start..i;
+ backup.1.title = Some(unescape(title));
+ } else if newlines > 0 {
+ return Some(backup);
+ } else {
+ return None;
+ };
+
+ // scan EOL
+ if let Some(bytes) = scan_blank_line(&bytes[i..]) {
+ backup.0 = i + bytes - start;
+ Some(backup)
+ } else if newlines > 0 {
+ Some(backup)
+ } else {
+ None
+ }
+ }
+
+ /// Extracts and parses a heading attribute block if exists.
+ ///
+ /// Returns `(end_offset_of_heading_content, (id, classes))`.
+ ///
+ /// If `header_end` is less than or equal to `header_start`, the given
+ /// input is considered as empty.
+ fn extract_and_parse_heading_attribute_block(
+ &mut self,
+ header_start: usize,
+ header_end: usize,
+ ) -> (usize, Option<HeadingAttributes<'a>>) {
+ if !self.options.contains(Options::ENABLE_HEADING_ATTRIBUTES) {
+ return (header_end, None);
+ }
+
+ // extract the trailing attribute block
+ let header_bytes = &self.text.as_bytes()[header_start..header_end];
+ let (content_len, attr_block_range_rel) =
+ extract_attribute_block_content_from_header_text(header_bytes);
+ let content_end = header_start + content_len;
+ // Parse the `{...}` interior (offsets above are relative to the header).
+ let attrs = attr_block_range_rel.and_then(|r| {
+ parse_inside_attribute_block(
+ &self.text[(header_start + r.start)..(header_start + r.end)],
+ )
+ });
+ (content_end, attrs)
+ }
+}
+
+ /// Scanning modes for `Parser`'s `parse_line` method.
+ // NOTE(review): `Scan` appears to let a paragraph be promoted to a table
+ // when a header row is found — confirm against `parse_line`'s full body.
+ #[derive(PartialEq, Eq, Copy, Clone)]
+ enum TableParseMode {
+ /// Inside a paragraph, scanning for table headers.
+ Scan,
+ /// Inside a table.
+ Active,
+ /// Inside a paragraph, not scanning for table headers.
+ Disabled,
+ }
+
+ /// Computes the number of header columns in a table line by computing the number of dividing pipes
+ /// that aren't followed or preceded by whitespace.
+ fn count_header_cols(
+     bytes: &[u8],
+     mut pipes: usize,
+     mut start: usize,
+     last_pipe_ix: usize,
+ ) -> usize {
+     // A pipe at the very start of the line (after optional whitespace) is
+     // a decoration, not a column divider: discount it.
+     start += scan_whitespace_no_nl(&bytes[start..]);
+     if bytes[start] == b'|' {
+         pipes -= 1;
+     }
+
+     // If only blank space follows the final pipe, it closes the last cell;
+     // otherwise the trailing text forms one more column.
+     let trailing_is_blank = scan_blank_line(&bytes[(last_pipe_ix + 1)..]).is_some();
+     if trailing_is_blank {
+         pipes
+     } else {
+         pipes + 1
+     }
+ }
+
+ /// Checks whether we should break a paragraph on the given input.
+ ///
+ /// `current_container` is true when all open containers were matched on
+ /// this line; only then are the list-item restrictions applied.
+ fn scan_paragraph_interrupt(bytes: &[u8], current_container: bool) -> bool {
+ scan_eol(bytes).is_some()
+ || scan_hrule(bytes).is_ok()
+ || scan_atx_heading(bytes).is_some()
+ || scan_code_fence(bytes).is_some()
+ || scan_blockquote_start(bytes).is_some()
+ || scan_listitem(bytes).map_or(false, |(ix, delim, index, _)| {
+ ! current_container ||
+ // we don't allow interruption by either empty lists or
+ // numbered lists starting at an index other than 1
+ (delim == b'*' || delim == b'-' || delim == b'+' || index == 1)
+ && !scan_empty_list(&bytes[ix..])
+ })
+ // HTML blocks of type 1-6 may interrupt a paragraph (type 7 may not).
+ || bytes.starts_with(b"<")
+ && (get_html_end_tag(&bytes[1..]).is_some() || starts_html_block_type_6(&bytes[1..]))
+ }
+
+ /// Assumes `text_bytes` is preceded by `<`.
+ ///
+ /// Determines whether the input opens an HTML block of type 1 to 5 and,
+ /// if so, returns the string whose appearance on a line ends the block.
+ fn get_html_end_tag(text_bytes: &[u8]) -> Option<&'static str> {
+     static BEGIN_TAGS: &[&[u8]; 4] = &[b"pre", b"style", b"script", b"textarea"];
+     static ST_BEGIN_TAGS: &[&[u8]; 3] = &[b"!--", b"?", b"![CDATA["];
+
+     // Type 1: <pre>, <style>, <script>, <textarea> (case-insensitive).
+     let end_tags = ["</pre>", "</style>", "</script>", "</textarea>"];
+     for (beg_tag, &end_tag) in BEGIN_TAGS.iter().zip(end_tags.iter()) {
+         let tag_len = beg_tag.len();
+         let prefix = match text_bytes.get(..tag_len) {
+             Some(p) => p,
+             // begin tags are increasing in size, so no later one can match
+             None => break,
+         };
+         if !prefix.eq_ignore_ascii_case(beg_tag) {
+             continue;
+         }
+         // The tag name must be followed by end of input, whitespace,
+         // a newline, or '>' to count as an opening tag.
+         match text_bytes.get(tag_len) {
+             None => return Some(end_tag),
+             Some(&next) if is_ascii_whitespace(next) || next == b'>' => return Some(end_tag),
+             _ => {}
+         }
+     }
+
+     // Types 2, 3 and 5: comments, processing instructions, CDATA sections.
+     for (beg_tag, &end_tag) in ST_BEGIN_TAGS.iter().zip(["-->", "?>", "]]>"].iter()) {
+         if text_bytes.starts_with(beg_tag) {
+             return Some(end_tag);
+         }
+     }
+
+     // Type 4: a declaration, `<!` followed by an uppercase ASCII letter.
+     if text_bytes.len() > 1
+         && text_bytes[0] == b'!'
+         && text_bytes[1].is_ascii_uppercase()
+     {
+         Some(">")
+     } else {
+         None
+     }
+ }
+
+ // https://english.stackexchange.com/a/285573
+ //
+ // Removes the Paragraph wrapper nodes inside every item of a tight list:
+ // each paragraph's children are spliced into the list item's child chain
+ // in place of the paragraph node itself.
+ fn surgerize_tight_list(tree: &mut Tree<Item>, list_ix: TreeIndex) {
+ let mut list_item = tree[list_ix].child;
+ while let Some(listitem_ix) = list_item {
+ // first child is special, controls how we repoint list_item.child
+ let list_item_firstborn = tree[listitem_ix].child;
+
+ // Check that list item has children - this is not necessarily the case!
+ if let Some(firstborn_ix) = list_item_firstborn {
+ if let ItemBody::Paragraph = tree[firstborn_ix].item.body {
+ tree[listitem_ix].child = tree[firstborn_ix].child;
+ }
+
+ let mut list_item_child = Some(firstborn_ix);
+ // Last node of the previous (already-spliced) sibling run; its
+ // `next` pointer must be redirected past removed paragraph nodes.
+ let mut node_to_repoint = None;
+ while let Some(child_ix) = list_item_child {
+ // surgerize paragraphs
+ let repoint_ix = if let ItemBody::Paragraph = tree[child_ix].item.body {
+ if let Some(child_firstborn) = tree[child_ix].child {
+ if let Some(repoint_ix) = node_to_repoint {
+ tree[repoint_ix].next = Some(child_firstborn);
+ }
+ // Walk to the paragraph's last child: it becomes the
+ // tail that links to whatever follows the paragraph.
+ let mut child_lastborn = child_firstborn;
+ while let Some(lastborn_next_ix) = tree[child_lastborn].next {
+ child_lastborn = lastborn_next_ix;
+ }
+ child_lastborn
+ } else {
+ child_ix
+ }
+ } else {
+ child_ix
+ };
+
+ node_to_repoint = Some(repoint_ix);
+ tree[repoint_ix].next = tree[child_ix].next;
+ list_item_child = tree[child_ix].next;
+ }
+ }
+
+ list_item = tree[listitem_ix].next;
+ }
+ }
+
+ /// Determines whether the delimiter run starting at given index is
+ /// left-flanking, as defined by the commonmark spec (and isn't intraword
+ /// for _ delims).
+ /// suffix is &s[ix..], which is passed in as an optimization, since taking
+ /// a string subslice is O(n).
+ fn delim_run_can_open(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
+ // A run followed by whitespace (or end of input) can never open.
+ let next_char = if let Some(c) = suffix.chars().nth(run_len) {
+ c
+ } else {
+ return false;
+ };
+ if next_char.is_whitespace() {
+ return false;
+ }
+ if ix == 0 {
+ return true;
+ }
+ let delim = suffix.chars().next().unwrap();
+ // `*` runs open freely before non-punctuation; `_` (and smart quotes)
+ // additionally require a non-word character before the run.
+ if delim == '*' && !is_punctuation(next_char) {
+ return true;
+ }
+
+ let prev_char = s[..ix].chars().last().unwrap();
+
+ prev_char.is_whitespace()
+ || is_punctuation(prev_char) && (delim != '\'' || ![']', ')'].contains(&prev_char))
+ }
+
+ /// Determines whether the delimiter run starting at given index is
+ /// right-flanking, as defined by the commonmark spec (and isn't intraword
+ /// for _ delims)
+ fn delim_run_can_close(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
+ // A run at the start of input or preceded by whitespace can never close.
+ if ix == 0 {
+ return false;
+ }
+ let prev_char = s[..ix].chars().last().unwrap();
+ if prev_char.is_whitespace() {
+ return false;
+ }
+ // End of input counts as closable (mirror of the can_open case).
+ let next_char = if let Some(c) = suffix.chars().nth(run_len) {
+ c
+ } else {
+ return true;
+ };
+ let delim = suffix.chars().next().unwrap();
+ if delim == '*' && !is_punctuation(prev_char) {
+ return true;
+ }
+
+ next_char.is_whitespace() || is_punctuation(next_char)
+ }
+
+ /// Builds the lookup table of special bytes for the enabled options.
+ /// With SIMD compiled in, both the SIMD nibble table and the scalar table
+ /// are built, since the scalar path remains the runtime fallback.
+ fn create_lut(options: &Options) -> LookupTable {
+ #[cfg(all(target_arch = "x86_64", feature = "simd"))]
+ {
+ LookupTable {
+ simd: simd::compute_lookup(options),
+ scalar: special_bytes(options),
+ }
+ }
+ #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
+ {
+ special_bytes(options)
+ }
+ }
+
+ /// Builds the scalar table marking which bytes are special to inline
+ /// parsing under the given options.
+ fn special_bytes(options: &Options) -> [bool; 256] {
+     let mut bytes = [false; 256];
+
+     // Bytes that always start or affect an inline construct.
+     for &byte in b"\n\r*_&\\[]<!`".iter() {
+         bytes[byte as usize] = true;
+     }
+     // Extension-gated bytes.
+     if options.contains(Options::ENABLE_TABLES) {
+         bytes[b'|' as usize] = true;
+     }
+     if options.contains(Options::ENABLE_STRIKETHROUGH) {
+         bytes[b'~' as usize] = true;
+     }
+     if options.contains(Options::ENABLE_SMART_PUNCTUATION) {
+         for &byte in b".-\"'".iter() {
+             bytes[byte as usize] = true;
+         }
+     }
+
+     bytes
+ }
+
+ /// Callback verdict for `iterate_special_bytes`.
+ enum LoopInstruction<T> {
+ /// Continue looking for more special bytes, but skip next few bytes.
+ ContinueAndSkip(usize),
+ /// Break looping immediately, returning with the given index and value.
+ BreakAtWith(usize, T),
+ }
+
+ // With SIMD enabled the table carries both representations: the 16-byte
+ // nibble bitmap for the SSSE3 path and the scalar table for the fallback.
+ #[cfg(all(target_arch = "x86_64", feature = "simd"))]
+ struct LookupTable {
+ simd: [u8; 16],
+ scalar: [bool; 256],
+ }
+
+ // Without SIMD, the lookup table is just the scalar byte-indexed table.
+ #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
+ type LookupTable = [bool; 256];
+
+ /// This function walks the byte slices from the given index and
+ /// calls the callback function on all bytes (and their indices) that are in the following set:
+ /// `` ` ``, `\`, `&`, `*`, `_`, `~`, `!`, `<`, `[`, `]`, `|`, `\r`, `\n`
+ /// It is guaranteed not to call the callback on other bytes.
+ /// Whenever `callback(ix, byte)` returns a `ContinueAndSkip(n)` value, the callback
+ /// will not be called with an index that is less than `ix + n + 1`.
+ /// When the callback returns a `BreakAtWith(end_ix, opt_val)`, no more callbacks will be
+ /// called and the function returns immediately with the return value `(end_ix, opt_val)`.
+ /// If `BreakAtWith(..)` is never returned, this function will return the first
+ /// index that is outside the byteslice bound and a `None` value.
+ ///
+ /// Compile-time dispatch: the SIMD path is used when built for x86_64 with
+ /// the `simd` feature; otherwise the scalar loop is used.
+ fn iterate_special_bytes<F, T>(
+ lut: &LookupTable,
+ bytes: &[u8],
+ ix: usize,
+ callback: F,
+ ) -> (usize, Option<T>)
+ where
+ F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
+ {
+ #[cfg(all(target_arch = "x86_64", feature = "simd"))]
+ {
+ simd::iterate_special_bytes(lut, bytes, ix, callback)
+ }
+ #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
+ {
+ scalar_iterate_special_bytes(lut, bytes, ix, callback)
+ }
+ }
+
+ /// Scalar fallback for `iterate_special_bytes`: a plain byte loop over the
+ /// lookup table. Same contract — invokes `callback` on special bytes,
+ /// honoring `ContinueAndSkip` skips, and returns early on `BreakAtWith`.
+ fn scalar_iterate_special_bytes<F, T>(
+     lut: &[bool; 256],
+     bytes: &[u8],
+     mut ix: usize,
+     mut callback: F,
+ ) -> (usize, Option<T>)
+ where
+     F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
+ {
+     while let Some(&b) = bytes.get(ix) {
+         if lut[usize::from(b)] {
+             match callback(ix, b) {
+                 // Skip is relative: the +1 below advances past this byte.
+                 LoopInstruction::ContinueAndSkip(skip) => ix += skip,
+                 // Note: the break index comes from the callback, not `ix`.
+                 LoopInstruction::BreakAtWith(end_ix, val) => return (end_ix, val),
+             }
+         }
+         ix += 1;
+     }
+
+     (ix, None)
+ }
+
+ /// Split the usual heading content range and the content inside the trailing attribute block.
+ ///
+ /// Returns `(leading_content_len, Option<trailing_attr_block_range>)`.
+ ///
+ /// Note that `trailing_attr_block_range` will be empty range when the block
+ /// is `{}`, since the range is content inside the wrapping `{` and `}`.
+ ///
+ /// The closing `}` of an attribute block can have trailing whitespaces.
+ /// They are automatically trimmed when the attribute block is being searched.
+ ///
+ /// However, this method does not trim the trailing whitespaces of heading content.
+ /// It is callers' responsibility to trim them if necessary.
+ fn extract_attribute_block_content_from_header_text(
+ heading: &[u8],
+ ) -> (usize, Option<Range<usize>>) {
+ let heading_len = heading.len();
+ let mut ix = heading_len;
+ // Skip trailing whitespace after the would-be closing `}`.
+ ix -= scan_rev_while(heading, |b| {
+ b == b'\n' || b == b'\r' || b == b' ' || b == b'\t'
+ });
+ if ix == 0 {
+ return (heading_len, None);
+ }
+
+ let attr_block_close = ix - 1;
+ if heading.get(attr_block_close) != Some(&b'}') {
+ // The last character is not `}`. No attribute blocks found.
+ return (heading_len, None);
+ }
+ // move cursor before the closing right brace (`}`)
+ ix -= 1;
+
+ // Scan backwards over the block's interior to find the opening `{`.
+ ix -= scan_rev_while(&heading[..ix], |b| {
+ // Characters to be excluded:
+ // * `{` and `}`: special characters to open and close an attribute block.
+ // * `\\`: a special character to escape many characters and disable some syntaxes.
+ // + Handling of this escape character differs among markdown processors.
+ // + Escaped characters will be separate text node from neighbors, so
+ // it is not easy to handle unescaped string and trim the trailing block.
+ // * `<` and `>`: special characters to start and end HTML tag.
+ // + No known processors converts `{#<i>foo</i>}` into
+ // `id="&lt;i&gt;foo&lt;/&gt;"` as of this writing, so hopefully
+ // this restriction won't cause compatibility issues.
+ // * `\n` and `\r`: a newline character.
+ // + Setext heading can have multiple lines. However it is hard to support
+ // attribute blocks that have newline inside, since the parsing proceeds line by
+ // line and lines will be separate nodes even they are logically a single text.
+ !matches!(b, b'{' | b'}' | b'<' | b'>' | b'\\' | b'\n' | b'\r')
+ });
+ if ix == 0 {
+ // `{` is not found. No attribute blocks available.
+ return (heading_len, None);
+ }
+ let attr_block_open = ix - 1;
+ if heading[attr_block_open] != b'{' {
+ // `{` is not found. No attribute blocks available.
+ return (heading_len, None);
+ }
+
+ (attr_block_open, Some(ix..attr_block_close))
+ }
+
+ /// Parses an attribute block content, such as `.class1 #id .class2`.
+ ///
+ /// Returns `(id, classes)`.
+ ///
+ /// It is callers' responsibility to find opening and closing characters of the attribute
+ /// block. Usually [`extract_attribute_block_content_from_header_text`] function does it for you.
+ ///
+ /// Note that this parsing requires explicit whitespace separators between
+ /// attributes. This is intentional design with the reasons below:
+ ///
+ /// * to keep conversion simple and easy to understand for any possible input,
+ /// * to avoid adding less obvious conversion rule that can reduce compatibility
+ /// with other implementations more, and
+ /// * to follow the major design of implementations with the support for the
+ /// attribute blocks extension (as of this writing).
+ ///
+ /// See also: [`Options::ENABLE_HEADING_ATTRIBUTES`].
+ ///
+ /// [`Options::ENABLE_HEADING_ATTRIBUTES`]: `crate::Options::ENABLE_HEADING_ATTRIBUTES`
+ fn parse_inside_attribute_block(inside_attr_block: &str) -> Option<HeadingAttributes> {
+     let mut id = None;
+     let mut classes = Vec::new();
+
+     for attr in inside_attr_block.split_ascii_whitespace() {
+         // A `#` or `.` marker must be followed by at least one character;
+         // a bare marker is silently ignored, as are unmarked words.
+         if let Some(rest) = attr.strip_prefix('#') {
+             if !rest.is_empty() {
+                 id = Some(rest);
+             }
+         } else if let Some(rest) = attr.strip_prefix('.') {
+             if !rest.is_empty() {
+                 classes.push(rest);
+             }
+         }
+     }
+
+     Some(HeadingAttributes { id, classes })
+ }
+
+#[cfg(all(target_arch = "x86_64", feature = "simd"))]
+mod simd {
+ //! SIMD byte scanning logic.
+ //!
+ //! This module provides functions that allow walking through byteslices, calling
+ //! provided callback functions on special bytes and their indices using SIMD.
+ //! The byteset is defined in `compute_lookup`.
+ //!
+ //! The idea is to load in a chunk of 16 bytes and perform a lookup into a set of
+ //! bytes on all the bytes in this chunk simultaneously. We produce a 16 bit bitmask
+ //! from this and call the callback on every index corresponding to a 1 in this mask
+ //! before moving on to the next chunk. This allows us to move quickly when there
+ //! are no or few matches.
+ //!
+ //! The table lookup is inspired by this [great overview]. However, since all of the
+ //! bytes we're interested in are ASCII, we don't quite need the full generality of
+ //! the universal algorithm and are hence able to skip a few instructions.
+ //!
+ //! [great overview]: http://0x80.pl/articles/simd-byte-lookup.html
+
+ use super::{LookupTable, LoopInstruction};
+ use crate::Options;
+ use core::arch::x86_64::*;
+
+ const VECTOR_SIZE: usize = std::mem::size_of::<__m128i>();
+
+ /// Generates a lookup table containing the bitmaps for our
+ /// special marker bytes. This is effectively a 128 element 2d bitvector,
+ /// that can be indexed by a four bit row index (the lower nibble)
+ /// and a three bit column index (upper nibble).
+ ///
+ /// Must enable the same bytes as the scalar `special_bytes` table for the
+ /// same options, since both paths must agree on what is "special".
+ pub(super) fn compute_lookup(options: &Options) -> [u8; 16] {
+ let mut lookup = [0u8; 16];
+ let standard_bytes = [
+ b'\n', b'\r', b'*', b'_', b'&', b'\\', b'[', b']', b'<', b'!', b'`',
+ ];
+
+ for &byte in &standard_bytes {
+ add_lookup_byte(&mut lookup, byte);
+ }
+ if options.contains(Options::ENABLE_TABLES) {
+ add_lookup_byte(&mut lookup, b'|');
+ }
+ if options.contains(Options::ENABLE_STRIKETHROUGH) {
+ add_lookup_byte(&mut lookup, b'~');
+ }
+ if options.contains(Options::ENABLE_SMART_PUNCTUATION) {
+ for &byte in &[b'.', b'-', b'"', b'\''] {
+ add_lookup_byte(&mut lookup, byte);
+ }
+ }
+
+ lookup
+ }
+
+ /// Marks `byte` in the nibble table: row = low nibble, bit = high nibble.
+ fn add_lookup_byte(lookup: &mut [u8; 16], byte: u8) {
+ lookup[(byte & 0x0f) as usize] |= 1 << (byte >> 4);
+ }
+
+ /// Computes a bit mask for the given byteslice starting from the given index,
+ /// where the 16 least significant bits indicate (by value of 1) whether or not
+ /// there is a special character at that byte position. The least significant bit
+ /// corresponds to `bytes[ix]` and the most significant bit corresponds to
+ /// `bytes[ix + 15]`.
+ /// It is only safe to call this function when `bytes.len() >= ix + VECTOR_SIZE`
+ /// and the `ssse3` target feature is available.
+ #[target_feature(enable = "ssse3")]
+ #[inline]
+ unsafe fn compute_mask(lut: &[u8; 16], bytes: &[u8], ix: usize) -> i32 {
+ debug_assert!(bytes.len() >= ix + VECTOR_SIZE);
+
+ let bitmap = _mm_loadu_si128(lut.as_ptr() as *const __m128i);
+ // Small lookup table to compute single bit bitshifts
+ // for 16 bytes at once.
+ let bitmask_lookup =
+ _mm_setr_epi8(1, 2, 4, 8, 16, 32, 64, -128, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ // Load input from memory.
+ let raw_ptr = bytes.as_ptr().add(ix) as *const __m128i;
+ let input = _mm_loadu_si128(raw_ptr);
+ // Compute the bitmap using the bottom nibble as an index
+ // into the lookup table. Note that non-ascii bytes will have
+ // their most significant bit set and will map to lookup[0].
+ let bitset = _mm_shuffle_epi8(bitmap, input);
+ // Compute the high nibbles of the input using a 16-bit rightshift of four
+ // and a mask to prevent most-significant bit issues.
+ let higher_nibbles = _mm_and_si128(_mm_srli_epi16(input, 4), _mm_set1_epi8(0x0f));
+ // Create a bitmask for the bitmap by perform a left shift of the value
+ // of the higher nibble. Bytes with their most significant set are mapped
+ // to -1 (all ones).
+ let bitmask = _mm_shuffle_epi8(bitmask_lookup, higher_nibbles);
+ // Test the bit of the bitmap by AND'ing the bitmap and the mask together.
+ let tmp = _mm_and_si128(bitset, bitmask);
+ // Check whether the result was not null. NEQ is not a SIMD intrinsic,
+ // but comparing to the bitmask is logically equivalent. This also prevents us
+ // from matching any non-ASCII bytes since none of the bitmaps were all ones
+ // (-1).
+ let result = _mm_cmpeq_epi8(tmp, bitmask);
+
+ // Return the resulting bitmask.
+ _mm_movemask_epi8(result)
+ }
+
+ /// Calls callback on byte indices and their value.
+ /// Breaks when callback returns LoopInstruction::BreakAtWith(ix, val). And skips the
+ /// number of bytes in callback return value otherwise.
+ /// Returns the final index and a possible break value.
+ pub(super) fn iterate_special_bytes<F, T>(
+ lut: &LookupTable,
+ bytes: &[u8],
+ ix: usize,
+ callback: F,
+ ) -> (usize, Option<T>)
+ where
+ F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
+ {
+ if is_x86_feature_detected!("ssse3") && bytes.len() >= VECTOR_SIZE {
+ unsafe { simd_iterate_special_bytes(&lut.simd, bytes, ix, callback) }
+ } else {
+ super::scalar_iterate_special_bytes(&lut.scalar, bytes, ix, callback)
+ }
+ }
+
    /// Calls the callback function for every 1 in the given bitmask with
    /// the index `offset + ix`, where `ix` is the position of the 1 in the mask.
    /// Returns `Ok(ix)` to continue from index `ix`, `Err((end_ix, opt_val))` to break with
    /// final index `end_ix` and optional value `opt_val`.
    ///
    /// # Safety
    ///
    /// Every set bit in `mask` must correspond (via `offset`) to a valid index
    /// into `bytes`, because the byte is read with `get_unchecked`.
    unsafe fn process_mask<F, T>(
        mut mask: i32,
        bytes: &[u8],
        mut offset: usize,
        callback: &mut F,
    ) -> Result<usize, (usize, Option<T>)>
    where
        F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
    {
        while mask != 0 {
            // Position of the lowest set bit = distance to the next special byte.
            let mask_ix = mask.trailing_zeros() as usize;
            offset += mask_ix;
            match callback(offset, *bytes.get_unchecked(offset)) {
                LoopInstruction::ContinueAndSkip(skip) => {
                    offset += skip + 1;
                    // Discard the bit just handled plus any positions the
                    // callback asked us to skip over.
                    mask >>= skip + 1 + mask_ix;
                }
                LoopInstruction::BreakAtWith(ix, val) => return Err((ix, val)),
            }
        }
        Ok(offset)
    }
+
    #[target_feature(enable = "ssse3")]
    /// Important: only call this function when `bytes.len() >= 16`. Doing
    /// so otherwise may exhibit undefined behaviour.
    ///
    /// Scans `bytes` one 16-byte vector at a time, reporting special bytes to
    /// `callback` via `process_mask`. The final, possibly partial, vector is
    /// handled by re-reading the last full 16 bytes of the input.
    unsafe fn simd_iterate_special_bytes<F, T>(
        lut: &[u8; 16],
        bytes: &[u8],
        mut ix: usize,
        mut callback: F,
    ) -> (usize, Option<T>)
    where
        F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
    {
        debug_assert!(bytes.len() >= VECTOR_SIZE);
        let upperbound = bytes.len() - VECTOR_SIZE;

        while ix < upperbound {
            let mask = compute_mask(lut, bytes, ix);
            let block_start = ix;
            ix = match process_mask(mask, bytes, ix, &mut callback) {
                // Advance at least one full vector past the block start so no
                // byte is scanned twice.
                Ok(ix) => std::cmp::max(ix, VECTOR_SIZE + block_start),
                Err((end_ix, val)) => return (end_ix, val),
            };
        }

        if bytes.len() > ix {
            // shift off the bytes at start we have already scanned.
            // Note: `-` binds tighter than `>>` in Rust, so this shifts the
            // mask (computed over the final full vector) by `ix - upperbound`.
            let mask = compute_mask(lut, bytes, upperbound) >> ix - upperbound;
            if let Err((end_ix, val)) = process_mask(mask, bytes, ix, &mut callback) {
                return (end_ix, val);
            }
        }

        (bytes.len(), None)
    }
+
    /// Tests for the SSSE3 special-byte scanner: they check that the set of
    /// indices reported by `iterate_special_bytes` matches expectations.
    #[cfg(test)]
    mod simd_test {
        use super::super::create_lut;
        use super::{iterate_special_bytes, LoopInstruction};
        use crate::Options;

        /// Scans `bytes` with tables, footnotes, strikethrough and tasklists
        /// enabled, and asserts that the reported special-byte indices equal
        /// `expected`, skipping `skip` bytes after each match.
        fn check_expected_indices(bytes: &[u8], expected: &[usize], skip: usize) {
            let mut opts = Options::empty();
            opts.insert(Options::ENABLE_TABLES);
            opts.insert(Options::ENABLE_FOOTNOTES);
            opts.insert(Options::ENABLE_STRIKETHROUGH);
            opts.insert(Options::ENABLE_TASKLISTS);

            let lut = create_lut(&opts);
            let mut indices = vec![];

            iterate_special_bytes::<_, i32>(&lut, bytes, 0, |ix, _byte_ty| {
                indices.push(ix);
                LoopInstruction::ContinueAndSkip(skip)
            });

            assert_eq!(&indices[..], expected);
        }

        #[test]
        fn simple_no_match() {
            check_expected_indices("abcdef0123456789".as_bytes(), &[], 0);
        }

        #[test]
        fn simple_match() {
            check_expected_indices("*bcd&f0123456789".as_bytes(), &[0, 4], 0);
        }

        // Exercises the scalar fallback path (input shorter than one vector).
        #[test]
        fn single_open_fish() {
            check_expected_indices("<".as_bytes(), &[0], 0);
        }

        #[test]
        fn long_match() {
            check_expected_indices("0123456789abcde~*bcd&f0".as_bytes(), &[15, 16, 20], 0);
        }

        // Skipping must work correctly across the vector boundary.
        #[test]
        fn border_skip() {
            check_expected_indices("0123456789abcde~~~~d&f0".as_bytes(), &[15, 20], 3);
        }

        // Every special byte must be found regardless of the surrounding filler byte.
        #[test]
        fn exhaustive_search() {
            let chars = [
                b'\n', b'\r', b'*', b'_', b'~', b'|', b'&', b'\\', b'[', b']', b'<', b'!', b'`',
            ];

            for &c in &chars {
                for i in 0u8..=255 {
                    if !chars.contains(&i) {
                        // full match
                        let mut buf = [i; 18];
                        buf[3] = c;
                        buf[6] = c;

                        check_expected_indices(&buf[..], &[3, 6], 0);
                    }
                }
            }
        }
    }
+}
diff --git a/vendor/pulldown-cmark/src/html.rs b/vendor/pulldown-cmark/src/html.rs
new file mode 100644
index 000000000..fcfd51740
--- /dev/null
+++ b/vendor/pulldown-cmark/src/html.rs
@@ -0,0 +1,478 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+//! HTML renderer that takes an iterator of events as input.
+
+use std::collections::HashMap;
+use std::io::{self, Write};
+
+use crate::escape::{escape_href, escape_html, StrWrite, WriteWrapper};
+use crate::strings::CowStr;
+use crate::Event::*;
+use crate::{Alignment, CodeBlockKind, Event, LinkType, Tag};
+
/// Tracks whether the cells currently being rendered belong to the table
/// header (`<th>`) or the table body (`<td>`).
enum TableState {
    Head,
    Body,
}
+
/// Internal state of the event-to-HTML renderer.
struct HtmlWriter<'a, I, W> {
    /// Iterator supplying events.
    iter: I,

    /// Writer to write to.
    writer: W,

    /// Whether or not the last write wrote a newline.
    end_newline: bool,

    /// Whether the current table cells are header or body cells.
    table_state: TableState,
    /// Column alignments of the table currently being rendered.
    table_alignments: Vec<Alignment>,
    /// Index of the column of the cell currently being rendered.
    table_cell_index: usize,
    /// Footnote label -> assigned number, in order of first appearance.
    numbers: HashMap<CowStr<'a>, usize>,
}
+
impl<'a, I, W> HtmlWriter<'a, I, W>
where
    I: Iterator<Item = Event<'a>>,
    W: StrWrite,
{
    /// Creates a writer that pulls events from `iter` and renders into `writer`.
    fn new(iter: I, writer: W) -> Self {
        Self {
            iter,
            writer,
            end_newline: true,
            table_state: TableState::Head,
            table_alignments: vec![],
            table_cell_index: 0,
            numbers: HashMap::new(),
        }
    }

    /// Writes a new line.
    fn write_newline(&mut self) -> io::Result<()> {
        self.end_newline = true;
        self.writer.write_str("\n")
    }

    /// Writes a buffer, and tracks whether or not a newline was written.
    #[inline]
    fn write(&mut self, s: &str) -> io::Result<()> {
        self.writer.write_str(s)?;

        if !s.is_empty() {
            self.end_newline = s.ends_with('\n');
        }
        Ok(())
    }

    /// Drains the event iterator, rendering each event as HTML.
    fn run(mut self) -> io::Result<()> {
        while let Some(event) = self.iter.next() {
            match event {
                Start(tag) => {
                    self.start_tag(tag)?;
                }
                End(tag) => {
                    self.end_tag(tag)?;
                }
                Text(text) => {
                    // escape_html writes directly to the writer, so newline
                    // tracking has to be updated by hand here.
                    escape_html(&mut self.writer, &text)?;
                    self.end_newline = text.ends_with('\n');
                }
                Code(text) => {
                    self.write("<code>")?;
                    escape_html(&mut self.writer, &text)?;
                    self.write("</code>")?;
                }
                Html(html) => {
                    // Raw HTML is emitted verbatim, without escaping.
                    self.write(&html)?;
                }
                SoftBreak => {
                    self.write_newline()?;
                }
                HardBreak => {
                    self.write("<br />\n")?;
                }
                Rule => {
                    if self.end_newline {
                        self.write("<hr />\n")?;
                    } else {
                        self.write("\n<hr />\n")?;
                    }
                }
                FootnoteReference(name) => {
                    // Footnotes are numbered in order of first appearance.
                    let len = self.numbers.len() + 1;
                    self.write("<sup class=\"footnote-reference\"><a href=\"#")?;
                    escape_html(&mut self.writer, &name)?;
                    self.write("\">")?;
                    let number = *self.numbers.entry(name).or_insert(len);
                    write!(&mut self.writer, "{}", number)?;
                    self.write("</a></sup>")?;
                }
                TaskListMarker(true) => {
                    self.write("<input disabled=\"\" type=\"checkbox\" checked=\"\"/>\n")?;
                }
                TaskListMarker(false) => {
                    self.write("<input disabled=\"\" type=\"checkbox\"/>\n")?;
                }
            }
        }
        Ok(())
    }

    /// Writes the start of an HTML tag.
    fn start_tag(&mut self, tag: Tag<'a>) -> io::Result<()> {
        match tag {
            Tag::Paragraph => {
                if self.end_newline {
                    self.write("<p>")
                } else {
                    self.write("\n<p>")
                }
            }
            Tag::Heading(level, id, classes) => {
                if self.end_newline {
                    self.end_newline = false;
                    self.write("<")?;
                } else {
                    self.write("\n<")?;
                }
                // `level` renders as "h1" … "h6" via its Display impl.
                write!(&mut self.writer, "{}", level)?;
                if let Some(id) = id {
                    self.write(" id=\"")?;
                    escape_html(&mut self.writer, id)?;
                    self.write("\"")?;
                }
                let mut classes = classes.iter();
                if let Some(class) = classes.next() {
                    self.write(" class=\"")?;
                    escape_html(&mut self.writer, class)?;
                    for class in classes {
                        self.write(" ")?;
                        escape_html(&mut self.writer, class)?;
                    }
                    self.write("\"")?;
                }
                self.write(">")
            }
            Tag::Table(alignments) => {
                self.table_alignments = alignments;
                self.write("<table>")
            }
            Tag::TableHead => {
                self.table_state = TableState::Head;
                self.table_cell_index = 0;
                self.write("<thead><tr>")
            }
            Tag::TableRow => {
                self.table_cell_index = 0;
                self.write("<tr>")
            }
            Tag::TableCell => {
                match self.table_state {
                    TableState::Head => {
                        self.write("<th")?;
                    }
                    TableState::Body => {
                        self.write("<td")?;
                    }
                }
                // Apply the column alignment recorded when the table started.
                match self.table_alignments.get(self.table_cell_index) {
                    Some(&Alignment::Left) => self.write(" style=\"text-align: left\">"),
                    Some(&Alignment::Center) => self.write(" style=\"text-align: center\">"),
                    Some(&Alignment::Right) => self.write(" style=\"text-align: right\">"),
                    _ => self.write(">"),
                }
            }
            Tag::BlockQuote => {
                if self.end_newline {
                    self.write("<blockquote>\n")
                } else {
                    self.write("\n<blockquote>\n")
                }
            }
            Tag::CodeBlock(info) => {
                if !self.end_newline {
                    self.write_newline()?;
                }
                match info {
                    CodeBlockKind::Fenced(info) => {
                        // Only the first word of the info string is the language.
                        let lang = info.split(' ').next().unwrap();
                        if lang.is_empty() {
                            self.write("<pre><code>")
                        } else {
                            self.write("<pre><code class=\"language-")?;
                            escape_html(&mut self.writer, lang)?;
                            self.write("\">")
                        }
                    }
                    CodeBlockKind::Indented => self.write("<pre><code>"),
                }
            }
            // A start of 1 is the default, so the attribute is omitted.
            Tag::List(Some(1)) => {
                if self.end_newline {
                    self.write("<ol>\n")
                } else {
                    self.write("\n<ol>\n")
                }
            }
            Tag::List(Some(start)) => {
                if self.end_newline {
                    self.write("<ol start=\"")?;
                } else {
                    self.write("\n<ol start=\"")?;
                }
                write!(&mut self.writer, "{}", start)?;
                self.write("\">\n")
            }
            Tag::List(None) => {
                if self.end_newline {
                    self.write("<ul>\n")
                } else {
                    self.write("\n<ul>\n")
                }
            }
            Tag::Item => {
                if self.end_newline {
                    self.write("<li>")
                } else {
                    self.write("\n<li>")
                }
            }
            Tag::Emphasis => self.write("<em>"),
            Tag::Strong => self.write("<strong>"),
            Tag::Strikethrough => self.write("<del>"),
            // Email autolinks get a mailto: scheme prepended.
            Tag::Link(LinkType::Email, dest, title) => {
                self.write("<a href=\"mailto:")?;
                escape_href(&mut self.writer, &dest)?;
                if !title.is_empty() {
                    self.write("\" title=\"")?;
                    escape_html(&mut self.writer, &title)?;
                }
                self.write("\">")
            }
            Tag::Link(_link_type, dest, title) => {
                self.write("<a href=\"")?;
                escape_href(&mut self.writer, &dest)?;
                if !title.is_empty() {
                    self.write("\" title=\"")?;
                    escape_html(&mut self.writer, &title)?;
                }
                self.write("\">")
            }
            Tag::Image(_link_type, dest, title) => {
                self.write("<img src=\"")?;
                escape_href(&mut self.writer, &dest)?;
                self.write("\" alt=\"")?;
                // The image's inner events become the alt text; this consumes
                // everything up to (and including) the matching End tag.
                self.raw_text()?;
                if !title.is_empty() {
                    self.write("\" title=\"")?;
                    escape_html(&mut self.writer, &title)?;
                }
                self.write("\" />")
            }
            Tag::FootnoteDefinition(name) => {
                if self.end_newline {
                    self.write("<div class=\"footnote-definition\" id=\"")?;
                } else {
                    self.write("\n<div class=\"footnote-definition\" id=\"")?;
                }
                escape_html(&mut self.writer, &*name)?;
                self.write("\"><sup class=\"footnote-definition-label\">")?;
                // Reuse the number assigned to a forward reference, if any.
                let len = self.numbers.len() + 1;
                let number = *self.numbers.entry(name).or_insert(len);
                write!(&mut self.writer, "{}", number)?;
                self.write("</sup>")
            }
        }
    }

    /// Writes the closing HTML for `tag`.
    fn end_tag(&mut self, tag: Tag) -> io::Result<()> {
        match tag {
            Tag::Paragraph => {
                self.write("</p>\n")?;
            }
            Tag::Heading(level, _id, _classes) => {
                self.write("</")?;
                write!(&mut self.writer, "{}", level)?;
                self.write(">\n")?;
            }
            Tag::Table(_) => {
                self.write("</tbody></table>\n")?;
            }
            Tag::TableHead => {
                // The body starts right after the header; there is no
                // TableBody tag (see the Tag documentation).
                self.write("</tr></thead><tbody>\n")?;
                self.table_state = TableState::Body;
            }
            Tag::TableRow => {
                self.write("</tr>\n")?;
            }
            Tag::TableCell => {
                match self.table_state {
                    TableState::Head => {
                        self.write("</th>")?;
                    }
                    TableState::Body => {
                        self.write("</td>")?;
                    }
                }
                self.table_cell_index += 1;
            }
            Tag::BlockQuote => {
                self.write("</blockquote>\n")?;
            }
            Tag::CodeBlock(_) => {
                self.write("</code></pre>\n")?;
            }
            Tag::List(Some(_)) => {
                self.write("</ol>\n")?;
            }
            Tag::List(None) => {
                self.write("</ul>\n")?;
            }
            Tag::Item => {
                self.write("</li>\n")?;
            }
            Tag::Emphasis => {
                self.write("</em>")?;
            }
            Tag::Strong => {
                self.write("</strong>")?;
            }
            Tag::Strikethrough => {
                self.write("</del>")?;
            }
            Tag::Link(_, _, _) => {
                self.write("</a>")?;
            }
            Tag::Image(_, _, _) => (), // shouldn't happen, handled in start
            Tag::FootnoteDefinition(_) => {
                self.write("</div>\n")?;
            }
        }
        Ok(())
    }

    // run raw text, consuming end tag. Used to flatten an element's contents
    // into escaped plain text (e.g. for an image's alt attribute), tracking
    // nesting depth so only the matching End event terminates the loop.
    fn raw_text(&mut self) -> io::Result<()> {
        let mut nest = 0;
        while let Some(event) = self.iter.next() {
            match event {
                Start(_) => nest += 1,
                End(_) => {
                    if nest == 0 {
                        break;
                    }
                    nest -= 1;
                }
                Html(text) | Code(text) | Text(text) => {
                    escape_html(&mut self.writer, &text)?;
                    self.end_newline = text.ends_with('\n');
                }
                SoftBreak | HardBreak | Rule => {
                    self.write(" ")?;
                }
                FootnoteReference(name) => {
                    let len = self.numbers.len() + 1;
                    let number = *self.numbers.entry(name).or_insert(len);
                    write!(&mut self.writer, "[{}]", number)?;
                }
                TaskListMarker(true) => self.write("[x]")?,
                TaskListMarker(false) => self.write("[ ]")?,
            }
        }
        Ok(())
    }
}
+
/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
/// push it to a `String`.
///
/// # Examples
///
/// ```
/// use pulldown_cmark::{html, Parser};
///
/// let markdown_str = r#"
/// hello
/// =====
///
/// * alpha
/// * beta
/// "#;
/// let parser = Parser::new(markdown_str);
///
/// let mut html_buf = String::new();
/// html::push_html(&mut html_buf, parser);
///
/// assert_eq!(html_buf, r#"<h1>hello</h1>
/// <ul>
/// <li>alpha</li>
/// <li>beta</li>
/// </ul>
/// "#);
/// ```
pub fn push_html<'a, I>(s: &mut String, iter: I)
where
    I: Iterator<Item = Event<'a>>,
{
    // NOTE: writing into an in-memory String is expected to be infallible
    // (the `StrWrite` impl for String should never error), so the unwrap is
    // not expected to fire.
    HtmlWriter::new(iter, s).run().unwrap();
}
+
/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
/// write it out to a writable stream.
///
/// **Note**: using this function with an unbuffered writer like a file or socket
/// will result in poor performance. Wrap these in a
/// [`BufWriter`](https://doc.rust-lang.org/std/io/struct.BufWriter.html) to
/// prevent unnecessary slowdowns.
///
/// # Errors
///
/// Returns the first I/O error produced by `writer`, if any.
///
/// # Examples
///
/// ```
/// use pulldown_cmark::{html, Parser};
/// use std::io::Cursor;
///
/// let markdown_str = r#"
/// hello
/// =====
///
/// * alpha
/// * beta
/// "#;
/// let mut bytes = Vec::new();
/// let parser = Parser::new(markdown_str);
///
/// html::write_html(Cursor::new(&mut bytes), parser);
///
/// assert_eq!(&String::from_utf8_lossy(&bytes)[..], r#"<h1>hello</h1>
/// <ul>
/// <li>alpha</li>
/// <li>beta</li>
/// </ul>
/// "#);
/// ```
pub fn write_html<'a, I, W>(writer: W, iter: I) -> io::Result<()>
where
    I: Iterator<Item = Event<'a>>,
    W: Write,
{
    HtmlWriter::new(iter, WriteWrapper(writer)).run()
}
diff --git a/vendor/pulldown-cmark/src/lib.rs b/vendor/pulldown-cmark/src/lib.rs
new file mode 100644
index 000000000..9d2386e5a
--- /dev/null
+++ b/vendor/pulldown-cmark/src/lib.rs
@@ -0,0 +1,289 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+//! Pull parser for [CommonMark](https://commonmark.org). This crate provides a [Parser](struct.Parser.html) struct
+//! which is an iterator over [Event](enum.Event.html)s. This iterator can be used
+//! directly, or to output HTML using the [HTML module](html/index.html).
+//!
+//! By default, only CommonMark features are enabled. To use extensions like tables,
+//! footnotes or task lists, enable them by setting the corresponding flags in the
+//! [Options](struct.Options.html) struct.
+//!
+//! # Example
+//! ```rust
+//! use pulldown_cmark::{Parser, Options, html};
+//!
+//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
+//!
+//! // Set up options and parser. Strikethroughs are not part of the CommonMark standard
+//! // and we therefore must enable it explicitly.
+//! let mut options = Options::empty();
+//! options.insert(Options::ENABLE_STRIKETHROUGH);
+//! let parser = Parser::new_ext(markdown_input, options);
+//!
+//! // Write to String buffer.
+//! let mut html_output = String::new();
+//! html::push_html(&mut html_output, parser);
+//!
+//! // Check that the output is what we expected.
+//! let expected_html = "<p>Hello world, this is a <del>complicated</del> <em>very simple</em> example.</p>\n";
+//! assert_eq!(expected_html, &html_output);
+//! ```
+
+// When compiled for the rustc compiler itself we want to make sure that this is
+// an unstable crate.
+#![cfg_attr(rustbuild, feature(staged_api, rustc_private))]
+#![cfg_attr(rustbuild, unstable(feature = "rustc_private", issue = "27812"))]
+// Forbid unsafe code unless the SIMD feature is enabled.
+#![cfg_attr(not(feature = "simd"), forbid(unsafe_code))]
+
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
+
+pub mod html;
+
+mod entities;
+pub mod escape;
+mod firstpass;
+mod linklabel;
+mod parse;
+mod puncttable;
+mod scanners;
+mod strings;
+mod tree;
+
+use std::{convert::TryFrom, fmt::Display};
+
+pub use crate::parse::{BrokenLink, BrokenLinkCallback, LinkDef, OffsetIter, Parser, RefDefs};
+pub use crate::strings::{CowStr, InlineStr};
+
/// Codeblock kind.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum CodeBlockKind<'a> {
    /// A code block introduced by indentation; it has no info string.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Fenced(CowStr<'a>),
}
+
+impl<'a> CodeBlockKind<'a> {
+ pub fn is_indented(&self) -> bool {
+ matches!(*self, CodeBlockKind::Indented)
+ }
+
+ pub fn is_fenced(&self) -> bool {
+ matches!(*self, CodeBlockKind::Fenced(_))
+ }
+}
+
/// Tags for elements that can contain other elements.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Tag<'a> {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading. The first field indicates the level of the heading,
    /// the second the fragment identifier, and the third the classes.
    Heading(HeadingLevel, Option<&'a str>, Vec<&'a str>),

    /// A block quote.
    BlockQuote,
    /// A code block.
    CodeBlock(CodeBlockKind<'a>),

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)
    /// A list item.
    Item,
    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteDefinition(CowStr<'a>),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),
    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,
    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
    TableRow,
    /// A single cell within a table row.
    TableCell,

    // span-level tags
    /// Emphasized text (rendered as `<em>` by the HTML renderer).
    Emphasis,
    /// Strong text (rendered as `<strong>` by the HTML renderer).
    Strong,
    /// Struck-through text (rendered as `<del>` by the HTML renderer).
    Strikethrough,

    /// A link. The first field is the link type, the second the destination URL and the third is a title.
    Link(LinkType, CowStr<'a>, CowStr<'a>),

    /// An image. The first field is the link type, the second the destination URL and the third is a title.
    Image(LinkType, CowStr<'a>, CowStr<'a>),
}
+
/// Heading level, `H1` through `H6`.
///
/// The discriminants match the numeric level (`HeadingLevel::H1 as usize == 1`).
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HeadingLevel {
    H1 = 1,
    H2,
    H3,
    H4,
    H5,
    H6,
}
+
+impl Display for HeadingLevel {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Self::H1 => write!(f, "h1"),
+ Self::H2 => write!(f, "h2"),
+ Self::H3 => write!(f, "h3"),
+ Self::H4 => write!(f, "h4"),
+ Self::H5 => write!(f, "h5"),
+ Self::H6 => write!(f, "h6"),
+ }
+ }
+}
+
/// Returned when trying to convert a `usize` into a `Heading` but it fails
/// because the usize isn't a valid heading level (valid levels are 1 through 6).
/// The wrapped value is the offending input.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct InvalidHeadingLevel(usize);
+
+impl TryFrom<usize> for HeadingLevel {
+ type Error = InvalidHeadingLevel;
+
+ fn try_from(value: usize) -> Result<Self, Self::Error> {
+ match value {
+ 1 => Ok(Self::H1),
+ 2 => Ok(Self::H2),
+ 3 => Ok(Self::H3),
+ 4 => Ok(Self::H4),
+ 5 => Ok(Self::H5),
+ 6 => Ok(Self::H6),
+ _ => Err(InvalidHeadingLevel(value)),
+ }
+ }
+}
+
/// Type specifier for inline links. See [the Tag::Link](enum.Tag.html#variant.Link) for more information.
///
/// The `*Unknown` variants mark links whose destination was not found in the
/// document but was supplied by the broken-link callback.
#[derive(Clone, Debug, PartialEq, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LinkType {
    /// Inline link like `[foo](bar)`
    Inline,
    /// Reference link like `[foo][bar]`
    Reference,
    /// Reference without destination in the document, but resolved by the broken_link_callback
    ReferenceUnknown,
    /// Collapsed link like `[foo][]`
    Collapsed,
    /// Collapsed link without destination in the document, but resolved by the broken_link_callback
    CollapsedUnknown,
    /// Shortcut link like `[foo]`
    Shortcut,
    /// Shortcut without destination in the document, but resolved by the broken_link_callback
    ShortcutUnknown,
    /// Autolink like `<http://foo.bar/baz>`
    Autolink,
    /// Email address in autolink like `<john@example.org>`
    Email,
}
+
impl LinkType {
    /// Maps a reference-style link type to its `*Unknown` counterpart, used
    /// when the destination is resolved by the broken-link callback.
    ///
    /// Must only be called on `Reference`, `Collapsed` or `Shortcut`; any
    /// other variant is a caller bug and panics via `unreachable!`.
    fn to_unknown(self) -> Self {
        match self {
            LinkType::Reference => LinkType::ReferenceUnknown,
            LinkType::Collapsed => LinkType::CollapsedUnknown,
            LinkType::Shortcut => LinkType::ShortcutUnknown,
            _ => unreachable!(),
        }
    }
}
+
/// Markdown events that are generated in a preorder traversal of the document
/// tree, with additional `End` events whenever all of an inner node's children
/// have been visited.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Event<'a> {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Start(Tag<'a>),
    /// End of a tagged element.
    #[cfg_attr(feature = "serde", serde(borrow))]
    End(Tag<'a>),
    /// A text node.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Text(CowStr<'a>),
    /// An inline code node.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Code(CowStr<'a>),
    /// An HTML node. The HTML renderer emits its contents verbatim, without
    /// escaping.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Html(CowStr<'a>),
    /// A reference to a footnote with given label, which may or may not be defined
    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
    /// occur in any order.
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteReference(CowStr<'a>),
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
    TaskListMarker(bool),
}
+
/// Table column text alignment.
///
/// Fix: removed the stray blank line that separated the `cfg_attr` attribute
/// from the item it decorates (legal, but confusing and rejected by rustfmt).
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Alignment {
    /// Default text alignment.
    None,
    /// Left-aligned column.
    Left,
    /// Center-aligned column.
    Center,
    /// Right-aligned column.
    Right,
}
+
bitflags::bitflags! {
    /// Option struct containing flags for enabling extra features
    /// that are not part of the CommonMark spec.
    pub struct Options: u32 {
        /// Enables the tables extension.
        const ENABLE_TABLES = 1 << 1;
        /// Enables the footnotes extension.
        const ENABLE_FOOTNOTES = 1 << 2;
        /// Enables the strikethrough extension (`~~text~~`).
        const ENABLE_STRIKETHROUGH = 1 << 3;
        /// Enables the task list extension (checkbox list items).
        const ENABLE_TASKLISTS = 1 << 4;
        /// Enables the smart punctuation extension.
        const ENABLE_SMART_PUNCTUATION = 1 << 5;
        /// Extension to allow headings to have ID and classes.
        ///
        /// `# text { #id .class1 .class2 }` is interpreted as a level 1 heading
        /// with the content `text`, ID `id`, and classes `class1` and `class2`.
        /// Note that attributes (ID and classes) should be space-separated.
        const ENABLE_HEADING_ATTRIBUTES = 1 << 6;
    }
}
diff --git a/vendor/pulldown-cmark/src/linklabel.rs b/vendor/pulldown-cmark/src/linklabel.rs
new file mode 100644
index 000000000..23b4b828b
--- /dev/null
+++ b/vendor/pulldown-cmark/src/linklabel.rs
@@ -0,0 +1,135 @@
+// Copyright 2018 Google LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+//! Link label parsing and matching.
+
+use unicase::UniCase;
+
+use crate::scanners::{is_ascii_whitespace, scan_eol};
+use crate::strings::CowStr;
+
/// A parsed reference label: either a regular link reference or a footnote
/// reference.
pub(crate) enum ReferenceLabel<'a> {
    Link(CowStr<'a>),
    Footnote(CowStr<'a>),
}

/// Case-insensitive link label, used as the lookup key for reference definitions.
pub(crate) type LinkLabel<'a> = UniCase<CowStr<'a>>;
+
/// Assumes the opening bracket has already been scanned.
/// The line break handler determines what happens when a linebreak
/// is found. It is passed the bytes following the line break and
/// either returns `Some(k)`, where `k` is the number of bytes to skip,
/// or `None` to abort parsing the label.
/// Returns the number of bytes read (including closing bracket) and label on success.
pub(crate) fn scan_link_label_rest<'t>(
    text: &'t str,
    linebreak_handler: &dyn Fn(&[u8]) -> Option<usize>,
) -> Option<(usize, CowStr<'t>)> {
    let bytes = text.as_bytes();
    let mut ix = 0;
    let mut only_white_space = true;
    // Rough size counter used to bail out on pathologically long labels.
    let mut codepoints = 0;
    // no worries, doesn't allocate until we push things onto it
    let mut label = String::new();
    // Start of the span of `text` not yet copied into `label`.
    let mut mark = 0;

    loop {
        if codepoints >= 1000 {
            return None;
        }
        match *bytes.get(ix)? {
            b'[' => return None,
            b']' => break,
            b'\\' => {
                // Skip the escaped byte; an escape at end of input is caught
                // by the `bytes.get(ix)?` lookup on the next iteration.
                ix += 2;
                codepoints += 2;
                only_white_space = false;
            }
            b if is_ascii_whitespace(b) => {
                // normalize labels by collapsing whitespaces, including linebreaks
                let mut whitespaces = 0;
                let mut linebreaks = 0;
                let whitespace_start = ix;

                while ix < bytes.len() && is_ascii_whitespace(bytes[ix]) {
                    if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
                        linebreaks += 1;
                        // A label may span at most one line break.
                        if linebreaks > 1 {
                            return None;
                        }
                        ix += eol_bytes;
                        ix += linebreak_handler(&bytes[ix..])?;
                        whitespaces += 2; // indicate that we need to replace
                    } else {
                        whitespaces += if bytes[ix] == b' ' { 1 } else { 2 };
                        ix += 1;
                    }
                }
                // Anything other than a single plain space must be collapsed:
                // copy the unprocessed span into `label`, then one space.
                if whitespaces > 1 {
                    label.push_str(&text[mark..whitespace_start]);
                    label.push(' ');
                    mark = ix;
                    codepoints += ix - whitespace_start;
                } else {
                    codepoints += 1;
                }
            }
            b => {
                only_white_space = false;
                ix += 1;
                // NOTE(review): only bytes with the high bit set bump the
                // count here, so the 1000 cap is a rough bound rather than an
                // exact codepoint count — confirm against the spec's limit.
                if b & 0b1000_0000 != 0 {
                    codepoints += 1;
                }
            }
        }
    }

    if only_white_space {
        None
    } else {
        // If no normalization happened we can borrow the input directly.
        let cow = if mark == 0 {
            text[..ix].into()
        } else {
            label.push_str(&text[mark..ix]);
            label.into()
        };
        // +1 so the count includes the closing bracket.
        Some((ix + 1, cow))
    }
}
+
#[cfg(test)]
mod test {
    use super::scan_link_label_rest;

    // Tabs and runs of whitespace inside a label must collapse to single spaces.
    #[test]
    fn whitespace_normalization() {
        let input = "«\t\tBlurry Eyes\t\t»][blurry_eyes]";
        let expected_output = "« Blurry Eyes »"; // regular spaces!

        let (_bytes, normalized_label) = scan_link_label_rest(input, &|_| None).unwrap();
        assert_eq!(expected_output, normalized_label.as_ref());
    }

    // CRLF line endings count as a single line break and must not abort the scan.
    #[test]
    fn return_carriage_linefeed_ok() {
        let input = "hello\r\nworld\r\n]";
        assert!(scan_link_label_rest(input, &|_| Some(0)).is_some());
    }
}
diff --git a/vendor/pulldown-cmark/src/main.rs b/vendor/pulldown-cmark/src/main.rs
new file mode 100644
index 000000000..5335e1f58
--- /dev/null
+++ b/vendor/pulldown-cmark/src/main.rs
@@ -0,0 +1,123 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+//! Command line tool to exercise pulldown-cmark.
+
+#![forbid(unsafe_code)]
+
+use pulldown_cmark::{html, Options, Parser};
+
+use std::env;
+use std::io::{self, Read};
+use std::mem;
+
+fn dry_run(text: &str, opts: Options) {
+    // Drive the parser to completion without rendering anything; only the
+    // total number of events is reported.
+    let event_count = Parser::new_ext(text, opts).count();
+    println!("{} events", event_count);
+}
+
+fn print_events(text: &str, opts: Options) {
+    // Dump every event together with its source byte range, one per line,
+    // then a terminating EOF marker.
+    for (event, range) in Parser::new_ext(text, opts).into_offset_iter() {
+        println!("{:?}: {:?}", range, event);
+    }
+    println!("EOF");
+}
+
+/// Builds the usage banner shown by `--help` and on option-parse errors.
+fn brief(program: &str) -> String {
+    format!(
+        "Usage: {} [options]\n\nReads markdown from standard input and emits HTML.",
+        program,
+    )
+}
+
+pub fn main() -> std::io::Result<()> {
+    let args: Vec<_> = env::args().collect();
+    // Declare the accepted command-line flags.
+    let mut opts = getopts::Options::new();
+    opts.optflag("h", "help", "this help message");
+    opts.optflag("d", "dry-run", "dry run, produce no output");
+    opts.optflag("e", "events", "print event sequence instead of rendering");
+    opts.optflag("T", "enable-tables", "enable GitHub-style tables");
+    opts.optflag("F", "enable-footnotes", "enable Hoedown-style footnotes");
+    opts.optflag(
+        "S",
+        "enable-strikethrough",
+        "enable GitHub-style strikethrough",
+    );
+    opts.optflag("L", "enable-tasklists", "enable GitHub-style task lists");
+    opts.optflag("P", "enable-smart-punctuation", "enable smart punctuation");
+    opts.optflag(
+        "H",
+        "enable-heading-attributes",
+        "enable heading attributes",
+    );
+
+    let matches = match opts.parse(&args[1..]) {
+        Ok(m) => m,
+        Err(f) => {
+            // Malformed/unknown flags: report the error plus usage, exit non-zero.
+            eprintln!("{}\n{}", f, opts.usage(&brief(&args[0])));
+            std::process::exit(1);
+        }
+    };
+    if matches.opt_present("help") {
+        println!("{}", opts.usage(&brief(&args[0])));
+        return Ok(());
+    }
+    // Map each enable-* flag onto the matching parser extension bit.
+    // (This `opts` shadows the getopts::Options binding above.)
+    let mut opts = Options::empty();
+    if matches.opt_present("enable-tables") {
+        opts.insert(Options::ENABLE_TABLES);
+    }
+    if matches.opt_present("enable-footnotes") {
+        opts.insert(Options::ENABLE_FOOTNOTES);
+    }
+    if matches.opt_present("enable-strikethrough") {
+        opts.insert(Options::ENABLE_STRIKETHROUGH);
+    }
+    if matches.opt_present("enable-tasklists") {
+        opts.insert(Options::ENABLE_TASKLISTS);
+    }
+    if matches.opt_present("enable-smart-punctuation") {
+        opts.insert(Options::ENABLE_SMART_PUNCTUATION);
+    }
+    if matches.opt_present("enable-heading-attributes") {
+        opts.insert(Options::ENABLE_HEADING_ATTRIBUTES);
+    }
+
+    // All modes read the entire document from stdin up front.
+    let mut input = String::new();
+    io::stdin().lock().read_to_string(&mut input)?;
+    if matches.opt_present("events") {
+        print_events(&input, opts);
+    } else if matches.opt_present("dry-run") {
+        dry_run(&input, opts);
+    } else {
+        let mut p = Parser::new_ext(&input, opts);
+        let stdio = io::stdout();
+        // Large write buffer: HTML output is emitted in many small pieces.
+        let buffer = std::io::BufWriter::with_capacity(1024 * 1024, stdio.lock());
+        html::write_html(buffer, &mut p)?;
+        // Since the program will now terminate and the memory will be returned
+        // to the operating system anyway, there is no point in tidely cleaning
+        // up all the datastructures we have used. We shouldn't do this if we'd
+        // do other things after this, because this is basically intentionally
+        // leaking data. Skipping cleanup lets us return a bit (~5%) faster.
+        mem::forget(p);
+    }
+    Ok(())
+}
diff --git a/vendor/pulldown-cmark/src/parse.rs b/vendor/pulldown-cmark/src/parse.rs
new file mode 100644
index 000000000..8355ce2f8
--- /dev/null
+++ b/vendor/pulldown-cmark/src/parse.rs
@@ -0,0 +1,1904 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+//! Tree-based two pass parser.
+
+use std::cmp::{max, min};
+use std::collections::{HashMap, VecDeque};
+use std::iter::FusedIterator;
+use std::num::NonZeroUsize;
+use std::ops::{Index, Range};
+
+use unicase::UniCase;
+
+use crate::firstpass::run_first_pass;
+use crate::linklabel::{scan_link_label_rest, LinkLabel, ReferenceLabel};
+use crate::scanners::*;
+use crate::strings::CowStr;
+use crate::tree::{Tree, TreeIndex};
+use crate::{Alignment, CodeBlockKind, Event, HeadingLevel, LinkType, Options, Tag};
+
+// Allowing arbitrary depth nested parentheses inside link destinations
+// can create denial of service vulnerabilities if we're not careful.
+// The simplest countermeasure is to limit their depth, which is
+// explicitly allowed by the spec as long as the limit is at least 3:
+// https://spec.commonmark.org/0.29/#link-destination
+const LINK_MAX_NESTED_PARENS: usize = 5;
+
+/// A parse-tree node payload: a source byte span plus what it represents.
+#[derive(Debug, Default, Clone, Copy)]
+pub(crate) struct Item {
+    pub start: usize, // start byte offset into the source text
+    pub end: usize,   // end byte offset (exclusive)
+    pub body: ItemBody,
+}
+
+/// The kind of a parse-tree [`Item`]. `Maybe*` variants are provisional
+/// markers produced by the first pass; the inline passes either resolve
+/// them into their final form or demote them to plain `Text`.
+#[derive(Debug, PartialEq, Clone, Copy)]
+pub(crate) enum ItemBody {
+    Paragraph,
+    Text,
+    SoftBreak,
+    HardBreak,
+
+    // These are possible inline items, need to be resolved in second pass.
+
+    // repeats, can_open, can_close
+    MaybeEmphasis(usize, bool, bool),
+    // quote byte, can_open, can_close
+    MaybeSmartQuote(u8, bool, bool),
+    MaybeCode(usize, bool), // number of backticks, preceded by backslash
+    MaybeHtml,
+    MaybeLinkOpen,
+    // bool indicates whether or not the preceding section could be a reference
+    MaybeLinkClose(bool),
+    MaybeImage,
+
+    // These are inline items after resolution.
+    Emphasis,
+    Strong,
+    Strikethrough,
+    Code(CowIndex),
+    Link(LinkIndex),
+    Image(LinkIndex),
+    FootnoteReference(CowIndex),
+    TaskListMarker(bool), // true for checked
+
+    Rule,
+    Heading(HeadingLevel, Option<HeadingIndex>), // heading level
+    FencedCodeBlock(CowIndex),
+    IndentCodeBlock,
+    Html,
+    OwnedHtml(CowIndex),
+    BlockQuote,
+    List(bool, u8, u64), // is_tight, list character, list start index
+    ListItem(usize),     // indent level
+    SynthesizeText(CowIndex),
+    SynthesizeChar(char),
+    FootnoteDefinition(CowIndex),
+
+    // Tables
+    Table(AlignmentIndex),
+    TableHead,
+    TableRow,
+    TableCell,
+
+    // Dummy node at the top of the tree - should not be used otherwise!
+    Root,
+}
+
+// NOTE: the original `impl<'a> ItemBody` declared a lifetime parameter that
+// is never used; it has been dropped.
+impl ItemBody {
+    /// Whether this is an unresolved inline marker that the inline passes
+    /// still need to process.
+    fn is_inline(&self) -> bool {
+        matches!(
+            *self,
+            ItemBody::MaybeEmphasis(..)
+                | ItemBody::MaybeSmartQuote(..)
+                | ItemBody::MaybeHtml
+                | ItemBody::MaybeCode(..)
+                | ItemBody::MaybeLinkOpen
+                | ItemBody::MaybeLinkClose(..)
+                | ItemBody::MaybeImage
+        )
+    }
+}
+
+// NOTE: dropped the unused lifetime parameter from `impl<'a> Default`.
+impl Default for ItemBody {
+    /// Tree nodes start life as `Root`; real bodies are assigned during parsing.
+    fn default() -> Self {
+        ItemBody::Root
+    }
+}
+
+/// Information passed to the broken-link callback when a reference
+/// cannot be resolved against the document's link definitions.
+pub struct BrokenLink<'a> {
+    /// Byte range of the whole reference in the source text.
+    pub span: std::ops::Range<usize>,
+    pub link_type: LinkType,
+    /// The reference label as written.
+    pub reference: CowStr<'a>,
+}
+
+/// Markdown event iterator.
+pub struct Parser<'input, 'callback> {
+    text: &'input str,
+    options: Options,
+    tree: Tree<Item>,
+    // side-table for owned strings, links, alignments, etc.
+    allocs: Allocations<'input>,
+    broken_link_callback: BrokenLinkCallback<'input, 'callback>,
+    // guards against pathological re-scanning of inline HTML
+    html_scan_guard: HtmlScanGuard,
+
+    // used by inline passes. store them here for reuse
+    inline_stack: InlineStack,
+    link_stack: LinkStack,
+}
+
+impl<'input, 'callback> Parser<'input, 'callback> {
+    /// Creates a new event iterator for a markdown string without any options enabled.
+    ///
+    /// Equivalent to `Parser::new_ext(text, Options::empty())`.
+    pub fn new(text: &'input str) -> Self {
+        Parser::new_ext(text, Options::empty())
+    }
+
+    /// Creates a new event iterator for a markdown string with given options.
+    ///
+    /// No broken-link callback is installed, so unresolved references are
+    /// left as plain text.
+    pub fn new_ext(text: &'input str, options: Options) -> Self {
+        Parser::new_with_broken_link_callback(text, options, None)
+    }
+
+    /// In case the parser encounters any potential links that have a broken
+    /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
+    /// the provided callback will be called with the reference name,
+    /// and the returned pair will be used as the link name and title if it is not
+    /// `None`.
+    pub fn new_with_broken_link_callback(
+        text: &'input str,
+        options: Options,
+        broken_link_callback: BrokenLinkCallback<'input, 'callback>,
+    ) -> Self {
+        // Block structure is resolved up front; inline structure is resolved
+        // lazily as the iterator is driven.
+        let (mut tree, allocs) = run_first_pass(text, options);
+        tree.reset();
+        Parser {
+            text,
+            options,
+            tree,
+            allocs,
+            broken_link_callback,
+            inline_stack: Default::default(),
+            link_stack: Default::default(),
+            html_scan_guard: Default::default(),
+        }
+    }
+
+    /// Returns a reference to the internal `RefDefs` object, which provides access
+    /// to the internal map of reference definitions.
+    pub fn reference_definitions(&self) -> &RefDefs {
+        &self.allocs.refdefs
+    }
+
+    /// Handle inline markup.
+    ///
+    /// When the parser encounters any item indicating potential inline markup, all
+    /// inline markup passes are run on the remainder of the chain.
+    ///
+    /// Note: there's some potential for optimization here, but that's future work.
+    fn handle_inline(&mut self) {
+        // Code spans / HTML / links first (higher precedence), then emphasis.
+        self.handle_inline_pass1();
+        self.handle_emphasis();
+    }
+
+    /// Handle inline HTML, code spans, and links.
+    ///
+    /// This function handles both inline HTML and code spans, because they have
+    /// the same precedence. It also handles links, even though they have lower
+    /// precedence, because the URL of links must not be processed.
+    fn handle_inline_pass1(&mut self) {
+        let mut code_delims = CodeDelims::new();
+        let mut cur = self.tree.cur();
+        let mut prev = None;
+
+        let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
+        let block_text = &self.text[..block_end];
+
+        while let Some(mut cur_ix) = cur {
+            match self.tree[cur_ix].item.body {
+                ItemBody::MaybeHtml => {
+                    let next = self.tree[cur_ix].next;
+                    let autolink = if let Some(next_ix) = next {
+                        scan_autolink(block_text, self.tree[next_ix].item.start)
+                    } else {
+                        None
+                    };
+
+                    if let Some((ix, uri, link_type)) = autolink {
+                        // Rewrite the `<` node into a Link node wrapping the URI text.
+                        let node = scan_nodes_to_ix(&self.tree, next, ix);
+                        let text_node = self.tree.create_node(Item {
+                            start: self.tree[cur_ix].item.start + 1,
+                            end: ix - 1,
+                            body: ItemBody::Text,
+                        });
+                        let link_ix = self.allocs.allocate_link(link_type, uri, "".into());
+                        self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
+                        self.tree[cur_ix].item.end = ix;
+                        self.tree[cur_ix].next = node;
+                        self.tree[cur_ix].child = Some(text_node);
+                        prev = cur;
+                        cur = node;
+                        if let Some(node_ix) = cur {
+                            self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
+                        }
+                        continue;
+                    } else {
+                        let inline_html = next.and_then(|next_ix| {
+                            self.scan_inline_html(
+                                block_text.as_bytes(),
+                                self.tree[next_ix].item.start,
+                            )
+                        });
+                        if let Some((span, ix)) = inline_html {
+                            let node = scan_nodes_to_ix(&self.tree, next, ix);
+                            // A non-empty span means bytes were skipped and the
+                            // HTML must be stored as an owned buffer.
+                            self.tree[cur_ix].item.body = if !span.is_empty() {
+                                let converted_string =
+                                    String::from_utf8(span).expect("invalid utf8");
+                                ItemBody::OwnedHtml(
+                                    self.allocs.allocate_cow(converted_string.into()),
+                                )
+                            } else {
+                                ItemBody::Html
+                            };
+                            self.tree[cur_ix].item.end = ix;
+                            self.tree[cur_ix].next = node;
+                            prev = cur;
+                            cur = node;
+                            if let Some(node_ix) = cur {
+                                self.tree[node_ix].item.start =
+                                    max(self.tree[node_ix].item.start, ix);
+                            }
+                            continue;
+                        }
+                    }
+                    // Neither an autolink nor inline HTML: demote to text.
+                    self.tree[cur_ix].item.body = ItemBody::Text;
+                }
+                ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
+                    if preceded_by_backslash {
+                        // The first backtick is escaped; only the rest can delimit.
+                        search_count -= 1;
+                        if search_count == 0 {
+                            self.tree[cur_ix].item.body = ItemBody::Text;
+                            prev = cur;
+                            cur = self.tree[cur_ix].next;
+                            continue;
+                        }
+                    }
+
+                    if code_delims.is_populated() {
+                        // we have previously scanned all codeblock delimiters,
+                        // so we can reuse that work
+                        if let Some(scan_ix) = code_delims.find(cur_ix, search_count) {
+                            self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
+                        } else {
+                            self.tree[cur_ix].item.body = ItemBody::Text;
+                        }
+                    } else {
+                        // we haven't previously scanned all codeblock delimiters,
+                        // so walk the AST
+                        let mut scan = if search_count > 0 {
+                            self.tree[cur_ix].next
+                        } else {
+                            None
+                        };
+                        while let Some(scan_ix) = scan {
+                            if let ItemBody::MaybeCode(delim_count, _) =
+                                self.tree[scan_ix].item.body
+                            {
+                                if search_count == delim_count {
+                                    self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
+                                    code_delims.clear();
+                                    break;
+                                } else {
+                                    code_delims.insert(delim_count, scan_ix);
+                                }
+                            }
+                            scan = self.tree[scan_ix].next;
+                        }
+                        // (was `scan == None`; is_none() is the idiomatic form)
+                        if scan.is_none() {
+                            self.tree[cur_ix].item.body = ItemBody::Text;
+                        }
+                    }
+                }
+                ItemBody::MaybeLinkOpen => {
+                    self.tree[cur_ix].item.body = ItemBody::Text;
+                    self.link_stack.push(LinkStackEl {
+                        node: cur_ix,
+                        ty: LinkStackTy::Link,
+                    });
+                }
+                ItemBody::MaybeImage => {
+                    self.tree[cur_ix].item.body = ItemBody::Text;
+                    self.link_stack.push(LinkStackEl {
+                        node: cur_ix,
+                        ty: LinkStackTy::Image,
+                    });
+                }
+                ItemBody::MaybeLinkClose(could_be_ref) => {
+                    self.tree[cur_ix].item.body = ItemBody::Text;
+                    if let Some(tos) = self.link_stack.pop() {
+                        if tos.ty == LinkStackTy::Disabled {
+                            continue;
+                        }
+                        let next = self.tree[cur_ix].next;
+                        if let Some((next_ix, url, title)) =
+                            self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
+                        {
+                            // Inline link `[text](url "title")`: splice children.
+                            let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
+                            if let Some(prev_ix) = prev {
+                                self.tree[prev_ix].next = None;
+                            }
+                            cur = Some(tos.node);
+                            cur_ix = tos.node;
+                            let link_ix = self.allocs.allocate_link(LinkType::Inline, url, title);
+                            self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
+                                ItemBody::Image(link_ix)
+                            } else {
+                                ItemBody::Link(link_ix)
+                            };
+                            self.tree[cur_ix].child = self.tree[cur_ix].next;
+                            self.tree[cur_ix].next = next_node;
+                            self.tree[cur_ix].item.end = next_ix;
+                            if let Some(next_node_ix) = next_node {
+                                self.tree[next_node_ix].item.start =
+                                    max(self.tree[next_node_ix].item.start, next_ix);
+                            }
+
+                            if tos.ty == LinkStackTy::Link {
+                                self.link_stack.disable_all_links();
+                            }
+                        } else {
+                            // ok, so its not an inline link. maybe it is a reference
+                            // to a defined link?
+                            let scan_result = scan_reference(
+                                &self.tree,
+                                block_text,
+                                next,
+                                self.options.contains(Options::ENABLE_FOOTNOTES),
+                            );
+                            let (node_after_link, link_type) = match scan_result {
+                                // [label][reference]
+                                RefScan::LinkLabel(_, end_ix) => {
+                                    // Toggle reference viability of the last closing bracket,
+                                    // so that we can skip it on future iterations in case
+                                    // it fails in this one. In particular, we won't call
+                                    // the broken link callback twice on one reference.
+                                    let reference_close_node = if let Some(node) =
+                                        scan_nodes_to_ix(&self.tree, next, end_ix - 1)
+                                    {
+                                        node
+                                    } else {
+                                        continue;
+                                    };
+                                    self.tree[reference_close_node].item.body =
+                                        ItemBody::MaybeLinkClose(false);
+                                    let next_node = self.tree[reference_close_node].next;
+
+                                    (next_node, LinkType::Reference)
+                                }
+                                // [reference][]
+                                RefScan::Collapsed(next_node) => {
+                                    // This reference has already been tried, and it's not
+                                    // valid. Skip it.
+                                    if !could_be_ref {
+                                        continue;
+                                    }
+                                    (next_node, LinkType::Collapsed)
+                                }
+                                // [shortcut]
+                                //
+                                // [shortcut]: /blah
+                                RefScan::Failed => {
+                                    if !could_be_ref {
+                                        continue;
+                                    }
+                                    (next, LinkType::Shortcut)
+                                }
+                            };
+
+                            // FIXME: references and labels are mixed in the naming of variables
+                            // below. Disambiguate!
+
+                            // (label, source_ix end)
+                            let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
+                                RefScan::LinkLabel(l, end_ix) => {
+                                    Some((ReferenceLabel::Link(l), end_ix))
+                                }
+                                RefScan::Collapsed(..) | RefScan::Failed => {
+                                    // No label? maybe it is a shortcut reference
+                                    let label_start = self.tree[tos.node].item.end - 1;
+                                    scan_link_label(
+                                        &self.tree,
+                                        &self.text[label_start..self.tree[cur_ix].item.end],
+                                        self.options.contains(Options::ENABLE_FOOTNOTES),
+                                    )
+                                    .map(|(ix, label)| (label, label_start + ix))
+                                }
+                            };
+
+                            // see if it's a footnote reference
+                            if let Some((ReferenceLabel::Footnote(l), end)) = label {
+                                self.tree[tos.node].next = node_after_link;
+                                self.tree[tos.node].child = None;
+                                self.tree[tos.node].item.body =
+                                    ItemBody::FootnoteReference(self.allocs.allocate_cow(l));
+                                self.tree[tos.node].item.end = end;
+                                prev = Some(tos.node);
+                                cur = node_after_link;
+                                self.link_stack.clear();
+                                continue;
+                            } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
+                                let type_url_title = self
+                                    .allocs
+                                    .refdefs
+                                    .get(link_label.as_ref())
+                                    .map(|matching_def| {
+                                        // found a matching definition!
+                                        let title = matching_def
+                                            .title
+                                            .as_ref()
+                                            .cloned()
+                                            .unwrap_or_else(|| "".into());
+                                        let url = matching_def.dest.clone();
+                                        (link_type, url, title)
+                                    })
+                                    .or_else(|| {
+                                        match self.broken_link_callback.as_mut() {
+                                            Some(callback) => {
+                                                // Construct a BrokenLink struct, which will be passed to the callback
+                                                let broken_link = BrokenLink {
+                                                    span: (self.tree[tos.node].item.start)..end,
+                                                    link_type,
+                                                    reference: link_label,
+                                                };
+
+                                                callback(broken_link).map(|(url, title)| {
+                                                    (link_type.to_unknown(), url, title)
+                                                })
+                                            }
+                                            None => None,
+                                        }
+                                    });
+
+                                if let Some((def_link_type, url, title)) = type_url_title {
+                                    let link_ix =
+                                        self.allocs.allocate_link(def_link_type, url, title);
+                                    self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
+                                    {
+                                        ItemBody::Image(link_ix)
+                                    } else {
+                                        ItemBody::Link(link_ix)
+                                    };
+                                    let label_node = self.tree[tos.node].next;
+
+                                    // lets do some tree surgery to add the link to the tree
+                                    // 1st: skip the label node and close node
+                                    self.tree[tos.node].next = node_after_link;
+
+                                    // then, if it exists, add the label node as a child to the link node
+                                    if label_node != cur {
+                                        self.tree[tos.node].child = label_node;
+
+                                        // finally: disconnect list of children
+                                        if let Some(prev_ix) = prev {
+                                            self.tree[prev_ix].next = None;
+                                        }
+                                    }
+
+                                    self.tree[tos.node].item.end = end;
+
+                                    // set up cur so next node will be node_after_link
+                                    cur = Some(tos.node);
+                                    cur_ix = tos.node;
+
+                                    if tos.ty == LinkStackTy::Link {
+                                        self.link_stack.disable_all_links();
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+                _ => (),
+            }
+            prev = cur;
+            cur = self.tree[cur_ix].next;
+        }
+        self.link_stack.clear();
+    }
+
+    /// Resolve `Maybe*` emphasis and smart-quote markers into final items,
+    /// pairing openers and closers via `self.inline_stack`.
+    fn handle_emphasis(&mut self) {
+        let mut prev = None;
+        let mut prev_ix: TreeIndex;
+        let mut cur = self.tree.cur();
+
+        // open-quote state for smart punctuation
+        let mut single_quote_open: Option<TreeIndex> = None;
+        let mut double_quote_open: bool = false;
+
+        while let Some(mut cur_ix) = cur {
+            match self.tree[cur_ix].item.body {
+                ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
+                    let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
+                    let both = can_open && can_close;
+                    if can_close {
+                        while let Some(el) =
+                            self.inline_stack.find_match(&mut self.tree, c, count, both)
+                        {
+                            // have a match!
+                            if let Some(prev_ix) = prev {
+                                self.tree[prev_ix].next = None;
+                            }
+                            let match_count = min(count, el.count);
+                            // start, end are tree node indices
+                            let mut end = cur_ix - 1;
+                            let mut start = el.start + el.count;
+
+                            // work from the inside out
+                            while start > el.start + el.count - match_count {
+                                // `~` pairs as strikethrough; `*`/`_` pair as
+                                // Strong while two or more delimiters remain,
+                                // then Emphasis for the last single one.
+                                let (inc, ty) = if c == b'~' {
+                                    (2, ItemBody::Strikethrough)
+                                } else if start > el.start + el.count - match_count + 1 {
+                                    (2, ItemBody::Strong)
+                                } else {
+                                    (1, ItemBody::Emphasis)
+                                };
+
+                                let root = start - inc;
+                                end = end + inc;
+                                self.tree[root].item.body = ty;
+                                self.tree[root].item.end = self.tree[end].item.end;
+                                self.tree[root].child = Some(start);
+                                self.tree[root].next = None;
+                                start = root;
+                            }
+
+                            // set next for top most emph level
+                            prev_ix = el.start + el.count - match_count;
+                            prev = Some(prev_ix);
+                            cur = self.tree[cur_ix + match_count - 1].next;
+                            self.tree[prev_ix].next = cur;
+
+                            // re-push any leftover opener delimiters
+                            if el.count > match_count {
+                                self.inline_stack.push(InlineEl {
+                                    start: el.start,
+                                    count: el.count - match_count,
+                                    c: el.c,
+                                    both,
+                                })
+                            }
+                            count -= match_count;
+                            if count > 0 {
+                                cur_ix = cur.unwrap();
+                            } else {
+                                break;
+                            }
+                        }
+                    }
+                    if count > 0 {
+                        if can_open {
+                            self.inline_stack.push(InlineEl {
+                                start: cur_ix,
+                                count,
+                                c,
+                                both,
+                            });
+                        } else {
+                            // cannot open: remaining delimiters become text
+                            for i in 0..count {
+                                self.tree[cur_ix + i].item.body = ItemBody::Text;
+                            }
+                        }
+                        prev_ix = cur_ix + count - 1;
+                        prev = Some(prev_ix);
+                        cur = self.tree[prev_ix].next;
+                    }
+                }
+                ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
+                    self.tree[cur_ix].item.body = match c {
+                        b'\'' => {
+                            // close the pending single quote (rewriting its
+                            // opener to ‘), or remember this one as an opener
+                            if let (Some(open_ix), true) = (single_quote_open, can_close) {
+                                self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
+                                single_quote_open = None;
+                            } else if can_open {
+                                single_quote_open = Some(cur_ix);
+                            }
+                            ItemBody::SynthesizeChar('’')
+                        }
+                        _ /* double quote */ => {
+                            if can_close && double_quote_open {
+                                double_quote_open = false;
+                                ItemBody::SynthesizeChar('”')
+                            } else {
+                                if can_open && !double_quote_open {
+                                    double_quote_open = true;
+                                }
+                                ItemBody::SynthesizeChar('“')
+                            }
+                        }
+                    };
+                    prev = cur;
+                    cur = self.tree[cur_ix].next;
+                }
+                _ => {
+                    prev = cur;
+                    cur = self.tree[cur_ix].next;
+                }
+            }
+        }
+        // unmatched openers left on the stack become plain text
+        self.inline_stack.pop_all(&mut self.tree);
+    }
+
+    /// Returns next byte index, url and title.
+    ///
+    /// Scans the `(dest "title")` part of an inline link starting at `ix`,
+    /// which must point at the opening parenthesis. Returns `None` if the
+    /// syntax does not match.
+    fn scan_inline_link(
+        &self,
+        underlying: &'input str,
+        mut ix: usize,
+        node: Option<TreeIndex>,
+    ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
+        if scan_ch(&underlying.as_bytes()[ix..], b'(') == 0 {
+            return None;
+        }
+        ix += 1;
+        ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
+
+        // destination first, with bounded paren nesting (DoS guard)
+        let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
+        let dest = unescape(dest);
+        ix += dest_length;
+
+        ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
+
+        // optional title; absence yields an empty cow
+        let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
+            ix += bytes_scanned;
+            ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
+            t
+        } else {
+            "".into()
+        };
+        if scan_ch(&underlying.as_bytes()[ix..], b')') == 0 {
+            return None;
+        }
+        ix += 1;
+
+        Some((ix, dest, title))
+    }
+
+    // returns (bytes scanned, title cow)
+    //
+    // Scans a link title delimited by single quotes, double quotes, or
+    // parentheses, starting at `start_ix`. Escapes, entities, and line
+    // continuations are rewritten into an owned buffer; otherwise the
+    // title borrows from `text`.
+    fn scan_link_title(
+        &self,
+        text: &'input str,
+        start_ix: usize,
+        node: Option<TreeIndex>,
+    ) -> Option<(usize, CowStr<'input>)> {
+        let bytes = text.as_bytes();
+        let open = match bytes.get(start_ix) {
+            Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
+            _ => return None,
+        };
+        let close = if open == b'(' { b')' } else { open };
+
+        let mut title = String::new();
+        let mut mark = start_ix + 1;
+        let mut i = start_ix + 1;
+
+        while i < bytes.len() {
+            let c = bytes[i];
+
+            if c == close {
+                // If `mark` never advanced past the opening delimiter, nothing
+                // was rewritten and the title can borrow from the source.
+                // FIX: this previously compared `mark == 1`, which only held
+                // when the title started at byte 0 of `text`, so the borrow
+                // fast path was effectively dead and every title allocated.
+                let cow = if mark == start_ix + 1 {
+                    (i - start_ix + 1, text[mark..i].into())
+                } else {
+                    title.push_str(&text[mark..i]);
+                    (i - start_ix + 1, title.into())
+                };
+
+                return Some(cow);
+            }
+            // an unescaped opening paren inside a paren-delimited title is invalid
+            if c == open {
+                return None;
+            }
+
+            if c == b'\n' || c == b'\r' {
+                // continue the title across a line break, normalizing to '\n'
+                if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
+                    if self.tree[node_ix].item.start > i {
+                        title.push_str(&text[mark..i]);
+                        title.push('\n');
+                        i = self.tree[node_ix].item.start;
+                        mark = i;
+                        continue;
+                    }
+                }
+            }
+            if c == b'&' {
+                // expand HTML entities
+                if let (n, Some(value)) = scan_entity(&bytes[i..]) {
+                    title.push_str(&text[mark..i]);
+                    title.push_str(&value);
+                    i += n;
+                    mark = i;
+                    continue;
+                }
+            }
+            // backslash escape of ASCII punctuation: drop the backslash
+            if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
+                title.push_str(&text[mark..i]);
+                i += 1;
+                mark = i;
+            }
+
+            i += 1;
+        }
+
+        None
+    }
+
+    /// Make a code span.
+    ///
+    /// Both `open` and `close` are matching MaybeCode items.
+    /// Line breaks inside the span are replaced by single spaces, and one
+    /// leading plus one trailing space/newline is stripped when both are
+    /// present (CommonMark code-span rules).
+    fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
+        let first_ix = self.tree[open].next.unwrap();
+        let bytes = self.text.as_bytes();
+        let mut span_start = self.tree[open].item.end;
+        let mut span_end = self.tree[close].item.start;
+        // owned buffer, only allocated if the content needs rewriting
+        let mut buf: Option<String> = None;
+
+        // detect all-space sequences, since they are kept as-is as of commonmark 0.29
+        if !bytes[span_start..span_end].iter().all(|&b| b == b' ') {
+            let opening = matches!(bytes[span_start], b' ' | b'\r' | b'\n');
+            let closing = matches!(bytes[span_end - 1], b' ' | b'\r' | b'\n');
+            let drop_enclosing_whitespace = opening && closing;
+
+            if drop_enclosing_whitespace {
+                span_start += 1;
+                if span_start < span_end {
+                    span_end -= 1;
+                }
+            }
+
+            let mut ix = first_ix;
+
+            // walk the nodes between the delimiters, flattening breaks to spaces
+            while ix != close {
+                let next_ix = self.tree[ix].next.unwrap();
+                if let ItemBody::HardBreak | ItemBody::SoftBreak = self.tree[ix].item.body {
+                    if drop_enclosing_whitespace {
+                        // check whether break should be ignored
+                        if ix == first_ix {
+                            ix = next_ix;
+                            span_start = min(span_end, self.tree[ix].item.start);
+                            continue;
+                        } else if next_ix == close && ix > first_ix {
+                            break;
+                        }
+                    }
+
+                    let end = bytes[self.tree[ix].item.start..]
+                        .iter()
+                        .position(|&b| b == b'\r' || b == b'\n')
+                        .unwrap()
+                        + self.tree[ix].item.start;
+                    if let Some(ref mut buf) = buf {
+                        buf.push_str(&self.text[self.tree[ix].item.start..end]);
+                        buf.push(' ');
+                    } else {
+                        let mut new_buf = String::with_capacity(span_end - span_start);
+                        new_buf.push_str(&self.text[span_start..end]);
+                        new_buf.push(' ');
+                        buf = Some(new_buf);
+                    }
+                } else if let Some(ref mut buf) = buf {
+                    let end = if next_ix == close {
+                        span_end
+                    } else {
+                        self.tree[ix].item.end
+                    };
+                    buf.push_str(&self.text[self.tree[ix].item.start..end]);
+                }
+                ix = next_ix;
+            }
+        }
+
+        let cow = if let Some(buf) = buf {
+            buf.into()
+        } else {
+            self.text[span_start..span_end].into()
+        };
+        if preceding_backslash {
+            // the escaped first backtick stays as a one-byte Text node,
+            // and the close node becomes the Code item
+            self.tree[open].item.body = ItemBody::Text;
+            self.tree[open].item.end = self.tree[open].item.start + 1;
+            self.tree[open].next = Some(close);
+            self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
+            self.tree[close].item.start = self.tree[open].item.start + 1;
+        } else {
+            // the open node becomes the Code item and skips to after the close
+            self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
+            self.tree[open].item.end = self.tree[close].item.end;
+            self.tree[open].next = self.tree[close].next;
+        }
+    }
+
+    /// On success, returns a buffer containing the inline html and byte offset.
+    /// When no bytes were skipped, the buffer will be empty and the html can be
+    /// represented as a subslice of the input string.
+    ///
+    /// `ix` points just past the `<`; the leading byte selects between a
+    /// comment/declaration (`!`), a processing instruction (`?`), or a tag.
+    fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
+        let c = *bytes.get(ix)?;
+        if c == b'!' {
+            Some((
+                vec![],
+                scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
+            ))
+        } else if c == b'?' {
+            Some((
+                vec![],
+                scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
+            ))
+        } else {
+            let (span, i) = scan_html_block_inner(
+                // Subtract 1 to include the < character
+                &bytes[(ix - 1)..],
+                // continuation lines must re-match the enclosing containers
+                Some(&|bytes| {
+                    let mut line_start = LineStart::new(bytes);
+                    let _ = scan_containers(&self.tree, &mut line_start);
+                    line_start.bytes_scanned()
+                }),
+            )?;
+            Some((span, i + ix - 1))
+        }
+    }
+
+    /// Consumes the event iterator and produces an iterator that produces
+    /// `(Event, Range)` pairs, where the `Range` value maps to the corresponding
+    /// range in the markdown source.
+    pub fn into_offset_iter(self) -> OffsetIter<'input, 'callback> {
+        OffsetIter { inner: self }
+    }
+}
+
+/// Returns number of containers scanned.
+///
+/// Walks the tree spine (open block quotes and list items) and consumes the
+/// matching markers/indentation from `line_start`, stopping at the first
+/// container whose marker is missing.
+pub(crate) fn scan_containers(tree: &Tree<Item>, line_start: &mut LineStart) -> usize {
+    let mut i = 0;
+    for &node_ix in tree.walk_spine() {
+        match tree[node_ix].item.body {
+            ItemBody::BlockQuote => {
+                // `scan_blockquote_marker` saves & restores internally
+                if !line_start.scan_blockquote_marker() {
+                    break;
+                }
+            }
+            ItemBody::ListItem(indent) => {
+                let save = line_start.clone();
+                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
+                    *line_start = save;
+                    break;
+                }
+            }
+            // other container kinds require no per-line marker
+            _ => (),
+        }
+        i += 1;
+    }
+    i
+}
+
+// NOTE: dropped the unused lifetime parameter from `impl<'a> Tree<Item>`.
+impl Tree<Item> {
+    /// Append a text item spanning `start..end`, merging it into the current
+    /// node when that node is an immediately adjacent text item.
+    pub(crate) fn append_text(&mut self, start: usize, end: usize) {
+        if end > start {
+            if let Some(ix) = self.cur() {
+                // extend an adjacent Text node instead of creating a new one
+                if ItemBody::Text == self[ix].item.body && self[ix].item.end == start {
+                    self[ix].item.end = end;
+                    return;
+                }
+            }
+            self.append(Item {
+                start,
+                end,
+                body: ItemBody::Text,
+            });
+        }
+    }
+}
+
+/// A run of potential emphasis-opening delimiters awaiting a closer.
+#[derive(Copy, Clone, Debug)]
+struct InlineEl {
+    start: TreeIndex, // offset of tree node
+    count: usize,     // number of consecutive delimiter characters
+    c: u8,            // b'*' or b'_'
+    both: bool,       // can both open and close
+}
+
+/// Stack of pending emphasis openers, with per-kind lower bounds used to
+/// skip repeated failed searches (keeps matching near-linear).
+#[derive(Debug, Clone, Default)]
+struct InlineStack {
+    stack: Vec<InlineEl>,
+    // Lower bounds for matching indices in the stack. For example
+    // a strikethrough delimiter will never match with any element
+    // in the stack with index smaller than
+    // `lower_bounds[InlineStack::TILDES]`.
+    lower_bounds: [usize; 7],
+}
+
+impl InlineStack {
+ /// These are indices into the lower bounds array.
+ /// Not both refers to the property that the delimiter can not both
+ /// be opener as a closer.
+ const UNDERSCORE_NOT_BOTH: usize = 0;
+ const ASTERISK_NOT_BOTH: usize = 1;
+ const ASTERISK_BASE: usize = 2;
+ const TILDES: usize = 5;
+ const UNDERSCORE_BOTH: usize = 6;
+
+ fn pop_all(&mut self, tree: &mut Tree<Item>) {
+ for el in self.stack.drain(..) {
+ for i in 0..el.count {
+ tree[el.start + i].item.body = ItemBody::Text;
+ }
+ }
+ self.lower_bounds = [0; 7];
+ }
+
+ fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
+ if c == b'_' {
+ if both {
+ self.lower_bounds[InlineStack::UNDERSCORE_BOTH]
+ } else {
+ self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH]
+ }
+ } else if c == b'*' {
+ let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
+ if both {
+ mod3_lower
+ } else {
+ min(
+ mod3_lower,
+ self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
+ )
+ }
+ } else {
+ self.lower_bounds[InlineStack::TILDES]
+ }
+ }
+
+ fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
+ if c == b'_' {
+ if both {
+ self.lower_bounds[InlineStack::UNDERSCORE_BOTH] = new_bound;
+ } else {
+ self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
+ }
+ } else if c == b'*' {
+ self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
+ if !both {
+ self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
+ }
+ } else {
+ self.lower_bounds[InlineStack::TILDES] = new_bound;
+ }
+ }
+
    /// Searches the stack from the top down for an opener that matches a
    /// closing run of `count` delimiters of byte `c` (`both` = the closing
    /// run could also open). On a match, everything above the match is
    /// demoted to text and popped; on failure the lower bound is raised so
    /// the same range is never rescanned (prevents quadratic behavior).
    fn find_match(
        &mut self,
        tree: &mut Tree<Item>,
        c: u8,
        count: usize,
        both: bool,
    ) -> Option<InlineEl> {
        let lowerbound = min(self.stack.len(), self.get_lowerbound(c, count, both));
        let res = self.stack[lowerbound..]
            .iter()
            .cloned()
            .enumerate()
            .rfind(|(_, el)| {
                // CommonMark "rule of three": if either run can both open and
                // close, the combined length must not be a multiple of 3
                // unless both lengths are. (Checking `count % 3 == 0` alone
                // suffices: if the sum is divisible by 3 and `count` is too,
                // then `el.count` must be as well.)
                el.c == c && (!both && !el.both || (count + el.count) % 3 != 0 || count % 3 == 0)
            });

        if let Some((matching_ix, matching_el)) = res {
            // `matching_ix` was relative to the searched slice; rebase it.
            let matching_ix = matching_ix + lowerbound;
            // Delimiters above the match can never pair anymore; demote them.
            for el in &self.stack[(matching_ix + 1)..] {
                for i in 0..el.count {
                    tree[el.start + i].item.body = ItemBody::Text;
                }
            }
            self.stack.truncate(matching_ix);
            Some(matching_el)
        } else {
            self.set_lowerbound(c, count, both, self.stack.len());
            None
        }
    }
+
    /// Pushes a new (potentially opening) delimiter run onto the stack.
    fn push(&mut self, el: InlineEl) {
        self.stack.push(el)
    }
+}
+
/// Outcome of scanning for a link reference at a node (see `scan_reference`).
#[derive(Debug, Clone)]
enum RefScan<'a> {
    // label, source ix of label end
    LinkLabel(CowStr<'a>, usize),
    // contains next node index (the node after the collapsed `[]`)
    Collapsed(Option<TreeIndex>),
    Failed,
}
+
+/// Skips forward within a block to a node which spans (ends inclusive) the given
+/// index into the source.
+fn scan_nodes_to_ix(
+ tree: &Tree<Item>,
+ mut node: Option<TreeIndex>,
+ ix: usize,
+) -> Option<TreeIndex> {
+ while let Some(node_ix) = node {
+ if tree[node_ix].item.end <= ix {
+ node = tree[node_ix].next;
+ } else {
+ break;
+ }
+ }
+ node
+}
+
+/// Scans an inline link label, which cannot be interrupted.
+/// Returns number of bytes (including brackets) and label on success.
+fn scan_link_label<'text, 'tree>(
+ tree: &'tree Tree<Item>,
+ text: &'text str,
+ allow_footnote_refs: bool,
+) -> Option<(usize, ReferenceLabel<'text>)> {
+ let bytes = &text.as_bytes();
+ if bytes.len() < 2 || bytes[0] != b'[' {
+ return None;
+ }
+ let linebreak_handler = |bytes: &[u8]| {
+ let mut line_start = LineStart::new(bytes);
+ let _ = scan_containers(tree, &mut line_start);
+ Some(line_start.bytes_scanned())
+ };
+ let pair = if allow_footnote_refs && b'^' == bytes[1] {
+ let (byte_index, cow) = scan_link_label_rest(&text[2..], &linebreak_handler)?;
+ (byte_index + 2, ReferenceLabel::Footnote(cow))
+ } else {
+ let (byte_index, cow) = scan_link_label_rest(&text[1..], &linebreak_handler)?;
+ (byte_index + 1, ReferenceLabel::Link(cow))
+ };
+ Some(pair)
+}
+
/// Attempts to read a reference at the node `cur`: either a collapsed
/// reference (`[]`) or a full bracketed link label.
fn scan_reference<'a, 'b>(
    tree: &'a Tree<Item>,
    text: &'b str,
    cur: Option<TreeIndex>,
    allow_footnote_refs: bool,
) -> RefScan<'b> {
    let cur_ix = match cur {
        None => return RefScan::Failed,
        Some(cur_ix) => cur_ix,
    };
    let start = tree[cur_ix].item.start;
    let tail = &text.as_bytes()[start..];

    if tail.starts_with(b"[]") {
        // TODO: this unwrap is sus and should be looked at closer
        // NOTE(review): this assumes the `[` node always has a successor
        // node when the source continues with "]" — confirm against the
        // first pass before removing the TODO above.
        let closing_node = tree[cur_ix].next.unwrap();
        RefScan::Collapsed(tree[closing_node].next)
    } else if let Some((ix, ReferenceLabel::Link(label))) =
        scan_link_label(tree, &text[start..], allow_footnote_refs)
    {
        RefScan::LinkLabel(label, start + ix)
    } else {
        RefScan::Failed
    }
}
+
+#[derive(Clone, Default)]
+struct LinkStack {
+ inner: Vec<LinkStackEl>,
+ disabled_ix: usize,
+}
+
+impl LinkStack {
+ fn push(&mut self, el: LinkStackEl) {
+ self.inner.push(el);
+ }
+
+ fn pop(&mut self) -> Option<LinkStackEl> {
+ let el = self.inner.pop();
+ self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
+ el
+ }
+
+ fn clear(&mut self) {
+ self.inner.clear();
+ self.disabled_ix = 0;
+ }
+
+ fn disable_all_links(&mut self) {
+ for el in &mut self.inner[self.disabled_ix..] {
+ if el.ty == LinkStackTy::Link {
+ el.ty = LinkStackTy::Disabled;
+ }
+ }
+ self.disabled_ix = self.inner.len();
+ }
+}
+
/// A single open link/image bracket on the `LinkStack`.
#[derive(Clone, Debug)]
struct LinkStackEl {
    // Parse-tree node this bracket corresponds to.
    node: TreeIndex,
    ty: LinkStackTy,
}

#[derive(PartialEq, Clone, Debug)]
enum LinkStackTy {
    Link,
    Image,
    // A link that can no longer match (set by `disable_all_links`).
    Disabled,
}
+
/// Contains the destination URL, title and source span of a reference definition.
#[derive(Clone)]
pub struct LinkDef<'a> {
    /// The destination URL of the definition.
    pub dest: CowStr<'a>,
    /// The optional title of the definition.
    pub title: Option<CowStr<'a>>,
    /// Byte range of the whole definition in the source text.
    pub span: Range<usize>,
}
+
+/// Tracks tree indices of code span delimiters of each length. It should prevent
+/// quadratic scanning behaviours by providing (amortized) constant time lookups.
+struct CodeDelims {
+ inner: HashMap<usize, VecDeque<TreeIndex>>,
+ seen_first: bool,
+}
+
+impl CodeDelims {
+ fn new() -> Self {
+ Self {
+ inner: Default::default(),
+ seen_first: false,
+ }
+ }
+
+ fn insert(&mut self, count: usize, ix: TreeIndex) {
+ if self.seen_first {
+ self.inner
+ .entry(count)
+ .or_insert_with(Default::default)
+ .push_back(ix);
+ } else {
+ // Skip the first insert, since that delimiter will always
+ // be an opener and not a closer.
+ self.seen_first = true;
+ }
+ }
+
+ fn is_populated(&self) -> bool {
+ !self.inner.is_empty()
+ }
+
+ fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
+ while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
+ if ix > open_ix {
+ return Some(ix);
+ }
+ }
+ None
+ }
+
+ fn clear(&mut self) {
+ self.inner.clear();
+ self.seen_first = false;
+ }
+}
+
/// Handle into the `links` table of `Allocations`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct LinkIndex(usize);

/// Handle into the `cows` table of `Allocations`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct CowIndex(usize);

/// Handle into the `alignments` table of `Allocations`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct AlignmentIndex(usize);

/// One-based handle into the `headings` table of `Allocations`; non-zero
/// so that `Option<HeadingIndex>` costs no extra space.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct HeadingIndex(NonZeroUsize);
+
/// Side tables for heap data referenced from tree items through small,
/// copyable index handles (`CowIndex`, `LinkIndex`, ...).
#[derive(Clone)]
pub(crate) struct Allocations<'a> {
    pub refdefs: RefDefs<'a>,
    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>)>,
    cows: Vec<CowStr<'a>>,
    alignments: Vec<Vec<Alignment>>,
    headings: Vec<HeadingAttributes<'a>>,
}

/// Used by the heading attributes extension.
#[derive(Clone)]
pub(crate) struct HeadingAttributes<'a> {
    pub id: Option<&'a str>,
    pub classes: Vec<&'a str>,
}
+
/// Keeps track of the reference definitions defined in the document.
#[derive(Clone, Default)]
pub struct RefDefs<'input>(pub(crate) HashMap<LinkLabel<'input>, LinkDef<'input>>);

impl<'input, 'b, 's> RefDefs<'input>
where
    's: 'b,
{
    /// Performs a lookup on reference label using unicode case folding.
    pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
        // Keys are stored case-folded; wrap the query in `UniCase` to match.
        self.0.get(&UniCase::new(key.into()))
    }

    /// Provides an iterator over all the document's reference definitions.
    pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
        self.0.iter().map(|(k, v)| (k.as_ref(), v))
    }
}
+
+impl<'a> Allocations<'a> {
+ pub fn new() -> Self {
+ Self {
+ refdefs: RefDefs::default(),
+ links: Vec::with_capacity(128),
+ cows: Vec::new(),
+ alignments: Vec::new(),
+ headings: Vec::new(),
+ }
+ }
+
+ pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
+ let ix = self.cows.len();
+ self.cows.push(cow);
+ CowIndex(ix)
+ }
+
+ pub fn allocate_link(&mut self, ty: LinkType, url: CowStr<'a>, title: CowStr<'a>) -> LinkIndex {
+ let ix = self.links.len();
+ self.links.push((ty, url, title));
+ LinkIndex(ix)
+ }
+
+ pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
+ let ix = self.alignments.len();
+ self.alignments.push(alignment);
+ AlignmentIndex(ix)
+ }
+
+ pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
+ let ix = self.headings.len();
+ self.headings.push(attrs);
+ // This won't panic. `self.headings.len()` can't be `usize::MAX` since
+ // such a long Vec cannot fit in memory.
+ let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
+ HeadingIndex(ix_nonzero)
+ }
+}
+
// Indexing `Allocations` by the typed handles defined above.

impl<'a> Index<CowIndex> for Allocations<'a> {
    type Output = CowStr<'a>;

    fn index(&self, ix: CowIndex) -> &Self::Output {
        self.cows.index(ix.0)
    }
}

impl<'a> Index<LinkIndex> for Allocations<'a> {
    type Output = (LinkType, CowStr<'a>, CowStr<'a>);

    fn index(&self, ix: LinkIndex) -> &Self::Output {
        self.links.index(ix.0)
    }
}

impl<'a> Index<AlignmentIndex> for Allocations<'a> {
    type Output = Vec<Alignment>;

    fn index(&self, ix: AlignmentIndex) -> &Self::Output {
        self.alignments.index(ix.0)
    }
}

impl<'a> Index<HeadingIndex> for Allocations<'a> {
    type Output = HeadingAttributes<'a>;

    fn index(&self, ix: HeadingIndex) -> &Self::Output {
        // Heading handles are stored one-based; convert back to a Vec index.
        self.headings.index(ix.0.get() - 1)
    }
}
+
/// A struct containing information on the reachability of certain inline HTML
/// elements. In particular, for cdata elements (`<![CDATA[`), processing
/// elements (`<?`) and declarations (`<!DECLARATION`). The respectives usizes
/// represent the indices before which a scan will always fail and can hence
/// be skipped.
#[derive(Clone, Default)]
pub(crate) struct HtmlScanGuard {
    /// Scan lower bound for CDATA sections (`<![CDATA[`).
    pub cdata: usize,
    /// Scan lower bound for processing instructions (`<?`).
    pub processing: usize,
    /// Scan lower bound for declarations (`<!...`).
    pub declaration: usize,
}
+
/// Callback for broken (unresolvable) reference links: it receives the
/// `BrokenLink` and may return a `(destination, title)` pair to use for it.
pub type BrokenLinkCallback<'input, 'borrow> =
    Option<&'borrow mut dyn FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>>;

/// Markdown event and source range iterator.
///
/// Generates tuples where the first element is the markdown event and the second
/// is the corresponding range in the source string.
///
/// Constructed from a `Parser` using its
/// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method.
pub struct OffsetIter<'a, 'b> {
    inner: Parser<'a, 'b>,
}

impl<'a, 'b> OffsetIter<'a, 'b> {
    /// Returns a reference to the internal reference definition tracker.
    pub fn reference_definitions(&self) -> &RefDefs {
        self.inner.reference_definitions()
    }
}
+
impl<'a, 'b> Iterator for OffsetIter<'a, 'b> {
    type Item = (Event<'a>, Range<usize>);

    fn next(&mut self) -> Option<Self::Item> {
        match self.inner.tree.cur() {
            None => {
                // End of a child list: pop back to the parent node and emit
                // its `End` event (returns `None` once the tree is exhausted).
                let ix = self.inner.tree.pop()?;
                let tag = item_to_tag(&self.inner.tree[ix].item, &self.inner.allocs);
                self.inner.tree.next_sibling(ix);
                let span = self.inner.tree[ix].item.start..self.inner.tree[ix].item.end;
                debug_assert!(span.start <= span.end);
                Some((Event::End(tag), span))
            }
            Some(cur_ix) => {
                if self.inner.tree[cur_ix].item.body.is_inline() {
                    // Inline content is processed on demand when first visited.
                    self.inner.handle_inline();
                }

                let node = self.inner.tree[cur_ix];
                let item = node.item;
                let event = item_to_event(item, self.inner.text, &self.inner.allocs);
                if let Event::Start(..) = event {
                    // Descend into container nodes ...
                    self.inner.tree.push();
                } else {
                    // ... and step over leaf nodes.
                    self.inner.tree.next_sibling(cur_ix);
                }
                debug_assert!(item.start <= item.end);
                Some((event, item.start..item.end))
            }
        }
    }
}
+
/// Maps a container item body to the `Tag` used for its `Start`/`End` events.
/// Panics on any body variant not listed here (leaf bodies have no tag).
fn item_to_tag<'a>(item: &Item, allocs: &Allocations<'a>) -> Tag<'a> {
    match item.body {
        ItemBody::Paragraph => Tag::Paragraph,
        ItemBody::Emphasis => Tag::Emphasis,
        ItemBody::Strong => Tag::Strong,
        ItemBody::Strikethrough => Tag::Strikethrough,
        ItemBody::Link(link_ix) => {
            let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
            Tag::Link(*link_type, url.clone(), title.clone())
        }
        ItemBody::Image(link_ix) => {
            let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
            Tag::Image(*link_type, url.clone(), title.clone())
        }
        ItemBody::Heading(level, Some(heading_ix)) => {
            let HeadingAttributes { id, classes } = allocs.index(heading_ix);
            Tag::Heading(level, *id, classes.clone())
        }
        ItemBody::Heading(level, None) => Tag::Heading(level, None, Vec::new()),
        ItemBody::FencedCodeBlock(cow_ix) => {
            Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone()))
        }
        ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
        ItemBody::BlockQuote => Tag::BlockQuote,
        ItemBody::List(_, c, listitem_start) => {
            // Ordered list markers end in '.' or ')'; everything else is an
            // unordered list.
            if c == b'.' || c == b')' {
                Tag::List(Some(listitem_start))
            } else {
                Tag::List(None)
            }
        }
        ItemBody::ListItem(_) => Tag::Item,
        ItemBody::TableHead => Tag::TableHead,
        ItemBody::TableCell => Tag::TableCell,
        ItemBody::TableRow => Tag::TableRow,
        ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()),
        ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()),
        _ => panic!("unexpected item body {:?}", item.body),
    }
}
+
+fn item_to_event<'a>(item: Item, text: &'a str, allocs: &Allocations<'a>) -> Event<'a> {
+ let tag = match item.body {
+ ItemBody::Text => return Event::Text(text[item.start..item.end].into()),
+ ItemBody::Code(cow_ix) => return Event::Code(allocs[cow_ix].clone()),
+ ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs[cow_ix].clone()),
+ ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
+ ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
+ ItemBody::OwnedHtml(cow_ix) => return Event::Html(allocs[cow_ix].clone()),
+ ItemBody::SoftBreak => return Event::SoftBreak,
+ ItemBody::HardBreak => return Event::HardBreak,
+ ItemBody::FootnoteReference(cow_ix) => {
+ return Event::FootnoteReference(allocs[cow_ix].clone())
+ }
+ ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
+ ItemBody::Rule => return Event::Rule,
+
+ ItemBody::Paragraph => Tag::Paragraph,
+ ItemBody::Emphasis => Tag::Emphasis,
+ ItemBody::Strong => Tag::Strong,
+ ItemBody::Strikethrough => Tag::Strikethrough,
+ ItemBody::Link(link_ix) => {
+ let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
+ Tag::Link(*link_type, url.clone(), title.clone())
+ }
+ ItemBody::Image(link_ix) => {
+ let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
+ Tag::Image(*link_type, url.clone(), title.clone())
+ }
+ ItemBody::Heading(level, Some(heading_ix)) => {
+ let HeadingAttributes { id, classes } = allocs.index(heading_ix);
+ Tag::Heading(level, *id, classes.clone())
+ }
+ ItemBody::Heading(level, None) => Tag::Heading(level, None, Vec::new()),
+ ItemBody::FencedCodeBlock(cow_ix) => {
+ Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone()))
+ }
+ ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
+ ItemBody::BlockQuote => Tag::BlockQuote,
+ ItemBody::List(_, c, listitem_start) => {
+ if c == b'.' || c == b')' {
+ Tag::List(Some(listitem_start))
+ } else {
+ Tag::List(None)
+ }
+ }
+ ItemBody::ListItem(_) => Tag::Item,
+ ItemBody::TableHead => Tag::TableHead,
+ ItemBody::TableCell => Tag::TableCell,
+ ItemBody::TableRow => Tag::TableRow,
+ ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()),
+ ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()),
+ _ => panic!("unexpected item body {:?}", item.body),
+ };
+
+ Event::Start(tag)
+}
+
impl<'a, 'b> Iterator for Parser<'a, 'b> {
    type Item = Event<'a>;

    fn next(&mut self) -> Option<Event<'a>> {
        // Mirrors `OffsetIter::next`, but without computing source spans.
        match self.tree.cur() {
            None => {
                // End of a child list: pop back to the parent and emit `End`.
                let ix = self.tree.pop()?;
                let tag = item_to_tag(&self.tree[ix].item, &self.allocs);
                self.tree.next_sibling(ix);
                Some(Event::End(tag))
            }
            Some(cur_ix) => {
                if self.tree[cur_ix].item.body.is_inline() {
                    // Inline content is processed on demand when first visited.
                    self.handle_inline();
                }

                let node = self.tree[cur_ix];
                let item = node.item;
                let event = item_to_event(item, self.text, &self.allocs);
                if let Event::Start(..) = event {
                    // Descend into container nodes ...
                    self.tree.push();
                } else {
                    // ... and step over leaf nodes.
                    self.tree.next_sibling(cur_ix);
                }
                Some(event)
            }
        }
    }
}
+
// NOTE(review): fusedness relies on `Tree::pop` continuing to return `None`
// once the tree is exhausted — confirm against the tree module.
impl FusedIterator for Parser<'_, '_> {}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::tree::Node;
+
+ // TODO: move these tests to tests/html.rs?
+
+ fn parser_with_extensions(text: &str) -> Parser<'_, 'static> {
+ let mut opts = Options::empty();
+ opts.insert(Options::ENABLE_TABLES);
+ opts.insert(Options::ENABLE_FOOTNOTES);
+ opts.insert(Options::ENABLE_STRIKETHROUGH);
+ opts.insert(Options::ENABLE_TASKLISTS);
+
+ Parser::new_ext(text, opts)
+ }
+
+ #[test]
+ #[cfg(target_pointer_width = "64")]
+ fn node_size() {
+ let node_size = std::mem::size_of::<Node<Item>>();
+ assert_eq!(48, node_size);
+ }
+
+ #[test]
+ #[cfg(target_pointer_width = "64")]
+ fn body_size() {
+ let body_size = std::mem::size_of::<ItemBody>();
+ assert_eq!(16, body_size);
+ }
+
+ #[test]
+ fn single_open_fish_bracket() {
+ // dont crash
+ assert_eq!(3, Parser::new("<").count());
+ }
+
+ #[test]
+ fn lone_hashtag() {
+ // dont crash
+ assert_eq!(2, Parser::new("#").count());
+ }
+
+ #[test]
+ fn lots_of_backslashes() {
+ // dont crash
+ Parser::new("\\\\\r\r").count();
+ Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
+ }
+
+ #[test]
+ fn issue_320() {
+ // dont crash
+ parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
+ }
+
+ #[test]
+ fn issue_319() {
+ // dont crash
+ parser_with_extensions("|\r-]([^|\r-]([^").count();
+ parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
+ }
+
+ #[test]
+ fn issue_303() {
+ // dont crash
+ parser_with_extensions("[^\r\ra]").count();
+ parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
+ }
+
+ #[test]
+ fn issue_313() {
+ // dont crash
+ parser_with_extensions("*]0[^\r\r*]0[^").count();
+ parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
+ }
+
+ #[test]
+ fn issue_311() {
+ // dont crash
+ parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
+ }
+
+ #[test]
+ fn issue_283() {
+ let input = std::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
+ // dont crash
+ parser_with_extensions(input).count();
+ }
+
+ #[test]
+ fn issue_289() {
+ // dont crash
+ parser_with_extensions("> - \\\n> - ").count();
+ parser_with_extensions("- \n\n").count();
+ }
+
+ #[test]
+ fn issue_306() {
+ // dont crash
+ parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
+ }
+
+ #[test]
+ fn issue_305() {
+ // dont crash
+ parser_with_extensions("_6**6*_*").count();
+ }
+
+ #[test]
+ fn another_emphasis_panic() {
+ parser_with_extensions("*__#_#__*").count();
+ }
+
+ #[test]
+ fn offset_iter() {
+ let event_offsets: Vec<_> = Parser::new("*hello* world")
+ .into_offset_iter()
+ .map(|(_ev, range)| range)
+ .collect();
+ let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
+ assert_eq!(expected_offsets, event_offsets);
+ }
+
+ #[test]
+ fn reference_link_offsets() {
+ let range =
+ Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
+ .into_offset_iter()
+ .filter_map(|(ev, range)| match ev {
+ Event::Start(Tag::Link(LinkType::Reference, ..), ..) => Some(range),
+ _ => None,
+ })
+ .next()
+ .unwrap();
+ assert_eq!(5..30, range);
+ }
+
+ #[test]
+ fn footnote_offsets() {
+ let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
+ .into_offset_iter()
+ .filter_map(|(ev, range)| match ev {
+ Event::FootnoteReference(..) => Some(range),
+ _ => None,
+ })
+ .next()
+ .unwrap();
+ assert_eq!(12..16, range);
+ }
+
+ #[test]
+ fn table_offset() {
+ let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
+ let event_offset = parser_with_extensions(markdown)
+ .into_offset_iter()
+ .map(|(_ev, range)| range)
+ .nth(3)
+ .unwrap();
+ let expected_offset = 3..59;
+ assert_eq!(expected_offset, event_offset);
+ }
+
+ #[test]
+ fn table_cell_span() {
+ let markdown = "a|b|c\n--|--|--\na| |c";
+ let event_offset = parser_with_extensions(markdown)
+ .into_offset_iter()
+ .filter_map(|(ev, span)| match ev {
+ Event::Start(Tag::TableCell) => Some(span),
+ _ => None,
+ })
+ .nth(4)
+ .unwrap();
+ let expected_offset_start = "a|b|c\n--|--|--\na|".len();
+ assert_eq!(
+ expected_offset_start..(expected_offset_start + 2),
+ event_offset
+ );
+ }
+
+ #[test]
+ fn offset_iter_issue_378() {
+ let event_offsets: Vec<_> = Parser::new("a [b](c) d")
+ .into_offset_iter()
+ .map(|(_ev, range)| range)
+ .collect();
+ let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
+ assert_eq!(expected_offsets, event_offsets);
+ }
+
+ #[test]
+ fn offset_iter_issue_404() {
+ let event_offsets: Vec<_> = Parser::new("###\n")
+ .into_offset_iter()
+ .map(|(_ev, range)| range)
+ .collect();
+ let expected_offsets = vec![(0..4), (0..4)];
+ assert_eq!(expected_offsets, event_offsets);
+ }
+
+ // FIXME: add this one regression suite
+ #[test]
+ fn link_def_at_eof() {
+ let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
+ let expected = "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n";
+
+ let mut buf = String::new();
+ crate::html::push_html(&mut buf, Parser::new(test_str));
+ assert_eq!(expected, buf);
+ }
+
+ #[test]
+ fn no_footnote_refs_without_option() {
+ let test_str = "a [^a]\n\n[^a]: yolo";
+ let expected = "<p>a <a href=\"yolo\">^a</a></p>\n";
+
+ let mut buf = String::new();
+ crate::html::push_html(&mut buf, Parser::new(test_str));
+ assert_eq!(expected, buf);
+ }
+
+ #[test]
+ fn ref_def_at_eof() {
+ let test_str = "[test]:\\";
+ let expected = "";
+
+ let mut buf = String::new();
+ crate::html::push_html(&mut buf, Parser::new(test_str));
+ assert_eq!(expected, buf);
+ }
+
+ #[test]
+ fn ref_def_cr_lf() {
+ let test_str = "[a]: /u\r\n\n[a]";
+ let expected = "<p><a href=\"/u\">a</a></p>\n";
+
+ let mut buf = String::new();
+ crate::html::push_html(&mut buf, Parser::new(test_str));
+ assert_eq!(expected, buf);
+ }
+
+ #[test]
+ fn no_dest_refdef() {
+ let test_str = "[a]:";
+ let expected = "<p>[a]:</p>\n";
+
+ let mut buf = String::new();
+ crate::html::push_html(&mut buf, Parser::new(test_str));
+ assert_eq!(expected, buf);
+ }
+
+ #[test]
+ fn broken_links_called_only_once() {
+ for &(markdown, expected) in &[
+ ("See also [`g()`][crate::g].", 1),
+ ("See also [`g()`][crate::g][].", 1),
+ ("[brokenlink1] some other node [brokenlink2]", 2),
+ ] {
+ let mut times_called = 0;
+ let callback = &mut |_broken_link: BrokenLink| {
+ times_called += 1;
+ None
+ };
+ let parser =
+ Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
+ for _ in parser {}
+ assert_eq!(times_called, expected);
+ }
+ }
+
+ #[test]
+ fn simple_broken_link_callback() {
+ let test_str = "This is a link w/o def: [hello][world]";
+ let mut callback = |broken_link: BrokenLink| {
+ assert_eq!("world", broken_link.reference.as_ref());
+ assert_eq!(&test_str[broken_link.span], "[hello][world]");
+ let url = "YOLO".into();
+ let title = "SWAG".to_owned().into();
+ Some((url, title))
+ };
+ let parser =
+ Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
+ let mut link_tag_count = 0;
+ for (typ, url, title) in parser.filter_map(|event| match event {
+ Event::Start(tag) | Event::End(tag) => match tag {
+ Tag::Link(typ, url, title) => Some((typ, url, title)),
+ _ => None,
+ },
+ _ => None,
+ }) {
+ link_tag_count += 1;
+ assert_eq!(typ, LinkType::ReferenceUnknown);
+ assert_eq!(url.as_ref(), "YOLO");
+ assert_eq!(title.as_ref(), "SWAG");
+ }
+ assert!(link_tag_count > 0);
+ }
+
+ #[test]
+ fn code_block_kind_check_fenced() {
+ let parser = Parser::new("hello\n```test\ntadam\n```");
+ let mut found = 0;
+ for (ev, _range) in parser.into_offset_iter() {
+ match ev {
+ Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) => {
+ assert_eq!(syntax.as_ref(), "test");
+ found += 1;
+ }
+ _ => {}
+ }
+ }
+ assert_eq!(found, 1);
+ }
+
+ #[test]
+ fn code_block_kind_check_indented() {
+ let parser = Parser::new("hello\n\n ```test\n tadam\nhello");
+ let mut found = 0;
+ for (ev, _range) in parser.into_offset_iter() {
+ match ev {
+ Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => {
+ found += 1;
+ }
+ _ => {}
+ }
+ }
+ assert_eq!(found, 1);
+ }
+
+ #[test]
+ fn ref_defs() {
+ let input = r###"[a B c]: http://example.com
+[another]: https://google.com
+
+text
+
+[final ONE]: http://wikipedia.org
+"###;
+ let mut parser = Parser::new(input);
+
+ assert!(parser.reference_definitions().get("a b c").is_some());
+ assert!(parser.reference_definitions().get("nope").is_none());
+
+ if let Some(_event) = parser.next() {
+ // testing keys with shorter lifetimes than parser and its input
+ let s = "final one".to_owned();
+ let link_def = parser.reference_definitions().get(&s).unwrap();
+ let span = &input[link_def.span.clone()];
+ assert_eq!(span, "[final ONE]: http://wikipedia.org");
+ }
+ }
+
+ #[test]
+ fn common_lifetime_patterns_allowed<'b>() {
+ let temporary_str = String::from("xyz");
+
+ // NOTE: this is a limitation of Rust, it doesn't allow putting lifetime parameters on the closure itself.
+ // Hack it by attaching the lifetime to the test function instead.
+ // TODO: why is the `'b` lifetime required at all? Changing it to `'_` breaks things :(
+ let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference.into()));
+
+ fn function<'a>(link: BrokenLink<'a>) -> Option<(CowStr<'a>, CowStr<'a>)> {
+ Some(("#".into(), link.reference))
+ }
+
+ for _ in Parser::new_with_broken_link_callback(
+ "static lifetime",
+ Options::empty(),
+ Some(&mut closure),
+ ) {}
+ /* This fails to compile. Because the closure can't say `for <'a> fn(BrokenLink<'a>) ->
+ * CowStr<'a>` and has to use the enclosing `'b` lifetime parameter, `temporary_str` lives
+ * shorter than `'b`. I think this is unlikely to occur in real life, and if it does, the
+ * fix is simple: move it out to a function that allows annotating the lifetimes.
+ */
+ //for _ in Parser::new_with_broken_link_callback(&temporary_str, Options::empty(), Some(&mut callback)) {
+ //}
+
+ for _ in Parser::new_with_broken_link_callback(
+ "static lifetime",
+ Options::empty(),
+ Some(&mut function),
+ ) {}
+ for _ in Parser::new_with_broken_link_callback(
+ &temporary_str,
+ Options::empty(),
+ Some(&mut function),
+ ) {}
+ }
+}
diff --git a/vendor/pulldown-cmark/src/puncttable.rs b/vendor/pulldown-cmark/src/puncttable.rs
new file mode 100644
index 000000000..5acdfbea7
--- /dev/null
+++ b/vendor/pulldown-cmark/src/puncttable.rs
@@ -0,0 +1,351 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+//! CommonMark punctuation set based on spec and Unicode properties.
+
+// Autogenerated by mk_puncttable.py
+
+const PUNCT_MASKS_ASCII: [u16; 8] = [
+ 0x0000, // U+0000...U+000F
+ 0x0000, // U+0010...U+001F
+ 0xfffe, // U+0020...U+002F
+ 0xfc00, // U+0030...U+003F
+ 0x0001, // U+0040...U+004F
+ 0xf800, // U+0050...U+005F
+ 0x0001, // U+0060...U+006F
+ 0x7800, // U+0070...U+007F
+];
+
+const PUNCT_TAB: [u16; 132] = [
+ 10, // U+00A0...U+00AF
+ 11, // U+00B0...U+00BF
+ 55, // U+0370...U+037F
+ 56, // U+0380...U+038F
+ 85, // U+0550...U+055F
+ 88, // U+0580...U+058F
+ 91, // U+05B0...U+05BF
+ 92, // U+05C0...U+05CF
+ 95, // U+05F0...U+05FF
+ 96, // U+0600...U+060F
+ 97, // U+0610...U+061F
+ 102, // U+0660...U+066F
+ 109, // U+06D0...U+06DF
+ 112, // U+0700...U+070F
+ 127, // U+07F0...U+07FF
+ 131, // U+0830...U+083F
+ 133, // U+0850...U+085F
+ 150, // U+0960...U+096F
+ 151, // U+0970...U+097F
+ 175, // U+0AF0...U+0AFF
+ 223, // U+0DF0...U+0DFF
+ 228, // U+0E40...U+0E4F
+ 229, // U+0E50...U+0E5F
+ 240, // U+0F00...U+0F0F
+ 241, // U+0F10...U+0F1F
+ 243, // U+0F30...U+0F3F
+ 248, // U+0F80...U+0F8F
+ 253, // U+0FD0...U+0FDF
+ 260, // U+1040...U+104F
+ 271, // U+10F0...U+10FF
+ 310, // U+1360...U+136F
+ 320, // U+1400...U+140F
+ 358, // U+1660...U+166F
+ 361, // U+1690...U+169F
+ 366, // U+16E0...U+16EF
+ 371, // U+1730...U+173F
+ 381, // U+17D0...U+17DF
+ 384, // U+1800...U+180F
+ 404, // U+1940...U+194F
+ 417, // U+1A10...U+1A1F
+ 426, // U+1AA0...U+1AAF
+ 437, // U+1B50...U+1B5F
+ 438, // U+1B60...U+1B6F
+ 447, // U+1BF0...U+1BFF
+ 451, // U+1C30...U+1C3F
+ 455, // U+1C70...U+1C7F
+ 460, // U+1CC0...U+1CCF
+ 461, // U+1CD0...U+1CDF
+ 513, // U+2010...U+201F
+ 514, // U+2020...U+202F
+ 515, // U+2030...U+203F
+ 516, // U+2040...U+204F
+ 517, // U+2050...U+205F
+ 519, // U+2070...U+207F
+ 520, // U+2080...U+208F
+ 560, // U+2300...U+230F
+ 562, // U+2320...U+232F
+ 630, // U+2760...U+276F
+ 631, // U+2770...U+277F
+ 636, // U+27C0...U+27CF
+ 638, // U+27E0...U+27EF
+ 664, // U+2980...U+298F
+ 665, // U+2990...U+299F
+ 669, // U+29D0...U+29DF
+ 671, // U+29F0...U+29FF
+ 719, // U+2CF0...U+2CFF
+ 727, // U+2D70...U+2D7F
+ 736, // U+2E00...U+2E0F
+ 737, // U+2E10...U+2E1F
+ 738, // U+2E20...U+2E2F
+ 739, // U+2E30...U+2E3F
+ 740, // U+2E40...U+2E4F
+ 768, // U+3000...U+300F
+ 769, // U+3010...U+301F
+ 771, // U+3030...U+303F
+ 778, // U+30A0...U+30AF
+ 783, // U+30F0...U+30FF
+ 2639, // U+A4F0...U+A4FF
+ 2656, // U+A600...U+A60F
+ 2663, // U+A670...U+A67F
+ 2671, // U+A6F0...U+A6FF
+ 2695, // U+A870...U+A87F
+ 2700, // U+A8C0...U+A8CF
+ 2703, // U+A8F0...U+A8FF
+ 2706, // U+A920...U+A92F
+ 2709, // U+A950...U+A95F
+ 2716, // U+A9C0...U+A9CF
+ 2717, // U+A9D0...U+A9DF
+ 2725, // U+AA50...U+AA5F
+ 2733, // U+AAD0...U+AADF
+ 2735, // U+AAF0...U+AAFF
+ 2750, // U+ABE0...U+ABEF
+ 4051, // U+FD30...U+FD3F
+ 4065, // U+FE10...U+FE1F
+ 4067, // U+FE30...U+FE3F
+ 4068, // U+FE40...U+FE4F
+ 4069, // U+FE50...U+FE5F
+ 4070, // U+FE60...U+FE6F
+ 4080, // U+FF00...U+FF0F
+ 4081, // U+FF10...U+FF1F
+ 4082, // U+FF20...U+FF2F
+ 4083, // U+FF30...U+FF3F
+ 4085, // U+FF50...U+FF5F
+ 4086, // U+FF60...U+FF6F
+ 4112, // U+10100...U+1010F
+ 4153, // U+10390...U+1039F
+ 4157, // U+103D0...U+103DF
+ 4182, // U+10560...U+1056F
+ 4229, // U+10850...U+1085F
+ 4241, // U+10910...U+1091F
+ 4243, // U+10930...U+1093F
+ 4261, // U+10A50...U+10A5F
+ 4263, // U+10A70...U+10A7F
+ 4271, // U+10AF0...U+10AFF
+ 4275, // U+10B30...U+10B3F
+ 4281, // U+10B90...U+10B9F
+ 4356, // U+11040...U+1104F
+ 4363, // U+110B0...U+110BF
+ 4364, // U+110C0...U+110CF
+ 4372, // U+11140...U+1114F
+ 4375, // U+11170...U+1117F
+ 4380, // U+111C0...U+111CF
+ 4387, // U+11230...U+1123F
+ 4428, // U+114C0...U+114CF
+ 4444, // U+115C0...U+115CF
+ 4452, // U+11640...U+1164F
+ 4679, // U+12470...U+1247F
+ 5798, // U+16A60...U+16A6F
+ 5807, // U+16AF0...U+16AFF
+ 5811, // U+16B30...U+16B3F
+ 5812, // U+16B40...U+16B4F
+ 7113, // U+1BC90...U+1BC9F
+];
+
+const PUNCT_MASKS: [u16; 132] = [
+ 0x0882, // U+00A0...U+00AF
+ 0x88c0, // U+00B0...U+00BF
+ 0x4000, // U+0370...U+037F
+ 0x0080, // U+0380...U+038F
+ 0xfc00, // U+0550...U+055F
+ 0x0600, // U+0580...U+058F
+ 0x4000, // U+05B0...U+05BF
+ 0x0049, // U+05C0...U+05CF
+ 0x0018, // U+05F0...U+05FF
+ 0x3600, // U+0600...U+060F
+ 0xc800, // U+0610...U+061F
+ 0x3c00, // U+0660...U+066F
+ 0x0010, // U+06D0...U+06DF
+ 0x3fff, // U+0700...U+070F
+ 0x0380, // U+07F0...U+07FF
+ 0x7fff, // U+0830...U+083F
+ 0x4000, // U+0850...U+085F
+ 0x0030, // U+0960...U+096F
+ 0x0001, // U+0970...U+097F
+ 0x0001, // U+0AF0...U+0AFF
+ 0x0010, // U+0DF0...U+0DFF
+ 0x8000, // U+0E40...U+0E4F
+ 0x0c00, // U+0E50...U+0E5F
+ 0xfff0, // U+0F00...U+0F0F
+ 0x0017, // U+0F10...U+0F1F
+ 0x3c00, // U+0F30...U+0F3F
+ 0x0020, // U+0F80...U+0F8F
+ 0x061f, // U+0FD0...U+0FDF
+ 0xfc00, // U+1040...U+104F
+ 0x0800, // U+10F0...U+10FF
+ 0x01ff, // U+1360...U+136F
+ 0x0001, // U+1400...U+140F
+ 0x6000, // U+1660...U+166F
+ 0x1800, // U+1690...U+169F
+ 0x3800, // U+16E0...U+16EF
+ 0x0060, // U+1730...U+173F
+ 0x0770, // U+17D0...U+17DF
+ 0x07ff, // U+1800...U+180F
+ 0x0030, // U+1940...U+194F
+ 0xc000, // U+1A10...U+1A1F
+ 0x3f7f, // U+1AA0...U+1AAF
+ 0xfc00, // U+1B50...U+1B5F
+ 0x0001, // U+1B60...U+1B6F
+ 0xf000, // U+1BF0...U+1BFF
+ 0xf800, // U+1C30...U+1C3F
+ 0xc000, // U+1C70...U+1C7F
+ 0x00ff, // U+1CC0...U+1CCF
+ 0x0008, // U+1CD0...U+1CDF
+ 0xffff, // U+2010...U+201F
+ 0x00ff, // U+2020...U+202F
+ 0xffff, // U+2030...U+203F
+ 0xffef, // U+2040...U+204F
+ 0x7ffb, // U+2050...U+205F
+ 0x6000, // U+2070...U+207F
+ 0x6000, // U+2080...U+208F
+ 0x0f00, // U+2300...U+230F
+ 0x0600, // U+2320...U+232F
+ 0xff00, // U+2760...U+276F
+ 0x003f, // U+2770...U+277F
+ 0x0060, // U+27C0...U+27CF
+ 0xffc0, // U+27E0...U+27EF
+ 0xfff8, // U+2980...U+298F
+ 0x01ff, // U+2990...U+299F
+ 0x0f00, // U+29D0...U+29DF
+ 0x3000, // U+29F0...U+29FF
+ 0xde00, // U+2CF0...U+2CFF
+ 0x0001, // U+2D70...U+2D7F
+ 0xffff, // U+2E00...U+2E0F
+ 0xffff, // U+2E10...U+2E1F
+ 0x7fff, // U+2E20...U+2E2F
+ 0xffff, // U+2E30...U+2E3F
+ 0x0007, // U+2E40...U+2E4F
+ 0xff0e, // U+3000...U+300F
+ 0xfff3, // U+3010...U+301F
+ 0x2001, // U+3030...U+303F
+ 0x0001, // U+30A0...U+30AF
+ 0x0800, // U+30F0...U+30FF
+ 0xc000, // U+A4F0...U+A4FF
+ 0xe000, // U+A600...U+A60F
+ 0x4008, // U+A670...U+A67F
+ 0x00fc, // U+A6F0...U+A6FF
+ 0x00f0, // U+A870...U+A87F
+ 0xc000, // U+A8C0...U+A8CF
+ 0x0700, // U+A8F0...U+A8FF
+ 0xc000, // U+A920...U+A92F
+ 0x8000, // U+A950...U+A95F
+ 0x3ffe, // U+A9C0...U+A9CF
+ 0xc000, // U+A9D0...U+A9DF
+ 0xf000, // U+AA50...U+AA5F
+ 0xc000, // U+AAD0...U+AADF
+ 0x0003, // U+AAF0...U+AAFF
+ 0x0800, // U+ABE0...U+ABEF
+ 0xc000, // U+FD30...U+FD3F
+ 0x03ff, // U+FE10...U+FE1F
+ 0xffff, // U+FE30...U+FE3F
+ 0xffff, // U+FE40...U+FE4F
+ 0xfff7, // U+FE50...U+FE5F
+ 0x0d0b, // U+FE60...U+FE6F
+ 0xf7ee, // U+FF00...U+FF0F
+ 0x8c00, // U+FF10...U+FF1F
+ 0x0001, // U+FF20...U+FF2F
+ 0xb800, // U+FF30...U+FF3F
+ 0xa800, // U+FF50...U+FF5F
+ 0x003f, // U+FF60...U+FF6F
+ 0x0007, // U+10100...U+1010F
+ 0x8000, // U+10390...U+1039F
+ 0x0001, // U+103D0...U+103DF
+ 0x8000, // U+10560...U+1056F
+ 0x0080, // U+10850...U+1085F
+ 0x8000, // U+10910...U+1091F
+ 0x8000, // U+10930...U+1093F
+ 0x01ff, // U+10A50...U+10A5F
+ 0x8000, // U+10A70...U+10A7F
+ 0x007f, // U+10AF0...U+10AFF
+ 0xfe00, // U+10B30...U+10B3F
+ 0x1e00, // U+10B90...U+10B9F
+ 0x3f80, // U+11040...U+1104F
+ 0xd800, // U+110B0...U+110BF
+ 0x0003, // U+110C0...U+110CF
+ 0x000f, // U+11140...U+1114F
+ 0x0030, // U+11170...U+1117F
+ 0x21e0, // U+111C0...U+111CF
+ 0x3f00, // U+11230...U+1123F
+ 0x0040, // U+114C0...U+114CF
+ 0x03fe, // U+115C0...U+115CF
+ 0x000e, // U+11640...U+1164F
+ 0x001f, // U+12470...U+1247F
+ 0xc000, // U+16A60...U+16A6F
+ 0x0020, // U+16AF0...U+16AFF
+ 0x0f80, // U+16B30...U+16B3F
+ 0x0010, // U+16B40...U+16B4F
+ 0x8000, // U+1BC90...U+1BC9F
+];
+
/// Returns true for CommonMark "ASCII punctuation characters". The set
/// encoded by `PUNCT_MASKS_ASCII` coincides exactly with the standard
/// library's `u8::is_ascii_punctuation` (U+0021..=U+002F, U+003A..=U+0040,
/// U+005B..=U+0060, U+007B..=U+007E); bytes >= 128 are never ASCII
/// punctuation.
pub(crate) fn is_ascii_punctuation(c: u8) -> bool {
    // Delegate to the standard library instead of the hand-rolled bitmask
    // lookup; both definitions agree on every possible byte value.
    c.is_ascii_punctuation()
}
+
+pub(crate) fn is_punctuation(c: char) -> bool {
+ let cp = c as u32;
+ if cp < 128 {
+ return is_ascii_punctuation(cp as u8);
+ }
+ if cp > 0x1BC9F {
+ return false;
+ }
+ let high = (cp / 16) as u16;
+ match PUNCT_TAB.binary_search(&high) {
+ Ok(index) => (PUNCT_MASKS[index] & (1 << (cp & 15))) != 0,
+ _ => false,
+ }
+}
+
#[cfg(test)]
mod tests {
    use super::{is_ascii_punctuation, is_punctuation};

    #[test]
    fn test_ascii() {
        assert!(is_ascii_punctuation(b'!'));
        assert!(is_ascii_punctuation(b'@'));
        assert!(is_ascii_punctuation(b'~'));
        assert!(!is_ascii_punctuation(b' '));
        assert!(!is_ascii_punctuation(b'0'));
        assert!(!is_ascii_punctuation(b'A'));
        // Bytes >= 128 are never ASCII punctuation.
        assert!(!is_ascii_punctuation(0xA1));
    }

    #[test]
    fn test_unicode() {
        assert!(is_punctuation('~'));
        assert!(!is_punctuation(' '));

        assert!(is_punctuation('\u{00A1}'));
        assert!(is_punctuation('\u{060C}'));
        assert!(is_punctuation('\u{FF65}'));
        // U+1BC9F is the last code point covered by the tables.
        assert!(is_punctuation('\u{1BC9F}'));
        assert!(!is_punctuation('\u{1BCA0}'));
    }
}
diff --git a/vendor/pulldown-cmark/src/scanners.rs b/vendor/pulldown-cmark/src/scanners.rs
new file mode 100644
index 000000000..176c495eb
--- /dev/null
+++ b/vendor/pulldown-cmark/src/scanners.rs
@@ -0,0 +1,1327 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+//! Scanners for fragments of CommonMark syntax
+
+use std::convert::TryInto;
+use std::{char, convert::TryFrom};
+
+use crate::parse::HtmlScanGuard;
+pub(crate) use crate::puncttable::{is_ascii_punctuation, is_punctuation};
+use crate::strings::CowStr;
+use crate::{entities, HeadingLevel};
+use crate::{Alignment, LinkType};
+
+use memchr::memchr;
+
+// sorted for binary search
+const HTML_TAGS: [&str; 62] = [
+ "address",
+ "article",
+ "aside",
+ "base",
+ "basefont",
+ "blockquote",
+ "body",
+ "caption",
+ "center",
+ "col",
+ "colgroup",
+ "dd",
+ "details",
+ "dialog",
+ "dir",
+ "div",
+ "dl",
+ "dt",
+ "fieldset",
+ "figcaption",
+ "figure",
+ "footer",
+ "form",
+ "frame",
+ "frameset",
+ "h1",
+ "h2",
+ "h3",
+ "h4",
+ "h5",
+ "h6",
+ "head",
+ "header",
+ "hr",
+ "html",
+ "iframe",
+ "legend",
+ "li",
+ "link",
+ "main",
+ "menu",
+ "menuitem",
+ "nav",
+ "noframes",
+ "ol",
+ "optgroup",
+ "option",
+ "p",
+ "param",
+ "section",
+ "source",
+ "summary",
+ "table",
+ "tbody",
+ "td",
+ "tfoot",
+ "th",
+ "thead",
+ "title",
+ "tr",
+ "track",
+ "ul",
+];
+
+/// Analysis of the beginning of a line, including indentation and container
+/// markers.
+#[derive(Clone)]
+pub(crate) struct LineStart<'a> {
+ bytes: &'a [u8],
+ tab_start: usize,
+ ix: usize,
+ spaces_remaining: usize,
+ // no thematic breaks can occur before this offset.
+ // this prevents scanning over and over up to a certain point
+ min_hrule_offset: usize,
+}
+
+impl<'a> LineStart<'a> {
+ pub(crate) fn new(bytes: &[u8]) -> LineStart {
+ LineStart {
+ bytes,
+ tab_start: 0,
+ ix: 0,
+ spaces_remaining: 0,
+ min_hrule_offset: 0,
+ }
+ }
+
+ /// Try to scan a number of spaces.
+ ///
+ /// Returns true if all spaces were consumed.
+ ///
+ /// Note: consumes some spaces even if not successful.
+ pub(crate) fn scan_space(&mut self, n_space: usize) -> bool {
+ self.scan_space_inner(n_space) == 0
+ }
+
+ /// Scan a number of spaces up to a maximum.
+ ///
+ /// Returns number of spaces scanned.
+ pub(crate) fn scan_space_upto(&mut self, n_space: usize) -> usize {
+ n_space - self.scan_space_inner(n_space)
+ }
+
+ /// Returns unused remainder of spaces.
+ fn scan_space_inner(&mut self, mut n_space: usize) -> usize {
+ let n_from_remaining = self.spaces_remaining.min(n_space);
+ self.spaces_remaining -= n_from_remaining;
+ n_space -= n_from_remaining;
+ while n_space > 0 && self.ix < self.bytes.len() {
+ match self.bytes[self.ix] {
+ b' ' => {
+ self.ix += 1;
+ n_space -= 1;
+ }
+ b'\t' => {
+ let spaces = 4 - (self.ix - self.tab_start) % 4;
+ self.ix += 1;
+ self.tab_start = self.ix;
+ let n = spaces.min(n_space);
+ n_space -= n;
+ self.spaces_remaining = spaces - n;
+ }
+ _ => break,
+ }
+ }
+ n_space
+ }
+
+ /// Scan all available ASCII whitespace (not including eol).
+ pub(crate) fn scan_all_space(&mut self) {
+ self.spaces_remaining = 0;
+ self.ix += self.bytes[self.ix..]
+ .iter()
+ .take_while(|&&b| b == b' ' || b == b'\t')
+ .count();
+ }
+
+ /// Determine whether we're at end of line (includes end of file).
+ pub(crate) fn is_at_eol(&self) -> bool {
+ self.bytes
+ .get(self.ix)
+ .map(|&c| c == b'\r' || c == b'\n')
+ .unwrap_or(true)
+ }
+
+ fn scan_ch(&mut self, c: u8) -> bool {
+ if self.ix < self.bytes.len() && self.bytes[self.ix] == c {
+ self.ix += 1;
+ true
+ } else {
+ false
+ }
+ }
+
+ pub(crate) fn scan_blockquote_marker(&mut self) -> bool {
+ let save = self.clone();
+ let _ = self.scan_space(3);
+ if self.scan_ch(b'>') {
+ let _ = self.scan_space(1);
+ true
+ } else {
+ *self = save;
+ false
+ }
+ }
+
+ /// Scan a list marker.
+ ///
+ /// Return value is the character, the start number (for ordered lists), and the indent in spaces.
+ /// For ordered list markers, the character will be one of b'.' or b')'. For
+ /// bullet list markers, it will be one of b'-', b'+', or b'*'.
+ pub(crate) fn scan_list_marker(&mut self) -> Option<(u8, u64, usize)> {
+ let save = self.clone();
+ let indent = self.scan_space_upto(4);
+ if indent < 4 && self.ix < self.bytes.len() {
+ let c = self.bytes[self.ix];
+ if c == b'-' || c == b'+' || c == b'*' {
+ if self.ix >= self.min_hrule_offset {
+ // there could be an hrule here
+ if let Err(min_offset) = scan_hrule(&self.bytes[self.ix..]) {
+ self.min_hrule_offset = min_offset;
+ } else {
+ *self = save;
+ return None;
+ }
+ }
+ self.ix += 1;
+ if self.scan_space(1) || self.is_at_eol() {
+ return self.finish_list_marker(c, 0, indent + 2);
+ }
+ } else if c >= b'0' && c <= b'9' {
+ let start_ix = self.ix;
+ let mut ix = self.ix + 1;
+ let mut val = u64::from(c - b'0');
+ while ix < self.bytes.len() && ix - start_ix < 10 {
+ let c = self.bytes[ix];
+ ix += 1;
+ if c >= b'0' && c <= b'9' {
+ val = val * 10 + u64::from(c - b'0');
+ } else if c == b')' || c == b'.' {
+ self.ix = ix;
+ if self.scan_space(1) || self.is_at_eol() {
+ return self.finish_list_marker(c, val, indent + self.ix - start_ix);
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ }
+ *self = save;
+ None
+ }
+
+ fn finish_list_marker(
+ &mut self,
+ c: u8,
+ start: u64,
+ mut indent: usize,
+ ) -> Option<(u8, u64, usize)> {
+ let save = self.clone();
+
+ // skip the rest of the line if it's blank
+ if scan_blank_line(&self.bytes[self.ix..]).is_some() {
+ return Some((c, start, indent));
+ }
+
+ let post_indent = self.scan_space_upto(4);
+ if post_indent < 4 {
+ indent += post_indent;
+ } else {
+ *self = save;
+ }
+ Some((c, start, indent))
+ }
+
+ /// Returns Some(is_checked) when a task list marker was found. Resets itself
+ /// to original state otherwise.
+ pub(crate) fn scan_task_list_marker(&mut self) -> Option<bool> {
+ let save = self.clone();
+ self.scan_space_upto(3);
+
+ if !self.scan_ch(b'[') {
+ *self = save;
+ return None;
+ }
+ let is_checked = match self.bytes.get(self.ix) {
+ Some(&c) if is_ascii_whitespace_no_nl(c) => {
+ self.ix += 1;
+ false
+ }
+ Some(b'x') | Some(b'X') => {
+ self.ix += 1;
+ true
+ }
+ _ => {
+ *self = save;
+ return None;
+ }
+ };
+ if !self.scan_ch(b']') {
+ *self = save;
+ return None;
+ }
+ if !self
+ .bytes
+ .get(self.ix)
+ .map(|&b| is_ascii_whitespace_no_nl(b))
+ .unwrap_or(false)
+ {
+ *self = save;
+ return None;
+ }
+ Some(is_checked)
+ }
+
+ pub(crate) fn bytes_scanned(&self) -> usize {
+ self.ix
+ }
+
+ pub(crate) fn remaining_space(&self) -> usize {
+ self.spaces_remaining
+ }
+}
+
+pub(crate) fn is_ascii_whitespace(c: u8) -> bool {
+ (c >= 0x09 && c <= 0x0d) || c == b' '
+}
+
+pub(crate) fn is_ascii_whitespace_no_nl(c: u8) -> bool {
+ c == b'\t' || c == 0x0b || c == 0x0c || c == b' '
+}
+
+fn is_ascii_alpha(c: u8) -> bool {
+ matches!(c, b'a'..=b'z' | b'A'..=b'Z')
+}
+
+fn is_ascii_alphanumeric(c: u8) -> bool {
+ matches!(c, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
+}
+
+fn is_ascii_letterdigitdash(c: u8) -> bool {
+ c == b'-' || is_ascii_alphanumeric(c)
+}
+
+fn is_digit(c: u8) -> bool {
+ b'0' <= c && c <= b'9'
+}
+
+fn is_valid_unquoted_attr_value_char(c: u8) -> bool {
+ !matches!(
+ c,
+ b'\'' | b'"' | b' ' | b'=' | b'>' | b'<' | b'`' | b'\n' | b'\r'
+ )
+}
+
+// scan a single character
+pub(crate) fn scan_ch(data: &[u8], c: u8) -> usize {
+ if !data.is_empty() && data[0] == c {
+ 1
+ } else {
+ 0
+ }
+}
+
+pub(crate) fn scan_while<F>(data: &[u8], mut f: F) -> usize
+where
+ F: FnMut(u8) -> bool,
+{
+ data.iter().take_while(|&&c| f(c)).count()
+}
+
+pub(crate) fn scan_rev_while<F>(data: &[u8], mut f: F) -> usize
+where
+ F: FnMut(u8) -> bool,
+{
+ data.iter().rev().take_while(|&&c| f(c)).count()
+}
+
+pub(crate) fn scan_ch_repeat(data: &[u8], c: u8) -> usize {
+ scan_while(data, |x| x == c)
+}
+
+// Note: this scans ASCII whitespace only, for Unicode whitespace use
+// a different function.
+pub(crate) fn scan_whitespace_no_nl(data: &[u8]) -> usize {
+ scan_while(data, is_ascii_whitespace_no_nl)
+}
+
+fn scan_attr_value_chars(data: &[u8]) -> usize {
+ scan_while(data, is_valid_unquoted_attr_value_char)
+}
+
+pub(crate) fn scan_eol(bytes: &[u8]) -> Option<usize> {
+ if bytes.is_empty() {
+ return Some(0);
+ }
+ match bytes[0] {
+ b'\n' => Some(1),
+ b'\r' => Some(if bytes.get(1) == Some(&b'\n') { 2 } else { 1 }),
+ _ => None,
+ }
+}
+
+pub(crate) fn scan_blank_line(bytes: &[u8]) -> Option<usize> {
+ let i = scan_whitespace_no_nl(bytes);
+ scan_eol(&bytes[i..]).map(|n| i + n)
+}
+
+pub(crate) fn scan_nextline(bytes: &[u8]) -> usize {
+ memchr(b'\n', bytes).map_or(bytes.len(), |x| x + 1)
+}
+
+// return: end byte for closing code fence, or None
+// if the line is not a closing code fence
+pub(crate) fn scan_closing_code_fence(
+ bytes: &[u8],
+ fence_char: u8,
+ n_fence_char: usize,
+) -> Option<usize> {
+ if bytes.is_empty() {
+ return Some(0);
+ }
+ let mut i = 0;
+ let num_fence_chars_found = scan_ch_repeat(&bytes[i..], fence_char);
+ if num_fence_chars_found < n_fence_char {
+ return None;
+ }
+ i += num_fence_chars_found;
+ let num_trailing_spaces = scan_ch_repeat(&bytes[i..], b' ');
+ i += num_trailing_spaces;
+ scan_eol(&bytes[i..]).map(|_| i)
+}
+
+// returned pair is (number of bytes, number of spaces)
+fn calc_indent(text: &[u8], max: usize) -> (usize, usize) {
+ let mut spaces = 0;
+ let mut offset = 0;
+
+ for (i, &b) in text.iter().enumerate() {
+ match b {
+ b' ' => {
+ spaces += 1;
+ if spaces == max {
+ break;
+ }
+ }
+ b'\t' => {
+ let new_spaces = spaces + 4 - (spaces & 3);
+ if new_spaces > max {
+ break;
+ }
+ spaces = new_spaces;
+ }
+ _ => break,
+ }
+ offset = i;
+ }
+
+ (offset, spaces)
+}
+
+/// Scan hrule opening sequence.
+///
+/// Returns Ok(x) when it finds an hrule, where x is the
+/// size of line containing the hrule, including the trailing newline.
+///
+/// Returns Err(x) when it does not find an hrule and x is
+/// the offset in data before no hrule can appear.
+pub(crate) fn scan_hrule(bytes: &[u8]) -> Result<usize, usize> {
+ if bytes.len() < 3 {
+ return Err(0);
+ }
+ let c = bytes[0];
+ if !(c == b'*' || c == b'-' || c == b'_') {
+ return Err(0);
+ }
+ let mut n = 0;
+ let mut i = 0;
+
+ while i < bytes.len() {
+ match bytes[i] {
+ b'\n' | b'\r' => {
+ i += scan_eol(&bytes[i..]).unwrap_or(0);
+ break;
+ }
+ c2 if c2 == c => {
+ n += 1;
+ }
+ b' ' | b'\t' => (),
+ _ => return Err(i),
+ }
+ i += 1;
+ }
+ if n >= 3 {
+ Ok(i)
+ } else {
+ Err(i)
+ }
+}
+
+/// Scan an ATX heading opening sequence.
+///
+/// Returns number of bytes in prefix and level.
+pub(crate) fn scan_atx_heading(data: &[u8]) -> Option<HeadingLevel> {
+ let level = scan_ch_repeat(data, b'#');
+ if data.get(level).copied().map_or(true, is_ascii_whitespace) {
+ HeadingLevel::try_from(level).ok()
+ } else {
+ None
+ }
+}
+
+/// Scan a setext heading underline.
+///
+/// Returns number of bytes in line (including trailing newline) and level.
+pub(crate) fn scan_setext_heading(data: &[u8]) -> Option<(usize, HeadingLevel)> {
+ let c = *data.get(0)?;
+ let level = if c == b'=' {
+ HeadingLevel::H1
+ } else if c == b'-' {
+ HeadingLevel::H2
+ } else {
+ return None;
+ };
+ let mut i = 1 + scan_ch_repeat(&data[1..], c);
+ i += scan_blank_line(&data[i..])?;
+ Some((i, level))
+}
+
+// returns number of bytes in line (including trailing
+// newline) and column alignments
+pub(crate) fn scan_table_head(data: &[u8]) -> (usize, Vec<Alignment>) {
+ let (mut i, spaces) = calc_indent(data, 4);
+ if spaces > 3 || i == data.len() {
+ return (0, vec![]);
+ }
+ let mut cols = vec![];
+ let mut active_col = Alignment::None;
+ let mut start_col = true;
+ if data[i] == b'|' {
+ i += 1;
+ }
+ for c in &data[i..] {
+ if let Some(n) = scan_eol(&data[i..]) {
+ i += n;
+ break;
+ }
+ match *c {
+ b' ' => (),
+ b':' => {
+ active_col = match (start_col, active_col) {
+ (true, Alignment::None) => Alignment::Left,
+ (false, Alignment::Left) => Alignment::Center,
+ (false, Alignment::None) => Alignment::Right,
+ _ => active_col,
+ };
+ start_col = false;
+ }
+ b'-' => {
+ start_col = false;
+ }
+ b'|' => {
+ start_col = true;
+ cols.push(active_col);
+ active_col = Alignment::None;
+ }
+ _ => {
+ cols = vec![];
+ start_col = true;
+ break;
+ }
+ }
+ i += 1;
+ }
+
+ if !start_col {
+ cols.push(active_col);
+ }
+
+ (i, cols)
+}
+
+/// Scan code fence.
+///
+/// Returns number of bytes scanned and the char that is repeated to make the code fence.
+pub(crate) fn scan_code_fence(data: &[u8]) -> Option<(usize, u8)> {
+ let c = *data.get(0)?;
+ if !(c == b'`' || c == b'~') {
+ return None;
+ }
+ let i = 1 + scan_ch_repeat(&data[1..], c);
+ if i >= 3 {
+ if c == b'`' {
+ let suffix = &data[i..];
+ let next_line = i + scan_nextline(suffix);
+ // FIXME: make sure this is correct
+ if suffix[..(next_line - i)].iter().any(|&b| b == b'`') {
+ return None;
+ }
+ }
+ Some((i, c))
+ } else {
+ None
+ }
+}
+
+pub(crate) fn scan_blockquote_start(data: &[u8]) -> Option<usize> {
+ if data.starts_with(b"> ") {
+ Some(2)
+ } else {
+ None
+ }
+}
+
+/// This already assumes the list item has been scanned.
+pub(crate) fn scan_empty_list(data: &[u8]) -> bool {
+ let mut ix = 0;
+ for _ in 0..2 {
+ if let Some(bytes) = scan_blank_line(&data[ix..]) {
+ ix += bytes;
+ } else {
+ return false;
+ }
+ }
+ true
+}
+
+// return number of bytes scanned, delimiter, start index, and indent
+pub(crate) fn scan_listitem(bytes: &[u8]) -> Option<(usize, u8, usize, usize)> {
+ let mut c = *bytes.get(0)?;
+ let (w, start) = match c {
+ b'-' | b'+' | b'*' => (1, 0),
+ b'0'..=b'9' => {
+ let (length, start) = parse_decimal(bytes);
+ c = *bytes.get(length)?;
+ if !(c == b'.' || c == b')') {
+ return None;
+ }
+ (length + 1, start)
+ }
+ _ => {
+ return None;
+ }
+ };
+ // TODO: replace calc_indent with scan_leading_whitespace, for tab correctness
+ let (mut postn, mut postindent) = calc_indent(&bytes[w..], 5);
+ if postindent == 0 {
+ scan_eol(&bytes[w..])?;
+ postindent += 1;
+ } else if postindent > 4 {
+ postn = 1;
+ postindent = 1;
+ }
+ if scan_blank_line(&bytes[w..]).is_some() {
+ postn = 0;
+ postindent = 1;
+ }
+ Some((w + postn, c, start, w + postindent))
+}
+
+// returns (number of bytes, parsed decimal)
+fn parse_decimal(bytes: &[u8]) -> (usize, usize) {
+ match bytes
+ .iter()
+ .take_while(|&&b| is_digit(b))
+ .try_fold((0, 0usize), |(count, acc), c| {
+ let digit = usize::from(c - b'0');
+ match acc
+ .checked_mul(10)
+ .and_then(|ten_acc| ten_acc.checked_add(digit))
+ {
+ Some(number) => Ok((count + 1, number)),
+ // stop early on overflow
+ None => Err((count, acc)),
+ }
+ }) {
+ Ok(p) | Err(p) => p,
+ }
+}
+
+// returns (number of bytes, parsed hex)
+fn parse_hex(bytes: &[u8]) -> (usize, usize) {
+ match bytes.iter().try_fold((0, 0usize), |(count, acc), c| {
+ let mut c = *c;
+ let digit = if c >= b'0' && c <= b'9' {
+ usize::from(c - b'0')
+ } else {
+ // make lower case
+ c |= 0x20;
+ if c >= b'a' && c <= b'f' {
+ usize::from(c - b'a' + 10)
+ } else {
+ return Err((count, acc));
+ }
+ };
+ match acc
+ .checked_mul(16)
+ .and_then(|sixteen_acc| sixteen_acc.checked_add(digit))
+ {
+ Some(number) => Ok((count + 1, number)),
+ // stop early on overflow
+ None => Err((count, acc)),
+ }
+ }) {
+ Ok(p) | Err(p) => p,
+ }
+}
+
+fn char_from_codepoint(input: usize) -> Option<char> {
+ let mut codepoint = input.try_into().ok()?;
+ if codepoint == 0 {
+ codepoint = 0xFFFD;
+ }
+ char::from_u32(codepoint)
+}
+
+// doesn't bother to check data[0] == '&'
+pub(crate) fn scan_entity(bytes: &[u8]) -> (usize, Option<CowStr<'static>>) {
+ let mut end = 1;
+ if scan_ch(&bytes[end..], b'#') == 1 {
+ end += 1;
+ let (bytecount, codepoint) = if end < bytes.len() && bytes[end] | 0x20 == b'x' {
+ end += 1;
+ parse_hex(&bytes[end..])
+ } else {
+ parse_decimal(&bytes[end..])
+ };
+ end += bytecount;
+ return if bytecount == 0 || scan_ch(&bytes[end..], b';') == 0 {
+ (0, None)
+ } else if let Some(c) = char_from_codepoint(codepoint) {
+ (end + 1, Some(c.into()))
+ } else {
+ (0, None)
+ };
+ }
+ end += scan_while(&bytes[end..], is_ascii_alphanumeric);
+ if scan_ch(&bytes[end..], b';') == 1 {
+ if let Some(value) = entities::get_entity(&bytes[1..end]) {
+ return (end + 1, Some(value.into()));
+ }
+ }
+ (0, None)
+}
+
+// FIXME: we can most likely re-use other scanners
+// returns (bytelength, title_str)
+pub(crate) fn scan_refdef_title(text: &str) -> Option<(usize, &str)> {
+ let mut chars = text.chars().peekable();
+ let closing_delim = match chars.next()? {
+ '\'' => '\'',
+ '"' => '"',
+ '(' => ')',
+ _ => return None,
+ };
+ let mut bytecount = 1;
+
+ while let Some(c) = chars.next() {
+ match c {
+ '\n' => {
+ bytecount += 1;
+ let mut next = *chars.peek()?;
+ while is_ascii_whitespace_no_nl(next as u8) {
+ bytecount += chars.next()?.len_utf8();
+ next = *chars.peek()?;
+ }
+ if *chars.peek()? == '\n' {
+ // blank line - not allowed
+ return None;
+ }
+ }
+ '\\' => {
+ let next_char = chars.next()?;
+ bytecount += 1 + next_char.len_utf8();
+ }
+ c if c == closing_delim => {
+ return Some((bytecount + 1, &text[1..bytecount]));
+ }
+ c => {
+ bytecount += c.len_utf8();
+ }
+ }
+ }
+ None
+}
+
+// note: dest returned is raw, still needs to be unescaped
+// TODO: check that nested parens are really not allowed for refdefs
+// TODO(performance): this func should probably do its own unescaping
+pub(crate) fn scan_link_dest(
+ data: &str,
+ start_ix: usize,
+ max_next: usize,
+) -> Option<(usize, &str)> {
+ let bytes = &data.as_bytes()[start_ix..];
+ let mut i = scan_ch(bytes, b'<');
+
+ if i != 0 {
+ // pointy links
+ while i < bytes.len() {
+ match bytes[i] {
+ b'\n' | b'\r' | b'<' => return None,
+ b'>' => return Some((i + 1, &data[(start_ix + 1)..(start_ix + i)])),
+ b'\\' if i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) => {
+ i += 1;
+ }
+ _ => {}
+ }
+ i += 1;
+ }
+ None
+ } else {
+ // non-pointy links
+ let mut nest = 0;
+ while i < bytes.len() {
+ match bytes[i] {
+ 0x0..=0x20 => {
+ break;
+ }
+ b'(' => {
+ if nest > max_next {
+ return None;
+ }
+ nest += 1;
+ }
+ b')' => {
+ if nest == 0 {
+ break;
+ }
+ nest -= 1;
+ }
+ b'\\' if i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) => {
+ i += 1;
+ }
+ _ => {}
+ }
+ i += 1;
+ }
+ Some((i, &data[start_ix..(start_ix + i)]))
+ }
+}
+
+/// Returns bytes scanned
+fn scan_attribute_name(data: &[u8]) -> Option<usize> {
+ let (&c, tail) = data.split_first()?;
+ if is_ascii_alpha(c) || c == b'_' || c == b':' {
+ Some(
+ 1 + scan_while(tail, |c| {
+ is_ascii_alphanumeric(c) || c == b'_' || c == b'.' || c == b':' || c == b'-'
+ }),
+ )
+ } else {
+ None
+ }
+}
+
+/// Returns the index immediately following the attribute on success.
+/// The argument `buffer_ix` refers to the index into `data` from which we
+/// should copy into `buffer` when we find bytes to skip.
+fn scan_attribute(
+ data: &[u8],
+ mut ix: usize,
+ newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
+ buffer: &mut Vec<u8>,
+ buffer_ix: &mut usize,
+) -> Option<usize> {
+ ix += scan_attribute_name(&data[ix..])?;
+ let n_whitespace =
+ scan_whitespace_with_newline_handler(data, ix, newline_handler, buffer, buffer_ix)? - ix;
+ ix += n_whitespace;
+ if scan_ch(&data[ix..], b'=') == 1 {
+ ix += 1;
+ ix = scan_whitespace_with_newline_handler(data, ix, newline_handler, buffer, buffer_ix)?;
+ ix = scan_attribute_value(data, ix, newline_handler, buffer, buffer_ix)?;
+ } else if n_whitespace > 0 {
+ // Leave whitespace for next attribute.
+ ix -= 1;
+ }
+ Some(ix)
+}
+
+/// Scans whitespace and possibly newlines according to the
+/// behavior defined by the newline handler. When bytes are skipped,
+/// all preceding non-skipped bytes are pushed to the buffer.
+fn scan_whitespace_with_newline_handler(
+ data: &[u8],
+ mut i: usize,
+ newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
+ buffer: &mut Vec<u8>,
+ buffer_ix: &mut usize,
+) -> Option<usize> {
+ while i < data.len() {
+ if !is_ascii_whitespace(data[i]) {
+ return Some(i);
+ }
+ if let Some(eol_bytes) = scan_eol(&data[i..]) {
+ let handler = newline_handler?;
+ i += eol_bytes;
+ let skipped_bytes = handler(&data[i..]);
+
+ if skipped_bytes > 0 {
+ buffer.extend(&data[*buffer_ix..i]);
+ *buffer_ix = i + skipped_bytes;
+ }
+
+ i += skipped_bytes;
+ } else {
+ i += 1;
+ }
+ }
+
+ Some(i)
+}
+
+/// Returns the index immediately following the attribute value on success.
+fn scan_attribute_value(
+ data: &[u8],
+ mut i: usize,
+ newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
+ buffer: &mut Vec<u8>,
+ buffer_ix: &mut usize,
+) -> Option<usize> {
+ match *data.get(i)? {
+ b @ b'"' | b @ b'\'' => {
+ i += 1;
+ while i < data.len() {
+ if data[i] == b {
+ return Some(i + 1);
+ }
+ if let Some(eol_bytes) = scan_eol(&data[i..]) {
+ let handler = newline_handler?;
+ i += eol_bytes;
+ let skipped_bytes = handler(&data[i..]);
+
+ if skipped_bytes > 0 {
+ buffer.extend(&data[*buffer_ix..i]);
+ *buffer_ix = i + skipped_bytes;
+ }
+ i += skipped_bytes;
+ } else {
+ i += 1;
+ }
+ }
+ return None;
+ }
+ b' ' | b'=' | b'>' | b'<' | b'`' | b'\n' | b'\r' => {
+ return None;
+ }
+ _ => {
+ // unquoted attribute value
+ i += scan_attr_value_chars(&data[i..]);
+ }
+ }
+
+ Some(i)
+}
+
+// Remove backslash escapes and resolve entities
+pub(crate) fn unescape(input: &str) -> CowStr<'_> {
+ let mut result = String::new();
+ let mut mark = 0;
+ let mut i = 0;
+ let bytes = input.as_bytes();
+ while i < bytes.len() {
+ match bytes[i] {
+ b'\\' if i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) => {
+ result.push_str(&input[mark..i]);
+ mark = i + 1;
+ i += 2;
+ }
+ b'&' => match scan_entity(&bytes[i..]) {
+ (n, Some(value)) => {
+ result.push_str(&input[mark..i]);
+ result.push_str(&value);
+ i += n;
+ mark = i;
+ }
+ _ => i += 1,
+ },
+ b'\r' => {
+ result.push_str(&input[mark..i]);
+ i += 1;
+ mark = i;
+ }
+ _ => i += 1,
+ }
+ }
+ if mark == 0 {
+ input.into()
+ } else {
+ result.push_str(&input[mark..]);
+ result.into()
+ }
+}
+
+/// Assumes `data` is preceded by `<`.
+pub(crate) fn starts_html_block_type_6(data: &[u8]) -> bool {
+ let i = scan_ch(data, b'/');
+ let tail = &data[i..];
+ let n = scan_while(tail, is_ascii_alphanumeric);
+ if !is_html_tag(&tail[..n]) {
+ return false;
+ }
+ // Starting condition says the next byte must be either a space, a tab,
+ // the end of the line, the string >, or the string />
+ let tail = &tail[n..];
+ tail.is_empty()
+ || tail[0] == b' '
+ || tail[0] == b'\t'
+ || tail[0] == b'\r'
+ || tail[0] == b'\n'
+ || tail[0] == b'>'
+ || tail.len() >= 2 && &tail[..2] == b"/>"
+}
+
+fn is_html_tag(tag: &[u8]) -> bool {
+ HTML_TAGS
+ .binary_search_by(|probe| {
+ let probe_bytes_iter = probe.as_bytes().iter();
+ let tag_bytes_iter = tag.iter();
+
+ probe_bytes_iter
+ .zip(tag_bytes_iter)
+ .find_map(|(&a, &b)| {
+ // We can compare case insensitively because the probes are
+ // all lower case alpha strings.
+ match a.cmp(&(b | 0x20)) {
+ std::cmp::Ordering::Equal => None,
+ inequality => Some(inequality),
+ }
+ })
+ .unwrap_or_else(|| probe.len().cmp(&tag.len()))
+ })
+ .is_ok()
+}
+
+/// Assumes that `data` starts with `<`.
+/// Returns the index into data directly after the html tag on success.
+pub(crate) fn scan_html_type_7(data: &[u8]) -> Option<usize> {
+ // Block type html does not allow for newlines, so we
+ // do not pass a newline handler.
+ let (_span, i) = scan_html_block_inner(data, None)?;
+ scan_blank_line(&data[i..])?;
+ Some(i)
+}
+
+/// Assumes that `data` starts with `<`.
+/// Returns the number of bytes scanned and the html in case of
+/// success.
+/// When some bytes were skipped, because the html was split over
+/// multiple leaves (e.g. over multiple lines in a blockquote),
+/// the html is returned as a vector of bytes.
+/// If no bytes were skipped, the buffer will be empty.
+pub(crate) fn scan_html_block_inner(
+ data: &[u8],
+ newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
+) -> Option<(Vec<u8>, usize)> {
+ let mut buffer = Vec::new();
+ let mut last_buf_index = 0;
+
+ let close_tag_bytes = scan_ch(&data[1..], b'/');
+ let l = scan_while(&data[(1 + close_tag_bytes)..], is_ascii_alpha);
+ if l == 0 {
+ return None;
+ }
+ let mut i = 1 + close_tag_bytes + l;
+ i += scan_while(&data[i..], is_ascii_letterdigitdash);
+
+ if close_tag_bytes == 0 {
+ loop {
+ let old_i = i;
+ loop {
+ i += scan_whitespace_no_nl(&data[i..]);
+ if let Some(eol_bytes) = scan_eol(&data[i..]) {
+ if eol_bytes == 0 {
+ return None;
+ }
+ let handler = newline_handler?;
+ i += eol_bytes;
+ let skipped_bytes = handler(&data[i..]);
+
+ let data_len = data.len() - i;
+
+ debug_assert!(
+ skipped_bytes <= data_len,
+ "Handler tried to skip too many bytes, fed {}, skipped {}",
+ data_len,
+ skipped_bytes
+ );
+
+ if skipped_bytes > 0 {
+ buffer.extend(&data[last_buf_index..i]);
+ i += skipped_bytes;
+ last_buf_index = i;
+ }
+ } else {
+ break;
+ }
+ }
+ if let Some(b'/') | Some(b'>') = data.get(i) {
+ break;
+ }
+ if old_i == i {
+ // No whitespace, which is mandatory.
+ return None;
+ }
+ i = scan_attribute(data, i, newline_handler, &mut buffer, &mut last_buf_index)?;
+ }
+ }
+
+ i += scan_whitespace_no_nl(&data[i..]);
+
+ if close_tag_bytes == 0 {
+ i += scan_ch(&data[i..], b'/');
+ }
+
+ if scan_ch(&data[i..], b'>') == 0 {
+ None
+ } else {
+ i += 1;
+ if !buffer.is_empty() {
+ buffer.extend(&data[last_buf_index..i]);
+ }
+ Some((buffer, i))
+ }
+}
+
+/// Returns (next_byte_offset, uri, type)
+pub(crate) fn scan_autolink(text: &str, start_ix: usize) -> Option<(usize, CowStr<'_>, LinkType)> {
+ scan_uri(text, start_ix)
+ .map(|(bytes, uri)| (bytes, uri, LinkType::Autolink))
+ .or_else(|| scan_email(text, start_ix).map(|(bytes, uri)| (bytes, uri, LinkType::Email)))
+}
+
+/// Returns (next_byte_offset, uri)
+fn scan_uri(text: &str, start_ix: usize) -> Option<(usize, CowStr<'_>)> {
+ let bytes = &text.as_bytes()[start_ix..];
+
+ // scheme's first byte must be an ascii letter
+ if bytes.is_empty() || !is_ascii_alpha(bytes[0]) {
+ return None;
+ }
+
+ let mut i = 1;
+
+ while i < bytes.len() {
+ let c = bytes[i];
+ i += 1;
+ match c {
+ c if is_ascii_alphanumeric(c) => (),
+ b'.' | b'-' | b'+' => (),
+ b':' => break,
+ _ => return None,
+ }
+ }
+
+ // scheme length must be between 2 and 32 characters long. scheme
+ // must be followed by colon
+ if i < 3 || i > 33 {
+ return None;
+ }
+
+ while i < bytes.len() {
+ match bytes[i] {
+ b'>' => return Some((start_ix + i + 1, text[start_ix..(start_ix + i)].into())),
+ b'\0'..=b' ' | b'<' => return None,
+ _ => (),
+ }
+ i += 1;
+ }
+
+ None
+}
+
+/// Returns (next_byte_offset, email)
+fn scan_email(text: &str, start_ix: usize) -> Option<(usize, CowStr<'_>)> {
+ // using a regex library would be convenient, but doing it by hand is not too bad
+ let bytes = &text.as_bytes()[start_ix..];
+ let mut i = 0;
+
+ while i < bytes.len() {
+ let c = bytes[i];
+ i += 1;
+ match c {
+ c if is_ascii_alphanumeric(c) => (),
+ b'.' | b'!' | b'#' | b'$' | b'%' | b'&' | b'\'' | b'*' | b'+' | b'/' | b'=' | b'?'
+ | b'^' | b'_' | b'`' | b'{' | b'|' | b'}' | b'~' | b'-' => (),
+ b'@' => break,
+ _ => return None,
+ }
+ }
+
+ loop {
+ let label_start_ix = i;
+ let mut fresh_label = true;
+
+ while i < bytes.len() {
+ match bytes[i] {
+ c if is_ascii_alphanumeric(c) => (),
+ b'-' if fresh_label => {
+ return None;
+ }
+ b'-' => (),
+ _ => break,
+ }
+ fresh_label = false;
+ i += 1;
+ }
+
+ if i == label_start_ix || i - label_start_ix > 63 || bytes[i - 1] == b'-' {
+ return None;
+ }
+
+ if scan_ch(&bytes[i..], b'.') == 0 {
+ break;
+ }
+ i += 1;
+ }
+
+ if scan_ch(&bytes[i..], b'>') == 0 {
+ return None;
+ }
+
+ Some((start_ix + i + 1, text[start_ix..(start_ix + i)].into()))
+}
+
+/// Scan comment, declaration, or CDATA section, with initial "<!" already consumed.
+/// Returns byte offset on match.
+pub(crate) fn scan_inline_html_comment(
+ bytes: &[u8],
+ mut ix: usize,
+ scan_guard: &mut HtmlScanGuard,
+) -> Option<usize> {
+ let c = *bytes.get(ix)?;
+ ix += 1;
+ match c {
+ b'-' => {
+ let dashes = scan_ch_repeat(&bytes[ix..], b'-');
+ if dashes < 1 {
+ return None;
+ }
+ // Saw "<!--", scan comment.
+ ix += dashes;
+ if scan_ch(&bytes[ix..], b'>') == 1 {
+ return None;
+ }
+
+ while let Some(x) = memchr(b'-', &bytes[ix..]) {
+ ix += x + 1;
+ if scan_ch(&bytes[ix..], b'-') == 1 {
+ ix += 1;
+ return if scan_ch(&bytes[ix..], b'>') == 1 {
+ Some(ix + 1)
+ } else {
+ None
+ };
+ }
+ }
+ None
+ }
+ b'[' if bytes[ix..].starts_with(b"CDATA[") && ix > scan_guard.cdata => {
+ ix += b"CDATA[".len();
+ ix = memchr(b']', &bytes[ix..]).map_or(bytes.len(), |x| ix + x);
+ let close_brackets = scan_ch_repeat(&bytes[ix..], b']');
+ ix += close_brackets;
+
+ if close_brackets == 0 || scan_ch(&bytes[ix..], b'>') == 0 {
+ scan_guard.cdata = ix;
+ None
+ } else {
+ Some(ix + 1)
+ }
+ }
+ b'A'..=b'Z' if ix > scan_guard.declaration => {
+ // Scan declaration.
+ ix += scan_while(&bytes[ix..], |c| c >= b'A' && c <= b'Z');
+ let whitespace = scan_while(&bytes[ix..], is_ascii_whitespace);
+ if whitespace == 0 {
+ return None;
+ }
+ ix += whitespace;
+ ix = memchr(b'>', &bytes[ix..]).map_or(bytes.len(), |x| ix + x);
+ if scan_ch(&bytes[ix..], b'>') == 0 {
+ scan_guard.declaration = ix;
+ None
+ } else {
+ Some(ix + 1)
+ }
+ }
+ _ => None,
+ }
+}
+
+/// Scan processing directive, with initial "<?" already consumed.
+/// Returns the next byte offset on success.
+pub(crate) fn scan_inline_html_processing(
+ bytes: &[u8],
+ mut ix: usize,
+ scan_guard: &mut HtmlScanGuard,
+) -> Option<usize> {
+ if ix <= scan_guard.processing {
+ return None;
+ }
+ while let Some(offset) = memchr(b'?', &bytes[ix..]) {
+ ix += offset + 1;
+ if scan_ch(&bytes[ix..], b'>') == 1 {
+ return Some(ix + 1);
+ }
+ }
+ scan_guard.processing = ix;
+ None
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ #[test]
+ fn overflow_list() {
+ assert!(
+ scan_listitem(b"4444444444444444444444444444444444444444444444444444444444!").is_none()
+ );
+ }
+
+ #[test]
+ fn overflow_by_addition() {
+ assert!(scan_listitem(b"1844674407370955161615!").is_none());
+ }
+}
diff --git a/vendor/pulldown-cmark/src/strings.rs b/vendor/pulldown-cmark/src/strings.rs
new file mode 100644
index 000000000..fb6c1be0d
--- /dev/null
+++ b/vendor/pulldown-cmark/src/strings.rs
@@ -0,0 +1,373 @@
+use std::borrow::{Borrow, Cow, ToOwned};
+use std::convert::{AsRef, TryFrom};
+use std::fmt;
+use std::hash::{Hash, Hasher};
+use std::ops::Deref;
+use std::str::from_utf8;
+
// Maximum bytes an InlineStr can hold: three words minus one byte for the
// length field and one byte for the enum discriminant, so that `CowStr`
// stays exactly three words in total (asserted by the `cowstr_size` test).
const MAX_INLINE_STR_LEN: usize = 3 * std::mem::size_of::<isize>() - 2;
+
/// Error returned when converting a `&str` into an [`InlineStr`]
/// fails because the string does not fit in the inline buffer.
#[derive(Debug)]
pub struct StringTooLongError;
+
/// An inline string that can contain almost three words
/// of utf-8 text.
#[derive(Debug, Clone, Copy, Eq)]
pub struct InlineStr {
    // Invariant: `inner[..len]` is always valid UTF-8.
    inner: [u8; MAX_INLINE_STR_LEN],
    // Number of initialized bytes; always <= MAX_INLINE_STR_LEN.
    len: u8,
}
+
+impl<'a> AsRef<str> for InlineStr {
+ fn as_ref(&self) -> &str {
+ self.deref()
+ }
+}
+
+impl Hash for InlineStr {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.deref().hash(state);
+ }
+}
+
+impl From<char> for InlineStr {
+ fn from(c: char) -> Self {
+ let mut inner = [0u8; MAX_INLINE_STR_LEN];
+ c.encode_utf8(&mut inner);
+ let len = c.len_utf8() as u8;
+ Self { inner, len }
+ }
+}
+
+impl<'a> std::cmp::PartialEq<InlineStr> for InlineStr {
+ fn eq(&self, other: &InlineStr) -> bool {
+ self.deref() == other.deref()
+ }
+}
+
+impl TryFrom<&str> for InlineStr {
+ type Error = StringTooLongError;
+
+ fn try_from(s: &str) -> Result<InlineStr, StringTooLongError> {
+ let len = s.len();
+ if len <= MAX_INLINE_STR_LEN {
+ let mut inner = [0u8; MAX_INLINE_STR_LEN];
+ inner[..len].copy_from_slice(s.as_bytes());
+ let len = len as u8;
+ Ok(Self { inner, len })
+ } else {
+ Err(StringTooLongError)
+ }
+ }
+}
+
+impl Deref for InlineStr {
+ type Target = str;
+
+ fn deref(&self) -> &str {
+ let len = self.len as usize;
+ from_utf8(&self.inner[..len]).unwrap()
+ }
+}
+
+impl fmt::Display for InlineStr {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}", self.as_ref())
+ }
+}
+
/// A copy-on-write string that can be owned, borrowed
/// or inlined.
///
/// It is three words long.
/// Equality and hashing compare the string contents, regardless of variant.
#[derive(Debug, Eq)]
pub enum CowStr<'a> {
    /// An owned, immutable string.
    Boxed(Box<str>),
    /// A borrowed string.
    Borrowed(&'a str),
    /// A short inline string.
    Inlined(InlineStr),
}
+
#[cfg(feature = "serde")]
mod serde_impl {
    //! Serde support: `CowStr` serializes as a plain string and
    //! deserializes zero-copy whenever the format can lend out borrowed
    //! data.
    use super::CowStr;
    use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
    use std::fmt;

    impl<'a> Serialize for CowStr<'a> {
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            serializer.serialize_str(self.as_ref())
        }
    }

    struct CowStrVisitor;

    impl<'de> de::Visitor<'de> for CowStrVisitor {
        type Value = CowStr<'de>;

        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
            formatter.write_str("a string")
        }

        // Zero-copy path: borrow directly from the deserializer's input.
        fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            Ok(CowStr::Borrowed(v))
        }

        // Owned fallback for deserializers that cannot lend borrowed data
        // (e.g. when reading from an IO stream). Without this, the
        // Visitor's default `visit_str` would reject valid strings.
        fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            Ok(CowStr::Boxed(Box::from(v)))
        }
    }

    impl<'a, 'de: 'a> Deserialize<'de> for CowStr<'a> {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            deserializer.deserialize_str(CowStrVisitor)
        }
    }
}
+
+impl<'a> AsRef<str> for CowStr<'a> {
+ fn as_ref(&self) -> &str {
+ self.deref()
+ }
+}
+
+impl<'a> Hash for CowStr<'a> {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.deref().hash(state);
+ }
+}
+
+impl<'a> std::clone::Clone for CowStr<'a> {
+ fn clone(&self) -> Self {
+ match self {
+ CowStr::Boxed(s) => match InlineStr::try_from(&**s) {
+ Ok(inline) => CowStr::Inlined(inline),
+ Err(..) => CowStr::Boxed(s.clone()),
+ },
+ CowStr::Borrowed(s) => CowStr::Borrowed(s),
+ CowStr::Inlined(s) => CowStr::Inlined(*s),
+ }
+ }
+}
+
+impl<'a> std::cmp::PartialEq<CowStr<'a>> for CowStr<'a> {
+ fn eq(&self, other: &CowStr) -> bool {
+ self.deref() == other.deref()
+ }
+}
+
+impl<'a> From<&'a str> for CowStr<'a> {
+ fn from(s: &'a str) -> Self {
+ CowStr::Borrowed(s)
+ }
+}
+
+impl<'a> From<String> for CowStr<'a> {
+ fn from(s: String) -> Self {
+ CowStr::Boxed(s.into_boxed_str())
+ }
+}
+
+impl<'a> From<char> for CowStr<'a> {
+ fn from(c: char) -> Self {
+ CowStr::Inlined(c.into())
+ }
+}
+
+impl<'a> From<Cow<'a, str>> for CowStr<'a> {
+ fn from(s: Cow<'a, str>) -> Self {
+ match s {
+ Cow::Borrowed(s) => CowStr::Borrowed(s),
+ Cow::Owned(s) => CowStr::Boxed(s.into_boxed_str()),
+ }
+ }
+}
+
+impl<'a> From<CowStr<'a>> for Cow<'a, str> {
+ fn from(s: CowStr<'a>) -> Self {
+ match s {
+ CowStr::Boxed(s) => Cow::Owned(s.to_string()),
+ CowStr::Inlined(s) => Cow::Owned(s.to_string()),
+ CowStr::Borrowed(s) => Cow::Borrowed(s),
+ }
+ }
+}
+
+impl<'a> From<Cow<'a, char>> for CowStr<'a> {
+ fn from(s: Cow<'a, char>) -> Self {
+ CowStr::Inlined(InlineStr::from(*s))
+ }
+}
+
+impl<'a> Deref for CowStr<'a> {
+ type Target = str;
+
+ fn deref(&self) -> &str {
+ match self {
+ CowStr::Boxed(ref b) => &*b,
+ CowStr::Borrowed(b) => b,
+ CowStr::Inlined(ref s) => s.deref(),
+ }
+ }
+}
+
+impl<'a> Borrow<str> for CowStr<'a> {
+ fn borrow(&self) -> &str {
+ self.deref()
+ }
+}
+
+impl<'a> CowStr<'a> {
+ pub fn into_string(self) -> String {
+ match self {
+ CowStr::Boxed(b) => b.into(),
+ CowStr::Borrowed(b) => b.to_owned(),
+ CowStr::Inlined(s) => s.deref().to_owned(),
+ }
+ }
+}
+
+impl<'a> fmt::Display for CowStr<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}", self.as_ref())
+ }
+}
+
// Unit tests for InlineStr / CowStr layout, conversions and clone behavior.
#[cfg(test)]
mod test_special_string {
    use super::*;

    #[test]
    fn inlinestr_ascii() {
        let s: InlineStr = 'a'.into();
        assert_eq!("a", s.deref());
    }

    // Multi-byte chars must round-trip through the inline buffer.
    #[test]
    fn inlinestr_unicode() {
        let s: InlineStr = '🍔'.into();
        assert_eq!("🍔", s.deref());
    }

    // CowStr's size budget: exactly three machine words.
    #[test]
    fn cowstr_size() {
        let size = std::mem::size_of::<CowStr>();
        let word_size = std::mem::size_of::<isize>();
        assert_eq!(3 * word_size, size);
    }

    #[test]
    fn cowstr_char_to_string() {
        let c = '藏';
        let smort: CowStr = c.into();
        let owned: String = smort.to_string();
        let expected = "藏".to_owned();
        assert_eq!(expected, owned);
    }

    #[test]
    fn max_inline_str_len_atleast_four() {
        // we need 4 bytes to store a char
        assert!(MAX_INLINE_STR_LEN >= 4);
    }

    // On 64-bit targets MAX_INLINE_STR_LEN is 3 * 8 - 2 = 22 bytes.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn inlinestr_fits_twentytwo() {
        let s = "0123456789abcdefghijkl";
        let stack_str = InlineStr::try_from(s).unwrap();
        assert_eq!(stack_str.deref(), s);
    }

    // 23 bytes is one past the inline capacity on 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn inlinestr_not_fits_twentythree() {
        let s = "0123456789abcdefghijklm";
        let _stack_str = InlineStr::try_from(s).unwrap_err();
    }

    // Clone demotes a short Boxed string to the Inlined representation.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn small_boxed_str_clones_to_stack() {
        let s = "0123456789abcde".to_owned();
        let smort: CowStr = s.into();
        let smort_clone = smort.clone();

        if let CowStr::Inlined(..) = smort_clone {
        } else {
            panic!("Expected a Inlined variant!");
        }
    }

    // Cow -> CowStr must preserve the borrowed/owned distinction.
    #[test]
    fn cow_to_cow_str() {
        let s = "some text";
        let cow = Cow::Borrowed(s);
        let actual = CowStr::from(cow);
        let expected = CowStr::Borrowed(s);
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));

        let s = "some text".to_string();
        let cow: Cow<str> = Cow::Owned(s.clone());
        let actual = CowStr::from(cow);
        let expected = CowStr::Boxed(s.into_boxed_str());
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));
    }

    // CowStr -> Cow: Borrowed stays borrowed; Inlined and Boxed become
    // Cow::Owned.
    #[test]
    fn cow_str_to_cow() {
        let s = "some text";
        let cow_str = CowStr::Borrowed(s);
        let actual = Cow::from(cow_str);
        let expected = Cow::Borrowed(s);
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));

        let s = "s";
        let inline_str: InlineStr = InlineStr::try_from(s).unwrap();
        let cow_str = CowStr::Inlined(inline_str);
        let actual = Cow::from(cow_str);
        let expected: Cow<str> = Cow::Owned(s.to_string());
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));

        let s = "s";
        let cow_str = CowStr::Boxed(s.to_string().into_boxed_str());
        let actual = Cow::from(cow_str);
        let expected: Cow<str> = Cow::Owned(s.to_string());
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));
    }

    // Cow<char> -> CowStr always produces the Inlined variant.
    #[test]
    fn cow_char_to_cow_str() {
        let c = 'c';
        let cow: Cow<char> = Cow::Owned(c);
        let actual = CowStr::from(cow);
        let expected = CowStr::Inlined(InlineStr::from(c));
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));

        let c = 'c';
        let cow: Cow<char> = Cow::Borrowed(&c);
        let actual = CowStr::from(cow);
        let expected = CowStr::Inlined(InlineStr::from(c));
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));
    }

    // Helper: true when both values are the same enum variant, ignoring
    // the payload.
    fn variant_eq<T>(a: &T, b: &T) -> bool {
        std::mem::discriminant(a) == std::mem::discriminant(b)
    }
}
diff --git a/vendor/pulldown-cmark/src/tree.rs b/vendor/pulldown-cmark/src/tree.rs
new file mode 100644
index 000000000..8e971bc20
--- /dev/null
+++ b/vendor/pulldown-cmark/src/tree.rs
@@ -0,0 +1,276 @@
+// Copyright 2018 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+//! A Vec-based container for a tree structure.
+
+use std::num::NonZeroUsize;
+use std::ops::{Add, Sub};
+
+use crate::parse::{Item, ItemBody};
+
// Index of a node in a Tree. NonZeroUsize lets `Option<TreeIndex>` occupy
// a single word; index 0 is reserved for the dummy root node that
// `Tree::with_capacity` pushes first.
#[derive(Debug, Eq, PartialEq, Copy, Clone, PartialOrd)]
pub(crate) struct TreeIndex(NonZeroUsize);
+
impl TreeIndex {
    // Panics if `i` is zero: index 0 is reserved for the dummy root.
    fn new(i: usize) -> Self {
        TreeIndex(NonZeroUsize::new(i).unwrap())
    }

    // Raw index into the tree's node vector.
    pub fn get(self) -> usize {
        self.0.get()
    }
}
+
+impl Add<usize> for TreeIndex {
+ type Output = TreeIndex;
+
+ fn add(self, rhs: usize) -> Self {
+ let inner = self.0.get() + rhs;
+ TreeIndex::new(inner)
+ }
+}
+
+impl Sub<usize> for TreeIndex {
+ type Output = TreeIndex;
+
+ fn sub(self, rhs: usize) -> Self {
+ let inner = self.0.get().checked_sub(rhs).unwrap();
+ TreeIndex::new(inner)
+ }
+}
+
// A tree node stored in the flat `Tree::nodes` vector; links are indices
// rather than pointers.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Node<T> {
    /// First child of this node, if any.
    pub child: Option<TreeIndex>,
    /// Next sibling of this node, if any.
    pub next: Option<TreeIndex>,
    /// Payload carried by this node.
    pub item: T,
}
+
/// A tree abstraction, intended for fast building as a preorder traversal.
#[derive(Clone)]
pub(crate) struct Tree<T> {
    nodes: Vec<Node<T>>,
    spine: Vec<TreeIndex>, // indices of nodes on path to current node
    cur: Option<TreeIndex>, // focus; None when no node has been appended at this level yet
}
+
impl<T: Default> Tree<T> {
    // Indices start at one, so we place a dummy value at index zero.
    // The alternative would be subtracting one from every TreeIndex
    // every time we convert it to usize to index our nodes.
    pub(crate) fn with_capacity(cap: usize) -> Tree<T> {
        let mut nodes = Vec::with_capacity(cap);
        nodes.push(Node {
            child: None,
            next: None,
            item: <T as Default>::default(),
        });
        Tree {
            nodes,
            spine: Vec::new(),
            cur: None,
        }
    }

    /// Returns the index of the element currently in focus.
    pub(crate) fn cur(&self) -> Option<TreeIndex> {
        self.cur
    }

    /// Append one item to the current position in the tree.
    pub(crate) fn append(&mut self, item: T) -> TreeIndex {
        let ix = self.create_node(item);
        let this = Some(ix);

        if let Some(ix) = self.cur {
            // Link as the next sibling of the focused node.
            self[ix].next = this;
        } else if let Some(&parent) = self.spine.last() {
            // No sibling at this level yet: become the parent's first child.
            self[parent].child = this;
        }
        self.cur = this;
        ix
    }

    /// Create an isolated node.
    pub(crate) fn create_node(&mut self, item: T) -> TreeIndex {
        let this = self.nodes.len();
        self.nodes.push(Node {
            child: None,
            next: None,
            item,
        });
        TreeIndex::new(this)
    }

    /// Push down one level, so that new items become children of the current node.
    /// The new focus index is returned.
    pub(crate) fn push(&mut self) -> TreeIndex {
        let cur_ix = self.cur.unwrap();
        self.spine.push(cur_ix);
        // Focus moves to the node's first child (None for a freshly
        // appended node, so the next append attaches beneath it).
        self.cur = self[cur_ix].child;
        cur_ix
    }

    /// Pop back up a level.
    pub(crate) fn pop(&mut self) -> Option<TreeIndex> {
        let ix = Some(self.spine.pop()?);
        self.cur = ix;
        ix
    }

    /// Look at the parent node.
    pub(crate) fn peek_up(&self) -> Option<TreeIndex> {
        self.spine.last().copied()
    }

    /// Look at grandparent node.
    pub(crate) fn peek_grandparent(&self) -> Option<TreeIndex> {
        if self.spine.len() >= 2 {
            Some(self.spine[self.spine.len() - 2])
        } else {
            None
        }
    }

    /// Returns true when there are no nodes other than the root node
    /// in the tree, false otherwise.
    pub(crate) fn is_empty(&self) -> bool {
        self.nodes.len() <= 1
    }

    /// Returns the length of the spine.
    pub(crate) fn spine_len(&self) -> usize {
        self.spine.len()
    }

    /// Resets the focus to the first node added to the tree, if it exists.
    pub(crate) fn reset(&mut self) {
        // Index 1 is the first real node (0 is the dummy root).
        self.cur = if self.is_empty() {
            None
        } else {
            Some(TreeIndex::new(1))
        };
        self.spine.clear();
    }

    /// Walks the spine from a root node up to, but not including, the current node.
    pub(crate) fn walk_spine(&self) -> impl std::iter::DoubleEndedIterator<Item = &TreeIndex> {
        self.spine.iter()
    }

    /// Moves focus to the next sibling of the given node.
    pub(crate) fn next_sibling(&mut self, cur_ix: TreeIndex) -> Option<TreeIndex> {
        self.cur = self[cur_ix].next;
        self.cur
    }
}
+
impl Tree<Item> {
    /// Truncates the children of the current parent at the given byte
    /// position: nodes ending before it are kept, the node containing it
    /// is cut or dropped, and the focus (`cur`) is moved to the new last
    /// child (or `None` if none remain).
    pub(crate) fn truncate_siblings(&mut self, bytes: &[u8], end_byte_ix: usize) {
        let parent_ix = self.peek_up().unwrap();
        let mut next_child_ix = self[parent_ix].child;
        let mut prev_child_ix = None;

        // drop or truncate children based on its range
        while let Some(child_ix) = next_child_ix {
            let child_end = self[child_ix].item.end;
            if child_end < end_byte_ix {
                // preserve this node, and go to the next
                prev_child_ix = Some(child_ix);
                next_child_ix = self[child_ix].next;
                continue;
            } else if child_end == end_byte_ix {
                // this will be the last node
                self[child_ix].next = None;
                // focus to the new last child (this node)
                self.cur = Some(child_ix);
            } else if self[child_ix].item.start == end_byte_ix {
                // check whether the previous character is a backslash
                let is_previous_char_backslash_escape =
                    end_byte_ix.checked_sub(1).map_or(false, |prev| {
                        (bytes[prev] == b'\\') && (self[child_ix].item.body == ItemBody::Text)
                    });
                if is_previous_char_backslash_escape {
                    // rescue the backslash as a plain text content
                    let last_byte_ix = end_byte_ix - 1;
                    self[child_ix].item.start = last_byte_ix;
                    self[child_ix].item.end = end_byte_ix;
                    self.cur = Some(child_ix);
                } else if let Some(prev_child_ix) = prev_child_ix {
                    // the node will become empty. drop the node
                    // a preceding sibling exists
                    self[prev_child_ix].next = None;
                    self.cur = Some(prev_child_ix);
                } else {
                    // no preceding siblings. remove the node from the parent
                    self[parent_ix].child = None;
                    self.cur = None;
                }
            } else {
                // The truncation point falls strictly inside this node.
                debug_assert!(self[child_ix].item.start < end_byte_ix);
                debug_assert!(end_byte_ix < child_end);
                // truncate the node
                self[child_ix].item.end = end_byte_ix;
                self[child_ix].next = None;
                // focus to the new last child
                self.cur = Some(child_ix);
            }
            // Every non-`continue` branch above finishes the truncation.
            break;
        }
    }
}
+
impl<T> std::fmt::Debug for Tree<T>
where
    T: std::fmt::Debug,
{
    // Pretty-prints the tree in preorder, one node per line, with
    // indentation showing depth. Recursion depth equals tree depth
    // for children plus sibling-chain length — fine for debug output.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        fn debug_tree<T>(
            tree: &Tree<T>,
            cur: TreeIndex,
            indent: usize,
            f: &mut std::fmt::Formatter,
        ) -> std::fmt::Result
        where
            T: std::fmt::Debug,
        {
            for _ in 0..indent {
                write!(f, "  ")?;
            }
            writeln!(f, "{:?}", &tree[cur].item)?;
            if let Some(child_ix) = tree[cur].child {
                debug_tree(tree, child_ix, indent + 1, f)?;
            }
            if let Some(next_ix) = tree[cur].next {
                debug_tree(tree, next_ix, indent, f)?;
            }
            Ok(())
        }

        // Start from index 1, the first real node (0 is the dummy root).
        if self.nodes.len() > 1 {
            let cur = TreeIndex(NonZeroUsize::new(1).unwrap());
            debug_tree(self, cur, 0, f)
        } else {
            write!(f, "Empty tree")
        }
    }
}
+
+impl<T> std::ops::Index<TreeIndex> for Tree<T> {
+ type Output = Node<T>;
+
+ fn index(&self, ix: TreeIndex) -> &Self::Output {
+ self.nodes.index(ix.get())
+ }
+}
+
+impl<T> std::ops::IndexMut<TreeIndex> for Tree<T> {
+ fn index_mut(&mut self, ix: TreeIndex) -> &mut Node<T> {
+ self.nodes.index_mut(ix.get())
+ }
+}