diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /vendor/pulldown-cmark/src | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/pulldown-cmark/src')
-rw-r--r-- | vendor/pulldown-cmark/src/entities.rs | 2158 | ||||
-rw-r--r-- | vendor/pulldown-cmark/src/escape.rs | 368 | ||||
-rw-r--r-- | vendor/pulldown-cmark/src/firstpass.rs | 1927 | ||||
-rw-r--r-- | vendor/pulldown-cmark/src/html.rs | 478 | ||||
-rw-r--r-- | vendor/pulldown-cmark/src/lib.rs | 289 | ||||
-rw-r--r-- | vendor/pulldown-cmark/src/linklabel.rs | 135 | ||||
-rw-r--r-- | vendor/pulldown-cmark/src/main.rs | 123 | ||||
-rw-r--r-- | vendor/pulldown-cmark/src/parse.rs | 1904 | ||||
-rw-r--r-- | vendor/pulldown-cmark/src/puncttable.rs | 351 | ||||
-rw-r--r-- | vendor/pulldown-cmark/src/scanners.rs | 1327 | ||||
-rw-r--r-- | vendor/pulldown-cmark/src/strings.rs | 373 | ||||
-rw-r--r-- | vendor/pulldown-cmark/src/tree.rs | 276 |
12 files changed, 9709 insertions, 0 deletions
diff --git a/vendor/pulldown-cmark/src/entities.rs b/vendor/pulldown-cmark/src/entities.rs new file mode 100644 index 000000000..042c9bccc --- /dev/null +++ b/vendor/pulldown-cmark/src/entities.rs @@ -0,0 +1,2158 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//! 
Expansions of HTML5 entities + +// Autogenerated by mk_entities.py + +const ENTITIES: [(&[u8], &str); 2125] = [ + (b"AElig", "\u{00C6}"), + (b"AMP", "\u{0026}"), + (b"Aacute", "\u{00C1}"), + (b"Abreve", "\u{0102}"), + (b"Acirc", "\u{00C2}"), + (b"Acy", "\u{0410}"), + (b"Afr", "\u{1D504}"), + (b"Agrave", "\u{00C0}"), + (b"Alpha", "\u{0391}"), + (b"Amacr", "\u{0100}"), + (b"And", "\u{2A53}"), + (b"Aogon", "\u{0104}"), + (b"Aopf", "\u{1D538}"), + (b"ApplyFunction", "\u{2061}"), + (b"Aring", "\u{00C5}"), + (b"Ascr", "\u{1D49C}"), + (b"Assign", "\u{2254}"), + (b"Atilde", "\u{00C3}"), + (b"Auml", "\u{00C4}"), + (b"Backslash", "\u{2216}"), + (b"Barv", "\u{2AE7}"), + (b"Barwed", "\u{2306}"), + (b"Bcy", "\u{0411}"), + (b"Because", "\u{2235}"), + (b"Bernoullis", "\u{212C}"), + (b"Beta", "\u{0392}"), + (b"Bfr", "\u{1D505}"), + (b"Bopf", "\u{1D539}"), + (b"Breve", "\u{02D8}"), + (b"Bscr", "\u{212C}"), + (b"Bumpeq", "\u{224E}"), + (b"CHcy", "\u{0427}"), + (b"COPY", "\u{00A9}"), + (b"Cacute", "\u{0106}"), + (b"Cap", "\u{22D2}"), + (b"CapitalDifferentialD", "\u{2145}"), + (b"Cayleys", "\u{212D}"), + (b"Ccaron", "\u{010C}"), + (b"Ccedil", "\u{00C7}"), + (b"Ccirc", "\u{0108}"), + (b"Cconint", "\u{2230}"), + (b"Cdot", "\u{010A}"), + (b"Cedilla", "\u{00B8}"), + (b"CenterDot", "\u{00B7}"), + (b"Cfr", "\u{212D}"), + (b"Chi", "\u{03A7}"), + (b"CircleDot", "\u{2299}"), + (b"CircleMinus", "\u{2296}"), + (b"CirclePlus", "\u{2295}"), + (b"CircleTimes", "\u{2297}"), + (b"ClockwiseContourIntegral", "\u{2232}"), + (b"CloseCurlyDoubleQuote", "\u{201D}"), + (b"CloseCurlyQuote", "\u{2019}"), + (b"Colon", "\u{2237}"), + (b"Colone", "\u{2A74}"), + (b"Congruent", "\u{2261}"), + (b"Conint", "\u{222F}"), + (b"ContourIntegral", "\u{222E}"), + (b"Copf", "\u{2102}"), + (b"Coproduct", "\u{2210}"), + (b"CounterClockwiseContourIntegral", "\u{2233}"), + (b"Cross", "\u{2A2F}"), + (b"Cscr", "\u{1D49E}"), + (b"Cup", "\u{22D3}"), + (b"CupCap", "\u{224D}"), + (b"DD", "\u{2145}"), + (b"DDotrahd", "\u{2911}"), + 
(b"DJcy", "\u{0402}"), + (b"DScy", "\u{0405}"), + (b"DZcy", "\u{040F}"), + (b"Dagger", "\u{2021}"), + (b"Darr", "\u{21A1}"), + (b"Dashv", "\u{2AE4}"), + (b"Dcaron", "\u{010E}"), + (b"Dcy", "\u{0414}"), + (b"Del", "\u{2207}"), + (b"Delta", "\u{0394}"), + (b"Dfr", "\u{1D507}"), + (b"DiacriticalAcute", "\u{00B4}"), + (b"DiacriticalDot", "\u{02D9}"), + (b"DiacriticalDoubleAcute", "\u{02DD}"), + (b"DiacriticalGrave", "\u{0060}"), + (b"DiacriticalTilde", "\u{02DC}"), + (b"Diamond", "\u{22C4}"), + (b"DifferentialD", "\u{2146}"), + (b"Dopf", "\u{1D53B}"), + (b"Dot", "\u{00A8}"), + (b"DotDot", "\u{20DC}"), + (b"DotEqual", "\u{2250}"), + (b"DoubleContourIntegral", "\u{222F}"), + (b"DoubleDot", "\u{00A8}"), + (b"DoubleDownArrow", "\u{21D3}"), + (b"DoubleLeftArrow", "\u{21D0}"), + (b"DoubleLeftRightArrow", "\u{21D4}"), + (b"DoubleLeftTee", "\u{2AE4}"), + (b"DoubleLongLeftArrow", "\u{27F8}"), + (b"DoubleLongLeftRightArrow", "\u{27FA}"), + (b"DoubleLongRightArrow", "\u{27F9}"), + (b"DoubleRightArrow", "\u{21D2}"), + (b"DoubleRightTee", "\u{22A8}"), + (b"DoubleUpArrow", "\u{21D1}"), + (b"DoubleUpDownArrow", "\u{21D5}"), + (b"DoubleVerticalBar", "\u{2225}"), + (b"DownArrow", "\u{2193}"), + (b"DownArrowBar", "\u{2913}"), + (b"DownArrowUpArrow", "\u{21F5}"), + (b"DownBreve", "\u{0311}"), + (b"DownLeftRightVector", "\u{2950}"), + (b"DownLeftTeeVector", "\u{295E}"), + (b"DownLeftVector", "\u{21BD}"), + (b"DownLeftVectorBar", "\u{2956}"), + (b"DownRightTeeVector", "\u{295F}"), + (b"DownRightVector", "\u{21C1}"), + (b"DownRightVectorBar", "\u{2957}"), + (b"DownTee", "\u{22A4}"), + (b"DownTeeArrow", "\u{21A7}"), + (b"Downarrow", "\u{21D3}"), + (b"Dscr", "\u{1D49F}"), + (b"Dstrok", "\u{0110}"), + (b"ENG", "\u{014A}"), + (b"ETH", "\u{00D0}"), + (b"Eacute", "\u{00C9}"), + (b"Ecaron", "\u{011A}"), + (b"Ecirc", "\u{00CA}"), + (b"Ecy", "\u{042D}"), + (b"Edot", "\u{0116}"), + (b"Efr", "\u{1D508}"), + (b"Egrave", "\u{00C8}"), + (b"Element", "\u{2208}"), + (b"Emacr", "\u{0112}"), + 
(b"EmptySmallSquare", "\u{25FB}"), + (b"EmptyVerySmallSquare", "\u{25AB}"), + (b"Eogon", "\u{0118}"), + (b"Eopf", "\u{1D53C}"), + (b"Epsilon", "\u{0395}"), + (b"Equal", "\u{2A75}"), + (b"EqualTilde", "\u{2242}"), + (b"Equilibrium", "\u{21CC}"), + (b"Escr", "\u{2130}"), + (b"Esim", "\u{2A73}"), + (b"Eta", "\u{0397}"), + (b"Euml", "\u{00CB}"), + (b"Exists", "\u{2203}"), + (b"ExponentialE", "\u{2147}"), + (b"Fcy", "\u{0424}"), + (b"Ffr", "\u{1D509}"), + (b"FilledSmallSquare", "\u{25FC}"), + (b"FilledVerySmallSquare", "\u{25AA}"), + (b"Fopf", "\u{1D53D}"), + (b"ForAll", "\u{2200}"), + (b"Fouriertrf", "\u{2131}"), + (b"Fscr", "\u{2131}"), + (b"GJcy", "\u{0403}"), + (b"GT", "\u{003E}"), + (b"Gamma", "\u{0393}"), + (b"Gammad", "\u{03DC}"), + (b"Gbreve", "\u{011E}"), + (b"Gcedil", "\u{0122}"), + (b"Gcirc", "\u{011C}"), + (b"Gcy", "\u{0413}"), + (b"Gdot", "\u{0120}"), + (b"Gfr", "\u{1D50A}"), + (b"Gg", "\u{22D9}"), + (b"Gopf", "\u{1D53E}"), + (b"GreaterEqual", "\u{2265}"), + (b"GreaterEqualLess", "\u{22DB}"), + (b"GreaterFullEqual", "\u{2267}"), + (b"GreaterGreater", "\u{2AA2}"), + (b"GreaterLess", "\u{2277}"), + (b"GreaterSlantEqual", "\u{2A7E}"), + (b"GreaterTilde", "\u{2273}"), + (b"Gscr", "\u{1D4A2}"), + (b"Gt", "\u{226B}"), + (b"HARDcy", "\u{042A}"), + (b"Hacek", "\u{02C7}"), + (b"Hat", "\u{005E}"), + (b"Hcirc", "\u{0124}"), + (b"Hfr", "\u{210C}"), + (b"HilbertSpace", "\u{210B}"), + (b"Hopf", "\u{210D}"), + (b"HorizontalLine", "\u{2500}"), + (b"Hscr", "\u{210B}"), + (b"Hstrok", "\u{0126}"), + (b"HumpDownHump", "\u{224E}"), + (b"HumpEqual", "\u{224F}"), + (b"IEcy", "\u{0415}"), + (b"IJlig", "\u{0132}"), + (b"IOcy", "\u{0401}"), + (b"Iacute", "\u{00CD}"), + (b"Icirc", "\u{00CE}"), + (b"Icy", "\u{0418}"), + (b"Idot", "\u{0130}"), + (b"Ifr", "\u{2111}"), + (b"Igrave", "\u{00CC}"), + (b"Im", "\u{2111}"), + (b"Imacr", "\u{012A}"), + (b"ImaginaryI", "\u{2148}"), + (b"Implies", "\u{21D2}"), + (b"Int", "\u{222C}"), + (b"Integral", "\u{222B}"), + (b"Intersection", "\u{22C2}"), + 
(b"InvisibleComma", "\u{2063}"), + (b"InvisibleTimes", "\u{2062}"), + (b"Iogon", "\u{012E}"), + (b"Iopf", "\u{1D540}"), + (b"Iota", "\u{0399}"), + (b"Iscr", "\u{2110}"), + (b"Itilde", "\u{0128}"), + (b"Iukcy", "\u{0406}"), + (b"Iuml", "\u{00CF}"), + (b"Jcirc", "\u{0134}"), + (b"Jcy", "\u{0419}"), + (b"Jfr", "\u{1D50D}"), + (b"Jopf", "\u{1D541}"), + (b"Jscr", "\u{1D4A5}"), + (b"Jsercy", "\u{0408}"), + (b"Jukcy", "\u{0404}"), + (b"KHcy", "\u{0425}"), + (b"KJcy", "\u{040C}"), + (b"Kappa", "\u{039A}"), + (b"Kcedil", "\u{0136}"), + (b"Kcy", "\u{041A}"), + (b"Kfr", "\u{1D50E}"), + (b"Kopf", "\u{1D542}"), + (b"Kscr", "\u{1D4A6}"), + (b"LJcy", "\u{0409}"), + (b"LT", "\u{003C}"), + (b"Lacute", "\u{0139}"), + (b"Lambda", "\u{039B}"), + (b"Lang", "\u{27EA}"), + (b"Laplacetrf", "\u{2112}"), + (b"Larr", "\u{219E}"), + (b"Lcaron", "\u{013D}"), + (b"Lcedil", "\u{013B}"), + (b"Lcy", "\u{041B}"), + (b"LeftAngleBracket", "\u{27E8}"), + (b"LeftArrow", "\u{2190}"), + (b"LeftArrowBar", "\u{21E4}"), + (b"LeftArrowRightArrow", "\u{21C6}"), + (b"LeftCeiling", "\u{2308}"), + (b"LeftDoubleBracket", "\u{27E6}"), + (b"LeftDownTeeVector", "\u{2961}"), + (b"LeftDownVector", "\u{21C3}"), + (b"LeftDownVectorBar", "\u{2959}"), + (b"LeftFloor", "\u{230A}"), + (b"LeftRightArrow", "\u{2194}"), + (b"LeftRightVector", "\u{294E}"), + (b"LeftTee", "\u{22A3}"), + (b"LeftTeeArrow", "\u{21A4}"), + (b"LeftTeeVector", "\u{295A}"), + (b"LeftTriangle", "\u{22B2}"), + (b"LeftTriangleBar", "\u{29CF}"), + (b"LeftTriangleEqual", "\u{22B4}"), + (b"LeftUpDownVector", "\u{2951}"), + (b"LeftUpTeeVector", "\u{2960}"), + (b"LeftUpVector", "\u{21BF}"), + (b"LeftUpVectorBar", "\u{2958}"), + (b"LeftVector", "\u{21BC}"), + (b"LeftVectorBar", "\u{2952}"), + (b"Leftarrow", "\u{21D0}"), + (b"Leftrightarrow", "\u{21D4}"), + (b"LessEqualGreater", "\u{22DA}"), + (b"LessFullEqual", "\u{2266}"), + (b"LessGreater", "\u{2276}"), + (b"LessLess", "\u{2AA1}"), + (b"LessSlantEqual", "\u{2A7D}"), + (b"LessTilde", "\u{2272}"), + (b"Lfr", 
"\u{1D50F}"), + (b"Ll", "\u{22D8}"), + (b"Lleftarrow", "\u{21DA}"), + (b"Lmidot", "\u{013F}"), + (b"LongLeftArrow", "\u{27F5}"), + (b"LongLeftRightArrow", "\u{27F7}"), + (b"LongRightArrow", "\u{27F6}"), + (b"Longleftarrow", "\u{27F8}"), + (b"Longleftrightarrow", "\u{27FA}"), + (b"Longrightarrow", "\u{27F9}"), + (b"Lopf", "\u{1D543}"), + (b"LowerLeftArrow", "\u{2199}"), + (b"LowerRightArrow", "\u{2198}"), + (b"Lscr", "\u{2112}"), + (b"Lsh", "\u{21B0}"), + (b"Lstrok", "\u{0141}"), + (b"Lt", "\u{226A}"), + (b"Map", "\u{2905}"), + (b"Mcy", "\u{041C}"), + (b"MediumSpace", "\u{205F}"), + (b"Mellintrf", "\u{2133}"), + (b"Mfr", "\u{1D510}"), + (b"MinusPlus", "\u{2213}"), + (b"Mopf", "\u{1D544}"), + (b"Mscr", "\u{2133}"), + (b"Mu", "\u{039C}"), + (b"NJcy", "\u{040A}"), + (b"Nacute", "\u{0143}"), + (b"Ncaron", "\u{0147}"), + (b"Ncedil", "\u{0145}"), + (b"Ncy", "\u{041D}"), + (b"NegativeMediumSpace", "\u{200B}"), + (b"NegativeThickSpace", "\u{200B}"), + (b"NegativeThinSpace", "\u{200B}"), + (b"NegativeVeryThinSpace", "\u{200B}"), + (b"NestedGreaterGreater", "\u{226B}"), + (b"NestedLessLess", "\u{226A}"), + (b"NewLine", "\u{000A}"), + (b"Nfr", "\u{1D511}"), + (b"NoBreak", "\u{2060}"), + (b"NonBreakingSpace", "\u{00A0}"), + (b"Nopf", "\u{2115}"), + (b"Not", "\u{2AEC}"), + (b"NotCongruent", "\u{2262}"), + (b"NotCupCap", "\u{226D}"), + (b"NotDoubleVerticalBar", "\u{2226}"), + (b"NotElement", "\u{2209}"), + (b"NotEqual", "\u{2260}"), + (b"NotEqualTilde", "\u{2242}\u{0338}"), + (b"NotExists", "\u{2204}"), + (b"NotGreater", "\u{226F}"), + (b"NotGreaterEqual", "\u{2271}"), + (b"NotGreaterFullEqual", "\u{2267}\u{0338}"), + (b"NotGreaterGreater", "\u{226B}\u{0338}"), + (b"NotGreaterLess", "\u{2279}"), + (b"NotGreaterSlantEqual", "\u{2A7E}\u{0338}"), + (b"NotGreaterTilde", "\u{2275}"), + (b"NotHumpDownHump", "\u{224E}\u{0338}"), + (b"NotHumpEqual", "\u{224F}\u{0338}"), + (b"NotLeftTriangle", "\u{22EA}"), + (b"NotLeftTriangleBar", "\u{29CF}\u{0338}"), + (b"NotLeftTriangleEqual", 
"\u{22EC}"), + (b"NotLess", "\u{226E}"), + (b"NotLessEqual", "\u{2270}"), + (b"NotLessGreater", "\u{2278}"), + (b"NotLessLess", "\u{226A}\u{0338}"), + (b"NotLessSlantEqual", "\u{2A7D}\u{0338}"), + (b"NotLessTilde", "\u{2274}"), + (b"NotNestedGreaterGreater", "\u{2AA2}\u{0338}"), + (b"NotNestedLessLess", "\u{2AA1}\u{0338}"), + (b"NotPrecedes", "\u{2280}"), + (b"NotPrecedesEqual", "\u{2AAF}\u{0338}"), + (b"NotPrecedesSlantEqual", "\u{22E0}"), + (b"NotReverseElement", "\u{220C}"), + (b"NotRightTriangle", "\u{22EB}"), + (b"NotRightTriangleBar", "\u{29D0}\u{0338}"), + (b"NotRightTriangleEqual", "\u{22ED}"), + (b"NotSquareSubset", "\u{228F}\u{0338}"), + (b"NotSquareSubsetEqual", "\u{22E2}"), + (b"NotSquareSuperset", "\u{2290}\u{0338}"), + (b"NotSquareSupersetEqual", "\u{22E3}"), + (b"NotSubset", "\u{2282}\u{20D2}"), + (b"NotSubsetEqual", "\u{2288}"), + (b"NotSucceeds", "\u{2281}"), + (b"NotSucceedsEqual", "\u{2AB0}\u{0338}"), + (b"NotSucceedsSlantEqual", "\u{22E1}"), + (b"NotSucceedsTilde", "\u{227F}\u{0338}"), + (b"NotSuperset", "\u{2283}\u{20D2}"), + (b"NotSupersetEqual", "\u{2289}"), + (b"NotTilde", "\u{2241}"), + (b"NotTildeEqual", "\u{2244}"), + (b"NotTildeFullEqual", "\u{2247}"), + (b"NotTildeTilde", "\u{2249}"), + (b"NotVerticalBar", "\u{2224}"), + (b"Nscr", "\u{1D4A9}"), + (b"Ntilde", "\u{00D1}"), + (b"Nu", "\u{039D}"), + (b"OElig", "\u{0152}"), + (b"Oacute", "\u{00D3}"), + (b"Ocirc", "\u{00D4}"), + (b"Ocy", "\u{041E}"), + (b"Odblac", "\u{0150}"), + (b"Ofr", "\u{1D512}"), + (b"Ograve", "\u{00D2}"), + (b"Omacr", "\u{014C}"), + (b"Omega", "\u{03A9}"), + (b"Omicron", "\u{039F}"), + (b"Oopf", "\u{1D546}"), + (b"OpenCurlyDoubleQuote", "\u{201C}"), + (b"OpenCurlyQuote", "\u{2018}"), + (b"Or", "\u{2A54}"), + (b"Oscr", "\u{1D4AA}"), + (b"Oslash", "\u{00D8}"), + (b"Otilde", "\u{00D5}"), + (b"Otimes", "\u{2A37}"), + (b"Ouml", "\u{00D6}"), + (b"OverBar", "\u{203E}"), + (b"OverBrace", "\u{23DE}"), + (b"OverBracket", "\u{23B4}"), + (b"OverParenthesis", "\u{23DC}"), + 
(b"PartialD", "\u{2202}"), + (b"Pcy", "\u{041F}"), + (b"Pfr", "\u{1D513}"), + (b"Phi", "\u{03A6}"), + (b"Pi", "\u{03A0}"), + (b"PlusMinus", "\u{00B1}"), + (b"Poincareplane", "\u{210C}"), + (b"Popf", "\u{2119}"), + (b"Pr", "\u{2ABB}"), + (b"Precedes", "\u{227A}"), + (b"PrecedesEqual", "\u{2AAF}"), + (b"PrecedesSlantEqual", "\u{227C}"), + (b"PrecedesTilde", "\u{227E}"), + (b"Prime", "\u{2033}"), + (b"Product", "\u{220F}"), + (b"Proportion", "\u{2237}"), + (b"Proportional", "\u{221D}"), + (b"Pscr", "\u{1D4AB}"), + (b"Psi", "\u{03A8}"), + (b"QUOT", "\u{0022}"), + (b"Qfr", "\u{1D514}"), + (b"Qopf", "\u{211A}"), + (b"Qscr", "\u{1D4AC}"), + (b"RBarr", "\u{2910}"), + (b"REG", "\u{00AE}"), + (b"Racute", "\u{0154}"), + (b"Rang", "\u{27EB}"), + (b"Rarr", "\u{21A0}"), + (b"Rarrtl", "\u{2916}"), + (b"Rcaron", "\u{0158}"), + (b"Rcedil", "\u{0156}"), + (b"Rcy", "\u{0420}"), + (b"Re", "\u{211C}"), + (b"ReverseElement", "\u{220B}"), + (b"ReverseEquilibrium", "\u{21CB}"), + (b"ReverseUpEquilibrium", "\u{296F}"), + (b"Rfr", "\u{211C}"), + (b"Rho", "\u{03A1}"), + (b"RightAngleBracket", "\u{27E9}"), + (b"RightArrow", "\u{2192}"), + (b"RightArrowBar", "\u{21E5}"), + (b"RightArrowLeftArrow", "\u{21C4}"), + (b"RightCeiling", "\u{2309}"), + (b"RightDoubleBracket", "\u{27E7}"), + (b"RightDownTeeVector", "\u{295D}"), + (b"RightDownVector", "\u{21C2}"), + (b"RightDownVectorBar", "\u{2955}"), + (b"RightFloor", "\u{230B}"), + (b"RightTee", "\u{22A2}"), + (b"RightTeeArrow", "\u{21A6}"), + (b"RightTeeVector", "\u{295B}"), + (b"RightTriangle", "\u{22B3}"), + (b"RightTriangleBar", "\u{29D0}"), + (b"RightTriangleEqual", "\u{22B5}"), + (b"RightUpDownVector", "\u{294F}"), + (b"RightUpTeeVector", "\u{295C}"), + (b"RightUpVector", "\u{21BE}"), + (b"RightUpVectorBar", "\u{2954}"), + (b"RightVector", "\u{21C0}"), + (b"RightVectorBar", "\u{2953}"), + (b"Rightarrow", "\u{21D2}"), + (b"Ropf", "\u{211D}"), + (b"RoundImplies", "\u{2970}"), + (b"Rrightarrow", "\u{21DB}"), + (b"Rscr", "\u{211B}"), + (b"Rsh", 
"\u{21B1}"), + (b"RuleDelayed", "\u{29F4}"), + (b"SHCHcy", "\u{0429}"), + (b"SHcy", "\u{0428}"), + (b"SOFTcy", "\u{042C}"), + (b"Sacute", "\u{015A}"), + (b"Sc", "\u{2ABC}"), + (b"Scaron", "\u{0160}"), + (b"Scedil", "\u{015E}"), + (b"Scirc", "\u{015C}"), + (b"Scy", "\u{0421}"), + (b"Sfr", "\u{1D516}"), + (b"ShortDownArrow", "\u{2193}"), + (b"ShortLeftArrow", "\u{2190}"), + (b"ShortRightArrow", "\u{2192}"), + (b"ShortUpArrow", "\u{2191}"), + (b"Sigma", "\u{03A3}"), + (b"SmallCircle", "\u{2218}"), + (b"Sopf", "\u{1D54A}"), + (b"Sqrt", "\u{221A}"), + (b"Square", "\u{25A1}"), + (b"SquareIntersection", "\u{2293}"), + (b"SquareSubset", "\u{228F}"), + (b"SquareSubsetEqual", "\u{2291}"), + (b"SquareSuperset", "\u{2290}"), + (b"SquareSupersetEqual", "\u{2292}"), + (b"SquareUnion", "\u{2294}"), + (b"Sscr", "\u{1D4AE}"), + (b"Star", "\u{22C6}"), + (b"Sub", "\u{22D0}"), + (b"Subset", "\u{22D0}"), + (b"SubsetEqual", "\u{2286}"), + (b"Succeeds", "\u{227B}"), + (b"SucceedsEqual", "\u{2AB0}"), + (b"SucceedsSlantEqual", "\u{227D}"), + (b"SucceedsTilde", "\u{227F}"), + (b"SuchThat", "\u{220B}"), + (b"Sum", "\u{2211}"), + (b"Sup", "\u{22D1}"), + (b"Superset", "\u{2283}"), + (b"SupersetEqual", "\u{2287}"), + (b"Supset", "\u{22D1}"), + (b"THORN", "\u{00DE}"), + (b"TRADE", "\u{2122}"), + (b"TSHcy", "\u{040B}"), + (b"TScy", "\u{0426}"), + (b"Tab", "\u{0009}"), + (b"Tau", "\u{03A4}"), + (b"Tcaron", "\u{0164}"), + (b"Tcedil", "\u{0162}"), + (b"Tcy", "\u{0422}"), + (b"Tfr", "\u{1D517}"), + (b"Therefore", "\u{2234}"), + (b"Theta", "\u{0398}"), + (b"ThickSpace", "\u{205F}\u{200A}"), + (b"ThinSpace", "\u{2009}"), + (b"Tilde", "\u{223C}"), + (b"TildeEqual", "\u{2243}"), + (b"TildeFullEqual", "\u{2245}"), + (b"TildeTilde", "\u{2248}"), + (b"Topf", "\u{1D54B}"), + (b"TripleDot", "\u{20DB}"), + (b"Tscr", "\u{1D4AF}"), + (b"Tstrok", "\u{0166}"), + (b"Uacute", "\u{00DA}"), + (b"Uarr", "\u{219F}"), + (b"Uarrocir", "\u{2949}"), + (b"Ubrcy", "\u{040E}"), + (b"Ubreve", "\u{016C}"), + (b"Ucirc", 
"\u{00DB}"), + (b"Ucy", "\u{0423}"), + (b"Udblac", "\u{0170}"), + (b"Ufr", "\u{1D518}"), + (b"Ugrave", "\u{00D9}"), + (b"Umacr", "\u{016A}"), + (b"UnderBar", "\u{005F}"), + (b"UnderBrace", "\u{23DF}"), + (b"UnderBracket", "\u{23B5}"), + (b"UnderParenthesis", "\u{23DD}"), + (b"Union", "\u{22C3}"), + (b"UnionPlus", "\u{228E}"), + (b"Uogon", "\u{0172}"), + (b"Uopf", "\u{1D54C}"), + (b"UpArrow", "\u{2191}"), + (b"UpArrowBar", "\u{2912}"), + (b"UpArrowDownArrow", "\u{21C5}"), + (b"UpDownArrow", "\u{2195}"), + (b"UpEquilibrium", "\u{296E}"), + (b"UpTee", "\u{22A5}"), + (b"UpTeeArrow", "\u{21A5}"), + (b"Uparrow", "\u{21D1}"), + (b"Updownarrow", "\u{21D5}"), + (b"UpperLeftArrow", "\u{2196}"), + (b"UpperRightArrow", "\u{2197}"), + (b"Upsi", "\u{03D2}"), + (b"Upsilon", "\u{03A5}"), + (b"Uring", "\u{016E}"), + (b"Uscr", "\u{1D4B0}"), + (b"Utilde", "\u{0168}"), + (b"Uuml", "\u{00DC}"), + (b"VDash", "\u{22AB}"), + (b"Vbar", "\u{2AEB}"), + (b"Vcy", "\u{0412}"), + (b"Vdash", "\u{22A9}"), + (b"Vdashl", "\u{2AE6}"), + (b"Vee", "\u{22C1}"), + (b"Verbar", "\u{2016}"), + (b"Vert", "\u{2016}"), + (b"VerticalBar", "\u{2223}"), + (b"VerticalLine", "\u{007C}"), + (b"VerticalSeparator", "\u{2758}"), + (b"VerticalTilde", "\u{2240}"), + (b"VeryThinSpace", "\u{200A}"), + (b"Vfr", "\u{1D519}"), + (b"Vopf", "\u{1D54D}"), + (b"Vscr", "\u{1D4B1}"), + (b"Vvdash", "\u{22AA}"), + (b"Wcirc", "\u{0174}"), + (b"Wedge", "\u{22C0}"), + (b"Wfr", "\u{1D51A}"), + (b"Wopf", "\u{1D54E}"), + (b"Wscr", "\u{1D4B2}"), + (b"Xfr", "\u{1D51B}"), + (b"Xi", "\u{039E}"), + (b"Xopf", "\u{1D54F}"), + (b"Xscr", "\u{1D4B3}"), + (b"YAcy", "\u{042F}"), + (b"YIcy", "\u{0407}"), + (b"YUcy", "\u{042E}"), + (b"Yacute", "\u{00DD}"), + (b"Ycirc", "\u{0176}"), + (b"Ycy", "\u{042B}"), + (b"Yfr", "\u{1D51C}"), + (b"Yopf", "\u{1D550}"), + (b"Yscr", "\u{1D4B4}"), + (b"Yuml", "\u{0178}"), + (b"ZHcy", "\u{0416}"), + (b"Zacute", "\u{0179}"), + (b"Zcaron", "\u{017D}"), + (b"Zcy", "\u{0417}"), + (b"Zdot", "\u{017B}"), + (b"ZeroWidthSpace", 
"\u{200B}"), + (b"Zeta", "\u{0396}"), + (b"Zfr", "\u{2128}"), + (b"Zopf", "\u{2124}"), + (b"Zscr", "\u{1D4B5}"), + (b"aacute", "\u{00E1}"), + (b"abreve", "\u{0103}"), + (b"ac", "\u{223E}"), + (b"acE", "\u{223E}\u{0333}"), + (b"acd", "\u{223F}"), + (b"acirc", "\u{00E2}"), + (b"acute", "\u{00B4}"), + (b"acy", "\u{0430}"), + (b"aelig", "\u{00E6}"), + (b"af", "\u{2061}"), + (b"afr", "\u{1D51E}"), + (b"agrave", "\u{00E0}"), + (b"alefsym", "\u{2135}"), + (b"aleph", "\u{2135}"), + (b"alpha", "\u{03B1}"), + (b"amacr", "\u{0101}"), + (b"amalg", "\u{2A3F}"), + (b"amp", "\u{0026}"), + (b"and", "\u{2227}"), + (b"andand", "\u{2A55}"), + (b"andd", "\u{2A5C}"), + (b"andslope", "\u{2A58}"), + (b"andv", "\u{2A5A}"), + (b"ang", "\u{2220}"), + (b"ange", "\u{29A4}"), + (b"angle", "\u{2220}"), + (b"angmsd", "\u{2221}"), + (b"angmsdaa", "\u{29A8}"), + (b"angmsdab", "\u{29A9}"), + (b"angmsdac", "\u{29AA}"), + (b"angmsdad", "\u{29AB}"), + (b"angmsdae", "\u{29AC}"), + (b"angmsdaf", "\u{29AD}"), + (b"angmsdag", "\u{29AE}"), + (b"angmsdah", "\u{29AF}"), + (b"angrt", "\u{221F}"), + (b"angrtvb", "\u{22BE}"), + (b"angrtvbd", "\u{299D}"), + (b"angsph", "\u{2222}"), + (b"angst", "\u{00C5}"), + (b"angzarr", "\u{237C}"), + (b"aogon", "\u{0105}"), + (b"aopf", "\u{1D552}"), + (b"ap", "\u{2248}"), + (b"apE", "\u{2A70}"), + (b"apacir", "\u{2A6F}"), + (b"ape", "\u{224A}"), + (b"apid", "\u{224B}"), + (b"apos", "\u{0027}"), + (b"approx", "\u{2248}"), + (b"approxeq", "\u{224A}"), + (b"aring", "\u{00E5}"), + (b"ascr", "\u{1D4B6}"), + (b"ast", "\u{002A}"), + (b"asymp", "\u{2248}"), + (b"asympeq", "\u{224D}"), + (b"atilde", "\u{00E3}"), + (b"auml", "\u{00E4}"), + (b"awconint", "\u{2233}"), + (b"awint", "\u{2A11}"), + (b"bNot", "\u{2AED}"), + (b"backcong", "\u{224C}"), + (b"backepsilon", "\u{03F6}"), + (b"backprime", "\u{2035}"), + (b"backsim", "\u{223D}"), + (b"backsimeq", "\u{22CD}"), + (b"barvee", "\u{22BD}"), + (b"barwed", "\u{2305}"), + (b"barwedge", "\u{2305}"), + (b"bbrk", "\u{23B5}"), + (b"bbrktbrk", 
"\u{23B6}"), + (b"bcong", "\u{224C}"), + (b"bcy", "\u{0431}"), + (b"bdquo", "\u{201E}"), + (b"becaus", "\u{2235}"), + (b"because", "\u{2235}"), + (b"bemptyv", "\u{29B0}"), + (b"bepsi", "\u{03F6}"), + (b"bernou", "\u{212C}"), + (b"beta", "\u{03B2}"), + (b"beth", "\u{2136}"), + (b"between", "\u{226C}"), + (b"bfr", "\u{1D51F}"), + (b"bigcap", "\u{22C2}"), + (b"bigcirc", "\u{25EF}"), + (b"bigcup", "\u{22C3}"), + (b"bigodot", "\u{2A00}"), + (b"bigoplus", "\u{2A01}"), + (b"bigotimes", "\u{2A02}"), + (b"bigsqcup", "\u{2A06}"), + (b"bigstar", "\u{2605}"), + (b"bigtriangledown", "\u{25BD}"), + (b"bigtriangleup", "\u{25B3}"), + (b"biguplus", "\u{2A04}"), + (b"bigvee", "\u{22C1}"), + (b"bigwedge", "\u{22C0}"), + (b"bkarow", "\u{290D}"), + (b"blacklozenge", "\u{29EB}"), + (b"blacksquare", "\u{25AA}"), + (b"blacktriangle", "\u{25B4}"), + (b"blacktriangledown", "\u{25BE}"), + (b"blacktriangleleft", "\u{25C2}"), + (b"blacktriangleright", "\u{25B8}"), + (b"blank", "\u{2423}"), + (b"blk12", "\u{2592}"), + (b"blk14", "\u{2591}"), + (b"blk34", "\u{2593}"), + (b"block", "\u{2588}"), + (b"bne", "\u{003D}\u{20E5}"), + (b"bnequiv", "\u{2261}\u{20E5}"), + (b"bnot", "\u{2310}"), + (b"bopf", "\u{1D553}"), + (b"bot", "\u{22A5}"), + (b"bottom", "\u{22A5}"), + (b"bowtie", "\u{22C8}"), + (b"boxDL", "\u{2557}"), + (b"boxDR", "\u{2554}"), + (b"boxDl", "\u{2556}"), + (b"boxDr", "\u{2553}"), + (b"boxH", "\u{2550}"), + (b"boxHD", "\u{2566}"), + (b"boxHU", "\u{2569}"), + (b"boxHd", "\u{2564}"), + (b"boxHu", "\u{2567}"), + (b"boxUL", "\u{255D}"), + (b"boxUR", "\u{255A}"), + (b"boxUl", "\u{255C}"), + (b"boxUr", "\u{2559}"), + (b"boxV", "\u{2551}"), + (b"boxVH", "\u{256C}"), + (b"boxVL", "\u{2563}"), + (b"boxVR", "\u{2560}"), + (b"boxVh", "\u{256B}"), + (b"boxVl", "\u{2562}"), + (b"boxVr", "\u{255F}"), + (b"boxbox", "\u{29C9}"), + (b"boxdL", "\u{2555}"), + (b"boxdR", "\u{2552}"), + (b"boxdl", "\u{2510}"), + (b"boxdr", "\u{250C}"), + (b"boxh", "\u{2500}"), + (b"boxhD", "\u{2565}"), + (b"boxhU", 
"\u{2568}"), + (b"boxhd", "\u{252C}"), + (b"boxhu", "\u{2534}"), + (b"boxminus", "\u{229F}"), + (b"boxplus", "\u{229E}"), + (b"boxtimes", "\u{22A0}"), + (b"boxuL", "\u{255B}"), + (b"boxuR", "\u{2558}"), + (b"boxul", "\u{2518}"), + (b"boxur", "\u{2514}"), + (b"boxv", "\u{2502}"), + (b"boxvH", "\u{256A}"), + (b"boxvL", "\u{2561}"), + (b"boxvR", "\u{255E}"), + (b"boxvh", "\u{253C}"), + (b"boxvl", "\u{2524}"), + (b"boxvr", "\u{251C}"), + (b"bprime", "\u{2035}"), + (b"breve", "\u{02D8}"), + (b"brvbar", "\u{00A6}"), + (b"bscr", "\u{1D4B7}"), + (b"bsemi", "\u{204F}"), + (b"bsim", "\u{223D}"), + (b"bsime", "\u{22CD}"), + (b"bsol", "\u{005C}"), + (b"bsolb", "\u{29C5}"), + (b"bsolhsub", "\u{27C8}"), + (b"bull", "\u{2022}"), + (b"bullet", "\u{2022}"), + (b"bump", "\u{224E}"), + (b"bumpE", "\u{2AAE}"), + (b"bumpe", "\u{224F}"), + (b"bumpeq", "\u{224F}"), + (b"cacute", "\u{0107}"), + (b"cap", "\u{2229}"), + (b"capand", "\u{2A44}"), + (b"capbrcup", "\u{2A49}"), + (b"capcap", "\u{2A4B}"), + (b"capcup", "\u{2A47}"), + (b"capdot", "\u{2A40}"), + (b"caps", "\u{2229}\u{FE00}"), + (b"caret", "\u{2041}"), + (b"caron", "\u{02C7}"), + (b"ccaps", "\u{2A4D}"), + (b"ccaron", "\u{010D}"), + (b"ccedil", "\u{00E7}"), + (b"ccirc", "\u{0109}"), + (b"ccups", "\u{2A4C}"), + (b"ccupssm", "\u{2A50}"), + (b"cdot", "\u{010B}"), + (b"cedil", "\u{00B8}"), + (b"cemptyv", "\u{29B2}"), + (b"cent", "\u{00A2}"), + (b"centerdot", "\u{00B7}"), + (b"cfr", "\u{1D520}"), + (b"chcy", "\u{0447}"), + (b"check", "\u{2713}"), + (b"checkmark", "\u{2713}"), + (b"chi", "\u{03C7}"), + (b"cir", "\u{25CB}"), + (b"cirE", "\u{29C3}"), + (b"circ", "\u{02C6}"), + (b"circeq", "\u{2257}"), + (b"circlearrowleft", "\u{21BA}"), + (b"circlearrowright", "\u{21BB}"), + (b"circledR", "\u{00AE}"), + (b"circledS", "\u{24C8}"), + (b"circledast", "\u{229B}"), + (b"circledcirc", "\u{229A}"), + (b"circleddash", "\u{229D}"), + (b"cire", "\u{2257}"), + (b"cirfnint", "\u{2A10}"), + (b"cirmid", "\u{2AEF}"), + (b"cirscir", "\u{29C2}"), + 
(b"clubs", "\u{2663}"), + (b"clubsuit", "\u{2663}"), + (b"colon", "\u{003A}"), + (b"colone", "\u{2254}"), + (b"coloneq", "\u{2254}"), + (b"comma", "\u{002C}"), + (b"commat", "\u{0040}"), + (b"comp", "\u{2201}"), + (b"compfn", "\u{2218}"), + (b"complement", "\u{2201}"), + (b"complexes", "\u{2102}"), + (b"cong", "\u{2245}"), + (b"congdot", "\u{2A6D}"), + (b"conint", "\u{222E}"), + (b"copf", "\u{1D554}"), + (b"coprod", "\u{2210}"), + (b"copy", "\u{00A9}"), + (b"copysr", "\u{2117}"), + (b"crarr", "\u{21B5}"), + (b"cross", "\u{2717}"), + (b"cscr", "\u{1D4B8}"), + (b"csub", "\u{2ACF}"), + (b"csube", "\u{2AD1}"), + (b"csup", "\u{2AD0}"), + (b"csupe", "\u{2AD2}"), + (b"ctdot", "\u{22EF}"), + (b"cudarrl", "\u{2938}"), + (b"cudarrr", "\u{2935}"), + (b"cuepr", "\u{22DE}"), + (b"cuesc", "\u{22DF}"), + (b"cularr", "\u{21B6}"), + (b"cularrp", "\u{293D}"), + (b"cup", "\u{222A}"), + (b"cupbrcap", "\u{2A48}"), + (b"cupcap", "\u{2A46}"), + (b"cupcup", "\u{2A4A}"), + (b"cupdot", "\u{228D}"), + (b"cupor", "\u{2A45}"), + (b"cups", "\u{222A}\u{FE00}"), + (b"curarr", "\u{21B7}"), + (b"curarrm", "\u{293C}"), + (b"curlyeqprec", "\u{22DE}"), + (b"curlyeqsucc", "\u{22DF}"), + (b"curlyvee", "\u{22CE}"), + (b"curlywedge", "\u{22CF}"), + (b"curren", "\u{00A4}"), + (b"curvearrowleft", "\u{21B6}"), + (b"curvearrowright", "\u{21B7}"), + (b"cuvee", "\u{22CE}"), + (b"cuwed", "\u{22CF}"), + (b"cwconint", "\u{2232}"), + (b"cwint", "\u{2231}"), + (b"cylcty", "\u{232D}"), + (b"dArr", "\u{21D3}"), + (b"dHar", "\u{2965}"), + (b"dagger", "\u{2020}"), + (b"daleth", "\u{2138}"), + (b"darr", "\u{2193}"), + (b"dash", "\u{2010}"), + (b"dashv", "\u{22A3}"), + (b"dbkarow", "\u{290F}"), + (b"dblac", "\u{02DD}"), + (b"dcaron", "\u{010F}"), + (b"dcy", "\u{0434}"), + (b"dd", "\u{2146}"), + (b"ddagger", "\u{2021}"), + (b"ddarr", "\u{21CA}"), + (b"ddotseq", "\u{2A77}"), + (b"deg", "\u{00B0}"), + (b"delta", "\u{03B4}"), + (b"demptyv", "\u{29B1}"), + (b"dfisht", "\u{297F}"), + (b"dfr", "\u{1D521}"), + (b"dharl", 
"\u{21C3}"), + (b"dharr", "\u{21C2}"), + (b"diam", "\u{22C4}"), + (b"diamond", "\u{22C4}"), + (b"diamondsuit", "\u{2666}"), + (b"diams", "\u{2666}"), + (b"die", "\u{00A8}"), + (b"digamma", "\u{03DD}"), + (b"disin", "\u{22F2}"), + (b"div", "\u{00F7}"), + (b"divide", "\u{00F7}"), + (b"divideontimes", "\u{22C7}"), + (b"divonx", "\u{22C7}"), + (b"djcy", "\u{0452}"), + (b"dlcorn", "\u{231E}"), + (b"dlcrop", "\u{230D}"), + (b"dollar", "\u{0024}"), + (b"dopf", "\u{1D555}"), + (b"dot", "\u{02D9}"), + (b"doteq", "\u{2250}"), + (b"doteqdot", "\u{2251}"), + (b"dotminus", "\u{2238}"), + (b"dotplus", "\u{2214}"), + (b"dotsquare", "\u{22A1}"), + (b"doublebarwedge", "\u{2306}"), + (b"downarrow", "\u{2193}"), + (b"downdownarrows", "\u{21CA}"), + (b"downharpoonleft", "\u{21C3}"), + (b"downharpoonright", "\u{21C2}"), + (b"drbkarow", "\u{2910}"), + (b"drcorn", "\u{231F}"), + (b"drcrop", "\u{230C}"), + (b"dscr", "\u{1D4B9}"), + (b"dscy", "\u{0455}"), + (b"dsol", "\u{29F6}"), + (b"dstrok", "\u{0111}"), + (b"dtdot", "\u{22F1}"), + (b"dtri", "\u{25BF}"), + (b"dtrif", "\u{25BE}"), + (b"duarr", "\u{21F5}"), + (b"duhar", "\u{296F}"), + (b"dwangle", "\u{29A6}"), + (b"dzcy", "\u{045F}"), + (b"dzigrarr", "\u{27FF}"), + (b"eDDot", "\u{2A77}"), + (b"eDot", "\u{2251}"), + (b"eacute", "\u{00E9}"), + (b"easter", "\u{2A6E}"), + (b"ecaron", "\u{011B}"), + (b"ecir", "\u{2256}"), + (b"ecirc", "\u{00EA}"), + (b"ecolon", "\u{2255}"), + (b"ecy", "\u{044D}"), + (b"edot", "\u{0117}"), + (b"ee", "\u{2147}"), + (b"efDot", "\u{2252}"), + (b"efr", "\u{1D522}"), + (b"eg", "\u{2A9A}"), + (b"egrave", "\u{00E8}"), + (b"egs", "\u{2A96}"), + (b"egsdot", "\u{2A98}"), + (b"el", "\u{2A99}"), + (b"elinters", "\u{23E7}"), + (b"ell", "\u{2113}"), + (b"els", "\u{2A95}"), + (b"elsdot", "\u{2A97}"), + (b"emacr", "\u{0113}"), + (b"empty", "\u{2205}"), + (b"emptyset", "\u{2205}"), + (b"emptyv", "\u{2205}"), + (b"emsp", "\u{2003}"), + (b"emsp13", "\u{2004}"), + (b"emsp14", "\u{2005}"), + (b"eng", "\u{014B}"), + (b"ensp", 
"\u{2002}"), + (b"eogon", "\u{0119}"), + (b"eopf", "\u{1D556}"), + (b"epar", "\u{22D5}"), + (b"eparsl", "\u{29E3}"), + (b"eplus", "\u{2A71}"), + (b"epsi", "\u{03B5}"), + (b"epsilon", "\u{03B5}"), + (b"epsiv", "\u{03F5}"), + (b"eqcirc", "\u{2256}"), + (b"eqcolon", "\u{2255}"), + (b"eqsim", "\u{2242}"), + (b"eqslantgtr", "\u{2A96}"), + (b"eqslantless", "\u{2A95}"), + (b"equals", "\u{003D}"), + (b"equest", "\u{225F}"), + (b"equiv", "\u{2261}"), + (b"equivDD", "\u{2A78}"), + (b"eqvparsl", "\u{29E5}"), + (b"erDot", "\u{2253}"), + (b"erarr", "\u{2971}"), + (b"escr", "\u{212F}"), + (b"esdot", "\u{2250}"), + (b"esim", "\u{2242}"), + (b"eta", "\u{03B7}"), + (b"eth", "\u{00F0}"), + (b"euml", "\u{00EB}"), + (b"euro", "\u{20AC}"), + (b"excl", "\u{0021}"), + (b"exist", "\u{2203}"), + (b"expectation", "\u{2130}"), + (b"exponentiale", "\u{2147}"), + (b"fallingdotseq", "\u{2252}"), + (b"fcy", "\u{0444}"), + (b"female", "\u{2640}"), + (b"ffilig", "\u{FB03}"), + (b"fflig", "\u{FB00}"), + (b"ffllig", "\u{FB04}"), + (b"ffr", "\u{1D523}"), + (b"filig", "\u{FB01}"), + (b"fjlig", "\u{0066}\u{006A}"), + (b"flat", "\u{266D}"), + (b"fllig", "\u{FB02}"), + (b"fltns", "\u{25B1}"), + (b"fnof", "\u{0192}"), + (b"fopf", "\u{1D557}"), + (b"forall", "\u{2200}"), + (b"fork", "\u{22D4}"), + (b"forkv", "\u{2AD9}"), + (b"fpartint", "\u{2A0D}"), + (b"frac12", "\u{00BD}"), + (b"frac13", "\u{2153}"), + (b"frac14", "\u{00BC}"), + (b"frac15", "\u{2155}"), + (b"frac16", "\u{2159}"), + (b"frac18", "\u{215B}"), + (b"frac23", "\u{2154}"), + (b"frac25", "\u{2156}"), + (b"frac34", "\u{00BE}"), + (b"frac35", "\u{2157}"), + (b"frac38", "\u{215C}"), + (b"frac45", "\u{2158}"), + (b"frac56", "\u{215A}"), + (b"frac58", "\u{215D}"), + (b"frac78", "\u{215E}"), + (b"frasl", "\u{2044}"), + (b"frown", "\u{2322}"), + (b"fscr", "\u{1D4BB}"), + (b"gE", "\u{2267}"), + (b"gEl", "\u{2A8C}"), + (b"gacute", "\u{01F5}"), + (b"gamma", "\u{03B3}"), + (b"gammad", "\u{03DD}"), + (b"gap", "\u{2A86}"), + (b"gbreve", "\u{011F}"), + 
(b"gcirc", "\u{011D}"), + (b"gcy", "\u{0433}"), + (b"gdot", "\u{0121}"), + (b"ge", "\u{2265}"), + (b"gel", "\u{22DB}"), + (b"geq", "\u{2265}"), + (b"geqq", "\u{2267}"), + (b"geqslant", "\u{2A7E}"), + (b"ges", "\u{2A7E}"), + (b"gescc", "\u{2AA9}"), + (b"gesdot", "\u{2A80}"), + (b"gesdoto", "\u{2A82}"), + (b"gesdotol", "\u{2A84}"), + (b"gesl", "\u{22DB}\u{FE00}"), + (b"gesles", "\u{2A94}"), + (b"gfr", "\u{1D524}"), + (b"gg", "\u{226B}"), + (b"ggg", "\u{22D9}"), + (b"gimel", "\u{2137}"), + (b"gjcy", "\u{0453}"), + (b"gl", "\u{2277}"), + (b"glE", "\u{2A92}"), + (b"gla", "\u{2AA5}"), + (b"glj", "\u{2AA4}"), + (b"gnE", "\u{2269}"), + (b"gnap", "\u{2A8A}"), + (b"gnapprox", "\u{2A8A}"), + (b"gne", "\u{2A88}"), + (b"gneq", "\u{2A88}"), + (b"gneqq", "\u{2269}"), + (b"gnsim", "\u{22E7}"), + (b"gopf", "\u{1D558}"), + (b"grave", "\u{0060}"), + (b"gscr", "\u{210A}"), + (b"gsim", "\u{2273}"), + (b"gsime", "\u{2A8E}"), + (b"gsiml", "\u{2A90}"), + (b"gt", "\u{003E}"), + (b"gtcc", "\u{2AA7}"), + (b"gtcir", "\u{2A7A}"), + (b"gtdot", "\u{22D7}"), + (b"gtlPar", "\u{2995}"), + (b"gtquest", "\u{2A7C}"), + (b"gtrapprox", "\u{2A86}"), + (b"gtrarr", "\u{2978}"), + (b"gtrdot", "\u{22D7}"), + (b"gtreqless", "\u{22DB}"), + (b"gtreqqless", "\u{2A8C}"), + (b"gtrless", "\u{2277}"), + (b"gtrsim", "\u{2273}"), + (b"gvertneqq", "\u{2269}\u{FE00}"), + (b"gvnE", "\u{2269}\u{FE00}"), + (b"hArr", "\u{21D4}"), + (b"hairsp", "\u{200A}"), + (b"half", "\u{00BD}"), + (b"hamilt", "\u{210B}"), + (b"hardcy", "\u{044A}"), + (b"harr", "\u{2194}"), + (b"harrcir", "\u{2948}"), + (b"harrw", "\u{21AD}"), + (b"hbar", "\u{210F}"), + (b"hcirc", "\u{0125}"), + (b"hearts", "\u{2665}"), + (b"heartsuit", "\u{2665}"), + (b"hellip", "\u{2026}"), + (b"hercon", "\u{22B9}"), + (b"hfr", "\u{1D525}"), + (b"hksearow", "\u{2925}"), + (b"hkswarow", "\u{2926}"), + (b"hoarr", "\u{21FF}"), + (b"homtht", "\u{223B}"), + (b"hookleftarrow", "\u{21A9}"), + (b"hookrightarrow", "\u{21AA}"), + (b"hopf", "\u{1D559}"), + (b"horbar", "\u{2015}"), 
+ (b"hscr", "\u{1D4BD}"), + (b"hslash", "\u{210F}"), + (b"hstrok", "\u{0127}"), + (b"hybull", "\u{2043}"), + (b"hyphen", "\u{2010}"), + (b"iacute", "\u{00ED}"), + (b"ic", "\u{2063}"), + (b"icirc", "\u{00EE}"), + (b"icy", "\u{0438}"), + (b"iecy", "\u{0435}"), + (b"iexcl", "\u{00A1}"), + (b"iff", "\u{21D4}"), + (b"ifr", "\u{1D526}"), + (b"igrave", "\u{00EC}"), + (b"ii", "\u{2148}"), + (b"iiiint", "\u{2A0C}"), + (b"iiint", "\u{222D}"), + (b"iinfin", "\u{29DC}"), + (b"iiota", "\u{2129}"), + (b"ijlig", "\u{0133}"), + (b"imacr", "\u{012B}"), + (b"image", "\u{2111}"), + (b"imagline", "\u{2110}"), + (b"imagpart", "\u{2111}"), + (b"imath", "\u{0131}"), + (b"imof", "\u{22B7}"), + (b"imped", "\u{01B5}"), + (b"in", "\u{2208}"), + (b"incare", "\u{2105}"), + (b"infin", "\u{221E}"), + (b"infintie", "\u{29DD}"), + (b"inodot", "\u{0131}"), + (b"int", "\u{222B}"), + (b"intcal", "\u{22BA}"), + (b"integers", "\u{2124}"), + (b"intercal", "\u{22BA}"), + (b"intlarhk", "\u{2A17}"), + (b"intprod", "\u{2A3C}"), + (b"iocy", "\u{0451}"), + (b"iogon", "\u{012F}"), + (b"iopf", "\u{1D55A}"), + (b"iota", "\u{03B9}"), + (b"iprod", "\u{2A3C}"), + (b"iquest", "\u{00BF}"), + (b"iscr", "\u{1D4BE}"), + (b"isin", "\u{2208}"), + (b"isinE", "\u{22F9}"), + (b"isindot", "\u{22F5}"), + (b"isins", "\u{22F4}"), + (b"isinsv", "\u{22F3}"), + (b"isinv", "\u{2208}"), + (b"it", "\u{2062}"), + (b"itilde", "\u{0129}"), + (b"iukcy", "\u{0456}"), + (b"iuml", "\u{00EF}"), + (b"jcirc", "\u{0135}"), + (b"jcy", "\u{0439}"), + (b"jfr", "\u{1D527}"), + (b"jmath", "\u{0237}"), + (b"jopf", "\u{1D55B}"), + (b"jscr", "\u{1D4BF}"), + (b"jsercy", "\u{0458}"), + (b"jukcy", "\u{0454}"), + (b"kappa", "\u{03BA}"), + (b"kappav", "\u{03F0}"), + (b"kcedil", "\u{0137}"), + (b"kcy", "\u{043A}"), + (b"kfr", "\u{1D528}"), + (b"kgreen", "\u{0138}"), + (b"khcy", "\u{0445}"), + (b"kjcy", "\u{045C}"), + (b"kopf", "\u{1D55C}"), + (b"kscr", "\u{1D4C0}"), + (b"lAarr", "\u{21DA}"), + (b"lArr", "\u{21D0}"), + (b"lAtail", "\u{291B}"), + (b"lBarr", 
"\u{290E}"), + (b"lE", "\u{2266}"), + (b"lEg", "\u{2A8B}"), + (b"lHar", "\u{2962}"), + (b"lacute", "\u{013A}"), + (b"laemptyv", "\u{29B4}"), + (b"lagran", "\u{2112}"), + (b"lambda", "\u{03BB}"), + (b"lang", "\u{27E8}"), + (b"langd", "\u{2991}"), + (b"langle", "\u{27E8}"), + (b"lap", "\u{2A85}"), + (b"laquo", "\u{00AB}"), + (b"larr", "\u{2190}"), + (b"larrb", "\u{21E4}"), + (b"larrbfs", "\u{291F}"), + (b"larrfs", "\u{291D}"), + (b"larrhk", "\u{21A9}"), + (b"larrlp", "\u{21AB}"), + (b"larrpl", "\u{2939}"), + (b"larrsim", "\u{2973}"), + (b"larrtl", "\u{21A2}"), + (b"lat", "\u{2AAB}"), + (b"latail", "\u{2919}"), + (b"late", "\u{2AAD}"), + (b"lates", "\u{2AAD}\u{FE00}"), + (b"lbarr", "\u{290C}"), + (b"lbbrk", "\u{2772}"), + (b"lbrace", "\u{007B}"), + (b"lbrack", "\u{005B}"), + (b"lbrke", "\u{298B}"), + (b"lbrksld", "\u{298F}"), + (b"lbrkslu", "\u{298D}"), + (b"lcaron", "\u{013E}"), + (b"lcedil", "\u{013C}"), + (b"lceil", "\u{2308}"), + (b"lcub", "\u{007B}"), + (b"lcy", "\u{043B}"), + (b"ldca", "\u{2936}"), + (b"ldquo", "\u{201C}"), + (b"ldquor", "\u{201E}"), + (b"ldrdhar", "\u{2967}"), + (b"ldrushar", "\u{294B}"), + (b"ldsh", "\u{21B2}"), + (b"le", "\u{2264}"), + (b"leftarrow", "\u{2190}"), + (b"leftarrowtail", "\u{21A2}"), + (b"leftharpoondown", "\u{21BD}"), + (b"leftharpoonup", "\u{21BC}"), + (b"leftleftarrows", "\u{21C7}"), + (b"leftrightarrow", "\u{2194}"), + (b"leftrightarrows", "\u{21C6}"), + (b"leftrightharpoons", "\u{21CB}"), + (b"leftrightsquigarrow", "\u{21AD}"), + (b"leftthreetimes", "\u{22CB}"), + (b"leg", "\u{22DA}"), + (b"leq", "\u{2264}"), + (b"leqq", "\u{2266}"), + (b"leqslant", "\u{2A7D}"), + (b"les", "\u{2A7D}"), + (b"lescc", "\u{2AA8}"), + (b"lesdot", "\u{2A7F}"), + (b"lesdoto", "\u{2A81}"), + (b"lesdotor", "\u{2A83}"), + (b"lesg", "\u{22DA}\u{FE00}"), + (b"lesges", "\u{2A93}"), + (b"lessapprox", "\u{2A85}"), + (b"lessdot", "\u{22D6}"), + (b"lesseqgtr", "\u{22DA}"), + (b"lesseqqgtr", "\u{2A8B}"), + (b"lessgtr", "\u{2276}"), + (b"lesssim", "\u{2272}"), 
+ (b"lfisht", "\u{297C}"), + (b"lfloor", "\u{230A}"), + (b"lfr", "\u{1D529}"), + (b"lg", "\u{2276}"), + (b"lgE", "\u{2A91}"), + (b"lhard", "\u{21BD}"), + (b"lharu", "\u{21BC}"), + (b"lharul", "\u{296A}"), + (b"lhblk", "\u{2584}"), + (b"ljcy", "\u{0459}"), + (b"ll", "\u{226A}"), + (b"llarr", "\u{21C7}"), + (b"llcorner", "\u{231E}"), + (b"llhard", "\u{296B}"), + (b"lltri", "\u{25FA}"), + (b"lmidot", "\u{0140}"), + (b"lmoust", "\u{23B0}"), + (b"lmoustache", "\u{23B0}"), + (b"lnE", "\u{2268}"), + (b"lnap", "\u{2A89}"), + (b"lnapprox", "\u{2A89}"), + (b"lne", "\u{2A87}"), + (b"lneq", "\u{2A87}"), + (b"lneqq", "\u{2268}"), + (b"lnsim", "\u{22E6}"), + (b"loang", "\u{27EC}"), + (b"loarr", "\u{21FD}"), + (b"lobrk", "\u{27E6}"), + (b"longleftarrow", "\u{27F5}"), + (b"longleftrightarrow", "\u{27F7}"), + (b"longmapsto", "\u{27FC}"), + (b"longrightarrow", "\u{27F6}"), + (b"looparrowleft", "\u{21AB}"), + (b"looparrowright", "\u{21AC}"), + (b"lopar", "\u{2985}"), + (b"lopf", "\u{1D55D}"), + (b"loplus", "\u{2A2D}"), + (b"lotimes", "\u{2A34}"), + (b"lowast", "\u{2217}"), + (b"lowbar", "\u{005F}"), + (b"loz", "\u{25CA}"), + (b"lozenge", "\u{25CA}"), + (b"lozf", "\u{29EB}"), + (b"lpar", "\u{0028}"), + (b"lparlt", "\u{2993}"), + (b"lrarr", "\u{21C6}"), + (b"lrcorner", "\u{231F}"), + (b"lrhar", "\u{21CB}"), + (b"lrhard", "\u{296D}"), + (b"lrm", "\u{200E}"), + (b"lrtri", "\u{22BF}"), + (b"lsaquo", "\u{2039}"), + (b"lscr", "\u{1D4C1}"), + (b"lsh", "\u{21B0}"), + (b"lsim", "\u{2272}"), + (b"lsime", "\u{2A8D}"), + (b"lsimg", "\u{2A8F}"), + (b"lsqb", "\u{005B}"), + (b"lsquo", "\u{2018}"), + (b"lsquor", "\u{201A}"), + (b"lstrok", "\u{0142}"), + (b"lt", "\u{003C}"), + (b"ltcc", "\u{2AA6}"), + (b"ltcir", "\u{2A79}"), + (b"ltdot", "\u{22D6}"), + (b"lthree", "\u{22CB}"), + (b"ltimes", "\u{22C9}"), + (b"ltlarr", "\u{2976}"), + (b"ltquest", "\u{2A7B}"), + (b"ltrPar", "\u{2996}"), + (b"ltri", "\u{25C3}"), + (b"ltrie", "\u{22B4}"), + (b"ltrif", "\u{25C2}"), + (b"lurdshar", "\u{294A}"), + 
(b"luruhar", "\u{2966}"), + (b"lvertneqq", "\u{2268}\u{FE00}"), + (b"lvnE", "\u{2268}\u{FE00}"), + (b"mDDot", "\u{223A}"), + (b"macr", "\u{00AF}"), + (b"male", "\u{2642}"), + (b"malt", "\u{2720}"), + (b"maltese", "\u{2720}"), + (b"map", "\u{21A6}"), + (b"mapsto", "\u{21A6}"), + (b"mapstodown", "\u{21A7}"), + (b"mapstoleft", "\u{21A4}"), + (b"mapstoup", "\u{21A5}"), + (b"marker", "\u{25AE}"), + (b"mcomma", "\u{2A29}"), + (b"mcy", "\u{043C}"), + (b"mdash", "\u{2014}"), + (b"measuredangle", "\u{2221}"), + (b"mfr", "\u{1D52A}"), + (b"mho", "\u{2127}"), + (b"micro", "\u{00B5}"), + (b"mid", "\u{2223}"), + (b"midast", "\u{002A}"), + (b"midcir", "\u{2AF0}"), + (b"middot", "\u{00B7}"), + (b"minus", "\u{2212}"), + (b"minusb", "\u{229F}"), + (b"minusd", "\u{2238}"), + (b"minusdu", "\u{2A2A}"), + (b"mlcp", "\u{2ADB}"), + (b"mldr", "\u{2026}"), + (b"mnplus", "\u{2213}"), + (b"models", "\u{22A7}"), + (b"mopf", "\u{1D55E}"), + (b"mp", "\u{2213}"), + (b"mscr", "\u{1D4C2}"), + (b"mstpos", "\u{223E}"), + (b"mu", "\u{03BC}"), + (b"multimap", "\u{22B8}"), + (b"mumap", "\u{22B8}"), + (b"nGg", "\u{22D9}\u{0338}"), + (b"nGt", "\u{226B}\u{20D2}"), + (b"nGtv", "\u{226B}\u{0338}"), + (b"nLeftarrow", "\u{21CD}"), + (b"nLeftrightarrow", "\u{21CE}"), + (b"nLl", "\u{22D8}\u{0338}"), + (b"nLt", "\u{226A}\u{20D2}"), + (b"nLtv", "\u{226A}\u{0338}"), + (b"nRightarrow", "\u{21CF}"), + (b"nVDash", "\u{22AF}"), + (b"nVdash", "\u{22AE}"), + (b"nabla", "\u{2207}"), + (b"nacute", "\u{0144}"), + (b"nang", "\u{2220}\u{20D2}"), + (b"nap", "\u{2249}"), + (b"napE", "\u{2A70}\u{0338}"), + (b"napid", "\u{224B}\u{0338}"), + (b"napos", "\u{0149}"), + (b"napprox", "\u{2249}"), + (b"natur", "\u{266E}"), + (b"natural", "\u{266E}"), + (b"naturals", "\u{2115}"), + (b"nbsp", "\u{00A0}"), + (b"nbump", "\u{224E}\u{0338}"), + (b"nbumpe", "\u{224F}\u{0338}"), + (b"ncap", "\u{2A43}"), + (b"ncaron", "\u{0148}"), + (b"ncedil", "\u{0146}"), + (b"ncong", "\u{2247}"), + (b"ncongdot", "\u{2A6D}\u{0338}"), + (b"ncup", "\u{2A42}"), 
+ (b"ncy", "\u{043D}"), + (b"ndash", "\u{2013}"), + (b"ne", "\u{2260}"), + (b"neArr", "\u{21D7}"), + (b"nearhk", "\u{2924}"), + (b"nearr", "\u{2197}"), + (b"nearrow", "\u{2197}"), + (b"nedot", "\u{2250}\u{0338}"), + (b"nequiv", "\u{2262}"), + (b"nesear", "\u{2928}"), + (b"nesim", "\u{2242}\u{0338}"), + (b"nexist", "\u{2204}"), + (b"nexists", "\u{2204}"), + (b"nfr", "\u{1D52B}"), + (b"ngE", "\u{2267}\u{0338}"), + (b"nge", "\u{2271}"), + (b"ngeq", "\u{2271}"), + (b"ngeqq", "\u{2267}\u{0338}"), + (b"ngeqslant", "\u{2A7E}\u{0338}"), + (b"nges", "\u{2A7E}\u{0338}"), + (b"ngsim", "\u{2275}"), + (b"ngt", "\u{226F}"), + (b"ngtr", "\u{226F}"), + (b"nhArr", "\u{21CE}"), + (b"nharr", "\u{21AE}"), + (b"nhpar", "\u{2AF2}"), + (b"ni", "\u{220B}"), + (b"nis", "\u{22FC}"), + (b"nisd", "\u{22FA}"), + (b"niv", "\u{220B}"), + (b"njcy", "\u{045A}"), + (b"nlArr", "\u{21CD}"), + (b"nlE", "\u{2266}\u{0338}"), + (b"nlarr", "\u{219A}"), + (b"nldr", "\u{2025}"), + (b"nle", "\u{2270}"), + (b"nleftarrow", "\u{219A}"), + (b"nleftrightarrow", "\u{21AE}"), + (b"nleq", "\u{2270}"), + (b"nleqq", "\u{2266}\u{0338}"), + (b"nleqslant", "\u{2A7D}\u{0338}"), + (b"nles", "\u{2A7D}\u{0338}"), + (b"nless", "\u{226E}"), + (b"nlsim", "\u{2274}"), + (b"nlt", "\u{226E}"), + (b"nltri", "\u{22EA}"), + (b"nltrie", "\u{22EC}"), + (b"nmid", "\u{2224}"), + (b"nopf", "\u{1D55F}"), + (b"not", "\u{00AC}"), + (b"notin", "\u{2209}"), + (b"notinE", "\u{22F9}\u{0338}"), + (b"notindot", "\u{22F5}\u{0338}"), + (b"notinva", "\u{2209}"), + (b"notinvb", "\u{22F7}"), + (b"notinvc", "\u{22F6}"), + (b"notni", "\u{220C}"), + (b"notniva", "\u{220C}"), + (b"notnivb", "\u{22FE}"), + (b"notnivc", "\u{22FD}"), + (b"npar", "\u{2226}"), + (b"nparallel", "\u{2226}"), + (b"nparsl", "\u{2AFD}\u{20E5}"), + (b"npart", "\u{2202}\u{0338}"), + (b"npolint", "\u{2A14}"), + (b"npr", "\u{2280}"), + (b"nprcue", "\u{22E0}"), + (b"npre", "\u{2AAF}\u{0338}"), + (b"nprec", "\u{2280}"), + (b"npreceq", "\u{2AAF}\u{0338}"), + (b"nrArr", "\u{21CF}"), + 
(b"nrarr", "\u{219B}"), + (b"nrarrc", "\u{2933}\u{0338}"), + (b"nrarrw", "\u{219D}\u{0338}"), + (b"nrightarrow", "\u{219B}"), + (b"nrtri", "\u{22EB}"), + (b"nrtrie", "\u{22ED}"), + (b"nsc", "\u{2281}"), + (b"nsccue", "\u{22E1}"), + (b"nsce", "\u{2AB0}\u{0338}"), + (b"nscr", "\u{1D4C3}"), + (b"nshortmid", "\u{2224}"), + (b"nshortparallel", "\u{2226}"), + (b"nsim", "\u{2241}"), + (b"nsime", "\u{2244}"), + (b"nsimeq", "\u{2244}"), + (b"nsmid", "\u{2224}"), + (b"nspar", "\u{2226}"), + (b"nsqsube", "\u{22E2}"), + (b"nsqsupe", "\u{22E3}"), + (b"nsub", "\u{2284}"), + (b"nsubE", "\u{2AC5}\u{0338}"), + (b"nsube", "\u{2288}"), + (b"nsubset", "\u{2282}\u{20D2}"), + (b"nsubseteq", "\u{2288}"), + (b"nsubseteqq", "\u{2AC5}\u{0338}"), + (b"nsucc", "\u{2281}"), + (b"nsucceq", "\u{2AB0}\u{0338}"), + (b"nsup", "\u{2285}"), + (b"nsupE", "\u{2AC6}\u{0338}"), + (b"nsupe", "\u{2289}"), + (b"nsupset", "\u{2283}\u{20D2}"), + (b"nsupseteq", "\u{2289}"), + (b"nsupseteqq", "\u{2AC6}\u{0338}"), + (b"ntgl", "\u{2279}"), + (b"ntilde", "\u{00F1}"), + (b"ntlg", "\u{2278}"), + (b"ntriangleleft", "\u{22EA}"), + (b"ntrianglelefteq", "\u{22EC}"), + (b"ntriangleright", "\u{22EB}"), + (b"ntrianglerighteq", "\u{22ED}"), + (b"nu", "\u{03BD}"), + (b"num", "\u{0023}"), + (b"numero", "\u{2116}"), + (b"numsp", "\u{2007}"), + (b"nvDash", "\u{22AD}"), + (b"nvHarr", "\u{2904}"), + (b"nvap", "\u{224D}\u{20D2}"), + (b"nvdash", "\u{22AC}"), + (b"nvge", "\u{2265}\u{20D2}"), + (b"nvgt", "\u{003E}\u{20D2}"), + (b"nvinfin", "\u{29DE}"), + (b"nvlArr", "\u{2902}"), + (b"nvle", "\u{2264}\u{20D2}"), + (b"nvlt", "\u{003C}\u{20D2}"), + (b"nvltrie", "\u{22B4}\u{20D2}"), + (b"nvrArr", "\u{2903}"), + (b"nvrtrie", "\u{22B5}\u{20D2}"), + (b"nvsim", "\u{223C}\u{20D2}"), + (b"nwArr", "\u{21D6}"), + (b"nwarhk", "\u{2923}"), + (b"nwarr", "\u{2196}"), + (b"nwarrow", "\u{2196}"), + (b"nwnear", "\u{2927}"), + (b"oS", "\u{24C8}"), + (b"oacute", "\u{00F3}"), + (b"oast", "\u{229B}"), + (b"ocir", "\u{229A}"), + (b"ocirc", "\u{00F4}"), + 
(b"ocy", "\u{043E}"), + (b"odash", "\u{229D}"), + (b"odblac", "\u{0151}"), + (b"odiv", "\u{2A38}"), + (b"odot", "\u{2299}"), + (b"odsold", "\u{29BC}"), + (b"oelig", "\u{0153}"), + (b"ofcir", "\u{29BF}"), + (b"ofr", "\u{1D52C}"), + (b"ogon", "\u{02DB}"), + (b"ograve", "\u{00F2}"), + (b"ogt", "\u{29C1}"), + (b"ohbar", "\u{29B5}"), + (b"ohm", "\u{03A9}"), + (b"oint", "\u{222E}"), + (b"olarr", "\u{21BA}"), + (b"olcir", "\u{29BE}"), + (b"olcross", "\u{29BB}"), + (b"oline", "\u{203E}"), + (b"olt", "\u{29C0}"), + (b"omacr", "\u{014D}"), + (b"omega", "\u{03C9}"), + (b"omicron", "\u{03BF}"), + (b"omid", "\u{29B6}"), + (b"ominus", "\u{2296}"), + (b"oopf", "\u{1D560}"), + (b"opar", "\u{29B7}"), + (b"operp", "\u{29B9}"), + (b"oplus", "\u{2295}"), + (b"or", "\u{2228}"), + (b"orarr", "\u{21BB}"), + (b"ord", "\u{2A5D}"), + (b"order", "\u{2134}"), + (b"orderof", "\u{2134}"), + (b"ordf", "\u{00AA}"), + (b"ordm", "\u{00BA}"), + (b"origof", "\u{22B6}"), + (b"oror", "\u{2A56}"), + (b"orslope", "\u{2A57}"), + (b"orv", "\u{2A5B}"), + (b"oscr", "\u{2134}"), + (b"oslash", "\u{00F8}"), + (b"osol", "\u{2298}"), + (b"otilde", "\u{00F5}"), + (b"otimes", "\u{2297}"), + (b"otimesas", "\u{2A36}"), + (b"ouml", "\u{00F6}"), + (b"ovbar", "\u{233D}"), + (b"par", "\u{2225}"), + (b"para", "\u{00B6}"), + (b"parallel", "\u{2225}"), + (b"parsim", "\u{2AF3}"), + (b"parsl", "\u{2AFD}"), + (b"part", "\u{2202}"), + (b"pcy", "\u{043F}"), + (b"percnt", "\u{0025}"), + (b"period", "\u{002E}"), + (b"permil", "\u{2030}"), + (b"perp", "\u{22A5}"), + (b"pertenk", "\u{2031}"), + (b"pfr", "\u{1D52D}"), + (b"phi", "\u{03C6}"), + (b"phiv", "\u{03D5}"), + (b"phmmat", "\u{2133}"), + (b"phone", "\u{260E}"), + (b"pi", "\u{03C0}"), + (b"pitchfork", "\u{22D4}"), + (b"piv", "\u{03D6}"), + (b"planck", "\u{210F}"), + (b"planckh", "\u{210E}"), + (b"plankv", "\u{210F}"), + (b"plus", "\u{002B}"), + (b"plusacir", "\u{2A23}"), + (b"plusb", "\u{229E}"), + (b"pluscir", "\u{2A22}"), + (b"plusdo", "\u{2214}"), + (b"plusdu", "\u{2A25}"), 
+ (b"pluse", "\u{2A72}"), + (b"plusmn", "\u{00B1}"), + (b"plussim", "\u{2A26}"), + (b"plustwo", "\u{2A27}"), + (b"pm", "\u{00B1}"), + (b"pointint", "\u{2A15}"), + (b"popf", "\u{1D561}"), + (b"pound", "\u{00A3}"), + (b"pr", "\u{227A}"), + (b"prE", "\u{2AB3}"), + (b"prap", "\u{2AB7}"), + (b"prcue", "\u{227C}"), + (b"pre", "\u{2AAF}"), + (b"prec", "\u{227A}"), + (b"precapprox", "\u{2AB7}"), + (b"preccurlyeq", "\u{227C}"), + (b"preceq", "\u{2AAF}"), + (b"precnapprox", "\u{2AB9}"), + (b"precneqq", "\u{2AB5}"), + (b"precnsim", "\u{22E8}"), + (b"precsim", "\u{227E}"), + (b"prime", "\u{2032}"), + (b"primes", "\u{2119}"), + (b"prnE", "\u{2AB5}"), + (b"prnap", "\u{2AB9}"), + (b"prnsim", "\u{22E8}"), + (b"prod", "\u{220F}"), + (b"profalar", "\u{232E}"), + (b"profline", "\u{2312}"), + (b"profsurf", "\u{2313}"), + (b"prop", "\u{221D}"), + (b"propto", "\u{221D}"), + (b"prsim", "\u{227E}"), + (b"prurel", "\u{22B0}"), + (b"pscr", "\u{1D4C5}"), + (b"psi", "\u{03C8}"), + (b"puncsp", "\u{2008}"), + (b"qfr", "\u{1D52E}"), + (b"qint", "\u{2A0C}"), + (b"qopf", "\u{1D562}"), + (b"qprime", "\u{2057}"), + (b"qscr", "\u{1D4C6}"), + (b"quaternions", "\u{210D}"), + (b"quatint", "\u{2A16}"), + (b"quest", "\u{003F}"), + (b"questeq", "\u{225F}"), + (b"quot", "\u{0022}"), + (b"rAarr", "\u{21DB}"), + (b"rArr", "\u{21D2}"), + (b"rAtail", "\u{291C}"), + (b"rBarr", "\u{290F}"), + (b"rHar", "\u{2964}"), + (b"race", "\u{223D}\u{0331}"), + (b"racute", "\u{0155}"), + (b"radic", "\u{221A}"), + (b"raemptyv", "\u{29B3}"), + (b"rang", "\u{27E9}"), + (b"rangd", "\u{2992}"), + (b"range", "\u{29A5}"), + (b"rangle", "\u{27E9}"), + (b"raquo", "\u{00BB}"), + (b"rarr", "\u{2192}"), + (b"rarrap", "\u{2975}"), + (b"rarrb", "\u{21E5}"), + (b"rarrbfs", "\u{2920}"), + (b"rarrc", "\u{2933}"), + (b"rarrfs", "\u{291E}"), + (b"rarrhk", "\u{21AA}"), + (b"rarrlp", "\u{21AC}"), + (b"rarrpl", "\u{2945}"), + (b"rarrsim", "\u{2974}"), + (b"rarrtl", "\u{21A3}"), + (b"rarrw", "\u{219D}"), + (b"ratail", "\u{291A}"), + (b"ratio", 
"\u{2236}"), + (b"rationals", "\u{211A}"), + (b"rbarr", "\u{290D}"), + (b"rbbrk", "\u{2773}"), + (b"rbrace", "\u{007D}"), + (b"rbrack", "\u{005D}"), + (b"rbrke", "\u{298C}"), + (b"rbrksld", "\u{298E}"), + (b"rbrkslu", "\u{2990}"), + (b"rcaron", "\u{0159}"), + (b"rcedil", "\u{0157}"), + (b"rceil", "\u{2309}"), + (b"rcub", "\u{007D}"), + (b"rcy", "\u{0440}"), + (b"rdca", "\u{2937}"), + (b"rdldhar", "\u{2969}"), + (b"rdquo", "\u{201D}"), + (b"rdquor", "\u{201D}"), + (b"rdsh", "\u{21B3}"), + (b"real", "\u{211C}"), + (b"realine", "\u{211B}"), + (b"realpart", "\u{211C}"), + (b"reals", "\u{211D}"), + (b"rect", "\u{25AD}"), + (b"reg", "\u{00AE}"), + (b"rfisht", "\u{297D}"), + (b"rfloor", "\u{230B}"), + (b"rfr", "\u{1D52F}"), + (b"rhard", "\u{21C1}"), + (b"rharu", "\u{21C0}"), + (b"rharul", "\u{296C}"), + (b"rho", "\u{03C1}"), + (b"rhov", "\u{03F1}"), + (b"rightarrow", "\u{2192}"), + (b"rightarrowtail", "\u{21A3}"), + (b"rightharpoondown", "\u{21C1}"), + (b"rightharpoonup", "\u{21C0}"), + (b"rightleftarrows", "\u{21C4}"), + (b"rightleftharpoons", "\u{21CC}"), + (b"rightrightarrows", "\u{21C9}"), + (b"rightsquigarrow", "\u{219D}"), + (b"rightthreetimes", "\u{22CC}"), + (b"ring", "\u{02DA}"), + (b"risingdotseq", "\u{2253}"), + (b"rlarr", "\u{21C4}"), + (b"rlhar", "\u{21CC}"), + (b"rlm", "\u{200F}"), + (b"rmoust", "\u{23B1}"), + (b"rmoustache", "\u{23B1}"), + (b"rnmid", "\u{2AEE}"), + (b"roang", "\u{27ED}"), + (b"roarr", "\u{21FE}"), + (b"robrk", "\u{27E7}"), + (b"ropar", "\u{2986}"), + (b"ropf", "\u{1D563}"), + (b"roplus", "\u{2A2E}"), + (b"rotimes", "\u{2A35}"), + (b"rpar", "\u{0029}"), + (b"rpargt", "\u{2994}"), + (b"rppolint", "\u{2A12}"), + (b"rrarr", "\u{21C9}"), + (b"rsaquo", "\u{203A}"), + (b"rscr", "\u{1D4C7}"), + (b"rsh", "\u{21B1}"), + (b"rsqb", "\u{005D}"), + (b"rsquo", "\u{2019}"), + (b"rsquor", "\u{2019}"), + (b"rthree", "\u{22CC}"), + (b"rtimes", "\u{22CA}"), + (b"rtri", "\u{25B9}"), + (b"rtrie", "\u{22B5}"), + (b"rtrif", "\u{25B8}"), + (b"rtriltri", 
"\u{29CE}"), + (b"ruluhar", "\u{2968}"), + (b"rx", "\u{211E}"), + (b"sacute", "\u{015B}"), + (b"sbquo", "\u{201A}"), + (b"sc", "\u{227B}"), + (b"scE", "\u{2AB4}"), + (b"scap", "\u{2AB8}"), + (b"scaron", "\u{0161}"), + (b"sccue", "\u{227D}"), + (b"sce", "\u{2AB0}"), + (b"scedil", "\u{015F}"), + (b"scirc", "\u{015D}"), + (b"scnE", "\u{2AB6}"), + (b"scnap", "\u{2ABA}"), + (b"scnsim", "\u{22E9}"), + (b"scpolint", "\u{2A13}"), + (b"scsim", "\u{227F}"), + (b"scy", "\u{0441}"), + (b"sdot", "\u{22C5}"), + (b"sdotb", "\u{22A1}"), + (b"sdote", "\u{2A66}"), + (b"seArr", "\u{21D8}"), + (b"searhk", "\u{2925}"), + (b"searr", "\u{2198}"), + (b"searrow", "\u{2198}"), + (b"sect", "\u{00A7}"), + (b"semi", "\u{003B}"), + (b"seswar", "\u{2929}"), + (b"setminus", "\u{2216}"), + (b"setmn", "\u{2216}"), + (b"sext", "\u{2736}"), + (b"sfr", "\u{1D530}"), + (b"sfrown", "\u{2322}"), + (b"sharp", "\u{266F}"), + (b"shchcy", "\u{0449}"), + (b"shcy", "\u{0448}"), + (b"shortmid", "\u{2223}"), + (b"shortparallel", "\u{2225}"), + (b"shy", "\u{00AD}"), + (b"sigma", "\u{03C3}"), + (b"sigmaf", "\u{03C2}"), + (b"sigmav", "\u{03C2}"), + (b"sim", "\u{223C}"), + (b"simdot", "\u{2A6A}"), + (b"sime", "\u{2243}"), + (b"simeq", "\u{2243}"), + (b"simg", "\u{2A9E}"), + (b"simgE", "\u{2AA0}"), + (b"siml", "\u{2A9D}"), + (b"simlE", "\u{2A9F}"), + (b"simne", "\u{2246}"), + (b"simplus", "\u{2A24}"), + (b"simrarr", "\u{2972}"), + (b"slarr", "\u{2190}"), + (b"smallsetminus", "\u{2216}"), + (b"smashp", "\u{2A33}"), + (b"smeparsl", "\u{29E4}"), + (b"smid", "\u{2223}"), + (b"smile", "\u{2323}"), + (b"smt", "\u{2AAA}"), + (b"smte", "\u{2AAC}"), + (b"smtes", "\u{2AAC}\u{FE00}"), + (b"softcy", "\u{044C}"), + (b"sol", "\u{002F}"), + (b"solb", "\u{29C4}"), + (b"solbar", "\u{233F}"), + (b"sopf", "\u{1D564}"), + (b"spades", "\u{2660}"), + (b"spadesuit", "\u{2660}"), + (b"spar", "\u{2225}"), + (b"sqcap", "\u{2293}"), + (b"sqcaps", "\u{2293}\u{FE00}"), + (b"sqcup", "\u{2294}"), + (b"sqcups", "\u{2294}\u{FE00}"), + (b"sqsub", 
"\u{228F}"), + (b"sqsube", "\u{2291}"), + (b"sqsubset", "\u{228F}"), + (b"sqsubseteq", "\u{2291}"), + (b"sqsup", "\u{2290}"), + (b"sqsupe", "\u{2292}"), + (b"sqsupset", "\u{2290}"), + (b"sqsupseteq", "\u{2292}"), + (b"squ", "\u{25A1}"), + (b"square", "\u{25A1}"), + (b"squarf", "\u{25AA}"), + (b"squf", "\u{25AA}"), + (b"srarr", "\u{2192}"), + (b"sscr", "\u{1D4C8}"), + (b"ssetmn", "\u{2216}"), + (b"ssmile", "\u{2323}"), + (b"sstarf", "\u{22C6}"), + (b"star", "\u{2606}"), + (b"starf", "\u{2605}"), + (b"straightepsilon", "\u{03F5}"), + (b"straightphi", "\u{03D5}"), + (b"strns", "\u{00AF}"), + (b"sub", "\u{2282}"), + (b"subE", "\u{2AC5}"), + (b"subdot", "\u{2ABD}"), + (b"sube", "\u{2286}"), + (b"subedot", "\u{2AC3}"), + (b"submult", "\u{2AC1}"), + (b"subnE", "\u{2ACB}"), + (b"subne", "\u{228A}"), + (b"subplus", "\u{2ABF}"), + (b"subrarr", "\u{2979}"), + (b"subset", "\u{2282}"), + (b"subseteq", "\u{2286}"), + (b"subseteqq", "\u{2AC5}"), + (b"subsetneq", "\u{228A}"), + (b"subsetneqq", "\u{2ACB}"), + (b"subsim", "\u{2AC7}"), + (b"subsub", "\u{2AD5}"), + (b"subsup", "\u{2AD3}"), + (b"succ", "\u{227B}"), + (b"succapprox", "\u{2AB8}"), + (b"succcurlyeq", "\u{227D}"), + (b"succeq", "\u{2AB0}"), + (b"succnapprox", "\u{2ABA}"), + (b"succneqq", "\u{2AB6}"), + (b"succnsim", "\u{22E9}"), + (b"succsim", "\u{227F}"), + (b"sum", "\u{2211}"), + (b"sung", "\u{266A}"), + (b"sup", "\u{2283}"), + (b"sup1", "\u{00B9}"), + (b"sup2", "\u{00B2}"), + (b"sup3", "\u{00B3}"), + (b"supE", "\u{2AC6}"), + (b"supdot", "\u{2ABE}"), + (b"supdsub", "\u{2AD8}"), + (b"supe", "\u{2287}"), + (b"supedot", "\u{2AC4}"), + (b"suphsol", "\u{27C9}"), + (b"suphsub", "\u{2AD7}"), + (b"suplarr", "\u{297B}"), + (b"supmult", "\u{2AC2}"), + (b"supnE", "\u{2ACC}"), + (b"supne", "\u{228B}"), + (b"supplus", "\u{2AC0}"), + (b"supset", "\u{2283}"), + (b"supseteq", "\u{2287}"), + (b"supseteqq", "\u{2AC6}"), + (b"supsetneq", "\u{228B}"), + (b"supsetneqq", "\u{2ACC}"), + (b"supsim", "\u{2AC8}"), + (b"supsub", "\u{2AD4}"), + 
(b"supsup", "\u{2AD6}"), + (b"swArr", "\u{21D9}"), + (b"swarhk", "\u{2926}"), + (b"swarr", "\u{2199}"), + (b"swarrow", "\u{2199}"), + (b"swnwar", "\u{292A}"), + (b"szlig", "\u{00DF}"), + (b"target", "\u{2316}"), + (b"tau", "\u{03C4}"), + (b"tbrk", "\u{23B4}"), + (b"tcaron", "\u{0165}"), + (b"tcedil", "\u{0163}"), + (b"tcy", "\u{0442}"), + (b"tdot", "\u{20DB}"), + (b"telrec", "\u{2315}"), + (b"tfr", "\u{1D531}"), + (b"there4", "\u{2234}"), + (b"therefore", "\u{2234}"), + (b"theta", "\u{03B8}"), + (b"thetasym", "\u{03D1}"), + (b"thetav", "\u{03D1}"), + (b"thickapprox", "\u{2248}"), + (b"thicksim", "\u{223C}"), + (b"thinsp", "\u{2009}"), + (b"thkap", "\u{2248}"), + (b"thksim", "\u{223C}"), + (b"thorn", "\u{00FE}"), + (b"tilde", "\u{02DC}"), + (b"times", "\u{00D7}"), + (b"timesb", "\u{22A0}"), + (b"timesbar", "\u{2A31}"), + (b"timesd", "\u{2A30}"), + (b"tint", "\u{222D}"), + (b"toea", "\u{2928}"), + (b"top", "\u{22A4}"), + (b"topbot", "\u{2336}"), + (b"topcir", "\u{2AF1}"), + (b"topf", "\u{1D565}"), + (b"topfork", "\u{2ADA}"), + (b"tosa", "\u{2929}"), + (b"tprime", "\u{2034}"), + (b"trade", "\u{2122}"), + (b"triangle", "\u{25B5}"), + (b"triangledown", "\u{25BF}"), + (b"triangleleft", "\u{25C3}"), + (b"trianglelefteq", "\u{22B4}"), + (b"triangleq", "\u{225C}"), + (b"triangleright", "\u{25B9}"), + (b"trianglerighteq", "\u{22B5}"), + (b"tridot", "\u{25EC}"), + (b"trie", "\u{225C}"), + (b"triminus", "\u{2A3A}"), + (b"triplus", "\u{2A39}"), + (b"trisb", "\u{29CD}"), + (b"tritime", "\u{2A3B}"), + (b"trpezium", "\u{23E2}"), + (b"tscr", "\u{1D4C9}"), + (b"tscy", "\u{0446}"), + (b"tshcy", "\u{045B}"), + (b"tstrok", "\u{0167}"), + (b"twixt", "\u{226C}"), + (b"twoheadleftarrow", "\u{219E}"), + (b"twoheadrightarrow", "\u{21A0}"), + (b"uArr", "\u{21D1}"), + (b"uHar", "\u{2963}"), + (b"uacute", "\u{00FA}"), + (b"uarr", "\u{2191}"), + (b"ubrcy", "\u{045E}"), + (b"ubreve", "\u{016D}"), + (b"ucirc", "\u{00FB}"), + (b"ucy", "\u{0443}"), + (b"udarr", "\u{21C5}"), + (b"udblac", 
"\u{0171}"), + (b"udhar", "\u{296E}"), + (b"ufisht", "\u{297E}"), + (b"ufr", "\u{1D532}"), + (b"ugrave", "\u{00F9}"), + (b"uharl", "\u{21BF}"), + (b"uharr", "\u{21BE}"), + (b"uhblk", "\u{2580}"), + (b"ulcorn", "\u{231C}"), + (b"ulcorner", "\u{231C}"), + (b"ulcrop", "\u{230F}"), + (b"ultri", "\u{25F8}"), + (b"umacr", "\u{016B}"), + (b"uml", "\u{00A8}"), + (b"uogon", "\u{0173}"), + (b"uopf", "\u{1D566}"), + (b"uparrow", "\u{2191}"), + (b"updownarrow", "\u{2195}"), + (b"upharpoonleft", "\u{21BF}"), + (b"upharpoonright", "\u{21BE}"), + (b"uplus", "\u{228E}"), + (b"upsi", "\u{03C5}"), + (b"upsih", "\u{03D2}"), + (b"upsilon", "\u{03C5}"), + (b"upuparrows", "\u{21C8}"), + (b"urcorn", "\u{231D}"), + (b"urcorner", "\u{231D}"), + (b"urcrop", "\u{230E}"), + (b"uring", "\u{016F}"), + (b"urtri", "\u{25F9}"), + (b"uscr", "\u{1D4CA}"), + (b"utdot", "\u{22F0}"), + (b"utilde", "\u{0169}"), + (b"utri", "\u{25B5}"), + (b"utrif", "\u{25B4}"), + (b"uuarr", "\u{21C8}"), + (b"uuml", "\u{00FC}"), + (b"uwangle", "\u{29A7}"), + (b"vArr", "\u{21D5}"), + (b"vBar", "\u{2AE8}"), + (b"vBarv", "\u{2AE9}"), + (b"vDash", "\u{22A8}"), + (b"vangrt", "\u{299C}"), + (b"varepsilon", "\u{03F5}"), + (b"varkappa", "\u{03F0}"), + (b"varnothing", "\u{2205}"), + (b"varphi", "\u{03D5}"), + (b"varpi", "\u{03D6}"), + (b"varpropto", "\u{221D}"), + (b"varr", "\u{2195}"), + (b"varrho", "\u{03F1}"), + (b"varsigma", "\u{03C2}"), + (b"varsubsetneq", "\u{228A}\u{FE00}"), + (b"varsubsetneqq", "\u{2ACB}\u{FE00}"), + (b"varsupsetneq", "\u{228B}\u{FE00}"), + (b"varsupsetneqq", "\u{2ACC}\u{FE00}"), + (b"vartheta", "\u{03D1}"), + (b"vartriangleleft", "\u{22B2}"), + (b"vartriangleright", "\u{22B3}"), + (b"vcy", "\u{0432}"), + (b"vdash", "\u{22A2}"), + (b"vee", "\u{2228}"), + (b"veebar", "\u{22BB}"), + (b"veeeq", "\u{225A}"), + (b"vellip", "\u{22EE}"), + (b"verbar", "\u{007C}"), + (b"vert", "\u{007C}"), + (b"vfr", "\u{1D533}"), + (b"vltri", "\u{22B2}"), + (b"vnsub", "\u{2282}\u{20D2}"), + (b"vnsup", "\u{2283}\u{20D2}"), + 
(b"vopf", "\u{1D567}"), + (b"vprop", "\u{221D}"), + (b"vrtri", "\u{22B3}"), + (b"vscr", "\u{1D4CB}"), + (b"vsubnE", "\u{2ACB}\u{FE00}"), + (b"vsubne", "\u{228A}\u{FE00}"), + (b"vsupnE", "\u{2ACC}\u{FE00}"), + (b"vsupne", "\u{228B}\u{FE00}"), + (b"vzigzag", "\u{299A}"), + (b"wcirc", "\u{0175}"), + (b"wedbar", "\u{2A5F}"), + (b"wedge", "\u{2227}"), + (b"wedgeq", "\u{2259}"), + (b"weierp", "\u{2118}"), + (b"wfr", "\u{1D534}"), + (b"wopf", "\u{1D568}"), + (b"wp", "\u{2118}"), + (b"wr", "\u{2240}"), + (b"wreath", "\u{2240}"), + (b"wscr", "\u{1D4CC}"), + (b"xcap", "\u{22C2}"), + (b"xcirc", "\u{25EF}"), + (b"xcup", "\u{22C3}"), + (b"xdtri", "\u{25BD}"), + (b"xfr", "\u{1D535}"), + (b"xhArr", "\u{27FA}"), + (b"xharr", "\u{27F7}"), + (b"xi", "\u{03BE}"), + (b"xlArr", "\u{27F8}"), + (b"xlarr", "\u{27F5}"), + (b"xmap", "\u{27FC}"), + (b"xnis", "\u{22FB}"), + (b"xodot", "\u{2A00}"), + (b"xopf", "\u{1D569}"), + (b"xoplus", "\u{2A01}"), + (b"xotime", "\u{2A02}"), + (b"xrArr", "\u{27F9}"), + (b"xrarr", "\u{27F6}"), + (b"xscr", "\u{1D4CD}"), + (b"xsqcup", "\u{2A06}"), + (b"xuplus", "\u{2A04}"), + (b"xutri", "\u{25B3}"), + (b"xvee", "\u{22C1}"), + (b"xwedge", "\u{22C0}"), + (b"yacute", "\u{00FD}"), + (b"yacy", "\u{044F}"), + (b"ycirc", "\u{0177}"), + (b"ycy", "\u{044B}"), + (b"yen", "\u{00A5}"), + (b"yfr", "\u{1D536}"), + (b"yicy", "\u{0457}"), + (b"yopf", "\u{1D56A}"), + (b"yscr", "\u{1D4CE}"), + (b"yucy", "\u{044E}"), + (b"yuml", "\u{00FF}"), + (b"zacute", "\u{017A}"), + (b"zcaron", "\u{017E}"), + (b"zcy", "\u{0437}"), + (b"zdot", "\u{017C}"), + (b"zeetrf", "\u{2128}"), + (b"zeta", "\u{03B6}"), + (b"zfr", "\u{1D537}"), + (b"zhcy", "\u{0436}"), + (b"zigrarr", "\u{21DD}"), + (b"zopf", "\u{1D56B}"), + (b"zscr", "\u{1D4CF}"), + (b"zwj", "\u{200D}"), + (b"zwnj", "\u{200C}"), +]; + +pub(crate) fn get_entity(bytes: &[u8]) -> Option<&'static str> { + ENTITIES + .binary_search_by_key(&bytes, |&(key, _value)| key) + .ok() + .map(|i| ENTITIES[i].1) +} diff --git 
// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

//! Utility functions for HTML escaping. Only useful when building your own
//! HTML renderer.

use std::fmt::{Arguments, Write as FmtWrite};
use std::io::{self, ErrorKind, Write};
use std::str::from_utf8;

/// Lookup table over the 128 ASCII bytes: 1 means the byte may appear in an
/// href verbatim, 0 means it must be percent-encoded (or entity-escaped for
/// `&` and `'`, see `escape_href`).
#[rustfmt::skip]
static HREF_SAFE: [u8; 128] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
];

static HEX_CHARS: &[u8] = b"0123456789ABCDEF";
// NOTE: these were entity-decoded by the diff renderer in the original view;
// the HTML entity forms below are what the escaping logic requires.
static AMP_ESCAPE: &str = "&amp;";
static SINGLE_QUOTE_ESCAPE: &str = "&#x27;";

/// This wrapper exists because we can't have both a blanket implementation
/// for all types implementing `Write` and types of the for `&mut W` where
/// `W: StrWrite`. Since we need the latter a lot, we choose to wrap
/// `Write` types.
pub struct WriteWrapper<W>(pub W);

/// Trait that allows writing string slices. This is basically an extension
/// of `std::io::Write` in order to include `String`.
pub trait StrWrite {
    fn write_str(&mut self, s: &str) -> io::Result<()>;

    fn write_fmt(&mut self, args: Arguments) -> io::Result<()>;
}

impl<W> StrWrite for WriteWrapper<W>
where
    W: Write,
{
    #[inline]
    fn write_str(&mut self, s: &str) -> io::Result<()> {
        self.0.write_all(s.as_bytes())
    }

    #[inline]
    fn write_fmt(&mut self, args: Arguments) -> io::Result<()> {
        self.0.write_fmt(args)
    }
}

// Fix: the original wrote `impl<'w> StrWrite for String`, declaring a
// lifetime parameter that is never used; drop it (clippy:
// `extra_unused_lifetimes`). Semantics are unchanged.
impl StrWrite for String {
    #[inline]
    fn write_str(&mut self, s: &str) -> io::Result<()> {
        self.push_str(s);
        Ok(())
    }

    #[inline]
    fn write_fmt(&mut self, args: Arguments) -> io::Result<()> {
        // FIXME: translate fmt error to io error?
        FmtWrite::write_fmt(self, args).map_err(|_| ErrorKind::Other.into())
    }
}

impl<W> StrWrite for &'_ mut W
where
    W: StrWrite,
{
    #[inline]
    fn write_str(&mut self, s: &str) -> io::Result<()> {
        (**self).write_str(s)
    }

    #[inline]
    fn write_fmt(&mut self, args: Arguments) -> io::Result<()> {
        (**self).write_fmt(args)
    }
}

/// Writes an href to the buffer, escaping href unsafe bytes.
///
/// Non-ASCII bytes and ASCII bytes not marked safe in `HREF_SAFE` are
/// percent-encoded, except `&` and `'`, which become HTML entities so the
/// result can be embedded in an HTML attribute.
pub fn escape_href<W>(mut w: W, s: &str) -> io::Result<()>
where
    W: StrWrite,
{
    let bytes = s.as_bytes();
    let mut mark = 0;
    for i in 0..bytes.len() {
        let c = bytes[i];
        if c >= 0x80 || HREF_SAFE[c as usize] == 0 {
            // character needing escape

            // write partial substring up to mark
            if mark < i {
                w.write_str(&s[mark..i])?;
            }
            match c {
                b'&' => {
                    w.write_str(AMP_ESCAPE)?;
                }
                b'\'' => {
                    w.write_str(SINGLE_QUOTE_ESCAPE)?;
                }
                _ => {
                    let mut buf = [0u8; 3];
                    buf[0] = b'%';
                    buf[1] = HEX_CHARS[((c as usize) >> 4) & 0xF];
                    buf[2] = HEX_CHARS[(c as usize) & 0xF];
                    let escaped = from_utf8(&buf).unwrap();
                    w.write_str(escaped)?;
                }
            }
            mark = i + 1; // all escaped characters are ASCII
        }
    }
    w.write_str(&s[mark..])
}

/// Builds the 256-entry table mapping each byte to an index into
/// `HTML_ESCAPES` (0 = no escaping needed).
const fn create_html_escape_table() -> [u8; 256] {
    let mut table = [0; 256];
    table[b'"' as usize] = 1;
    table[b'&' as usize] = 2;
    table[b'<' as usize] = 3;
    table[b'>' as usize] = 4;
    table
}

static HTML_ESCAPE_TABLE: [u8; 256] = create_html_escape_table();

// NOTE: entity-decoded in the rendered diff; restored to the entity forms
// the HTML escaper must emit.
static HTML_ESCAPES: [&str; 5] = ["", "&quot;", "&amp;", "&lt;", "&gt;"];

// (The public `escape_html` entry point and its SIMD-accelerated
// implementation follow.)
+pub fn escape_html<W: StrWrite>(w: W, s: &str) -> io::Result<()> { + #[cfg(all(target_arch = "x86_64", feature = "simd"))] + { + simd::escape_html(w, s) + } + #[cfg(not(all(target_arch = "x86_64", feature = "simd")))] + { + escape_html_scalar(w, s) + } +} + +fn escape_html_scalar<W: StrWrite>(mut w: W, s: &str) -> io::Result<()> { + let bytes = s.as_bytes(); + let mut mark = 0; + let mut i = 0; + while i < s.len() { + match bytes[i..] + .iter() + .position(|&c| HTML_ESCAPE_TABLE[c as usize] != 0) + { + Some(pos) => { + i += pos; + } + None => break, + } + let c = bytes[i]; + let escape = HTML_ESCAPE_TABLE[c as usize]; + let escape_seq = HTML_ESCAPES[escape as usize]; + w.write_str(&s[mark..i])?; + w.write_str(escape_seq)?; + i += 1; + mark = i; // all escaped characters are ASCII + } + w.write_str(&s[mark..]) +} + +#[cfg(all(target_arch = "x86_64", feature = "simd"))] +mod simd { + use super::StrWrite; + use std::arch::x86_64::*; + use std::io; + use std::mem::size_of; + + const VECTOR_SIZE: usize = size_of::<__m128i>(); + + pub(super) fn escape_html<W: StrWrite>(mut w: W, s: &str) -> io::Result<()> { + // The SIMD accelerated code uses the PSHUFB instruction, which is part + // of the SSSE3 instruction set. Further, we can only use this code if + // the buffer is at least one VECTOR_SIZE in length to prevent reading + // out of bounds. If either of these conditions is not met, we fall back + // to scalar code. 
+ if is_x86_feature_detected!("ssse3") && s.len() >= VECTOR_SIZE { + let bytes = s.as_bytes(); + let mut mark = 0; + + unsafe { + foreach_special_simd(bytes, 0, |i| { + let escape_ix = *bytes.get_unchecked(i) as usize; + let replacement = + super::HTML_ESCAPES[super::HTML_ESCAPE_TABLE[escape_ix] as usize]; + w.write_str(&s.get_unchecked(mark..i))?; + mark = i + 1; // all escaped characters are ASCII + w.write_str(replacement) + })?; + w.write_str(&s.get_unchecked(mark..)) + } + } else { + super::escape_html_scalar(w, s) + } + } + + /// Creates the lookup table for use in `compute_mask`. + const fn create_lookup() -> [u8; 16] { + let mut table = [0; 16]; + table[(b'<' & 0x0f) as usize] = b'<'; + table[(b'>' & 0x0f) as usize] = b'>'; + table[(b'&' & 0x0f) as usize] = b'&'; + table[(b'"' & 0x0f) as usize] = b'"'; + table[0] = 0b0111_1111; + table + } + + #[target_feature(enable = "ssse3")] + /// Computes a byte mask at given offset in the byte buffer. Its first 16 (least significant) + /// bits correspond to whether there is an HTML special byte (&, <, ", >) at the 16 bytes + /// `bytes[offset..]`. For example, the mask `(1 << 3)` states that there is an HTML byte + /// at `offset + 3`. It is only safe to call this function when + /// `bytes.len() >= offset + VECTOR_SIZE`. + unsafe fn compute_mask(bytes: &[u8], offset: usize) -> i32 { + debug_assert!(bytes.len() >= offset + VECTOR_SIZE); + + let table = create_lookup(); + let lookup = _mm_loadu_si128(table.as_ptr() as *const __m128i); + let raw_ptr = bytes.as_ptr().offset(offset as isize) as *const __m128i; + + // Load the vector from memory. + let vector = _mm_loadu_si128(raw_ptr); + // We take the least significant 4 bits of every byte and use them as indices + // to map into the lookup vector. + // Note that shuffle maps bytes with their most significant bit set to lookup[0]. + // Bytes that share their lower nibble with an HTML special byte get mapped to that + // corresponding special byte. 
Note that all HTML special bytes have distinct lower + // nibbles. Other bytes either get mapped to 0 or 127. + let expected = _mm_shuffle_epi8(lookup, vector); + // We compare the original vector to the mapped output. Bytes that shared a lower + // nibble with an HTML special byte match *only* if they are that special byte. Bytes + // that have either a 0 lower nibble or their most significant bit set were mapped to + // 127 and will hence never match. All other bytes have non-zero lower nibbles but + // were mapped to 0 and will therefore also not match. + let matches = _mm_cmpeq_epi8(expected, vector); + + // Translate matches to a bitmask, where every 1 corresponds to a HTML special character + // and a 0 is a non-HTML byte. + _mm_movemask_epi8(matches) + } + + /// Calls the given function with the index of every byte in the given byteslice + /// that is either ", &, <, or > and for no other byte. + /// Make sure to only call this when `bytes.len() >= 16`, undefined behaviour may + /// occur otherwise. + #[target_feature(enable = "ssse3")] + unsafe fn foreach_special_simd<F>( + bytes: &[u8], + mut offset: usize, + mut callback: F, + ) -> io::Result<()> + where + F: FnMut(usize) -> io::Result<()>, + { + // The strategy here is to walk the byte buffer in chunks of VECTOR_SIZE (16) + // bytes at a time starting at the given offset. For each chunk, we compute a + // a bitmask indicating whether the corresponding byte is a HTML special byte. + // We then iterate over all the 1 bits in this mask and call the callback function + // with the corresponding index in the buffer. + // When the number of HTML special bytes in the buffer is relatively low, this + // allows us to quickly go through the buffer without a lookup and for every + // single byte. 
+ + debug_assert!(bytes.len() >= VECTOR_SIZE); + let upperbound = bytes.len() - VECTOR_SIZE; + while offset < upperbound { + let mut mask = compute_mask(bytes, offset); + while mask != 0 { + let ix = mask.trailing_zeros(); + callback(offset + ix as usize)?; + mask ^= mask & -mask; + } + offset += VECTOR_SIZE; + } + + // Final iteration. We align the read with the end of the slice and + // shift off the bytes at start we have already scanned. + let mut mask = compute_mask(bytes, upperbound); + mask >>= offset - upperbound; + while mask != 0 { + let ix = mask.trailing_zeros(); + callback(offset + ix as usize)?; + mask ^= mask & -mask; + } + Ok(()) + } + + #[cfg(test)] + mod html_scan_tests { + #[test] + fn multichunk() { + let mut vec = Vec::new(); + unsafe { + super::foreach_special_simd("&aXaaaa.a'aa9a<>aab&".as_bytes(), 0, |ix| { + Ok(vec.push(ix)) + }) + .unwrap(); + } + assert_eq!(vec, vec![0, 14, 15, 19]); + } + + // only match these bytes, and when we match them, match them VECTOR_SIZE times + #[test] + fn only_right_bytes_matched() { + for b in 0..255u8 { + let right_byte = b == b'&' || b == b'<' || b == b'>' || b == b'"'; + let vek = vec![b; super::VECTOR_SIZE]; + let mut match_count = 0; + unsafe { + super::foreach_special_simd(&vek, 0, |_| { + match_count += 1; + Ok(()) + }) + .unwrap(); + } + assert!((match_count > 0) == (match_count == super::VECTOR_SIZE)); + assert_eq!( + (match_count == super::VECTOR_SIZE), + right_byte, + "match_count: {}, byte: {:?}", + match_count, + b as char + ); + } + } + } +} + +#[cfg(test)] +mod test { + pub use super::escape_href; + + #[test] + fn check_href_escape() { + let mut s = String::new(); + escape_href(&mut s, "&^_").unwrap(); + assert_eq!(s.as_str(), "&^_"); + } +} diff --git a/vendor/pulldown-cmark/src/firstpass.rs b/vendor/pulldown-cmark/src/firstpass.rs new file mode 100644 index 000000000..cf3cfbf53 --- /dev/null +++ b/vendor/pulldown-cmark/src/firstpass.rs @@ -0,0 +1,1927 @@ +//! 
//! The first pass resolves all block structure, generating an AST. Within a block, items
//! are in a linear chain with potential inline markup identified.

use std::cmp::max;
use std::ops::Range;

use crate::parse::{scan_containers, Allocations, HeadingAttributes, Item, ItemBody, LinkDef};
use crate::scanners::*;
use crate::strings::CowStr;
use crate::tree::{Tree, TreeIndex};
use crate::Options;
use crate::{
    linklabel::{scan_link_label_rest, LinkLabel},
    HeadingLevel,
};

use unicase::UniCase;

/// Runs the first pass, which resolves the block structure of the document,
/// and returns the resulting tree.
pub(crate) fn run_first_pass(text: &str, options: Options) -> (Tree<Item>, Allocations) {
    // This is a very naive heuristic for the number of nodes
    // we'll need.
    let start_capacity = max(128, text.len() / 32);
    let lookup_table = &create_lut(&options);
    let first_pass = FirstPass {
        text,
        tree: Tree::with_capacity(start_capacity),
        begin_list_item: false,
        last_line_blank: false,
        allocs: Allocations::new(),
        options,
        lookup_table,
    };
    first_pass.run()
}

/// State for the first parsing pass.
struct FirstPass<'a, 'b> {
    // The full input text being parsed.
    text: &'a str,
    // The block-structure tree built up as parsing proceeds.
    tree: Tree<Item>,
    // True immediately after a list-item marker, before any content;
    // a second blank line then closes the (empty) item.
    begin_list_item: bool,
    // True if the previously consumed line was blank (used for list
    // tightness and footnote termination).
    last_line_blank: bool,
    allocs: Allocations<'a>,
    options: Options,
    // Byte lookup table built from `options` for the inline special-byte scan.
    lookup_table: &'b LookupTable,
}

impl<'a, 'b> FirstPass<'a, 'b> {
    /// Drives `parse_block` over the whole input, then closes any still-open
    /// containers before handing back the tree and allocations.
    fn run(mut self) -> (Tree<Item>, Allocations<'a>) {
        let mut ix = 0;
        while ix < self.text.len() {
            ix = self.parse_block(ix);
        }
        for _ in 0..self.tree.spine_len() {
            self.pop(ix);
        }
        (self.tree, self.allocs)
    }

    /// Returns offset after block.
    /// Parses one block starting at `start_ix`: closes containers that no
    /// longer match, opens new list/blockquote containers, then dispatches on
    /// the leaf block kind (blank line, code, HTML, rule, heading, refdef,
    /// fence, or paragraph). Returns offset after block.
    fn parse_block(&mut self, mut start_ix: usize) -> usize {
        let bytes = self.text.as_bytes();
        let mut line_start = LineStart::new(&bytes[start_ix..]);

        // Close any open containers this line no longer continues.
        let i = scan_containers(&self.tree, &mut line_start);
        for _ in i..self.tree.spine_len() {
            self.pop(start_ix);
        }

        if self.options.contains(Options::ENABLE_FOOTNOTES) {
            // finish footnote if it's still open and was preceded by blank line
            if let Some(node_ix) = self.tree.peek_up() {
                if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
                    if self.last_line_blank {
                        self.pop(start_ix);
                    }
                }
            }

            // Footnote definitions of the form
            // [^bar]:
            // * anything really
            let container_start = start_ix + line_start.bytes_scanned();
            if let Some(bytecount) = self.parse_footnote(container_start) {
                start_ix = container_start + bytecount;
                start_ix += scan_blank_line(&bytes[start_ix..]).unwrap_or(0);
                line_start = LineStart::new(&bytes[start_ix..]);
            }
        }

        // Process new containers
        loop {
            let container_start = start_ix + line_start.bytes_scanned();
            if let Some((ch, index, indent)) = line_start.scan_list_marker() {
                let after_marker_index = start_ix + line_start.bytes_scanned();
                self.continue_list(container_start, ch, index);
                self.tree.append(Item {
                    start: container_start,
                    end: after_marker_index, // will get updated later if item not empty
                    body: ItemBody::ListItem(indent),
                });
                self.tree.push();
                if let Some(n) = scan_blank_line(&bytes[after_marker_index..]) {
                    self.begin_list_item = true;
                    return after_marker_index + n;
                }
                if self.options.contains(Options::ENABLE_TASKLISTS) {
                    if let Some(is_checked) = line_start.scan_task_list_marker() {
                        self.tree.append(Item {
                            start: after_marker_index,
                            end: start_ix + line_start.bytes_scanned(),
                            body: ItemBody::TaskListMarker(is_checked),
                        });
                    }
                }
            } else if line_start.scan_blockquote_marker() {
                self.finish_list(start_ix);
                self.tree.append(Item {
                    start: container_start,
                    end: 0, // will get set later
                    body: ItemBody::BlockQuote,
                });
                self.tree.push();
            } else {
                break;
            }
        }

        let ix = start_ix + line_start.bytes_scanned();

        if let Some(n) = scan_blank_line(&bytes[ix..]) {
            if let Some(node_ix) = self.tree.peek_up() {
                match self.tree[node_ix].item.body {
                    ItemBody::BlockQuote => (),
                    _ => {
                        if self.begin_list_item {
                            // A list item can begin with at most one blank line.
                            self.pop(start_ix);
                        }
                        self.last_line_blank = true;
                    }
                }
            }
            return ix + n;
        }

        self.begin_list_item = false;
        self.finish_list(start_ix);

        // Save `remaining_space` here to avoid needing to backtrack `line_start` for HTML blocks
        let remaining_space = line_start.remaining_space();

        let indent = line_start.scan_space_upto(4);
        if indent == 4 {
            let ix = start_ix + line_start.bytes_scanned();
            let remaining_space = line_start.remaining_space();
            return self.parse_indented_code_block(ix, remaining_space);
        }

        let ix = start_ix + line_start.bytes_scanned();

        // HTML Blocks
        if bytes[ix] == b'<' {
            // Types 1-5 are all detected by one function and all end with the same
            // pattern
            if let Some(html_end_tag) = get_html_end_tag(&bytes[(ix + 1)..]) {
                return self.parse_html_block_type_1_to_5(ix, html_end_tag, remaining_space);
            }

            // Detect type 6
            if starts_html_block_type_6(&bytes[(ix + 1)..]) {
                return self.parse_html_block_type_6_or_7(ix, remaining_space);
            }

            // Detect type 7
            if let Some(_html_bytes) = scan_html_type_7(&bytes[ix..]) {
                return self.parse_html_block_type_6_or_7(ix, remaining_space);
            }
        }

        if let Ok(n) = scan_hrule(&bytes[ix..]) {
            return self.parse_hrule(n, ix);
        }

        if let Some(atx_size) = scan_atx_heading(&bytes[ix..]) {
            return self.parse_atx_heading(ix, atx_size);
        }

        // parse refdef
        if let Some((bytecount, label, link_def)) = self.parse_refdef_total(ix) {
            // First definition of a label wins, per CommonMark.
            self.allocs.refdefs.0.entry(label).or_insert(link_def);
            let ix = ix + bytecount;
            // try to read trailing whitespace or it will register as a completely blank line
            // TODO: shouldn't we do this for all block level items?
            return ix + scan_blank_line(&bytes[ix..]).unwrap_or(0);
        }

        if let Some((n, fence_ch)) = scan_code_fence(&bytes[ix..]) {
            return self.parse_fenced_code_block(ix, indent, fence_ch, n);
        }
        self.parse_paragraph(ix)
    }

    /// Returns the offset of the first line after the table.
    /// Assumptions: current focus is a table element and the table header
    /// matches the separator line (same number of columns).
    fn parse_table(&mut self, table_cols: usize, head_start: usize, body_start: usize) -> usize {
        // parse header. this shouldn't fail because we made sure the table header is ok
        let (_sep_start, thead_ix) = self.parse_table_row_inner(head_start, table_cols);
        self.tree[thead_ix].item.body = ItemBody::TableHead;

        // parse body
        let mut ix = body_start;
        while let Some((next_ix, _row_ix)) = self.parse_table_row(ix, table_cols) {
            ix = next_ix;
        }

        self.pop(ix);
        ix
    }

    /// Call this when containers are taken care of.
    /// Returns bytes scanned, row_ix
    ///
    /// Parses a single table row of `row_cells` cells, padding with empty
    /// cells or dropping extras so the row width matches the header (GFM
    /// behavior).
    fn parse_table_row_inner(&mut self, mut ix: usize, row_cells: usize) -> (usize, TreeIndex) {
        let bytes = self.text.as_bytes();
        let mut cells = 0;
        let mut final_cell_ix = None;

        let row_ix = self.tree.append(Item {
            start: ix,
            end: 0, // set at end of this function
            body: ItemBody::TableRow,
        });
        self.tree.push();

        loop {
            // Skip a leading/separating pipe and any inline whitespace.
            ix += scan_ch(&bytes[ix..], b'|');
            let start_ix = ix;
            ix += scan_whitespace_no_nl(&bytes[ix..]);

            if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
                ix += eol_bytes;
                break;
            }

            let cell_ix = self.tree.append(Item {
                start: start_ix,
                end: ix,
                body: ItemBody::TableCell,
            });
            self.tree.push();
            let (next_ix, _brk) = self.parse_line(ix, None, TableParseMode::Active);

            if let Some(cur_ix) = self.tree.cur() {
                let trailing_whitespace = scan_rev_while(&bytes[..next_ix], is_ascii_whitespace);
                self.tree[cur_ix].item.end -= trailing_whitespace;
            }

            self.tree[cell_ix].item.end = next_ix;
            self.tree.pop();

            ix = next_ix;
            cells += 1;

            if cells == row_cells {
                final_cell_ix = Some(cell_ix);
            }
        }

        // fill empty cells if needed
        // note: this is where GFM and commonmark-extra diverge. we follow
        // GFM here
        for _ in cells..row_cells {
            self.tree.append(Item {
                start: ix,
                end: ix,
                body: ItemBody::TableCell,
            });
        }

        // drop excess cells
        if let Some(cell_ix) = final_cell_ix {
            self.tree[cell_ix].next = None;
        }

        self.pop(ix);

        (ix, row_ix)
    }

    /// Returns first offset after the row and the tree index of the row.
    /// Parses one table body row if the current line still continues all open
    /// containers and does not interrupt the paragraph/table.
    fn parse_table_row(&mut self, mut ix: usize, row_cells: usize) -> Option<(usize, TreeIndex)> {
        let bytes = self.text.as_bytes();
        let mut line_start = LineStart::new(&bytes[ix..]);
        let current_container =
            scan_containers(&self.tree, &mut line_start) == self.tree.spine_len();
        if !current_container {
            return None;
        }
        line_start.scan_all_space();
        ix += line_start.bytes_scanned();
        if scan_paragraph_interrupt(&bytes[ix..], current_container) {
            return None;
        }

        let (ix, row_ix) = self.parse_table_row_inner(ix, row_cells);
        Some((ix, row_ix))
    }

    /// Returns offset of line start after paragraph.
    fn parse_paragraph(&mut self, start_ix: usize) -> usize {
        let node_ix = self.tree.append(Item {
            start: start_ix,
            end: 0, // will get set later
            body: ItemBody::Paragraph,
        });
        self.tree.push();
        let bytes = self.text.as_bytes();

        let mut ix = start_ix;
        loop {
            // Only the first line of a paragraph can start a table header.
            let scan_mode = if self.options.contains(Options::ENABLE_TABLES) && ix == start_ix {
                TableParseMode::Scan
            } else {
                TableParseMode::Disabled
            };
            let (next_ix, brk) = self.parse_line(ix, None, scan_mode);

            // break out when we find a table
            if let Some(Item {
                body: ItemBody::Table(alignment_ix),
                ..
            }) = brk
            {
                let table_cols = self.allocs[alignment_ix].len();
                self.tree[node_ix].item.body = ItemBody::Table(alignment_ix);
                // this clears out any stuff we may have appended - but there may
                // be a cleaner way
                self.tree[node_ix].child = None;
                self.tree.pop();
                self.tree.push();
                return self.parse_table(table_cols, ix, next_ix);
            }

            ix = next_ix;
            let mut line_start = LineStart::new(&bytes[ix..]);
            let current_container =
                scan_containers(&self.tree, &mut line_start) == self.tree.spine_len();
            if !line_start.scan_space(4) {
                let ix_new = ix + line_start.bytes_scanned();
                if current_container {
                    // A trailing backslash would be consumed as a hard break;
                    // remember it so a following setext underline can restore
                    // it as literal text.
                    let trailing_backslash_pos = match brk {
                        Some(Item {
                            start,
                            body: ItemBody::HardBreak,
                            ..
                        }) if bytes[start] == b'\\' => Some(start),
                        _ => None,
                    };
                    if let Some(ix_setext) =
                        self.parse_setext_heading(ix_new, node_ix, trailing_backslash_pos.is_some())
                    {
                        if let Some(pos) = trailing_backslash_pos {
                            self.tree.append_text(pos, pos + 1);
                        }
                        ix = ix_setext;
                        break;
                    }
                }
                // first check for non-empty lists, then for other interrupts
                let suffix = &bytes[ix_new..];
                if scan_paragraph_interrupt(suffix, current_container) {
                    break;
                }
            }
            line_start.scan_all_space();
            if line_start.is_at_eol() {
                break;
            }
            ix = next_ix + line_start.bytes_scanned();
            if let Some(item) = brk {
                self.tree.append(item);
            }
        }

        self.pop(ix);
        ix
    }

    /// Returns end ix of setext_heading on success.
    fn parse_setext_heading(
        &mut self,
        ix: usize,
        node_ix: TreeIndex,
        has_trailing_content: bool,
    ) -> Option<usize> {
        let bytes = self.text.as_bytes();
        let (n, level) = scan_setext_heading(&bytes[ix..])?;
        let mut attrs = None;

        if let Some(cur_ix) = self.tree.cur() {
            let parent_ix = self.tree.peek_up().unwrap();
            let header_start = self.tree[parent_ix].item.start;
            // Note that `self.tree[parent_ix].item.end` might be zero at this point.
            // Use the end position of the current node (i.e. the last known child
            // of the parent) instead.
            let header_end = self.tree[cur_ix].item.end;

            // extract the trailing attribute block
            let (content_end, attrs_) =
                self.extract_and_parse_heading_attribute_block(header_start, header_end);
            attrs = attrs_;

            // strip trailing whitespace
            let new_end = if has_trailing_content {
                content_end
            } else {
                let trailing_ws =
                    scan_rev_while(&bytes[header_start..content_end], is_ascii_whitespace_no_nl);
                content_end - trailing_ws
            };

            if attrs.is_some() {
                // remove trailing block attributes
                self.tree.truncate_siblings(self.text.as_bytes(), new_end);
            }

            if let Some(cur_ix) = self.tree.cur() {
                self.tree[cur_ix].item.end = new_end;
            }
        }

        self.tree[node_ix].item.body = ItemBody::Heading(
            level,
            attrs.map(|attrs| self.allocs.allocate_heading(attrs)),
        );

        Some(ix + n)
    }

    /// Parse a line of input, appending text and items to tree.
    ///
    /// Returns: index after line and an item representing the break.
    fn parse_line(
        &mut self,
        start: usize,
        end: Option<usize>,
        mode: TableParseMode,
    ) -> (usize, Option<Item>) {
        let bytes = self.text.as_bytes();
        let bytes = match end {
            Some(end) => &bytes[..end],
            None => bytes,
        };
        let bytes_len = bytes.len();
        let mut pipes = 0;
        let mut last_pipe_ix = start;
        let mut begin_text = start;

        // Scan the line, handling each byte the lookup table flags as special;
        // plain text between specials is appended in spans via `append_text`.
        let (final_ix, brk) = iterate_special_bytes(self.lookup_table, bytes, start, |ix, byte| {
            match byte {
                b'\n' | b'\r' => {
                    if let TableParseMode::Active = mode {
                        return LoopInstruction::BreakAtWith(ix, None);
                    }

                    let mut i = ix;
                    let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
                    if mode == TableParseMode::Scan && pipes > 0 {
                        // check if we may be parsing a table
                        let next_line_ix = ix + eol_bytes;
                        let mut line_start = LineStart::new(&bytes[next_line_ix..]);
                        if scan_containers(&self.tree, &mut line_start) == self.tree.spine_len() {
                            let table_head_ix = next_line_ix + line_start.bytes_scanned();
                            let (table_head_bytes, alignment) =
                                scan_table_head(&bytes[table_head_ix..]);

                            if table_head_bytes > 0 {
                                // computing header count from number of pipes
                                let header_count =
                                    count_header_cols(bytes, pipes, start, last_pipe_ix);

                                // make sure they match the number of columns we find in separator line
                                if alignment.len() == header_count {
                                    let alignment_ix = self.allocs.allocate_alignment(alignment);
                                    let end_ix = table_head_ix + table_head_bytes;
                                    return LoopInstruction::BreakAtWith(
                                        end_ix,
                                        Some(Item {
                                            start: i,
                                            end: end_ix, // must update later
                                            body: ItemBody::Table(alignment_ix),
                                        }),
                                    );
                                }
                            }
                        }
                    }

                    let end_ix = ix + eol_bytes;
                    // An odd number of trailing backslashes escapes the newline:
                    // hard break (except at end of input).
                    let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
                    if trailing_backslashes % 2 == 1 && end_ix < bytes_len {
                        i -= 1;
                        self.tree.append_text(begin_text, i);
                        return LoopInstruction::BreakAtWith(
                            end_ix,
                            Some(Item {
                                start: i,
                                end: end_ix,
                                body: ItemBody::HardBreak,
                            }),
                        );
                    }
                    // Two or more trailing spaces also make a hard break.
                    let trailing_whitespace =
                        scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
                    if trailing_whitespace >= 2 {
                        i -= trailing_whitespace;
                        self.tree.append_text(begin_text, i);
                        return LoopInstruction::BreakAtWith(
                            end_ix,
                            Some(Item {
                                start: i,
                                end: end_ix,
                                body: ItemBody::HardBreak,
                            }),
                        );
                    }

                    self.tree.append_text(begin_text, ix);
                    LoopInstruction::BreakAtWith(
                        end_ix,
                        Some(Item {
                            start: i,
                            end: end_ix,
                            body: ItemBody::SoftBreak,
                        }),
                    )
                }
                b'\\' => {
                    if ix + 1 < bytes_len && is_ascii_punctuation(bytes[ix + 1]) {
                        self.tree.append_text(begin_text, ix);
                        if bytes[ix + 1] == b'`' {
                            // An escaped backtick still participates in code-span
                            // matching; record the run length.
                            let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], b'`');
                            self.tree.append(Item {
                                start: ix + 1,
                                end: ix + count + 1,
                                body: ItemBody::MaybeCode(count, true),
                            });
                            begin_text = ix + 1 + count;
                            LoopInstruction::ContinueAndSkip(count)
                        } else {
                            begin_text = ix + 1;
                            LoopInstruction::ContinueAndSkip(1)
                        }
                    } else {
                        LoopInstruction::ContinueAndSkip(0)
                    }
                }
                c @ b'*' | c @ b'_' | c @ b'~' => {
                    let string_suffix = &self.text[ix..];
                    let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
                    let can_open = delim_run_can_open(self.text, string_suffix, count, ix);
                    let can_close = delim_run_can_close(self.text, string_suffix, count, ix);
                    // Strikethrough (`~`) is only valid as a pair.
                    let is_valid_seq = c != b'~' || count == 2;

                    if (can_open || can_close) && is_valid_seq {
                        self.tree.append_text(begin_text, ix);
                        for i in 0..count {
                            self.tree.append(Item {
                                start: ix + i,
                                end: ix + i + 1,
                                body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
                            });
                        }
                        begin_text = ix + count;
                    }
                    LoopInstruction::ContinueAndSkip(count - 1)
                }
                b'`' => {
                    self.tree.append_text(begin_text, ix);
                    let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
                    self.tree.append(Item {
                        start: ix,
                        end: ix + count,
                        body: ItemBody::MaybeCode(count, false),
                    });
                    begin_text = ix + count;
                    LoopInstruction::ContinueAndSkip(count - 1)
                }
                b'<' => {
                    // Note: could detect some non-HTML cases and early escape here, but not
                    // clear that's a win.
                    self.tree.append_text(begin_text, ix);
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeHtml,
                    });
                    begin_text = ix + 1;
                    LoopInstruction::ContinueAndSkip(0)
                }
                b'!' => {
                    if ix + 1 < bytes_len && bytes[ix + 1] == b'[' {
                        self.tree.append_text(begin_text, ix);
                        self.tree.append(Item {
                            start: ix,
                            end: ix + 2,
                            body: ItemBody::MaybeImage,
                        });
                        begin_text = ix + 2;
                        LoopInstruction::ContinueAndSkip(1)
                    } else {
                        LoopInstruction::ContinueAndSkip(0)
                    }
                }
                b'[' => {
                    self.tree.append_text(begin_text, ix);
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeLinkOpen,
                    });
                    begin_text = ix + 1;
                    LoopInstruction::ContinueAndSkip(0)
                }
                b']' => {
                    self.tree.append_text(begin_text, ix);
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeLinkClose(true),
                    });
                    begin_text = ix + 1;
                    LoopInstruction::ContinueAndSkip(0)
                }
                b'&' => match scan_entity(&bytes[ix..]) {
                    (n, Some(value)) => {
                        self.tree.append_text(begin_text, ix);
                        self.tree.append(Item {
                            start: ix,
                            end: ix + n,
                            body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
                        });
                        begin_text = ix + n;
                        LoopInstruction::ContinueAndSkip(n - 1)
                    }
                    _ => LoopInstruction::ContinueAndSkip(0),
                },
                b'|' => {
                    if let TableParseMode::Active = mode {
                        LoopInstruction::BreakAtWith(ix, None)
                    } else {
                        last_pipe_ix = ix;
                        pipes += 1;
                        LoopInstruction::ContinueAndSkip(0)
                    }
                }
                b'.' => {
                    // Smart punctuation: "..." becomes an ellipsis.
                    if ix + 2 < bytes.len() && bytes[ix + 1] == b'.' && bytes[ix + 2] == b'.' {
                        self.tree.append_text(begin_text, ix);
                        self.tree.append(Item {
                            start: ix,
                            end: ix + 3,
                            body: ItemBody::SynthesizeChar('…'),
                        });
                        begin_text = ix + 3;
                        LoopInstruction::ContinueAndSkip(2)
                    } else {
                        LoopInstruction::ContinueAndSkip(0)
                    }
                }
                b'-' => {
                    // Smart punctuation: runs of hyphens become en/em dashes.
                    let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'-');
                    if count == 1 {
                        LoopInstruction::ContinueAndSkip(0)
                    } else {
                        let itembody = if count == 2 {
                            ItemBody::SynthesizeChar('–')
                        } else if count == 3 {
                            ItemBody::SynthesizeChar('—')
                        } else {
                            let (ems, ens) = match count % 6 {
                                0 | 3 => (count / 3, 0),
                                2 | 4 => (0, count / 2),
                                1 => (count / 3 - 1, 2),
                                _ => (count / 3, 1),
                            };
                            // – and — are 3 bytes each in utf8
                            let mut buf = String::with_capacity(3 * (ems + ens));
                            for _ in 0..ems {
                                buf.push('—');
                            }
                            for _ in 0..ens {
                                buf.push('–');
                            }
                            ItemBody::SynthesizeText(self.allocs.allocate_cow(buf.into()))
                        };

                        self.tree.append_text(begin_text, ix);
                        self.tree.append(Item {
                            start: ix,
                            end: ix + count,
                            body: itembody,
                        });
                        begin_text = ix + count;
                        LoopInstruction::ContinueAndSkip(count - 1)
                    }
                }
                c @ b'\'' | c @ b'"' => {
                    let string_suffix = &self.text[ix..];
                    let can_open = delim_run_can_open(self.text, string_suffix, 1, ix);
                    let can_close = delim_run_can_close(self.text, string_suffix, 1, ix);

                    self.tree.append_text(begin_text, ix);
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeSmartQuote(c, can_open, can_close),
                    });
                    begin_text = ix + 1;

                    LoopInstruction::ContinueAndSkip(0)
                }
                _ => LoopInstruction::ContinueAndSkip(0),
            }
        });

        if brk.is_none() {
            // need to close text at eof
            self.tree.append_text(begin_text, final_ix);
        }
        (final_ix, brk)
    }

    /// When start_ix is at the beginning of an HTML block of type 1 to 5,
    /// this will find the end of the block, adding the block itself to the
    /// tree and also keeping track of the lines of HTML within the block.
    ///
    /// The html_end_tag is the tag that must be found on a line to end the block.
    fn parse_html_block_type_1_to_5(
        &mut self,
        start_ix: usize,
        html_end_tag: &str,
        mut remaining_space: usize,
    ) -> usize {
        let bytes = self.text.as_bytes();
        let mut ix = start_ix;
        loop {
            let line_start_ix = ix;
            ix += scan_nextline(&bytes[ix..]);
            self.append_html_line(remaining_space, line_start_ix, ix);

            let mut line_start = LineStart::new(&bytes[ix..]);
            let n_containers = scan_containers(&self.tree, &mut line_start);
            if n_containers < self.tree.spine_len() {
                break;
            }

            // The line containing the end tag is included in the block.
            if (&self.text[line_start_ix..ix]).contains(html_end_tag) {
                break;
            }

            let next_line_ix = ix + line_start.bytes_scanned();
            if next_line_ix == self.text.len() {
                break;
            }
            ix = next_line_ix;
            remaining_space = line_start.remaining_space();
        }
        ix
    }

    /// When start_ix is at the beginning of an HTML block of type 6 or 7,
    /// this will consume lines until there is a blank line and keep track of
    /// the HTML within the block.
    fn parse_html_block_type_6_or_7(
        &mut self,
        start_ix: usize,
        mut remaining_space: usize,
    ) -> usize {
        let bytes = self.text.as_bytes();
        let mut ix = start_ix;
        loop {
            let line_start_ix = ix;
            ix += scan_nextline(&bytes[ix..]);
            self.append_html_line(remaining_space, line_start_ix, ix);

            let mut line_start = LineStart::new(&bytes[ix..]);
            let n_containers = scan_containers(&self.tree, &mut line_start);
            if n_containers < self.tree.spine_len() || line_start.is_at_eol() {
                break;
            }

            let next_line_ix = ix + line_start.bytes_scanned();
            if next_line_ix == self.text.len() || scan_blank_line(&bytes[next_line_ix..]).is_some()
            {
                break;
            }
            ix = next_line_ix;
            remaining_space = line_start.remaining_space();
        }
        ix
    }

    /// Parses an indented (4-space) code block starting at `start_ix`,
    /// trimming trailing blank lines from the block.
    fn parse_indented_code_block(&mut self, start_ix: usize, mut remaining_space: usize) -> usize {
        self.tree.append(Item {
            start: start_ix,
            end: 0, // will get set later
            body: ItemBody::IndentCodeBlock,
        });
        self.tree.push();
        let bytes = self.text.as_bytes();
        let mut last_nonblank_child = None;
        let mut last_nonblank_ix = 0;
        let mut end_ix = 0;
        let mut last_line_blank = false;

        let mut ix = start_ix;
        loop {
            let line_start_ix = ix;
            ix += scan_nextline(&bytes[ix..]);
            self.append_code_text(remaining_space, line_start_ix, ix);
            // TODO(spec clarification): should we synthesize newline at EOF?

            if !last_line_blank {
                last_nonblank_child = self.tree.cur();
                last_nonblank_ix = ix;
                end_ix = ix;
            }

            let mut line_start = LineStart::new(&bytes[ix..]);
            let n_containers = scan_containers(&self.tree, &mut line_start);
            if n_containers < self.tree.spine_len()
                || !(line_start.scan_space(4) || line_start.is_at_eol())
            {
                break;
            }
            let next_line_ix = ix + line_start.bytes_scanned();
            if next_line_ix == self.text.len() {
                break;
            }
            ix = next_line_ix;
            remaining_space = line_start.remaining_space();
            last_line_blank = scan_blank_line(&bytes[ix..]).is_some();
        }

        // Trim trailing blank lines.
        if let Some(child) = last_nonblank_child {
            self.tree[child].next = None;
            self.tree[child].item.end = last_nonblank_ix;
        }
        self.pop(end_ix);
        ix
    }

    /// Parses a fenced code block opened by `n_fence_char` repetitions of
    /// `fence_ch` at `start_ix`, capturing the (unescaped) info string.
    fn parse_fenced_code_block(
        &mut self,
        start_ix: usize,
        indent: usize,
        fence_ch: u8,
        n_fence_char: usize,
    ) -> usize {
        let bytes = self.text.as_bytes();
        let mut info_start = start_ix + n_fence_char;
        info_start += scan_whitespace_no_nl(&bytes[info_start..]);
        // TODO: info strings are typically very short. wouldn't it be faster
        // to just do a forward scan here?
        let mut ix = info_start + scan_nextline(&bytes[info_start..]);
        let info_end = ix - scan_rev_while(&bytes[info_start..ix], is_ascii_whitespace);
        let info_string = unescape(&self.text[info_start..info_end]);
        self.tree.append(Item {
            start: start_ix,
            end: 0, // will get set later
            body: ItemBody::FencedCodeBlock(self.allocs.allocate_cow(info_string)),
        });
        self.tree.push();
        loop {
            let mut line_start = LineStart::new(&bytes[ix..]);
            let n_containers = scan_containers(&self.tree, &mut line_start);
            if n_containers < self.tree.spine_len() {
                break;
            }
            line_start.scan_space(indent);
            // A closing fence may be indented at most 3 spaces.
            let mut close_line_start = line_start.clone();
            if !close_line_start.scan_space(4) {
                let close_ix = ix + close_line_start.bytes_scanned();
                if let Some(n) = scan_closing_code_fence(&bytes[close_ix..], fence_ch, n_fence_char)
                {
                    ix = close_ix + n;
                    break;
                }
            }
            let remaining_space = line_start.remaining_space();
            ix += line_start.bytes_scanned();
            let next_ix = ix + scan_nextline(&bytes[ix..]);
            self.append_code_text(remaining_space, ix, next_ix);
            ix = next_ix;
        }

        self.pop(ix);

        // try to read trailing whitespace or it will register as a completely blank line
        ix + scan_blank_line(&bytes[ix..]).unwrap_or(0)
    }

    /// Appends one line of code-block text, synthesizing leading spaces that
    /// were consumed by container/indent scanning and normalizing CRLF to LF.
    fn append_code_text(&mut self, remaining_space: usize, start: usize, end: usize) {
        if remaining_space > 0 {
            // NOTE(review): this space literal appears whitespace-collapsed in
            // this rendering; upstream uses a multi-space literal sliced to
            // `remaining_space` — confirm against the original file.
            let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
            self.tree.append(Item {
                start,
                end: start,
                body: ItemBody::SynthesizeText(cow_ix),
            });
        }
        if self.text.as_bytes()[end - 2] == b'\r' {
            // Normalize CRLF to LF
            self.tree.append_text(start, end - 2);
            self.tree.append_text(end - 1, end);
        } else {
            self.tree.append_text(start, end);
        }
    }

    /// Appends a line of HTML to the tree.
+ fn append_html_line(&mut self, remaining_space: usize, start: usize, end: usize) { + if remaining_space > 0 { + let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into()); + self.tree.append(Item { + start, + end: start, + // TODO: maybe this should synthesize to html rather than text? + body: ItemBody::SynthesizeText(cow_ix), + }); + } + if self.text.as_bytes()[end - 2] == b'\r' { + // Normalize CRLF to LF + self.tree.append(Item { + start, + end: end - 2, + body: ItemBody::Html, + }); + self.tree.append(Item { + start: end - 1, + end, + body: ItemBody::Html, + }); + } else { + self.tree.append(Item { + start, + end, + body: ItemBody::Html, + }); + } + } + + /// Pop a container, setting its end. + fn pop(&mut self, ix: usize) { + let cur_ix = self.tree.pop().unwrap(); + self.tree[cur_ix].item.end = ix; + if let ItemBody::List(true, _, _) = self.tree[cur_ix].item.body { + surgerize_tight_list(&mut self.tree, cur_ix); + } + } + + /// Close a list if it's open. Also set loose if last line was blank + fn finish_list(&mut self, ix: usize) { + if let Some(node_ix) = self.tree.peek_up() { + if let ItemBody::List(_, _, _) = self.tree[node_ix].item.body { + self.pop(ix); + } + } + if self.last_line_blank { + if let Some(node_ix) = self.tree.peek_grandparent() { + if let ItemBody::List(ref mut is_tight, _, _) = self.tree[node_ix].item.body { + *is_tight = false; + } + } + self.last_line_blank = false; + } + } + + /// Continue an existing list or start a new one if there's not an open + /// list that matches. + fn continue_list(&mut self, start: usize, ch: u8, index: u64) { + if let Some(node_ix) = self.tree.peek_up() { + if let ItemBody::List(ref mut is_tight, existing_ch, _) = self.tree[node_ix].item.body { + if existing_ch == ch { + if self.last_line_blank { + *is_tight = false; + self.last_line_blank = false; + } + return; + } + } + // TODO: this is not the best choice for end; maybe get end from last list item. 
+ self.finish_list(start); + } + self.tree.append(Item { + start, + end: 0, // will get set later + body: ItemBody::List(true, ch, index), + }); + self.tree.push(); + self.last_line_blank = false; + } + + /// Parse a thematic break. + /// + /// Returns index of start of next line. + fn parse_hrule(&mut self, hrule_size: usize, ix: usize) -> usize { + self.tree.append(Item { + start: ix, + end: ix + hrule_size, + body: ItemBody::Rule, + }); + ix + hrule_size + } + + /// Parse an ATX heading. + /// + /// Returns index of start of next line. + fn parse_atx_heading(&mut self, start: usize, atx_level: HeadingLevel) -> usize { + let mut ix = start; + let heading_ix = self.tree.append(Item { + start, + end: 0, // set later + body: ItemBody::default(), // set later + }); + ix += atx_level as usize; + // next char is space or eol (guaranteed by scan_atx_heading) + let bytes = self.text.as_bytes(); + if let Some(eol_bytes) = scan_eol(&bytes[ix..]) { + self.tree[heading_ix].item.end = ix + eol_bytes; + self.tree[heading_ix].item.body = ItemBody::Heading(atx_level, None); + return ix + eol_bytes; + } + // skip leading spaces + let skip_spaces = scan_whitespace_no_nl(&bytes[ix..]); + ix += skip_spaces; + + // now handle the header text + let header_start = ix; + let header_node_idx = self.tree.push(); // so that we can set the endpoint later + + // trim the trailing attribute block before parsing the entire line, if necessary + let (end, content_end, attrs) = if self.options.contains(Options::ENABLE_HEADING_ATTRIBUTES) + { + // the start of the next line is the end of the header since the + // header cannot have line breaks + let header_end = header_start + scan_nextline(&bytes[header_start..]); + let (content_end, attrs) = + self.extract_and_parse_heading_attribute_block(header_start, header_end); + self.parse_line(ix, Some(content_end), TableParseMode::Disabled); + (header_end, content_end, attrs) + } else { + ix = self.parse_line(ix, None, TableParseMode::Disabled).0; + (ix, 
ix, None) + }; + self.tree[header_node_idx].item.end = end; + + // remove trailing matter from header text + if let Some(cur_ix) = self.tree.cur() { + // remove closing of the ATX heading + let header_text = &bytes[header_start..content_end]; + let mut limit = header_text + .iter() + .rposition(|&b| !(b == b'\n' || b == b'\r' || b == b' ')) + .map_or(0, |i| i + 1); + let closer = header_text[..limit] + .iter() + .rposition(|&b| b != b'#') + .map_or(0, |i| i + 1); + if closer == 0 { + limit = closer; + } else { + let spaces = scan_rev_while(&header_text[..closer], |b| b == b' '); + if spaces > 0 { + limit = closer - spaces; + } + } + self.tree[cur_ix].item.end = limit + header_start; + } + + self.tree.pop(); + self.tree[heading_ix].item.body = ItemBody::Heading( + atx_level, + attrs.map(|attrs| self.allocs.allocate_heading(attrs)), + ); + end + } + + /// Returns the number of bytes scanned on success. + fn parse_footnote(&mut self, start: usize) -> Option<usize> { + let bytes = &self.text.as_bytes()[start..]; + if !bytes.starts_with(b"[^") { + return None; + } + let (mut i, label) = self.parse_refdef_label(start + 2)?; + i += 2; + if scan_ch(&bytes[i..], b':') == 0 { + return None; + } + i += 1; + self.finish_list(start); + self.tree.append(Item { + start, + end: 0, // will get set later + // TODO: check whether the label here is strictly necessary + body: ItemBody::FootnoteDefinition(self.allocs.allocate_cow(label)), + }); + self.tree.push(); + Some(i) + } + + /// Tries to parse a reference label, which can be interrupted by new blocks. + /// On success, returns the number of bytes of the label and the label itself. 
    fn parse_refdef_label(&self, start: usize) -> Option<(usize, CowStr<'a>)> {
        scan_link_label_rest(&self.text[start..], &|bytes| {
            // A reference label may continue onto the next line, but only if
            // that line stays inside every currently open container and does
            // not start a construct that interrupts a paragraph.
            let mut line_start = LineStart::new(bytes);
            let current_container =
                scan_containers(&self.tree, &mut line_start) == self.tree.spine_len();
            let bytes_scanned = line_start.bytes_scanned();
            let suffix = &bytes[bytes_scanned..];
            if scan_paragraph_interrupt(suffix, current_container) {
                None
            } else {
                Some(bytes_scanned)
            }
        })
    }

    /// Returns number of bytes scanned, label and definition on success.
    fn parse_refdef_total(&mut self, start: usize) -> Option<(usize, LinkLabel<'a>, LinkDef<'a>)> {
        let bytes = &self.text.as_bytes()[start..];
        // A reference definition must start with `[label]:`.
        if scan_ch(bytes, b'[') == 0 {
            return None;
        }
        let (mut i, label) = self.parse_refdef_label(start + 1)?;
        // Account for the opening `[`.
        i += 1;
        if scan_ch(&bytes[i..], b':') == 0 {
            return None;
        }
        // Account for the `:`.
        i += 1;
        let (bytecount, link_def) = self.scan_refdef(start, start + i)?;
        // Labels are wrapped in `UniCase` so later lookups compare them
        // case-insensitively.
        Some((bytecount + i, UniCase::new(label), link_def))
    }

    /// Scans whitespace (possibly spanning one line break) inside a reference
    /// definition.
    ///
    /// Returns number of bytes and number of newlines
    fn scan_refdef_space(&self, bytes: &[u8], mut i: usize) -> Option<(usize, usize)> {
        let mut newlines = 0;
        loop {
            let whitespaces = scan_whitespace_no_nl(&bytes[i..]);
            i += whitespaces;
            if let Some(eol_bytes) = scan_eol(&bytes[i..]) {
                i += eol_bytes;
                newlines += 1;
                // A blank line (two consecutive newlines) ends the definition.
                if newlines > 1 {
                    return None;
                }
            } else {
                break;
            }
            // The continuation line must still be inside all open containers.
            let mut line_start = LineStart::new(&bytes[i..]);
            if self.tree.spine_len() != scan_containers(&self.tree, &mut line_start) {
                return None;
            }
            i += line_start.bytes_scanned();
        }
        Some((i, newlines))
    }

    /// Returns # of bytes and definition.
    /// Assumes the label of the reference including colon has already been scanned.
    fn scan_refdef(&self, span_start: usize, start: usize) -> Option<(usize, LinkDef<'a>)> {
        let bytes = self.text.as_bytes();

        // whitespace between label and url (including up to one newline)
        let (mut i, _newlines) = self.scan_refdef_space(bytes, start)?;

        // scan link dest
        let (dest_length, dest) = scan_link_dest(self.text, i, 1)?;
        if dest_length == 0 {
            return None;
        }
        let dest = unescape(dest);
        i += dest_length;

        // `backup` is the fallback result: a definition with destination but
        // no title, used whenever the optional title fails to parse cleanly.
        let mut backup = (
            i - start,
            LinkDef {
                dest,
                title: None,
                span: span_start..i,
            },
        );

        // scan whitespace between dest and label
        let (mut i, newlines) =
            if let Some((new_i, mut newlines)) = self.scan_refdef_space(bytes, i) {
                // End of input counts as an additional line break here.
                if i == self.text.len() {
                    newlines += 1;
                }
                // No whitespace at all between dest and what follows: invalid.
                if new_i == i && newlines == 0 {
                    return None;
                }
                // A blank line ends the definition; keep the title-less version.
                if newlines > 1 {
                    return Some(backup);
                };
                (new_i, newlines)
            } else {
                return Some(backup);
            };

        // scan title
        // if this fails but newline == 1, return also a refdef without title
        if let Some((title_length, title)) = scan_refdef_title(&self.text[i..]) {
            i += title_length;
            backup.1.span = span_start..i;
            backup.1.title = Some(unescape(title));
        } else if newlines > 0 {
            return Some(backup);
        } else {
            return None;
        };

        // scan EOL
        if let Some(bytes) = scan_blank_line(&bytes[i..]) {
            backup.0 = i + bytes - start;
            Some(backup)
        } else if newlines > 0 {
            Some(backup)
        } else {
            None
        }
    }

    /// Extracts and parses a heading attribute block if exists.
    ///
    /// Returns `(end_offset_of_heading_content, (id, classes))`.
    ///
    /// If `header_end` is less than or equal to `header_start`, the given
    /// input is considered as empty.
    fn extract_and_parse_heading_attribute_block(
        &mut self,
        header_start: usize,
        header_end: usize,
    ) -> (usize, Option<HeadingAttributes<'a>>) {
        if !self.options.contains(Options::ENABLE_HEADING_ATTRIBUTES) {
            return (header_end, None);
        }

        // extract the trailing attribute block
        let header_bytes = &self.text.as_bytes()[header_start..header_end];
        let (content_len, attr_block_range_rel) =
            extract_attribute_block_content_from_header_text(header_bytes);
        let content_end = header_start + content_len;
        let attrs = attr_block_range_rel.and_then(|r| {
            parse_inside_attribute_block(
                &self.text[(header_start + r.start)..(header_start + r.end)],
            )
        });
        (content_end, attrs)
    }
}

/// Scanning modes for `Parser`'s `parse_line` method.
#[derive(PartialEq, Eq, Copy, Clone)]
enum TableParseMode {
    /// Inside a paragraph, scanning for table headers.
    Scan,
    /// Inside a table.
    Active,
    /// Inside a paragraph, not scanning for table headers.
    Disabled,
}

/// Computes the number of header columns in a table line by computing the number of dividing pipes
/// that aren't followed or preceded by whitespace.
fn count_header_cols(
    bytes: &[u8],
    mut pipes: usize,
    mut start: usize,
    last_pipe_ix: usize,
) -> usize {
    // was first pipe preceded by whitespace? if so, subtract one
    start += scan_whitespace_no_nl(&bytes[start..]);
    if bytes[start] == b'|' {
        pipes -= 1;
    }

    // was last pipe followed by whitespace? if so, sub one
    if scan_blank_line(&bytes[(last_pipe_ix + 1)..]).is_some() {
        pipes
    } else {
        pipes + 1
    }
}

/// Checks whether we should break a paragraph on the given input.
fn scan_paragraph_interrupt(bytes: &[u8], current_container: bool) -> bool {
    scan_eol(bytes).is_some()
        || scan_hrule(bytes).is_ok()
        || scan_atx_heading(bytes).is_some()
        || scan_code_fence(bytes).is_some()
        || scan_blockquote_start(bytes).is_some()
        || scan_listitem(bytes).map_or(false, |(ix, delim, index, _)| {
            // NOTE: `&&` binds tighter than `||`, so the empty-list check
            // below applies only to the delimiter/index condition, not to
            // `!current_container`.
            !current_container ||
            // we don't allow interruption by either empty lists or
            // numbered lists starting at an index other than 1
            (delim == b'*' || delim == b'-' || delim == b'+' || index == 1)
                && !scan_empty_list(&bytes[ix..])
        })
        || bytes.starts_with(b"<")
            && (get_html_end_tag(&bytes[1..]).is_some() || starts_html_block_type_6(&bytes[1..]))
}

/// Returns the closing string that terminates an HTML block opened by the
/// given bytes, if they open one.
///
/// Assumes `text_bytes` is preceded by `<`.
fn get_html_end_tag(text_bytes: &[u8]) -> Option<&'static str> {
    static BEGIN_TAGS: &[&[u8]; 4] = &[b"pre", b"style", b"script", b"textarea"];
    static ST_BEGIN_TAGS: &[&[u8]; 3] = &[b"!--", b"?", b"![CDATA["];

    for (beg_tag, end_tag) in BEGIN_TAGS
        .iter()
        .zip(["</pre>", "</style>", "</script>", "</textarea>"].iter())
    {
        let tag_len = beg_tag.len();

        if text_bytes.len() < tag_len {
            // begin tags are increasing in size
            break;
        }

        if !text_bytes[..tag_len].eq_ignore_ascii_case(beg_tag) {
            continue;
        }

        // Must either be the end of the line...
        if text_bytes.len() == tag_len {
            return Some(end_tag);
        }

        // ...or be followed by whitespace, newline, or '>'.
        let s = text_bytes[tag_len];
        if is_ascii_whitespace(s) || s == b'>' {
            return Some(end_tag);
        }
    }

    // Comment, processing instruction, or CDATA openers.
    for (beg_tag, end_tag) in ST_BEGIN_TAGS.iter().zip(["-->", "?>", "]]>"].iter()) {
        if text_bytes.starts_with(beg_tag) {
            return Some(end_tag);
        }
    }

    // `<!` followed by an uppercase ASCII letter (e.g. a doctype declaration)
    // is closed by the next `>`.
    if text_bytes.len() > 1
        && text_bytes[0] == b'!'
        && text_bytes[1] >= b'A'
        && text_bytes[1] <= b'Z'
    {
        Some(">")
    } else {
        None
    }
}

// Rewrites a tight list in place: the children of every `Paragraph` node
// directly under a list item are spliced up to replace the paragraph itself,
// so tight lists carry no paragraph wrappers.
// https://english.stackexchange.com/a/285573
fn surgerize_tight_list(tree: &mut Tree<Item>, list_ix: TreeIndex) {
    let mut list_item = tree[list_ix].child;
    while let Some(listitem_ix) = list_item {
        // first child is special, controls how we repoint list_item.child
        let list_item_firstborn = tree[listitem_ix].child;

        // Check that list item has children - this is not necessarily the case!
        if let Some(firstborn_ix) = list_item_firstborn {
            if let ItemBody::Paragraph = tree[firstborn_ix].item.body {
                tree[listitem_ix].child = tree[firstborn_ix].child;
            }

            let mut list_item_child = Some(firstborn_ix);
            let mut node_to_repoint = None;
            while let Some(child_ix) = list_item_child {
                // surgerize paragraphs
                let repoint_ix = if let ItemBody::Paragraph = tree[child_ix].item.body {
                    if let Some(child_firstborn) = tree[child_ix].child {
                        if let Some(repoint_ix) = node_to_repoint {
                            tree[repoint_ix].next = Some(child_firstborn);
                        }
                        // Walk to the last sibling of the paragraph's children;
                        // that node must be re-linked to the paragraph's successor.
                        let mut child_lastborn = child_firstborn;
                        while let Some(lastborn_next_ix) = tree[child_lastborn].next {
                            child_lastborn = lastborn_next_ix;
                        }
                        child_lastborn
                    } else {
                        child_ix
                    }
                } else {
                    child_ix
                };

                node_to_repoint = Some(repoint_ix);
                tree[repoint_ix].next = tree[child_ix].next;
                list_item_child = tree[child_ix].next;
            }
        }

        list_item = tree[listitem_ix].next;
    }
}

/// Determines whether the delimiter run starting at given index is
/// left-flanking, as defined by the commonmark spec (and isn't intraword
/// for _ delims).
/// suffix is &s[ix..], which is passed in as an optimization, since taking
/// a string subslice is O(n).
fn delim_run_can_open(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
    let next_char = if let Some(c) = suffix.chars().nth(run_len) {
        c
    } else {
        // Run extends to end of input: nothing to open against.
        return false;
    };
    if next_char.is_whitespace() {
        return false;
    }
    if ix == 0 {
        // Start of input counts as (non-punctuation) "whitespace" before.
        return true;
    }
    let delim = suffix.chars().next().unwrap();
    // `*` may open intraword emphasis; `_` may not.
    if delim == '*' && !is_punctuation(next_char) {
        return true;
    }

    let prev_char = s[..ix].chars().last().unwrap();

    prev_char.is_whitespace()
        || is_punctuation(prev_char) && (delim != '\'' || ![']', ')'].contains(&prev_char))
}

/// Determines whether the delimiter run starting at given index is
/// right-flanking, as defined by the commonmark spec (and isn't intraword
/// for _ delims).
fn delim_run_can_close(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
    if ix == 0 {
        // Nothing precedes the run, so it cannot close anything.
        return false;
    }
    let prev_char = s[..ix].chars().last().unwrap();
    if prev_char.is_whitespace() {
        return false;
    }
    let next_char = if let Some(c) = suffix.chars().nth(run_len) {
        c
    } else {
        // Run extends to end of input, which acts like trailing whitespace.
        return true;
    };
    let delim = suffix.chars().next().unwrap();
    // `*` may close intraword emphasis; `_` may not.
    if delim == '*' && !is_punctuation(prev_char) {
        return true;
    }

    next_char.is_whitespace() || is_punctuation(next_char)
}

/// Builds the lookup table used by `iterate_special_bytes`, choosing the
/// SIMD-capable variant when compiled with that support.
fn create_lut(options: &Options) -> LookupTable {
    #[cfg(all(target_arch = "x86_64", feature = "simd"))]
    {
        LookupTable {
            simd: simd::compute_lookup(options),
            scalar: special_bytes(options),
        }
    }
    #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
    {
        special_bytes(options)
    }
}

/// Computes the 256-entry byte membership table of "special" bytes for the
/// scalar scanning path; extension options add their trigger bytes.
fn special_bytes(options: &Options) -> [bool; 256] {
    let mut bytes = [false; 256];
    let standard_bytes = [
        b'\n', b'\r', b'*', b'_', b'&', b'\\', b'[', b']', b'<', b'!', b'`',
    ];

    for &byte in &standard_bytes {
        bytes[byte as usize] = true;
    }
    if options.contains(Options::ENABLE_TABLES) {
        bytes[b'|' as usize] = true;
    }
    if options.contains(Options::ENABLE_STRIKETHROUGH) {
        bytes[b'~' as usize] = true;
    }
    if options.contains(Options::ENABLE_SMART_PUNCTUATION) {
        for &byte in &[b'.', b'-', b'"', b'\''] {
            bytes[byte as usize] = true;
        }
    }

    bytes
}

/// Control-flow instruction returned by `iterate_special_bytes` callbacks.
enum LoopInstruction<T> {
    /// Continue looking for more special bytes, but skip next few bytes.
    ContinueAndSkip(usize),
    /// Break looping immediately, returning with the given index and value.
    BreakAtWith(usize, T),
}

#[cfg(all(target_arch = "x86_64", feature = "simd"))]
struct LookupTable {
    /// Nibble-indexed bitmap consumed by the SSSE3 path.
    simd: [u8; 16],
    /// Plain byte membership table for the scalar fallback.
    scalar: [bool; 256],
}

#[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
type LookupTable = [bool; 256];

/// This function walks the byte slices from the given index and
/// calls the callback function on all bytes (and their indices) that are in the following set:
/// `` ` ``, `\`, `&`, `*`, `_`, `~`, `!`, `<`, `[`, `]`, `|`, `\r`, `\n`
/// It is guaranteed not to call the callback on other bytes.
/// Whenever `callback(ix, byte)` returns a `ContinueAndSkip(n)` value, the callback
/// will not be called with an index that is less than `ix + n + 1`.
/// When the callback returns a `BreakAtWith(end_ix, opt_val)`, no more callbacks will be
/// called and the function returns immediately with the return value `(end_ix, opt_val)`.
/// If `BreakAtWith(..)` is never returned, this function will return the first
/// index that is outside the byteslice bound and a `None` value.
+fn iterate_special_bytes<F, T>( + lut: &LookupTable, + bytes: &[u8], + ix: usize, + callback: F, +) -> (usize, Option<T>) +where + F: FnMut(usize, u8) -> LoopInstruction<Option<T>>, +{ + #[cfg(all(target_arch = "x86_64", feature = "simd"))] + { + simd::iterate_special_bytes(lut, bytes, ix, callback) + } + #[cfg(not(all(target_arch = "x86_64", feature = "simd")))] + { + scalar_iterate_special_bytes(lut, bytes, ix, callback) + } +} + +fn scalar_iterate_special_bytes<F, T>( + lut: &[bool; 256], + bytes: &[u8], + mut ix: usize, + mut callback: F, +) -> (usize, Option<T>) +where + F: FnMut(usize, u8) -> LoopInstruction<Option<T>>, +{ + while ix < bytes.len() { + let b = bytes[ix]; + if lut[b as usize] { + match callback(ix, b) { + LoopInstruction::ContinueAndSkip(skip) => { + ix += skip; + } + LoopInstruction::BreakAtWith(ix, val) => { + return (ix, val); + } + } + } + ix += 1; + } + + (ix, None) +} + +/// Split the usual heading content range and the content inside the trailing attribute block. +/// +/// Returns `(leading_content_len, Option<trailing_attr_block_range>)`. +/// +/// Note that `trailing_attr_block_range` will be empty range when the block +/// is `{}`, since the range is content inside the wrapping `{` and `}`. +/// +/// The closing `}` of an attribute block can have trailing whitespaces. +/// They are automatically trimmed when the attribute block is being searched. +/// +/// However, this method does not trim the trailing whitespaces of heading content. +/// It is callers' responsibility to trim them if necessary. +fn extract_attribute_block_content_from_header_text( + heading: &[u8], +) -> (usize, Option<Range<usize>>) { + let heading_len = heading.len(); + let mut ix = heading_len; + ix -= scan_rev_while(heading, |b| { + b == b'\n' || b == b'\r' || b == b' ' || b == b'\t' + }); + if ix == 0 { + return (heading_len, None); + } + + let attr_block_close = ix - 1; + if heading.get(attr_block_close) != Some(&b'}') { + // The last character is not `}`. 
No attribute blocks found. + return (heading_len, None); + } + // move cursor before the closing right brace (`}`) + ix -= 1; + + ix -= scan_rev_while(&heading[..ix], |b| { + // Characters to be excluded: + // * `{` and `}`: special characters to open and close an attribute block. + // * `\\`: a special character to escape many characters and disable some syntaxes. + // + Handling of this escape character differs among markdown processors. + // + Escaped characters will be separate text node from neighbors, so + // it is not easy to handle unescaped string and trim the trailing block. + // * `<` and `>`: special characters to start and end HTML tag. + // + No known processors converts `{#<i>foo</i>}` into + // `id="<i>foo</>"` as of this writing, so hopefully + // this restriction won't cause compatibility issues. + // * `\n` and `\r`: a newline character. + // + Setext heading can have multiple lines. However it is hard to support + // attribute blocks that have newline inside, since the parsing proceeds line by + // line and lines will be separate nodes even they are logically a single text. + !matches!(b, b'{' | b'}' | b'<' | b'>' | b'\\' | b'\n' | b'\r') + }); + if ix == 0 { + // `{` is not found. No attribute blocks available. + return (heading_len, None); + } + let attr_block_open = ix - 1; + if heading[attr_block_open] != b'{' { + // `{` is not found. No attribute blocks available. + return (heading_len, None); + } + + (attr_block_open, Some(ix..attr_block_close)) +} + +/// Parses an attribute block content, such as `.class1 #id .class2`. +/// +/// Returns `(id, classes)`. +/// +/// It is callers' responsibility to find opening and closing characters of the attribute +/// block. Usually [`extract_attribute_block_content_from_header_text`] function does it for you. +/// +/// Note that this parsing requires explicit whitespace separators between +/// attributes. 
This is intentional design with the reasons below: +/// +/// * to keep conversion simple and easy to understand for any possible input, +/// * to avoid adding less obvious conversion rule that can reduce compatibility +/// with other implementations more, and +/// * to follow the major design of implementations with the support for the +/// attribute blocks extension (as of this writing). +/// +/// See also: [`Options::ENABLE_HEADING_ATTRIBUTES`]. +/// +/// [`Options::ENABLE_HEADING_ATTRIBUTES`]: `crate::Options::ENABLE_HEADING_ATTRIBUTES` +fn parse_inside_attribute_block(inside_attr_block: &str) -> Option<HeadingAttributes> { + let mut id = None; + let mut classes = Vec::new(); + + for attr in inside_attr_block.split_ascii_whitespace() { + // iterator returned by `str::split_ascii_whitespace` never emits empty + // strings, so taking first byte won't panic. + if attr.len() > 1 { + let first_byte = attr.as_bytes()[0]; + if first_byte == b'#' { + id = Some(&attr[1..]); + } else if first_byte == b'.' { + classes.push(&attr[1..]); + } + } + } + + Some(HeadingAttributes { id, classes }) +} + +#[cfg(all(target_arch = "x86_64", feature = "simd"))] +mod simd { + //! SIMD byte scanning logic. + //! + //! This module provides functions that allow walking through byteslices, calling + //! provided callback functions on special bytes and their indices using SIMD. + //! The byteset is defined in `compute_lookup`. + //! + //! The idea is to load in a chunk of 16 bytes and perform a lookup into a set of + //! bytes on all the bytes in this chunk simultaneously. We produce a 16 bit bitmask + //! from this and call the callback on every index corresponding to a 1 in this mask + //! before moving on to the next chunk. This allows us to move quickly when there + //! are no or few matches. + //! + //! The table lookup is inspired by this [great overview]. However, since all of the + //! bytes we're interested in are ASCII, we don't quite need the full generality of + //! 
the universal algorithm and are hence able to skip a few instructions. + //! + //! [great overview]: http://0x80.pl/articles/simd-byte-lookup.html + + use super::{LookupTable, LoopInstruction}; + use crate::Options; + use core::arch::x86_64::*; + + const VECTOR_SIZE: usize = std::mem::size_of::<__m128i>(); + + /// Generates a lookup table containing the bitmaps for our + /// special marker bytes. This is effectively a 128 element 2d bitvector, + /// that can be indexed by a four bit row index (the lower nibble) + /// and a three bit column index (upper nibble). + pub(super) fn compute_lookup(options: &Options) -> [u8; 16] { + let mut lookup = [0u8; 16]; + let standard_bytes = [ + b'\n', b'\r', b'*', b'_', b'&', b'\\', b'[', b']', b'<', b'!', b'`', + ]; + + for &byte in &standard_bytes { + add_lookup_byte(&mut lookup, byte); + } + if options.contains(Options::ENABLE_TABLES) { + add_lookup_byte(&mut lookup, b'|'); + } + if options.contains(Options::ENABLE_STRIKETHROUGH) { + add_lookup_byte(&mut lookup, b'~'); + } + if options.contains(Options::ENABLE_SMART_PUNCTUATION) { + for &byte in &[b'.', b'-', b'"', b'\''] { + add_lookup_byte(&mut lookup, byte); + } + } + + lookup + } + + fn add_lookup_byte(lookup: &mut [u8; 16], byte: u8) { + lookup[(byte & 0x0f) as usize] |= 1 << (byte >> 4); + } + + /// Computes a bit mask for the given byteslice starting from the given index, + /// where the 16 least significant bits indicate (by value of 1) whether or not + /// there is a special character at that byte position. The least significant bit + /// corresponds to `bytes[ix]` and the most significant bit corresponds to + /// `bytes[ix + 15]`. + /// It is only safe to call this function when `bytes.len() >= ix + VECTOR_SIZE`. 
    #[target_feature(enable = "ssse3")]
    #[inline]
    unsafe fn compute_mask(lut: &[u8; 16], bytes: &[u8], ix: usize) -> i32 {
        // SAFETY precondition (see doc comment above): caller guarantees
        // `bytes.len() >= ix + VECTOR_SIZE`, so the unaligned 16-byte load
        // below stays in bounds.
        debug_assert!(bytes.len() >= ix + VECTOR_SIZE);

        let bitmap = _mm_loadu_si128(lut.as_ptr() as *const __m128i);
        // Small lookup table to compute single bit bitshifts
        // for 16 bytes at once.
        let bitmask_lookup =
            _mm_setr_epi8(1, 2, 4, 8, 16, 32, 64, -128, -1, -1, -1, -1, -1, -1, -1, -1);

        // Load input from memory.
        let raw_ptr = bytes.as_ptr().add(ix) as *const __m128i;
        let input = _mm_loadu_si128(raw_ptr);
        // Compute the bitmap using the bottom nibble as an index
        // into the lookup table. Note that non-ascii bytes will have
        // their most significant bit set and will map to lookup[0].
        let bitset = _mm_shuffle_epi8(bitmap, input);
        // Compute the high nibbles of the input using a 16-bit rightshift of four
        // and a mask to prevent most-significant bit issues.
        let higher_nibbles = _mm_and_si128(_mm_srli_epi16(input, 4), _mm_set1_epi8(0x0f));
        // Create a bitmask for the bitmap by perform a left shift of the value
        // of the higher nibble. Bytes with their most significant set are mapped
        // to -1 (all ones).
        let bitmask = _mm_shuffle_epi8(bitmask_lookup, higher_nibbles);
        // Test the bit of the bitmap by AND'ing the bitmap and the mask together.
        let tmp = _mm_and_si128(bitset, bitmask);
        // Check whether the result was not null. NEQ is not a SIMD intrinsic,
        // but comparing to the bitmask is logically equivalent. This also prevents us
        // from matching any non-ASCII bytes since none of the bitmaps were all ones
        // (-1).
        let result = _mm_cmpeq_epi8(tmp, bitmask);

        // Return the resulting bitmask.
        _mm_movemask_epi8(result)
    }

    /// Calls callback on byte indices and their value.
    /// Breaks when callback returns LoopInstruction::BreakAtWith(ix, val). And skips the
    /// number of bytes in callback return value otherwise.
    /// Returns the final index and a possible break value.
    pub(super) fn iterate_special_bytes<F, T>(
        lut: &LookupTable,
        bytes: &[u8],
        ix: usize,
        callback: F,
    ) -> (usize, Option<T>)
    where
        F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
    {
        // Use the SIMD path only when the CPU supports SSSE3 at runtime and
        // the input is at least one vector wide; otherwise fall back to the
        // scalar table walk.
        if is_x86_feature_detected!("ssse3") && bytes.len() >= VECTOR_SIZE {
            unsafe { simd_iterate_special_bytes(&lut.simd, bytes, ix, callback) }
        } else {
            super::scalar_iterate_special_bytes(&lut.scalar, bytes, ix, callback)
        }
    }

    /// Calls the callback function for every 1 in the given bitmask with
    /// the index `offset + ix`, where `ix` is the position of the 1 in the mask.
    /// Returns `Ok(ix)` to continue from index `ix`, `Err((end_ix, opt_val))` to break with
    /// final index `end_ix` and optional value `opt_val`.
    unsafe fn process_mask<F, T>(
        mut mask: i32,
        bytes: &[u8],
        mut offset: usize,
        callback: &mut F,
    ) -> Result<usize, (usize, Option<T>)>
    where
        F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
    {
        while mask != 0 {
            // Index of the lowest set bit = next special byte in this chunk.
            let mask_ix = mask.trailing_zeros() as usize;
            offset += mask_ix;
            // SAFETY precondition: the mask was computed over `bytes`, so every
            // set bit corresponds to an in-bounds index.
            match callback(offset, *bytes.get_unchecked(offset)) {
                LoopInstruction::ContinueAndSkip(skip) => {
                    offset += skip + 1;
                    // Drop the consumed bit plus any skipped positions.
                    mask >>= skip + 1 + mask_ix;
                }
                LoopInstruction::BreakAtWith(ix, val) => return Err((ix, val)),
            }
        }
        Ok(offset)
    }

    #[target_feature(enable = "ssse3")]
    /// Important: only call this function when `bytes.len() >= 16`. Doing
    /// so otherwise may exhibit undefined behaviour.
+ unsafe fn simd_iterate_special_bytes<F, T>( + lut: &[u8; 16], + bytes: &[u8], + mut ix: usize, + mut callback: F, + ) -> (usize, Option<T>) + where + F: FnMut(usize, u8) -> LoopInstruction<Option<T>>, + { + debug_assert!(bytes.len() >= VECTOR_SIZE); + let upperbound = bytes.len() - VECTOR_SIZE; + + while ix < upperbound { + let mask = compute_mask(lut, bytes, ix); + let block_start = ix; + ix = match process_mask(mask, bytes, ix, &mut callback) { + Ok(ix) => std::cmp::max(ix, VECTOR_SIZE + block_start), + Err((end_ix, val)) => return (end_ix, val), + }; + } + + if bytes.len() > ix { + // shift off the bytes at start we have already scanned + let mask = compute_mask(lut, bytes, upperbound) >> ix - upperbound; + if let Err((end_ix, val)) = process_mask(mask, bytes, ix, &mut callback) { + return (end_ix, val); + } + } + + (bytes.len(), None) + } + + #[cfg(test)] + mod simd_test { + use super::super::create_lut; + use super::{iterate_special_bytes, LoopInstruction}; + use crate::Options; + + fn check_expected_indices(bytes: &[u8], expected: &[usize], skip: usize) { + let mut opts = Options::empty(); + opts.insert(Options::ENABLE_TABLES); + opts.insert(Options::ENABLE_FOOTNOTES); + opts.insert(Options::ENABLE_STRIKETHROUGH); + opts.insert(Options::ENABLE_TASKLISTS); + + let lut = create_lut(&opts); + let mut indices = vec![]; + + iterate_special_bytes::<_, i32>(&lut, bytes, 0, |ix, _byte_ty| { + indices.push(ix); + LoopInstruction::ContinueAndSkip(skip) + }); + + assert_eq!(&indices[..], expected); + } + + #[test] + fn simple_no_match() { + check_expected_indices("abcdef0123456789".as_bytes(), &[], 0); + } + + #[test] + fn simple_match() { + check_expected_indices("*bcd&f0123456789".as_bytes(), &[0, 4], 0); + } + + #[test] + fn single_open_fish() { + check_expected_indices("<".as_bytes(), &[0], 0); + } + + #[test] + fn long_match() { + check_expected_indices("0123456789abcde~*bcd&f0".as_bytes(), &[15, 16, 20], 0); + } + + #[test] + fn border_skip() { + 
check_expected_indices("0123456789abcde~~~~d&f0".as_bytes(), &[15, 20], 3); + } + + #[test] + fn exhaustive_search() { + let chars = [ + b'\n', b'\r', b'*', b'_', b'~', b'|', b'&', b'\\', b'[', b']', b'<', b'!', b'`', + ]; + + for &c in &chars { + for i in 0u8..=255 { + if !chars.contains(&i) { + // full match + let mut buf = [i; 18]; + buf[3] = c; + buf[6] = c; + + check_expected_indices(&buf[..], &[3, 6], 0); + } + } + } + } + } +} diff --git a/vendor/pulldown-cmark/src/html.rs b/vendor/pulldown-cmark/src/html.rs new file mode 100644 index 000000000..fcfd51740 --- /dev/null +++ b/vendor/pulldown-cmark/src/html.rs @@ -0,0 +1,478 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//! HTML renderer that takes an iterator of events as input. 
+ +use std::collections::HashMap; +use std::io::{self, Write}; + +use crate::escape::{escape_href, escape_html, StrWrite, WriteWrapper}; +use crate::strings::CowStr; +use crate::Event::*; +use crate::{Alignment, CodeBlockKind, Event, LinkType, Tag}; + +enum TableState { + Head, + Body, +} + +struct HtmlWriter<'a, I, W> { + /// Iterator supplying events. + iter: I, + + /// Writer to write to. + writer: W, + + /// Whether or not the last write wrote a newline. + end_newline: bool, + + table_state: TableState, + table_alignments: Vec<Alignment>, + table_cell_index: usize, + numbers: HashMap<CowStr<'a>, usize>, +} + +impl<'a, I, W> HtmlWriter<'a, I, W> +where + I: Iterator<Item = Event<'a>>, + W: StrWrite, +{ + fn new(iter: I, writer: W) -> Self { + Self { + iter, + writer, + end_newline: true, + table_state: TableState::Head, + table_alignments: vec![], + table_cell_index: 0, + numbers: HashMap::new(), + } + } + + /// Writes a new line. + fn write_newline(&mut self) -> io::Result<()> { + self.end_newline = true; + self.writer.write_str("\n") + } + + /// Writes a buffer, and tracks whether or not a newline was written. 
+ #[inline] + fn write(&mut self, s: &str) -> io::Result<()> { + self.writer.write_str(s)?; + + if !s.is_empty() { + self.end_newline = s.ends_with('\n'); + } + Ok(()) + } + + fn run(mut self) -> io::Result<()> { + while let Some(event) = self.iter.next() { + match event { + Start(tag) => { + self.start_tag(tag)?; + } + End(tag) => { + self.end_tag(tag)?; + } + Text(text) => { + escape_html(&mut self.writer, &text)?; + self.end_newline = text.ends_with('\n'); + } + Code(text) => { + self.write("<code>")?; + escape_html(&mut self.writer, &text)?; + self.write("</code>")?; + } + Html(html) => { + self.write(&html)?; + } + SoftBreak => { + self.write_newline()?; + } + HardBreak => { + self.write("<br />\n")?; + } + Rule => { + if self.end_newline { + self.write("<hr />\n")?; + } else { + self.write("\n<hr />\n")?; + } + } + FootnoteReference(name) => { + let len = self.numbers.len() + 1; + self.write("<sup class=\"footnote-reference\"><a href=\"#")?; + escape_html(&mut self.writer, &name)?; + self.write("\">")?; + let number = *self.numbers.entry(name).or_insert(len); + write!(&mut self.writer, "{}", number)?; + self.write("</a></sup>")?; + } + TaskListMarker(true) => { + self.write("<input disabled=\"\" type=\"checkbox\" checked=\"\"/>\n")?; + } + TaskListMarker(false) => { + self.write("<input disabled=\"\" type=\"checkbox\"/>\n")?; + } + } + } + Ok(()) + } + + /// Writes the start of an HTML tag. 
+ fn start_tag(&mut self, tag: Tag<'a>) -> io::Result<()> { + match tag { + Tag::Paragraph => { + if self.end_newline { + self.write("<p>") + } else { + self.write("\n<p>") + } + } + Tag::Heading(level, id, classes) => { + if self.end_newline { + self.end_newline = false; + self.write("<")?; + } else { + self.write("\n<")?; + } + write!(&mut self.writer, "{}", level)?; + if let Some(id) = id { + self.write(" id=\"")?; + escape_html(&mut self.writer, id)?; + self.write("\"")?; + } + let mut classes = classes.iter(); + if let Some(class) = classes.next() { + self.write(" class=\"")?; + escape_html(&mut self.writer, class)?; + for class in classes { + self.write(" ")?; + escape_html(&mut self.writer, class)?; + } + self.write("\"")?; + } + self.write(">") + } + Tag::Table(alignments) => { + self.table_alignments = alignments; + self.write("<table>") + } + Tag::TableHead => { + self.table_state = TableState::Head; + self.table_cell_index = 0; + self.write("<thead><tr>") + } + Tag::TableRow => { + self.table_cell_index = 0; + self.write("<tr>") + } + Tag::TableCell => { + match self.table_state { + TableState::Head => { + self.write("<th")?; + } + TableState::Body => { + self.write("<td")?; + } + } + match self.table_alignments.get(self.table_cell_index) { + Some(&Alignment::Left) => self.write(" style=\"text-align: left\">"), + Some(&Alignment::Center) => self.write(" style=\"text-align: center\">"), + Some(&Alignment::Right) => self.write(" style=\"text-align: right\">"), + _ => self.write(">"), + } + } + Tag::BlockQuote => { + if self.end_newline { + self.write("<blockquote>\n") + } else { + self.write("\n<blockquote>\n") + } + } + Tag::CodeBlock(info) => { + if !self.end_newline { + self.write_newline()?; + } + match info { + CodeBlockKind::Fenced(info) => { + let lang = info.split(' ').next().unwrap(); + if lang.is_empty() { + self.write("<pre><code>") + } else { + self.write("<pre><code class=\"language-")?; + escape_html(&mut self.writer, lang)?; + 
self.write("\">") + } + } + CodeBlockKind::Indented => self.write("<pre><code>"), + } + } + Tag::List(Some(1)) => { + if self.end_newline { + self.write("<ol>\n") + } else { + self.write("\n<ol>\n") + } + } + Tag::List(Some(start)) => { + if self.end_newline { + self.write("<ol start=\"")?; + } else { + self.write("\n<ol start=\"")?; + } + write!(&mut self.writer, "{}", start)?; + self.write("\">\n") + } + Tag::List(None) => { + if self.end_newline { + self.write("<ul>\n") + } else { + self.write("\n<ul>\n") + } + } + Tag::Item => { + if self.end_newline { + self.write("<li>") + } else { + self.write("\n<li>") + } + } + Tag::Emphasis => self.write("<em>"), + Tag::Strong => self.write("<strong>"), + Tag::Strikethrough => self.write("<del>"), + Tag::Link(LinkType::Email, dest, title) => { + self.write("<a href=\"mailto:")?; + escape_href(&mut self.writer, &dest)?; + if !title.is_empty() { + self.write("\" title=\"")?; + escape_html(&mut self.writer, &title)?; + } + self.write("\">") + } + Tag::Link(_link_type, dest, title) => { + self.write("<a href=\"")?; + escape_href(&mut self.writer, &dest)?; + if !title.is_empty() { + self.write("\" title=\"")?; + escape_html(&mut self.writer, &title)?; + } + self.write("\">") + } + Tag::Image(_link_type, dest, title) => { + self.write("<img src=\"")?; + escape_href(&mut self.writer, &dest)?; + self.write("\" alt=\"")?; + self.raw_text()?; + if !title.is_empty() { + self.write("\" title=\"")?; + escape_html(&mut self.writer, &title)?; + } + self.write("\" />") + } + Tag::FootnoteDefinition(name) => { + if self.end_newline { + self.write("<div class=\"footnote-definition\" id=\"")?; + } else { + self.write("\n<div class=\"footnote-definition\" id=\"")?; + } + escape_html(&mut self.writer, &*name)?; + self.write("\"><sup class=\"footnote-definition-label\">")?; + let len = self.numbers.len() + 1; + let number = *self.numbers.entry(name).or_insert(len); + write!(&mut self.writer, "{}", number)?; + self.write("</sup>") + } + } + } + 
+ fn end_tag(&mut self, tag: Tag) -> io::Result<()> { + match tag { + Tag::Paragraph => { + self.write("</p>\n")?; + } + Tag::Heading(level, _id, _classes) => { + self.write("</")?; + write!(&mut self.writer, "{}", level)?; + self.write(">\n")?; + } + Tag::Table(_) => { + self.write("</tbody></table>\n")?; + } + Tag::TableHead => { + self.write("</tr></thead><tbody>\n")?; + self.table_state = TableState::Body; + } + Tag::TableRow => { + self.write("</tr>\n")?; + } + Tag::TableCell => { + match self.table_state { + TableState::Head => { + self.write("</th>")?; + } + TableState::Body => { + self.write("</td>")?; + } + } + self.table_cell_index += 1; + } + Tag::BlockQuote => { + self.write("</blockquote>\n")?; + } + Tag::CodeBlock(_) => { + self.write("</code></pre>\n")?; + } + Tag::List(Some(_)) => { + self.write("</ol>\n")?; + } + Tag::List(None) => { + self.write("</ul>\n")?; + } + Tag::Item => { + self.write("</li>\n")?; + } + Tag::Emphasis => { + self.write("</em>")?; + } + Tag::Strong => { + self.write("</strong>")?; + } + Tag::Strikethrough => { + self.write("</del>")?; + } + Tag::Link(_, _, _) => { + self.write("</a>")?; + } + Tag::Image(_, _, _) => (), // shouldn't happen, handled in start + Tag::FootnoteDefinition(_) => { + self.write("</div>\n")?; + } + } + Ok(()) + } + + // run raw text, consuming end tag + fn raw_text(&mut self) -> io::Result<()> { + let mut nest = 0; + while let Some(event) = self.iter.next() { + match event { + Start(_) => nest += 1, + End(_) => { + if nest == 0 { + break; + } + nest -= 1; + } + Html(text) | Code(text) | Text(text) => { + escape_html(&mut self.writer, &text)?; + self.end_newline = text.ends_with('\n'); + } + SoftBreak | HardBreak | Rule => { + self.write(" ")?; + } + FootnoteReference(name) => { + let len = self.numbers.len() + 1; + let number = *self.numbers.entry(name).or_insert(len); + write!(&mut self.writer, "[{}]", number)?; + } + TaskListMarker(true) => self.write("[x]")?, + TaskListMarker(false) => self.write("[ 
]")?, + } + } + Ok(()) + } +} + +/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and +/// push it to a `String`. +/// +/// # Examples +/// +/// ``` +/// use pulldown_cmark::{html, Parser}; +/// +/// let markdown_str = r#" +/// hello +/// ===== +/// +/// * alpha +/// * beta +/// "#; +/// let parser = Parser::new(markdown_str); +/// +/// let mut html_buf = String::new(); +/// html::push_html(&mut html_buf, parser); +/// +/// assert_eq!(html_buf, r#"<h1>hello</h1> +/// <ul> +/// <li>alpha</li> +/// <li>beta</li> +/// </ul> +/// "#); +/// ``` +pub fn push_html<'a, I>(s: &mut String, iter: I) +where + I: Iterator<Item = Event<'a>>, +{ + HtmlWriter::new(iter, s).run().unwrap(); +} + +/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and +/// write it out to a writable stream. +/// +/// **Note**: using this function with an unbuffered writer like a file or socket +/// will result in poor performance. Wrap these in a +/// [`BufWriter`](https://doc.rust-lang.org/std/io/struct.BufWriter.html) to +/// prevent unnecessary slowdowns. +/// +/// # Examples +/// +/// ``` +/// use pulldown_cmark::{html, Parser}; +/// use std::io::Cursor; +/// +/// let markdown_str = r#" +/// hello +/// ===== +/// +/// * alpha +/// * beta +/// "#; +/// let mut bytes = Vec::new(); +/// let parser = Parser::new(markdown_str); +/// +/// html::write_html(Cursor::new(&mut bytes), parser); +/// +/// assert_eq!(&String::from_utf8_lossy(&bytes)[..], r#"<h1>hello</h1> +/// <ul> +/// <li>alpha</li> +/// <li>beta</li> +/// </ul> +/// "#); +/// ``` +pub fn write_html<'a, I, W>(writer: W, iter: I) -> io::Result<()> +where + I: Iterator<Item = Event<'a>>, + W: Write, +{ + HtmlWriter::new(iter, WriteWrapper(writer)).run() +} diff --git a/vendor/pulldown-cmark/src/lib.rs b/vendor/pulldown-cmark/src/lib.rs new file mode 100644 index 000000000..9d2386e5a --- /dev/null +++ b/vendor/pulldown-cmark/src/lib.rs @@ -0,0 +1,289 @@ +// Copyright 2015 Google Inc. 
All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//! Pull parser for [CommonMark](https://commonmark.org). This crate provides a [Parser](struct.Parser.html) struct +//! which is an iterator over [Event](enum.Event.html)s. This iterator can be used +//! directly, or to output HTML using the [HTML module](html/index.html). +//! +//! By default, only CommonMark features are enabled. To use extensions like tables, +//! footnotes or task lists, enable them by setting the corresponding flags in the +//! [Options](struct.Options.html) struct. +//! +//! # Example +//! ```rust +//! use pulldown_cmark::{Parser, Options, html}; +//! +//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example."; +//! +//! // Set up options and parser. Strikethroughs are not part of the CommonMark standard +//! // and we therefore must enable it explicitly. +//! let mut options = Options::empty(); +//! 
options.insert(Options::ENABLE_STRIKETHROUGH); +//! let parser = Parser::new_ext(markdown_input, options); +//! +//! // Write to String buffer. +//! let mut html_output = String::new(); +//! html::push_html(&mut html_output, parser); +//! +//! // Check that the output is what we expected. +//! let expected_html = "<p>Hello world, this is a <del>complicated</del> <em>very simple</em> example.</p>\n"; +//! assert_eq!(expected_html, &html_output); +//! ``` + +// When compiled for the rustc compiler itself we want to make sure that this is +// an unstable crate. +#![cfg_attr(rustbuild, feature(staged_api, rustc_private))] +#![cfg_attr(rustbuild, unstable(feature = "rustc_private", issue = "27812"))] +// Forbid unsafe code unless the SIMD feature is enabled. +#![cfg_attr(not(feature = "simd"), forbid(unsafe_code))] + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +pub mod html; + +mod entities; +pub mod escape; +mod firstpass; +mod linklabel; +mod parse; +mod puncttable; +mod scanners; +mod strings; +mod tree; + +use std::{convert::TryFrom, fmt::Display}; + +pub use crate::parse::{BrokenLink, BrokenLinkCallback, LinkDef, OffsetIter, Parser, RefDefs}; +pub use crate::strings::{CowStr, InlineStr}; + +/// Codeblock kind. +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum CodeBlockKind<'a> { + Indented, + /// The value contained in the tag describes the language of the code, which may be empty. + #[cfg_attr(feature = "serde", serde(borrow))] + Fenced(CowStr<'a>), +} + +impl<'a> CodeBlockKind<'a> { + pub fn is_indented(&self) -> bool { + matches!(*self, CodeBlockKind::Indented) + } + + pub fn is_fenced(&self) -> bool { + matches!(*self, CodeBlockKind::Fenced(_)) + } +} + +/// Tags for elements that can contain other elements. +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum Tag<'a> { + /// A paragraph of text and other inline elements. 
+ Paragraph, + + /// A heading. The first field indicates the level of the heading, + /// the second the fragment identifier, and the third the classes. + Heading(HeadingLevel, Option<&'a str>, Vec<&'a str>), + + BlockQuote, + /// A code block. + CodeBlock(CodeBlockKind<'a>), + + /// A list. If the list is ordered the field indicates the number of the first item. + /// Contains only list items. + List(Option<u64>), // TODO: add delim and tight for ast (not needed for html) + /// A list item. + Item, + /// A footnote definition. The value contained is the footnote's label by which it can + /// be referred to. + #[cfg_attr(feature = "serde", serde(borrow))] + FootnoteDefinition(CowStr<'a>), + + /// A table. Contains a vector describing the text-alignment for each of its columns. + Table(Vec<Alignment>), + /// A table header. Contains only `TableCell`s. Note that the table body starts immediately + /// after the closure of the `TableHead` tag. There is no `TableBody` tag. + TableHead, + /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s. + TableRow, + TableCell, + + // span-level tags + Emphasis, + Strong, + Strikethrough, + + /// A link. The first field is the link type, the second the destination URL and the third is a title. + Link(LinkType, CowStr<'a>, CowStr<'a>), + + /// An image. The first field is the link type, the second the destination URL and the third is a title. 
+ Image(LinkType, CowStr<'a>, CowStr<'a>), +} + +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HeadingLevel { + H1 = 1, + H2, + H3, + H4, + H5, + H6, +} + +impl Display for HeadingLevel { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::H1 => write!(f, "h1"), + Self::H2 => write!(f, "h2"), + Self::H3 => write!(f, "h3"), + Self::H4 => write!(f, "h4"), + Self::H5 => write!(f, "h5"), + Self::H6 => write!(f, "h6"), + } + } +} + +/// Returned when trying to convert a `usize` into a `Heading` but it fails +/// because the usize isn't a valid heading level +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] +pub struct InvalidHeadingLevel(usize); + +impl TryFrom<usize> for HeadingLevel { + type Error = InvalidHeadingLevel; + + fn try_from(value: usize) -> Result<Self, Self::Error> { + match value { + 1 => Ok(Self::H1), + 2 => Ok(Self::H2), + 3 => Ok(Self::H3), + 4 => Ok(Self::H4), + 5 => Ok(Self::H5), + 6 => Ok(Self::H6), + _ => Err(InvalidHeadingLevel(value)), + } + } +} + +/// Type specifier for inline links. See [the Tag::Link](enum.Tag.html#variant.Link) for more information. 
+#[derive(Clone, Debug, PartialEq, Copy)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum LinkType { + /// Inline link like `[foo](bar)` + Inline, + /// Reference link like `[foo][bar]` + Reference, + /// Reference without destination in the document, but resolved by the broken_link_callback + ReferenceUnknown, + /// Collapsed link like `[foo][]` + Collapsed, + /// Collapsed link without destination in the document, but resolved by the broken_link_callback + CollapsedUnknown, + /// Shortcut link like `[foo]` + Shortcut, + /// Shortcut without destination in the document, but resolved by the broken_link_callback + ShortcutUnknown, + /// Autolink like `<http://foo.bar/baz>` + Autolink, + /// Email address in autolink like `<john@example.org>` + Email, +} + +impl LinkType { + fn to_unknown(self) -> Self { + match self { + LinkType::Reference => LinkType::ReferenceUnknown, + LinkType::Collapsed => LinkType::CollapsedUnknown, + LinkType::Shortcut => LinkType::ShortcutUnknown, + _ => unreachable!(), + } + } +} + +/// Markdown events that are generated in a preorder traversal of the document +/// tree, with additional `End` events whenever all of an inner node's children +/// have been visited. +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum Event<'a> { + /// Start of a tagged element. Events that are yielded after this event + /// and before its corresponding `End` event are inside this element. + /// Start and end events are guaranteed to be balanced. + #[cfg_attr(feature = "serde", serde(borrow))] + Start(Tag<'a>), + /// End of a tagged element. + #[cfg_attr(feature = "serde", serde(borrow))] + End(Tag<'a>), + /// A text node. + #[cfg_attr(feature = "serde", serde(borrow))] + Text(CowStr<'a>), + /// An inline code node. + #[cfg_attr(feature = "serde", serde(borrow))] + Code(CowStr<'a>), + /// An HTML node. 
+ #[cfg_attr(feature = "serde", serde(borrow))] + Html(CowStr<'a>), + /// A reference to a footnote with given label, which may or may not be defined + /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may + /// occur in any order. + #[cfg_attr(feature = "serde", serde(borrow))] + FootnoteReference(CowStr<'a>), + /// A soft line break. + SoftBreak, + /// A hard line break. + HardBreak, + /// A horizontal ruler. + Rule, + /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked. + TaskListMarker(bool), +} + +/// Table column text alignment. +#[derive(Copy, Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] + +pub enum Alignment { + /// Default text alignment. + None, + Left, + Center, + Right, +} + +bitflags::bitflags! { + /// Option struct containing flags for enabling extra features + /// that are not part of the CommonMark spec. + pub struct Options: u32 { + const ENABLE_TABLES = 1 << 1; + const ENABLE_FOOTNOTES = 1 << 2; + const ENABLE_STRIKETHROUGH = 1 << 3; + const ENABLE_TASKLISTS = 1 << 4; + const ENABLE_SMART_PUNCTUATION = 1 << 5; + /// Extension to allow headings to have ID and classes. + /// + /// `# text { #id .class1 .class2 }` is interpreted as a level 1 heading + /// with the content `text`, ID `id`, and classes `class1` and `class2`. + /// Note that attributes (ID and classes) should be space-separated. 
+ const ENABLE_HEADING_ATTRIBUTES = 1 << 6; + } +} diff --git a/vendor/pulldown-cmark/src/linklabel.rs b/vendor/pulldown-cmark/src/linklabel.rs new file mode 100644 index 000000000..23b4b828b --- /dev/null +++ b/vendor/pulldown-cmark/src/linklabel.rs @@ -0,0 +1,135 @@ +// Copyright 2018 Google LLC +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//! Link label parsing and matching. + +use unicase::UniCase; + +use crate::scanners::{is_ascii_whitespace, scan_eol}; +use crate::strings::CowStr; + +pub(crate) enum ReferenceLabel<'a> { + Link(CowStr<'a>), + Footnote(CowStr<'a>), +} + +pub(crate) type LinkLabel<'a> = UniCase<CowStr<'a>>; + +/// Assumes the opening bracket has already been scanned. +/// The line break handler determines what happens when a linebreak +/// is found. It is passed the bytes following the line break and +/// either returns `Some(k)`, where `k` is the number of bytes to skip, +/// or `None` to abort parsing the label. 
+/// Returns the number of bytes read (including closing bracket) and label on success. +pub(crate) fn scan_link_label_rest<'t>( + text: &'t str, + linebreak_handler: &dyn Fn(&[u8]) -> Option<usize>, +) -> Option<(usize, CowStr<'t>)> { + let bytes = text.as_bytes(); + let mut ix = 0; + let mut only_white_space = true; + let mut codepoints = 0; + // no worries, doesn't allocate until we push things onto it + let mut label = String::new(); + let mut mark = 0; + + loop { + if codepoints >= 1000 { + return None; + } + match *bytes.get(ix)? { + b'[' => return None, + b']' => break, + b'\\' => { + ix += 2; + codepoints += 2; + only_white_space = false; + } + b if is_ascii_whitespace(b) => { + // normalize labels by collapsing whitespaces, including linebreaks + let mut whitespaces = 0; + let mut linebreaks = 0; + let whitespace_start = ix; + + while ix < bytes.len() && is_ascii_whitespace(bytes[ix]) { + if let Some(eol_bytes) = scan_eol(&bytes[ix..]) { + linebreaks += 1; + if linebreaks > 1 { + return None; + } + ix += eol_bytes; + ix += linebreak_handler(&bytes[ix..])?; + whitespaces += 2; // indicate that we need to replace + } else { + whitespaces += if bytes[ix] == b' ' { 1 } else { 2 }; + ix += 1; + } + } + if whitespaces > 1 { + label.push_str(&text[mark..whitespace_start]); + label.push(' '); + mark = ix; + codepoints += ix - whitespace_start; + } else { + codepoints += 1; + } + } + b => { + only_white_space = false; + ix += 1; + if b & 0b1000_0000 != 0 { + codepoints += 1; + } + } + } + } + + if only_white_space { + None + } else { + let cow = if mark == 0 { + text[..ix].into() + } else { + label.push_str(&text[mark..ix]); + label.into() + }; + Some((ix + 1, cow)) + } +} + +#[cfg(test)] +mod test { + use super::scan_link_label_rest; + + #[test] + fn whitespace_normalization() { + let input = "«\t\tBlurry Eyes\t\t»][blurry_eyes]"; + let expected_output = "« Blurry Eyes »"; // regular spaces! 
+ + let (_bytes, normalized_label) = scan_link_label_rest(input, &|_| None).unwrap(); + assert_eq!(expected_output, normalized_label.as_ref()); + } + + #[test] + fn return_carriage_linefeed_ok() { + let input = "hello\r\nworld\r\n]"; + assert!(scan_link_label_rest(input, &|_| Some(0)).is_some()); + } +} diff --git a/vendor/pulldown-cmark/src/main.rs b/vendor/pulldown-cmark/src/main.rs new file mode 100644 index 000000000..5335e1f58 --- /dev/null +++ b/vendor/pulldown-cmark/src/main.rs @@ -0,0 +1,123 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//! Command line tool to exercise pulldown-cmark. 
+ +#![forbid(unsafe_code)] + +use pulldown_cmark::{html, Options, Parser}; + +use std::env; +use std::io::{self, Read}; +use std::mem; + +fn dry_run(text: &str, opts: Options) { + let p = Parser::new_ext(text, opts); + let count = p.count(); + println!("{} events", count); +} + +fn print_events(text: &str, opts: Options) { + let parser = Parser::new_ext(text, opts).into_offset_iter(); + for (event, range) in parser { + println!("{:?}: {:?}", range, event); + } + println!("EOF"); +} + +fn brief(program: &str) -> String { + format!( + "Usage: {} [options]\n\n{}", + program, "Reads markdown from standard input and emits HTML.", + ) +} + +pub fn main() -> std::io::Result<()> { + let args: Vec<_> = env::args().collect(); + let mut opts = getopts::Options::new(); + opts.optflag("h", "help", "this help message"); + opts.optflag("d", "dry-run", "dry run, produce no output"); + opts.optflag("e", "events", "print event sequence instead of rendering"); + opts.optflag("T", "enable-tables", "enable GitHub-style tables"); + opts.optflag("F", "enable-footnotes", "enable Hoedown-style footnotes"); + opts.optflag( + "S", + "enable-strikethrough", + "enable GitHub-style strikethrough", + ); + opts.optflag("L", "enable-tasklists", "enable GitHub-style task lists"); + opts.optflag("P", "enable-smart-punctuation", "enable smart punctuation"); + opts.optflag( + "H", + "enable-heading-attributes", + "enable heading attributes", + ); + + let matches = match opts.parse(&args[1..]) { + Ok(m) => m, + Err(f) => { + eprintln!("{}\n{}", f, opts.usage(&brief(&args[0]))); + std::process::exit(1); + } + }; + if matches.opt_present("help") { + println!("{}", opts.usage(&brief(&args[0]))); + return Ok(()); + } + let mut opts = Options::empty(); + if matches.opt_present("enable-tables") { + opts.insert(Options::ENABLE_TABLES); + } + if matches.opt_present("enable-footnotes") { + opts.insert(Options::ENABLE_FOOTNOTES); + } + if matches.opt_present("enable-strikethrough") { + 
opts.insert(Options::ENABLE_STRIKETHROUGH); + } + if matches.opt_present("enable-tasklists") { + opts.insert(Options::ENABLE_TASKLISTS); + } + if matches.opt_present("enable-smart-punctuation") { + opts.insert(Options::ENABLE_SMART_PUNCTUATION); + } + if matches.opt_present("enable-heading-attributes") { + opts.insert(Options::ENABLE_HEADING_ATTRIBUTES); + } + + let mut input = String::new(); + io::stdin().lock().read_to_string(&mut input)?; + if matches.opt_present("events") { + print_events(&input, opts); + } else if matches.opt_present("dry-run") { + dry_run(&input, opts); + } else { + let mut p = Parser::new_ext(&input, opts); + let stdio = io::stdout(); + let buffer = std::io::BufWriter::with_capacity(1024 * 1024, stdio.lock()); + html::write_html(buffer, &mut p)?; + // Since the program will now terminate and the memory will be returned + // to the operating system anyway, there is no point in tidely cleaning + // up all the datastructures we have used. We shouldn't do this if we'd + // do other things after this, because this is basically intentionally + // leaking data. Skipping cleanup lets us return a bit (~5%) faster. + mem::forget(p); + } + Ok(()) +} diff --git a/vendor/pulldown-cmark/src/parse.rs b/vendor/pulldown-cmark/src/parse.rs new file mode 100644 index 000000000..8355ce2f8 --- /dev/null +++ b/vendor/pulldown-cmark/src/parse.rs @@ -0,0 +1,1904 @@ +// Copyright 2017 Google Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//! Tree-based two pass parser. + +use std::cmp::{max, min}; +use std::collections::{HashMap, VecDeque}; +use std::iter::FusedIterator; +use std::num::NonZeroUsize; +use std::ops::{Index, Range}; + +use unicase::UniCase; + +use crate::firstpass::run_first_pass; +use crate::linklabel::{scan_link_label_rest, LinkLabel, ReferenceLabel}; +use crate::scanners::*; +use crate::strings::CowStr; +use crate::tree::{Tree, TreeIndex}; +use crate::{Alignment, CodeBlockKind, Event, HeadingLevel, LinkType, Options, Tag}; + +// Allowing arbitrary depth nested parentheses inside link destinations +// can create denial of service vulnerabilities if we're not careful. 
// The simplest countermeasure is to limit their depth, which is
// explicitly allowed by the spec as long as the limit is at least 3:
// https://spec.commonmark.org/0.29/#link-destination
const LINK_MAX_NESTED_PARENS: usize = 5;

/// Payload of a parse-tree node: the byte range it covers in the source
/// text plus its (possibly not yet resolved) semantic kind.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct Item {
    pub start: usize,
    pub end: usize,
    pub body: ItemBody,
}

/// Semantic kind of a tree node. The `Maybe*` variants are provisional
/// items produced by the first pass; the inline passes resolve them.
#[derive(Debug, PartialEq, Clone, Copy)]
pub(crate) enum ItemBody {
    Paragraph,
    Text,
    SoftBreak,
    HardBreak,

    // These are possible inline items, need to be resolved in second pass.

    // repeats, can_open, can_close
    MaybeEmphasis(usize, bool, bool),
    // quote byte, can_open, can_close
    MaybeSmartQuote(u8, bool, bool),
    MaybeCode(usize, bool), // number of backticks, preceded by backslash
    MaybeHtml,
    MaybeLinkOpen,
    // bool indicates whether or not the preceding section could be a reference
    MaybeLinkClose(bool),
    MaybeImage,

    // These are inline items after resolution.
    Emphasis,
    Strong,
    Strikethrough,
    Code(CowIndex),
    Link(LinkIndex),
    Image(LinkIndex),
    FootnoteReference(CowIndex),
    TaskListMarker(bool), // true for checked

    Rule,
    Heading(HeadingLevel, Option<HeadingIndex>), // heading level
    FencedCodeBlock(CowIndex),
    IndentCodeBlock,
    Html,
    OwnedHtml(CowIndex),
    BlockQuote,
    List(bool, u8, u64), // is_tight, list character, list start index
    ListItem(usize),     // indent level
    SynthesizeText(CowIndex),
    SynthesizeChar(char),
    FootnoteDefinition(CowIndex),

    // Tables
    Table(AlignmentIndex),
    TableHead,
    TableRow,
    TableCell,

    // Dummy node at the top of the tree - should not be used otherwise!
    Root,
}

// NOTE(review): the `'a` lifetime parameter on this impl is unused.
impl<'a> ItemBody {
    /// True for provisional items that still need inline resolution
    /// (the second pass).
    fn is_inline(&self) -> bool {
        matches!(
            *self,
            ItemBody::MaybeEmphasis(..)
                | ItemBody::MaybeSmartQuote(..)
                | ItemBody::MaybeHtml
                | ItemBody::MaybeCode(..)
                | ItemBody::MaybeLinkOpen
                | ItemBody::MaybeLinkClose(..)
                | ItemBody::MaybeImage
        )
    }
}

impl<'a> Default for ItemBody {
    // `Root` is the dummy node used at the top of the tree.
    fn default() -> Self {
        ItemBody::Root
    }
}

/// Information handed to the broken-link callback: the source span of the
/// reference, its link type, and the reference label text.
pub struct BrokenLink<'a> {
    pub span: std::ops::Range<usize>,
    pub link_type: LinkType,
    pub reference: CowStr<'a>,
}

/// Markdown event iterator.
pub struct Parser<'input, 'callback> {
    text: &'input str,
    options: Options,
    tree: Tree<Item>,
    allocs: Allocations<'input>,
    broken_link_callback: BrokenLinkCallback<'input, 'callback>,
    html_scan_guard: HtmlScanGuard,

    // used by inline passes. store them here for reuse
    inline_stack: InlineStack,
    link_stack: LinkStack,
}

impl<'input, 'callback> Parser<'input, 'callback> {
    /// Creates a new event iterator for a markdown string without any options enabled.
    pub fn new(text: &'input str) -> Self {
        Parser::new_ext(text, Options::empty())
    }

    /// Creates a new event iterator for a markdown string with given options.
    pub fn new_ext(text: &'input str, options: Options) -> Self {
        Parser::new_with_broken_link_callback(text, options, None)
    }

    /// In case the parser encounters any potential links that have a broken
    /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
    /// the provided callback will be called with the reference name,
    /// and the returned pair will be used as the link name and title if it is not
    /// `None`.
    pub fn new_with_broken_link_callback(
        text: &'input str,
        options: Options,
        broken_link_callback: BrokenLinkCallback<'input, 'callback>,
    ) -> Self {
        // Run the block-structure first pass eagerly; inline items are
        // resolved lazily while iterating.
        let (mut tree, allocs) = run_first_pass(text, options);
        tree.reset();
        let inline_stack = Default::default();
        let link_stack = Default::default();
        let html_scan_guard = Default::default();
        Parser {
            text,
            options,
            tree,
            allocs,
            broken_link_callback,
            inline_stack,
            link_stack,
            html_scan_guard,
        }
    }

    /// Returns a reference to the internal `RefDefs` object, which provides access
    /// to the internal map of reference definitions.
    pub fn reference_definitions(&self) -> &RefDefs {
        &self.allocs.refdefs
    }

    /// Handle inline markup.
    ///
    /// When the parser encounters any item indicating potential inline markup, all
    /// inline markup passes are run on the remainder of the chain.
    ///
    /// Note: there's some potential for optimization here, but that's future work.
    fn handle_inline(&mut self) {
        self.handle_inline_pass1();
        self.handle_emphasis();
    }

    /// Handle inline HTML, code spans, and links.
    ///
    /// This function handles both inline HTML and code spans, because they have
    /// the same precedence. It also handles links, even though they have lower
    /// precedence, because the URL of links must not be processed.
    fn handle_inline_pass1(&mut self) {
        let mut code_delims = CodeDelims::new();
        let mut cur = self.tree.cur();
        let mut prev = None;

        let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
        let block_text = &self.text[..block_end];

        while let Some(mut cur_ix) = cur {
            match self.tree[cur_ix].item.body {
                // `<` — try an autolink first, then raw inline HTML,
                // otherwise demote to plain text.
                ItemBody::MaybeHtml => {
                    let next = self.tree[cur_ix].next;
                    let autolink = if let Some(next_ix) = next {
                        scan_autolink(block_text, self.tree[next_ix].item.start)
                    } else {
                        None
                    };

                    if let Some((ix, uri, link_type)) = autolink {
                        let node = scan_nodes_to_ix(&self.tree, next, ix);
                        let text_node = self.tree.create_node(Item {
                            start: self.tree[cur_ix].item.start + 1,
                            end: ix - 1,
                            body: ItemBody::Text,
                        });
                        let link_ix = self.allocs.allocate_link(link_type, uri, "".into());
                        self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
                        self.tree[cur_ix].item.end = ix;
                        self.tree[cur_ix].next = node;
                        self.tree[cur_ix].child = Some(text_node);
                        prev = cur;
                        cur = node;
                        if let Some(node_ix) = cur {
                            self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
                        }
                        continue;
                    } else {
                        let inline_html = next.and_then(|next_ix| {
                            self.scan_inline_html(
                                block_text.as_bytes(),
                                self.tree[next_ix].item.start,
                            )
                        });
                        if let Some((span, ix)) = inline_html {
                            let node = scan_nodes_to_ix(&self.tree, next, ix);
                            // A non-empty `span` means bytes were rewritten
                            // while scanning, so the HTML must be owned.
                            self.tree[cur_ix].item.body = if !span.is_empty() {
                                let converted_string =
                                    String::from_utf8(span).expect("invalid utf8");
                                ItemBody::OwnedHtml(
                                    self.allocs.allocate_cow(converted_string.into()),
                                )
                            } else {
                                ItemBody::Html
                            };
                            self.tree[cur_ix].item.end = ix;
                            self.tree[cur_ix].next = node;
                            prev = cur;
                            cur = node;
                            if let Some(node_ix) = cur {
                                self.tree[node_ix].item.start =
                                    max(self.tree[node_ix].item.start, ix);
                            }
                            continue;
                        }
                    }
                    self.tree[cur_ix].item.body = ItemBody::Text;
                }
                // Backtick run — try to pair it with a matching closer.
                ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
                    if preceded_by_backslash {
                        search_count -= 1;
                        if search_count == 0 {
                            self.tree[cur_ix].item.body = ItemBody::Text;
                            prev = cur;
                            cur = self.tree[cur_ix].next;
                            continue;
                        }
                    }

                    if code_delims.is_populated() {
                        // we have previously scanned all codeblock delimiters,
                        // so we can reuse that work
                        if let Some(scan_ix) = code_delims.find(cur_ix, search_count) {
                            self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
                        } else {
                            self.tree[cur_ix].item.body = ItemBody::Text;
                        }
                    } else {
                        // we haven't previously scanned all codeblock delimiters,
                        // so walk the AST
                        let mut scan = if search_count > 0 {
                            self.tree[cur_ix].next
                        } else {
                            None
                        };
                        while let Some(scan_ix) = scan {
                            if let ItemBody::MaybeCode(delim_count, _) =
                                self.tree[scan_ix].item.body
                            {
                                if search_count == delim_count {
                                    self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
                                    code_delims.clear();
                                    break;
                                } else {
                                    code_delims.insert(delim_count, scan_ix);
                                }
                            }
                            scan = self.tree[scan_ix].next;
                        }
                        if scan == None {
                            self.tree[cur_ix].item.body = ItemBody::Text;
                        }
                    }
                }
                // `[` — record a potential link opener; resolved at `]`.
                ItemBody::MaybeLinkOpen => {
                    self.tree[cur_ix].item.body = ItemBody::Text;
                    self.link_stack.push(LinkStackEl {
                        node: cur_ix,
                        ty: LinkStackTy::Link,
                    });
                }
                // `![` — record a potential image opener; resolved at `]`.
                ItemBody::MaybeImage => {
                    self.tree[cur_ix].item.body = ItemBody::Text;
                    self.link_stack.push(LinkStackEl {
                        node: cur_ix,
                        ty: LinkStackTy::Image,
                    });
                }
                // `]` — try the inline form `](dest "title")` first, then
                // the reference forms (handled in the `else` branch).
                ItemBody::MaybeLinkClose(could_be_ref) => {
                    self.tree[cur_ix].item.body = ItemBody::Text;
                    if let Some(tos) = self.link_stack.pop() {
                        if tos.ty == LinkStackTy::Disabled {
                            continue;
                        }
                        let next = self.tree[cur_ix].next;
                        if let Some((next_ix, url, title)) =
                            self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
                        {
                            let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
                            if let Some(prev_ix) = prev {
                                self.tree[prev_ix].next = None;
                            }
                            cur = Some(tos.node);
                            cur_ix = tos.node;
                            let link_ix = self.allocs.allocate_link(LinkType::Inline, url, title);
                            self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
                                ItemBody::Image(link_ix)
                            } else {
                                ItemBody::Link(link_ix)
                            };
                            self.tree[cur_ix].child = self.tree[cur_ix].next;
                            self.tree[cur_ix].next = next_node;
                            self.tree[cur_ix].item.end = next_ix;
                            if let Some(next_node_ix) = next_node {
                                self.tree[next_node_ix].item.start =
                                    max(self.tree[next_node_ix].item.start, next_ix);
                            }

                            // Links cannot be nested inside links.
                            if tos.ty == LinkStackTy::Link {
                                self.link_stack.disable_all_links();
                            }
                        } else {
                            // ok, so its not an inline link. maybe it is a reference
                            // to a defined link?
                            let scan_result = scan_reference(
                                &self.tree,
                                block_text,
                                next,
                                self.options.contains(Options::ENABLE_FOOTNOTES),
                            );
                            let (node_after_link, link_type) = match scan_result {
                                // [label][reference]
                                RefScan::LinkLabel(_, end_ix) => {
                                    // Toggle reference viability of the last closing bracket,
                                    // so that we can skip it on future iterations in case
                                    // it fails in this one. In particular, we won't call
                                    // the broken link callback twice on one reference.
                                    let reference_close_node = if let Some(node) =
                                        scan_nodes_to_ix(&self.tree, next, end_ix - 1)
                                    {
                                        node
                                    } else {
                                        continue;
                                    };
                                    self.tree[reference_close_node].item.body =
                                        ItemBody::MaybeLinkClose(false);
                                    let next_node = self.tree[reference_close_node].next;

                                    (next_node, LinkType::Reference)
                                }
                                // [reference][]
                                RefScan::Collapsed(next_node) => {
                                    // This reference has already been tried, and it's not
                                    // valid. Skip it.
                                    if !could_be_ref {
                                        continue;
                                    }
                                    (next_node, LinkType::Collapsed)
                                }
                                // [shortcut]
                                //
                                // [shortcut]: /blah
                                RefScan::Failed => {
                                    if !could_be_ref {
                                        continue;
                                    }
                                    (next, LinkType::Shortcut)
                                }
                            };

                            // FIXME: references and labels are mixed in the naming of variables
                            // below. Disambiguate!

                            // (label, source_ix end)
                            let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
                                RefScan::LinkLabel(l, end_ix) => {
                                    Some((ReferenceLabel::Link(l), end_ix))
                                }
                                RefScan::Collapsed(..) | RefScan::Failed => {
                                    // No label? maybe it is a shortcut reference
                                    let label_start = self.tree[tos.node].item.end - 1;
                                    scan_link_label(
                                        &self.tree,
                                        &self.text[label_start..self.tree[cur_ix].item.end],
                                        self.options.contains(Options::ENABLE_FOOTNOTES),
                                    )
                                    .map(|(ix, label)| (label, label_start + ix))
                                }
                            };

                            // see if it's a footnote reference
                            if let Some((ReferenceLabel::Footnote(l), end)) = label {
                                self.tree[tos.node].next = node_after_link;
                                self.tree[tos.node].child = None;
                                self.tree[tos.node].item.body =
                                    ItemBody::FootnoteReference(self.allocs.allocate_cow(l));
                                self.tree[tos.node].item.end = end;
                                prev = Some(tos.node);
                                cur = node_after_link;
                                self.link_stack.clear();
                                continue;
                            } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
                                // Resolve against the document's reference
                                // definitions; fall back to the user callback.
                                let type_url_title = self
                                    .allocs
                                    .refdefs
                                    .get(link_label.as_ref())
                                    .map(|matching_def| {
                                        // found a matching definition!
                                        let title = matching_def
                                            .title
                                            .as_ref()
                                            .cloned()
                                            .unwrap_or_else(|| "".into());
                                        let url = matching_def.dest.clone();
                                        (link_type, url, title)
                                    })
                                    .or_else(|| {
                                        match self.broken_link_callback.as_mut() {
                                            Some(callback) => {
                                                // Construct a BrokenLink struct, which will be passed to the callback
                                                let broken_link = BrokenLink {
                                                    span: (self.tree[tos.node].item.start)..end,
                                                    link_type,
                                                    reference: link_label,
                                                };

                                                callback(broken_link).map(|(url, title)| {
                                                    (link_type.to_unknown(), url, title)
                                                })
                                            }
                                            None => None,
                                        }
                                    });

                                if let Some((def_link_type, url, title)) = type_url_title {
                                    let link_ix =
                                        self.allocs.allocate_link(def_link_type, url, title);
                                    self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
                                    {
                                        ItemBody::Image(link_ix)
                                    } else {
                                        ItemBody::Link(link_ix)
                                    };
                                    let label_node = self.tree[tos.node].next;

                                    // lets do some tree surgery to add the link to the tree
                                    // 1st: skip the label node and close node
                                    self.tree[tos.node].next = node_after_link;

                                    // then, if it exists, add the label node as a child to the link node
                                    if label_node != cur {
                                        self.tree[tos.node].child = label_node;

                                        // finally: disconnect list of children
                                        if let Some(prev_ix) = prev {
                                            self.tree[prev_ix].next = None;
                                        }
                                    }

                                    self.tree[tos.node].item.end = end;

                                    // set up cur so next node will be node_after_link
                                    cur = Some(tos.node);
                                    cur_ix = tos.node;

                                    if tos.ty == LinkStackTy::Link {
                                        self.link_stack.disable_all_links();
                                    }
                                }
                            }
                        }
                    }
                }
                _ => (),
            }
            prev = cur;
            cur = self.tree[cur_ix].next;
        }
        self.link_stack.clear();
    }

    /// Resolves `MaybeEmphasis` delimiter runs into Emphasis/Strong/
    /// Strikethrough nodes and `MaybeSmartQuote` items into curly quotes.
    fn handle_emphasis(&mut self) {
        let mut prev = None;
        let mut prev_ix: TreeIndex;
        let mut cur = self.tree.cur();

        let mut single_quote_open: Option<TreeIndex> = None;
        let mut double_quote_open: bool = false;

        while let Some(mut cur_ix) = cur {
            match self.tree[cur_ix].item.body {
                ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
                    let c
= self.text.as_bytes()[self.tree[cur_ix].item.start]; + let both = can_open && can_close; + if can_close { + while let Some(el) = + self.inline_stack.find_match(&mut self.tree, c, count, both) + { + // have a match! + if let Some(prev_ix) = prev { + self.tree[prev_ix].next = None; + } + let match_count = min(count, el.count); + // start, end are tree node indices + let mut end = cur_ix - 1; + let mut start = el.start + el.count; + + // work from the inside out + while start > el.start + el.count - match_count { + let (inc, ty) = if c == b'~' { + (2, ItemBody::Strikethrough) + } else if start > el.start + el.count - match_count + 1 { + (2, ItemBody::Strong) + } else { + (1, ItemBody::Emphasis) + }; + + let root = start - inc; + end = end + inc; + self.tree[root].item.body = ty; + self.tree[root].item.end = self.tree[end].item.end; + self.tree[root].child = Some(start); + self.tree[root].next = None; + start = root; + } + + // set next for top most emph level + prev_ix = el.start + el.count - match_count; + prev = Some(prev_ix); + cur = self.tree[cur_ix + match_count - 1].next; + self.tree[prev_ix].next = cur; + + if el.count > match_count { + self.inline_stack.push(InlineEl { + start: el.start, + count: el.count - match_count, + c: el.c, + both, + }) + } + count -= match_count; + if count > 0 { + cur_ix = cur.unwrap(); + } else { + break; + } + } + } + if count > 0 { + if can_open { + self.inline_stack.push(InlineEl { + start: cur_ix, + count, + c, + both, + }); + } else { + for i in 0..count { + self.tree[cur_ix + i].item.body = ItemBody::Text; + } + } + prev_ix = cur_ix + count - 1; + prev = Some(prev_ix); + cur = self.tree[prev_ix].next; + } + } + ItemBody::MaybeSmartQuote(c, can_open, can_close) => { + self.tree[cur_ix].item.body = match c { + b'\'' => { + if let (Some(open_ix), true) = (single_quote_open, can_close) { + self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘'); + single_quote_open = None; + } else if can_open { + single_quote_open = 
Some(cur_ix); + } + ItemBody::SynthesizeChar('’') + } + _ /* double quote */ => { + if can_close && double_quote_open { + double_quote_open = false; + ItemBody::SynthesizeChar('”') + } else { + if can_open && !double_quote_open { + double_quote_open = true; + } + ItemBody::SynthesizeChar('“') + } + } + }; + prev = cur; + cur = self.tree[cur_ix].next; + } + _ => { + prev = cur; + cur = self.tree[cur_ix].next; + } + } + } + self.inline_stack.pop_all(&mut self.tree); + } + + /// Returns next byte index, url and title. + fn scan_inline_link( + &self, + underlying: &'input str, + mut ix: usize, + node: Option<TreeIndex>, + ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> { + if scan_ch(&underlying.as_bytes()[ix..], b'(') == 0 { + return None; + } + ix += 1; + ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace); + + let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?; + let dest = unescape(dest); + ix += dest_length; + + ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace); + + let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) { + ix += bytes_scanned; + ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace); + t + } else { + "".into() + }; + if scan_ch(&underlying.as_bytes()[ix..], b')') == 0 { + return None; + } + ix += 1; + + Some((ix, dest, title)) + } + + // returns (bytes scanned, title cow) + fn scan_link_title( + &self, + text: &'input str, + start_ix: usize, + node: Option<TreeIndex>, + ) -> Option<(usize, CowStr<'input>)> { + let bytes = text.as_bytes(); + let open = match bytes.get(start_ix) { + Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b, + _ => return None, + }; + let close = if open == b'(' { b')' } else { open }; + + let mut title = String::new(); + let mut mark = start_ix + 1; + let mut i = start_ix + 1; + + while i < bytes.len() { + let c = bytes[i]; + + if c == close { + let cow = if mark == 1 { + (i - start_ix + 1, 
                        text[mark..i].into())
                } else {
                    title.push_str(&text[mark..i]);
                    (i - start_ix + 1, title.into())
                };

                return Some(cow);
            }
            // A second, unescaped `(` inside a paren-delimited title is
            // not allowed.
            if c == open {
                return None;
            }

            if c == b'\n' || c == b'\r' {
                // Line break inside the title: continue on the next node of
                // the block, normalizing the break to a single '\n'.
                if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
                    if self.tree[node_ix].item.start > i {
                        title.push_str(&text[mark..i]);
                        title.push('\n');
                        i = self.tree[node_ix].item.start;
                        mark = i;
                        continue;
                    }
                }
            }
            if c == b'&' {
                // Expand HTML entities into the owned buffer.
                if let (n, Some(value)) = scan_entity(&bytes[i..]) {
                    title.push_str(&text[mark..i]);
                    title.push_str(&value);
                    i += n;
                    mark = i;
                    continue;
                }
            }
            // Backslash escape: skip the backslash, keep the punctuation.
            if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
                title.push_str(&text[mark..i]);
                i += 1;
                mark = i;
            }

            i += 1;
        }

        // Ran off the end of the text without finding the closing delimiter.
        None
    }

    /// Make a code span.
    ///
    /// Both `open` and `close` are matching MaybeCode items.
    fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
        let first_ix = self.tree[open].next.unwrap();
        let bytes = self.text.as_bytes();
        let mut span_start = self.tree[open].item.end;
        let mut span_end = self.tree[close].item.start;
        // `buf` stays `None` while the span content can be borrowed from the
        // source as-is; it is created lazily once a line break forces
        // normalization.
        let mut buf: Option<String> = None;

        // detect all-space sequences, since they are kept as-is as of commonmark 0.29
        if !bytes[span_start..span_end].iter().all(|&b| b == b' ') {
            let opening = matches!(bytes[span_start], b' ' | b'\r' | b'\n');
            let closing = matches!(bytes[span_end - 1], b' ' | b'\r' | b'\n');
            let drop_enclosing_whitespace = opening && closing;

            if drop_enclosing_whitespace {
                span_start += 1;
                if span_start < span_end {
                    span_end -= 1;
                }
            }

            let mut ix = first_ix;

            while ix != close {
                let next_ix = self.tree[ix].next.unwrap();
                if let ItemBody::HardBreak | ItemBody::SoftBreak = self.tree[ix].item.body {
                    if drop_enclosing_whitespace {
                        // check whether break should be ignored
                        if ix == first_ix {
                            ix = next_ix;
                            span_start = min(span_end, self.tree[ix].item.start);
                            continue;
                        } else if next_ix == close && ix > first_ix {
                            break;
                        }
                    }

                    // Interior line break: copy up to it and replace it
                    // with a single space.
                    let end = bytes[self.tree[ix].item.start..]
                        .iter()
                        .position(|&b| b == b'\r' || b == b'\n')
                        .unwrap()
                        + self.tree[ix].item.start;
                    if let Some(ref mut buf) = buf {
                        buf.push_str(&self.text[self.tree[ix].item.start..end]);
                        buf.push(' ');
                    } else {
                        let mut new_buf = String::with_capacity(span_end - span_start);
                        new_buf.push_str(&self.text[span_start..end]);
                        new_buf.push(' ');
                        buf = Some(new_buf);
                    }
                } else if let Some(ref mut buf) = buf {
                    let end = if next_ix == close {
                        span_end
                    } else {
                        self.tree[ix].item.end
                    };
                    buf.push_str(&self.text[self.tree[ix].item.start..end]);
                }
                ix = next_ix;
            }
        }

        let cow = if let Some(buf) = buf {
            buf.into()
        } else {
            self.text[span_start..span_end].into()
        };
        if preceding_backslash {
            // The backslash ate one backtick: keep it as a text node in
            // front of the code span.
            self.tree[open].item.body = ItemBody::Text;
            self.tree[open].item.end = self.tree[open].item.start + 1;
            self.tree[open].next = Some(close);
            self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
            self.tree[close].item.start = self.tree[open].item.start + 1;
        } else {
            self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
            self.tree[open].item.end = self.tree[close].item.end;
            self.tree[open].next = self.tree[close].next;
        }
    }

    /// On success, returns a buffer containing the inline html and byte offset.
    /// When no bytes were skipped, the buffer will be empty and the html can be
    /// represented as a subslice of the input string.
    fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
        let c = *bytes.get(ix)?;
        if c == b'!' {
            Some((
                vec![],
                scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
            ))
        } else if c == b'?'
        {
            Some((
                vec![],
                scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
            ))
        } else {
            let (span, i) = scan_html_block_inner(
                // Subtract 1 to include the < character
                &bytes[(ix - 1)..],
                Some(&|bytes| {
                    let mut line_start = LineStart::new(bytes);
                    let _ = scan_containers(&self.tree, &mut line_start);
                    line_start.bytes_scanned()
                }),
            )?;
            Some((span, i + ix - 1))
        }
    }

    /// Consumes the event iterator and produces an iterator that produces
    /// `(Event, Range)` pairs, where the `Range` value maps to the corresponding
    /// range in the markdown source.
    pub fn into_offset_iter(self) -> OffsetIter<'input, 'callback> {
        OffsetIter { inner: self }
    }
}

/// Returns number of containers scanned.
pub(crate) fn scan_containers(tree: &Tree<Item>, line_start: &mut LineStart) -> usize {
    let mut i = 0;
    for &node_ix in tree.walk_spine() {
        match tree[node_ix].item.body {
            ItemBody::BlockQuote => {
                // `scan_blockquote_marker` saves & restores internally
                if !line_start.scan_blockquote_marker() {
                    break;
                }
            }
            ItemBody::ListItem(indent) => {
                let save = line_start.clone();
                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
                    *line_start = save;
                    break;
                }
            }
            _ => (),
        }
        i += 1;
    }
    i
}

// NOTE(review): the `'a` lifetime parameter on this impl is unused.
impl<'a> Tree<Item> {
    /// Appends a text item covering `start..end`, merging it into the
    /// current node when that node is a Text item ending exactly at `start`.
    pub(crate) fn append_text(&mut self, start: usize, end: usize) {
        if end > start {
            if let Some(ix) = self.cur() {
                if ItemBody::Text == self[ix].item.body && self[ix].item.end == start {
                    self[ix].item.end = end;
                    return;
                }
            }
            self.append(Item {
                start,
                end,
                body: ItemBody::Text,
            });
        }
    }
}

/// A run of potential emphasis/strikethrough delimiters on the stack.
#[derive(Copy, Clone, Debug)]
struct InlineEl {
    start: TreeIndex, // offset of tree node
    count: usize,
    c: u8,      // b'*', b'_' or b'~' (see `handle_emphasis` / TILDES below)
    both: bool, // can both open and close
}

#[derive(Debug, Clone, Default)]
struct InlineStack {
    stack: Vec<InlineEl>,
    // Lower bounds for matching indices in the stack. For example
    // a strikethrough delimiter will never match with any element
    // in the stack with index smaller than
    // `lower_bounds[InlineStack::TILDES]`.
    lower_bounds: [usize; 7],
}

impl InlineStack {
    /// These are indices into the lower bounds array.
    /// Not both refers to the property that the delimiter can not both
    /// be opener as a closer.
    const UNDERSCORE_NOT_BOTH: usize = 0;
    const ASTERISK_NOT_BOTH: usize = 1;
    const ASTERISK_BASE: usize = 2;
    const TILDES: usize = 5;
    const UNDERSCORE_BOTH: usize = 6;

    /// Drains the stack, demoting every unmatched delimiter run to plain
    /// text, and resets all lower bounds.
    fn pop_all(&mut self, tree: &mut Tree<Item>) {
        for el in self.stack.drain(..) {
            for i in 0..el.count {
                tree[el.start + i].item.body = ItemBody::Text;
            }
        }
        self.lower_bounds = [0; 7];
    }

    fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
        if c == b'_' {
            if both {
                self.lower_bounds[InlineStack::UNDERSCORE_BOTH]
            } else {
                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH]
            }
        } else if c == b'*' {
            // Asterisk bounds are kept per `count % 3` (CommonMark's
            // "multiple of three" rule).
            let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
            if both {
                mod3_lower
            } else {
                min(
                    mod3_lower,
                    self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
                )
            }
        } else {
            self.lower_bounds[InlineStack::TILDES]
        }
    }

    fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
        if c == b'_' {
            if both {
                self.lower_bounds[InlineStack::UNDERSCORE_BOTH] = new_bound;
            } else {
                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
            }
        } else if c == b'*' {
            self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
            if !both {
                self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
            }
        } else {
            self.lower_bounds[InlineStack::TILDES] = new_bound;
        }
    }

    /// Finds the nearest compatible opener for a closer run of `count`
    /// repeats of `c`. Everything above the match is demoted to text; on
    /// failure the lower bound is raised so the scan is not repeated.
    fn find_match(
        &mut self,
        tree: &mut Tree<Item>,
        c: u8,
        count: usize,
        both: bool,
    ) -> Option<InlineEl> {
        let lowerbound = min(self.stack.len(), self.get_lowerbound(c, count, both));
        let res = self.stack[lowerbound..]
            .iter()
            .cloned()
            .enumerate()
            .rfind(|(_, el)| {
                el.c == c && (!both && !el.both || (count + el.count) % 3 != 0 || count % 3 == 0)
            });

        if let Some((matching_ix, matching_el)) = res {
            let matching_ix = matching_ix + lowerbound;
            for el in &self.stack[(matching_ix + 1)..] {
                for i in 0..el.count {
                    tree[el.start + i].item.body = ItemBody::Text;
                }
            }
            self.stack.truncate(matching_ix);
            Some(matching_el)
        } else {
            self.set_lowerbound(c, count, both, self.stack.len());
            None
        }
    }

    fn push(&mut self, el: InlineEl) {
        self.stack.push(el)
    }
}

/// Outcome of trying to read a reference after a `]`.
#[derive(Debug, Clone)]
enum RefScan<'a> {
    // label, source ix of label end
    LinkLabel(CowStr<'a>, usize),
    // contains next node index
    Collapsed(Option<TreeIndex>),
    Failed,
}

/// Skips forward within a block to a node which spans (ends inclusive) the given
/// index into the source.
fn scan_nodes_to_ix(
    tree: &Tree<Item>,
    mut node: Option<TreeIndex>,
    ix: usize,
) -> Option<TreeIndex> {
    while let Some(node_ix) = node {
        if tree[node_ix].item.end <= ix {
            node = tree[node_ix].next;
        } else {
            break;
        }
    }
    node
}

/// Scans an inline link label, which cannot be interrupted.
/// Returns number of bytes (including brackets) and label on success.
fn scan_link_label<'text, 'tree>(
    tree: &'tree Tree<Item>,
    text: &'text str,
    allow_footnote_refs: bool,
) -> Option<(usize, ReferenceLabel<'text>)> {
    let bytes = &text.as_bytes();
    if bytes.len() < 2 || bytes[0] != b'[' {
        return None;
    }
    // Line breaks inside the label must skip past container markers
    // (blockquote `>`, list indents) at the start of the next line.
    let linebreak_handler = |bytes: &[u8]| {
        let mut line_start = LineStart::new(bytes);
        let _ = scan_containers(tree, &mut line_start);
        Some(line_start.bytes_scanned())
    };
    // `[^...]` is a footnote label when the footnote extension is on.
    let pair = if allow_footnote_refs && b'^' == bytes[1] {
        let (byte_index, cow) = scan_link_label_rest(&text[2..], &linebreak_handler)?;
        (byte_index + 2, ReferenceLabel::Footnote(cow))
    } else {
        let (byte_index, cow) = scan_link_label_rest(&text[1..], &linebreak_handler)?;
        (byte_index + 1, ReferenceLabel::Link(cow))
    };
    Some(pair)
}

/// Tries to read a reference starting at node `cur`: either the collapsed
/// form `[]` or a full `[label]`.
fn scan_reference<'a, 'b>(
    tree: &'a Tree<Item>,
    text: &'b str,
    cur: Option<TreeIndex>,
    allow_footnote_refs: bool,
) -> RefScan<'b> {
    let cur_ix = match cur {
        None => return RefScan::Failed,
        Some(cur_ix) => cur_ix,
    };
    let start = tree[cur_ix].item.start;
    let tail = &text.as_bytes()[start..];

    if tail.starts_with(b"[]") {
        // TODO: this unwrap is sus and should be looked at closer
        let closing_node = tree[cur_ix].next.unwrap();
        RefScan::Collapsed(tree[closing_node].next)
    } else if let Some((ix, ReferenceLabel::Link(label))) =
        scan_link_label(tree, &text[start..], allow_footnote_refs)
    {
        RefScan::LinkLabel(label, start + ix)
    } else {
        RefScan::Failed
    }
}

/// Stack of pending link/image openers. `disabled_ix` marks the prefix of
/// `inner` whose `Link` entries have already been disabled, so
/// `disable_all_links` never rescans it.
#[derive(Clone, Default)]
struct LinkStack {
    inner: Vec<LinkStackEl>,
    disabled_ix: usize,
}

impl LinkStack {
    fn push(&mut self, el: LinkStackEl) {
        self.inner.push(el);
    }

    fn pop(&mut self) -> Option<LinkStackEl> {
        let el = self.inner.pop();
        // Keep the disabled watermark within bounds after shrinking.
        self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
        el
    }

    fn clear(&mut self) {
        self.inner.clear();
        self.disabled_ix = 0;
    }

    /// Disables every pending `Link` opener (links cannot nest in links);
    /// `Image` openers stay usable.
    fn disable_all_links(&mut self) {
        for el in &mut self.inner[self.disabled_ix..] {
            if el.ty == LinkStackTy::Link {
                el.ty = LinkStackTy::Disabled;
            }
        }
        self.disabled_ix = self.inner.len();
    }
}

#[derive(Clone, Debug)]
struct LinkStackEl {
    node: TreeIndex,
    ty: LinkStackTy,
}

#[derive(PartialEq, Clone, Debug)]
enum LinkStackTy {
    Link,
    Image,
    Disabled,
}

/// Contains the destination URL, title and source span of a reference definition.
#[derive(Clone)]
pub struct LinkDef<'a> {
    pub dest: CowStr<'a>,
    pub title: Option<CowStr<'a>>,
    pub span: Range<usize>,
}

/// Tracks tree indices of code span delimiters of each length. It should prevent
/// quadratic scanning behaviours by providing (amortized) constant time lookups.
struct CodeDelims {
    inner: HashMap<usize, VecDeque<TreeIndex>>,
    seen_first: bool,
}

impl CodeDelims {
    fn new() -> Self {
        Self {
            inner: Default::default(),
            seen_first: false,
        }
    }

    fn insert(&mut self, count: usize, ix: TreeIndex) {
        if self.seen_first {
            self.inner
                .entry(count)
                .or_insert_with(Default::default)
                .push_back(ix);
        } else {
            // Skip the first insert, since that delimiter will always
            // be an opener and not a closer.
            self.seen_first = true;
        }
    }

    fn is_populated(&self) -> bool {
        !self.inner.is_empty()
    }

    /// Pops delimiters of length `count` until one lying after `open_ix`
    /// is found; entries at or before `open_ix` are stale and discarded.
    fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
        while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
            if ix > open_ix {
                return Some(ix);
            }
        }
        None
    }

    fn clear(&mut self) {
        self.inner.clear();
        self.seen_first = false;
    }
}

// Typed indices into the `Allocations` side tables below; keeps `ItemBody`
// small and `Copy`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct LinkIndex(usize);

#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct CowIndex(usize);

#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct AlignmentIndex(usize);

#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) struct HeadingIndex(NonZeroUsize);

/// Side tables for data that does not fit in the fixed-size tree items.
#[derive(Clone)]
pub(crate) struct Allocations<'a> {
    pub refdefs: RefDefs<'a>,
    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>)>,
    cows: Vec<CowStr<'a>>,
    alignments: Vec<Vec<Alignment>>,
    headings: Vec<HeadingAttributes<'a>>,
}

/// Used by the heading attributes extension.
#[derive(Clone)]
pub(crate) struct HeadingAttributes<'a> {
    pub id: Option<&'a str>,
    pub classes: Vec<&'a str>,
}

/// Keeps track of the reference definitions defined in the document.
#[derive(Clone, Default)]
pub struct RefDefs<'input>(pub(crate) HashMap<LinkLabel<'input>, LinkDef<'input>>);

impl<'input, 'b, 's> RefDefs<'input>
where
    's: 'b,
{
    /// Performs a lookup on reference label using unicode case folding.
    pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
        self.0.get(&UniCase::new(key.into()))
    }

    /// Provides an iterator over all the document's reference definitions.
    pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
        self.0.iter().map(|(k, v)| (k.as_ref(), v))
    }
}

impl<'a> Allocations<'a> {
    pub fn new() -> Self {
        Self {
            refdefs: RefDefs::default(),
            links: Vec::with_capacity(128),
            cows: Vec::new(),
            alignments: Vec::new(),
            headings: Vec::new(),
        }
    }

    pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
        let ix = self.cows.len();
        self.cows.push(cow);
        CowIndex(ix)
    }

    pub fn allocate_link(&mut self, ty: LinkType, url: CowStr<'a>, title: CowStr<'a>) -> LinkIndex {
        let ix = self.links.len();
        self.links.push((ty, url, title));
        LinkIndex(ix)
    }

    pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
        let ix = self.alignments.len();
        self.alignments.push(alignment);
        AlignmentIndex(ix)
    }

    pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
        let ix = self.headings.len();
        self.headings.push(attrs);
        // This won't panic. `self.headings.len()` can't be `usize::MAX` since
        // such a long Vec cannot fit in memory.
        // `HeadingIndex` stores `ix + 1` so that `Option<HeadingIndex>`
        // benefits from the NonZero niche.
        let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
        HeadingIndex(ix_nonzero)
    }
}

impl<'a> Index<CowIndex> for Allocations<'a> {
    type Output = CowStr<'a>;

    fn index(&self, ix: CowIndex) -> &Self::Output {
        self.cows.index(ix.0)
    }
}

impl<'a> Index<LinkIndex> for Allocations<'a> {
    type Output = (LinkType, CowStr<'a>, CowStr<'a>);

    fn index(&self, ix: LinkIndex) -> &Self::Output {
        self.links.index(ix.0)
    }
}

impl<'a> Index<AlignmentIndex> for Allocations<'a> {
    type Output = Vec<Alignment>;

    fn index(&self, ix: AlignmentIndex) -> &Self::Output {
        self.alignments.index(ix.0)
    }
}

impl<'a> Index<HeadingIndex> for Allocations<'a> {
    type Output = HeadingAttributes<'a>;

    fn index(&self, ix: HeadingIndex) -> &Self::Output {
        // Undo the `+ 1` applied in `allocate_heading`.
        self.headings.index(ix.0.get() - 1)
    }
}

/// A struct containing information on the reachability of certain inline HTML
/// elements. In particular, for cdata elements (`<![CDATA[`), processing
/// elements (`<?`) and declarations (`<!DECLARATION`). The respectives usizes
/// represent the indices before which a scan will always fail and can hence
/// be skipped.
#[derive(Clone, Default)]
pub(crate) struct HtmlScanGuard {
    pub cdata: usize,
    pub processing: usize,
    pub declaration: usize,
}

/// Optional callback invoked when a link reference cannot be resolved;
/// returning `Some((url, title))` repairs the link.
pub type BrokenLinkCallback<'input, 'borrow> =
    Option<&'borrow mut dyn FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>>;

/// Markdown event and source range iterator.
///
/// Generates tuples where the first element is the markdown event and the second
/// is a the corresponding range in the source string.
///
/// Constructed from a `Parser` using its
/// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method.
pub struct OffsetIter<'a, 'b> {
    inner: Parser<'a, 'b>,
}

impl<'a, 'b> OffsetIter<'a, 'b> {
    /// Returns a reference to the internal reference definition tracker.
+ pub fn reference_definitions(&self) -> &RefDefs { + self.inner.reference_definitions() + } +} + +impl<'a, 'b> Iterator for OffsetIter<'a, 'b> { + type Item = (Event<'a>, Range<usize>); + + fn next(&mut self) -> Option<Self::Item> { + match self.inner.tree.cur() { + None => { + let ix = self.inner.tree.pop()?; + let tag = item_to_tag(&self.inner.tree[ix].item, &self.inner.allocs); + self.inner.tree.next_sibling(ix); + let span = self.inner.tree[ix].item.start..self.inner.tree[ix].item.end; + debug_assert!(span.start <= span.end); + Some((Event::End(tag), span)) + } + Some(cur_ix) => { + if self.inner.tree[cur_ix].item.body.is_inline() { + self.inner.handle_inline(); + } + + let node = self.inner.tree[cur_ix]; + let item = node.item; + let event = item_to_event(item, self.inner.text, &self.inner.allocs); + if let Event::Start(..) = event { + self.inner.tree.push(); + } else { + self.inner.tree.next_sibling(cur_ix); + } + debug_assert!(item.start <= item.end); + Some((event, item.start..item.end)) + } + } + } +} + +fn item_to_tag<'a>(item: &Item, allocs: &Allocations<'a>) -> Tag<'a> { + match item.body { + ItemBody::Paragraph => Tag::Paragraph, + ItemBody::Emphasis => Tag::Emphasis, + ItemBody::Strong => Tag::Strong, + ItemBody::Strikethrough => Tag::Strikethrough, + ItemBody::Link(link_ix) => { + let &(ref link_type, ref url, ref title) = allocs.index(link_ix); + Tag::Link(*link_type, url.clone(), title.clone()) + } + ItemBody::Image(link_ix) => { + let &(ref link_type, ref url, ref title) = allocs.index(link_ix); + Tag::Image(*link_type, url.clone(), title.clone()) + } + ItemBody::Heading(level, Some(heading_ix)) => { + let HeadingAttributes { id, classes } = allocs.index(heading_ix); + Tag::Heading(level, *id, classes.clone()) + } + ItemBody::Heading(level, None) => Tag::Heading(level, None, Vec::new()), + ItemBody::FencedCodeBlock(cow_ix) => { + Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone())) + } + ItemBody::IndentCodeBlock => 
Tag::CodeBlock(CodeBlockKind::Indented), + ItemBody::BlockQuote => Tag::BlockQuote, + ItemBody::List(_, c, listitem_start) => { + if c == b'.' || c == b')' { + Tag::List(Some(listitem_start)) + } else { + Tag::List(None) + } + } + ItemBody::ListItem(_) => Tag::Item, + ItemBody::TableHead => Tag::TableHead, + ItemBody::TableCell => Tag::TableCell, + ItemBody::TableRow => Tag::TableRow, + ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()), + ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()), + _ => panic!("unexpected item body {:?}", item.body), + } +} + +fn item_to_event<'a>(item: Item, text: &'a str, allocs: &Allocations<'a>) -> Event<'a> { + let tag = match item.body { + ItemBody::Text => return Event::Text(text[item.start..item.end].into()), + ItemBody::Code(cow_ix) => return Event::Code(allocs[cow_ix].clone()), + ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs[cow_ix].clone()), + ItemBody::SynthesizeChar(c) => return Event::Text(c.into()), + ItemBody::Html => return Event::Html(text[item.start..item.end].into()), + ItemBody::OwnedHtml(cow_ix) => return Event::Html(allocs[cow_ix].clone()), + ItemBody::SoftBreak => return Event::SoftBreak, + ItemBody::HardBreak => return Event::HardBreak, + ItemBody::FootnoteReference(cow_ix) => { + return Event::FootnoteReference(allocs[cow_ix].clone()) + } + ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked), + ItemBody::Rule => return Event::Rule, + + ItemBody::Paragraph => Tag::Paragraph, + ItemBody::Emphasis => Tag::Emphasis, + ItemBody::Strong => Tag::Strong, + ItemBody::Strikethrough => Tag::Strikethrough, + ItemBody::Link(link_ix) => { + let &(ref link_type, ref url, ref title) = allocs.index(link_ix); + Tag::Link(*link_type, url.clone(), title.clone()) + } + ItemBody::Image(link_ix) => { + let &(ref link_type, ref url, ref title) = allocs.index(link_ix); + Tag::Image(*link_type, url.clone(), title.clone()) + } + 
ItemBody::Heading(level, Some(heading_ix)) => { + let HeadingAttributes { id, classes } = allocs.index(heading_ix); + Tag::Heading(level, *id, classes.clone()) + } + ItemBody::Heading(level, None) => Tag::Heading(level, None, Vec::new()), + ItemBody::FencedCodeBlock(cow_ix) => { + Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone())) + } + ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented), + ItemBody::BlockQuote => Tag::BlockQuote, + ItemBody::List(_, c, listitem_start) => { + if c == b'.' || c == b')' { + Tag::List(Some(listitem_start)) + } else { + Tag::List(None) + } + } + ItemBody::ListItem(_) => Tag::Item, + ItemBody::TableHead => Tag::TableHead, + ItemBody::TableCell => Tag::TableCell, + ItemBody::TableRow => Tag::TableRow, + ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()), + ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()), + _ => panic!("unexpected item body {:?}", item.body), + }; + + Event::Start(tag) +} + +impl<'a, 'b> Iterator for Parser<'a, 'b> { + type Item = Event<'a>; + + fn next(&mut self) -> Option<Event<'a>> { + match self.tree.cur() { + None => { + let ix = self.tree.pop()?; + let tag = item_to_tag(&self.tree[ix].item, &self.allocs); + self.tree.next_sibling(ix); + Some(Event::End(tag)) + } + Some(cur_ix) => { + if self.tree[cur_ix].item.body.is_inline() { + self.handle_inline(); + } + + let node = self.tree[cur_ix]; + let item = node.item; + let event = item_to_event(item, self.text, &self.allocs); + if let Event::Start(..) = event { + self.tree.push(); + } else { + self.tree.next_sibling(cur_ix); + } + Some(event) + } + } + } +} + +impl FusedIterator for Parser<'_, '_> {} + +#[cfg(test)] +mod test { + use super::*; + use crate::tree::Node; + + // TODO: move these tests to tests/html.rs? 

    /// Builds a `Parser` with every extension exercised by these tests enabled.
    fn parser_with_extensions(text: &str) -> Parser<'_, 'static> {
        let mut opts = Options::empty();
        opts.insert(Options::ENABLE_TABLES);
        opts.insert(Options::ENABLE_FOOTNOTES);
        opts.insert(Options::ENABLE_STRIKETHROUGH);
        opts.insert(Options::ENABLE_TASKLISTS);

        Parser::new_ext(text, opts)
    }

    // Size regressions: the tree node and item body are hot, so growing them
    // is a performance bug that should be caught explicitly.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn node_size() {
        let node_size = std::mem::size_of::<Node<Item>>();
        assert_eq!(48, node_size);
    }

    #[test]
    #[cfg(target_pointer_width = "64")]
    fn body_size() {
        let body_size = std::mem::size_of::<ItemBody>();
        assert_eq!(16, body_size);
    }

    #[test]
    fn single_open_fish_bracket() {
        // dont crash
        assert_eq!(3, Parser::new("<").count());
    }

    #[test]
    fn lone_hashtag() {
        // dont crash
        assert_eq!(2, Parser::new("#").count());
    }

    #[test]
    fn lots_of_backslashes() {
        // dont crash
        Parser::new("\\\\\r\r").count();
        Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
    }

    #[test]
    fn issue_320() {
        // dont crash
        parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
    }

    #[test]
    fn issue_319() {
        // dont crash
        parser_with_extensions("|\r-]([^|\r-]([^").count();
        parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
    }

    #[test]
    fn issue_303() {
        // dont crash
        parser_with_extensions("[^\r\ra]").count();
        parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
    }

    #[test]
    fn issue_313() {
        // dont crash
        parser_with_extensions("*]0[^\r\r*]0[^").count();
        parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
    }

    #[test]
    fn issue_311() {
        // dont crash
        parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
    }

    #[test]
    fn issue_283() {
        let input = std::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
        // dont crash
        parser_with_extensions(input).count();
    }

    #[test]
    fn issue_289() {
        // dont crash
        parser_with_extensions("> - \\\n> - ").count();
        parser_with_extensions("- \n\n").count();
    }

    #[test]
    fn issue_306() {
        // dont crash
        parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
    }

    #[test]
    fn issue_305() {
        // dont crash
        parser_with_extensions("_6**6*_*").count();
    }

    #[test]
    fn another_emphasis_panic() {
        parser_with_extensions("*__#_#__*").count();
    }

    #[test]
    fn offset_iter() {
        let event_offsets: Vec<_> = Parser::new("*hello* world")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
        assert_eq!(expected_offsets, event_offsets);
    }

    #[test]
    fn reference_link_offsets() {
        let range =
            Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
                .into_offset_iter()
                .filter_map(|(ev, range)| match ev {
                    Event::Start(Tag::Link(LinkType::Reference, ..), ..) => Some(range),
                    _ => None,
                })
                .next()
                .unwrap();
        assert_eq!(5..30, range);
    }

    #[test]
    fn footnote_offsets() {
        let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
            .into_offset_iter()
            .filter_map(|(ev, range)| match ev {
                Event::FootnoteReference(..) => Some(range),
                _ => None,
            })
            .next()
            .unwrap();
        assert_eq!(12..16, range);
    }

    #[test]
    fn table_offset() {
        let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .nth(3)
            .unwrap();
        let expected_offset = 3..59;
        assert_eq!(expected_offset, event_offset);
    }

    #[test]
    fn table_cell_span() {
        let markdown = "a|b|c\n--|--|--\na| |c";
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .filter_map(|(ev, span)| match ev {
                Event::Start(Tag::TableCell) => Some(span),
                _ => None,
            })
            .nth(4)
            .unwrap();
        let expected_offset_start = "a|b|c\n--|--|--\na|".len();
        assert_eq!(
            expected_offset_start..(expected_offset_start + 2),
            event_offset
        );
    }

    #[test]
    fn offset_iter_issue_378() {
        let event_offsets: Vec<_> = Parser::new("a [b](c) d")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
        assert_eq!(expected_offsets, event_offsets);
    }

    #[test]
    fn offset_iter_issue_404() {
        let event_offsets: Vec<_> = Parser::new("###\n")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..4), (0..4)];
        assert_eq!(expected_offsets, event_offsets);
    }

    // FIXME: add this one to the regression suite
    #[test]
    fn link_def_at_eof() {
        let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
        let expected = "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n";

        let mut buf = String::new();
        crate::html::push_html(&mut buf, Parser::new(test_str));
        assert_eq!(expected, buf);
    }

    #[test]
    fn no_footnote_refs_without_option() {
        let test_str = "a [^a]\n\n[^a]: yolo";
        let expected = "<p>a <a href=\"yolo\">^a</a></p>\n";

        let mut buf = String::new();
        crate::html::push_html(&mut buf, Parser::new(test_str));
        assert_eq!(expected, buf);
    }

    #[test]
    fn ref_def_at_eof() {
        let test_str = "[test]:\\";
        let expected = "";

        let mut buf = String::new();
        crate::html::push_html(&mut buf, Parser::new(test_str));
        assert_eq!(expected, buf);
    }

    #[test]
    fn ref_def_cr_lf() {
        let test_str = "[a]: /u\r\n\n[a]";
        let expected = "<p><a href=\"/u\">a</a></p>\n";

        let mut buf = String::new();
        crate::html::push_html(&mut buf, Parser::new(test_str));
        assert_eq!(expected, buf);
    }

    #[test]
    fn no_dest_refdef() {
        let test_str = "[a]:";
        let expected = "<p>[a]:</p>\n";

        let mut buf = String::new();
        crate::html::push_html(&mut buf, Parser::new(test_str));
        assert_eq!(expected, buf);
    }

    #[test]
    fn broken_links_called_only_once() {
        for &(markdown, expected) in &[
            ("See also [`g()`][crate::g].", 1),
            ("See also [`g()`][crate::g][].", 1),
            ("[brokenlink1] some other node [brokenlink2]", 2),
        ] {
            let mut times_called = 0;
            let callback = &mut |_broken_link: BrokenLink| {
                times_called += 1;
                None
            };
            let parser =
                Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
            for _ in parser {}
            assert_eq!(times_called, expected);
        }
    }

    #[test]
    fn simple_broken_link_callback() {
        let test_str = "This is a link w/o def: [hello][world]";
        let mut callback = |broken_link: BrokenLink| {
            assert_eq!("world", broken_link.reference.as_ref());
            assert_eq!(&test_str[broken_link.span], "[hello][world]");
            let url = "YOLO".into();
            let title = "SWAG".to_owned().into();
            Some((url, title))
        };
        let parser =
            Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
        let mut link_tag_count = 0;
        for (typ, url, title) in parser.filter_map(|event| match event {
            Event::Start(tag) | Event::End(tag) => match tag {
                Tag::Link(typ, url, title) => Some((typ, url, title)),
                _ => None,
            },
            _ => None,
        }) {
            link_tag_count += 1;
            assert_eq!(typ, LinkType::ReferenceUnknown);
            assert_eq!(url.as_ref(), "YOLO");
            assert_eq!(title.as_ref(), "SWAG");
        }
        assert!(link_tag_count > 0);
    }

    #[test]
    fn code_block_kind_check_fenced() {
        let parser = Parser::new("hello\n```test\ntadam\n```");
        let mut found = 0;
        for (ev, _range) in parser.into_offset_iter() {
            match ev {
                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) => {
                    assert_eq!(syntax.as_ref(), "test");
                    found += 1;
                }
                _ => {}
            }
        }
        assert_eq!(found, 1);
    }

    #[test]
    fn code_block_kind_check_indented() {
        let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");
        let mut found = 0;
        for (ev, _range) in parser.into_offset_iter() {
            match ev {
                Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => {
                    found += 1;
                }
                _ => {}
            }
        }
        assert_eq!(found, 1);
    }

    #[test]
    fn ref_defs() {
        let input = r###"[a B c]: http://example.com
[another]: https://google.com

text

[final ONE]: http://wikipedia.org
"###;
        let mut parser = Parser::new(input);

        // Lookups are case-folded, per the CommonMark link label rules.
        assert!(parser.reference_definitions().get("a b c").is_some());
        assert!(parser.reference_definitions().get("nope").is_none());

        if let Some(_event) = parser.next() {
            // testing keys with shorter lifetimes than parser and its input
            let s = "final one".to_owned();
            let link_def = parser.reference_definitions().get(&s).unwrap();
            let span = &input[link_def.span.clone()];
            assert_eq!(span, "[final ONE]: http://wikipedia.org");
        }
    }

    #[test]
    fn common_lifetime_patterns_allowed<'b>() {
        let temporary_str = String::from("xyz");

        // NOTE: this is a limitation of Rust, it doesn't allow putting lifetime parameters on the closure itself.
        // Hack it by attaching the lifetime to the test function instead.
        // TODO: why is the `'b` lifetime required at all? Changing it to `'_` breaks things :(
        let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference.into()));

        fn function<'a>(link: BrokenLink<'a>) -> Option<(CowStr<'a>, CowStr<'a>)> {
            Some(("#".into(), link.reference))
        }

        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut closure),
        ) {}
        /* This fails to compile. Because the closure can't say `for <'a> fn(BrokenLink<'a>) ->
         * CowStr<'a>` and has to use the enclosing `'b` lifetime parameter, `temporary_str` lives
         * shorter than `'b`. I think this is unlikely to occur in real life, and if it does, the
         * fix is simple: move it out to a function that allows annotating the lifetimes.
         */
        //for _ in Parser::new_with_broken_link_callback(&temporary_str, Options::empty(), Some(&mut callback)) {
        //}

        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut function),
        ) {}
        for _ in Parser::new_with_broken_link_callback(
            &temporary_str,
            Options::empty(),
            Some(&mut function),
        ) {}
    }
}
diff --git a/vendor/pulldown-cmark/src/puncttable.rs b/vendor/pulldown-cmark/src/puncttable.rs
new file mode 100644
index 000000000..5acdfbea7
--- /dev/null
+++ b/vendor/pulldown-cmark/src/puncttable.rs
@@ -0,0 +1,351 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//! CommonMark punctuation set based on spec and Unicode properties. + +// Autogenerated by mk_puncttable.py + +const PUNCT_MASKS_ASCII: [u16; 8] = [ + 0x0000, // U+0000...U+000F + 0x0000, // U+0010...U+001F + 0xfffe, // U+0020...U+002F + 0xfc00, // U+0030...U+003F + 0x0001, // U+0040...U+004F + 0xf800, // U+0050...U+005F + 0x0001, // U+0060...U+006F + 0x7800, // U+0070...U+007F +]; + +const PUNCT_TAB: [u16; 132] = [ + 10, // U+00A0...U+00AF + 11, // U+00B0...U+00BF + 55, // U+0370...U+037F + 56, // U+0380...U+038F + 85, // U+0550...U+055F + 88, // U+0580...U+058F + 91, // U+05B0...U+05BF + 92, // U+05C0...U+05CF + 95, // U+05F0...U+05FF + 96, // U+0600...U+060F + 97, // U+0610...U+061F + 102, // U+0660...U+066F + 109, // U+06D0...U+06DF + 112, // U+0700...U+070F + 127, // U+07F0...U+07FF + 131, // U+0830...U+083F + 133, // U+0850...U+085F + 150, // U+0960...U+096F + 151, // U+0970...U+097F + 175, // U+0AF0...U+0AFF + 223, // U+0DF0...U+0DFF + 228, // U+0E40...U+0E4F + 229, // U+0E50...U+0E5F + 240, // U+0F00...U+0F0F + 241, // U+0F10...U+0F1F + 243, // U+0F30...U+0F3F + 248, // U+0F80...U+0F8F + 253, // U+0FD0...U+0FDF + 260, // U+1040...U+104F + 271, // U+10F0...U+10FF + 310, // U+1360...U+136F + 320, // U+1400...U+140F + 358, // U+1660...U+166F + 361, // U+1690...U+169F + 366, // U+16E0...U+16EF + 371, // U+1730...U+173F + 381, // U+17D0...U+17DF + 384, // U+1800...U+180F + 404, // U+1940...U+194F + 417, // U+1A10...U+1A1F + 426, // U+1AA0...U+1AAF + 437, // U+1B50...U+1B5F + 
438, // U+1B60...U+1B6F + 447, // U+1BF0...U+1BFF + 451, // U+1C30...U+1C3F + 455, // U+1C70...U+1C7F + 460, // U+1CC0...U+1CCF + 461, // U+1CD0...U+1CDF + 513, // U+2010...U+201F + 514, // U+2020...U+202F + 515, // U+2030...U+203F + 516, // U+2040...U+204F + 517, // U+2050...U+205F + 519, // U+2070...U+207F + 520, // U+2080...U+208F + 560, // U+2300...U+230F + 562, // U+2320...U+232F + 630, // U+2760...U+276F + 631, // U+2770...U+277F + 636, // U+27C0...U+27CF + 638, // U+27E0...U+27EF + 664, // U+2980...U+298F + 665, // U+2990...U+299F + 669, // U+29D0...U+29DF + 671, // U+29F0...U+29FF + 719, // U+2CF0...U+2CFF + 727, // U+2D70...U+2D7F + 736, // U+2E00...U+2E0F + 737, // U+2E10...U+2E1F + 738, // U+2E20...U+2E2F + 739, // U+2E30...U+2E3F + 740, // U+2E40...U+2E4F + 768, // U+3000...U+300F + 769, // U+3010...U+301F + 771, // U+3030...U+303F + 778, // U+30A0...U+30AF + 783, // U+30F0...U+30FF + 2639, // U+A4F0...U+A4FF + 2656, // U+A600...U+A60F + 2663, // U+A670...U+A67F + 2671, // U+A6F0...U+A6FF + 2695, // U+A870...U+A87F + 2700, // U+A8C0...U+A8CF + 2703, // U+A8F0...U+A8FF + 2706, // U+A920...U+A92F + 2709, // U+A950...U+A95F + 2716, // U+A9C0...U+A9CF + 2717, // U+A9D0...U+A9DF + 2725, // U+AA50...U+AA5F + 2733, // U+AAD0...U+AADF + 2735, // U+AAF0...U+AAFF + 2750, // U+ABE0...U+ABEF + 4051, // U+FD30...U+FD3F + 4065, // U+FE10...U+FE1F + 4067, // U+FE30...U+FE3F + 4068, // U+FE40...U+FE4F + 4069, // U+FE50...U+FE5F + 4070, // U+FE60...U+FE6F + 4080, // U+FF00...U+FF0F + 4081, // U+FF10...U+FF1F + 4082, // U+FF20...U+FF2F + 4083, // U+FF30...U+FF3F + 4085, // U+FF50...U+FF5F + 4086, // U+FF60...U+FF6F + 4112, // U+10100...U+1010F + 4153, // U+10390...U+1039F + 4157, // U+103D0...U+103DF + 4182, // U+10560...U+1056F + 4229, // U+10850...U+1085F + 4241, // U+10910...U+1091F + 4243, // U+10930...U+1093F + 4261, // U+10A50...U+10A5F + 4263, // U+10A70...U+10A7F + 4271, // U+10AF0...U+10AFF + 4275, // U+10B30...U+10B3F + 4281, // U+10B90...U+10B9F + 4356, // 
U+11040...U+1104F + 4363, // U+110B0...U+110BF + 4364, // U+110C0...U+110CF + 4372, // U+11140...U+1114F + 4375, // U+11170...U+1117F + 4380, // U+111C0...U+111CF + 4387, // U+11230...U+1123F + 4428, // U+114C0...U+114CF + 4444, // U+115C0...U+115CF + 4452, // U+11640...U+1164F + 4679, // U+12470...U+1247F + 5798, // U+16A60...U+16A6F + 5807, // U+16AF0...U+16AFF + 5811, // U+16B30...U+16B3F + 5812, // U+16B40...U+16B4F + 7113, // U+1BC90...U+1BC9F +]; + +const PUNCT_MASKS: [u16; 132] = [ + 0x0882, // U+00A0...U+00AF + 0x88c0, // U+00B0...U+00BF + 0x4000, // U+0370...U+037F + 0x0080, // U+0380...U+038F + 0xfc00, // U+0550...U+055F + 0x0600, // U+0580...U+058F + 0x4000, // U+05B0...U+05BF + 0x0049, // U+05C0...U+05CF + 0x0018, // U+05F0...U+05FF + 0x3600, // U+0600...U+060F + 0xc800, // U+0610...U+061F + 0x3c00, // U+0660...U+066F + 0x0010, // U+06D0...U+06DF + 0x3fff, // U+0700...U+070F + 0x0380, // U+07F0...U+07FF + 0x7fff, // U+0830...U+083F + 0x4000, // U+0850...U+085F + 0x0030, // U+0960...U+096F + 0x0001, // U+0970...U+097F + 0x0001, // U+0AF0...U+0AFF + 0x0010, // U+0DF0...U+0DFF + 0x8000, // U+0E40...U+0E4F + 0x0c00, // U+0E50...U+0E5F + 0xfff0, // U+0F00...U+0F0F + 0x0017, // U+0F10...U+0F1F + 0x3c00, // U+0F30...U+0F3F + 0x0020, // U+0F80...U+0F8F + 0x061f, // U+0FD0...U+0FDF + 0xfc00, // U+1040...U+104F + 0x0800, // U+10F0...U+10FF + 0x01ff, // U+1360...U+136F + 0x0001, // U+1400...U+140F + 0x6000, // U+1660...U+166F + 0x1800, // U+1690...U+169F + 0x3800, // U+16E0...U+16EF + 0x0060, // U+1730...U+173F + 0x0770, // U+17D0...U+17DF + 0x07ff, // U+1800...U+180F + 0x0030, // U+1940...U+194F + 0xc000, // U+1A10...U+1A1F + 0x3f7f, // U+1AA0...U+1AAF + 0xfc00, // U+1B50...U+1B5F + 0x0001, // U+1B60...U+1B6F + 0xf000, // U+1BF0...U+1BFF + 0xf800, // U+1C30...U+1C3F + 0xc000, // U+1C70...U+1C7F + 0x00ff, // U+1CC0...U+1CCF + 0x0008, // U+1CD0...U+1CDF + 0xffff, // U+2010...U+201F + 0x00ff, // U+2020...U+202F + 0xffff, // U+2030...U+203F + 0xffef, // 
U+2040...U+204F + 0x7ffb, // U+2050...U+205F + 0x6000, // U+2070...U+207F + 0x6000, // U+2080...U+208F + 0x0f00, // U+2300...U+230F + 0x0600, // U+2320...U+232F + 0xff00, // U+2760...U+276F + 0x003f, // U+2770...U+277F + 0x0060, // U+27C0...U+27CF + 0xffc0, // U+27E0...U+27EF + 0xfff8, // U+2980...U+298F + 0x01ff, // U+2990...U+299F + 0x0f00, // U+29D0...U+29DF + 0x3000, // U+29F0...U+29FF + 0xde00, // U+2CF0...U+2CFF + 0x0001, // U+2D70...U+2D7F + 0xffff, // U+2E00...U+2E0F + 0xffff, // U+2E10...U+2E1F + 0x7fff, // U+2E20...U+2E2F + 0xffff, // U+2E30...U+2E3F + 0x0007, // U+2E40...U+2E4F + 0xff0e, // U+3000...U+300F + 0xfff3, // U+3010...U+301F + 0x2001, // U+3030...U+303F + 0x0001, // U+30A0...U+30AF + 0x0800, // U+30F0...U+30FF + 0xc000, // U+A4F0...U+A4FF + 0xe000, // U+A600...U+A60F + 0x4008, // U+A670...U+A67F + 0x00fc, // U+A6F0...U+A6FF + 0x00f0, // U+A870...U+A87F + 0xc000, // U+A8C0...U+A8CF + 0x0700, // U+A8F0...U+A8FF + 0xc000, // U+A920...U+A92F + 0x8000, // U+A950...U+A95F + 0x3ffe, // U+A9C0...U+A9CF + 0xc000, // U+A9D0...U+A9DF + 0xf000, // U+AA50...U+AA5F + 0xc000, // U+AAD0...U+AADF + 0x0003, // U+AAF0...U+AAFF + 0x0800, // U+ABE0...U+ABEF + 0xc000, // U+FD30...U+FD3F + 0x03ff, // U+FE10...U+FE1F + 0xffff, // U+FE30...U+FE3F + 0xffff, // U+FE40...U+FE4F + 0xfff7, // U+FE50...U+FE5F + 0x0d0b, // U+FE60...U+FE6F + 0xf7ee, // U+FF00...U+FF0F + 0x8c00, // U+FF10...U+FF1F + 0x0001, // U+FF20...U+FF2F + 0xb800, // U+FF30...U+FF3F + 0xa800, // U+FF50...U+FF5F + 0x003f, // U+FF60...U+FF6F + 0x0007, // U+10100...U+1010F + 0x8000, // U+10390...U+1039F + 0x0001, // U+103D0...U+103DF + 0x8000, // U+10560...U+1056F + 0x0080, // U+10850...U+1085F + 0x8000, // U+10910...U+1091F + 0x8000, // U+10930...U+1093F + 0x01ff, // U+10A50...U+10A5F + 0x8000, // U+10A70...U+10A7F + 0x007f, // U+10AF0...U+10AFF + 0xfe00, // U+10B30...U+10B3F + 0x1e00, // U+10B90...U+10B9F + 0x3f80, // U+11040...U+1104F + 0xd800, // U+110B0...U+110BF + 0x0003, // U+110C0...U+110CF + 0x000f, 
// U+11140...U+1114F + 0x0030, // U+11170...U+1117F + 0x21e0, // U+111C0...U+111CF + 0x3f00, // U+11230...U+1123F + 0x0040, // U+114C0...U+114CF + 0x03fe, // U+115C0...U+115CF + 0x000e, // U+11640...U+1164F + 0x001f, // U+12470...U+1247F + 0xc000, // U+16A60...U+16A6F + 0x0020, // U+16AF0...U+16AFF + 0x0f80, // U+16B30...U+16B3F + 0x0010, // U+16B40...U+16B4F + 0x8000, // U+1BC90...U+1BC9F +]; + +pub(crate) fn is_ascii_punctuation(c: u8) -> bool { + c < 128 && (PUNCT_MASKS_ASCII[(c / 16) as usize] & (1 << (c & 15))) != 0 +} + +pub(crate) fn is_punctuation(c: char) -> bool { + let cp = c as u32; + if cp < 128 { + return is_ascii_punctuation(cp as u8); + } + if cp > 0x1BC9F { + return false; + } + let high = (cp / 16) as u16; + match PUNCT_TAB.binary_search(&high) { + Ok(index) => (PUNCT_MASKS[index] & (1 << (cp & 15))) != 0, + _ => false, + } +} + +#[cfg(test)] +mod tests { + use super::{is_ascii_punctuation, is_punctuation}; + + #[test] + fn test_ascii() { + assert!(is_ascii_punctuation(b'!')); + assert!(is_ascii_punctuation(b'@')); + assert!(is_ascii_punctuation(b'~')); + assert!(!is_ascii_punctuation(b' ')); + assert!(!is_ascii_punctuation(b'0')); + assert!(!is_ascii_punctuation(b'A')); + assert!(!is_ascii_punctuation(0xA1)); + } + + #[test] + fn test_unicode() { + assert!(is_punctuation('~')); + assert!(!is_punctuation(' ')); + + assert!(is_punctuation('\u{00A1}')); + assert!(is_punctuation('\u{060C}')); + assert!(is_punctuation('\u{FF65}')); + assert!(is_punctuation('\u{1BC9F}')); + assert!(!is_punctuation('\u{1BCA0}')); + } +} diff --git a/vendor/pulldown-cmark/src/scanners.rs b/vendor/pulldown-cmark/src/scanners.rs new file mode 100644 index 000000000..176c495eb --- /dev/null +++ b/vendor/pulldown-cmark/src/scanners.rs @@ -0,0 +1,1327 @@ +// Copyright 2015 Google Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//! 
Scanners for fragments of CommonMark syntax + +use std::convert::TryInto; +use std::{char, convert::TryFrom}; + +use crate::parse::HtmlScanGuard; +pub(crate) use crate::puncttable::{is_ascii_punctuation, is_punctuation}; +use crate::strings::CowStr; +use crate::{entities, HeadingLevel}; +use crate::{Alignment, LinkType}; + +use memchr::memchr; + +// sorted for binary search +const HTML_TAGS: [&str; 62] = [ + "address", + "article", + "aside", + "base", + "basefont", + "blockquote", + "body", + "caption", + "center", + "col", + "colgroup", + "dd", + "details", + "dialog", + "dir", + "div", + "dl", + "dt", + "fieldset", + "figcaption", + "figure", + "footer", + "form", + "frame", + "frameset", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "head", + "header", + "hr", + "html", + "iframe", + "legend", + "li", + "link", + "main", + "menu", + "menuitem", + "nav", + "noframes", + "ol", + "optgroup", + "option", + "p", + "param", + "section", + "source", + "summary", + "table", + "tbody", + "td", + "tfoot", + "th", + "thead", + "title", + "tr", + "track", + "ul", +]; + +/// Analysis of the beginning of a line, including indentation and container +/// markers. +#[derive(Clone)] +pub(crate) struct LineStart<'a> { + bytes: &'a [u8], + tab_start: usize, + ix: usize, + spaces_remaining: usize, + // no thematic breaks can occur before this offset. + // this prevents scanning over and over up to a certain point + min_hrule_offset: usize, +} + +impl<'a> LineStart<'a> { + pub(crate) fn new(bytes: &[u8]) -> LineStart { + LineStart { + bytes, + tab_start: 0, + ix: 0, + spaces_remaining: 0, + min_hrule_offset: 0, + } + } + + /// Try to scan a number of spaces. + /// + /// Returns true if all spaces were consumed. + /// + /// Note: consumes some spaces even if not successful. + pub(crate) fn scan_space(&mut self, n_space: usize) -> bool { + self.scan_space_inner(n_space) == 0 + } + + /// Scan a number of spaces up to a maximum. + /// + /// Returns number of spaces scanned. 
+ pub(crate) fn scan_space_upto(&mut self, n_space: usize) -> usize { + n_space - self.scan_space_inner(n_space) + } + + /// Returns unused remainder of spaces. + fn scan_space_inner(&mut self, mut n_space: usize) -> usize { + let n_from_remaining = self.spaces_remaining.min(n_space); + self.spaces_remaining -= n_from_remaining; + n_space -= n_from_remaining; + while n_space > 0 && self.ix < self.bytes.len() { + match self.bytes[self.ix] { + b' ' => { + self.ix += 1; + n_space -= 1; + } + b'\t' => { + let spaces = 4 - (self.ix - self.tab_start) % 4; + self.ix += 1; + self.tab_start = self.ix; + let n = spaces.min(n_space); + n_space -= n; + self.spaces_remaining = spaces - n; + } + _ => break, + } + } + n_space + } + + /// Scan all available ASCII whitespace (not including eol). + pub(crate) fn scan_all_space(&mut self) { + self.spaces_remaining = 0; + self.ix += self.bytes[self.ix..] + .iter() + .take_while(|&&b| b == b' ' || b == b'\t') + .count(); + } + + /// Determine whether we're at end of line (includes end of file). + pub(crate) fn is_at_eol(&self) -> bool { + self.bytes + .get(self.ix) + .map(|&c| c == b'\r' || c == b'\n') + .unwrap_or(true) + } + + fn scan_ch(&mut self, c: u8) -> bool { + if self.ix < self.bytes.len() && self.bytes[self.ix] == c { + self.ix += 1; + true + } else { + false + } + } + + pub(crate) fn scan_blockquote_marker(&mut self) -> bool { + let save = self.clone(); + let _ = self.scan_space(3); + if self.scan_ch(b'>') { + let _ = self.scan_space(1); + true + } else { + *self = save; + false + } + } + + /// Scan a list marker. + /// + /// Return value is the character, the start index, and the indent in spaces. + /// For ordered list markers, the character will be one of b'.' or b')'. For + /// bullet list markers, it will be one of b'-', b'+', or b'*'. 
+ pub(crate) fn scan_list_marker(&mut self) -> Option<(u8, u64, usize)> { + let save = self.clone(); + let indent = self.scan_space_upto(4); + if indent < 4 && self.ix < self.bytes.len() { + let c = self.bytes[self.ix]; + if c == b'-' || c == b'+' || c == b'*' { + if self.ix >= self.min_hrule_offset { + // there could be an hrule here + if let Err(min_offset) = scan_hrule(&self.bytes[self.ix..]) { + self.min_hrule_offset = min_offset; + } else { + *self = save; + return None; + } + } + self.ix += 1; + if self.scan_space(1) || self.is_at_eol() { + return self.finish_list_marker(c, 0, indent + 2); + } + } else if c >= b'0' && c <= b'9' { + let start_ix = self.ix; + let mut ix = self.ix + 1; + let mut val = u64::from(c - b'0'); + while ix < self.bytes.len() && ix - start_ix < 10 { + let c = self.bytes[ix]; + ix += 1; + if c >= b'0' && c <= b'9' { + val = val * 10 + u64::from(c - b'0'); + } else if c == b')' || c == b'.' { + self.ix = ix; + if self.scan_space(1) || self.is_at_eol() { + return self.finish_list_marker(c, val, indent + self.ix - start_ix); + } else { + break; + } + } else { + break; + } + } + } + } + *self = save; + None + } + + fn finish_list_marker( + &mut self, + c: u8, + start: u64, + mut indent: usize, + ) -> Option<(u8, u64, usize)> { + let save = self.clone(); + + // skip the rest of the line if it's blank + if scan_blank_line(&self.bytes[self.ix..]).is_some() { + return Some((c, start, indent)); + } + + let post_indent = self.scan_space_upto(4); + if post_indent < 4 { + indent += post_indent; + } else { + *self = save; + } + Some((c, start, indent)) + } + + /// Returns Some(is_checked) when a task list marker was found. Resets itself + /// to original state otherwise. 
+ pub(crate) fn scan_task_list_marker(&mut self) -> Option<bool> { + let save = self.clone(); + self.scan_space_upto(3); + + if !self.scan_ch(b'[') { + *self = save; + return None; + } + let is_checked = match self.bytes.get(self.ix) { + Some(&c) if is_ascii_whitespace_no_nl(c) => { + self.ix += 1; + false + } + Some(b'x') | Some(b'X') => { + self.ix += 1; + true + } + _ => { + *self = save; + return None; + } + }; + if !self.scan_ch(b']') { + *self = save; + return None; + } + if !self + .bytes + .get(self.ix) + .map(|&b| is_ascii_whitespace_no_nl(b)) + .unwrap_or(false) + { + *self = save; + return None; + } + Some(is_checked) + } + + pub(crate) fn bytes_scanned(&self) -> usize { + self.ix + } + + pub(crate) fn remaining_space(&self) -> usize { + self.spaces_remaining + } +} + +pub(crate) fn is_ascii_whitespace(c: u8) -> bool { + (c >= 0x09 && c <= 0x0d) || c == b' ' +} + +pub(crate) fn is_ascii_whitespace_no_nl(c: u8) -> bool { + c == b'\t' || c == 0x0b || c == 0x0c || c == b' ' +} + +fn is_ascii_alpha(c: u8) -> bool { + matches!(c, b'a'..=b'z' | b'A'..=b'Z') +} + +fn is_ascii_alphanumeric(c: u8) -> bool { + matches!(c, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z') +} + +fn is_ascii_letterdigitdash(c: u8) -> bool { + c == b'-' || is_ascii_alphanumeric(c) +} + +fn is_digit(c: u8) -> bool { + b'0' <= c && c <= b'9' +} + +fn is_valid_unquoted_attr_value_char(c: u8) -> bool { + !matches!( + c, + b'\'' | b'"' | b' ' | b'=' | b'>' | b'<' | b'`' | b'\n' | b'\r' + ) +} + +// scan a single character +pub(crate) fn scan_ch(data: &[u8], c: u8) -> usize { + if !data.is_empty() && data[0] == c { + 1 + } else { + 0 + } +} + +pub(crate) fn scan_while<F>(data: &[u8], mut f: F) -> usize +where + F: FnMut(u8) -> bool, +{ + data.iter().take_while(|&&c| f(c)).count() +} + +pub(crate) fn scan_rev_while<F>(data: &[u8], mut f: F) -> usize +where + F: FnMut(u8) -> bool, +{ + data.iter().rev().take_while(|&&c| f(c)).count() +} + +pub(crate) fn scan_ch_repeat(data: &[u8], c: u8) -> usize { + 
scan_while(data, |x| x == c) +} + +// Note: this scans ASCII whitespace only, for Unicode whitespace use +// a different function. +pub(crate) fn scan_whitespace_no_nl(data: &[u8]) -> usize { + scan_while(data, is_ascii_whitespace_no_nl) +} + +fn scan_attr_value_chars(data: &[u8]) -> usize { + scan_while(data, is_valid_unquoted_attr_value_char) +} + +pub(crate) fn scan_eol(bytes: &[u8]) -> Option<usize> { + if bytes.is_empty() { + return Some(0); + } + match bytes[0] { + b'\n' => Some(1), + b'\r' => Some(if bytes.get(1) == Some(&b'\n') { 2 } else { 1 }), + _ => None, + } +} + +pub(crate) fn scan_blank_line(bytes: &[u8]) -> Option<usize> { + let i = scan_whitespace_no_nl(bytes); + scan_eol(&bytes[i..]).map(|n| i + n) +} + +pub(crate) fn scan_nextline(bytes: &[u8]) -> usize { + memchr(b'\n', bytes).map_or(bytes.len(), |x| x + 1) +} + +// return: end byte for closing code fence, or None +// if the line is not a closing code fence +pub(crate) fn scan_closing_code_fence( + bytes: &[u8], + fence_char: u8, + n_fence_char: usize, +) -> Option<usize> { + if bytes.is_empty() { + return Some(0); + } + let mut i = 0; + let num_fence_chars_found = scan_ch_repeat(&bytes[i..], fence_char); + if num_fence_chars_found < n_fence_char { + return None; + } + i += num_fence_chars_found; + let num_trailing_spaces = scan_ch_repeat(&bytes[i..], b' '); + i += num_trailing_spaces; + scan_eol(&bytes[i..]).map(|_| i) +} + +// returned pair is (number of bytes, number of spaces) +fn calc_indent(text: &[u8], max: usize) -> (usize, usize) { + let mut spaces = 0; + let mut offset = 0; + + for (i, &b) in text.iter().enumerate() { + match b { + b' ' => { + spaces += 1; + if spaces == max { + break; + } + } + b'\t' => { + let new_spaces = spaces + 4 - (spaces & 3); + if new_spaces > max { + break; + } + spaces = new_spaces; + } + _ => break, + } + offset = i; + } + + (offset, spaces) +} + +/// Scan hrule opening sequence. 
+/// +/// Returns Ok(x) when it finds an hrule, where x is the +/// size of line containing the hrule, including the trailing newline. +/// +/// Returns Err(x) when it does not find an hrule and x is +/// the offset in data before no hrule can appear. +pub(crate) fn scan_hrule(bytes: &[u8]) -> Result<usize, usize> { + if bytes.len() < 3 { + return Err(0); + } + let c = bytes[0]; + if !(c == b'*' || c == b'-' || c == b'_') { + return Err(0); + } + let mut n = 0; + let mut i = 0; + + while i < bytes.len() { + match bytes[i] { + b'\n' | b'\r' => { + i += scan_eol(&bytes[i..]).unwrap_or(0); + break; + } + c2 if c2 == c => { + n += 1; + } + b' ' | b'\t' => (), + _ => return Err(i), + } + i += 1; + } + if n >= 3 { + Ok(i) + } else { + Err(i) + } +} + +/// Scan an ATX heading opening sequence. +/// +/// Returns number of bytes in prefix and level. +pub(crate) fn scan_atx_heading(data: &[u8]) -> Option<HeadingLevel> { + let level = scan_ch_repeat(data, b'#'); + if data.get(level).copied().map_or(true, is_ascii_whitespace) { + HeadingLevel::try_from(level).ok() + } else { + None + } +} + +/// Scan a setext heading underline. +/// +/// Returns number of bytes in line (including trailing newline) and level. +pub(crate) fn scan_setext_heading(data: &[u8]) -> Option<(usize, HeadingLevel)> { + let c = *data.get(0)?; + let level = if c == b'=' { + HeadingLevel::H1 + } else if c == b'-' { + HeadingLevel::H2 + } else { + return None; + }; + let mut i = 1 + scan_ch_repeat(&data[1..], c); + i += scan_blank_line(&data[i..])?; + Some((i, level)) +} + +// returns number of bytes in line (including trailing +// newline) and column alignments +pub(crate) fn scan_table_head(data: &[u8]) -> (usize, Vec<Alignment>) { + let (mut i, spaces) = calc_indent(data, 4); + if spaces > 3 || i == data.len() { + return (0, vec![]); + } + let mut cols = vec![]; + let mut active_col = Alignment::None; + let mut start_col = true; + if data[i] == b'|' { + i += 1; + } + for c in &data[i..] 
{ + if let Some(n) = scan_eol(&data[i..]) { + i += n; + break; + } + match *c { + b' ' => (), + b':' => { + active_col = match (start_col, active_col) { + (true, Alignment::None) => Alignment::Left, + (false, Alignment::Left) => Alignment::Center, + (false, Alignment::None) => Alignment::Right, + _ => active_col, + }; + start_col = false; + } + b'-' => { + start_col = false; + } + b'|' => { + start_col = true; + cols.push(active_col); + active_col = Alignment::None; + } + _ => { + cols = vec![]; + start_col = true; + break; + } + } + i += 1; + } + + if !start_col { + cols.push(active_col); + } + + (i, cols) +} + +/// Scan code fence. +/// +/// Returns number of bytes scanned and the char that is repeated to make the code fence. +pub(crate) fn scan_code_fence(data: &[u8]) -> Option<(usize, u8)> { + let c = *data.get(0)?; + if !(c == b'`' || c == b'~') { + return None; + } + let i = 1 + scan_ch_repeat(&data[1..], c); + if i >= 3 { + if c == b'`' { + let suffix = &data[i..]; + let next_line = i + scan_nextline(suffix); + // FIXME: make sure this is correct + if suffix[..(next_line - i)].iter().any(|&b| b == b'`') { + return None; + } + } + Some((i, c)) + } else { + None + } +} + +pub(crate) fn scan_blockquote_start(data: &[u8]) -> Option<usize> { + if data.starts_with(b"> ") { + Some(2) + } else { + None + } +} + +/// This already assumes the list item has been scanned. +pub(crate) fn scan_empty_list(data: &[u8]) -> bool { + let mut ix = 0; + for _ in 0..2 { + if let Some(bytes) = scan_blank_line(&data[ix..]) { + ix += bytes; + } else { + return false; + } + } + true +} + +// return number of bytes scanned, delimiter, start index, and indent +pub(crate) fn scan_listitem(bytes: &[u8]) -> Option<(usize, u8, usize, usize)> { + let mut c = *bytes.get(0)?; + let (w, start) = match c { + b'-' | b'+' | b'*' => (1, 0), + b'0'..=b'9' => { + let (length, start) = parse_decimal(bytes); + c = *bytes.get(length)?; + if !(c == b'.' 
|| c == b')') { + return None; + } + (length + 1, start) + } + _ => { + return None; + } + }; + // TODO: replace calc_indent with scan_leading_whitespace, for tab correctness + let (mut postn, mut postindent) = calc_indent(&bytes[w..], 5); + if postindent == 0 { + scan_eol(&bytes[w..])?; + postindent += 1; + } else if postindent > 4 { + postn = 1; + postindent = 1; + } + if scan_blank_line(&bytes[w..]).is_some() { + postn = 0; + postindent = 1; + } + Some((w + postn, c, start, w + postindent)) +} + +// returns (number of bytes, parsed decimal) +fn parse_decimal(bytes: &[u8]) -> (usize, usize) { + match bytes + .iter() + .take_while(|&&b| is_digit(b)) + .try_fold((0, 0usize), |(count, acc), c| { + let digit = usize::from(c - b'0'); + match acc + .checked_mul(10) + .and_then(|ten_acc| ten_acc.checked_add(digit)) + { + Some(number) => Ok((count + 1, number)), + // stop early on overflow + None => Err((count, acc)), + } + }) { + Ok(p) | Err(p) => p, + } +} + +// returns (number of bytes, parsed hex) +fn parse_hex(bytes: &[u8]) -> (usize, usize) { + match bytes.iter().try_fold((0, 0usize), |(count, acc), c| { + let mut c = *c; + let digit = if c >= b'0' && c <= b'9' { + usize::from(c - b'0') + } else { + // make lower case + c |= 0x20; + if c >= b'a' && c <= b'f' { + usize::from(c - b'a' + 10) + } else { + return Err((count, acc)); + } + }; + match acc + .checked_mul(16) + .and_then(|sixteen_acc| sixteen_acc.checked_add(digit)) + { + Some(number) => Ok((count + 1, number)), + // stop early on overflow + None => Err((count, acc)), + } + }) { + Ok(p) | Err(p) => p, + } +} + +fn char_from_codepoint(input: usize) -> Option<char> { + let mut codepoint = input.try_into().ok()?; + if codepoint == 0 { + codepoint = 0xFFFD; + } + char::from_u32(codepoint) +} + +// doesn't bother to check data[0] == '&' +pub(crate) fn scan_entity(bytes: &[u8]) -> (usize, Option<CowStr<'static>>) { + let mut end = 1; + if scan_ch(&bytes[end..], b'#') == 1 { + end += 1; + let (bytecount, codepoint) 
= if end < bytes.len() && bytes[end] | 0x20 == b'x' { + end += 1; + parse_hex(&bytes[end..]) + } else { + parse_decimal(&bytes[end..]) + }; + end += bytecount; + return if bytecount == 0 || scan_ch(&bytes[end..], b';') == 0 { + (0, None) + } else if let Some(c) = char_from_codepoint(codepoint) { + (end + 1, Some(c.into())) + } else { + (0, None) + }; + } + end += scan_while(&bytes[end..], is_ascii_alphanumeric); + if scan_ch(&bytes[end..], b';') == 1 { + if let Some(value) = entities::get_entity(&bytes[1..end]) { + return (end + 1, Some(value.into())); + } + } + (0, None) +} + +// FIXME: we can most likely re-use other scanners +// returns (bytelength, title_str) +pub(crate) fn scan_refdef_title(text: &str) -> Option<(usize, &str)> { + let mut chars = text.chars().peekable(); + let closing_delim = match chars.next()? { + '\'' => '\'', + '"' => '"', + '(' => ')', + _ => return None, + }; + let mut bytecount = 1; + + while let Some(c) = chars.next() { + match c { + '\n' => { + bytecount += 1; + let mut next = *chars.peek()?; + while is_ascii_whitespace_no_nl(next as u8) { + bytecount += chars.next()?.len_utf8(); + next = *chars.peek()?; + } + if *chars.peek()? 
== '\n' { + // blank line - not allowed + return None; + } + } + '\\' => { + let next_char = chars.next()?; + bytecount += 1 + next_char.len_utf8(); + } + c if c == closing_delim => { + return Some((bytecount + 1, &text[1..bytecount])); + } + c => { + bytecount += c.len_utf8(); + } + } + } + None +} + +// note: dest returned is raw, still needs to be unescaped +// TODO: check that nested parens are really not allowed for refdefs +// TODO(performance): this func should probably its own unescaping +pub(crate) fn scan_link_dest( + data: &str, + start_ix: usize, + max_next: usize, +) -> Option<(usize, &str)> { + let bytes = &data.as_bytes()[start_ix..]; + let mut i = scan_ch(bytes, b'<'); + + if i != 0 { + // pointy links + while i < bytes.len() { + match bytes[i] { + b'\n' | b'\r' | b'<' => return None, + b'>' => return Some((i + 1, &data[(start_ix + 1)..(start_ix + i)])), + b'\\' if i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) => { + i += 1; + } + _ => {} + } + i += 1; + } + None + } else { + // non-pointy links + let mut nest = 0; + while i < bytes.len() { + match bytes[i] { + 0x0..=0x20 => { + break; + } + b'(' => { + if nest > max_next { + return None; + } + nest += 1; + } + b')' => { + if nest == 0 { + break; + } + nest -= 1; + } + b'\\' if i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) => { + i += 1; + } + _ => {} + } + i += 1; + } + Some((i, &data[start_ix..(start_ix + i)])) + } +} + +/// Returns bytes scanned +fn scan_attribute_name(data: &[u8]) -> Option<usize> { + let (&c, tail) = data.split_first()?; + if is_ascii_alpha(c) || c == b'_' || c == b':' { + Some( + 1 + scan_while(tail, |c| { + is_ascii_alphanumeric(c) || c == b'_' || c == b'.' || c == b':' || c == b'-' + }), + ) + } else { + None + } +} + +/// Returns the index immediately following the attribute on success. +/// The argument `buffer_ix` refers to the index into `data` from which we +/// should copy into `buffer` when we find bytes to skip. 
+fn scan_attribute( + data: &[u8], + mut ix: usize, + newline_handler: Option<&dyn Fn(&[u8]) -> usize>, + buffer: &mut Vec<u8>, + buffer_ix: &mut usize, +) -> Option<usize> { + ix += scan_attribute_name(&data[ix..])?; + let n_whitespace = + scan_whitespace_with_newline_handler(data, ix, newline_handler, buffer, buffer_ix)? - ix; + ix += n_whitespace; + if scan_ch(&data[ix..], b'=') == 1 { + ix += 1; + ix = scan_whitespace_with_newline_handler(data, ix, newline_handler, buffer, buffer_ix)?; + ix = scan_attribute_value(data, ix, newline_handler, buffer, buffer_ix)?; + } else if n_whitespace > 0 { + // Leave whitespace for next attribute. + ix -= 1; + } + Some(ix) +} + +/// Scans whitespace and possibly newlines according to the +/// behavior defined by the newline handler. When bytes are skipped, +/// all preceding non-skipped bytes are pushed to the buffer. +fn scan_whitespace_with_newline_handler( + data: &[u8], + mut i: usize, + newline_handler: Option<&dyn Fn(&[u8]) -> usize>, + buffer: &mut Vec<u8>, + buffer_ix: &mut usize, +) -> Option<usize> { + while i < data.len() { + if !is_ascii_whitespace(data[i]) { + return Some(i); + } + if let Some(eol_bytes) = scan_eol(&data[i..]) { + let handler = newline_handler?; + i += eol_bytes; + let skipped_bytes = handler(&data[i..]); + + if skipped_bytes > 0 { + buffer.extend(&data[*buffer_ix..i]); + *buffer_ix = i + skipped_bytes; + } + + i += skipped_bytes; + } else { + i += 1; + } + } + + Some(i) +} + +/// Returns the index immediately following the attribute value on success. +fn scan_attribute_value( + data: &[u8], + mut i: usize, + newline_handler: Option<&dyn Fn(&[u8]) -> usize>, + buffer: &mut Vec<u8>, + buffer_ix: &mut usize, +) -> Option<usize> { + match *data.get(i)? 
{ + b @ b'"' | b @ b'\'' => { + i += 1; + while i < data.len() { + if data[i] == b { + return Some(i + 1); + } + if let Some(eol_bytes) = scan_eol(&data[i..]) { + let handler = newline_handler?; + i += eol_bytes; + let skipped_bytes = handler(&data[i..]); + + if skipped_bytes > 0 { + buffer.extend(&data[*buffer_ix..i]); + *buffer_ix = i + skipped_bytes; + } + i += skipped_bytes; + } else { + i += 1; + } + } + return None; + } + b' ' | b'=' | b'>' | b'<' | b'`' | b'\n' | b'\r' => { + return None; + } + _ => { + // unquoted attribute value + i += scan_attr_value_chars(&data[i..]); + } + } + + Some(i) +} + +// Remove backslash escapes and resolve entities +pub(crate) fn unescape(input: &str) -> CowStr<'_> { + let mut result = String::new(); + let mut mark = 0; + let mut i = 0; + let bytes = input.as_bytes(); + while i < bytes.len() { + match bytes[i] { + b'\\' if i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) => { + result.push_str(&input[mark..i]); + mark = i + 1; + i += 2; + } + b'&' => match scan_entity(&bytes[i..]) { + (n, Some(value)) => { + result.push_str(&input[mark..i]); + result.push_str(&value); + i += n; + mark = i; + } + _ => i += 1, + }, + b'\r' => { + result.push_str(&input[mark..i]); + i += 1; + mark = i; + } + _ => i += 1, + } + } + if mark == 0 { + input.into() + } else { + result.push_str(&input[mark..]); + result.into() + } +} + +/// Assumes `data` is preceded by `<`. 
+pub(crate) fn starts_html_block_type_6(data: &[u8]) -> bool { + let i = scan_ch(data, b'/'); + let tail = &data[i..]; + let n = scan_while(tail, is_ascii_alphanumeric); + if !is_html_tag(&tail[..n]) { + return false; + } + // Starting condition says the next byte must be either a space, a tab, + // the end of the line, the string >, or the string /> + let tail = &tail[n..]; + tail.is_empty() + || tail[0] == b' ' + || tail[0] == b'\t' + || tail[0] == b'\r' + || tail[0] == b'\n' + || tail[0] == b'>' + || tail.len() >= 2 && &tail[..2] == b"/>" +} + +fn is_html_tag(tag: &[u8]) -> bool { + HTML_TAGS + .binary_search_by(|probe| { + let probe_bytes_iter = probe.as_bytes().iter(); + let tag_bytes_iter = tag.iter(); + + probe_bytes_iter + .zip(tag_bytes_iter) + .find_map(|(&a, &b)| { + // We can compare case insensitively because the probes are + // all lower case alpha strings. + match a.cmp(&(b | 0x20)) { + std::cmp::Ordering::Equal => None, + inequality => Some(inequality), + } + }) + .unwrap_or_else(|| probe.len().cmp(&tag.len())) + }) + .is_ok() +} + +/// Assumes that `data` starts with `<`. +/// Returns the index into data directly after the html tag on success. +pub(crate) fn scan_html_type_7(data: &[u8]) -> Option<usize> { + // Block type html does not allow for newlines, so we + // do not pass a newline handler. + let (_span, i) = scan_html_block_inner(data, None)?; + scan_blank_line(&data[i..])?; + Some(i) +} + +/// Assumes that `data` starts with `<`. +/// Returns the number of bytes scanned and the html in case of +/// success. +/// When some bytes were skipped, because the html was split over +/// multiple leafs (e.g. over multiple lines in a blockquote), +/// the html is returned as a vector of bytes. +/// If no bytes were skipped, the buffer will be empty. 
+pub(crate) fn scan_html_block_inner( + data: &[u8], + newline_handler: Option<&dyn Fn(&[u8]) -> usize>, +) -> Option<(Vec<u8>, usize)> { + let mut buffer = Vec::new(); + let mut last_buf_index = 0; + + let close_tag_bytes = scan_ch(&data[1..], b'/'); + let l = scan_while(&data[(1 + close_tag_bytes)..], is_ascii_alpha); + if l == 0 { + return None; + } + let mut i = 1 + close_tag_bytes + l; + i += scan_while(&data[i..], is_ascii_letterdigitdash); + + if close_tag_bytes == 0 { + loop { + let old_i = i; + loop { + i += scan_whitespace_no_nl(&data[i..]); + if let Some(eol_bytes) = scan_eol(&data[i..]) { + if eol_bytes == 0 { + return None; + } + let handler = newline_handler?; + i += eol_bytes; + let skipped_bytes = handler(&data[i..]); + + let data_len = data.len() - i; + + debug_assert!( + skipped_bytes <= data_len, + "Handler tried to skip too many bytes, fed {}, skipped {}", + data_len, + skipped_bytes + ); + + if skipped_bytes > 0 { + buffer.extend(&data[last_buf_index..i]); + i += skipped_bytes; + last_buf_index = i; + } + } else { + break; + } + } + if let Some(b'/') | Some(b'>') = data.get(i) { + break; + } + if old_i == i { + // No whitespace, which is mandatory. 
+ return None; + } + i = scan_attribute(data, i, newline_handler, &mut buffer, &mut last_buf_index)?; + } + } + + i += scan_whitespace_no_nl(&data[i..]); + + if close_tag_bytes == 0 { + i += scan_ch(&data[i..], b'/'); + } + + if scan_ch(&data[i..], b'>') == 0 { + None + } else { + i += 1; + if !buffer.is_empty() { + buffer.extend(&data[last_buf_index..i]); + } + Some((buffer, i)) + } +} + +/// Returns (next_byte_offset, uri, type) +pub(crate) fn scan_autolink(text: &str, start_ix: usize) -> Option<(usize, CowStr<'_>, LinkType)> { + scan_uri(text, start_ix) + .map(|(bytes, uri)| (bytes, uri, LinkType::Autolink)) + .or_else(|| scan_email(text, start_ix).map(|(bytes, uri)| (bytes, uri, LinkType::Email))) +} + +/// Returns (next_byte_offset, uri) +fn scan_uri(text: &str, start_ix: usize) -> Option<(usize, CowStr<'_>)> { + let bytes = &text.as_bytes()[start_ix..]; + + // scheme's first byte must be an ascii letter + if bytes.is_empty() || !is_ascii_alpha(bytes[0]) { + return None; + } + + let mut i = 1; + + while i < bytes.len() { + let c = bytes[i]; + i += 1; + match c { + c if is_ascii_alphanumeric(c) => (), + b'.' | b'-' | b'+' => (), + b':' => break, + _ => return None, + } + } + + // scheme length must be between 2 and 32 characters long. scheme + // must be followed by colon + if i < 3 || i > 33 { + return None; + } + + while i < bytes.len() { + match bytes[i] { + b'>' => return Some((start_ix + i + 1, text[start_ix..(start_ix + i)].into())), + b'\0'..=b' ' | b'<' => return None, + _ => (), + } + i += 1; + } + + None +} + +/// Returns (next_byte_offset, email) +fn scan_email(text: &str, start_ix: usize) -> Option<(usize, CowStr<'_>)> { + // using a regex library would be convenient, but doing it by hand is not too bad + let bytes = &text.as_bytes()[start_ix..]; + let mut i = 0; + + while i < bytes.len() { + let c = bytes[i]; + i += 1; + match c { + c if is_ascii_alphanumeric(c) => (), + b'.' | b'!' 
| b'#' | b'$' | b'%' | b'&' | b'\'' | b'*' | b'+' | b'/' | b'=' | b'?' + | b'^' | b'_' | b'`' | b'{' | b'|' | b'}' | b'~' | b'-' => (), + b'@' => break, + _ => return None, + } + } + + loop { + let label_start_ix = i; + let mut fresh_label = true; + + while i < bytes.len() { + match bytes[i] { + c if is_ascii_alphanumeric(c) => (), + b'-' if fresh_label => { + return None; + } + b'-' => (), + _ => break, + } + fresh_label = false; + i += 1; + } + + if i == label_start_ix || i - label_start_ix > 63 || bytes[i - 1] == b'-' { + return None; + } + + if scan_ch(&bytes[i..], b'.') == 0 { + break; + } + i += 1; + } + + if scan_ch(&bytes[i..], b'>') == 0 { + return None; + } + + Some((start_ix + i + 1, text[start_ix..(start_ix + i)].into())) +} + +/// Scan comment, declaration, or CDATA section, with initial "<!" already consumed. +/// Returns byte offset on match. +pub(crate) fn scan_inline_html_comment( + bytes: &[u8], + mut ix: usize, + scan_guard: &mut HtmlScanGuard, +) -> Option<usize> { + let c = *bytes.get(ix)?; + ix += 1; + match c { + b'-' => { + let dashes = scan_ch_repeat(&bytes[ix..], b'-'); + if dashes < 1 { + return None; + } + // Saw "<!--", scan comment. + ix += dashes; + if scan_ch(&bytes[ix..], b'>') == 1 { + return None; + } + + while let Some(x) = memchr(b'-', &bytes[ix..]) { + ix += x + 1; + if scan_ch(&bytes[ix..], b'-') == 1 { + ix += 1; + return if scan_ch(&bytes[ix..], b'>') == 1 { + Some(ix + 1) + } else { + None + }; + } + } + None + } + b'[' if bytes[ix..].starts_with(b"CDATA[") && ix > scan_guard.cdata => { + ix += b"CDATA[".len(); + ix = memchr(b']', &bytes[ix..]).map_or(bytes.len(), |x| ix + x); + let close_brackets = scan_ch_repeat(&bytes[ix..], b']'); + ix += close_brackets; + + if close_brackets == 0 || scan_ch(&bytes[ix..], b'>') == 0 { + scan_guard.cdata = ix; + None + } else { + Some(ix + 1) + } + } + b'A'..=b'Z' if ix > scan_guard.declaration => { + // Scan declaration. 
+ ix += scan_while(&bytes[ix..], |c| c >= b'A' && c <= b'Z'); + let whitespace = scan_while(&bytes[ix..], is_ascii_whitespace); + if whitespace == 0 { + return None; + } + ix += whitespace; + ix = memchr(b'>', &bytes[ix..]).map_or(bytes.len(), |x| ix + x); + if scan_ch(&bytes[ix..], b'>') == 0 { + scan_guard.declaration = ix; + None + } else { + Some(ix + 1) + } + } + _ => None, + } +} + +/// Scan processing directive, with initial "<?" already consumed. +/// Returns the next byte offset on success. +pub(crate) fn scan_inline_html_processing( + bytes: &[u8], + mut ix: usize, + scan_guard: &mut HtmlScanGuard, +) -> Option<usize> { + if ix <= scan_guard.processing { + return None; + } + while let Some(offset) = memchr(b'?', &bytes[ix..]) { + ix += offset + 1; + if scan_ch(&bytes[ix..], b'>') == 1 { + return Some(ix + 1); + } + } + scan_guard.processing = ix; + None +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn overflow_list() { + assert!( + scan_listitem(b"4444444444444444444444444444444444444444444444444444444444!").is_none() + ); + } + + #[test] + fn overflow_by_addition() { + assert!(scan_listitem(b"1844674407370955161615!").is_none()); + } +} diff --git a/vendor/pulldown-cmark/src/strings.rs b/vendor/pulldown-cmark/src/strings.rs new file mode 100644 index 000000000..fb6c1be0d --- /dev/null +++ b/vendor/pulldown-cmark/src/strings.rs @@ -0,0 +1,373 @@ +use std::borrow::{Borrow, Cow, ToOwned}; +use std::convert::{AsRef, TryFrom}; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; +use std::str::from_utf8; + +const MAX_INLINE_STR_LEN: usize = 3 * std::mem::size_of::<isize>() - 2; + +/// Returned when trying to convert a `&str` into a `InlineStr` +/// but it fails because it doesn't fit. +#[derive(Debug)] +pub struct StringTooLongError; + +/// An inline string that can contain almost three words +/// of utf-8 text. 
+#[derive(Debug, Clone, Copy, Eq)] +pub struct InlineStr { + inner: [u8; MAX_INLINE_STR_LEN], + len: u8, +} + +impl<'a> AsRef<str> for InlineStr { + fn as_ref(&self) -> &str { + self.deref() + } +} + +impl Hash for InlineStr { + fn hash<H: Hasher>(&self, state: &mut H) { + self.deref().hash(state); + } +} + +impl From<char> for InlineStr { + fn from(c: char) -> Self { + let mut inner = [0u8; MAX_INLINE_STR_LEN]; + c.encode_utf8(&mut inner); + let len = c.len_utf8() as u8; + Self { inner, len } + } +} + +impl<'a> std::cmp::PartialEq<InlineStr> for InlineStr { + fn eq(&self, other: &InlineStr) -> bool { + self.deref() == other.deref() + } +} + +impl TryFrom<&str> for InlineStr { + type Error = StringTooLongError; + + fn try_from(s: &str) -> Result<InlineStr, StringTooLongError> { + let len = s.len(); + if len <= MAX_INLINE_STR_LEN { + let mut inner = [0u8; MAX_INLINE_STR_LEN]; + inner[..len].copy_from_slice(s.as_bytes()); + let len = len as u8; + Ok(Self { inner, len }) + } else { + Err(StringTooLongError) + } + } +} + +impl Deref for InlineStr { + type Target = str; + + fn deref(&self) -> &str { + let len = self.len as usize; + from_utf8(&self.inner[..len]).unwrap() + } +} + +impl fmt::Display for InlineStr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.as_ref()) + } +} + +/// A copy-on-write string that can be owned, borrowed +/// or inlined. +/// +/// It is three words long. +#[derive(Debug, Eq)] +pub enum CowStr<'a> { + /// An owned, immutable string. + Boxed(Box<str>), + /// A borrowed string. + Borrowed(&'a str), + /// A short inline string. 
+ Inlined(InlineStr), +} + +#[cfg(feature = "serde")] +mod serde_impl { + use super::CowStr; + use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; + use std::fmt; + + impl<'a> Serialize for CowStr<'a> { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_str(self.as_ref()) + } + } + + struct CowStrVisitor; + + impl<'de> de::Visitor<'de> for CowStrVisitor { + type Value = CowStr<'de>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string") + } + + fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E> + where + E: de::Error, + { + Ok(CowStr::Borrowed(v)) + } + } + + impl<'a, 'de: 'a> Deserialize<'de> for CowStr<'a> { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_str(CowStrVisitor) + } + } +} + +impl<'a> AsRef<str> for CowStr<'a> { + fn as_ref(&self) -> &str { + self.deref() + } +} + +impl<'a> Hash for CowStr<'a> { + fn hash<H: Hasher>(&self, state: &mut H) { + self.deref().hash(state); + } +} + +impl<'a> std::clone::Clone for CowStr<'a> { + fn clone(&self) -> Self { + match self { + CowStr::Boxed(s) => match InlineStr::try_from(&**s) { + Ok(inline) => CowStr::Inlined(inline), + Err(..) 
=> CowStr::Boxed(s.clone()),
            },
            CowStr::Borrowed(s) => CowStr::Borrowed(s),
            CowStr::Inlined(s) => CowStr::Inlined(*s),
        }
    }
}

impl<'a> std::cmp::PartialEq<CowStr<'a>> for CowStr<'a> {
    // Equality is by string contents only; the variant is ignored
    // (see cow_to_cow_str test, which checks variants separately).
    fn eq(&self, other: &CowStr) -> bool {
        self.deref() == other.deref()
    }
}

impl<'a> From<&'a str> for CowStr<'a> {
    fn from(s: &'a str) -> Self {
        CowStr::Borrowed(s)
    }
}

impl<'a> From<String> for CowStr<'a> {
    fn from(s: String) -> Self {
        CowStr::Boxed(s.into_boxed_str())
    }
}

impl<'a> From<char> for CowStr<'a> {
    // A single char always fits in an InlineStr.
    fn from(c: char) -> Self {
        CowStr::Inlined(c.into())
    }
}

impl<'a> From<Cow<'a, str>> for CowStr<'a> {
    fn from(s: Cow<'a, str>) -> Self {
        match s {
            Cow::Borrowed(s) => CowStr::Borrowed(s),
            Cow::Owned(s) => CowStr::Boxed(s.into_boxed_str()),
        }
    }
}

impl<'a> From<CowStr<'a>> for Cow<'a, str> {
    // Boxed and Inlined both become Cow::Owned (the Inlined buffer
    // cannot outlive self, so it must be copied out).
    fn from(s: CowStr<'a>) -> Self {
        match s {
            CowStr::Boxed(s) => Cow::Owned(s.to_string()),
            CowStr::Inlined(s) => Cow::Owned(s.to_string()),
            CowStr::Borrowed(s) => Cow::Borrowed(s),
        }
    }
}

impl<'a> From<Cow<'a, char>> for CowStr<'a> {
    // Both Borrowed and Owned chars are stored inline.
    fn from(s: Cow<'a, char>) -> Self {
        CowStr::Inlined(InlineStr::from(*s))
    }
}

impl<'a> Deref for CowStr<'a> {
    type Target = str;

    fn deref(&self) -> &str {
        match self {
            CowStr::Boxed(ref b) => &*b,
            CowStr::Borrowed(b) => b,
            CowStr::Inlined(ref s) => s.deref(),
        }
    }
}

impl<'a> Borrow<str> for CowStr<'a> {
    fn borrow(&self) -> &str {
        self.deref()
    }
}

impl<'a> CowStr<'a> {
    // Converts into an owned String. Only the Boxed variant avoids a copy
    // (Box<str> -> String); Borrowed and Inlined must allocate.
    pub fn into_string(self) -> String {
        match self {
            CowStr::Boxed(b) => b.into(),
            CowStr::Borrowed(b) => b.to_owned(),
            CowStr::Inlined(s) => s.deref().to_owned(),
        }
    }
}

impl<'a> fmt::Display for CowStr<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.as_ref())
    }
}

#[cfg(test)]
mod test_special_string {
    use super::*;

    #[test]
    fn inlinestr_ascii() {
        let s: InlineStr = 'a'.into();
        assert_eq!("a", s.deref());
    }

    #[test]
    fn inlinestr_unicode() {
        let s: InlineStr = '🍔'.into();
        assert_eq!("🍔", s.deref());
    }

    // CowStr must stay exactly three machine words — this pins the
    // MAX_INLINE_STR_LEN layout arithmetic.
    #[test]
    fn cowstr_size() {
        let size = std::mem::size_of::<CowStr>();
        let word_size = std::mem::size_of::<isize>();
        assert_eq!(3 * word_size, size);
    }

    #[test]
    fn cowstr_char_to_string() {
        let c = '藏';
        let smort: CowStr = c.into();
        let owned: String = smort.to_string();
        let expected = "藏".to_owned();
        assert_eq!(expected, owned);
    }

    #[test]
    fn max_inline_str_len_atleast_four() {
        // we need 4 bytes to store a char
        assert!(MAX_INLINE_STR_LEN >= 4);
    }

    #[test]
    #[cfg(target_pointer_width = "64")]
    fn inlinestr_fits_twentytwo() {
        let s = "0123456789abcdefghijkl";
        let stack_str = InlineStr::try_from(s).unwrap();
        assert_eq!(stack_str.deref(), s);
    }

    #[test]
    #[cfg(target_pointer_width = "64")]
    fn inlinestr_not_fits_twentythree() {
        let s = "0123456789abcdefghijklm";
        let _stack_str = InlineStr::try_from(s).unwrap_err();
    }

    // Clone of a small Boxed string should demote to Inlined
    // (allocation-avoiding clone in CowStr::clone).
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn small_boxed_str_clones_to_stack() {
        let s = "0123456789abcde".to_owned();
        let smort: CowStr = s.into();
        let smort_clone = smort.clone();

        if let CowStr::Inlined(..) = smort_clone {
        } else {
            panic!("Expected a Inlined variant!");
        }
    }

    #[test]
    fn cow_to_cow_str() {
        let s = "some text";
        let cow = Cow::Borrowed(s);
        let actual = CowStr::from(cow);
        let expected = CowStr::Borrowed(s);
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));

        let s = "some text".to_string();
        let cow: Cow<str> = Cow::Owned(s.clone());
        let actual = CowStr::from(cow);
        let expected = CowStr::Boxed(s.into_boxed_str());
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));
    }

    #[test]
    fn cow_str_to_cow() {
        let s = "some text";
        let cow_str = CowStr::Borrowed(s);
        let actual = Cow::from(cow_str);
        let expected = Cow::Borrowed(s);
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));

        let s = "s";
        let inline_str: InlineStr = InlineStr::try_from(s).unwrap();
        let cow_str = CowStr::Inlined(inline_str);
        let actual = Cow::from(cow_str);
        let expected: Cow<str> = Cow::Owned(s.to_string());
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));

        let s = "s";
        let cow_str = CowStr::Boxed(s.to_string().into_boxed_str());
        let actual = Cow::from(cow_str);
        let expected: Cow<str> = Cow::Owned(s.to_string());
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));
    }

    #[test]
    fn cow_char_to_cow_str() {
        let c = 'c';
        let cow: Cow<char> = Cow::Owned(c);
        let actual = CowStr::from(cow);
        let expected = CowStr::Inlined(InlineStr::from(c));
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));

        let c = 'c';
        let cow: Cow<char> = Cow::Borrowed(&c);
        let actual = CowStr::from(cow);
        let expected = CowStr::Inlined(InlineStr::from(c));
        assert_eq!(actual, expected);
        assert!(variant_eq(&actual, &expected));
    }

    // Compares enum variants only, ignoring payloads.
    fn variant_eq<T>(a: &T, b: &T) -> bool {
        std::mem::discriminant(a) == std::mem::discriminant(b)
    }
}

// ===== vendor/pulldown-cmark/src/tree.rs =====
// Copyright 2018 Google LLC
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.

// A Vec-based container for a tree structure.

use std::num::NonZeroUsize;
use std::ops::{Add, Sub};

/// One-based index of a node in a `Tree`'s backing vector.
///
/// Backed by `NonZeroUsize`: slot zero holds a dummy root, so valid indices
/// are never zero and `Option<TreeIndex>` stays a single word.
#[derive(Debug, Eq, PartialEq, Copy, Clone, PartialOrd)]
pub(crate) struct TreeIndex(NonZeroUsize);

impl TreeIndex {
    /// Wraps a raw index; panics when `i == 0` (the reserved dummy slot).
    fn new(i: usize) -> Self {
        TreeIndex(NonZeroUsize::new(i).unwrap())
    }

    /// Unwraps back to the raw `usize` index.
    pub fn get(self) -> usize {
        self.0.get()
    }
}

impl Add<usize> for TreeIndex {
    type Output = TreeIndex;

    /// Offsets the index forward by `rhs`.
    fn add(self, rhs: usize) -> Self {
        TreeIndex::new(self.0.get() + rhs)
    }
}

impl Sub<usize> for TreeIndex {
    type Output = TreeIndex;

    /// Offsets the index backward by `rhs`; panics on underflow
    /// (an index below one would break the `NonZeroUsize` invariant).
    fn sub(self, rhs: usize) -> Self {
        TreeIndex::new(self.0.get().checked_sub(rhs).unwrap())
    }
}

/// A single tree node: link to first child, link to next sibling, payload.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Node<T> {
    pub child: Option<TreeIndex>,
    pub next: Option<TreeIndex>,
    pub item: T,
}

/// A tree abstraction, intended for fast building as a preorder traversal.
#[derive(Clone)]
pub(crate) struct Tree<T> {
    nodes: Vec<Node<T>>,
    spine: Vec<TreeIndex>, // indices of nodes on path to current node
    cur: Option<TreeIndex>,
}
    /// Creates a tree with room for `cap` nodes, seeded with a dummy
    /// default-valued node at index zero (indices start at one, so this
    /// avoids subtracting one on every TreeIndex -> usize conversion).
    pub(crate) fn with_capacity(cap: usize) -> Tree<T> {
        let mut nodes = Vec::with_capacity(cap);
        nodes.push(Node {
            child: None,
            next: None,
            item: <T as Default>::default(),
        });
        Tree {
            nodes,
            spine: Vec::new(),
            cur: None,
        }
    }

    /// Returns the index of the element currently in focus.
    pub(crate) fn cur(&self) -> Option<TreeIndex> {
        self.cur
    }

    /// Append one item to the current position in the tree.
    ///
    /// The new node becomes the next sibling of the focused node (or the
    /// first child of the spine top when nothing is focused), and focus
    /// moves to it.
    pub(crate) fn append(&mut self, item: T) -> TreeIndex {
        let ix = self.create_node(item);
        let this = Some(ix);

        if let Some(ix) = self.cur {
            self[ix].next = this;
        } else if let Some(&parent) = self.spine.last() {
            self[parent].child = this;
        }
        self.cur = this;
        ix
    }

    /// Create an isolated node.
    pub(crate) fn create_node(&mut self, item: T) -> TreeIndex {
        let this = self.nodes.len();
        self.nodes.push(Node {
            child: None,
            next: None,
            item,
        });
        TreeIndex::new(this)
    }

    /// Push down one level, so that new items become children of the current node.
    /// The new focus index is returned.
    ///
    /// Panics if nothing is currently in focus.
    pub(crate) fn push(&mut self) -> TreeIndex {
        let cur_ix = self.cur.unwrap();
        self.spine.push(cur_ix);
        self.cur = self[cur_ix].child;
        cur_ix
    }

    /// Pop back up a level.
    ///
    /// Focus moves to the popped parent; returns `None` when the spine is empty.
    pub(crate) fn pop(&mut self) -> Option<TreeIndex> {
        let ix = Some(self.spine.pop()?);
        self.cur = ix;
        ix
    }

    /// Look at the parent node.
    pub(crate) fn peek_up(&self) -> Option<TreeIndex> {
        self.spine.last().copied()
    }

    /// Look at grandparent node.
    pub(crate) fn peek_grandparent(&self) -> Option<TreeIndex> {
        if self.spine.len() >= 2 {
            Some(self.spine[self.spine.len() - 2])
        } else {
            None
        }
    }

    /// Returns true when there are no nodes other than the root node
    /// in the tree, false otherwise.
    pub(crate) fn is_empty(&self) -> bool {
        self.nodes.len() <= 1
    }

    /// Returns the length of the spine.
    pub(crate) fn spine_len(&self) -> usize {
        self.spine.len()
    }

    /// Resets the focus to the first node added to the tree, if it exists.
    pub(crate) fn reset(&mut self) {
        self.cur = if self.is_empty() {
            None
        } else {
            // Index 1 is the first real node (index 0 is the dummy root).
            Some(TreeIndex::new(1))
        };
        self.spine.clear();
    }

    /// Walks the spine from a root node up to, but not including, the current node.
    pub(crate) fn walk_spine(&self) -> impl std::iter::DoubleEndedIterator<Item = &TreeIndex> {
        self.spine.iter()
    }

    /// Moves focus to the next sibling of the given node.
    pub(crate) fn next_sibling(&mut self, cur_ix: TreeIndex) -> Option<TreeIndex> {
        self.cur = self[cur_ix].next;
        self.cur
    }
}

impl Tree<Item> {
    /// Truncates the preceding siblings to the given end position,
    /// and returns the new current node.
    ///
    /// Walks the children of the focused node's parent, keeping those that
    /// end before `end_byte_ix`, cutting the sibling chain at the boundary,
    /// and truncating or dropping the node that straddles/starts at it.
    pub(crate) fn truncate_siblings(&mut self, bytes: &[u8], end_byte_ix: usize) {
        let parent_ix = self.peek_up().unwrap();
        let mut next_child_ix = self[parent_ix].child;
        let mut prev_child_ix = None;

        // drop or truncate children based on its range
        while let Some(child_ix) = next_child_ix {
            let child_end = self[child_ix].item.end;
            if child_end < end_byte_ix {
                // preserve this node, and go to the next
                prev_child_ix = Some(child_ix);
                next_child_ix = self[child_ix].next;
                continue;
            } else if child_end == end_byte_ix {
                // this will be the last node
                self[child_ix].next = None;
                // focus to the new last child (this node)
                self.cur = Some(child_ix);
            } else if self[child_ix].item.start == end_byte_ix {
                // check whether the previous character is a backslash
                let is_previous_char_backslash_escape =
                    end_byte_ix.checked_sub(1).map_or(false, |prev| {
                        (bytes[prev] == b'\\') && (self[child_ix].item.body == ItemBody::Text)
                    });
                if is_previous_char_backslash_escape {
                    // rescue the backslash as a plain text content
                    let last_byte_ix = end_byte_ix - 1;
                    self[child_ix].item.start = last_byte_ix;
                    self[child_ix].item.end = end_byte_ix;
                    self.cur = Some(child_ix);
                } else if let Some(prev_child_ix) = prev_child_ix {
                    // the node will become empty. drop the node
                    // a preceding sibling exists
                    self[prev_child_ix].next = None;
                    self.cur = Some(prev_child_ix);
                } else {
                    // no preceding siblings. remove the node from the parent
                    self[parent_ix].child = None;
                    self.cur = None;
                }
            } else {
                debug_assert!(self[child_ix].item.start < end_byte_ix);
                debug_assert!(end_byte_ix < child_end);
                // truncate the node
                self[child_ix].item.end = end_byte_ix;
                self[child_ix].next = None;
                // focus to the new last child
                self.cur = Some(child_ix);
            }
            break;
        }
    }
}

impl<T> std::fmt::Debug for Tree<T>
where
    T: std::fmt::Debug,
{
    // Renders the tree recursively: children indented one level,
    // siblings at the same indent.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        fn debug_tree<T>(
            tree: &Tree<T>,
            cur: TreeIndex,
            indent: usize,
            f: &mut std::fmt::Formatter,
        ) -> std::fmt::Result
        where
            T: std::fmt::Debug,
        {
            for _ in 0..indent {
                write!(f, "  ")?;
            }
            writeln!(f, "{:?}", &tree[cur].item)?;
            if let Some(child_ix) = tree[cur].child {
                debug_tree(tree, child_ix, indent + 1, f)?;
            }
            if let Some(next_ix) = tree[cur].next {
                debug_tree(tree, next_ix, indent, f)?;
            }
            Ok(())
        }

        if self.nodes.len() > 1 {
            let cur = TreeIndex(NonZeroUsize::new(1).unwrap());
            debug_tree(self, cur, 0, f)
        } else {
            write!(f, "Empty tree")
        }
    }
}

impl<T> std::ops::Index<TreeIndex> for Tree<T> {
    type Output = Node<T>;

    fn index(&self, ix: TreeIndex) -> &Self::Output {
        self.nodes.index(ix.get())
    }
}

impl<T> std::ops::IndexMut<TreeIndex> for Tree<T> {
    fn index_mut(&mut self, ix: TreeIndex) -> &mut Node<T> {
        self.nodes.index_mut(ix.get())
    }
}