diff options
Diffstat (limited to 'comm/chat/protocols/xmpp/lib/sax')
-rw-r--r-- | comm/chat/protocols/xmpp/lib/sax/LICENSE | 41 | ||||
-rw-r--r-- | comm/chat/protocols/xmpp/lib/sax/sax.js | 1648 |
2 files changed, 1689 insertions, 0 deletions
diff --git a/comm/chat/protocols/xmpp/lib/sax/LICENSE b/comm/chat/protocols/xmpp/lib/sax/LICENSE new file mode 100644 index 0000000000..ccffa082c9 --- /dev/null +++ b/comm/chat/protocols/xmpp/lib/sax/LICENSE @@ -0,0 +1,41 @@ +The ISC License + +Copyright (c) Isaac Z. Schlueter and Contributors + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +==== + +`String.fromCodePoint` by Mathias Bynens used according to terms of MIT +License, as follows: + + Copyright Mathias Bynens <https://mathiasbynens.be/> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/comm/chat/protocols/xmpp/lib/sax/sax.js b/comm/chat/protocols/xmpp/lib/sax/sax.js new file mode 100644 index 0000000000..564d8d4235 --- /dev/null +++ b/comm/chat/protocols/xmpp/lib/sax/sax.js @@ -0,0 +1,1648 @@ +/* This program is made available under an ISC-style license. */ +(function(sax) { + // wrapper for non-node envs + sax.parser = function(strict, opt) { + return new SAXParser(strict, opt); + }; + sax.SAXParser = SAXParser; + sax.SAXStream = SAXStream; + sax.createStream = createStream; + + // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns. + // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)), + // since that's the earliest that a buffer overrun could occur. This way, checks are + // as rare as required, but as often as necessary to ensure never crossing this bound. + // Furthermore, buffers are only tested at most once per write(), so passing a very + // large string into write() might have undesirable effects, but this is manageable by + // the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme + // edge case, result in creating at most one complete copy of the string passed in. + // Set to Infinity to have unlimited buffers. 
// Upper bound, in characters, that any single parser buffer may reach before
// checkBufferLength() steps in.
sax.MAX_BUFFER_LENGTH = 64 * 1024;

// Names of the string accumulators stored directly on each parser instance.
var buffers = [
  "comment",
  "sgmlDecl",
  "textNode",
  "tagName",
  "doctype",
  "procInstName",
  "procInstBody",
  "entity",
  "attribName",
  "attribValue",
  "cdata",
  "script",
];

// Event names; the parser invokes a handler stored under "on" + name.
sax.EVENTS = [
  "text",
  "processinginstruction",
  "sgmldeclaration",
  "doctype",
  "comment",
  "opentagstart",
  "attribute",
  "opentag",
  "closetag",
  "opencdata",
  "cdata",
  "closecdata",
  "error",
  "end",
  "ready",
  "script",
  "opennamespace",
  "closenamespace",
];

/**
 * Parser constructor. May be invoked with or without `new`; it is also
 * re-applied to an existing instance to reset it.
 *
 * @param {*} strict - coerced to boolean; a truthy value also forces `noscript`.
 * @param {Object} [opt] - options object. It is stored as `parser.opt` and its
 *   `lowercase` field is overwritten in place (falling back to `lowercasetags`).
 *   Fields read here: lowercase, lowercasetags, noscript, strictEntities,
 *   xmlns, position.
 */
function SAXParser(strict, opt) {
  if (!(this instanceof SAXParser)) {
    return new SAXParser(strict, opt);
  }

  const options = opt || {};

  clearBuffers(this);
  this.c = "";
  this.q = "";
  this.bufferCheckPosition = sax.MAX_BUFFER_LENGTH;
  this.opt = options;
  options.lowercase = options.lowercase || options.lowercasetags;
  this.looseCase = options.lowercase ? "toLowerCase" : "toUpperCase";
  this.tags = [];
  this.sawRoot = false;
  this.closedRoot = false;
  this.closed = false;
  this.error = null;
  this.tag = null;
  this.strict = Boolean(strict);
  this.noscript = Boolean(strict || options.noscript);
  this.state = S.BEGIN;
  this.strictEntities = options.strictEntities;
  // Each parser gets its own entity table that inherits from the shared one.
  this.ENTITIES = Object.create(
    this.strictEntities ? sax.XML_ENTITIES : sax.ENTITIES
  );
  this.attribList = [];

  // namespaces form a prototype chain.
  // it always points at the current tag,
  // which protos to its parent tag.
  if (options.xmlns) {
    this.ns = Object.create(rootNS);
  }

  // mostly just for error reporting
  this.trackPosition = options.position !== false;
  if (this.trackPosition) {
    this.column = 0;
    this.line = 0;
    this.position = 0;
  }
  emit(this, "onready");
}

// Minimal single-argument Object.create shim for engines that lack it.
if (!Object.create) {
  Object.create = function(proto) {
    function Shim() {}
    Shim.prototype = proto;
    return new Shim();
  };
}

// Object.keys shim for engines that lack it: own enumerable keys only.
if (!Object.keys) {
  Object.keys = function(o) {
    var own = [];
    for (var key in o) {
      if (!o.hasOwnProperty(key)) {
        continue;
      }
      own.push(key);
    }
    return own;
  };
}

/**
 * Inspects every buffer on the parser. An over-long textNode, cdata or
 * script buffer is emitted early; any other over-long buffer is reported
 * through error(). Afterwards the next check position is rescheduled.
 *
 * @param {SAXParser} parser
 */
function checkBufferLength(parser) {
  const limit = Math.max(sax.MAX_BUFFER_LENGTH, 10);
  let longest = 0;

  for (const name of buffers) {
    // Length is sampled before any flush, so `longest` reflects the
    // pre-flush size of the buffer.
    const size = parser[name].length;
    if (size > limit) {
      // Text/cdata nodes can get big, and since they're buffered,
      // we can get here under normal conditions.
      // Avoid issues by emitting the text node now,
      // so at least it won't get any bigger.
      if (name === "textNode") {
        closeText(parser);
      } else if (name === "cdata") {
        emitNode(parser, "oncdata", parser.cdata);
        parser.cdata = "";
      } else if (name === "script") {
        emitNode(parser, "onscript", parser.script);
        parser.script = "";
      } else {
        error(parser, "Max buffer length exceeded: " + name);
      }
    }
    longest = Math.max(longest, size);
  }

  // schedule the next check for the earliest possible buffer overrun.
  parser.bufferCheckPosition =
    sax.MAX_BUFFER_LENGTH - longest + parser.position;
}

/**
 * Resets every named buffer on the parser to the empty string.
 *
 * @param {SAXParser} parser
 */
function clearBuffers(parser) {
  buffers.forEach(name => {
    parser[name] = "";
  });
}

/**
 * Emits pending text, then any non-empty cdata and script buffers (in that
 * order), clearing each buffer after it has been emitted.
 *
 * @param {SAXParser} parser
 */
function flushBuffers(parser) {
  closeText(parser);

  const pending = [["cdata", "oncdata"], ["script", "onscript"]];
  for (const [field, handlerName] of pending) {
    if (parser[field] !== "") {
      emitNode(parser, handlerName, parser[field]);
      parser[field] = "";
    }
  }
}

SAXParser.prototype = {
  // Finishes parsing via end(); returns nothing.
  end() {
    end(this);
  },
  // Shared write() implementation.
  write,
  // Clears the stored error and returns the parser for chaining.
  resume() {
    this.error = null;
    return this;
  },
  // Equivalent to write(null).
  close() {
    return this.write(null);
  },
  // Pushes out buffered text/cdata/script; returns nothing.
  flush() {
    flushBuffers(this);
  },
};

// Use the "stream" module's Stream when require() works; otherwise fall back
// to an empty constructor.
var Stream;
try {
  Stream = require("stream").Stream;
} catch (ex) {
  Stream = function() {};
}

// Every event except "error" and "end", which SAXStream wires up itself.
var streamWraps = sax.EVENTS.filter(ev => !(ev === "error" || ev === "end"));

/**
 * Factory wrapper around the SAXStream constructor.
 *
 * @param {*} strict - forwarded to SAXStream.
 * @param {Object} [opt] - forwarded to SAXStream.
 * @returns {SAXStream}
 */
function createStream(strict, opt) {
  return new SAXStream(strict, opt);
}

/**
 * Stream wrapper around a SAXParser. May be invoked with or without `new`.
 * Defines an `on<event>` accessor on the instance for each entry of
 * streamWraps, backed by the wrapped parser's handler slot.
 *
 * @param {*} strict - forwarded to SAXParser.
 * @param {Object} [opt] - forwarded to SAXParser.
 */
function SAXStream(strict, opt) {
  if (!(this instanceof SAXStream)) {
    return new SAXStream(strict, opt);
  }

  Stream.call(this);

  const stream = this;
  stream._parser = new SAXParser(strict, opt);
  stream.writable = true;
  stream.readable = true;

  stream._parser.onend = () => {
    stream.emit("end");
  };

  stream._parser.onerror = er => {
    stream.emit("error", er);

    // if didn't throw, then means error was handled.
    // go ahead and clear error, so we can write again.
    stream._parser.error = null;
  };

  stream._decoder = null;

  for (const ev of streamWraps) {
    const slot = "on" + ev;
    Object.defineProperty(stream, slot, {
      enumerable: true,
      configurable: false,
      get: () => stream._parser[slot],
      set: handler => {
        if (handler) {
          stream.on(ev, handler);
          return undefined;
        }
        // A falsy handler removes all listeners for the event and is stored
        // as-is in the parser's handler slot.
        stream.removeAllListeners(ev);
        stream._parser[slot] = handler;
        return handler;
      },
    });
  }
}

SAXStream.prototype = Object.create(Stream.prototype, {
  constructor: {
    value: SAXStream,
  },
});

/**
 * Feeds a chunk to the wrapped parser and re-emits it as a "data" event.
 * When a global Buffer exists and `data` is a Buffer, it is first decoded
 * through a lazily created utf8 StringDecoder; the decoded string is what
 * gets emitted.
 *
 * @param {*} data - chunk to parse; `toString()` is called on it.
 * @returns {boolean} always true.
 */
SAXStream.prototype.write = function(data) {
  let chunk = data;
  const isNodeBuffer =
    typeof Buffer === "function" &&
    typeof Buffer.isBuffer === "function" &&
    Buffer.isBuffer(chunk);

  if (isNodeBuffer) {
    if (!this._decoder) {
      const { StringDecoder } = require("string_decoder");
      this._decoder = new StringDecoder("utf8");
    }
    chunk = this._decoder.write(chunk);
  }

  this._parser.write(chunk.toString());
  this.emit("data", chunk);
  return true;
};

/**
 * Writes an optional final chunk (only when it is truthy and has a non-zero
 * length), then ends the wrapped parser.
 *
 * @param {*} [chunk]
 * @returns {boolean} always true.
 */
SAXStream.prototype.end = function(chunk) {
  const hasFinalChunk = Boolean(chunk && chunk.length);
  if (hasFinalChunk) {
    this.write(chunk);
  }
  this._parser.end();
  return true;
};

/**
 * Registers a listener through Stream.prototype.on. The first time a
 * listener is added for an event in streamWraps while the parser's handler
 * slot is empty, a relay is installed on the parser that re-emits that
 * event on this stream.
 *
 * @param {string} ev - event name.
 * @param {Function} handler - listener.
 * @returns {*} whatever Stream.prototype.on returns.
 */
SAXStream.prototype.on = function(ev, handler) {
  const stream = this;
  const slot = "on" + ev;

  if (!stream._parser[slot] && streamWraps.indexOf(ev) >= 0) {
    stream._parser[slot] = (...payload) => {
      stream.emit(ev, ...payload);
    };
  }

  return Stream.prototype.on.call(stream, ev, handler);
};

// this really needs to be replaced with character classes.
// XML allows all manner of ridiculous numbers and digits.
+ var CDATA = "[CDATA["; + var DOCTYPE = "DOCTYPE"; + var XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"; + var XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"; + var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }; + + // http://www.w3.org/TR/REC-xml/#NT-NameStartChar + // This implementation works on strings, a single character at a time + // as such, it cannot ever support astral-plane characters (10000-EFFFF) + // without a significant breaking change to either this parser, or the + // JavaScript language. Implementation of an emoji-capable xml parser + // is left as an exercise for the reader. + var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/; + + var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/; + + var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/; + var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/; + + function isWhitespace(c) { + return c === " " || c === "\n" || c === "\r" || c === "\t"; + } + + function isQuote(c) { + return c === '"' || c === "'"; + } + + function isAttribEnd(c) { + return c === ">" || isWhitespace(c); + } + + function isMatch(regex, c) { + return regex.test(c); + } + + function notMatch(regex, c) { + return !isMatch(regex, c); + } + + var S = 0; + sax.STATE = { + BEGIN: S++, // leading byte order mark or whitespace + BEGIN_WHITESPACE: S++, // leading whitespace + TEXT: S++, // general stuff + TEXT_ENTITY: S++, // & and such. 
+ OPEN_WAKA: S++, // < + SGML_DECL: S++, // <!BLARG + SGML_DECL_QUOTED: S++, // <!BLARG foo "bar + DOCTYPE: S++, // <!DOCTYPE + DOCTYPE_QUOTED: S++, // <!DOCTYPE "//blah + DOCTYPE_DTD: S++, // <!DOCTYPE "//blah" [ ... + DOCTYPE_DTD_QUOTED: S++, // <!DOCTYPE "//blah" [ "foo + COMMENT_STARTING: S++, // <!- + COMMENT: S++, // <!-- + COMMENT_ENDING: S++, // <!-- blah - + COMMENT_ENDED: S++, // <!-- blah -- + CDATA: S++, // <![CDATA[ something + CDATA_ENDING: S++, // ] + CDATA_ENDING_2: S++, // ]] + PROC_INST: S++, // <?hi + PROC_INST_BODY: S++, // <?hi there + PROC_INST_ENDING: S++, // <?hi "there" ? + OPEN_TAG: S++, // <strong + OPEN_TAG_SLASH: S++, // <strong / + ATTRIB: S++, // <a + ATTRIB_NAME: S++, // <a foo + ATTRIB_NAME_SAW_WHITE: S++, // <a foo _ + ATTRIB_VALUE: S++, // <a foo= + ATTRIB_VALUE_QUOTED: S++, // <a foo="bar + ATTRIB_VALUE_CLOSED: S++, // <a foo="bar" + ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar + ATTRIB_VALUE_ENTITY_Q: S++, // <foo bar=""" + ATTRIB_VALUE_ENTITY_U: S++, // <foo bar=" + CLOSE_TAG: S++, // </a + CLOSE_TAG_SAW_WHITE: S++, // </a > + SCRIPT: S++, // <script> ... + SCRIPT_ENDING: S++, // <script> ... 
< + }; + + sax.XML_ENTITIES = { + amp: "&", + gt: ">", + lt: "<", + quot: '"', + apos: "'", + }; + + sax.ENTITIES = { + amp: "&", + gt: ">", + lt: "<", + quot: '"', + apos: "'", + AElig: 198, + Aacute: 193, + Acirc: 194, + Agrave: 192, + Aring: 197, + Atilde: 195, + Auml: 196, + Ccedil: 199, + ETH: 208, + Eacute: 201, + Ecirc: 202, + Egrave: 200, + Euml: 203, + Iacute: 205, + Icirc: 206, + Igrave: 204, + Iuml: 207, + Ntilde: 209, + Oacute: 211, + Ocirc: 212, + Ograve: 210, + Oslash: 216, + Otilde: 213, + Ouml: 214, + THORN: 222, + Uacute: 218, + Ucirc: 219, + Ugrave: 217, + Uuml: 220, + Yacute: 221, + aacute: 225, + acirc: 226, + aelig: 230, + agrave: 224, + aring: 229, + atilde: 227, + auml: 228, + ccedil: 231, + eacute: 233, + ecirc: 234, + egrave: 232, + eth: 240, + euml: 235, + iacute: 237, + icirc: 238, + igrave: 236, + iuml: 239, + ntilde: 241, + oacute: 243, + ocirc: 244, + ograve: 242, + oslash: 248, + otilde: 245, + ouml: 246, + szlig: 223, + thorn: 254, + uacute: 250, + ucirc: 251, + ugrave: 249, + uuml: 252, + yacute: 253, + yuml: 255, + copy: 169, + reg: 174, + nbsp: 160, + iexcl: 161, + cent: 162, + pound: 163, + curren: 164, + yen: 165, + brvbar: 166, + sect: 167, + uml: 168, + ordf: 170, + laquo: 171, + not: 172, + shy: 173, + macr: 175, + deg: 176, + plusmn: 177, + sup1: 185, + sup2: 178, + sup3: 179, + acute: 180, + micro: 181, + para: 182, + middot: 183, + cedil: 184, + ordm: 186, + raquo: 187, + frac14: 188, + frac12: 189, + frac34: 190, + iquest: 191, + times: 215, + divide: 247, + OElig: 338, + oelig: 339, + Scaron: 352, + scaron: 353, + Yuml: 376, + fnof: 402, + circ: 710, + tilde: 732, + Alpha: 913, + Beta: 914, + Gamma: 915, + Delta: 916, + Epsilon: 917, + Zeta: 918, + Eta: 919, + Theta: 920, + Iota: 921, + Kappa: 922, + Lambda: 923, + Mu: 924, + Nu: 925, + Xi: 926, + Omicron: 927, + Pi: 928, + Rho: 929, + Sigma: 931, + Tau: 932, + Upsilon: 933, + Phi: 934, + Chi: 935, + Psi: 936, + Omega: 937, + alpha: 945, + beta: 946, + gamma: 947, + 
delta: 948, + epsilon: 949, + zeta: 950, + eta: 951, + theta: 952, + iota: 953, + kappa: 954, + lambda: 955, + mu: 956, + nu: 957, + xi: 958, + omicron: 959, + pi: 960, + rho: 961, + sigmaf: 962, + sigma: 963, + tau: 964, + upsilon: 965, + phi: 966, + chi: 967, + psi: 968, + omega: 969, + thetasym: 977, + upsih: 978, + piv: 982, + ensp: 8194, + emsp: 8195, + thinsp: 8201, + zwnj: 8204, + zwj: 8205, + lrm: 8206, + rlm: 8207, + ndash: 8211, + mdash: 8212, + lsquo: 8216, + rsquo: 8217, + sbquo: 8218, + ldquo: 8220, + rdquo: 8221, + bdquo: 8222, + dagger: 8224, + Dagger: 8225, + bull: 8226, + hellip: 8230, + permil: 8240, + prime: 8242, + Prime: 8243, + lsaquo: 8249, + rsaquo: 8250, + oline: 8254, + frasl: 8260, + euro: 8364, + image: 8465, + weierp: 8472, + real: 8476, + trade: 8482, + alefsym: 8501, + larr: 8592, + uarr: 8593, + rarr: 8594, + darr: 8595, + harr: 8596, + crarr: 8629, + lArr: 8656, + uArr: 8657, + rArr: 8658, + dArr: 8659, + hArr: 8660, + forall: 8704, + part: 8706, + exist: 8707, + empty: 8709, + nabla: 8711, + isin: 8712, + notin: 8713, + ni: 8715, + prod: 8719, + sum: 8721, + minus: 8722, + lowast: 8727, + radic: 8730, + prop: 8733, + infin: 8734, + ang: 8736, + and: 8743, + or: 8744, + cap: 8745, + cup: 8746, + int: 8747, + there4: 8756, + sim: 8764, + cong: 8773, + asymp: 8776, + ne: 8800, + equiv: 8801, + le: 8804, + ge: 8805, + sub: 8834, + sup: 8835, + nsub: 8836, + sube: 8838, + supe: 8839, + oplus: 8853, + otimes: 8855, + perp: 8869, + sdot: 8901, + lceil: 8968, + rceil: 8969, + lfloor: 8970, + rfloor: 8971, + lang: 9001, + rang: 9002, + loz: 9674, + spades: 9824, + clubs: 9827, + hearts: 9829, + diams: 9830, + }; + + Object.keys(sax.ENTITIES).forEach(function(key) { + var e = sax.ENTITIES[key]; + var s = typeof e === "number" ? 
String.fromCharCode(e) : e; + sax.ENTITIES[key] = s; + }); + + for (var s in sax.STATE) { + sax.STATE[sax.STATE[s]] = s; + } + + // shorthand + S = sax.STATE; + + function emit(parser, event, data) { + parser[event] && parser[event](data); + } + + function emitNode(parser, nodeType, data) { + if (parser.textNode) { + closeText(parser); + } + emit(parser, nodeType, data); + } + + function closeText(parser) { + parser.textNode = textopts(parser.opt, parser.textNode); + if (parser.textNode) { + emit(parser, "ontext", parser.textNode); + } + parser.textNode = ""; + } + + function textopts(opt, text) { + if (opt.trim) { + text = text.trim(); + } + if (opt.normalize) { + text = text.replace(/\s+/g, " "); + } + return text; + } + + function error(parser, er) { + closeText(parser); + if (parser.trackPosition) { + er += + "\nLine: " + + parser.line + + "\nColumn: " + + parser.column + + "\nChar: " + + parser.c; + } + er = new Error(er); + parser.error = er; + emit(parser, "onerror", er); + return parser; + } + + function end(parser) { + if (parser.sawRoot && !parser.closedRoot) { + strictFail(parser, "Unclosed root tag"); + } + if ( + parser.state !== S.BEGIN && + parser.state !== S.BEGIN_WHITESPACE && + parser.state !== S.TEXT + ) { + error(parser, "Unexpected end"); + } + closeText(parser); + parser.c = ""; + parser.closed = true; + emit(parser, "onend"); + SAXParser.call(parser, parser.strict, parser.opt); + return parser; + } + + function strictFail(parser, message) { + if (typeof parser !== "object" || !(parser instanceof SAXParser)) { + throw new Error("bad call to strictFail"); + } + if (parser.strict) { + error(parser, message); + } + } + + function newTag(parser) { + if (!parser.strict) { + parser.tagName = parser.tagName[parser.looseCase](); + } + var parent = parser.tags[parser.tags.length - 1] || parser; + var tag = (parser.tag = { name: parser.tagName, attributes: {} }); + + // will be overridden if tag contails an xmlns="foo" or xmlns:foo="bar" + if 
(parser.opt.xmlns) { + tag.ns = parent.ns; + } + parser.attribList.length = 0; + emitNode(parser, "onopentagstart", tag); + } + + function qname(name, attribute) { + var i = name.indexOf(":"); + var qualName = i < 0 ? ["", name] : name.split(":"); + var prefix = qualName[0]; + var local = qualName[1]; + + // <x "xmlns"="http://foo"> + if (attribute && name === "xmlns") { + prefix = "xmlns"; + local = ""; + } + + return { prefix, local }; + } + + function attrib(parser) { + if (!parser.strict) { + parser.attribName = parser.attribName[parser.looseCase](); + } + + if ( + parser.attribList.indexOf(parser.attribName) !== -1 || + parser.tag.attributes.hasOwnProperty(parser.attribName) + ) { + parser.attribName = parser.attribValue = ""; + return; + } + + if (parser.opt.xmlns) { + var qn = qname(parser.attribName, true); + var prefix = qn.prefix; + var local = qn.local; + + if (prefix === "xmlns") { + // namespace binding attribute. push the binding into scope + if (local === "xml" && parser.attribValue !== XML_NAMESPACE) { + strictFail( + parser, + "xml: prefix must be bound to " + + XML_NAMESPACE + + "\n" + + "Actual: " + + parser.attribValue + ); + } else if ( + local === "xmlns" && + parser.attribValue !== XMLNS_NAMESPACE + ) { + strictFail( + parser, + "xmlns: prefix must be bound to " + + XMLNS_NAMESPACE + + "\n" + + "Actual: " + + parser.attribValue + ); + } else { + var tag = parser.tag; + var parent = parser.tags[parser.tags.length - 1] || parser; + if (tag.ns === parent.ns) { + tag.ns = Object.create(parent.ns); + } + tag.ns[local] = parser.attribValue; + } + } + + // defer onattribute events until all attributes have been seen + // so any new bindings can take effect. 
preserve attribute order + // so deferred events can be emitted in document order + parser.attribList.push([parser.attribName, parser.attribValue]); + } else { + // in non-xmlns mode, we can emit the event right away + parser.tag.attributes[parser.attribName] = parser.attribValue; + emitNode(parser, "onattribute", { + name: parser.attribName, + value: parser.attribValue, + }); + } + + parser.attribName = parser.attribValue = ""; + } + + function openTag(parser, selfClosing) { + if (parser.opt.xmlns) { + // emit namespace binding events + var tag = parser.tag; + + // add namespace info to tag + var qn = qname(parser.tagName); + tag.prefix = qn.prefix; + tag.local = qn.local; + tag.uri = tag.ns[qn.prefix] || ""; + + if (tag.prefix && !tag.uri) { + strictFail( + parser, + "Unbound namespace prefix: " + JSON.stringify(parser.tagName) + ); + tag.uri = qn.prefix; + } + + var parent = parser.tags[parser.tags.length - 1] || parser; + if (tag.ns && parent.ns !== tag.ns) { + Object.keys(tag.ns).forEach(function(p) { + emitNode(parser, "onopennamespace", { + prefix: p, + uri: tag.ns[p], + }); + }); + } + + // handle deferred onattribute events + // Note: do not apply default ns to attributes: + // http://www.w3.org/TR/REC-xml-names/#defaulting + for (var i = 0, l = parser.attribList.length; i < l; i++) { + var nv = parser.attribList[i]; + var name = nv[0]; + var value = nv[1]; + var qualName = qname(name, true); + var prefix = qualName.prefix; + var local = qualName.local; + var uri = prefix === "" ? "" : tag.ns[prefix] || ""; + var a = { + name, + value, + prefix, + local, + uri, + }; + + // if there's any attributes with an undefined namespace, + // then fail on them now. 
+ if (prefix && prefix !== "xmlns" && !uri) { + strictFail( + parser, + "Unbound namespace prefix: " + JSON.stringify(prefix) + ); + a.uri = prefix; + } + parser.tag.attributes[name] = a; + emitNode(parser, "onattribute", a); + } + parser.attribList.length = 0; + } + + parser.tag.isSelfClosing = !!selfClosing; + + // process the tag + parser.sawRoot = true; + parser.tags.push(parser.tag); + emitNode(parser, "onopentag", parser.tag); + if (!selfClosing) { + // special case for <script> in non-strict mode. + if (!parser.noscript && parser.tagName.toLowerCase() === "script") { + parser.state = S.SCRIPT; + } else { + parser.state = S.TEXT; + } + parser.tag = null; + parser.tagName = ""; + } + parser.attribName = parser.attribValue = ""; + parser.attribList.length = 0; + } + + function closeTag(parser) { + if (!parser.tagName) { + strictFail(parser, "Weird empty close tag."); + parser.textNode += "</>"; + parser.state = S.TEXT; + return; + } + + if (parser.script) { + if (parser.tagName !== "script") { + parser.script += "</" + parser.tagName + ">"; + parser.tagName = ""; + parser.state = S.SCRIPT; + return; + } + emitNode(parser, "onscript", parser.script); + parser.script = ""; + } + + // first make sure that the closing tag actually exists. + // <a><b></c></b></a> will close everything, otherwise. + var t = parser.tags.length; + var tagName = parser.tagName; + if (!parser.strict) { + tagName = tagName[parser.looseCase](); + } + var closeTo = tagName; + while (t--) { + var close = parser.tags[t]; + if (close.name !== closeTo) { + // fail the first time in strict mode + strictFail(parser, "Unexpected close tag"); + } else { + break; + } + } + + // didn't find it. we already failed for strict, so just abort. 
+ if (t < 0) { + strictFail(parser, "Unmatched closing tag: " + parser.tagName); + parser.textNode += "</" + parser.tagName + ">"; + parser.state = S.TEXT; + return; + } + parser.tagName = tagName; + var s = parser.tags.length; + while (s-- > t) { + var tag = (parser.tag = parser.tags.pop()); + parser.tagName = parser.tag.name; + emitNode(parser, "onclosetag", parser.tagName); + + var x = {}; + for (var i in tag.ns) { + x[i] = tag.ns[i]; + } + + var parent = parser.tags[parser.tags.length - 1] || parser; + if (parser.opt.xmlns && tag.ns !== parent.ns) { + // remove namespace bindings introduced by tag + Object.keys(tag.ns).forEach(function(p) { + var n = tag.ns[p]; + emitNode(parser, "onclosenamespace", { prefix: p, uri: n }); + }); + } + } + if (t === 0) { + parser.closedRoot = true; + } + parser.tagName = parser.attribValue = parser.attribName = ""; + parser.attribList.length = 0; + parser.state = S.TEXT; + } + + function parseEntity(parser) { + var entity = parser.entity; + var entityLC = entity.toLowerCase(); + var num; + var numStr = ""; + + if (parser.ENTITIES[entity]) { + return parser.ENTITIES[entity]; + } + if (parser.ENTITIES[entityLC]) { + return parser.ENTITIES[entityLC]; + } + entity = entityLC; + if (entity.charAt(0) === "#") { + if (entity.charAt(1) === "x") { + entity = entity.slice(2); + num = parseInt(entity, 16); + numStr = num.toString(16); + } else { + entity = entity.slice(1); + num = parseInt(entity, 10); + numStr = num.toString(10); + } + } + entity = entity.replace(/^0+/, ""); + if (isNaN(num) || numStr.toLowerCase() !== entity) { + strictFail(parser, "Invalid character entity"); + return "&" + parser.entity + ";"; + } + + return String.fromCodePoint(num); + } + + function beginWhiteSpace(parser, c) { + if (c === "<") { + parser.state = S.OPEN_WAKA; + parser.startTagPosition = parser.position; + } else if (!isWhitespace(c)) { + // have to process this as a text node. + // weird, but happens. 
+ strictFail(parser, "Non-whitespace before first tag."); + parser.textNode = c; + parser.state = S.TEXT; + } + } + + function charAt(chunk, i) { + var result = ""; + if (i < chunk.length) { + result = chunk.charAt(i); + } + return result; + } + + function write(chunk) { + var parser = this; + if (this.error) { + throw this.error; + } + if (parser.closed) { + return error( + parser, + "Cannot write after close. Assign an onready handler." + ); + } + if (chunk === null) { + return end(parser); + } + if (typeof chunk === "object") { + chunk = chunk.toString(); + } + var i = 0; + var c = ""; + while (true) { + c = charAt(chunk, i++); + parser.c = c; + + if (!c) { + break; + } + + if (parser.trackPosition) { + parser.position++; + if (c === "\n") { + parser.line++; + parser.column = 0; + } else { + parser.column++; + } + } + + switch (parser.state) { + case S.BEGIN: + parser.state = S.BEGIN_WHITESPACE; + if (c === "\uFEFF") { + continue; + } + beginWhiteSpace(parser, c); + continue; + + case S.BEGIN_WHITESPACE: + beginWhiteSpace(parser, c); + continue; + + case S.TEXT: + if (parser.sawRoot && !parser.closedRoot) { + var starti = i - 1; + while (c && c !== "<" && c !== "&") { + c = charAt(chunk, i++); + if (c && parser.trackPosition) { + parser.position++; + if (c === "\n") { + parser.line++; + parser.column = 0; + } else { + parser.column++; + } + } + } + parser.textNode += chunk.substring(starti, i - 1); + } + if ( + c === "<" && + !(parser.sawRoot && parser.closedRoot && !parser.strict) + ) { + parser.state = S.OPEN_WAKA; + parser.startTagPosition = parser.position; + } else { + if (!isWhitespace(c) && (!parser.sawRoot || parser.closedRoot)) { + strictFail(parser, "Text data outside of root node."); + } + if (c === "&") { + parser.state = S.TEXT_ENTITY; + } else { + parser.textNode += c; + } + } + continue; + + case S.SCRIPT: + // only non-strict + if (c === "<") { + parser.state = S.SCRIPT_ENDING; + } else { + parser.script += c; + } + continue; + + case 
S.SCRIPT_ENDING: + if (c === "/") { + parser.state = S.CLOSE_TAG; + } else { + parser.script += "<" + c; + parser.state = S.SCRIPT; + } + continue; + + case S.OPEN_WAKA: + // either a /, ?, !, or text is coming next. + if (c === "!") { + parser.state = S.SGML_DECL; + parser.sgmlDecl = ""; + } else if (isWhitespace(c)) { + // wait for it... + } else if (isMatch(nameStart, c)) { + parser.state = S.OPEN_TAG; + parser.tagName = c; + } else if (c === "/") { + parser.state = S.CLOSE_TAG; + parser.tagName = ""; + } else if (c === "?") { + parser.state = S.PROC_INST; + parser.procInstName = parser.procInstBody = ""; + } else { + strictFail(parser, "Unencoded <"); + // if there was some whitespace, then add that in. + if (parser.startTagPosition + 1 < parser.position) { + var pad = parser.position - parser.startTagPosition; + c = new Array(pad).join(" ") + c; + } + parser.textNode += "<" + c; + parser.state = S.TEXT; + } + continue; + + case S.SGML_DECL: + if ((parser.sgmlDecl + c).toUpperCase() === CDATA) { + emitNode(parser, "onopencdata"); + parser.state = S.CDATA; + parser.sgmlDecl = ""; + parser.cdata = ""; + } else if (parser.sgmlDecl + c === "--") { + parser.state = S.COMMENT; + parser.comment = ""; + parser.sgmlDecl = ""; + } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) { + parser.state = S.DOCTYPE; + if (parser.doctype || parser.sawRoot) { + strictFail(parser, "Inappropriately located doctype declaration"); + } + parser.doctype = ""; + parser.sgmlDecl = ""; + } else if (c === ">") { + emitNode(parser, "onsgmldeclaration", parser.sgmlDecl); + parser.sgmlDecl = ""; + parser.state = S.TEXT; + } else if (isQuote(c)) { + parser.state = S.SGML_DECL_QUOTED; + parser.sgmlDecl += c; + } else { + parser.sgmlDecl += c; + } + continue; + + case S.SGML_DECL_QUOTED: + if (c === parser.q) { + parser.state = S.SGML_DECL; + parser.q = ""; + } + parser.sgmlDecl += c; + continue; + + case S.DOCTYPE: + if (c === ">") { + parser.state = S.TEXT; + emitNode(parser, 
"ondoctype", parser.doctype); + parser.doctype = true; // just remember that we saw it. + } else { + parser.doctype += c; + if (c === "[") { + parser.state = S.DOCTYPE_DTD; + } else if (isQuote(c)) { + parser.state = S.DOCTYPE_QUOTED; + parser.q = c; + } + } + continue; + + case S.DOCTYPE_QUOTED: + parser.doctype += c; + if (c === parser.q) { + parser.q = ""; + parser.state = S.DOCTYPE; + } + continue; + + case S.DOCTYPE_DTD: + parser.doctype += c; + if (c === "]") { + parser.state = S.DOCTYPE; + } else if (isQuote(c)) { + parser.state = S.DOCTYPE_DTD_QUOTED; + parser.q = c; + } + continue; + + case S.DOCTYPE_DTD_QUOTED: + parser.doctype += c; + if (c === parser.q) { + parser.state = S.DOCTYPE_DTD; + parser.q = ""; + } + continue; + + case S.COMMENT: + if (c === "-") { + parser.state = S.COMMENT_ENDING; + } else { + parser.comment += c; + } + continue; + + case S.COMMENT_ENDING: + if (c === "-") { + parser.state = S.COMMENT_ENDED; + parser.comment = textopts(parser.opt, parser.comment); + if (parser.comment) { + emitNode(parser, "oncomment", parser.comment); + } + parser.comment = ""; + } else { + parser.comment += "-" + c; + parser.state = S.COMMENT; + } + continue; + + case S.COMMENT_ENDED: + if (c !== ">") { + strictFail(parser, "Malformed comment"); + // allow <!-- blah -- bloo --> in non-strict mode, + // which is a comment of " blah -- bloo " + parser.comment += "--" + c; + parser.state = S.COMMENT; + } else { + parser.state = S.TEXT; + } + continue; + + case S.CDATA: + if (c === "]") { + parser.state = S.CDATA_ENDING; + } else { + parser.cdata += c; + } + continue; + + case S.CDATA_ENDING: + if (c === "]") { + parser.state = S.CDATA_ENDING_2; + } else { + parser.cdata += "]" + c; + parser.state = S.CDATA; + } + continue; + + case S.CDATA_ENDING_2: + if (c === ">") { + if (parser.cdata) { + emitNode(parser, "oncdata", parser.cdata); + } + emitNode(parser, "onclosecdata"); + parser.cdata = ""; + parser.state = S.TEXT; + } else if (c === "]") { + parser.cdata 
+= "]"; + } else { + parser.cdata += "]]" + c; + parser.state = S.CDATA; + } + continue; + + case S.PROC_INST: + if (c === "?") { + parser.state = S.PROC_INST_ENDING; + } else if (isWhitespace(c)) { + parser.state = S.PROC_INST_BODY; + } else { + parser.procInstName += c; + } + continue; + + case S.PROC_INST_BODY: + if (!parser.procInstBody && isWhitespace(c)) { + continue; + } else if (c === "?") { + parser.state = S.PROC_INST_ENDING; + } else { + parser.procInstBody += c; + } + continue; + + case S.PROC_INST_ENDING: + if (c === ">") { + emitNode(parser, "onprocessinginstruction", { + name: parser.procInstName, + body: parser.procInstBody, + }); + parser.procInstName = parser.procInstBody = ""; + parser.state = S.TEXT; + } else { + parser.procInstBody += "?" + c; + parser.state = S.PROC_INST_BODY; + } + continue; + + case S.OPEN_TAG: + if (isMatch(nameBody, c)) { + parser.tagName += c; + } else { + newTag(parser); + if (c === ">") { + openTag(parser); + } else if (c === "/") { + parser.state = S.OPEN_TAG_SLASH; + } else { + if (!isWhitespace(c)) { + strictFail(parser, "Invalid character in tag name"); + } + parser.state = S.ATTRIB; + } + } + continue; + + case S.OPEN_TAG_SLASH: + if (c === ">") { + openTag(parser, true); + closeTag(parser); + } else { + strictFail( + parser, + "Forward-slash in opening tag not followed by >" + ); + parser.state = S.ATTRIB; + } + continue; + + case S.ATTRIB: + // haven't read the attribute name yet. 
+ if (isWhitespace(c)) { + continue; + } else if (c === ">") { + openTag(parser); + } else if (c === "/") { + parser.state = S.OPEN_TAG_SLASH; + } else if (isMatch(nameStart, c)) { + parser.attribName = c; + parser.attribValue = ""; + parser.state = S.ATTRIB_NAME; + } else { + strictFail(parser, "Invalid attribute name"); + } + continue; + + case S.ATTRIB_NAME: + if (c === "=") { + parser.state = S.ATTRIB_VALUE; + } else if (c === ">") { + strictFail(parser, "Attribute without value"); + parser.attribValue = parser.attribName; + attrib(parser); + openTag(parser); + } else if (isWhitespace(c)) { + parser.state = S.ATTRIB_NAME_SAW_WHITE; + } else if (isMatch(nameBody, c)) { + parser.attribName += c; + } else { + strictFail(parser, "Invalid attribute name"); + } + continue; + + case S.ATTRIB_NAME_SAW_WHITE: + if (c === "=") { + parser.state = S.ATTRIB_VALUE; + } else if (isWhitespace(c)) { + continue; + } else { + strictFail(parser, "Attribute without value"); + parser.tag.attributes[parser.attribName] = ""; + parser.attribValue = ""; + emitNode(parser, "onattribute", { + name: parser.attribName, + value: "", + }); + parser.attribName = ""; + if (c === ">") { + openTag(parser); + } else if (isMatch(nameStart, c)) { + parser.attribName = c; + parser.state = S.ATTRIB_NAME; + } else { + strictFail(parser, "Invalid attribute name"); + parser.state = S.ATTRIB; + } + } + continue; + + case S.ATTRIB_VALUE: + if (isWhitespace(c)) { + continue; + } else if (isQuote(c)) { + parser.q = c; + parser.state = S.ATTRIB_VALUE_QUOTED; + } else { + strictFail(parser, "Unquoted attribute value"); + parser.state = S.ATTRIB_VALUE_UNQUOTED; + parser.attribValue = c; + } + continue; + + case S.ATTRIB_VALUE_QUOTED: + if (c !== parser.q) { + if (c === "&") { + parser.state = S.ATTRIB_VALUE_ENTITY_Q; + } else { + parser.attribValue += c; + } + continue; + } + attrib(parser); + parser.q = ""; + parser.state = S.ATTRIB_VALUE_CLOSED; + continue; + + case S.ATTRIB_VALUE_CLOSED: + if 
(isWhitespace(c)) { + parser.state = S.ATTRIB; + } else if (c === ">") { + openTag(parser); + } else if (c === "/") { + parser.state = S.OPEN_TAG_SLASH; + } else if (isMatch(nameStart, c)) { + strictFail(parser, "No whitespace between attributes"); + parser.attribName = c; + parser.attribValue = ""; + parser.state = S.ATTRIB_NAME; + } else { + strictFail(parser, "Invalid attribute name"); + } + continue; + + case S.ATTRIB_VALUE_UNQUOTED: + if (!isAttribEnd(c)) { + if (c === "&") { + parser.state = S.ATTRIB_VALUE_ENTITY_U; + } else { + parser.attribValue += c; + } + continue; + } + attrib(parser); + if (c === ">") { + openTag(parser); + } else { + parser.state = S.ATTRIB; + } + continue; + + case S.CLOSE_TAG: + if (!parser.tagName) { + if (isWhitespace(c)) { + continue; + } else if (notMatch(nameStart, c)) { + if (parser.script) { + parser.script += "</" + c; + parser.state = S.SCRIPT; + } else { + strictFail(parser, "Invalid tagname in closing tag."); + } + } else { + parser.tagName = c; + } + } else if (c === ">") { + closeTag(parser); + } else if (isMatch(nameBody, c)) { + parser.tagName += c; + } else if (parser.script) { + parser.script += "</" + parser.tagName; + parser.tagName = ""; + parser.state = S.SCRIPT; + } else { + if (!isWhitespace(c)) { + strictFail(parser, "Invalid tagname in closing tag"); + } + parser.state = S.CLOSE_TAG_SAW_WHITE; + } + continue; + + case S.CLOSE_TAG_SAW_WHITE: + if (isWhitespace(c)) { + continue; + } + if (c === ">") { + closeTag(parser); + } else { + strictFail(parser, "Invalid characters in closing tag"); + } + continue; + + case S.TEXT_ENTITY: + case S.ATTRIB_VALUE_ENTITY_Q: + case S.ATTRIB_VALUE_ENTITY_U: + var returnState; + var buffer; + switch (parser.state) { + case S.TEXT_ENTITY: + returnState = S.TEXT; + buffer = "textNode"; + break; + + case S.ATTRIB_VALUE_ENTITY_Q: + returnState = S.ATTRIB_VALUE_QUOTED; + buffer = "attribValue"; + break; + + case S.ATTRIB_VALUE_ENTITY_U: + returnState = S.ATTRIB_VALUE_UNQUOTED; + 
buffer = "attribValue"; + break; + } + + if (c === ";") { + parser[buffer] += parseEntity(parser); + parser.entity = ""; + parser.state = returnState; + } else if ( + isMatch(parser.entity.length ? entityBody : entityStart, c) + ) { + parser.entity += c; + } else { + strictFail(parser, "Invalid character in entity name"); + parser[buffer] += "&" + parser.entity + c; + parser.entity = ""; + parser.state = returnState; + } + + continue; + + default: + /* NOTE(review): the Error constructor takes the message as its first argument, so parser (not the Unknown state text) is what gets stringified into the message here; a single concatenated message argument looks intended. TODO confirm. */ throw new Error(parser, "Unknown state: " + parser.state); + } + } // while + + if (parser.position >= parser.bufferCheckPosition) { + checkBufferLength(parser); + } + return parser; + } + + /* Fallback for hosts without String.fromCodePoint: the guarded block below installs a replacement only when the native function is missing. */ /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */ + /* istanbul ignore next */ + if (!String.fromCodePoint) { + (function() { + var stringFromCharCode = String.fromCharCode; + var floor = Math.floor; + /* Returns the string formed by the code points passed as arguments (each coerced with Number). Throws RangeError for any value that is non-finite, negative, above 0x10ffff, or not an integer. */ var fromCodePoint = function() { + /* Buffered code units are flushed into result once more than MAX_SIZE have accumulated; presumably this keeps the apply() argument list bounded. */ var MAX_SIZE = 0x4000; + var codeUnits = []; + var highSurrogate; + var lowSurrogate; + var index = -1; + var length = arguments.length; + /* Called with no arguments: the result is the empty string. */ if (!length) { + return ""; + } + var result = ""; + while (++index < length) { + var codePoint = Number(arguments[index]); + if ( + !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity` + codePoint < 0 || // not a valid Unicode code point + codePoint > 0x10ffff || // not a valid Unicode code point + floor(codePoint) !== codePoint // not an integer + ) { + throw RangeError("Invalid code point: " + codePoint); + } + if (codePoint <= 0xffff) { + // BMP code point + codeUnits.push(codePoint); + } else { + // Astral code point; split in surrogate halves + // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + codePoint -= 0x10000; + highSurrogate = (codePoint >> 10) + 0xd800; + lowSurrogate = (codePoint % 0x400) + 0xdc00; + codeUnits.push(highSurrogate, lowSurrogate); + } + /* Flush on the last argument or when the buffer has grown past MAX_SIZE, then reuse the same array. */ if (index + 1 === length || codeUnits.length > MAX_SIZE) { + result += stringFromCharCode.apply(null, codeUnits); + codeUnits.length = 0; + } + } + return result; + 
}; + /* istanbul ignore next */ + /* Install through defineProperty with explicit configurable and writable flags (enumerable is not specified here); plain assignment is the fallback when Object.defineProperty is unavailable. */ if (Object.defineProperty) { + Object.defineProperty(String, "fromCodePoint", { + value: fromCodePoint, + configurable: true, + writable: true, + }); + } else { + String.fromCodePoint = fromCodePoint; + } + })(); + } + /* The enclosing wrapper is called with exports when that name is defined, otherwise with a new object that is also assigned to this.sax. */ })(typeof exports === "undefined" ? (this.sax = {}) : exports); |