diff options
Diffstat (limited to 'web/server/h2o/libh2o/misc/oktavia/src/sax.jsx')
-rw-r--r-- | web/server/h2o/libh2o/misc/oktavia/src/sax.jsx | 1356 |
1 files changed, 0 insertions, 1356 deletions
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/sax.jsx b/web/server/h2o/libh2o/misc/oktavia/src/sax.jsx deleted file mode 100644 index d34cb8253..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/sax.jsx +++ /dev/null @@ -1,1356 +0,0 @@ -// When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns. -// When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)), -// since that's the earliest that a buffer overrun could occur. This way, checks are -// as rare as required, but as often as necessary to ensure never crossing this bound. -// Furthermore, buffers are only tested at most once per write(), so passing a very -// large string into write() might have undesirable effects, but this is manageable by -// the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme -// edge case, result in creating at most one complete copy of the string passed in. -// Set to Infinity to have unlimited buffers. - - -class Tag -{ - var name : string; - var attributes : Map.<string>; - var isSelfClosing : boolean; - function constructor (name : string) - { - this.name = name; - this.attributes = {} : Map.<string>; - this.isSelfClosing = false; - } -} - -class _Common -{ - static const buffers = [ - "comment", "sgmlDecl", "textNode", "tagName", "doctype", - "procInstName", "procInstBody", "entity", "attribName", - "attribValue", "cdata", "script" - ]; - - static const EVENTS = // for discoverability. - [ "text", - "processinginstruction", - "sgmldeclaration", - "doctype", - "comment", - "attribute", - "opentag", - "closetag", - "opencdata", - "cdata", - "clo_State.CDATA", - "error", - "end", - "ready", - "script", - "opennamespace", - "closenamespace" - ]; - - static const MAX_BUFFER_LENGTH = 64 * 1024; -} - -class _State -{ - static const BEGIN = 1; - static const TEXT = 2; // general stuff - static const TEXT_ENTITY = 3; // & and such. - static const OPEN_WAKA = 4; // < - static const SGML_DECL = 5; // <!BLARG - static const SGML_DECL_QUOTED = 6; // <!BLARG foo "bar - static const DOCTYPE = 7; // <!DOCTYPE - static const DOCTYPE_QUOTED = 8; // <!DOCTYPE "//blah - static const DOCTYPE_DTD = 9; // <!DOCTYPE "//blah" [ ... - static const DOCTYPE_DTD_QUOTED = 10; // <!DOCTYPE "//blah" [ "foo - static const COMMENT_STARTING = 11; // <!- - static const COMMENT = 12; // <!-- - static const COMMENT_ENDING = 13; // <!-- blah - - static const COMMENT_ENDED = 14; // <!-- blah -- - static const CDATA = 15; // <![CDATA[ something - static const CDATA_ENDING = 16; // ] - static const CDATA_ENDING_2 = 17; // ]] - static const PROC_INST = 18; // <?hi - static const PROC_INST_BODY = 19; // <?hi there - static const PROC_INST_ENDING = 20; // <?hi "there" ? - static const OPEN_TAG = 21; // <strong - static const OPEN_TAG_SLASH = 22; // <strong / - static const ATTRIB = 23; // <a - static const ATTRIB_NAME = 24; // <a foo - static const ATTRIB_NAME_SAW_WHITE = 25; // <a foo _ - static const ATTRIB_VALUE = 26; // <a foo= - static const ATTRIB_VALUE_QUOTED = 27; // <a foo="bar - static const ATTRIB_VALUE_UNQUOTED = 28; // <a foo=bar - static const ATTRIB_VALUE_ENTITY_Q = 29; // <foo bar=""" - static const ATTRIB_VALUE_ENTITY_U = 30; // <foo bar=" - static const CLOSE_TAG = 31; // </a - static const CLOSE_TAG_SAW_WHITE = 32; // </a > - static const SCRIPT = 33; // <script> ... - static const SCRIPT_ENDING = 34; // <script> ... < -} - - -class SAXHandler -{ - var position : int; - var column : int; - var line : int; - function constructor () - { - this.position = 0; - this.column = 0; - this.line = 0; - } - function onerror (error : Error) : void - { - } - function ontext (text : string) : void - { - } - function ondoctype (doctype : string) : void - { - } - function onprocessinginstruction (name : string, body : string) : void - { - } - function onsgmldeclaration (sgmlDecl : string) : void - { - } - function onopentag (tagname : string, attributes : Map.<string>) : void - { - } - function onclosetag (tagname : string) : void - { - } - function onattribute (name : string, value : string) : void - { - } - function oncomment (comment : string) : void - { - } - function onopencdata () : void - { - } - function oncdata (cdata : string) : void - { - } - function onclosecdata () : void - { - } - function onend () : void - { - } - function onready () : void - { - } - function onscript (script : string) : void - { - } -} - -class SAXParser -{ - var q : string; - var c : string; - var bufferCheckPosition : int; - var looseCase : string; - var tags = [] : Tag[]; - var closed : boolean; - var closedRoot : boolean; - var sawRoot : boolean; - var tag : Nullable.<Tag>; - var error : Nullable.<Error>; - var handler : SAXHandler; - var ENTITIES : Map.<string>; - var strict : boolean; - var tagName : string; - var state : int; - var line : int; - var column : int; - var position : int; - var startTagPosition : int; - var attribName : string; - var attribValue : string; - var script : string; - var textNode : string; - var attribList : string[][]; - var noscript : boolean; - var cdata : string; - var procInstBody : string; - var procInstName : string; - var doctype : string; - var entity : string; - var sgmlDecl : string; - var comment : string; - var preTags : int; - - function constructor(handler : SAXHandler) - { - this._init(handler, false); - } - - function constructor(handler : SAXHandler, strict : boolean) - { - this._init(handler, strict); - } - - function _init (handler : SAXHandler, strict : boolean) : void - { - this.handler = handler; - this.clearBuffers(); - this.q = ""; - this.bufferCheckPosition = _Common.MAX_BUFFER_LENGTH; - //this.opt = opt || {} - //this.opt.lowercase = this.opt.lowercase || this.opt.lowercasetags - this.looseCase = 'toLowerCase'; // this.opt.lowercase ? "toLowerCase" : "toUpperCase" - this.tags = [] : Tag[]; - this.closed = this.closedRoot = this.sawRoot = false; - this.tag = null; - this.error = null; - this.strict = strict; - this.noscript = strict; //!!(strict || this.opt.noscript); - this.state = _State.BEGIN; - this.ENTITIES = _Entities.entity_list(); - this.attribList = [] : string[][]; - this.noscript = false; - this.preTags = 0; - - this.handler.onready(); - } - - function set_noscript (flag : boolean) : void - { - this.noscript = flag; - } - - function resume () : SAXParser - { - this.error = null; - return this; - } - - function close () : SAXParser - { - return this.parse(''); - } - - function parse (chunk : string) : SAXParser - { - var _ = new Char(); - if (this.error) - { - throw this.error; - } - if (this.closed) - { - return this.emiterror("Cannot write after close. Assign an onready handler."); - } - var i = 0, c = ""; - while (this.c = c = chunk.charAt(i++)) - { - this.position++; - if (c == "\n") - { - this.handler.line++; - this.handler.column = 0; - } - else - { - this.handler.column++; - } - switch (this.state) - { - case _State.BEGIN: - //log "BEGIN"; - if (c == "<") - { - this.state = _State.OPEN_WAKA; - this.startTagPosition = this.position; - } - else if (_.not(_.whitespace, c)) - { - // have to process this as a text node. - // weird, but happens. - this.strictFail("Non-whitespace before first tag."); - this.textNode = c; - this.state = _State.TEXT; - } - continue; - - case _State.TEXT: - //log "TEXT"; - if (this.sawRoot && !this.closedRoot) - { - var starti = i - 1; - while (c && c != "<" && c != "&") - { - c = chunk.charAt(i++); - if (c) - { - this.position++; - if (c == "\n") - { - this.handler.line++; - this.handler.column = 0; - } - else - { - this.handler.column++; - } - } - } - this.textNode += chunk.substring(starti, i - 1); - } - if (c == "<") - { - this.state = _State.OPEN_WAKA; - this.startTagPosition = this.position; - } - else - { - if (_.not(_.whitespace, c) && (!this.sawRoot || this.closedRoot)) - this.strictFail("Text data outside of root node."); - if (c == "&") this.state = _State.TEXT_ENTITY; - else this.textNode += c; - } - continue; - - case _State.SCRIPT: - //log "SCRIPT"; - // only non-strict - if (c == "<") { - this.state = _State.SCRIPT_ENDING; - } else this.script += c; - continue; - - case _State.SCRIPT_ENDING: - //log "SCRIPT END"; - if (c == "/") { - this.state = _State.CLOSE_TAG; - } else { - this.script += "<" + c; - this.state = _State.SCRIPT; - } - continue; - - case _State.OPEN_WAKA: - //log "OPEN_WAKA"; - // either a /, ?, !, or text is coming next. - if (c == "!") { - this.state = _State.SGML_DECL; - this.sgmlDecl = ""; - } else if (_.is(_.whitespace, c)) { - // wait for it... - } else if (_.is(_.nameStart,c)) { - this.state = _State.OPEN_TAG; - this.tagName = c; - } else if (c == "/") { - this.state = _State.CLOSE_TAG; - this.tagName = ""; - } else if (c == "?") { - this.state = _State.PROC_INST; - this.procInstName = this.procInstBody = ""; - } else { - this.strictFail("Unencoded <"); - // if there was some whitespace, then add that in. - if (this.startTagPosition + 1 < this.position) { - var pad = this.position - this.startTagPosition; - for (var i = 0; i < pad; i++) - { - c = " " + c; - } - } - this.textNode += "<" + c; - this.state = _State.TEXT; - } - continue; - - case _State.SGML_DECL: - //log "SGML_DECL"; - if ((this.sgmlDecl+c).toUpperCase() == _.CDATA) { - this.closetext_if_exist(); - this.handler.onopencdata(); - this.state = _State.CDATA; - this.sgmlDecl = ""; - this.cdata = ""; - } else if (this.sgmlDecl+c == "--") { - this.state = _State.COMMENT; - this.comment = ""; - this.sgmlDecl = ""; - } else if ((this.sgmlDecl+c).toUpperCase() == _.DOCTYPE) { - this.state = _State.DOCTYPE; - if (this.doctype || this.sawRoot) - { - this.strictFail("Inappropriately located doctype declaration"); - } - this.doctype = ""; - this.sgmlDecl = ""; - } else if (c == ">") { - this.closetext_if_exist(); - this.handler.onsgmldeclaration(this.sgmlDecl); - this.sgmlDecl = ""; - this.state = _State.TEXT; - } else if (_.is(_.quote, c)) { - this.state = _State.SGML_DECL_QUOTED; - this.sgmlDecl += c; - } else this.sgmlDecl += c; - continue; - - case _State.SGML_DECL_QUOTED: - //log "SGML_DECL_QUOTED"; - if (c == this.q) { - this.state = _State.SGML_DECL; - this.q = ""; - } - this.sgmlDecl += c; - continue; - - case _State.DOCTYPE: - //log "DOCTYPE"; - if (c == ">") { - this.state = _State.TEXT; - this.closetext_if_exist(); - this.handler.ondoctype(this.doctype.trim()); - } else { - this.doctype += c; - if (c == "[") this.state = _State.DOCTYPE_DTD; - else if (_.is(_.quote, c)) { - this.state = _State.DOCTYPE_QUOTED; - this.q = c; - } - } - continue; - - case _State.DOCTYPE_QUOTED: - //log "DOCTYPE_QUOTED"; - this.doctype += c; - if (c == this.q) { - this.q = ""; - this.state = _State.DOCTYPE; - } - continue; - - case _State.DOCTYPE_DTD: - //log "DOCTYPE_DTD"; - this.doctype += c; - if (c == "]") this.state = _State.DOCTYPE; - else if (_.is(_.quote,c)) { - this.state = _State.DOCTYPE_DTD_QUOTED; - this.q = c; - } - continue; - - case _State.DOCTYPE_DTD_QUOTED: - //log "DOCTYPE_DTD_QUOTED"; - this.doctype += c; - if (c == this.q) { - this.state = _State.DOCTYPE_DTD; - this.q = ""; - } - continue; - - case _State.COMMENT: - //log "COMMENT"; - if (c == "-") this.state = _State.COMMENT_ENDING; - else this.comment += c; - continue; - - case _State.COMMENT_ENDING: - //log "COMMENT_ENDING"; - if (c == "-") { - this.state = _State.COMMENT_ENDED; - this.comment = this.textopts(this.comment); - if (this.comment) - { - this.closetext_if_exist(); - this.handler.oncomment(this.comment.trim()); - } - this.comment = ""; - } else { - this.comment += "-" + c; - this.state = _State.COMMENT; - } - continue; - - case _State.COMMENT_ENDED: - //log "COMMENT_ENDED"; - if (c != ">") { - this.strictFail("Malformed comment"); - // allow <!-- blah -- bloo --> in non-strict mode, - // which is a comment of " blah -- bloo " - this.comment += "--" + c; - this.state = _State.COMMENT; - } else this.state = _State.TEXT; - continue; - - case _State.CDATA: - //log "CDATA"; - if (c == "]") this.state = _State.CDATA_ENDING; - else this.cdata += c; - continue; - - case _State.CDATA_ENDING: - //log "CDATA_ENDING"; - if (c == "]") this.state = _State.CDATA_ENDING_2; - else { - this.cdata += "]" + c; - this.state = _State.CDATA; - } - continue; - - case _State.CDATA_ENDING_2: - //log "CDATA_ENDING 2"; - if (c == ">") { - if (this.cdata) - { - this.closetext_if_exist(); - } - this.handler.oncdata(this.cdata); - this.handler.onclosecdata(); - this.cdata = ""; - this.state = _State.TEXT; - } else if (c == "]") { - this.cdata += "]"; - } else { - this.cdata += "]]" + c; - this.state = _State.CDATA; - } - continue; - - case _State.PROC_INST: - if (c == "?") this.state = _State.PROC_INST_ENDING; - else if (_.is(_.whitespace, c)) this.state = _State.PROC_INST_BODY; - else this.procInstName += c; - continue; - - case _State.PROC_INST_BODY: - if (!this.procInstBody && _.is(_.whitespace, c)) continue; - else if (c == "?") this.state = _State.PROC_INST_ENDING; - else this.procInstBody += c; - continue; - - case _State.PROC_INST_ENDING: - if (c == ">") { - this.closetext_if_exist(); - this.handler.onprocessinginstruction(this.procInstName, this.procInstBody); - this.procInstName = this.procInstBody = ""; - this.state = _State.TEXT; - } else { - this.procInstBody += "?" + c; - this.state = _State.PROC_INST_BODY; - } - continue; - - case _State.OPEN_TAG: - //log "OPEN TAG"; - if (_.is(_.nameBody, c)) this.tagName += c; - else { - this.newTag(); - if (c == ">") this.openTag(); - else if (c == "/") this.state = _State.OPEN_TAG_SLASH; - else { - if (_.not(_.whitespace, c)) this.strictFail("Invalid character in tag name"); - this.state = _State.ATTRIB; - } - } - continue; - - case _State.OPEN_TAG_SLASH: - //log "OPEN TAG SLASH"; - if (c == ">") { - this.openTag(true); - this.closeTag(); - } else { - this.strictFail("Forward-slash in opening tag not followed by >"); - this.state = _State.ATTRIB; - } - continue; - - case _State.ATTRIB: - //log "ATTRIB"; - // haven't read the attribute name yet. - if (_.is(_.whitespace, c)) continue; - else if (c == ">") this.openTag(); - else if (c == "/") this.state = _State.OPEN_TAG_SLASH; - else if (_.is(_.nameStart, c)) { - this.attribName = c; - this.attribValue = ""; - this.state = _State.ATTRIB_NAME; - } else this.strictFail("Invalid attribute name"); - continue; - - case _State.ATTRIB_NAME: - //log "ATTRIB_NAME"; - if (c == "=") this.state = _State.ATTRIB_VALUE; - else if (c == ">") { - this.strictFail("Attribute without value"); - this.attribValue = this.attribName; - this.attrib(); - this.openTag(); - } - else if (_.is(_.whitespace, c)) this.state = _State.ATTRIB_NAME_SAW_WHITE; - else if (_.is(_.nameBody, c)) this.attribName += c; - else this.strictFail("Invalid attribute name"); - continue; - - case _State.ATTRIB_NAME_SAW_WHITE: - if (c == "=") this.state = _State.ATTRIB_VALUE; - else if (_.is(_.whitespace, c)) continue; - else { - this.strictFail( "Attribute without value"); - this.tag.attributes[this.attribName] = ""; - this.attribValue = ""; - this.closetext_if_exist(); - this.handler.onattribute(this.attribName, ""); - this.attribName = ""; - if (c == ">") this.openTag(); - else if (_.is(_.nameStart, c)) { - this.attribName = c; - this.state = _State.ATTRIB_NAME; - } else { - this.strictFail("Invalid attribute name"); - this.state = _State.ATTRIB; - } - } - continue; - - case _State.ATTRIB_VALUE: - if (_.is(_.whitespace, c)) continue; - else if (_.is(_.quote, c)) { - this.q = c; - this.state = _State.ATTRIB_VALUE_QUOTED; - } else { - this.strictFail("Unquoted attribute value"); - this.state = _State.ATTRIB_VALUE_UNQUOTED; - this.attribValue = c; - } - continue; - - case _State.ATTRIB_VALUE_QUOTED: - if (c != this.q) { - if (c == "&") this.state = _State.ATTRIB_VALUE_ENTITY_Q; - else this.attribValue += c; - continue; - } - this.attrib(); - this.q = ""; - this.state = _State.ATTRIB; - continue; - - case _State.ATTRIB_VALUE_UNQUOTED: - if (_.not(_.attribEnd,c)) { - if (c == "&") this.state = _State.ATTRIB_VALUE_ENTITY_U; - else this.attribValue += c; - continue; - } - this.attrib(); - if (c == ">") this.openTag(); - else this.state = _State.ATTRIB; - continue; - - case _State.CLOSE_TAG: - //log "CLOSE_TAG", c; - if (!this.tagName) - { - if (_.is(_.whitespace, c)) - { - continue; - } - else if (_.not(_.nameStart, c)) - { - if (this.script) - { - this.script += "</" + c; - this.state = _State.SCRIPT; - } - else - { - this.strictFail("Invalid tagname in closing tag."); - } - } - else - { - this.tagName = c; - } - } - else if (c == ">") - { - this.closeTag(); - } - else if (_.is(_.nameBody, c)) - { - this.tagName += c; - } - else if (this.script) - { - this.script += "</" + this.tagName; - this.tagName = ""; - this.state = _State.SCRIPT; - } - else - { - if (_.not(_.whitespace, c)) - { - this.strictFail("Invalid tagname in closing tag"); - } - this.state = _State.CLOSE_TAG_SAW_WHITE; - } - continue; - - case _State.CLOSE_TAG_SAW_WHITE: - if (_.is(_.whitespace, c)) continue; - if (c == ">") this.closeTag(); - else this.strictFail("Invalid characters in closing tag"); - continue; - - case _State.TEXT_ENTITY: - //log "TEXT_ENTITY"; - if (c == ";") { - this.textNode += this.parseEntity(); - this.entity = ""; - this.state = _State.TEXT; - } - else if (_.is(_.entity, c)) this.entity += c; - else { - this.strictFail("Invalid character entity"); - this.textNode += "&" + this.entity + c; - this.entity = ""; - this.state = _State.TEXT; - } - continue; - - case _State.ATTRIB_VALUE_ENTITY_Q: - case _State.ATTRIB_VALUE_ENTITY_U: - var returnState; - if (this.state == _State.ATTRIB_VALUE_ENTITY_Q) - { - returnState = _State.ATTRIB_VALUE_QUOTED; - } - else - { - returnState = _State.ATTRIB_VALUE_UNQUOTED; - } - if (c == ";") { - this.attribValue += this.parseEntity(); - this.entity = ""; - this.state = returnState; - } - else if (_.is(_.entity, c)) this.entity += c; - else { - this.strictFail("Invalid character entity"); - this.attribValue += "&" + this.entity + c; - this.entity = ""; - this.state = returnState; - } - continue; - - default: - throw new Error("Unknown state: " + (this.state as string)); - } - } - this.end(); - return this; - } - - function clearBuffers () : void - { - this.comment = ''; - this.sgmlDecl = ''; - this.textNode = ''; - this.tagName = ''; - this.doctype = ''; - this.procInstName = ''; - this.procInstBody = ''; - this.entity = ''; - this.attribName = ''; - this.attribValue = ''; - this.cdata = ''; - this.script = ''; - } - - function closetext_if_exist() : void - { - if (this.textNode != '') - { - this.closetext(); - } - } - - function closetext () : void - { - if (this.preTags == 0) - { - var text = this.textopts(this.textNode); - if (text) - { - this.handler.ontext(text); - } - } - else if (this.textNode) - { - this.handler.ontext(this.textNode); - } - this.textNode = ""; - } - - function textopts (text : string) : string - { - text = text.replace(/[\n\t]/g, ' '); - text = text.replace(/\s\s+/g, " "); - return text; - } - - function emiterror (er : string) : SAXParser - { - this.closetext(); - er += "\nLine: " + (this.line as string) + - "\nColumn: " + (this.column as string) + - "\nChar: " + this.c; - var error = new Error(er); - this.error = error; - this.handler.onerror(error); - return this; - } - - function end () : void - { - if (!this.closedRoot) - { - this.strictFail("Unclosed root tag"); - } - if (this.state != _State.TEXT) - { - this.emiterror("Unexpected end"); - } - this.closetext(); - this.c = ""; - this.closed = true; - this.handler.onend(); - } - - function strictFail (message : string) : void - { - if (this.strict) - { - this.emiterror(message); - } - } - - function newTag () : void - { - if (!this.strict) this.tagName = this.tagName.toLowerCase(); - var parent = this.tags[this.tags.length - 1] || this; - var tag = this.tag = new Tag(this.tagName); - this.attribList.length = 0; - } - - function attrib () : void - { - if (!this.strict) this.attribName = this.attribName.toLowerCase(); - - if (this.tag.attributes.hasOwnProperty(this.attribName)) { - this.attribName = this.attribValue = ""; - return; - } - - this.tag.attributes[this.attribName] = this.attribValue; - this.closetext_if_exist(); - this.handler.onattribute(this.attribName, this.attribValue); - this.attribName = this.attribValue = ""; - } - - function openTag () : void - { - this.openTag(false); - } - - function openTag (selfClosing : boolean) : void - { - this.tag.isSelfClosing = selfClosing; - - // process the tag - this.sawRoot = true; - this.tags.push(this.tag); - this.closetext_if_exist(); - this.handler.onopentag(this.tag.name, this.tag.attributes); - if (this.tag.name == 'pre') - { - this.preTags++; - } - if (!selfClosing) - { - // special case for <script> in non-strict mode. - if (!this.noscript && this.tagName.toLowerCase() == "script") - { - this.state = _State.SCRIPT; - } - else - { - this.state = _State.TEXT; - } - this.tag = null; - this.tagName = ""; - } - this.attribName = this.attribValue = ""; - this.attribList.length = 0; - } - - function closeTag () : void - { - if (!this.tagName) - { - this.strictFail("Weird empty close tag."); - this.textNode += "</>"; - this.state = _State.TEXT; - return; - } - - if (this.script) - { - if (this.tagName != "script") - { - this.script += "</" + this.tagName + ">"; - this.tagName = ""; - this.state = _State.SCRIPT; - return; - } - this.closetext_if_exist(); - this.handler.onscript(this.script); - this.script = ""; - } - - // first make sure that the closing tag actually exists. - // <a><b></c></b></a> will close everything, otherwise. - var t = this.tags.length; - var tagName = this.tagName; - if (!this.strict) tagName = tagName.toLowerCase(); - var closeTo = tagName; - while (t --) { - var close = this.tags[t]; - if (close.name != closeTo) { - // fail the first time in strict mode - this.strictFail("Unexpected close tag"); - } else break; - } - - // didn't find it. we already failed for strict, so just abort. - if (t < 0) - { - this.strictFail("Unmatched closing tag: "+this.tagName); - this.textNode += "</" + this.tagName + ">"; - this.state = _State.TEXT; - return; - } - this.tagName = tagName; - var s = this.tags.length; - while (s --> t) - { - var tag = this.tag = this.tags.pop(); - this.tagName = this.tag.name; - this.closetext_if_exist(); - this.handler.onclosetag(this.tagName); - var parent = this.tags[this.tags.length - 1]; - if (this.tagName == 'pre') - { - this.preTags--; - } - } - if (t == 0) - { - this.closedRoot = true; - } - this.tagName = this.attribValue = this.attribName = ""; - this.attribList.length = 0; - this.state = _State.TEXT; - } - - function parseEntity () : string - { - var entity = this.entity; - var entityLC = entity.toLowerCase(); - var num = 0; - var numStr = ""; - if (this.ENTITIES[entity]) - { - return this.ENTITIES[entity]; - } - if (this.ENTITIES[entityLC]) - { - return this.ENTITIES[entityLC]; - } - entity = entityLC; - if (entity.charAt(0) == "#") - { - if (entity.charAt(1) == "x") - { - entity = entity.slice(2); - num = Number.parseInt(entity, 16); - numStr = num.toString(16); - } - else - { - entity = entity.slice(1); - num = Number.parseInt(entity, 10); - numStr = num.toString(10); - } - } - entity = entity.replace(/^0+/, ""); - if (numStr.toLowerCase() != entity) { - this.strictFail("Invalid character entity"); - return "&"+this.entity + ";"; - } - return String.fromCharCode(num); - } -} - -class Char -{ - var whitespace : Map.<boolean>; - var number : Map.<boolean>; - var letter : Map.<boolean>; - var quote : Map.<boolean>; - var entity : Map.<boolean>; - var attribEnd : Map.<boolean>; - var nameStart : RegExp; - var nameBody : RegExp; - var CDATA : string; - var DOCTYPE : string; - var XML_NAMESPACE : string; - - function constructor() - { - // character classes and tokens - var whitespace = "\r\n\t "; - // this really needs to be replaced with character classes. - // XML allows all manner of ridiculous numbers and digits. - var number = "0124356789"; - var letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; - // (Letter | "_" | ":") - var quote = "'\""; - var entity = number+letter+"#"; - var attribEnd = whitespace + ">"; - this.CDATA = "[CDATA["; - this.DOCTYPE = "DOCTYPE"; - this.XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"; - - // turn all the string character sets into character class objects. - this.whitespace = this._charClass(whitespace); - this.number = this._charClass(number); - this.letter = this._charClass(letter); - this.quote = this._charClass(quote); - this.entity = this._charClass(entity); - this.attribEnd = this._charClass(attribEnd); - this.nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/; - - this.nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/; - } - - function _charClass (str : string) : Map.<boolean> - { - var result = {} : Map.<boolean>; - for (var i = 0; i < str.length; i++) - { - result[str.slice(i, i + 1)] = true; - } - return result; - } - - function is (charclass : RegExp, c : string) : boolean - { - return charclass.test(c); - } - - function is (charclass : Map.<boolean>, c : string) : boolean - { - return charclass.hasOwnProperty(c); - } - - function not (charclass : RegExp, c : string) : boolean { - return !this.is(charclass, c); - } - - function not (charclass : Map.<boolean>, c : string) : boolean { - return !this.is(charclass, c); - } -} - - -class _Entities -{ - static const _entities = { - "amp" : "&", - "gt" : ">", - "lt" : "<", - "quot" : "\"", - "apos" : "'", - "AElig" : 198, - "Aacute" : 193, - "Acirc" : 194, - "Agrave" : 192, - "Aring" : 197, - "Atilde" : 195, - "Auml" : 196, - "Ccedil" : 199, - "ETH" : 208, - "Eacute" : 201, - "Ecirc" : 202, - "Egrave" : 200, - "Euml" : 203, - "Iacute" : 205, - "Icirc" : 206, - "Igrave" : 204, - "Iuml" : 207, - "Ntilde" : 209, - "Oacute" : 211, - "Ocirc" : 212, - "Ograve" : 210, - "Oslash" : 216, - "Otilde" : 213, - "Ouml" : 214, - "THORN" : 222, - "Uacute" : 218, - "Ucirc" : 219, - "Ugrave" : 217, - "Uuml" : 220, - "Yacute" : 221, - "aacute" : 225, - "acirc" : 226, - "aelig" : 230, - "agrave" : 224, - "aring" : 229, - "atilde" : 227, - "auml" : 228, - "ccedil" : 231, - "eacute" : 233, - "ecirc" : 234, - "egrave" : 232, - "eth" : 240, - "euml" : 235, - "iacute" : 237, - "icirc" : 238, - "igrave" : 236, - "iuml" : 239, - "ntilde" : 241, - "oacute" : 243, - "ocirc" : 244, - "ograve" : 242, - "oslash" : 248, - "otilde" : 245, - "ouml" : 246, - "szlig" : 223, - "thorn" : 254, - "uacute" : 250, - "ucirc" : 251, - "ugrave" : 249, - "uuml" : 252, - "yacute" : 253, - "yuml" : 255, - "copy" : 169, - "reg" : 174, - "nbsp" : 160, - "iexcl" : 161, - "cent" : 162, - "pound" : 163, - "curren" : 164, - "yen" : 165, - "brvbar" : 166, - "sect" : 167, - "uml" : 168, - "ordf" : 170, - "laquo" : 171, - "not" : 172, - "shy" : 173, - "macr" : 175, - "deg" : 176, - "plusmn" : 177, - "sup1" : 185, - "sup2" : 178, - "sup3" : 179, - "acute" : 180, - "micro" : 181, - "para" : 182, - "middot" : 183, - "cedil" : 184, - "ordm" : 186, - "raquo" : 187, - "frac14" : 188, - "frac12" : 189, - "frac34" : 190, - "iquest" : 191, - "times" : 215, - "divide" : 247, - "OElig" : 338, - "oelig" : 339, - "Scaron" : 352, - "scaron" : 353, - "Yuml" : 376, - "fnof" : 402, - "circ" : 710, - "tilde" : 732, - "Alpha" : 913, - "Beta" : 914, - "Gamma" : 915, - "Delta" : 916, - "Epsilon" : 917, - "Zeta" : 918, - "Eta" : 919, - "Theta" : 920, - "Iota" : 921, - "Kappa" : 922, - "Lambda" : 923, - "Mu" : 924, - "Nu" : 925, - "Xi" : 926, - "Omicron" : 927, - "Pi" : 928, - "Rho" : 929, - "Sigma" : 931, - "Tau" : 932, - "Upsilon" : 933, - "Phi" : 934, - "Chi" : 935, - "Psi" : 936, - "Omega" : 937, - "alpha" : 945, - "beta" : 946, - "gamma" : 947, - "delta" : 948, - "epsilon" : 949, - "zeta" : 950, - "eta" : 951, - "theta" : 952, - "iota" : 953, - "kappa" : 954, - "lambda" : 955, - "mu" : 956, - "nu" : 957, - "xi" : 958, - "omicron" : 959, - "pi" : 960, - "rho" : 961, - "sigmaf" : 962, - "sigma" : 963, - "tau" : 964, - "upsilon" : 965, - "phi" : 966, - "chi" : 967, - "psi" : 968, - "omega" : 969, - "thetasym" : 977, - "upsih" : 978, - "piv" : 982, - "ensp" : 8194, - "emsp" : 8195, - "thinsp" : 8201, - "zwnj" : 8204, - "zwj" : 8205, - "lrm" : 8206, - "rlm" : 8207, - "ndash" : 8211, - "mdash" : 8212, - "lsquo" : 8216, - "rsquo" : 8217, - "sbquo" : 8218, - "ldquo" : 8220, - "rdquo" : 8221, - "bdquo" : 8222, - "dagger" : 8224, - "Dagger" : 8225, - "bull" : 8226, - "hellip" : 8230, - "permil" : 8240, - "prime" : 8242, - "Prime" : 8243, - "lsaquo" : 8249, - "rsaquo" : 8250, - "oline" : 8254, - "frasl" : 8260, - "euro" : 8364, - "image" : 8465, - "weierp" : 8472, - "real" : 8476, - "trade" : 8482, - "alefsym" : 8501, - "larr" : 8592, - "uarr" : 8593, - "rarr" : 8594, - "darr" : 8595, - "harr" : 8596, - "crarr" : 8629, - "lArr" : 8656, - "uArr" : 8657, - "rArr" : 8658, - "dArr" : 8659, - "hArr" : 8660, - "forall" : 8704, - "part" : 8706, - "exist" : 8707, - "empty" : 8709, - "nabla" : 8711, - "isin" : 8712, - "notin" : 8713, - "ni" : 8715, - "prod" : 8719, - "sum" : 8721, - "minus" : 8722, - "lowast" : 8727, - "radic" : 8730, - "prop" : 8733, - "infin" : 8734, - "ang" : 8736, - "and" : 8743, - "or" : 8744, - "cap" : 8745, - "cup" : 8746, - "int" : 8747, - "there4" : 8756, - "sim" : 8764, - "cong" : 8773, - "asymp" : 8776, - "ne" : 8800, - "equiv" : 8801, - "le" : 8804, - "ge" : 8805, - "sub" : 8834, - "sup" : 8835, - "nsub" : 8836, - "sube" : 8838, - "supe" : 8839, - "oplus" : 8853, - "otimes" : 8855, - "perp" : 8869, - "sdot" : 8901, - "lceil" : 8968, - "rceil" : 8969, - "lfloor" : 8970, - "rfloor" : 8971, - "lang" : 9001, - "rang" : 9002, - "loz" : 9674, - "spades" : 9824, - "clubs" : 9827, - "hearts" : 9829, - "diams" : 9830 - } : Map.<variant>; - - static function entity_list () : Map.<string> - { - var result = {} : Map.<string>; - for (var key in _Entities._entities) - { - var value : variant = _Entities._entities[key]; - if (typeof(value) == 'string') - { - result[key] = value as string; - } - else if (typeof(value) == 'number') - { - result[key] = String.fromCharCode(value as int); - } - } - return result; - } -} - |