diff options
Diffstat (limited to 'comm/mailnews/db/mork/morkParser.cpp')
-rw-r--r-- | comm/mailnews/db/mork/morkParser.cpp | 1331 |
1 files changed, 1331 insertions, 0 deletions
diff --git a/comm/mailnews/db/mork/morkParser.cpp b/comm/mailnews/db/mork/morkParser.cpp new file mode 100644 index 0000000000..8ca635014f --- /dev/null +++ b/comm/mailnews/db/mork/morkParser.cpp @@ -0,0 +1,1331 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _MDB_ +# include "mdb.h" +#endif + +#ifndef _MORK_ +# include "mork.h" +#endif + +#ifndef _MORKNODE_ +# include "morkNode.h" +#endif + +#ifndef _MORKMAP_ +# include "morkMap.h" +#endif + +#ifndef _MORKENV_ +# include "morkEnv.h" +#endif + +#ifndef _MORKPARSER_ +# include "morkParser.h" +#endif + +#ifndef _MORKSTREAM_ +# include "morkStream.h" +#endif + +#ifndef _MORKBLOB_ +# include "morkBlob.h" +#endif + +#ifndef _MORKSINK_ +# include "morkSink.h" +#endif + +#ifndef _MORKCH_ +# include "morkCh.h" +#endif + +#ifndef _MORKSTORE_ +# include "morkStore.h" +#endif + +// 456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789 + +// ````` ````` ````` ````` ````` +// { ===== begin morkNode interface ===== + +/*public virtual*/ void morkParser::CloseMorkNode( + morkEnv* ev) // CloseParser() only if open +{ + if (this->IsOpenNode()) { + this->MarkClosing(); + this->CloseParser(ev); + this->MarkShut(); + } +} + +/*public virtual*/ +morkParser::~morkParser() // assert CloseParser() executed earlier +{ + MORK_ASSERT(mParser_Heap == 0); + MORK_ASSERT(mParser_Stream == 0); +} + +/*public non-poly*/ +morkParser::morkParser(morkEnv* ev, const morkUsage& inUsage, + nsIMdbHeap* ioHeap, morkStream* ioStream, + mdb_count inBytesPerParseSegment, nsIMdbHeap* ioSlotHeap) + : morkNode(ev, inUsage, ioHeap), + mParser_Heap(0), + mParser_Stream(0), + mParser_MoreGranularity(inBytesPerParseSegment), + mParser_State(morkParser_kStartState) + + , + 
mParser_GroupContentStartPos(0) + + , + mParser_TableMid(), + mParser_RowMid(), + mParser_CellMid() + + , + mParser_InPort(morkBool_kFalse), + mParser_InDict(morkBool_kFalse), + mParser_InCell(morkBool_kFalse), + mParser_InMeta(morkBool_kFalse) + + , + mParser_InPortRow(morkBool_kFalse), + mParser_InRow(morkBool_kFalse), + mParser_InTable(morkBool_kFalse), + mParser_InGroup(morkBool_kFalse) + + , + mParser_AtomChange(morkChange_kNil), + mParser_CellChange(morkChange_kNil), + mParser_RowChange(morkChange_kNil), + mParser_TableChange(morkChange_kNil) + + , + mParser_Change(morkChange_kNil), + mParser_IsBroken(morkBool_kFalse), + mParser_IsDone(morkBool_kFalse), + mParser_DoMore(morkBool_kTrue) + + , + mParser_Mid() + + , + mParser_ScopeCoil(ev, ioSlotHeap), + mParser_ValueCoil(ev, ioSlotHeap), + mParser_ColumnCoil(ev, ioSlotHeap), + mParser_StringCoil(ev, ioSlotHeap) + + , + mParser_ScopeSpool(ev, &mParser_ScopeCoil), + mParser_ValueSpool(ev, &mParser_ValueCoil), + mParser_ColumnSpool(ev, &mParser_ColumnCoil), + mParser_StringSpool(ev, &mParser_StringCoil) + + , + mParser_MidYarn(ev, morkUsage(morkUsage_kMember), ioSlotHeap) { + if (inBytesPerParseSegment < morkParser_kMinGranularity) + inBytesPerParseSegment = morkParser_kMinGranularity; + else if (inBytesPerParseSegment > morkParser_kMaxGranularity) + inBytesPerParseSegment = morkParser_kMaxGranularity; + + mParser_MoreGranularity = inBytesPerParseSegment; + + if (ioSlotHeap && ioStream) { + nsIMdbHeap_SlotStrongHeap(ioSlotHeap, ev, &mParser_Heap); + morkStream::SlotStrongStream(ioStream, ev, &mParser_Stream); + + if (ev->Good()) { + mParser_Tag = morkParser_kTag; + mNode_Derived = morkDerived_kParser; + } + } else + ev->NilPointerError(); +} + +/*public non-poly*/ void morkParser::CloseParser( + morkEnv* ev) // called by CloseMorkNode(); +{ + if (this->IsNode()) { + if (!this->IsShutNode()) { + mParser_ScopeCoil.CloseCoil(ev); + mParser_ValueCoil.CloseCoil(ev); + mParser_ColumnCoil.CloseCoil(ev); + 
mParser_StringCoil.CloseCoil(ev); + nsIMdbHeap_SlotStrongHeap((nsIMdbHeap*)0, ev, &mParser_Heap); + morkStream::SlotStrongStream((morkStream*)0, ev, &mParser_Stream); + this->MarkShut(); + } + } else + this->NonNodeError(ev); +} + +// } ===== end morkNode methods ===== +// ````` ````` ````` ````` ````` + +/*protected non-poly*/ void morkParser::NonGoodParserError( + morkEnv* ev) // when GoodParserTag() is false +{ + ev->NewError("non-morkNode"); +} + +/*protected non-poly*/ void morkParser::NonUsableParserError(morkEnv* ev) // +{ + if (this->IsNode()) { + if (this->IsOpenNode()) { + if (this->GoodParserTag()) { + // okay + } else + this->NonGoodParserError(ev); + } else + this->NonOpenNodeError(ev); + } else + this->NonNodeError(ev); +} + +/*protected non-poly*/ void morkParser::StartParse(morkEnv* ev) { + MORK_USED_1(ev); + mParser_InCell = morkBool_kFalse; + mParser_InMeta = morkBool_kFalse; + mParser_InDict = morkBool_kFalse; + mParser_InPortRow = morkBool_kFalse; + + mParser_RowMid.ClearMid(); + mParser_TableMid.ClearMid(); + mParser_CellMid.ClearMid(); + + mParser_GroupId = 0; + mParser_InPort = morkBool_kTrue; + + mParser_GroupSpan.ClearSpan(); + mParser_DictSpan.ClearSpan(); + mParser_AliasSpan.ClearSpan(); + mParser_MetaSpan.ClearSpan(); + mParser_TableSpan.ClearSpan(); + mParser_RowSpan.ClearSpan(); + mParser_CellSpan.ClearSpan(); + mParser_ColumnSpan.ClearSpan(); + mParser_SlotSpan.ClearSpan(); + + mParser_PortSpan.ClearSpan(); +} + +/*protected non-poly*/ void morkParser::StopParse(morkEnv* ev) { + if (mParser_InCell) { + mParser_InCell = morkBool_kFalse; + mParser_CellSpan.SetEndWithEnd(mParser_PortSpan); + this->OnCellEnd(ev, mParser_CellSpan); + } + if (mParser_InMeta) { + mParser_InMeta = morkBool_kFalse; + mParser_MetaSpan.SetEndWithEnd(mParser_PortSpan); + this->OnMetaEnd(ev, mParser_MetaSpan); + } + if (mParser_InDict) { + mParser_InDict = morkBool_kFalse; + mParser_DictSpan.SetEndWithEnd(mParser_PortSpan); + this->OnDictEnd(ev, mParser_DictSpan); 
+ } + if (mParser_InPortRow) { + mParser_InPortRow = morkBool_kFalse; + mParser_RowSpan.SetEndWithEnd(mParser_PortSpan); + this->OnPortRowEnd(ev, mParser_RowSpan); + } + if (mParser_InRow) { + mParser_InRow = morkBool_kFalse; + mParser_RowMid.ClearMid(); + mParser_RowSpan.SetEndWithEnd(mParser_PortSpan); + this->OnRowEnd(ev, mParser_RowSpan); + } + if (mParser_InTable) { + mParser_InTable = morkBool_kFalse; + mParser_TableMid.ClearMid(); + mParser_TableSpan.SetEndWithEnd(mParser_PortSpan); + this->OnTableEnd(ev, mParser_TableSpan); + } + if (mParser_GroupId) { + mParser_GroupId = 0; + mParser_GroupSpan.SetEndWithEnd(mParser_PortSpan); + this->OnGroupAbortEnd(ev, mParser_GroupSpan); + } + if (mParser_InPort) { + mParser_InPort = morkBool_kFalse; + this->OnPortEnd(ev, mParser_PortSpan); + } +} + +int morkParser::eat_comment(morkEnv* ev) // last char was '/' +{ + morkStream* s = mParser_Stream; + // Note morkStream::Getc() returns EOF when an error occurs, so + // we don't need to check for both c != EOF and ev->Good() below. + + int c = s->Getc(ev); + if (c == '/') // C++ style comment? + { + while ((c = s->Getc(ev)) != EOF && c != 0xA && c != 0xD) + ; /* empty */ + + if (c == 0xA || c == 0xD) c = this->eat_line_break(ev, c); + } else if (c == '*') /* C style comment? */ + { + int depth = 1; // count depth of comments until depth reaches zero + + while (depth > 0 && c != EOF) // still looking for comment end(s)? + { + while ((c = s->Getc(ev)) != EOF && c != '/' && c != '*') { + if (c == 0xA || c == 0xD) // need to count a line break? + { + c = this->eat_line_break(ev, c); + if (c == '/' || c == '*') break; // end while loop + } + } + + if (c == '*') // maybe end of a comment, if next char is '/'? + { + if ((c = s->Getc(ev)) == '/') // end of comment? + { + --depth; // depth of comments has decreased by one + if (!depth) // comments all done? + c = s->Getc(ev); // return the byte after end of comment + } else if (c != EOF) // need to put the char back? 
+ s->Ungetc(c); // especially need to put back '*', 0xA, or 0xD + } else if (c == '/') // maybe nested comment, if next char is '*'? + { + if ((c = s->Getc(ev)) == '*') // nested comment? + ++depth; // depth of comments has increased by one + else if (c != EOF) // need to put the char back? + s->Ungetc(c); // especially need to put back '/', 0xA, or 0xD + } + + if (ev->Bad()) c = EOF; + } + if (c == EOF && depth > 0) ev->NewWarning("EOF before end of comment"); + } else + ev->NewWarning("expected / or *"); + + return c; +} + +// eat_line_break(): called after a line-break byte (0xA or 0xD, passed as +// inLast) has been read. Counts one line break, and when the following byte +// is the other line-break byte, skips it as the second half of a two-byte +// break. Returns the first byte read after the break (whatever Getc() gave). +int morkParser::eat_line_break(morkEnv* ev, int inLast) { + morkStream* s = mParser_Stream; + int c = s->Getc(ev); // get next char after 0xA or 0xD + this->CountLineBreak(); + if (c == 0xA || c == 0xD) // another line break character? + { + if (c != inLast) // not the same as the last one? + c = s->Getc(ev); // get next char after two-byte linebreak + } + return c; +} + +int morkParser::eat_line_continue(morkEnv* ev) // last char was '\' +{ + morkStream* s = mParser_Stream; + int c = s->Getc(ev); + if (c == 0xA || c == 0xD) // linebreak follows \ as expected? 
+ { + c = this->eat_line_break(ev, c); + } else + ev->NewWarning("expected linebreak"); + + return c; +} + +int morkParser::NextChar(morkEnv* ev) // next non-white content +{ + morkStream* s = mParser_Stream; + int c = s->Getc(ev); + while (c > 0 && ev->Good()) { + if (c == '/') + c = this->eat_comment(ev); + else if (c == 0xA || c == 0xD) + c = this->eat_line_break(ev, c); + else if (c == '\\') + c = this->eat_line_continue(ev); + else if (morkCh_IsWhite(c)) + c = s->Getc(ev); + else + break; // end while loop when return c is acceptable + } + if (ev->Bad()) { + mParser_State = morkParser_kBrokenState; + mParser_DoMore = morkBool_kFalse; + mParser_IsDone = morkBool_kTrue; + mParser_IsBroken = morkBool_kTrue; + c = EOF; + } else if (c == EOF) { + mParser_DoMore = morkBool_kFalse; + mParser_IsDone = morkBool_kTrue; + } + return c; +} + +void morkParser::OnCellState(morkEnv* ev) { ev->StubMethodOnlyError(); } + +void morkParser::OnMetaState(morkEnv* ev) { ev->StubMethodOnlyError(); } + +void morkParser::OnRowState(morkEnv* ev) { ev->StubMethodOnlyError(); } + +void morkParser::OnTableState(morkEnv* ev) { ev->StubMethodOnlyError(); } + +void morkParser::OnDictState(morkEnv* ev) { ev->StubMethodOnlyError(); } + +morkBuf* morkParser::ReadName(morkEnv* ev, int c) { + morkBuf* outBuf = 0; + + if (!morkCh_IsName(c)) ev->NewError("not a name char"); + + morkCoil* coil = &mParser_ColumnCoil; + coil->ClearBufFill(); + + morkSpool* spool = &mParser_ColumnSpool; + spool->Seek(ev, /*pos*/ 0); + + if (ev->Good()) { + spool->Putc(ev, c); + + morkStream* s = mParser_Stream; + while ((c = s->Getc(ev)) != EOF && morkCh_IsMore(c) && ev->Good()) + spool->Putc(ev, c); + + if (ev->Good()) { + if (c != EOF) { + s->Ungetc(c); + spool->FlushSink(ev); // update coil->mBuf_Fill + } else + this->UnexpectedEofError(ev); + + if (ev->Good()) outBuf = coil; + } + } + return outBuf; +} + +mork_bool morkParser::ReadMid(morkEnv* ev, morkMid* outMid) { + outMid->ClearMid(); + + morkStream* s = 
mParser_Stream; + int next; + outMid->mMid_Oid.mOid_Id = this->ReadHex(ev, &next); + int c = next; + if (c == ':') { + if ((c = s->Getc(ev)) != EOF && ev->Good()) { + if (c == '^') { + outMid->mMid_Oid.mOid_Scope = this->ReadHex(ev, &next); + if (ev->Good()) s->Ungetc(next); + } else if (morkCh_IsName(c)) { + outMid->mMid_Buf = this->ReadName(ev, c); + } else + ev->NewError("expected name or hex after ':' following ID"); + } + + if (c == EOF && ev->Good()) this->UnexpectedEofError(ev); + } else + s->Ungetc(c); + + return ev->Good(); +} + +void morkParser::ReadCell(morkEnv* ev) { + mParser_CellMid.ClearMid(); + // this->StartSpanOnLastByte(ev, &mParser_CellSpan); + morkMid* cellMid = 0; // if mid syntax is used for column + morkBuf* cellBuf = 0; // if naked string is used for column + + morkStream* s = mParser_Stream; + int c; + if ((c = s->Getc(ev)) != EOF && ev->Good()) { + // this->StartSpanOnLastByte(ev, &mParser_ColumnSpan); + if (c == '^') { + cellMid = &mParser_CellMid; + this->ReadMid(ev, cellMid); + // if ( !mParser_CellMid.mMid_Oid.mOid_Scope ) + // mParser_CellMid.mMid_Oid.mOid_Scope = (mork_scope) 'c'; + } else { + if (mParser_InMeta && c == morkStore_kFormColumn) { + ReadCellForm(ev, c); + return; + } else + cellBuf = this->ReadName(ev, c); + } + if (ev->Good()) { + // this->EndSpanOnThisByte(ev, &mParser_ColumnSpan); + + mParser_InCell = morkBool_kTrue; + this->OnNewCell(ev, *mParser_CellSpan.AsPlace(), cellMid, + cellBuf); // , mParser_CellChange + + mParser_CellChange = morkChange_kNil; + if ((c = this->NextChar(ev)) != EOF && ev->Good()) { + // this->StartSpanOnLastByte(ev, &mParser_SlotSpan); + if (c == '=') { + morkBuf* buf = this->ReadValue(ev); + if (buf) { + // this->EndSpanOnThisByte(ev, &mParser_SlotSpan); + this->OnValue(ev, mParser_SlotSpan, *buf); + } + } else if (c == '^') { + if (this->ReadMid(ev, &mParser_Mid)) { + // this->EndSpanOnThisByte(ev, &mParser_SlotSpan); + if ((c = this->NextChar(ev)) != EOF && ev->Good()) { + if (c != ')') 
ev->NewError("expected ')' after cell ^ID value"); + } else if (c == EOF) + this->UnexpectedEofError(ev); + + if (ev->Good()) this->OnValueMid(ev, mParser_SlotSpan, mParser_Mid); + } + } else if (c == 'r' || c == 't' || c == '"' || c == '\'') { + ev->NewError("cell syntax not yet supported"); + } else { + ev->NewError("unknown cell syntax"); + } + } + + // this->EndSpanOnThisByte(ev, &mParser_CellSpan); + mParser_InCell = morkBool_kFalse; + this->OnCellEnd(ev, mParser_CellSpan); + } + } + mParser_CellChange = morkChange_kNil; + + if (c == EOF && ev->Good()) this->UnexpectedEofError(ev); +} + +void morkParser::ReadRowPos(morkEnv* ev) { + int c; // next character + mork_pos rowPos = this->ReadHex(ev, &c); + + if (ev->Good() && c != EOF) // should put back byte after hex? + mParser_Stream->Ungetc(c); + + this->OnRowPos(ev, rowPos); +} + +void morkParser::ReadRow(morkEnv* ev, int c) +// zm:Row ::= zm:S? '[' zm:S? zm:Id zm:RowItem* zm:S? ']' +// zm:RowItem ::= zm:MetaRow | zm:Cell +// zm:MetaRow ::= zm:S? '[' zm:S? zm:Cell* zm:S? ']' /* meta attributes */ +// zm:Cell ::= zm:S? '(' zm:Column zm:S? zm:Slot? 
')' +{ + if (ev->Good()) { + // this->StartSpanOnLastByte(ev, &mParser_RowSpan); + if (mParser_Change) mParser_RowChange = mParser_Change; + + mork_bool cutAllRowCols = morkBool_kFalse; + + if (c == '[') { + if ((c = this->NextChar(ev)) == '-') + cutAllRowCols = morkBool_kTrue; + else if (ev->Good() && c != EOF) + mParser_Stream->Ungetc(c); + + if (this->ReadMid(ev, &mParser_RowMid)) { + mParser_InRow = morkBool_kTrue; + this->OnNewRow(ev, *mParser_RowSpan.AsPlace(), mParser_RowMid, + cutAllRowCols); + + mParser_Change = mParser_RowChange = morkChange_kNil; + + while ((c = this->NextChar(ev)) != EOF && ev->Good() && c != ']') { + switch (c) { + case '(': // cell + this->ReadCell(ev); + break; + + case '[': // meta + this->ReadMeta(ev, ']'); + break; + + // case '+': // plus + // mParser_CellChange = morkChange_kAdd; + // break; + + case '-': // minus + // mParser_CellChange = morkChange_kCut; + this->OnMinusCell(ev); + break; + + // case '!': // bang + // mParser_CellChange = morkChange_kSet; + // break; + + default: + ev->NewWarning("unexpected byte in row"); + break; + } // switch + } // while + + if (ev->Good()) { + if ((c = this->NextChar(ev)) == '!') + this->ReadRowPos(ev); + else if (c != EOF && ev->Good()) + mParser_Stream->Ungetc(c); + } + + // this->EndSpanOnThisByte(ev, &mParser_RowSpan); + mParser_InRow = morkBool_kFalse; + this->OnRowEnd(ev, mParser_RowSpan); + + } // if ReadMid + } // if '[' + + else // c != '[' + { + morkStream* s = mParser_Stream; + s->Ungetc(c); + if (this->ReadMid(ev, &mParser_RowMid)) { + mParser_InRow = morkBool_kTrue; + this->OnNewRow(ev, *mParser_RowSpan.AsPlace(), mParser_RowMid, + cutAllRowCols); + + mParser_Change = mParser_RowChange = morkChange_kNil; + + if (ev->Good()) { + if ((c = this->NextChar(ev)) == '!') + this->ReadRowPos(ev); + else if (c != EOF && ev->Good()) + s->Ungetc(c); + } + + // this->EndSpanOnThisByte(ev, &mParser_RowSpan); + mParser_InRow = morkBool_kFalse; + this->OnRowEnd(ev, mParser_RowSpan); + } + } + 
} + + if (ev->Bad()) + mParser_State = morkParser_kBrokenState; + else if (c == EOF) + mParser_State = morkParser_kDoneState; +} + +void morkParser::ReadTable(morkEnv* ev) +// zm:Table ::= zm:S? '{' zm:S? zm:Id zm:TableItem* zm:S? '}' +// zm:TableItem ::= zm:MetaTable | zm:RowRef | zm:Row +// zm:MetaTable ::= zm:S? '{' zm:S? zm:Cell* zm:S? '}' /* meta attributes */ +{ + // this->StartSpanOnLastByte(ev, &mParser_TableSpan); + + if (mParser_Change) mParser_TableChange = mParser_Change; + + mork_bool cutAllTableRows = morkBool_kFalse; + + int c = this->NextChar(ev); + if (c == '-') + cutAllTableRows = morkBool_kTrue; + else if (ev->Good() && c != EOF) + mParser_Stream->Ungetc(c); + + if (ev->Good() && this->ReadMid(ev, &mParser_TableMid)) { + mParser_InTable = morkBool_kTrue; + this->OnNewTable(ev, *mParser_TableSpan.AsPlace(), mParser_TableMid, + cutAllTableRows); + + mParser_Change = mParser_TableChange = morkChange_kNil; + + while ((c = this->NextChar(ev)) != EOF && ev->Good() && c != '}') { + if (morkCh_IsHex(c)) { + this->ReadRow(ev, c); + } else { + switch (c) { + case '[': // row + this->ReadRow(ev, '['); + break; + + case '{': // meta + this->ReadMeta(ev, '}'); + break; + + // case '+': // plus + // mParser_RowChange = morkChange_kAdd; + // break; + + case '-': // minus + // mParser_RowChange = morkChange_kCut; + this->OnMinusRow(ev); + break; + + // case '!': // bang + // mParser_RowChange = morkChange_kSet; + // break; + + default: + ev->NewWarning("unexpected byte in table"); + break; + } + } + } + + // this->EndSpanOnThisByte(ev, &mParser_TableSpan); + mParser_InTable = morkBool_kFalse; + this->OnTableEnd(ev, mParser_TableSpan); + + if (ev->Bad()) + mParser_State = morkParser_kBrokenState; + else if (c == EOF) + mParser_State = morkParser_kDoneState; + } +} + +mork_id morkParser::ReadHex(morkEnv* ev, int* outNextChar) +// zm:Hex ::= [0-9a-fA-F] /* a single hex digit */ +// zm:Hex+ ::= zm:Hex | zm:Hex zm:Hex+ +{ + mork_id hex = 0; + + morkStream* s = 
mParser_Stream; + int c = this->NextChar(ev); + + if (ev->Good()) { + if (c != EOF) { + if (morkCh_IsHex(c)) { + do { + if (morkCh_IsDigit(c)) // '0' through '9'? + c -= '0'; + else if (morkCh_IsUpper(c)) // 'A' through 'F'? + c -= ('A' - 10); // c = (c - 'A') + 10; + else // 'a' through 'f'? + c -= ('a' - 10); // c = (c - 'a') + 10; + + hex = (hex << 4) + c; + } while ((c = s->Getc(ev)) != EOF && ev->Good() && morkCh_IsHex(c)); + } else + this->ExpectedHexDigitError(ev, c); + } + } + if (c == EOF) this->EofInsteadOfHexError(ev); + + *outNextChar = c; + return hex; +} + +/*static*/ void morkParser::EofInsteadOfHexError(morkEnv* ev) { + ev->NewWarning("eof instead of hex"); +} + +/*static*/ void morkParser::ExpectedHexDigitError(morkEnv* ev, int c) { + MORK_USED_1(c); + ev->NewWarning("expected hex digit"); +} + +/*static*/ void morkParser::ExpectedEqualError(morkEnv* ev) { + ev->NewWarning("expected '='"); +} + +/*static*/ void morkParser::UnexpectedEofError(morkEnv* ev) { + ev->NewWarning("unexpected eof"); +} + +morkBuf* morkParser::ReadValue(morkEnv* ev) { + morkBuf* outBuf = 0; + + morkCoil* coil = &mParser_ValueCoil; + coil->ClearBufFill(); + + morkSpool* spool = &mParser_ValueSpool; + spool->Seek(ev, /*pos*/ 0); + + if (ev->Good()) { + morkStream* s = mParser_Stream; + int c; + while ((c = s->Getc(ev)) != EOF && c != ')' && ev->Good()) { + if (c == '\\') // next char is escaped by '\'? + { + if ((c = s->Getc(ev)) == 0xA || c == 0xD) // linebreak after \? + { + c = this->eat_line_break(ev, c); + if (c == ')' || c == '\\' || c == '$') { + s->Ungetc(c); // just let while loop test read this again + continue; // goto next iteration of while loop + } + } + if (c == EOF || ev->Bad()) break; // end while loop + } else if (c == '$') // "$" escapes next two hex digits? 
+ { + if ((c = s->Getc(ev)) != EOF && ev->Good()) { + mork_ch first = (mork_ch)c; // first hex digit + if ((c = s->Getc(ev)) != EOF && ev->Good()) { + mork_ch second = (mork_ch)c; // second hex digit + c = ev->HexToByte(first, second); + } else + break; // end while loop + } else + break; // end while loop + } + spool->Putc(ev, c); + } + + if (ev->Good()) { + if (c != EOF) + spool->FlushSink(ev); // update coil->mBuf_Fill + else + this->UnexpectedEofError(ev); + + if (ev->Good()) outBuf = coil; + } + } + return outBuf; +} + +void morkParser::ReadDictForm(morkEnv* ev) { + int nextChar; + nextChar = this->NextChar(ev); + if (nextChar == '(') { + nextChar = this->NextChar(ev); + if (nextChar == morkStore_kFormColumn) { + int dictForm; + + nextChar = this->NextChar(ev); + if (nextChar == '=') { + dictForm = this->NextChar(ev); + nextChar = this->NextChar(ev); + } else if (nextChar == '^') { + dictForm = this->ReadHex(ev, &nextChar); + } else { + ev->NewWarning("unexpected byte in dict form"); + return; + } + mParser_ValueCoil.mText_Form = dictForm; + if (nextChar == ')') { + nextChar = this->NextChar(ev); + if (nextChar == '>') return; + } + } + } + ev->NewWarning("unexpected byte in dict form"); +} + +void morkParser::ReadCellForm(morkEnv* ev, int c) { + MORK_ASSERT(c == morkStore_kFormColumn); + int nextChar; + nextChar = this->NextChar(ev); + int cellForm; + + if (nextChar == '=') { + cellForm = this->NextChar(ev); + nextChar = this->NextChar(ev); + } else if (nextChar == '^') { + cellForm = this->ReadHex(ev, &nextChar); + } else { + ev->NewWarning("unexpected byte in cell form"); + return; + } + // ### not sure about this. Which form should we set? + // mBuilder_CellForm = mBuilder_RowForm = cellForm; + if (nextChar == ')') { + OnCellForm(ev, cellForm); + return; + } + ev->NewWarning("unexpected byte in cell form"); +} + +void morkParser::ReadAlias(morkEnv* ev) +// zm:Alias ::= zm:S? '(' ('#')? zm:Hex+ zm:S? 
zm:Value ')' +// zm:Value ::= '=' ([^)$\] | '\' zm:NonCRLF | zm:Continue | zm:Dollar)* +{ + // this->StartSpanOnLastByte(ev, &mParser_AliasSpan); + + int nextChar; + mork_id hex = this->ReadHex(ev, &nextChar); + int c = nextChar; + + mParser_Mid.ClearMid(); + mParser_Mid.mMid_Oid.mOid_Id = hex; + + if (morkCh_IsWhite(c) && ev->Good()) c = this->NextChar(ev); + + if (ev->Good()) { + if (c == '<') { + ReadDictForm(ev); + if (ev->Good()) c = this->NextChar(ev); + } + if (c == '=') { + mParser_Mid.mMid_Buf = this->ReadValue(ev); + if (mParser_Mid.mMid_Buf) { + // this->EndSpanOnThisByte(ev, &mParser_AliasSpan); + this->OnAlias(ev, mParser_AliasSpan, mParser_Mid); + // need to reset this somewhere. + mParser_ValueCoil.mText_Form = 0; + } + } else + this->ExpectedEqualError(ev); + } +} + +void morkParser::ReadMeta(morkEnv* ev, int inEndMeta) +// zm:MetaDict ::= zm:S? '<' zm:S? zm:Cell* zm:S? '>' /* meta attributes */ +// zm:MetaTable ::= zm:S? '{' zm:S? zm:Cell* zm:S? '}' /* meta attributes */ +// zm:MetaRow ::= zm:S? '[' zm:S? zm:Cell* zm:S? ']' /* meta attributes */ +{ + // this->StartSpanOnLastByte(ev, &mParser_MetaSpan); + mParser_InMeta = morkBool_kTrue; + this->OnNewMeta(ev, *mParser_MetaSpan.AsPlace()); + + mork_bool more = morkBool_kTrue; // until end meta + int c; + while (more && (c = this->NextChar(ev)) != EOF && ev->Good()) { + switch (c) { + case '(': // cell + this->ReadCell(ev); + break; + + case '>': // maybe end meta? + if (inEndMeta == '>') + more = morkBool_kFalse; // stop reading meta + else + this->UnexpectedByteInMetaWarning(ev); + break; + + case '}': // maybe end meta? + if (inEndMeta == '}') + more = morkBool_kFalse; // stop reading meta + else + this->UnexpectedByteInMetaWarning(ev); + break; + + case ']': // maybe end meta? + if (inEndMeta == ']') + more = morkBool_kFalse; // stop reading meta + else + this->UnexpectedByteInMetaWarning(ev); + break; + + case '[': // maybe table meta row? 
+ if (mParser_InTable) + this->ReadRow(ev, '['); + else + this->UnexpectedByteInMetaWarning(ev); + break; + + default: + if (mParser_InTable && morkCh_IsHex(c)) + this->ReadRow(ev, c); + else + this->UnexpectedByteInMetaWarning(ev); + break; + } + } + + // this->EndSpanOnThisByte(ev, &mParser_MetaSpan); + mParser_InMeta = morkBool_kFalse; + this->OnMetaEnd(ev, mParser_MetaSpan); +} + +/*static*/ void morkParser::UnexpectedByteInMetaWarning(morkEnv* ev) { + ev->NewWarning("unexpected byte in meta"); +} + +/*static*/ void morkParser::NonParserTypeError(morkEnv* ev) { + ev->NewError("non morkParser"); +} + +mork_bool morkParser::MatchPattern(morkEnv* ev, const char* inPattern) { + // if an error occurs, we want original inPattern in the debugger: + const char* pattern = inPattern; // mutable copy of pointer + morkStream* s = mParser_Stream; + int c; + while (*pattern && ev->Good()) { + char byte = *pattern++; + if ((c = s->Getc(ev)) != byte) { + ev->NewError("byte not in expected pattern"); + } + } + return ev->Good(); +} + +mork_bool morkParser::FindGroupEnd(morkEnv* ev) { + mork_bool foundEnd = morkBool_kFalse; + + // char gidBuf[ 64 ]; // to hold hex pattern we want + // (void) ev->TokenAsHex(gidBuf, mParser_GroupId); + + morkStream* s = mParser_Stream; + int c; + + while ((c = s->Getc(ev)) != EOF && ev->Good() && !foundEnd) { + if (c == '@') // maybe start of group ending? + { + // this->EndSpanOnThisByte(ev, &mParser_GroupSpan); + if ((c = s->Getc(ev)) == '$') // '$' follows '@' ? + { + if ((c = s->Getc(ev)) == '$') // '$' follows "@$" ? 
+ { + if ((c = s->Getc(ev)) == '}') { + foundEnd = this->ReadEndGroupId(ev); + // this->EndSpanOnThisByte(ev, &mParser_GroupSpan); + + } else + ev->NewError("expected '}' after @$$"); + } + } + if (!foundEnd && c == '@') s->Ungetc(c); + } + } + + return foundEnd && ev->Good(); +} + +void morkParser::ReadGroup(morkEnv* mev) { + nsIMdbEnv* ev = mev->AsMdbEnv(); + int next = 0; + mParser_GroupId = this->ReadHex(mev, &next); + if (next == '{') { + morkStream* s = mParser_Stream; + int c; + if ((c = s->Getc(mev)) == '@') { + // we really need the following span inside morkBuilder::OnNewGroup(): + this->StartSpanOnThisByte(mev, &mParser_GroupSpan); + mork_pos startPos = mParser_GroupSpan.mSpan_Start.mPlace_Pos; + + // if ( !store->mStore_FirstCommitGroupPos ) + // store->mStore_FirstCommitGroupPos = startPos; + // else if ( !store->mStore_SecondCommitGroupPos ) + // store->mStore_SecondCommitGroupPos = startPos; + + if (this->FindGroupEnd(mev)) { + mork_pos outPos; + s->Seek(ev, startPos, &outPos); + if (mev->Good()) { + this->OnNewGroup(mev, mParser_GroupSpan.mSpan_Start, mParser_GroupId); + + this->ReadContent(mev, /*inInsideGroup*/ morkBool_kTrue); + + this->OnGroupCommitEnd(mev, mParser_GroupSpan); + } + } + } else + mev->NewError("expected '@' after @$${id{"); + } else + mev->NewError("expected '{' after @$$id"); +} + +mork_bool morkParser::ReadAt(morkEnv* ev, mork_bool inInsideGroup) +/* groups must be ignored until properly terminated */ +// zm:Group ::= zm:GroupStart zm:Content zm:GroupEnd /* transaction */ +// zm:GroupStart ::= zm:S? '@$${' zm:Hex+ '{@' /* xaction id has own space */ +// zm:GroupEnd ::= zm:GroupCommit | zm:GroupAbort +// zm:GroupCommit ::= zm:S? '@$$}' zm:Hex+ '}@' /* id matches start id */ +// zm:GroupAbort ::= zm:S? '@$$}~~}@' /* id matches start id */ +/* We must allow started transactions to be aborted in summary files. */ +/* Note '$$' will never occur unescaped in values we will see in Mork. 
*/ +{ + if (this->MatchPattern(ev, "$$")) { + morkStream* s = mParser_Stream; + int c; + if (((c = s->Getc(ev)) == '{' || c == '}') && ev->Good()) { + if (c == '{') // start of new group? + { + if (!inInsideGroup) + this->ReadGroup(ev); + else + ev->NewError("nested @$${ inside another group"); + } else // c == '}' // end of old group? + { + if (inInsideGroup) { + this->ReadEndGroupId(ev); + mParser_GroupId = 0; + } else + ev->NewError("unmatched @$$} outside any group"); + } + } else + ev->NewError("expected '{' or '}' after @$$"); + } + return ev->Good(); +} + +mork_bool morkParser::ReadEndGroupId(morkEnv* ev) { + mork_bool outSawGroupId = morkBool_kFalse; + morkStream* s = mParser_Stream; + int c; + if ((c = s->Getc(ev)) != EOF && ev->Good()) { + if (c == '~') // transaction is aborted? + { + this->MatchPattern(ev, "~}@"); // finish rest of pattern + } else // push back byte and read expected trailing hex id + { + s->Ungetc(c); + int next = 0; + mork_gid endGroupId = this->ReadHex(ev, &next); + if (ev->Good()) { + if (endGroupId == mParser_GroupId) // matches start? + { + if (next == '}') // '}' after @$$}id ? + { + if ((c = s->Getc(ev)) == '@') // '@' after @$$}id} ? + { + // looks good, so return with no error + outSawGroupId = morkBool_kTrue; + mParser_InGroup = false; + } else + ev->NewError("expected '@' after @$$}id}"); + } else + ev->NewError("expected '}' after @$$}id"); + } else + ev->NewError("end group id mismatch"); + } + } + } + return (outSawGroupId && ev->Good()); +} + +void morkParser::ReadDict(morkEnv* ev) +// zm:Dict ::= zm:S? '<' zm:DictItem* zm:S? '>' +// zm:DictItem ::= zm:MetaDict | zm:Alias +// zm:MetaDict ::= zm:S? '<' zm:S? zm:Cell* zm:S? '>' /* meta attributes */ +// zm:Alias ::= zm:S? '(' ('#')? zm:Hex+ zm:S? 
zm:Value ')' +{ + mParser_Change = morkChange_kNil; + mParser_AtomChange = morkChange_kNil; + + // this->StartSpanOnLastByte(ev, &mParser_DictSpan); + mParser_InDict = morkBool_kTrue; + this->OnNewDict(ev, *mParser_DictSpan.AsPlace()); + + int c; + while ((c = this->NextChar(ev)) != EOF && ev->Good() && c != '>') { + switch (c) { + case '(': // alias + this->ReadAlias(ev); + break; + + case '<': // meta + this->ReadMeta(ev, '>'); + break; + + default: + ev->NewWarning("unexpected byte in dict"); + break; + } + } + + // this->EndSpanOnThisByte(ev, &mParser_DictSpan); + mParser_InDict = morkBool_kFalse; + this->OnDictEnd(ev, mParser_DictSpan); + + if (ev->Bad()) + mParser_State = morkParser_kBrokenState; + else if (c == EOF) + mParser_State = morkParser_kDoneState; +} + +void morkParser::EndSpanOnThisByte(morkEnv* mev, morkSpan* ioSpan) { + mork_pos here; + nsIMdbEnv* ev = mev->AsMdbEnv(); + nsresult rv = mParser_Stream->Tell(ev, &here); + if (NS_SUCCEEDED(rv) && mev->Good()) { + this->SetHerePos(here); + ioSpan->SetEndWithEnd(mParser_PortSpan); + } +} + +void morkParser::EndSpanOnLastByte(morkEnv* mev, morkSpan* ioSpan) { + mork_pos here; + nsIMdbEnv* ev = mev->AsMdbEnv(); + nsresult rv = mParser_Stream->Tell(ev, &here); + if (NS_SUCCEEDED(rv) && mev->Good()) { + if (here > 0) + --here; + else + here = 0; + + this->SetHerePos(here); + ioSpan->SetEndWithEnd(mParser_PortSpan); + } +} + +void morkParser::StartSpanOnLastByte(morkEnv* mev, morkSpan* ioSpan) { + mork_pos here; + nsIMdbEnv* ev = mev->AsMdbEnv(); + nsresult rv = mParser_Stream->Tell(ev, &here); + if (NS_SUCCEEDED(rv) && mev->Good()) { + if (here > 0) + --here; + else + here = 0; + + this->SetHerePos(here); + ioSpan->SetStartWithEnd(mParser_PortSpan); + ioSpan->SetEndWithEnd(mParser_PortSpan); + } +} + +void morkParser::StartSpanOnThisByte(morkEnv* mev, morkSpan* ioSpan) { + mork_pos here; + nsIMdbEnv* ev = mev->AsMdbEnv(); + nsresult rv = mParser_Stream->Tell(ev, &here); + if (NS_SUCCEEDED(rv) && 
mev->Good()) { + this->SetHerePos(here); + ioSpan->SetStartWithEnd(mParser_PortSpan); + ioSpan->SetEndWithEnd(mParser_PortSpan); + } +} + +mork_bool morkParser::ReadContent(morkEnv* ev, mork_bool inInsideGroup) { + int c; + mork_bool keep_going = true; + while (keep_going && (c = this->NextChar(ev)) != EOF && ev->Good()) { + switch (c) { + case '[': // row + this->ReadRow(ev, '['); + keep_going = false; + break; + + case '{': // table + this->ReadTable(ev); + keep_going = false; + break; + + case '<': // dict + this->ReadDict(ev); + keep_going = false; + break; + + case '@': // group + return this->ReadAt(ev, inInsideGroup); + // break; + + // case '+': // plus + // mParser_Change = morkChange_kAdd; + // break; + + // case '-': // minus + // mParser_Change = morkChange_kCut; + // break; + + // case '!': // bang + // mParser_Change = morkChange_kSet; + // break; + + default: + ev->NewWarning("unexpected byte in ReadContent()"); + break; + } + } + if (ev->Bad()) + mParser_State = morkParser_kBrokenState; + else if (c == EOF) + mParser_State = morkParser_kDoneState; + + return (ev->Good() && c != EOF); +} + +void morkParser::OnPortState(morkEnv* ev) { + mork_bool firstTime = !mParser_InPort; + mParser_InPort = morkBool_kTrue; + if (firstTime) this->OnNewPort(ev, *mParser_PortSpan.AsPlace()); + + mork_bool done = !this->ReadContent(ev, mParser_InGroup /*inInsideGroup*/); + + if (done) { + mParser_InPort = morkBool_kFalse; + this->OnPortEnd(ev, mParser_PortSpan); + } + + if (ev->Bad()) mParser_State = morkParser_kBrokenState; +} + +void morkParser::OnStartState(morkEnv* mev) { + morkStream* s = mParser_Stream; + nsIMdbEnv* ev = mev->AsMdbEnv(); + if (s && s->IsNode() && s->IsOpenNode()) { + mork_pos outPos; + nsresult rv = s->Seek(ev, 0, &outPos); + if (NS_SUCCEEDED(rv) && mev->Good()) { + this->StartParse(mev); + mParser_State = morkParser_kPortState; + } + } else + mev->NilPointerError(); + + if (mev->Bad()) mParser_State = morkParser_kBrokenState; +} + +/*protected 
non-poly*/ void morkParser::ParseChunk(morkEnv* ev) { + mParser_Change = morkChange_kNil; + mParser_DoMore = morkBool_kTrue; + + switch (mParser_State) { + case morkParser_kCellState: // 0 + this->OnCellState(ev); + break; + + case morkParser_kMetaState: // 1 + this->OnMetaState(ev); + break; + + case morkParser_kRowState: // 2 + this->OnRowState(ev); + break; + + case morkParser_kTableState: // 3 + this->OnTableState(ev); + break; + + case morkParser_kDictState: // 4 + this->OnDictState(ev); + break; + + case morkParser_kPortState: // 5 + this->OnPortState(ev); + break; + + case morkParser_kStartState: // 6 + this->OnStartState(ev); + break; + + case morkParser_kDoneState: // 7 + mParser_DoMore = morkBool_kFalse; + mParser_IsDone = morkBool_kTrue; + this->StopParse(ev); + break; + case morkParser_kBrokenState: // 8 + mParser_DoMore = morkBool_kFalse; + mParser_IsBroken = morkBool_kTrue; + this->StopParse(ev); + break; + default: // ? + MORK_ASSERT(morkBool_kFalse); + mParser_State = morkParser_kBrokenState; + break; + } +} + +/*public non-poly*/ mdb_count +morkParser::ParseMore( // return count of bytes consumed now + morkEnv* ev, // context + mork_pos* outPos, // current byte pos in the stream afterwards + mork_bool* outDone, // is parsing finished? + mork_bool* outBroken // is parsing irreparably dead and broken? +) { + mdb_count outCount = 0; + if (this->IsNode() && this->GoodParserTag() && this->IsOpenNode()) { + mork_pos startPos = this->HerePos(); + + if (!mParser_IsDone && !mParser_IsBroken) this->ParseChunk(ev); + + // HerePos is only updated for groups. I'd like it to be more accurate. 
+ + mork_pos here; + mParser_Stream->Tell(ev, &here); + + if (outDone) *outDone = mParser_IsDone; + if (outBroken) *outBroken = mParser_IsBroken; + if (outPos) *outPos = here; + + if (here > startPos) outCount = (mdb_count)(here - startPos); + } else { + this->NonUsableParserError(ev); + if (outDone) *outDone = morkBool_kTrue; + if (outBroken) *outBroken = morkBool_kTrue; + if (outPos) *outPos = 0; + } + return outCount; +} + +// 456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789 |