Adding upstream version 4:7.4.7.upstream/4%7.4.7 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:06:44 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:06:44 +0000
commit: ed5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch)
tree: 7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /basic/source/comp/token.cxx
parent: Initial commit. (diff)
download: libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.tar.xz
libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.zip
1 files changed, 571 insertions, 0 deletions
diff --git a/basic/source/comp/token.cxx b/basic/source/comp/token.cxx
new file mode 100644
index 000000000..a060b3fd9
--- /dev/null
+++ b/basic/source/comp/token.cxx
@@ -0,0 +1,571 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <array>
+
+#include <basic/sberrors.hxx>
+#include <sal/macros.h>
+#include <basiccharclass.hxx>
+#include <token.hxx>
+
+namespace {
+
+struct TokenTable { SbiToken t; const char *s; };
+
+}
+
+const TokenTable aTokTable_Basic [] = {
+    { CAT,      "&" },
+    { MUL,      "*" },
+    { PLUS,     "+" },
+    { MINUS,    "-" },
+    { DIV,      "/" },
+    { EOS,      ":" },
+    { ASSIGN,   ":=" },
+    { LT,       "<" },
+    { LE,       "<=" },
+    { NE,       "<>" },
+    { EQ,       "=" },
+    { GT,       ">" },
+    { GE,       ">=" },
+    { ACCESS,   "Access" },
+    { ALIAS,    "Alias" },
+    { AND,      "And" },
+    { ANY,      "Any" },
+    { APPEND,   "Append" },
+    { AS,       "As" },
+    { ATTRIBUTE,"Attribute" },
+    { BASE,     "Base" },
+    { BINARY,   "Binary" },
+    { TBOOLEAN, "Boolean" },
+    { BYREF,    "ByRef", },
+    { TBYTE,    "Byte", },
+    { BYVAL,    "ByVal", },
+    { CALL,     "Call" },
+    { CASE,     "Case" },
+    { CDECL_,   "Cdecl" },
+    { CLASSMODULE, "ClassModule" },
+    { CLOSE,    "Close" },
+    { COMPARE,  "Compare" },
+    { COMPATIBLE,"Compatible" },
+    { CONST_,   "Const" },
+    { TCURRENCY,"Currency" },
+    { TDATE,    "Date" },
+    { DECLARE,  "Declare" },
+    { DEFBOOL,  "DefBool" },
+    { DEFCUR,   "DefCur" },
+    { DEFDATE,  "DefDate" },
+    { DEFDBL,   "DefDbl" },
+    { DEFERR,   "DefErr" },
+    { DEFINT,   "DefInt" },
+    { DEFLNG,   "DefLng" },
+    { DEFOBJ,   "DefObj" },
+    { DEFSNG,   "DefSng" },
+    { DEFSTR,   "DefStr" },
+    { DEFVAR,   "DefVar" },
+    { DIM,      "Dim" },
+    { DO,       "Do" },
+    { TDOUBLE,  "Double" },
+    { EACH,     "Each" },
+    { ELSE,     "Else" },
+    { ELSEIF,   "ElseIf" },
+    { END,      "End" },
+    { ENDENUM,  "End Enum" },
+    { ENDFUNC,  "End Function" },
+    { ENDIF,    "End If" },
+    { ENDPROPERTY, "End Property" },
+    { ENDSELECT,"End Select" },
+    { ENDSUB,   "End Sub" },
+    { ENDTYPE,  "End Type" },
+    { ENDIF,    "EndIf" },
+    { ENUM,     "Enum" },
+    { EQV,      "Eqv" },
+    { ERASE,    "Erase" },
+    { ERROR_,   "Error" },
+    { EXIT,     "Exit" },
+    { BASIC_EXPLICIT, "Explicit" },
+    { FOR,      "For" },
+    { FUNCTION, "Function" },
+    { GET,      "Get" },
+    { GLOBAL,   "Global" },
+    { GOSUB,    "GoSub" },
+    { GOTO,     "GoTo" },
+    { IF,       "If" },
+    { IMP,      "Imp" },
+    { IMPLEMENTS, "Implements" },
+    { IN_,      "In" },
+    { INPUT,    "Input" },              // also INPUT #
+    { TINTEGER, "Integer" },
+    { IS,       "Is" },
+    { LET,      "Let" },
+    { LIB,      "Lib" },
+    { LIKE,     "Like" },
+    { LINE,     "Line" },
+    { LINEINPUT,"Line Input" },
+    { LOCAL,    "Local" },
+    { LOCK,     "Lock" },
+    { TLONG,    "Long" },
+    { LOOP,     "Loop" },
+    { LPRINT,   "LPrint" },
+    { LSET,     "LSet" }, // JSM
+    { MOD,      "Mod" },
+    { NAME,     "Name" },
+    { NEW,      "New" },
+    { NEXT,     "Next" },
+    { NOT,      "Not" },
+    { TOBJECT,  "Object" },
+    { ON,       "On" },
+    { OPEN,     "Open" },
+    { OPTION,   "Option" },
+    { OPTIONAL_, "Optional" },
+    { OR,       "Or" },
+    { OUTPUT,   "Output" },
+    { PARAMARRAY,   "ParamArray" },
+    { PRESERVE, "Preserve" },
+    { PRINT,    "Print" },
+    { PRIVATE,  "Private" },
+    { PROPERTY, "Property" },
+    { PTRSAFE,  "PtrSafe" },
+    { PUBLIC,   "Public" },
+    { RANDOM,   "Random" },
+    { READ,     "Read" },
+    { REDIM,    "ReDim" },
+    { REM,      "Rem" },
+    { RESUME,   "Resume" },
+    { RETURN,   "Return" },
+    { RSET,     "RSet" }, // JSM
+    { SELECT,   "Select" },
+    { SET,      "Set" },
+    { SHARED,   "Shared" },
+    { TSINGLE,  "Single" },
+    { STATIC,   "Static" },
+    { STEP,     "Step" },
+    { STOP,     "Stop" },
+    { TSTRING,  "String" },
+    { SUB,      "Sub" },
+    { STOP,     "System" },
+    { TEXT,     "Text" },
+    { THEN,     "Then" },
+    { TO,       "To", },
+    { TYPE,     "Type" },
+    { TYPEOF,   "TypeOf" },
+    { UNTIL,    "Until" },
+    { TVARIANT, "Variant" },
+    { VBASUPPORT,   "VbaSupport" },
+    { WEND,     "Wend" },
+    { WHILE,    "While" },
+    { WITH,     "With" },
+    { WITHEVENTS,   "WithEvents" },
+    { WRITE,    "Write" },              // also WRITE #
+    { XOR,      "Xor" },
+};
+
+namespace {
+
+// #i109076
+class TokenLabelInfo
+{
+    std::array<bool,VBASUPPORT+1> m_pTokenCanBeLabelTab;
+
+public:
+    TokenLabelInfo();
+
+    bool canTokenBeLabel( SbiToken eTok )
+        { return m_pTokenCanBeLabelTab[eTok]; }
+};
+
+}
+
+// #i109076
+TokenLabelInfo::TokenLabelInfo()
+{
+    m_pTokenCanBeLabelTab.fill(false);
+
+    // Token accepted as label by VBA
+    static const SbiToken eLabelToken[] = { ACCESS, ALIAS, APPEND, BASE, BINARY, CLASSMODULE,
+                               COMPARE, COMPATIBLE, DEFERR, ERROR_, BASIC_EXPLICIT, LIB, LINE, LPRINT, NAME,
+                               TOBJECT, OUTPUT, PROPERTY, RANDOM, READ, STEP, STOP, TEXT, VBASUPPORT };
+    for( SbiToken eTok : eLabelToken )
+    {
+        m_pTokenCanBeLabelTab[eTok] = true;
+    }
+}
+
+
+SbiTokenizer::SbiTokenizer( const OUString& rSrc, StarBASIC* pb )
+    : SbiScanner(rSrc, pb)
+    , eCurTok(NIL)
+    , ePush(NIL)
+    , nPLine(0)
+    , nPCol1(0)
+    , nPCol2(0)
+    , bEof(false)
+    , bEos(true)
+    , bAs(false)
+    , bErrorIsSymbol(true)
+{
+}
+
+void SbiTokenizer::Push( SbiToken t )
+{
+    if( ePush != NIL )
+        Error( ERRCODE_BASIC_INTERNAL_ERROR, "PUSH" );
+    else ePush = t;
+}
+
+void SbiTokenizer::Error( ErrCode code, const OUString &aMsg )
+{
+    aError = aMsg;
+    Error( code );
+}
+
+void SbiTokenizer::Error( ErrCode code, SbiToken tok )
+{
+    aError = Symbol( tok );
+    Error( code );
+}
+
+// reading in the next token without absorbing it
+
+SbiToken SbiTokenizer::Peek()
+{
+    if( ePush == NIL )
+    {
+        sal_Int32 nOldLine = nLine;
+        sal_Int32 nOldCol1 = nCol1;
+        sal_Int32 nOldCol2 = nCol2;
+        ePush = Next();
+        nPLine = nLine; nLine = nOldLine;
+        nPCol1 = nCol1; nCol1 = nOldCol1;
+        nPCol2 = nCol2; nCol2 = nOldCol2;
+    }
+    eCurTok = ePush;
+    return eCurTok;
+}
+
+// For decompilation. Numbers and symbols return an empty string.
+
+const OUString& SbiTokenizer::Symbol( SbiToken t )
+{
+    // character token?
+    if( t < FIRSTKWD )
+    {
+        aSym = OUString(sal::static_int_cast<sal_Unicode>(t));
+        return aSym;
+    }
+    switch( t )
+    {
+    case NEG   :
+        aSym = "-";
+        return aSym;
+    case EOS   :
+        aSym = ":/CRLF";
+        return aSym;
+    case EOLN  :
+        aSym = "CRLF";
+        return aSym;
+    default:
+        break;
+    }
+    for( auto& rTok : aTokTable_Basic )
+    {
+        if( rTok.t == t )
+        {
+            aSym = OStringToOUString(rTok.s, RTL_TEXTENCODING_ASCII_US);
+            return aSym;
+        }
+    }
+    const sal_Unicode *p = aSym.getStr();
+    if (*p <= ' ')
+    {
+        aSym = "???";
+    }
+    return aSym;
+}
+
+// Reading in the next token and put it down.
+// Tokens that don't appear in the token table
+// are directly returned as a character.
+// Some words are treated in a special way.
+
+SbiToken SbiTokenizer::Next()
+{
+    if (bEof)
+    {
+        return EOLN;
+    }
+    // have read in one already?
+    if( ePush != NIL )
+    {
+        eCurTok = ePush;
+        ePush = NIL;
+        nLine = nPLine;
+        nCol1 = nPCol1;
+        nCol2 = nPCol2;
+        bEos = IsEoln( eCurTok );
+        return eCurTok;
+    }
+    const TokenTable *tp;
+
+    if( !NextSym() )
+    {
+        bEof = bEos = true;
+        eCurTok = EOLN;
+        return eCurTok;
+    }
+
+    if( aSym.startsWith("\n") )
+    {
+        bEos = true;
+        eCurTok = EOLN;
+        return eCurTok;
+    }
+    bEos = false;
+
+    if( bNumber )
+    {
+        eCurTok = NUMBER;
+        return eCurTok;
+    }
+    else if( ( eScanType == SbxDATE || eScanType == SbxSTRING ) && !bSymbol )
+    {
+        eCurTok = FIXSTRING;
+        return eCurTok;
+    }
+    else if( aSym.isEmpty() )
+    {
+        //something went wrong
+        bEof = bEos = true;
+        eCurTok = EOLN;
+        return eCurTok;
+    }
+    // Special cases of characters that are between "Z" and "a". ICompare()
+    // evaluates the position of these characters in different ways.
+    else if( aSym[0] == '^' )
+    {
+        eCurTok = EXPON;
+        return eCurTok;
+    }
+    else if( aSym[0] == '\\' )
+    {
+        eCurTok = IDIV;
+        return eCurTok;
+    }
+    else
+    {
+        if( eScanType != SbxVARIANT )
+        {
+            eCurTok = SYMBOL;
+            return eCurTok;
+        }
+        // valid token?
+        short lb = 0;
+        short ub = std::size(aTokTable_Basic)-1;
+        short delta;
+        do
+        {
+            delta = (ub - lb) >> 1;
+            tp = &aTokTable_Basic[ lb + delta ];
+            sal_Int32 res = aSym.compareToIgnoreAsciiCaseAscii( tp->s );
+
+            if( res == 0 )
+            {
+                goto special;
+            }
+            if( res < 0 )
+            {
+                if ((ub - lb) == 2)
+                {
+                    ub = lb;
+                }
+                else
+                {
+                    ub = ub - delta;
+                }
+            }
+            else
+            {
+                if ((ub -lb) == 2)
+                {
+                    lb = ub;
+                }
+                else
+                {
+                    lb = lb + delta;
+                }
+            }
+        }
+        while( delta );
+        // Symbol? if not >= token
+        sal_Unicode ch = aSym[0];
+        if( !BasicCharClass::isAlpha( ch, bCompatible ) && !bSymbol )
+        {
+            eCurTok = static_cast<SbiToken>(ch & 0x00FF);
+            return eCurTok;
+        }
+        eCurTok = SYMBOL;
+        return eCurTok;
+    }
+special:
+    // #i92642
+    bool bStartOfLine = (eCurTok == NIL || eCurTok == REM || eCurTok == EOLN ||
+            eCurTok == THEN || eCurTok == ELSE); // single line If
+    if( !bStartOfLine && (tp->t == NAME || tp->t == LINE) )
+    {
+        eCurTok = SYMBOL;
+        return eCurTok;
+    }
+    else if( tp->t == TEXT )
+    {
+        eCurTok = SYMBOL;
+        return eCurTok;
+    }
+    // maybe we can expand this for other statements that have parameters
+    // that are keywords ( and those keywords are only used within such
+    // statements )
+    // what's happening here is that if we come across 'append' ( and we are
+    // not in the middle of parsing a special statement ( like 'Open')
+    // we just treat keyword 'append' as a normal 'SYMBOL'.
+    // Also we accept Dim APPEND
+    else if ( ( !bInStatement || eCurTok == DIM ) && tp->t == APPEND )
+    {
+        eCurTok = SYMBOL;
+        return eCurTok;
+    }
+    // #i92642: Special LINE token handling -> SbiParser::Line()
+
+    // END IF, CASE, SUB, DEF, FUNCTION, TYPE, CLASS, WITH
+    if( tp->t == END )
+    {
+        // from 15.3.96, special treatment for END, at Peek() the current
+        // time is lost, so memorize everything and restore after
+        sal_Int32 nOldLine = nLine;
+        sal_Int32 nOldCol  = nCol;
+        sal_Int32 nOldCol1 = nCol1;
+        sal_Int32 nOldCol2 = nCol2;
+        OUString aOldSym = aSym;
+        SaveLine();             // save pLine in the scanner
+
+        eCurTok = Peek();
+        switch( eCurTok )
+        {
+        case IF:       Next(); eCurTok = ENDIF; break;
+        case SELECT:   Next(); eCurTok = ENDSELECT; break;
+        case SUB:      Next(); eCurTok = ENDSUB; break;
+        case FUNCTION: Next(); eCurTok = ENDFUNC; break;
+        case PROPERTY: Next(); eCurTok = ENDPROPERTY; break;
+        case TYPE:     Next(); eCurTok = ENDTYPE; break;
+        case ENUM:     Next(); eCurTok = ENDENUM; break;
+        case WITH:     Next(); eCurTok = ENDWITH; break;
+        default :      eCurTok = END; break;
+        }
+        nCol1 = nOldCol1;
+        if( eCurTok == END )
+        {
+            // reset everything so that token is read completely newly after END
+            ePush = NIL;
+            nLine = nOldLine;
+            nCol  = nOldCol;
+            nCol2 = nOldCol2;
+            aSym = aOldSym;
+            RestoreLine();
+        }
+        return eCurTok;
+    }
+    // are data types keywords?
+    // there is ERROR(), DATA(), STRING() etc.
+    eCurTok = tp->t;
+    // AS: data types are keywords
+    if( tp->t == AS )
+    {
+        bAs = true;
+    }
+    else
+    {
+        if( bAs )
+        {
+            bAs = false;
+        }
+        else if( eCurTok >= DATATYPE1 && eCurTok <= DATATYPE2 && (bErrorIsSymbol || eCurTok != ERROR_) )
+        {
+            eCurTok = SYMBOL;
+        }
+    }
+
+    // CLASSMODULE, PROPERTY, GET, ENUM token only visible in compatible mode
+    SbiToken eTok = tp->t;
+    if( bCompatible )
+    {
+        // #129904 Suppress system
+        if( eTok == STOP && aSym.equalsIgnoreAsciiCase("system") )
+        {
+            eCurTok = SYMBOL;
+        }
+        if( eTok == GET && bStartOfLine )
+        {
+            eCurTok = SYMBOL;
+        }
+    }
+    else
+    {
+        if( eTok == CLASSMODULE ||
+            eTok == IMPLEMENTS ||
+            eTok == PARAMARRAY ||
+            eTok == ENUM ||
+            eTok == PROPERTY ||
+            eTok == GET ||
+            eTok == TYPEOF )
+        {
+            eCurTok = SYMBOL;
+        }
+    }
+
+    bEos = IsEoln( eCurTok );
+    return eCurTok;
+}
+
+bool SbiTokenizer::MayBeLabel( bool bNeedsColon )
+{
+    static TokenLabelInfo gaStaticTokenLabelInfo;
+
+    if( eCurTok == SYMBOL || gaStaticTokenLabelInfo.canTokenBeLabel( eCurTok ) )
+    {
+        return !bNeedsColon || DoesColonFollow();
+    }
+    else
+    {
+        return ( eCurTok == NUMBER
+                  && eScanType == SbxINTEGER
+                  && nVal >= 0 );
+    }
+}
+
+
+OUString SbiTokenizer::GetKeywordCase( const OUString& sKeyword )
+{
+    for( auto& rTok : aTokTable_Basic )
+    {
+        if( sKeyword.equalsIgnoreAsciiCaseAscii(rTok.s) )
+            return OStringToOUString(rTok.s, RTL_TEXTENCODING_ASCII_US);
+    }
+    return OUString();
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 09:06:44 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 09:06:44 +0000
commit	ed5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch)
tree	7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /basic/source/comp/token.cxx
parent	Initial commit. (diff)
download	libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.tar.xz libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.zip