diff options
Diffstat (limited to 'basic/source/comp/scanner.cxx')
-rw-r--r-- | basic/source/comp/scanner.cxx | 717 |
1 files changed, 717 insertions, 0 deletions
diff --git a/basic/source/comp/scanner.cxx b/basic/source/comp/scanner.cxx new file mode 100644 index 0000000000..45b65a29b1 --- /dev/null +++ b/basic/source/comp/scanner.cxx @@ -0,0 +1,717 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <basiccharclass.hxx> +#include <scanner.hxx> +#include <sbintern.hxx> +#include <runtime.hxx> + +#include <basic/sberrors.hxx> +#include <i18nlangtag/lang.h> +#include <svl/numformat.hxx> +#include <svl/zforlist.hxx> +#include <rtl/character.hxx> +#include <o3tl/string_view.hxx> +#include <utility> + +SbiScanner::SbiScanner(OUString _aBuf, StarBASIC* p) + : aBuf(std::move(_aBuf)) + , nLineIdx(-1) + , nSaveLineIdx(-1) + , pBasic(p) + , eScanType(SbxVARIANT) + , nVal(0) + , nSavedCol1(0) + , nCol(0) + , nErrors(0) + , nColLock(0) + , nBufPos(0) + , nLine(0) + , nCol1(0) + , nCol2(0) + , bSymbol(false) + , bNumber(false) + , bSpaces(false) + , bAbort(false) + , bHash(true) + , bError(false) + , bCompatible(false) + , bVBASupportOn(false) + , bPrevLineExtentsComment(false) + , bClosingUnderscore(false) + , bLineEndsWithWhitespace(false) + , bInStatement(false) +{ +} + +void SbiScanner::LockColumn() +{ + if( !nColLock++ ) + nSavedCol1 = nCol1; +} + +void SbiScanner::UnlockColumn() +{ + if( nColLock ) + nColLock--; +} + +void SbiScanner::GenError( ErrCode code ) +{ + if( GetSbData()->bBlockCompilerError ) + { + bAbort = true; + return; + } + if( !bError ) + { + bool bRes = true; + // report only one error per statement + bError = true; + if( pBasic ) + { + // in case of EXPECTED or UNEXPECTED it always refers + // to the last token, so take the Col1 over + sal_Int32 nc = nColLock ? nSavedCol1 : nCol1; + if ( code.anyOf( + ERRCODE_BASIC_EXPECTED, + ERRCODE_BASIC_UNEXPECTED, + ERRCODE_BASIC_SYMBOL_EXPECTED, + ERRCODE_BASIC_LABEL_EXPECTED) ) + { + nc = nCol1; + if( nc > nCol2 ) nCol2 = nc; + } + bRes = pBasic->CError( code, aError, nLine, nc, nCol2 ); + } + bAbort = bAbort || !bRes || ( code == ERRCODE_BASIC_NO_MEMORY || code == ERRCODE_BASIC_PROG_TOO_LARGE ); + } + nErrors++; +} + + +// used by SbiTokenizer::MayBeLabel() to detect a label +bool SbiScanner::DoesColonFollow() +{ + if(nCol < aLine.getLength() && aLine[nCol] == ':') + { + ++nLineIdx; ++nCol; + return true; + } + else + return false; +} + +// test for legal suffix +static SbxDataType GetSuffixType( sal_Unicode c ) +{ + switch (c) + { + case '%': + return SbxINTEGER; + case '&': + return SbxLONG; + case '!': + return SbxSINGLE; + case '#': + return SbxDOUBLE; + case '@': + return SbxCURRENCY; + case '$': + return SbxSTRING; + default: + return SbxVARIANT; + } +} + +// reading the next symbol into the variables aSym, nVal and eType +// return value is sal_False at EOF or errors +#define BUF_SIZE 80 + +void SbiScanner::scanAlphanumeric() +{ + sal_Int32 n = nCol; + while(nCol < aLine.getLength() && (BasicCharClass::isAlphaNumeric(aLine[nCol], bCompatible) || aLine[nCol] == '_')) + { + ++nLineIdx; + ++nCol; + } + aSym = aLine.copy(n, nCol - n); +} + +void SbiScanner::scanGoto() +{ + sal_Int32 n = nCol; + while(n < aLine.getLength() && BasicCharClass::isWhitespace(aLine[n])) + ++n; + + if(n + 1 < aLine.getLength()) + { + std::u16string_view aTemp = aLine.subView(n, 2); + if(o3tl::equalsIgnoreAsciiCase(aTemp, u"to")) + { + aSym = "goto"; + nLineIdx += n + 2 - nCol; + nCol = n + 2; + } + } +} + +bool SbiScanner::readLine() +{ + if(nBufPos >= aBuf.getLength()) + return false; + + sal_Int32 n = nBufPos; + sal_Int32 nLen = aBuf.getLength(); + + while(n < nLen && aBuf[n] != '\r' && aBuf[n] != '\n') + ++n; + + // Trim trailing whitespace + sal_Int32 nEnd = n; + while(nBufPos < nEnd && BasicCharClass::isWhitespace(aBuf[nEnd - 1])) + --nEnd; + + // tdf#149402 - check if line ends with a whitespace + bLineEndsWithWhitespace = (n > nEnd); + aLine = aBuf.copy(nBufPos, nEnd - nBufPos); + + // Fast-forward past the line ending + if(n + 1 < nLen && aBuf[n] == '\r' && aBuf[n + 1] == '\n') + n += 2; + else if(n < nLen) + ++n; + + nBufPos = n; + nLineIdx = 0; + + ++nLine; + nCol = nCol1 = nCol2 = 0; + nColLock = 0; + + return true; +} + +bool SbiScanner::NextSym() +{ + // memorize for the EOLN-case + sal_Int32 nOldLine = nLine; + sal_Int32 nOldCol1 = nCol1; + sal_Int32 nOldCol2 = nCol2; + sal_Unicode buf[ BUF_SIZE ], *p = buf; + + eScanType = SbxVARIANT; + aSym.clear(); + bHash = bSymbol = bNumber = bSpaces = false; + bool bCompilerDirective = false; + + // read in line? + if (nLineIdx == -1) + { + if(!readLine()) + return false; + + nOldLine = nLine; + nOldCol1 = nOldCol2 = 0; + } + + const sal_Int32 nLineIdxScanStart = nLineIdx; + + if(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol])) + { + bSpaces = true; + while(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol])) + { + ++nLineIdx; + ++nCol; + } + } + + nCol1 = nCol; + + // only blank line? + if(nCol >= aLine.getLength()) + goto eoln; + + if( bPrevLineExtentsComment ) + goto PrevLineCommentLbl; + + if(nCol < aLine.getLength() && aLine[nCol] == '#') + { + sal_Int32 nLineTempIdx = nLineIdx; + do + { + nLineTempIdx++; + } while (nLineTempIdx < aLine.getLength() && !BasicCharClass::isWhitespace(aLine[nLineTempIdx]) + && aLine[nLineTempIdx] != '#' && aLine[nLineTempIdx] != ','); + // leave it if it is a date literal - it will be handled later + if (nLineTempIdx >= aLine.getLength() || aLine[nLineTempIdx] != '#') + { + ++nLineIdx; + ++nCol; + //ignore compiler directives (# is first non-space character) + if (nOldCol2 == 0) + bCompilerDirective = true; + else + bHash = true; + } + } + + // copy character if symbol + if(nCol < aLine.getLength() && (BasicCharClass::isAlpha(aLine[nCol], bCompatible) || aLine[nCol] == '_')) + { + // if there's nothing behind '_' , it's the end of a line! + if(nCol + 1 == aLine.getLength() && aLine[nCol] == '_') + { + // Note that nCol is not incremented here... + ++nLineIdx; + goto eoln; + } + + bSymbol = true; + + scanAlphanumeric(); + + // Special handling for "go to" + if(nCol < aLine.getLength() && bCompatible && aSym.equalsIgnoreAsciiCase("go")) + scanGoto(); + + // tdf#125637 - check for closing underscore + if (nCol == aLine.getLength() && aLine[nCol - 1] == '_') + { + bClosingUnderscore = true; + } + // type recognition? + // don't test the exclamation mark + // if there's a symbol behind it + else if((nCol >= aLine.getLength() || aLine[nCol] != '!') || + (nCol + 1 >= aLine.getLength() || !BasicCharClass::isAlpha(aLine[nCol + 1], bCompatible))) + { + if(nCol < aLine.getLength()) + { + SbxDataType t(GetSuffixType(aLine[nCol])); + if( t != SbxVARIANT ) + { + eScanType = t; + ++nLineIdx; + ++nCol; + } + } + } + } + + // read in and convert if number + else if((nCol < aLine.getLength() && rtl::isAsciiDigit(aLine[nCol])) || + (nCol + 1 < aLine.getLength() && aLine[nCol] == '.' && rtl::isAsciiDigit(aLine[nCol + 1]))) + { + short exp = 0; + short dec = 0; + eScanType = SbxDOUBLE; + bool bScanError = false; + bool bBufOverflow = false; + // All this because of 'D' or 'd' floating point type, sigh... + while(!bScanError && nCol < aLine.getLength() && strchr("0123456789.DEde", aLine[nCol])) + { + // from 4.1.1996: buffer full? -> go on scanning empty + if( (p-buf) == (BUF_SIZE-1) ) + { + bBufOverflow = true; + ++nLineIdx; + ++nCol; + continue; + } + // point or exponent? + if(aLine[nCol] == '.') + { + if( ++dec > 1 ) + bScanError = true; + else + *p++ = '.'; + } + else if(strchr("DdEe", aLine[nCol])) + { + if (++exp > 1) + bScanError = true; + else + { + *p++ = 'E'; + if (nCol + 1 < aLine.getLength() && (aLine[nCol+1] == '+' || aLine[nCol+1] == '-')) + { + ++nLineIdx; + ++nCol; + if( (p-buf) == (BUF_SIZE-1) ) + { + bBufOverflow = true; + continue; + } + *p++ = aLine[nCol]; + } + } + } + else + { + *p++ = aLine[nCol]; + } + ++nLineIdx; + ++nCol; + } + *p = 0; + aSym = p; bNumber = true; + + // For bad characters, scan and parse errors generate only one error. + ErrCode nError = ERRCODE_NONE; + if (bScanError) + { + --nLineIdx; + --nCol; + aError = OUString( aLine[nCol]); + nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER; + } + + rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok; + const sal_Unicode* pParseEnd = buf; + nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', &eStatus, &pParseEnd ); + if (pParseEnd != buf+(p-buf)) + { + // e.g. "12e" or "12e+", or with bScanError "12d"+"E". + sal_Int32 nChars = buf+(p-buf) - pParseEnd; + nLineIdx -= nChars; + nCol -= nChars; + // For bScanError, nLineIdx and nCol were already decremented, just + // add that character to the parse end. + if (bScanError) + ++nChars; + // Copy error position from original string, not the buffer + // replacement where "12dE" => "12EE". + aError = aLine.copy( nCol, nChars); + nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER; + } + else if (eStatus != rtl_math_ConversionStatus_Ok) + { + // Keep the scan error and character at position, if any. + if (!nError) + nError = ERRCODE_BASIC_MATH_OVERFLOW; + } + + if (nError) + GenError( nError ); + + if( !dec && !exp ) + { + if( nVal >= SbxMININT && nVal <= SbxMAXINT ) + eScanType = SbxINTEGER; + else if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG ) + eScanType = SbxLONG; + } + + if( bBufOverflow ) + GenError( ERRCODE_BASIC_MATH_OVERFLOW ); + + // type recognition? + if( nCol < aLine.getLength() ) + { + SbxDataType t(GetSuffixType(aLine[nCol])); + if( t != SbxVARIANT ) + { + eScanType = t; + ++nLineIdx; + ++nCol; + } + // tdf#130476 - don't allow String trailing data type character with numbers + if ( t == SbxSTRING ) + { + GenError( ERRCODE_BASIC_SYNTAX ); + } + } + } + + // Hex/octal number? Read in and convert: + else if(aLine.getLength() - nCol > 1 && aLine[nCol] == '&') + { + ++nLineIdx; ++nCol; + sal_Unicode base = 16; + sal_Unicode xch = aLine[nCol]; + ++nLineIdx; ++nCol; + switch( rtl::toAsciiUpperCase( xch ) ) + { + case 'O': + base = 8; + break; + case 'H': + break; + default : + // treated as an operator + --nLineIdx; --nCol; nCol1 = nCol-1; + aSym = "&"; + return true; + } + bNumber = true; + // Hex literals are signed Integers ( as defined by basic + // e.g. -2,147,483,648 through 2,147,483,647 (signed) + sal_uInt64 lu = 0; + bool bOverflow = false; + while(nCol < aLine.getLength() && BasicCharClass::isAlphaNumeric(aLine[nCol], false)) + { + sal_Unicode ch = rtl::toAsciiUpperCase(aLine[nCol]); + ++nLineIdx; ++nCol; + if( ((base == 16 ) && rtl::isAsciiHexDigit( ch ) ) || + ((base == 8) && rtl::isAsciiOctalDigit( ch ))) + { + int i = ch - '0'; + if( i > 9 ) i -= 7; + lu = ( lu * base ) + i; + if( lu > SAL_MAX_UINT32 ) + { + bOverflow = true; + } + } + else + { + aError = OUString(ch); + GenError( ERRCODE_BASIC_BAD_CHAR_IN_NUMBER ); + } + } + + // tdf#130476 - take into account trailing data type characters + if( nCol < aLine.getLength() ) + { + SbxDataType t(GetSuffixType(aLine[nCol])); + if( t != SbxVARIANT ) + { + eScanType = t; + ++nLineIdx; + ++nCol; + } + // tdf#130476 - don't allow String trailing data type character with numbers + if ( t == SbxSTRING ) + { + GenError( ERRCODE_BASIC_SYNTAX ); + } + } + + // tdf#130476 - take into account trailing data type characters + switch ( eScanType ) + { + case SbxINTEGER: + nVal = static_cast<double>( static_cast<sal_Int16>(lu) ); + if ( lu > SbxMAXUINT ) + { + bOverflow = true; + } + break; + case SbxLONG: nVal = static_cast<double>( static_cast<sal_Int32>(lu) ); break; + case SbxVARIANT: + { + // tdf#62326 - If the value of the hex string without explicit type character lies within + // the range of 0x8000 (SbxMAXINT + 1) and 0xFFFF (SbxMAXUINT) inclusive, cast the value + // to 16 bit in order to get signed integers, e.g., SbxMININT through SbxMAXINT + sal_Int32 ls = (lu > SbxMAXINT && lu <= SbxMAXUINT) ? static_cast<sal_Int16>(lu) : static_cast<sal_Int32>(lu); + eScanType = ( ls >= SbxMININT && ls <= SbxMAXINT ) ? SbxINTEGER : SbxLONG; + nVal = static_cast<double>(ls); + break; + } + default: + nVal = static_cast<double>(lu); + break; + } + if( bOverflow ) + GenError( ERRCODE_BASIC_MATH_OVERFLOW ); + } + + // Strings: + else if (nLineIdx < aLine.getLength() && (aLine[nLineIdx] == '"' || aLine[nLineIdx] == '[')) + { + sal_Unicode cSep = aLine[nLineIdx]; + if( cSep == '[' ) + { + bSymbol = true; + cSep = ']'; + } + sal_Int32 n = nCol + 1; + while (nLineIdx < aLine.getLength()) + { + do + { + nLineIdx++; + nCol++; + } + while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != cSep)); + if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == cSep) + { + nLineIdx++; nCol++; + if (nLineIdx >= aLine.getLength() || aLine[nLineIdx] != cSep || cSep == ']') + { + // If VBA Interop then doesn't eat the [] chars + if ( cSep == ']' && bVBASupportOn ) + aSym = aLine.copy( n - 1, nCol - n + 1); + else + aSym = aLine.copy( n, nCol - n - 1 ); + // get out duplicate string delimiters + OUStringBuffer aSymBuf(aSym.getLength()); + for ( sal_Int32 i = 0, len = aSym.getLength(); i < len; ++i ) + { + aSymBuf.append( aSym[i] ); + if ( aSym[i] == cSep && ( i+1 < len ) && aSym[i+1] == cSep ) + ++i; + } + aSym = aSymBuf.makeStringAndClear(); + if( cSep != ']' ) + eScanType = SbxSTRING; + break; + } + } + else + { + aError = OUString(cSep); + GenError( ERRCODE_BASIC_EXPECTED ); + } + } + } + + // Date: + else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#') + { + sal_Int32 n = nCol + 1; + do + { + nLineIdx++; + nCol++; + } + while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != '#')); + if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#') + { + nLineIdx++; nCol++; + aSym = aLine.copy( n, nCol - n - 1 ); + + // parse date literal + std::shared_ptr<SvNumberFormatter> pFormatter; + if (GetSbData()->pInst) + { + pFormatter = GetSbData()->pInst->GetNumberFormatter(); + } + else + { + sal_uInt32 nDummy; + pFormatter = SbiInstance::PrepareNumberFormatter( nDummy, nDummy, nDummy ); + } + sal_uInt32 nIndex = pFormatter->GetStandardIndex( LANGUAGE_ENGLISH_US); + bool bSuccess = pFormatter->IsNumberFormat(aSym, nIndex, nVal); + if( bSuccess ) + { + SvNumFormatType nType_ = pFormatter->GetType(nIndex); + if( !(nType_ & SvNumFormatType::DATE) ) + bSuccess = false; + } + + if (!bSuccess) + GenError( ERRCODE_BASIC_CONVERSION ); + + bNumber = true; + eScanType = SbxDOUBLE; + } + else + { + aError = OUString('#'); + GenError( ERRCODE_BASIC_EXPECTED ); + } + } + // invalid characters: + else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] >= 0x7F) + { + GenError( ERRCODE_BASIC_SYNTAX ); nLineIdx++; nCol++; + } + // other groups: + else + { + sal_Int32 n = 1; + auto nChar = nLineIdx < aLine.getLength() ? aLine[nLineIdx] : 0; + ++nLineIdx; + if (nLineIdx < aLine.getLength()) + { + switch (nChar) + { + case '<': if( aLine[nLineIdx] == '>' || aLine[nLineIdx] == '=' ) n = 2; break; + case '>': if( aLine[nLineIdx] == '=' ) n = 2; break; + case ':': if( aLine[nLineIdx] == '=' ) n = 2; break; + } + } + aSym = aLine.copy(nCol, std::min(n, aLine.getLength() - nCol)); + nLineIdx += n-1; nCol = nCol + n; + } + + nCol2 = nCol-1; + +PrevLineCommentLbl: + + if( bPrevLineExtentsComment || (eScanType != SbxSTRING && + ( bCompilerDirective || + aSym.startsWith("'") || + aSym.equalsIgnoreAsciiCase( "REM" ) ) ) ) + { + bPrevLineExtentsComment = false; + aSym = "REM"; + sal_Int32 nLen = aLine.getLength() - nLineIdx; + // tdf#149402 - don't extend comment if line ends in a whitespace (BasicCharClass::isWhitespace) + if (bCompatible && !bLineEndsWithWhitespace && aLine[nLineIdx + nLen - 1] == '_' + && aLine[nLineIdx + nLen - 2] == ' ') + bPrevLineExtentsComment = true; + nCol2 = nCol2 + nLen; + nLineIdx = -1; + } + + if (nLineIdx == nLineIdxScanStart) + { + GenError( ERRCODE_BASIC_SYMBOL_EXPECTED ); + return false; + } + + return true; + + +eoln: + if (nCol && aLine[--nLineIdx] == '_' && !bClosingUnderscore) + { + nLineIdx = -1; + bool bRes = NextSym(); + if( aSym.startsWith(".") ) + { + // object _ + // .Method + // ^^^ <- spaces is legal in MSO VBA + bSpaces = false; + } + return bRes; + } + else + { + nLineIdx = -1; + nLine = nOldLine; + nCol1 = nOldCol1; + nCol2 = nOldCol2; + aSym = "\n"; + nColLock = 0; + bClosingUnderscore = false; + // tdf#149157 - break multiline continuation in a comment after a new line + bPrevLineExtentsComment = false; + return true; + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |