diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
commit | 267c6f2ac71f92999e969232431ba04678e7437e (patch) | |
tree | 358c9467650e1d0a1d7227a21dac2e3d08b622b2 /sw/source/filter/html/parcss1.cxx | |
parent | Initial commit. (diff) | |
download | libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip |
Adding upstream version 4:24.2.0.upstream/4%24.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sw/source/filter/html/parcss1.cxx')
-rw-r--r-- | sw/source/filter/html/parcss1.cxx | 1388 |
1 files changed, 1388 insertions, 0 deletions
diff --git a/sw/source/filter/html/parcss1.cxx b/sw/source/filter/html/parcss1.cxx new file mode 100644 index 0000000000..f3145f1fa5 --- /dev/null +++ b/sw/source/filter/html/parcss1.cxx @@ -0,0 +1,1388 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <o3tl/string_view.hxx> +#include <osl/diagnose.h> +#include <rtl/character.hxx> +#include <rtl/ustrbuf.hxx> +#include <tools/color.hxx> +#include <tools/solar.h> +#include <svtools/htmltokn.h> +#include <comphelper/string.hxx> +#include "parcss1.hxx" + +// Loop-Check: Used to avoid infinite loops, is checked after every +// loop, if there is progress of the input position +#define LOOP_CHECK + +#ifdef LOOP_CHECK + +#define LOOP_CHECK_DECL \ + sal_Int32 nOldInPos = SAL_MAX_INT32; +#define LOOP_CHECK_RESTART \ + nOldInPos = SAL_MAX_INT32; +#define LOOP_CHECK_CHECK( where ) \ + OSL_ENSURE( nOldInPos!=m_nInPos || m_cNextCh==sal_Unicode(EOF), where ); \ + if( nOldInPos==m_nInPos && m_cNextCh!=sal_Unicode(EOF) ) \ + break; \ + else \ + nOldInPos = m_nInPos; + +#else + +#define LOOP_CHECK_DECL +#define LOOP_CHECK_RESTART +#define LOOP_CHECK_CHECK( where ) + +#endif + +const sal_Int32 MAX_LEN = 1024; + +void CSS1Parser::InitRead( const OUString& rIn ) +{ + m_nlLineNr = 0; + m_nlLinePos = 0; + + m_bWhiteSpace = true; // if nothing was read it's like there was WS + m_bEOF = false; + m_eState = CSS1_PAR_WORKING; + m_nValue = 0.; + + m_aIn = rIn; + m_nInPos = 0; + m_cNextCh = GetNextChar(); + m_nToken = GetNextToken(); +} + +sal_Unicode CSS1Parser::GetNextChar() +{ + if( m_nInPos >= m_aIn.getLength() ) + { + m_bEOF = true; + return sal_Unicode(EOF); + } + + sal_Unicode c = m_aIn[m_nInPos]; + m_nInPos++; + + if( c == '\n' ) + { + ++m_nlLineNr; + m_nlLinePos = 1; + } + else + ++m_nlLinePos; + + return c; +} + +// This function implements the scanner described in + +// http://www.w3.org/pub/WWW/TR/WD-css1.html +// resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html + +// for CSS1. It's a direct implementation of the +// described Lex grammar. + +CSS1Token CSS1Parser::GetNextToken() +{ + CSS1Token nRet = CSS1_NULL; + m_aToken.clear(); + + do { + // remember if white space was read + bool bPrevWhiteSpace = m_bWhiteSpace; + m_bWhiteSpace = false; + + bool bNextCh = true; + switch( m_cNextCh ) + { + case '/': // COMMENT | '/' + { + m_cNextCh = GetNextChar(); + if( '*' == m_cNextCh ) + { + // COMMENT + m_cNextCh = GetNextChar(); + + bool bAsterisk = false; + while( !(bAsterisk && '/'==m_cNextCh) && !IsEOF() ) + { + bAsterisk = ('*'==m_cNextCh); + m_cNextCh = GetNextChar(); + } + } + else + { + // '/' + bNextCh = false; + nRet = CSS1_SLASH; + } + } + break; + + case '@': // '@import' | '@XXX' + { + m_cNextCh = GetNextChar(); + if (rtl::isAsciiAlpha(m_cNextCh)) + { + // scan the next identifier + OUStringBuffer sTmpBuffer(32); + do { + sTmpBuffer.append( m_cNextCh ); + m_cNextCh = GetNextChar(); + } while( (rtl::isAsciiAlphanumeric(m_cNextCh) || + '-' == m_cNextCh) && !IsEOF() ); + + m_aToken += sTmpBuffer; + + // check if we know it + switch( m_aToken[0] ) + { + case 'i': + case 'I': + if( m_aToken.equalsIgnoreAsciiCase( "import" ) ) + nRet = CSS1_IMPORT_SYM; + break; + case 'p': + case 'P': + if( m_aToken.equalsIgnoreAsciiCase( "page" ) ) + nRet = CSS1_PAGE_SYM; + break; + } + + // error handling: ignore '@indent' and the rest until + // semicolon at end of the next block + if( CSS1_NULL==nRet ) + { + m_aToken.clear(); + int nBlockLvl = 0; + sal_Unicode cQuoteCh = 0; + bool bDone = false, bEscape = false; + while( !bDone && !IsEOF() ) + { + bool bOldEscape = bEscape; + bEscape = false; + switch( m_cNextCh ) + { + case '{': + if( !cQuoteCh && !bOldEscape ) + nBlockLvl++; + break; + case ';': + if( !cQuoteCh && !bOldEscape ) + bDone = nBlockLvl==0; + break; + case '}': + if( !cQuoteCh && !bOldEscape ) + bDone = --nBlockLvl==0; + break; + case '\"': + case '\'': + if( !bOldEscape ) + { + if( cQuoteCh ) + { + if( cQuoteCh == m_cNextCh ) + cQuoteCh = 0; + } + else + { + cQuoteCh = m_cNextCh; + } + } + break; + case '\\': + if( !bOldEscape ) + bEscape = true; + break; + } + m_cNextCh = GetNextChar(); + } + } + + bNextCh = false; + } + } + break; + + case '!': // '!' 'legal' | '!' 'important' | syntax error + { + // ignore white space + m_cNextCh = GetNextChar(); + while( ( ' ' == m_cNextCh || + (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() ) + { + m_bWhiteSpace = true; + m_cNextCh = GetNextChar(); + } + + if( 'i'==m_cNextCh || 'I'==m_cNextCh) + { + // scan next identifier + OUStringBuffer sTmpBuffer(32); + do { + sTmpBuffer.append( m_cNextCh ); + m_cNextCh = GetNextChar(); + } while( (rtl::isAsciiAlphanumeric(m_cNextCh) || + '-' == m_cNextCh) && !IsEOF() ); + + m_aToken += sTmpBuffer; + + if( ( 'i'==m_aToken[0] || 'I'==m_aToken[0] ) && + m_aToken.equalsIgnoreAsciiCase( "important" ) ) + { + // '!' 'important' + nRet = CSS1_IMPORTANT_SYM; + } + else + { + // error handling: ignore '!', not IDENT + nRet = CSS1_IDENT; + } + + m_bWhiteSpace = false; + bNextCh = false; + } + else + { + // error handling: ignore '!' + bNextCh = false; + } + } + break; + + case '\"': + case '\'': // STRING + { + // \... isn't possible yet!!! + sal_Unicode cQuoteChar = m_cNextCh; + m_cNextCh = GetNextChar(); + + OUStringBuffer sTmpBuffer( MAX_LEN ); + do { + sTmpBuffer.append( m_cNextCh ); + m_cNextCh = GetNextChar(); + } while( cQuoteChar != m_cNextCh && !IsEOF() ); + + m_aToken += sTmpBuffer; + + nRet = CSS1_STRING; + } + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': // NUMBER | PERCENTAGE | LENGTH + { + // save current position + std::size_t nInPosSave = m_nInPos; + sal_Unicode cNextChSave = m_cNextCh; + sal_uInt32 nlLineNrSave = m_nlLineNr; + sal_uInt32 nlLinePosSave = m_nlLinePos; + bool bEOFSave = m_bEOF; + + // first try to parse a hex digit + OUStringBuffer sTmpBuffer( 16 ); + do { + sTmpBuffer.append( m_cNextCh ); + m_cNextCh = GetNextChar(); + } while( sTmpBuffer.getLength() < 7 && + ( ('0'<=m_cNextCh && '9'>=m_cNextCh) || + ('A'<=m_cNextCh && 'F'>=m_cNextCh) || + ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) && + !IsEOF() ); + + if( sTmpBuffer.getLength()==6 ) + { + // we found a color in hex + m_aToken += sTmpBuffer; + nRet = CSS1_HEXCOLOR; + bNextCh = false; + + break; + } + + // otherwise we try a number + m_nInPos = nInPosSave; + m_cNextCh = cNextChSave; + m_nlLineNr = nlLineNrSave; + m_nlLinePos = nlLinePosSave; + m_bEOF = bEOFSave; + + // first parse the number + sTmpBuffer.setLength( 0 ); + do { + sTmpBuffer.append( m_cNextCh ); + m_cNextCh = GetNextChar(); + } while( (('0'<=m_cNextCh && '9'>=m_cNextCh) || '.'==m_cNextCh) && + !IsEOF() ); + + m_aToken += sTmpBuffer; + m_nValue = m_aToken.toDouble(); + + // ignore white space + while( ( ' ' == m_cNextCh || + (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() ) + { + m_bWhiteSpace = true; + m_cNextCh = GetNextChar(); + } + + // check now, of there is a unit + switch( m_cNextCh ) + { + case '%': // PERCENTAGE + m_bWhiteSpace = false; + nRet = CSS1_PERCENTAGE; + break; + + case 'c': + case 'C': // LENGTH cm | LENGTH IDENT + case 'e': + case 'E': // LENGTH (em | ex) | LENGTH IDENT + case 'i': + case 'I': // LENGTH inch | LENGTH IDENT + case 'p': + case 'P': // LENGTH (pt | px | pc) | LENGTH IDENT + case 'm': + case 'M': // LENGTH mm | LENGTH IDENT + { + // save current position + sal_Int32 nInPosOld = m_nInPos; + sal_Unicode cNextChOld = m_cNextCh; + sal_uInt32 nlLineNrOld = m_nlLineNr; + sal_uInt32 nlLinePosOld = m_nlLinePos; + bool bEOFOld = m_bEOF; + + // parse the next identifier + OUString aIdent; + OUStringBuffer sTmpBuffer2(64); + do { + sTmpBuffer2.append( m_cNextCh ); + m_cNextCh = GetNextChar(); + } while( (rtl::isAsciiAlphanumeric(m_cNextCh) || + '-' == m_cNextCh) && !IsEOF() ); + + aIdent += sTmpBuffer2; + + // Is it a unit? + const char *pCmp1 = nullptr, *pCmp2 = nullptr, *pCmp3 = nullptr; + double nScale1 = 1., nScale2 = 1.; + CSS1Token nToken1 = CSS1_LENGTH, + nToken2 = CSS1_LENGTH, + nToken3 = CSS1_LENGTH; + switch( aIdent[0] ) + { + case 'c': + case 'C': + pCmp1 = "cm"; + nScale1 = (72.*20.)/2.54; // twip + break; + case 'e': + case 'E': + pCmp1 = "em"; + nToken1 = CSS1_EMS; + + pCmp2 = "ex"; + nToken2 = CSS1_EMX; + break; + case 'i': + case 'I': + pCmp1 = "in"; + nScale1 = 72.*20.; // twip + break; + case 'm': + case 'M': + pCmp1 = "mm"; + nScale1 = (72.*20.)/25.4; // twip + break; + case 'p': + case 'P': + pCmp1 = "pt"; + nScale1 = 20.; // twip + + pCmp2 = "pc"; + nScale2 = 12.*20.; // twip + + pCmp3 = "px"; + nToken3 = CSS1_PIXLENGTH; + break; + } + + double nScale = 0.0; + OSL_ENSURE( pCmp1, "Where does the first digit come from?" ); + if( aIdent.equalsIgnoreAsciiCaseAscii( pCmp1 ) ) + { + nScale = nScale1; + nRet = nToken1; + } + else if( pCmp2 && + aIdent.equalsIgnoreAsciiCaseAscii( pCmp2 ) ) + { + nScale = nScale2; + nRet = nToken2; + } + else if( pCmp3 && + aIdent.equalsIgnoreAsciiCaseAscii( pCmp3 ) ) + { + nScale = 1.; // nScale3 + nRet = nToken3; + } + else + { + nRet = CSS1_NUMBER; + } + + if( CSS1_LENGTH==nRet && nScale!=1.0 ) + m_nValue *= nScale; + + if( nRet == CSS1_NUMBER ) + { + m_nInPos = nInPosOld; + m_cNextCh = cNextChOld; + m_nlLineNr = nlLineNrOld; + m_nlLinePos = nlLinePosOld; + m_bEOF = bEOFOld; + } + else + { + m_bWhiteSpace = false; + } + bNextCh = false; + } + break; + default: // NUMBER IDENT + bNextCh = false; + nRet = CSS1_NUMBER; + break; + } + } + break; + + case ':': // ':' + // catch link/visited/active !!! + nRet = CSS1_COLON; + break; + + case '.': // DOT_W_WS | DOT_WO_WS + nRet = bPrevWhiteSpace ? CSS1_DOT_W_WS : CSS1_DOT_WO_WS; + break; + + case '+': // '+' + nRet = CSS1_PLUS; + break; + + case '-': // '-' + nRet = CSS1_MINUS; + break; + + case '{': // '{' + nRet = CSS1_OBRACE; + break; + + case '}': // '}' + nRet = CSS1_CBRACE; + break; + + case ';': // ';' + nRet = CSS1_SEMICOLON; + break; + + case ',': // ',' + nRet = CSS1_COMMA; + break; + + case '#': // '#' + m_cNextCh = GetNextChar(); + if( ('0'<=m_cNextCh && '9'>=m_cNextCh) || + ('a'<=m_cNextCh && 'f'>=m_cNextCh) || + ('A'<=m_cNextCh && 'F'>=m_cNextCh) ) + { + // save current position + sal_Int32 nInPosSave = m_nInPos; + sal_Unicode cNextChSave = m_cNextCh; + sal_uInt32 nlLineNrSave = m_nlLineNr; + sal_uInt32 nlLinePosSave = m_nlLinePos; + bool bEOFSave = m_bEOF; + + // first try to parse a hex digit + OUStringBuffer sTmpBuffer(8); + do { + sTmpBuffer.append( m_cNextCh ); + m_cNextCh = GetNextChar(); + } while( sTmpBuffer.getLength() < 9 && + ( ('0'<=m_cNextCh && '9'>=m_cNextCh) || + ('A'<=m_cNextCh && 'F'>=m_cNextCh) || + ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) && + !IsEOF() ); + + if( sTmpBuffer.getLength()==6 || sTmpBuffer.getLength()==3 ) + { + // we found a color in hex (RGB) + m_aToken += sTmpBuffer; + nRet = CSS1_HEXCOLOR; + bNextCh = false; + + break; + } + + if( sTmpBuffer.getLength()==8 ) + { + // we found a color in hex (RGBA) + // we convert it to RGB assuming white background + sal_uInt32 nColor = sTmpBuffer.makeStringAndClear().toUInt32(16); + sal_uInt32 nRed = (nColor & 0xff000000) >> 24; + sal_uInt32 nGreen = (nColor & 0xff0000) >> 16; + sal_uInt32 nBlue = (nColor & 0xff00) >> 8; + double nAlpha = (nColor & 0xff) / 255.0; + nRed = (1 - nAlpha) * 255 + nAlpha * nRed; + nGreen = (1 - nAlpha) * 255 + nAlpha * nGreen; + nBlue = (1 - nAlpha) * 255 + nAlpha * nBlue; + nColor = (nRed << 16) + (nGreen << 8) + nBlue; + m_aToken += OUString::number(nColor, 16); + nRet = CSS1_HEXCOLOR; + bNextCh = false; + + break; + } + + // otherwise we try a number + m_nInPos = nInPosSave; + m_cNextCh = cNextChSave; + m_nlLineNr = nlLineNrSave; + m_nlLinePos = nlLinePosSave; + m_bEOF = bEOFSave; + } + + nRet = CSS1_HASH; + bNextCh = false; + break; + + case ' ': + case '\t': + case '\r': + case '\n': // White-Space + m_bWhiteSpace = true; + break; + + case sal_Unicode(EOF): + if( IsEOF() ) + { + m_eState = CSS1_PAR_ACCEPTED; + bNextCh = false; + break; + } + [[fallthrough]]; + + default: // IDENT | syntax error + if (rtl::isAsciiAlpha(m_cNextCh)) + { + // IDENT + + bool bHexColor = true; + + // parse the next identifier + OUStringBuffer sTmpBuffer(64); + do { + sTmpBuffer.append( m_cNextCh ); + if( bHexColor ) + { + bHexColor = sTmpBuffer.getLength()<7 && + ( ('0'<=m_cNextCh && '9'>=m_cNextCh) || + ('A'<=m_cNextCh && 'F'>=m_cNextCh) || + ('a'<=m_cNextCh && 'f'>=m_cNextCh) ); + } + m_cNextCh = GetNextChar(); + } while( (rtl::isAsciiAlphanumeric(m_cNextCh) || + '-' == m_cNextCh) && !IsEOF() ); + + m_aToken += sTmpBuffer; + + if( bHexColor && sTmpBuffer.getLength()==6 ) + { + bNextCh = false; + nRet = CSS1_HEXCOLOR; + + break; + } + if( '('==m_cNextCh && + ( (('u'==m_aToken[0] || 'U'==m_aToken[0]) && + m_aToken.equalsIgnoreAsciiCase( "url" )) || + (('r'==m_aToken[0] || 'R'==m_aToken[0]) && + (m_aToken.equalsIgnoreAsciiCase( "rgb" ) || m_aToken.equalsIgnoreAsciiCase( "rgba" ) ) + ) ) ) + { + int nNestCnt = 0; + OUStringBuffer sTmpBuffer2(64); + do { + sTmpBuffer2.append( m_cNextCh ); + switch( m_cNextCh ) + { + case '(': nNestCnt++; break; + case ')': nNestCnt--; break; + } + m_cNextCh = GetNextChar(); + } while( (nNestCnt>1 || ')'!=m_cNextCh) && !IsEOF() ); + sTmpBuffer2.append( m_cNextCh ); + m_aToken += sTmpBuffer2; + bNextCh = true; + nRet = 'u'==m_aToken[0] || 'U'==m_aToken[0] + ? CSS1_URL + : CSS1_RGB; + } + else + { + bNextCh = false; + nRet = CSS1_IDENT; + } + } + // error handling: ignore digit + break; + } + if( bNextCh ) + m_cNextCh = GetNextChar(); + + } while( CSS1_NULL==nRet && IsParserWorking() ); + + return nRet; +} + +// These functions implement the parser described in + +// http://www.w3.org/pub/WWW/TR/WD-css1.html +// resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html + +// for CSS1. It's a direct implementation of the +// described Lex grammar. + +// stylesheet +// : import* rule* + +// import +// : IMPORT_SYM url + +// url +// : STRING + +void CSS1Parser::ParseStyleSheet() +{ + LOOP_CHECK_DECL + + // import* + bool bDone = false; + while( !bDone && IsParserWorking() ) + { + LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/import *" ) + + switch( m_nToken ) + { + case CSS1_IMPORT_SYM: + // IMPORT_SYM url + // URL are skipped without checks + m_nToken = GetNextToken(); + break; + case CSS1_IDENT: // Look-Aheads + case CSS1_DOT_W_WS: + case CSS1_HASH: + case CSS1_PAGE_SYM: + // rule + bDone = true; + break; + default: + // error handling: ignore + break; + } + + if( !bDone ) + m_nToken = GetNextToken(); + } + + LOOP_CHECK_RESTART + + // rule * + while( IsParserWorking() ) + { + LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/rule *" ) + + switch( m_nToken ) + { + case CSS1_IDENT: // Look-Aheads + case CSS1_DOT_W_WS: + case CSS1_HASH: + case CSS1_PAGE_SYM: + // rule + ParseRule(); + break; + default: + // error handling: ignore + m_nToken = GetNextToken(); + break; + } + } +} + +// rule +// : selector [ ',' selector ]* +// '{' declaration [ ';' declaration ]* '}' + +void CSS1Parser::ParseRule() +{ + // selector + std::unique_ptr<CSS1Selector> pSelector = ParseSelector(); + if( !pSelector ) + return; + + // process selector + SelectorParsed( std::move(pSelector), true ); + + LOOP_CHECK_DECL + + // [ ',' selector ]* + while( CSS1_COMMA==m_nToken && IsParserWorking() ) + { + LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/selector *" ) + + // ignore ',' + m_nToken = GetNextToken(); + + // selector + pSelector = ParseSelector(); + if( !pSelector ) + return; + + // process selector + SelectorParsed( std::move(pSelector), false ); + } + + // '{' + if( CSS1_OBRACE != m_nToken ) + return; + m_nToken = GetNextToken(); + + // declaration + OUString aProperty; + std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty ); + if( !pExpr ) + return; + + // process expression + DeclarationParsed( aProperty, std::move(pExpr) ); + + LOOP_CHECK_RESTART + + // [ ';' declaration ]* + while( CSS1_SEMICOLON==m_nToken && IsParserWorking() ) + { + LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/declaration *" ) + + // ';' + m_nToken = GetNextToken(); + + // declaration + if( CSS1_IDENT == m_nToken ) + { + std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty ); + if( pExp ) + { + // process expression + DeclarationParsed( aProperty, std::move(pExp)); + } + } + } + + // '}' + if( CSS1_CBRACE == m_nToken ) + m_nToken = GetNextToken(); +} + +// selector +// : simple_selector+ [ ':' pseudo_element ]? + +// simple_selector +// : element_name [ DOT_WO_WS class ]? +// | DOT_W_WS class +// | id_selector + +// element_name +// : IDENT + +// class +// : IDENT + +// id_selector +// : '#' IDENT + +// pseudo_element +// : IDENT + +std::unique_ptr<CSS1Selector> CSS1Parser::ParseSelector() +{ + std::unique_ptr<CSS1Selector> pRoot; + CSS1Selector *pLast = nullptr; + + bool bDone = false; + CSS1Selector *pNew = nullptr; + + LOOP_CHECK_DECL + + // simple_selector+ + while( !bDone && IsParserWorking() ) + { + LOOP_CHECK_CHECK( "Infinite loop in ParseSelector()" ) + + bool bNextToken = true; + + switch( m_nToken ) + { + case CSS1_IDENT: + { + // element_name [ DOT_WO_WS class ]? + + // element_name + OUString aElement = m_aToken; + CSS1SelectorType eType = CSS1_SELTYPE_ELEMENT; + m_nToken = GetNextToken(); + + if( CSS1_DOT_WO_WS == m_nToken ) + { + // DOT_WO_WS + m_nToken = GetNextToken(); + + // class + if( CSS1_IDENT == m_nToken ) + { + aElement += "." + m_aToken; + eType = CSS1_SELTYPE_ELEM_CLASS; + } + else + { + // missing class + return pRoot; + } + } + else + { + // that was a look-ahead + bNextToken = false; + } + pNew = new CSS1Selector( eType, aElement ); + } + break; + case CSS1_DOT_W_WS: + // DOT_W_WS class + + // DOT_W_WS + m_nToken = GetNextToken(); + + if( CSS1_IDENT==m_nToken ) + { + // class + pNew = new CSS1Selector( CSS1_SELTYPE_CLASS, m_aToken ); + } + else + { + // missing class + return pRoot; + } + break; + case CSS1_HASH: + // '#' id_selector + + // '#' + m_nToken = GetNextToken(); + + if( CSS1_IDENT==m_nToken ) + { + // id_selector + pNew = new CSS1Selector( CSS1_SELTYPE_ID, m_aToken ); + } + else + { + // missing id_selector + return pRoot; + } + break; + + case CSS1_PAGE_SYM: + { + // @page + pNew = new CSS1Selector( CSS1_SELTYPE_PAGE, m_aToken ); + } + break; + + default: + // stop because we don't know what's next + bDone = true; + break; + } + + // if created a new selector then save it + if( pNew ) + { + OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr), + "Root-Selector, but no Last" ); + if( pLast ) + pLast->SetNext( pNew ); + else + pRoot.reset(pNew); + + pLast = pNew; + pNew = nullptr; + } + + if( bNextToken && !bDone ) + m_nToken = GetNextToken(); + } + + if( !pRoot ) + { + // missing simple_selector + return pRoot; + } + + // [ ':' pseudo_element ]? + if( CSS1_COLON==m_nToken && IsParserWorking() ) + { + // ':' pseudo element + m_nToken = GetNextToken(); + if( CSS1_IDENT==m_nToken ) + { + if (pLast) + pLast->SetNext( new CSS1Selector(CSS1_SELTYPE_PSEUDO,m_aToken) ); + m_nToken = GetNextToken(); + } + else + { + // missing pseudo_element + return pRoot; + } + } + + return pRoot; +} + +// declaration +// : property ':' expr prio? +// | /* empty */ + +// expression +// : term [ operator term ]* + +// term +// : unary_operator? +// [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT | +// HEXCOLOR | URL | RGB ] + +// operator +// : '/' | ',' | /* empty */ + +// unary_operator +// : '-' | '+' + +// property +// : ident + +// the sign is only used for numeric values (except PERCENTAGE) +// and it's applied on nValue! +std::unique_ptr<CSS1Expression> CSS1Parser::ParseDeclaration( OUString& rProperty ) +{ + std::unique_ptr<CSS1Expression> pRoot; + CSS1Expression *pLast = nullptr; + + // property + if( CSS1_IDENT != m_nToken ) + { + // missing property + return pRoot; + } + rProperty = m_aToken; + + m_nToken = GetNextToken(); + + // ':' + if( CSS1_COLON != m_nToken ) + { + // missing ':' + return pRoot; + } + m_nToken = GetNextToken(); + + // term [operator term]* + // here we're pretty lax regarding the syntax, but this shouldn't + // be a problem + bool bDone = false; + sal_Unicode cSign = 0, cOp = 0; + CSS1Expression *pNew = nullptr; + + LOOP_CHECK_DECL + + while( !bDone && IsParserWorking() ) + { + LOOP_CHECK_CHECK( "Infinite loop in ParseDeclaration()" ) + + switch( m_nToken ) + { + case CSS1_MINUS: + cSign = '-'; + break; + + case CSS1_PLUS: + cSign = '+'; + break; + + case CSS1_NUMBER: + case CSS1_LENGTH: + case CSS1_PIXLENGTH: + case CSS1_EMS: + case CSS1_EMX: + if( '-'==cSign ) + m_nValue = -m_nValue; + [[fallthrough]]; + case CSS1_STRING: + case CSS1_PERCENTAGE: + case CSS1_IDENT: + case CSS1_URL: + case CSS1_RGB: + case CSS1_HEXCOLOR: + pNew = new CSS1Expression( m_nToken, m_aToken, m_nValue, cOp ); + m_nValue = 0; // otherwise this also is applied to next ident + cSign = 0; + cOp = 0; + break; + + case CSS1_SLASH: + cOp = '/'; + cSign = 0; + break; + + case CSS1_COMMA: + cOp = ','; + cSign = 0; + break; + + default: + bDone = true; + break; + } + + // if created a new expression save it + if( pNew ) + { + OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr), + "Root-Selector, but no Last" ); + if( pLast ) + pLast->SetNext( pNew ); + else + pRoot.reset(pNew); + + pLast = pNew; + pNew = nullptr; + } + + if( !bDone ) + m_nToken = GetNextToken(); + } + + if( !pRoot ) + { + // missing term + return pRoot; + } + + // prio? + if( CSS1_IMPORTANT_SYM==m_nToken ) + { + // IMPORTANT_SYM + m_nToken = GetNextToken(); + } + + return pRoot; +} + +CSS1Parser::CSS1Parser() + : m_bWhiteSpace(false) + , m_bEOF(false) + , m_cNextCh(0) + , m_nInPos(0) + , m_nlLineNr(0) + , m_nlLinePos(0) + , m_nValue(0) + , m_eState(CSS1_PAR_ACCEPTED) + , m_nToken(CSS1_NULL) +{ +} + +CSS1Parser::~CSS1Parser() +{ +} + +void CSS1Parser::ParseStyleSheet( const OUString& rIn ) +{ + OUString aTmp( rIn ); + + sal_Unicode c; + while( !aTmp.isEmpty() && + ( ' '==(c=aTmp[0]) || '\t'==c || '\r'==c || '\n'==c ) ) + aTmp = aTmp.copy( 1 ); + + while( !aTmp.isEmpty() && ( ' '==(c=aTmp[aTmp.getLength()-1]) + || '\t'==c || '\r'==c || '\n'==c ) ) + aTmp = aTmp.copy( 0, aTmp.getLength()-1 ); + + // remove SGML comments + if( aTmp.getLength() >= 4 && + aTmp.startsWith( "<!--" ) ) + aTmp = aTmp.copy( 4 ); + + if( aTmp.getLength() >=3 && + aTmp.endsWith("-->") ) + aTmp = aTmp.copy( 0, aTmp.getLength() - 3 ); + + if( aTmp.isEmpty() ) + return; + + InitRead( aTmp ); + + ParseStyleSheet(); +} + +void CSS1Parser::ParseStyleOption( const OUString& rIn ) +{ + if( rIn.isEmpty() ) + return; + + InitRead( rIn ); + + // fdo#41796: skip over spurious semicolons + while (CSS1_SEMICOLON == m_nToken) + { + m_nToken = GetNextToken(); + } + + OUString aProperty; + std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty ); + if( !pExpr ) + return; + + // process expression + DeclarationParsed( aProperty, std::move(pExpr) ); + + LOOP_CHECK_DECL + + // [ ';' declaration ]* + while( CSS1_SEMICOLON==m_nToken && IsParserWorking() ) + { + LOOP_CHECK_CHECK( "Infinite loop in ParseStyleOption()" ) + + m_nToken = GetNextToken(); + if( CSS1_IDENT==m_nToken ) + { + std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty ); + if( pExp ) + { + // process expression + DeclarationParsed( aProperty, std::move(pExp) ); + } + } + } +} + +void CSS1Parser::SelectorParsed( std::unique_ptr<CSS1Selector> /* pSelector */, bool /*bFirst*/ ) +{ +} + +void CSS1Parser::DeclarationParsed( const OUString& /*rProperty*/, + std::unique_ptr<CSS1Expression> /* pExpr */ ) +{ +} + +CSS1Selector::~CSS1Selector() +{ + delete m_pNext; +} + +CSS1Expression::~CSS1Expression() +{ + delete pNext; +} + +void CSS1Expression::GetURL( OUString& rURL ) const +{ + OSL_ENSURE( CSS1_URL==eType, "CSS1-Expression is not URL" ); + + OSL_ENSURE( aValue.startsWithIgnoreAsciiCase( "url" ) && + aValue.getLength() > 5 && + '(' == aValue[3] && + ')' == aValue[aValue.getLength()-1], + "no valid URL(...)" ); + + if( aValue.getLength() <= 5 ) + return; + + rURL = aValue.copy( 4, aValue.getLength() - 5 ); + + // tdf#94088 original stripped only spaces, but there may also be + // double quotes in CSS style URLs, so be prepared to spaces followed + // by a single quote followed by spaces + const sal_Unicode aSpace(' '); + const sal_Unicode aSingleQuote('\''); + + rURL = comphelper::string::strip(rURL, aSpace); + rURL = comphelper::string::strip(rURL, aSingleQuote); + rURL = comphelper::string::strip(rURL, aSpace); +} + +bool CSS1Expression::GetColor( Color &rColor ) const +{ + OSL_ENSURE( CSS1_IDENT==eType || CSS1_RGB==eType || + CSS1_HEXCOLOR==eType || CSS1_STRING==eType, + "CSS1-Expression cannot be colour" ); + + bool bRet = false; + sal_uInt32 nColor = SAL_MAX_UINT32; + + switch( eType ) + { + case CSS1_RGB: + { + // fourth value to 255 means no alpha transparency + // so the right by default value + sal_uInt8 aColors[4] = { 0, 0, 0, 255 }; + + // it can be "rgb" or "rgba" + if (!aValue.startsWithIgnoreAsciiCase( "rgb" ) || aValue.getLength() < 6 || + (aValue[3] != '(' && aValue[4] != '(' ) || aValue[aValue.getLength()-1] != ')') + { + break; + } + + sal_Int32 nPos = aValue.startsWithIgnoreAsciiCase( "rgba" )?5:4; // start after "rgba(" or "rgb(" + char cSep = (aValue.indexOf(',') != -1)?',':' '; + // alpha value can be after a "/" or "," + bool bIsSepAlphaDiv = (aValue.indexOf('/') != -1)?true:false; + for ( int nCol = 0; nCol < 4 && nPos > 0; ++nCol ) + { + const std::u16string_view aNumber = o3tl::getToken(aValue, 0, cSep, nPos); + + sal_Int32 nNumber = o3tl::toInt32(aNumber); + if( nNumber<0 ) + { + nNumber = 0; + } + else if( aNumber.find('%') != std::u16string_view::npos ) + { + if( nNumber > 100 ) + nNumber = 100; + nNumber *= 255; + nNumber /= 100; + } + else if( nNumber > 255 ) + nNumber = 255; + else if( aNumber.find('.') != std::u16string_view::npos ) + { + // in this case aNumber contains something like "0.3" so not an sal_Int32 + nNumber = static_cast<sal_Int32>(255.0*o3tl::toDouble(aNumber)); + } + aColors[nCol] = static_cast<sal_uInt8>(nNumber); + // rgb with alpha and '/' has this form: rgb(255 0 0 / 50%) + if (bIsSepAlphaDiv && nCol == 2) + { + // but there can be some spaces or not before and after the "/", so skip them + while (aValue[nPos] == '/' || aValue[nPos] == ' ') + ++nPos; + } + } + + rColor.SetRed( aColors[0] ); + rColor.SetGreen( aColors[1] ); + rColor.SetBlue( aColors[2] ); + rColor.SetAlpha( aColors[3] ); + + bRet = true; // something different than a colour isn't possible + } + break; + + case CSS1_IDENT: + case CSS1_STRING: + { + OUString aTmp( aValue.toAsciiUpperCase() ); + nColor = GetHTMLColor( aTmp ); + bRet = nColor != SAL_MAX_UINT32; + } + if( bRet || CSS1_STRING != eType || aValue.isEmpty() || + aValue[0] != '#' ) + break; + [[fallthrough]]; + case CSS1_HEXCOLOR: + { + // MS-IE hack: colour can also be a string + sal_Int32 nOffset = CSS1_STRING==eType ? 1 : 0; + bool bDouble = aValue.getLength()-nOffset == 3; + sal_Int32 i = nOffset, nEnd = (bDouble ? 3 : 6) + nOffset; + + nColor = 0; + for( ; i<nEnd; i++ ) + { + sal_Unicode c = (i<aValue.getLength() ? aValue[i] + : '0' ); + if( c >= '0' && c <= '9' ) + c -= 48; + else if( c >= 'A' && c <= 'F' ) + c -= 55; + else if( c >= 'a' && c <= 'f' ) + c -= 87; + else + c = 16; + + nColor *= 16; + if( c<16 ) + nColor += c; + if( bDouble ) + { + nColor *= 16; + if( c<16 ) + nColor += c; + } + } + bRet = true; + } + break; + default: + ; + } + + if( bRet && nColor!=SAL_MAX_UINT32 ) + { + rColor.SetRed( static_cast<sal_uInt8>((nColor & 0x00ff0000UL) >> 16) ); + rColor.SetGreen( static_cast<sal_uInt8>((nColor & 0x0000ff00UL) >> 8) ); + rColor.SetBlue( static_cast<sal_uInt8>(nColor & 0x000000ffUL) ); + } + + return bRet; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |