From 940b4d1848e8c70ab7642901a68594e8016caffc Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 27 Apr 2024 18:51:28 +0200 Subject: Adding upstream version 1:7.0.4. Signed-off-by: Daniel Baumann --- sax/source/tools/CachedOutputStream.hxx | 118 ++ sax/source/tools/converter.cxx | 2448 +++++++++++++++++++++++++++++++ sax/source/tools/fastattribs.cxx | 281 ++++ sax/source/tools/fastserializer.cxx | 813 ++++++++++ sax/source/tools/fastserializer.hxx | 254 ++++ sax/source/tools/fshelper.cxx | 158 ++ 6 files changed, 4072 insertions(+) create mode 100644 sax/source/tools/CachedOutputStream.hxx create mode 100644 sax/source/tools/converter.cxx create mode 100644 sax/source/tools/fastattribs.cxx create mode 100644 sax/source/tools/fastserializer.cxx create mode 100644 sax/source/tools/fastserializer.hxx create mode 100644 sax/source/tools/fshelper.cxx (limited to 'sax/source/tools') diff --git a/sax/source/tools/CachedOutputStream.hxx b/sax/source/tools/CachedOutputStream.hxx new file mode 100644 index 000000000..c3f442f9d --- /dev/null +++ b/sax/source/tools/CachedOutputStream.hxx @@ -0,0 +1,118 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#ifndef INCLUDED_SAX_SOURCE_TOOLS_CACHEDOUTPUTSTREAM_HXX +#define INCLUDED_SAX_SOURCE_TOOLS_CACHEDOUTPUTSTREAM_HXX + +#include + +#include +#include + +#include +#include + +namespace sax_fastparser { + +class ForMergeBase +{ +public: + virtual ~ForMergeBase() {} + virtual void append( const css::uno::Sequence& rWhat ) = 0; +}; + +class CachedOutputStream +{ + /// When buffer hits this size, it's written to mxOutputStream + static const sal_Int32 mnMaximumSize = 0x10000; + + /// ForMerge structure is used for sorting elements in Writer + std::shared_ptr< ForMergeBase > mpForMerge; + const css::uno::Sequence mpCache; + /// Output stream, usually writing data into files. + css::uno::Reference< css::io::XOutputStream > mxOutputStream; + uno_Sequence *pSeq; + sal_Int32 mnCacheWrittenSize; + bool mbWriteToOutStream; + +public: + CachedOutputStream() : mpCache(mnMaximumSize) + , pSeq(mpCache.get()) + , mnCacheWrittenSize(0) + , mbWriteToOutStream(true) + {} + + const css::uno::Reference< css::io::XOutputStream >& getOutputStream() const + { + return mxOutputStream; + } + + void setOutputStream( const css::uno::Reference< css::io::XOutputStream >& xOutputStream ) + { + mxOutputStream = xOutputStream; + } + + void setOutput( std::shared_ptr< ForMergeBase > pForMerge ) + { + flush(); + mbWriteToOutStream = false; + mpForMerge = pForMerge; + } + + void resetOutputToStream() + { + flush(); + mbWriteToOutStream = true; + mpForMerge.reset(); + } + + /// cache string and if limit is hit, flush + void writeBytes( const sal_Int8* pStr, sal_Int32 nLen ) + { + // Write when the buffer gets big enough + if (mnCacheWrittenSize + nLen > mnMaximumSize) + { + flush(); + + // Writer does some elements sorting, so it can accumulate + // pretty big strings in FastSaxSerializer::ForMerge. + // In that case, just flush data and write immediately. 
+ if (nLen > mnMaximumSize) + { + if (mbWriteToOutStream) + mxOutputStream->writeBytes( css::uno::Sequence(pStr, nLen) ); + else + mpForMerge->append( css::uno::Sequence(pStr, nLen) ); + return; + } + } + + memcpy(pSeq->elements + mnCacheWrittenSize, pStr, nLen); + mnCacheWrittenSize += nLen; + } + + /// immediately write buffer into mxOutputStream and clear + void flush() + { + // resize the Sequence to written size + pSeq->nElements = mnCacheWrittenSize; + if (mbWriteToOutStream) + mxOutputStream->writeBytes( mpCache ); + else + mpForMerge->append( mpCache ); + // and next time write to the beginning + mnCacheWrittenSize = 0; + } +}; + +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/tools/converter.cxx b/sax/source/tools/converter.cxx new file mode 100644 index 000000000..ac6eba928 --- /dev/null +++ b/sax/source/tools/converter.cxx @@ -0,0 +1,2448 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +using namespace com::sun::star; +using namespace com::sun::star::uno; +using namespace com::sun::star::util; +using namespace ::com::sun::star::i18n; + + +namespace sax { + +static const char* const gpsMM = "mm"; +static const char* const gpsCM = "cm"; +static const char* const gpsPT = "pt"; +static const char* const gpsINCH = "in"; +static const char* const gpsPC = "pc"; + +const sal_Int8 XML_MAXDIGITSCOUNT_TIME = 14; + +/** convert string to measure using optional min and max values*/ +bool Converter::convertMeasure( sal_Int32& rValue, + const OUString& rString, + sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */, + sal_Int32 nMin /* = SAL_MIN_INT32 */, + sal_Int32 nMax /* = SAL_MAX_INT32 */ ) +{ + bool bNeg = false; + double nVal = 0; + + sal_Int32 nPos = 0; + sal_Int32 const nLen = rString.getLength(); + + // skip white space + while( (nPos < nLen) && (rString[nPos] <= ' ') ) + nPos++; + + if( nPos < nLen && '-' == rString[nPos] ) + { + bNeg = true; + nPos++; + } + + // get number + while( nPos < nLen && + '0' <= rString[nPos] && + '9' >= rString[nPos] ) + { + // TODO: check overflow! + nVal *= 10; + nVal += (rString[nPos] - '0'); + nPos++; + } + if( nPos < nLen && '.' == rString[nPos] ) + { + nPos++; + double nDiv = 1.; + + while( nPos < nLen && + '0' <= rString[nPos] && + '9' >= rString[nPos] ) + { + // TODO: check overflow! 
+ nDiv *= 10; + nVal += ( static_cast(rString[nPos] - '0') / nDiv ); + nPos++; + } + } + + // skip white space + while( (nPos < nLen) && (rString[nPos] <= ' ') ) + nPos++; + + if( nPos < nLen ) + { + + if( MeasureUnit::PERCENT == nTargetUnit ) + { + if( '%' != rString[nPos] ) + return false; + } + else if( MeasureUnit::PIXEL == nTargetUnit ) + { + if( nPos + 1 >= nLen || + ('p' != rString[nPos] && + 'P' != rString[nPos])|| + ('x' != rString[nPos+1] && + 'X' != rString[nPos+1]) ) + return false; + } + else + { + OSL_ENSURE( MeasureUnit::TWIP == nTargetUnit || MeasureUnit::POINT == nTargetUnit || + MeasureUnit::MM_100TH == nTargetUnit || MeasureUnit::MM_10TH == nTargetUnit || + MeasureUnit::PIXEL == nTargetUnit, "unit is not supported"); + const char *aCmpsL[3] = { nullptr, nullptr, nullptr }; + const char *aCmpsU[3] = { nullptr, nullptr, nullptr }; + double aScales[3] = { 1., 1., 1. }; + + if( MeasureUnit::TWIP == nTargetUnit ) + { + switch( rString[nPos] ) + { + case u'c': + case u'C': + aCmpsL[0] = "cm"; + aCmpsU[0] = "CM"; + aScales[0] = (72.*20.)/2.54; // twip + break; + case u'i': + case u'I': + aCmpsL[0] = "in"; + aCmpsU[0] = "IN"; + aScales[0] = 72.*20.; // twip + break; + case u'm': + case u'M': + aCmpsL[0] = "mm"; + aCmpsU[0] = "MM"; + aScales[0] = (72.*20.)/25.4; // twip + break; + case u'p': + case u'P': + aCmpsL[0] = "pt"; + aCmpsU[0] = "PT"; + aScales[0] = 20.; // twip + + aCmpsL[1] = "pc"; + aCmpsU[1] = "PC"; + aScales[1] = 12.*20.; // twip + break; + } + } + else if( MeasureUnit::MM_100TH == nTargetUnit || MeasureUnit::MM_10TH == nTargetUnit ) + { + double nScaleFactor = (MeasureUnit::MM_100TH == nTargetUnit) ? 
100.0 : 10.0; + switch( rString[nPos] ) + { + case u'c': + case u'C': + aCmpsL[0] = "cm"; + aCmpsU[0] = "CM"; + aScales[0] = 10.0 * nScaleFactor; // mm/100 + break; + case u'i': + case u'I': + aCmpsL[0] = "in"; + aCmpsU[0] = "IN"; + aScales[0] = 1000.*2.54; // mm/100 + break; + case u'm': + case u'M': + aCmpsL[0] = "mm"; + aCmpsU[0] = "MM"; + aScales[0] = 1.0 * nScaleFactor; // mm/100 + break; + case u'p': + case u'P': + aCmpsL[0] = "pt"; + aCmpsU[0] = "PT"; + aScales[0] = (10.0 * nScaleFactor*2.54)/72.; // mm/100 + + aCmpsL[1] = "pc"; + aCmpsU[1] = "PC"; + aScales[1] = (10.0 * nScaleFactor*2.54)/12.; // mm/100 + + aCmpsL[2] = "px"; + aCmpsU[2] = "PX"; + aScales[2] = 0.28 * nScaleFactor; // mm/100 + break; + } + } + else if( MeasureUnit::POINT == nTargetUnit ) + { + if( rString[nPos] == 'p' || rString[nPos] == 'P' ) + { + aCmpsL[0] = "pt"; + aCmpsU[0] = "PT"; + aScales[0] = 1; + } + } + + if( aCmpsL[0] == nullptr ) + return false; + + double nScale = 0.; + for( sal_uInt16 i= 0; i < 3; i++ ) + { + sal_Int32 nTmp = nPos; // come back to the initial position before each iteration + const char *pL = aCmpsL[i]; + if( pL ) + { + const char *pU = aCmpsU[i]; + while( nTmp < nLen && *pL ) + { + sal_Unicode c = rString[nTmp]; + if( c != *pL && c != *pU ) + break; + pL++; + pU++; + nTmp++; + } + if( !*pL && (nTmp == nLen || ' ' == rString[nTmp]) ) + { + nScale = aScales[i]; + break; + } + } + } + + if( 0. == nScale ) + return false; + + // TODO: check overflow + if( nScale != 1. 
) + nVal *= nScale; + } + } + + nVal += .5; + if( bNeg ) + nVal = -nVal; + + if( nVal <= static_cast(nMin) ) + rValue = nMin; + else if( nVal >= static_cast(nMax) ) + rValue = nMax; + else + rValue = static_cast(nVal); + + return true; +} + +/** convert measure in given unit to string with given unit */ +void Converter::convertMeasure( OUStringBuffer& rBuffer, + sal_Int32 nMeasure, + sal_Int16 nSourceUnit /* = MeasureUnit::MM_100TH */, + sal_Int16 nTargetUnit /* = MeasureUnit::INCH */ ) +{ + if( nSourceUnit == MeasureUnit::PERCENT ) + { + OSL_ENSURE( nTargetUnit == MeasureUnit::PERCENT, + "MeasureUnit::PERCENT only maps to MeasureUnit::PERCENT!" ); + + rBuffer.append( nMeasure ); + rBuffer.append( '%' ); + + return; + } + sal_Int64 nValue(nMeasure); // extend to 64-bit first to avoid overflow + // the sign is processed separately + if (nValue < 0) + { + nValue = -nValue; + rBuffer.append( '-' ); + } + + // The new length is (nVal * nMul)/(nDiv*nFac*10) + long nMul = 1000; + long nDiv = 1; + long nFac = 100; + const char* psUnit = nullptr; + switch( nSourceUnit ) + { + case MeasureUnit::TWIP: + switch( nTargetUnit ) + { + case MeasureUnit::MM_100TH: + case MeasureUnit::MM_10TH: + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit,"output unit not supported for twip values" ); + [[fallthrough]]; + case MeasureUnit::MM: + // 0.01mm = 0.57twip (exactly) + nMul = 25400; // 25.4 * 1000 + nDiv = 1440; // 72 * 20; + nFac = 100; + psUnit = gpsMM; + break; + + case MeasureUnit::CM: + // 0.001cm = 0.57twip (exactly) + nMul = 25400; // 2.54 * 10000 + nDiv = 1440; // 72 * 20; + nFac = 1000; + psUnit = gpsCM; + break; + + case MeasureUnit::POINT: + // 0.01pt = 0.2twip (exactly) + nMul = 1000; + nDiv = 20; + nFac = 100; + psUnit = gpsPT; + break; + + case MeasureUnit::INCH: + default: + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit, + "output unit not supported for twip values" ); + // 0.0001in = 0.144twip (exactly) + nMul = 100000; + nDiv = 1440; // 72 * 20; + nFac = 10000; + psUnit = 
gpsINCH; + break; + } + break; + + case MeasureUnit::POINT: + // 1pt = 1pt (exactly) + OSL_ENSURE( MeasureUnit::POINT == nTargetUnit, + "output unit not supported for pt values" ); + nMul = 10; + nDiv = 1; + nFac = 1; + psUnit = gpsPT; + break; + case MeasureUnit::MM_10TH: + case MeasureUnit::MM_100TH: + { + int nFac2 = (MeasureUnit::MM_100TH == nSourceUnit) ? 100 : 10; + switch( nTargetUnit ) + { + case MeasureUnit::MM_100TH: + case MeasureUnit::MM_10TH: + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit, + "output unit not supported for 1/100mm values" ); + [[fallthrough]]; + case MeasureUnit::MM: + // 0.01mm = 1 mm/100 (exactly) + nMul = 10; + nDiv = 1; + nFac = nFac2; + psUnit = gpsMM; + break; + + case MeasureUnit::CM: + // 0.001mm = 1 mm/100 (exactly) + nMul = 10; + nDiv = 1; // 72 * 20; + nFac = 10*nFac2; + psUnit = gpsCM; + break; + + case MeasureUnit::POINT: + // 0.01pt = 0.35 mm/100 (exactly) + nMul = 72000; + nDiv = 2540; + nFac = nFac2; + psUnit = gpsPT; + break; + + case MeasureUnit::INCH: + default: + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit, + "output unit not supported for 1/100mm values" ); + // 0.0001in = 0.254 mm/100 (exactly) + nMul = 100000; + nDiv = 2540; + nFac = 100*nFac2; + psUnit = gpsINCH; + break; + } + break; + } + default: + OSL_ENSURE(false, "sax::Converter::convertMeasure(): " + "source unit not supported"); + break; + } + + OSL_ENSURE(nValue <= SAL_MAX_INT64 / nMul, "convertMeasure: overflow"); + nValue *= nMul; + nValue /= nDiv; + nValue += 5; + nValue /= 10; + + rBuffer.append( static_cast(nValue / nFac) ); + if (nFac > 1 && (nValue % nFac) != 0) + { + rBuffer.append( '.' 
); + while (nFac > 1 && (nValue % nFac) != 0) + { + nFac /= 10; + rBuffer.append( static_cast((nValue / nFac) % 10) ); + } + } + + if( psUnit ) + rBuffer.appendAscii( psUnit ); +} + +/** convert string to boolean */ +bool Converter::convertBool( bool& rBool, const OUString& rString ) +{ + rBool = rString == "true"; + + return rBool || (rString == "false"); +} + +/** convert boolean to string */ +void Converter::convertBool( OUStringBuffer& rBuffer, bool bValue ) +{ + rBuffer.append( bValue ); +} + +/** convert string to percent */ +bool Converter::convertPercent( sal_Int32& rPercent, const OUString& rString ) +{ + return convertMeasure( rPercent, rString, MeasureUnit::PERCENT ); +} + +/** convert percent to string */ +void Converter::convertPercent( OUStringBuffer& rBuffer, sal_Int32 nValue ) +{ + rBuffer.append( nValue ); + rBuffer.append( '%' ); +} + +/** convert string to pixel measure */ +bool Converter::convertMeasurePx( sal_Int32& rPixel, const OUString& rString ) +{ + return convertMeasure( rPixel, rString, MeasureUnit::PIXEL ); +} + +/** convert pixel measure to string */ +void Converter::convertMeasurePx( OUStringBuffer& rBuffer, sal_Int32 nValue ) +{ + rBuffer.append( nValue ); + rBuffer.append( 'p' ); + rBuffer.append( 'x' ); +} + +static int lcl_gethex( int nChar ) +{ + if( nChar >= '0' && nChar <= '9' ) + return nChar - '0'; + else if( nChar >= 'a' && nChar <= 'f' ) + return nChar - 'a' + 10; + else if( nChar >= 'A' && nChar <= 'F' ) + return nChar - 'A' + 10; + else + return 0; +} + +/** convert string to rgb color */ +bool Converter::convertColor( sal_Int32& rColor, const OUString& rValue ) +{ + if( rValue.getLength() != 7 || rValue[0] != '#' ) + return false; + + rColor = lcl_gethex( rValue[1] ) * 16 + lcl_gethex( rValue[2] ); + rColor <<= 8; + + rColor |= ( lcl_gethex( rValue[3] ) * 16 + lcl_gethex( rValue[4] ) ); + rColor <<= 8; + + rColor |= ( lcl_gethex( rValue[5] ) * 16 + lcl_gethex( rValue[6] ) ); + + return true; +} + +static const char 
aHexTab[] = "0123456789abcdef"; + +/** convert color to string */ +void Converter::convertColor( OUStringBuffer& rBuffer, sal_Int32 nColor ) +{ + rBuffer.append( '#' ); + + sal_uInt8 nCol = static_cast(nColor >> 16); + rBuffer.append( sal_Unicode( aHexTab[ nCol >> 4 ] ) ); + rBuffer.append( sal_Unicode( aHexTab[ nCol & 0xf ] ) ); + + nCol = static_cast(nColor >> 8); + rBuffer.append( sal_Unicode( aHexTab[ nCol >> 4 ] ) ); + rBuffer.append( sal_Unicode( aHexTab[ nCol & 0xf ] ) ); + + nCol = static_cast(nColor); + rBuffer.append( sal_Unicode( aHexTab[ nCol >> 4 ] ) ); + rBuffer.append( sal_Unicode( aHexTab[ nCol & 0xf ] ) ); +} + +/** convert string to number with optional min and max values */ +bool Converter::convertNumber( sal_Int32& rValue, + std::u16string_view aString, + sal_Int32 nMin, sal_Int32 nMax ) +{ + rValue = 0; + sal_Int64 nNumber = 0; + bool bRet = convertNumber64(nNumber,aString,nMin,nMax); + if ( bRet ) + rValue = static_cast(nNumber); + return bRet; +} + +/** convert string to 64-bit number with optional min and max values */ +bool Converter::convertNumber64( sal_Int64& rValue, + std::u16string_view aString, + sal_Int64 nMin, sal_Int64 nMax ) +{ + sal_Int32 nPos = 0; + sal_Int32 const nLen = aString.size(); + + // skip white space + while( (nPos < nLen) && (aString[nPos] <= ' ') ) + nPos++; + + sal_Int32 nNumberStartPos = nPos; + + if( nPos < nLen && '-' == aString[nPos] ) + { + nPos++; + } + + // get number + while( nPos < nLen && + '0' <= aString[nPos] && + '9' >= aString[nPos] ) + { + nPos++; + } + + rValue = rtl_ustr_toInt64_WithLength(aString.data() + nNumberStartPos, 10, nPos - nNumberStartPos); + + if( rValue < nMin ) + rValue = nMin; + else if( rValue > nMax ) + rValue = nMax; + + return ( nPos == nLen && rValue >= nMin && rValue <= nMax ); +} + +/** convert double number to string (using ::rtl::math) */ +void Converter::convertDouble( OUStringBuffer& rBuffer, + double fNumber, + bool bWriteUnits, + sal_Int16 nSourceUnit, + sal_Int16 
nTargetUnit) +{ + if(MeasureUnit::PERCENT == nSourceUnit) + { + OSL_ENSURE( nTargetUnit == MeasureUnit::PERCENT, "MeasureUnit::PERCENT only maps to MeasureUnit::PERCENT!" ); + ::rtl::math::doubleToUStringBuffer( rBuffer, fNumber, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true); + if(bWriteUnits) + rBuffer.append('%'); + } + else + { + OUStringBuffer sUnit; + double fFactor = GetConversionFactor(sUnit, nSourceUnit, nTargetUnit); + if(fFactor != 1.0) + fNumber *= fFactor; + ::rtl::math::doubleToUStringBuffer( rBuffer, fNumber, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true); + if(bWriteUnits) + rBuffer.append(sUnit.makeStringAndClear()); + } +} + +/** convert double number to string (using ::rtl::math) */ +void Converter::convertDouble( OUStringBuffer& rBuffer, double fNumber) +{ + ::rtl::math::doubleToUStringBuffer( rBuffer, fNumber, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true); +} + +/** convert string to double number (using ::rtl::math) */ +bool Converter::convertDouble(double& rValue, + const OUString& rString, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit) +{ + rtl_math_ConversionStatus eStatus; + rValue = ::rtl::math::stringToDouble( rString, '.', ',', &eStatus ); + + if(eStatus == rtl_math_ConversionStatus_Ok) + { + OUStringBuffer sUnit; + // fdo#48969: switch source and target because factor is used to divide! 
+ double const fFactor = + GetConversionFactor(sUnit, nTargetUnit, nSourceUnit); + if(fFactor != 1.0 && fFactor != 0.0) + rValue /= fFactor; + } + + return ( eStatus == rtl_math_ConversionStatus_Ok ); +} + +/** convert string to double number (using ::rtl::math) */ +bool Converter::convertDouble(double& rValue, const OUString& rString) +{ + rtl_math_ConversionStatus eStatus; + rValue = ::rtl::math::stringToDouble( rString, '.', ',', &eStatus ); + return ( eStatus == rtl_math_ConversionStatus_Ok ); +} + +/** convert number, 10th of degrees with range [0..3600] to SVG angle */ +void Converter::convertAngle(OUStringBuffer& rBuffer, sal_Int16 const nAngle, + SvtSaveOptions::ODFSaneDefaultVersion const nVersion) +{ + if (nVersion < SvtSaveOptions::ODFSVER_012 || nVersion == SvtSaveOptions::ODFSVER_012_EXT_COMPAT) + { + // wrong, but backward compatible with OOo/LO < 4.4 + rBuffer.append(static_cast(nAngle)); + } + else + { // OFFICE-3774 tdf#89475 write valid ODF 1.2 angle; needs LO 4.4 to import + double fAngle(double(nAngle) / 10.0); + ::sax::Converter::convertDouble(rBuffer, fAngle); + rBuffer.append("deg"); + } +} + +/** convert SVG angle to number, 10th of degrees with range [0..3600] */ +bool Converter::convertAngle(sal_Int16& rAngle, OUString const& rString, + bool const isWrongOOo10thDegAngle) +{ + // ODF 1.1 leaves it undefined what the number means, but ODF 1.2 says it's + // degrees, while OOo has historically used 10th of degrees :( + // So import degrees when we see the "deg" suffix but continue with 10th of + // degrees for now for the sake of existing OOo/LO documents, until the + // new versions that can read "deg" suffix are widely deployed and we can + // start to write the "deg" suffix. 
+ sal_Int32 nValue(0); + double fValue(0.0); + bool bRet = ::sax::Converter::convertDouble(fValue, rString); + if (-1 != rString.indexOf("deg")) + { + nValue = fValue * 10.0; + } + else if (-1 != rString.indexOf("grad")) + { + nValue = (fValue * 9.0 / 10.0) * 10.0; + } + else if (-1 != rString.indexOf("rad")) + { + nValue = basegfx::rad2deg(fValue) * 10.0; + } + else // no explicit unit + { + if (isWrongOOo10thDegAngle) + { + nValue = fValue; // wrong, but backward compatible with OOo/LO < 7.0 + } + else + { + nValue = fValue * 10.0; // ODF 1.2 + } + } + // limit to valid range [0..3600] + nValue = nValue % 3600; + if (nValue < 0) + { + nValue += 3600; + } + assert(0 <= nValue && nValue <= 3600); + if (bRet) + { + rAngle = sal::static_int_cast(nValue); + } + return bRet; +} + +/** convert double to ISO "duration" string; negative durations allowed */ +void Converter::convertDuration(OUStringBuffer& rBuffer, + const double fTime) +{ + double fValue = fTime; + + // take care of negative durations as specified in: + // XML Schema, W3C Working Draft 07 April 2000, section 3.2.6.1 + if (fValue < 0.0) + { + rBuffer.append('-'); + fValue = - fValue; + } + + rBuffer.append( "PT" ); + fValue *= 24; + double fHoursValue = ::rtl::math::approxFloor (fValue); + fValue -= fHoursValue; + fValue *= 60; + double fMinsValue = ::rtl::math::approxFloor (fValue); + fValue -= fMinsValue; + fValue *= 60; + double fSecsValue = ::rtl::math::approxFloor (fValue); + fValue -= fSecsValue; + double fNanoSecsValue; + if (fValue > 0.00000000001) + fNanoSecsValue = ::rtl::math::round( fValue, XML_MAXDIGITSCOUNT_TIME - 5); + else + fNanoSecsValue = 0.0; + + if (fNanoSecsValue == 1.0) + { + fNanoSecsValue = 0.0; + fSecsValue += 1.0; + } + if (fSecsValue >= 60.0) + { + fSecsValue -= 60.0; + fMinsValue += 1.0; + } + if (fMinsValue >= 60.0) + { + fMinsValue -= 60.0; + fHoursValue += 1.0; + } + + if (fHoursValue < 10) + rBuffer.append( '0'); + rBuffer.append( sal_Int32( fHoursValue)); + rBuffer.append( 
'H'); + if (fMinsValue < 10) + rBuffer.append( '0'); + rBuffer.append( sal_Int32( fMinsValue)); + rBuffer.append( 'M'); + if (fSecsValue < 10) + rBuffer.append( '0'); + rBuffer.append( sal_Int32( fSecsValue)); + if (fNanoSecsValue > 0.0) + { + OUString aNS( ::rtl::math::doubleToUString( fValue, + rtl_math_StringFormat_F, XML_MAXDIGITSCOUNT_TIME - 5, '.', + true)); + if ( aNS.getLength() > 2 ) + { + rBuffer.append( '.'); + rBuffer.append( std::u16string_view(aNS).substr(2) ); // strip "0." + } + } + rBuffer.append( 'S'); +} + +/** convert ISO "duration" string to double; negative durations allowed */ +bool Converter::convertDuration(double& rfTime, + const OUString& rString) +{ + OUString aTrimmed = rString.trim().toAsciiUpperCase(); + const sal_Unicode* pStr = aTrimmed.getStr(); + + // negative time duration? + bool bIsNegativeDuration = false; + if ( '-' == (*pStr) ) + { + bIsNegativeDuration = true; + pStr++; + } + + if ( *(pStr++) != 'P' ) // duration must start with "P" + return false; + + OUStringBuffer sDoubleStr; + bool bSuccess = true; + bool bDone = false; + bool bTimePart = false; + bool bIsFraction = false; + sal_Int32 nDays = 0; + sal_Int32 nHours = 0; + sal_Int32 nMins = 0; + sal_Int32 nSecs = 0; + sal_Int32 nTemp = 0; + + while ( bSuccess && !bDone ) + { + sal_Unicode c = *(pStr++); + if ( !c ) // end + bDone = true; + else if ( '0' <= c && '9' >= c ) + { + if ( nTemp >= SAL_MAX_INT32 / 10 ) + bSuccess = false; + else + { + if ( !bIsFraction ) + { + nTemp *= 10; + nTemp += (c - u'0'); + } + else + { + sDoubleStr.append(c); + } + } + } + else if ( bTimePart ) + { + if ( c == 'H' ) + { + nHours = nTemp; + nTemp = 0; + } + else if ( c == 'M' ) + { + nMins = nTemp; + nTemp = 0; + } + else if ( (c == ',') || (c == '.') ) + { + nSecs = nTemp; + nTemp = 0; + bIsFraction = true; + sDoubleStr = "0."; + } + else if ( c == 'S' ) + { + if ( !bIsFraction ) + { + nSecs = nTemp; + nTemp = 0; + sDoubleStr = "0.0"; + } + } + else + bSuccess = false; // invalid 
character + } + else + { + if ( c == 'T' ) // "T" starts time part + bTimePart = true; + else if ( c == 'D' ) + { + nDays = nTemp; + nTemp = 0; + } + else if ( c == 'Y' || c == 'M' ) + { + //! how many days is a year or month? + + OSL_FAIL( "years or months in duration: not implemented"); + bSuccess = false; + } + else + bSuccess = false; // invalid character + } + } + + if ( bSuccess ) + { + if ( nDays ) + nHours += nDays * 24; // add the days to the hours part + double fHour = nHours; + double fMin = nMins; + double fSec = nSecs; + double fFraction = sDoubleStr.makeStringAndClear().toDouble(); + double fTempTime = fHour / 24; + fTempTime += fMin / (24 * 60); + fTempTime += fSec / (24 * 60 * 60); + fTempTime += fFraction / (24 * 60 * 60); + + // negative duration? + if ( bIsNegativeDuration ) + { + fTempTime = -fTempTime; + } + + rfTime = fTempTime; + } + return bSuccess; +} + +/** convert util::Duration to ISO8601 "duration" string */ +void Converter::convertDuration(OUStringBuffer& rBuffer, + const ::util::Duration& rDuration) +{ + if (rDuration.Negative) + { + rBuffer.append('-'); + } + rBuffer.append('P'); + const bool bHaveDate(rDuration.Years != 0 || + rDuration.Months != 0 || + rDuration.Days != 0); + if (rDuration.Years) + { + rBuffer.append(static_cast(rDuration.Years)); + rBuffer.append('Y'); + } + if (rDuration.Months) + { + rBuffer.append(static_cast(rDuration.Months)); + rBuffer.append('M'); + } + if (rDuration.Days) + { + rBuffer.append(static_cast(rDuration.Days)); + rBuffer.append('D'); + } + if ( rDuration.Hours != 0 + || rDuration.Minutes != 0 + || rDuration.Seconds != 0 + || rDuration.NanoSeconds != 0 ) + { + rBuffer.append('T'); // time separator + if (rDuration.Hours) + { + rBuffer.append(static_cast(rDuration.Hours)); + rBuffer.append('H'); + } + if (rDuration.Minutes) + { + rBuffer.append(static_cast(rDuration.Minutes)); + rBuffer.append('M'); + } + if (rDuration.Seconds != 0 || rDuration.NanoSeconds != 0) + { + // seconds must not be 
omitted (i.e. ".42S" is not valid) + rBuffer.append(static_cast(rDuration.Seconds)); + if (rDuration.NanoSeconds) + { + OSL_ENSURE(rDuration.NanoSeconds < 1000000000,"NanoSeconds cannot be more than 999 999 999"); + rBuffer.append('.'); + std::ostringstream ostr; + ostr.fill('0'); + ostr.width(9); + ostr << rDuration.NanoSeconds; + rBuffer.append(OUString::createFromAscii(ostr.str().c_str())); + } + rBuffer.append('S'); + } + } + else if (!bHaveDate) + { + // zero duration: XMLSchema-2 says there must be at least one component + rBuffer.append('0'); + rBuffer.append('D'); + } +} + +namespace { + +enum Result { R_NOTHING, R_OVERFLOW, R_SUCCESS }; + +} + +static Result +readUnsignedNumber(const OUString & rString, + sal_Int32 & io_rnPos, sal_Int32 & o_rNumber) +{ + sal_Int32 nPos(io_rnPos); + + while (nPos < rString.getLength()) + { + const sal_Unicode c = rString[nPos]; + if (!(('0' <= c) && (c <= '9'))) + break; + ++nPos; + } + + if (io_rnPos == nPos) // read something? + { + o_rNumber = -1; + return R_NOTHING; + } + + const sal_Int64 nTemp = rtl_ustr_toInt64_WithLength(rString.getStr() + io_rnPos, 10, nPos - io_rnPos); + + const bool bOverflow = (nTemp >= SAL_MAX_INT32); + + io_rnPos = nPos; + o_rNumber = nTemp; + return bOverflow ? R_OVERFLOW : R_SUCCESS; +} + +static Result +readUnsignedNumberMaxDigits(int maxDigits, + const OUString & rString, sal_Int32 & io_rnPos, + sal_Int32 & o_rNumber) +{ + bool bOverflow(false); + sal_Int64 nTemp(0); + sal_Int32 nPos(io_rnPos); + OSL_ENSURE(maxDigits >= 0, "negative amount of digits makes no sense"); + + while (nPos < rString.getLength()) + { + const sal_Unicode c = rString[nPos]; + if (('0' <= c) && (c <= '9')) + { + if (maxDigits > 0) + { + nTemp *= 10; + nTemp += (c - u'0'); + if (nTemp >= SAL_MAX_INT32) + { + bOverflow = true; + } + --maxDigits; + } + } + else + { + break; + } + ++nPos; + } + + if (io_rnPos == nPos) // read something? 
+ { + o_rNumber = -1; + return R_NOTHING; + } + + io_rnPos = nPos; + o_rNumber = nTemp; + return bOverflow ? R_OVERFLOW : R_SUCCESS; +} + +static bool +readDurationT(const OUString & rString, sal_Int32 & io_rnPos) +{ + if ((io_rnPos < rString.getLength()) && + (rString[io_rnPos] == 'T')) + { + ++io_rnPos; + return true; + } + return false; +} + +static bool +readDurationComponent(const OUString & rString, + sal_Int32 & io_rnPos, sal_Int32 & io_rnTemp, bool & io_rbTimePart, + sal_Int32 & o_rnTarget, const sal_Unicode c) +{ + if (io_rnPos < rString.getLength()) + { + if (c == rString[io_rnPos]) + { + ++io_rnPos; + if (-1 != io_rnTemp) + { + o_rnTarget = io_rnTemp; + io_rnTemp = -1; + if (!io_rbTimePart) + { + io_rbTimePart = readDurationT(rString, io_rnPos); + } + return (R_OVERFLOW != + readUnsignedNumber(rString, io_rnPos, io_rnTemp)); + } + else + { + return false; + } + } + } + return true; +} + +/** convert ISO8601 "duration" string to util::Duration */ +bool Converter::convertDuration(util::Duration& rDuration, + const OUString& rString) +{ + const OUString string = rString.trim().toAsciiUpperCase(); + sal_Int32 nPos(0); + + bool bIsNegativeDuration(false); + if (!string.isEmpty() && ('-' == string[0])) + { + bIsNegativeDuration = true; + ++nPos; + } + + if ((nPos < string.getLength()) + && (string[nPos] != 'P')) // duration must start with "P" + { + return false; + } + + ++nPos; + + /// last read number; -1 == no valid number! always reset after using! + sal_Int32 nTemp(-1); + bool bTimePart(false); // have we read 'T'? 
+ bool bSuccess(false); + sal_Int32 nYears(0); + sal_Int32 nMonths(0); + sal_Int32 nDays(0); + sal_Int32 nHours(0); + sal_Int32 nMinutes(0); + sal_Int32 nSeconds(0); + sal_Int32 nNanoSeconds(0); + + bTimePart = readDurationT(string, nPos); + bSuccess = (R_SUCCESS == readUnsignedNumber(string, nPos, nTemp)); + + if (!bTimePart && bSuccess) + { + bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart, + nYears, 'Y'); + } + + if (!bTimePart && bSuccess) + { + bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart, + nMonths, 'M'); + } + + if (!bTimePart && bSuccess) + { + bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart, + nDays, 'D'); + } + + if (bTimePart) + { + if (-1 == nTemp) // a 'T' must be followed by a component + { + bSuccess = false; + } + + if (bSuccess) + { + bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart, + nHours, 'H'); + } + + if (bSuccess) + { + bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart, + nMinutes, 'M'); + } + + // eeek! seconds are icky. + if ((nPos < string.getLength()) && bSuccess) + { + if (string[nPos] == '.' || + string[nPos] == ',') + { + ++nPos; + if (-1 != nTemp) + { + nSeconds = nTemp; + nTemp = -1; + const sal_Int32 nStart(nPos); + bSuccess = readUnsignedNumberMaxDigits(9, string, nPos, nTemp) == R_SUCCESS; + if ((nPos < string.getLength()) && bSuccess) + { + if (-1 != nTemp) + { + nNanoSeconds = nTemp; + sal_Int32 nDigits = nPos - nStart; + assert(nDigits >= 0); + for (; nDigits < 9; ++nDigits) + { + nNanoSeconds *= 10; + } + nTemp=-1; + if ('S' == string[nPos]) + { + ++nPos; + } + else + { + bSuccess = false; + } + } + else + { + bSuccess = false; + } + } + } + else + { + bSuccess = false; + } + } + else if ('S' == string[nPos]) + { + ++nPos; + if (-1 != nTemp) + { + nSeconds = nTemp; + nTemp = -1; + } + else + { + bSuccess = false; + } + } + } + } + + if (nPos != string.getLength()) // string not processed completely? 
+ { + bSuccess = false; + } + + if (nTemp != -1) // unprocessed number? + { + bSuccess = false; + } + + if (bSuccess) + { + rDuration.Negative = bIsNegativeDuration; + rDuration.Years = static_cast(nYears); + rDuration.Months = static_cast(nMonths); + rDuration.Days = static_cast(nDays); + rDuration.Hours = static_cast(nHours); + rDuration.Minutes = static_cast(nMinutes); + rDuration.Seconds = static_cast(nSeconds); + rDuration.NanoSeconds = nNanoSeconds; + } + + return bSuccess; +} + + +static void +lcl_AppendTimezone(OUStringBuffer & i_rBuffer, int const nOffset) +{ + if (0 == nOffset) + { + i_rBuffer.append('Z'); + } + else + { + if (0 < nOffset) + { + i_rBuffer.append('+'); + } + else + { + i_rBuffer.append('-'); + } + const sal_Int32 nHours (abs(nOffset) / 60); + const sal_Int32 nMinutes(abs(nOffset) % 60); + SAL_WARN_IF(nHours > 14 || (nHours == 14 && nMinutes > 0), + "sax", "convertDateTime: timezone overflow"); + if (nHours < 10) + { + i_rBuffer.append('0'); + } + i_rBuffer.append(nHours); + i_rBuffer.append(':'); + if (nMinutes < 10) + { + i_rBuffer.append('0'); + } + i_rBuffer.append(nMinutes); + } +} + +/** convert util::Date to ISO "date" string */ +void Converter::convertDate( + OUStringBuffer& i_rBuffer, + const util::Date& i_rDate, + sal_Int16 const*const pTimeZoneOffset) +{ + const util::DateTime dt(0, 0, 0, 0, + i_rDate.Day, i_rDate.Month, i_rDate.Year, false); + convertDateTime(i_rBuffer, dt, pTimeZoneOffset); +} + +static void convertTime( + OUStringBuffer& i_rBuffer, + const css::util::DateTime& i_rDateTime) +{ + if (i_rDateTime.Hours < 10) { + i_rBuffer.append('0'); + } + i_rBuffer.append( static_cast(i_rDateTime.Hours) ) + .append(':'); + if (i_rDateTime.Minutes < 10) { + i_rBuffer.append('0'); + } + i_rBuffer.append( static_cast(i_rDateTime.Minutes) ) + .append(':'); + if (i_rDateTime.Seconds < 10) { + i_rBuffer.append('0'); + } + i_rBuffer.append( static_cast(i_rDateTime.Seconds) ); + if (i_rDateTime.NanoSeconds > 0) { + 
OSL_ENSURE(i_rDateTime.NanoSeconds < 1000000000,"NanoSeconds cannot be more than 999 999 999"); + i_rBuffer.append('.'); + std::ostringstream ostr; + ostr.fill('0'); + ostr.width(9); + ostr << i_rDateTime.NanoSeconds; + i_rBuffer.append(OUString::createFromAscii(ostr.str().c_str())); + } +} + +static void convertTimeZone( + OUStringBuffer& i_rBuffer, + const css::util::DateTime& i_rDateTime, + sal_Int16 const* pTimeZoneOffset) +{ + if (pTimeZoneOffset) + { + lcl_AppendTimezone(i_rBuffer, *pTimeZoneOffset); + } + else if (i_rDateTime.IsUTC) + { + lcl_AppendTimezone(i_rBuffer, 0); + } +} + +/** convert util::DateTime to ISO "time" or "dateTime" string */ +void Converter::convertTimeOrDateTime( + OUStringBuffer& i_rBuffer, + const css::util::DateTime& i_rDateTime) +{ + if (i_rDateTime.Year == 0 || + i_rDateTime.Month < 1 || i_rDateTime.Month > 12 || + i_rDateTime.Day < 1 || i_rDateTime.Day > 31) + { + convertTime(i_rBuffer, i_rDateTime); + convertTimeZone(i_rBuffer, i_rDateTime, nullptr); + } + else + { + convertDateTime(i_rBuffer, i_rDateTime, nullptr, true); + } +} + +/** convert util::DateTime to ISO "date" or "dateTime" string */ +void Converter::convertDateTime( + OUStringBuffer& i_rBuffer, + const css::util::DateTime& i_rDateTime, + sal_Int16 const*const pTimeZoneOffset, + bool i_bAddTimeIf0AM ) +{ + const sal_Unicode dash('-'); + const sal_Unicode zero('0'); + + sal_Int32 const nYear(abs(i_rDateTime.Year)); + if (i_rDateTime.Year < 0) { + i_rBuffer.append(dash); // negative + } + if (nYear < 1000) { + i_rBuffer.append(zero); + } + if (nYear < 100) { + i_rBuffer.append(zero); + } + if (nYear < 10) { + i_rBuffer.append(zero); + } + i_rBuffer.append(nYear).append(dash); + if( i_rDateTime.Month < 10 ) { + i_rBuffer.append(zero); + } + i_rBuffer.append( static_cast(i_rDateTime.Month) ).append(dash); + if( i_rDateTime.Day < 10 ) { + i_rBuffer.append(zero); + } + i_rBuffer.append( static_cast(i_rDateTime.Day) ); + + if( i_rDateTime.Seconds != 0 || + 
i_rDateTime.Minutes != 0 || + i_rDateTime.Hours != 0 || + i_bAddTimeIf0AM ) + { + i_rBuffer.append('T'); + convertTime(i_rBuffer, i_rDateTime); + } + + convertTimeZone(i_rBuffer, i_rDateTime, pTimeZoneOffset); +} + +/** convert ISO "date" or "dateTime" string to util::DateTime */ +bool Converter::parseDateTime( util::DateTime& rDateTime, + const OUString& rString ) +{ + bool isDateTime; + return parseDateOrDateTime(nullptr, rDateTime, isDateTime, nullptr, + rString); +} + +static bool lcl_isLeapYear(const sal_uInt32 nYear) +{ + return ((nYear % 4) == 0) + && (((nYear % 100) != 0) || ((nYear % 400) == 0)); +} + +static sal_uInt16 +lcl_MaxDaysPerMonth(const sal_Int32 nMonth, const sal_Int32 nYear) +{ + static const sal_uInt16 s_MaxDaysPerMonth[12] = + { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + assert(0 < nMonth && nMonth <= 12); + if ((2 == nMonth) && lcl_isLeapYear(nYear)) + { + return 29; + } + return s_MaxDaysPerMonth[nMonth - 1]; +} + +/** Adjust a date/time in place to UTC, given its timezone offset in minutes. + + A negative nSourceOffset is added to the time (carrying minutes into + hours, hours into days, days into month and year); a positive + nSourceOffset is subtracted (borrowing in the opposite direction). + An offset of 0 leaves all fields unchanged. If o_rDay is 0 the value is + treated as a time without a date and only hours/minutes are adjusted. + */ +static void lcl_ConvertToUTC( + sal_Int16 & o_rYear, sal_uInt16 & o_rMonth, sal_uInt16 & o_rDay, + sal_uInt16 & o_rHours, sal_uInt16 & o_rMinutes, + int const nSourceOffset) +{ + sal_Int16 nOffsetHours(abs(nSourceOffset) / 60); + sal_Int16 const nOffsetMinutes(abs(nSourceOffset) % 60); + /* the minutes part is applied exactly once, inside the branch matching the sign of the offset; applying it here as well would double it for negative offsets and cancel it for positive ones */ + if (nSourceOffset < 0) + { + o_rMinutes += nOffsetMinutes; + if (60 <= o_rMinutes) + { + o_rMinutes -= 60; + ++nOffsetHours; + } + o_rHours += nOffsetHours; + if (o_rHours < 24) + { + return; + } + sal_Int16 nDayAdd(0); + while (24 <= o_rHours) + { + o_rHours -= 24; + ++nDayAdd; + } + if (o_rDay == 0) + { + return; // handle time without date - don't adjust what isn't there + } + o_rDay += nDayAdd; + sal_Int16 const nDaysInMonth(lcl_MaxDaysPerMonth(o_rMonth, o_rYear)); + if (o_rDay <= nDaysInMonth) + { + return; + } + o_rDay -= nDaysInMonth; + ++o_rMonth; + if (o_rMonth <= 12) + { + return; + } + o_rMonth = 1; + ++o_rYear; // works for negative year too + } + else if (0 < nSourceOffset) + { + // argh
everything is unsigned + if (o_rMinutes < nOffsetMinutes) + { + o_rMinutes += 60; + ++nOffsetHours; + } + o_rMinutes -= nOffsetMinutes; + sal_Int16 nDaySubtract(0); + while (o_rHours < nOffsetHours) + { + o_rHours += 24; + ++nDaySubtract; + } + o_rHours -= nOffsetHours; + if (o_rDay == 0) + { + return; // handle time without date - don't adjust what isn't there + } + if (nDaySubtract < o_rDay) + { + o_rDay -= nDaySubtract; + return; + } + sal_Int16 const nPrevMonth((o_rMonth == 1) ? 12 : o_rMonth - 1); + sal_Int16 const nDaysInMonth(lcl_MaxDaysPerMonth(nPrevMonth, o_rYear)); + o_rDay += nDaysInMonth; + --o_rMonth; + if (0 == o_rMonth) + { + o_rMonth = 12; + --o_rYear; // works for negative year too + } + o_rDay -= nDaySubtract; + } +} + +static bool +readDateTimeComponent(const OUString & rString, + sal_Int32 & io_rnPos, sal_Int32 & o_rnTarget, + const sal_Int32 nMinLength, const bool bExactLength) +{ + const sal_Int32 nOldPos(io_rnPos); + sal_Int32 nTemp(0); + if (R_SUCCESS != readUnsignedNumber(rString, io_rnPos, nTemp)) + { + return false; + } + const sal_Int32 nTokenLength(io_rnPos - nOldPos); + if ((nTokenLength < nMinLength) || + (bExactLength && (nTokenLength > nMinLength))) + { + return false; // bad length + } + o_rnTarget = nTemp; + return true; +} + +/** convert ISO "date" or "dateTime" string to util::DateTime or util::Date */ +static bool lcl_parseDate( + bool & isNegative, + sal_Int32 & nYear, sal_Int32 & nMonth, sal_Int32 & nDay, + bool & bHaveTime, + sal_Int32 & nPos, + const OUString & string, + bool const bIgnoreInvalidOrMissingDate) +{ + bool bSuccess = true; + + if (string.getLength() > nPos) + { + if ('-' == string[nPos]) + { + isNegative = true; + ++nPos; + } + } + + { + // While W3C XMLSchema specifies years with a minimum of 4 digits, be + // lenient in what we accept for years < 1000. One digit is acceptable + // if the remainders match.
+ bSuccess = readDateTimeComponent(string, nPos, nYear, 1, false); + if (!bIgnoreInvalidOrMissingDate) + { + bSuccess &= (0 < nYear); + } + bSuccess &= (nPos < string.getLength()); // not last token + } + if (bSuccess && ('-' != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + + bSuccess = readDateTimeComponent(string, nPos, nMonth, 2, true); + if (!bIgnoreInvalidOrMissingDate) + { + bSuccess &= (0 < nMonth); + } + bSuccess &= (nMonth <= 12); + bSuccess &= (nPos < string.getLength()); // not last token + } + if (bSuccess && ('-' != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + + bSuccess = readDateTimeComponent(string, nPos, nDay, 2, true); + if (!bIgnoreInvalidOrMissingDate) + { + bSuccess &= (0 < nDay); + } + if (nMonth > 0) // not possible to check if month was missing + { + bSuccess &= (nDay <= lcl_MaxDaysPerMonth(nMonth, nYear)); + } + else assert(bIgnoreInvalidOrMissingDate); + } + + if (bSuccess && (nPos < string.getLength())) + { + if ('T' == string[nPos]) // time separator + { + bHaveTime = true; + ++nPos; + } + } + + return bSuccess; +} + +/** convert ISO "date" or "dateTime" string to util::DateTime or util::Date */ +static bool lcl_parseDateTime( + util::Date *const pDate, util::DateTime & rDateTime, + bool & rbDateTime, + std::optional *const pTimeZoneOffset, + const OUString & rString, + bool const bIgnoreInvalidOrMissingDate) +{ + bool bSuccess = true; + + const OUString string = rString.trim().toAsciiUpperCase(); + + bool isNegative(false); + sal_Int32 nYear(0); + sal_Int32 nMonth(0); + sal_Int32 nDay(0); + sal_Int32 nPos(0); + bool bHaveTime(false); + + if ( !bIgnoreInvalidOrMissingDate + || string.indexOf(':') == -1 // no time? 
+ || (string.indexOf('-') != -1 + && string.indexOf('-') < string.indexOf(':'))) + { + bSuccess &= lcl_parseDate(isNegative, nYear, nMonth, nDay, + bHaveTime, nPos, string, bIgnoreInvalidOrMissingDate); + } + else + { + bHaveTime = true; + } + + sal_Int32 nHours(0); + sal_Int32 nMinutes(0); + sal_Int32 nSeconds(0); + sal_Int32 nNanoSeconds(0); + if (bSuccess && bHaveTime) + { + { + bSuccess = readDateTimeComponent(string, nPos, nHours, 2, true); + bSuccess &= (0 <= nHours) && (nHours <= 24); + bSuccess &= (nPos < string.getLength()); // not last token + } + if (bSuccess && (':' != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + + bSuccess = readDateTimeComponent(string, nPos, nMinutes, 2, true); + bSuccess &= (0 <= nMinutes) && (nMinutes < 60); + bSuccess &= (nPos < string.getLength()); // not last token + } + if (bSuccess && (':' != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + + bSuccess = readDateTimeComponent(string, nPos, nSeconds, 2, true); + bSuccess &= (0 <= nSeconds) && (nSeconds < 60); + } + if (bSuccess && (nPos < string.getLength()) && + ('.' 
== string[nPos] || ',' == string[nPos])) // fraction separator + { + ++nPos; + const sal_Int32 nStart(nPos); + sal_Int32 nTemp(0); + if (R_NOTHING == readUnsignedNumberMaxDigits(9, string, nPos, nTemp)) + { + bSuccess = false; + } + if (bSuccess) + { + sal_Int32 nDigits = std::min(nPos - nStart, 9); + assert(nDigits > 0); + for (; nDigits < 9; ++nDigits) + { + nTemp *= 10; + } + nNanoSeconds = nTemp; + } + } + + if (bSuccess && (nHours == 24)) + { + if (!((0 == nMinutes) && (0 == nSeconds) && (0 == nNanoSeconds))) + { + bSuccess = false; // only 24:00:00 is valid + } + } + } + + bool bHaveTimezone(false); + bool bHaveTimezonePlus(false); + bool bHaveTimezoneMinus(false); + if (bSuccess && (nPos < string.getLength())) + { + const sal_Unicode c(string[nPos]); + if ('+' == c) + { + bHaveTimezone = true; + bHaveTimezonePlus = true; + ++nPos; + } + else if ('-' == c) + { + bHaveTimezone = true; + bHaveTimezoneMinus = true; + ++nPos; + } + else if ('Z' == c) + { + bHaveTimezone = true; + ++nPos; + } + else + { + bSuccess = false; + } + } + sal_Int32 nTimezoneHours(0); + sal_Int32 nTimezoneMinutes(0); + if (bSuccess && (bHaveTimezonePlus || bHaveTimezoneMinus)) + { + bSuccess = readDateTimeComponent( + string, nPos, nTimezoneHours, 2, true); + bSuccess &= (0 <= nTimezoneHours) && (nTimezoneHours <= 14); + bSuccess &= (nPos < string.getLength()); // not last token + if (bSuccess && (':' != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + + bSuccess = readDateTimeComponent( + string, nPos, nTimezoneMinutes, 2, true); + bSuccess &= (0 <= nTimezoneMinutes) && (nTimezoneMinutes < 60); + } + if (bSuccess && (nTimezoneHours == 14)) + { + if (0 != nTimezoneMinutes) + { + bSuccess = false; // only +-14:00 is valid + } + } + } + + bSuccess &= (nPos == string.getLength()); // trailing junk? + + if (bSuccess) + { + sal_Int16 const nTimezoneOffset = (bHaveTimezoneMinus ? 
-1 : +1) + * ((nTimezoneHours * 60) + nTimezoneMinutes); + if (!pDate || bHaveTime) // time is optional + { + rDateTime.Year = + (isNegative ? -1 : +1) * static_cast(nYear); + rDateTime.Month = static_cast(nMonth); + rDateTime.Day = static_cast(nDay); + rDateTime.Hours = static_cast(nHours); + rDateTime.Minutes = static_cast(nMinutes); + rDateTime.Seconds = static_cast(nSeconds); + rDateTime.NanoSeconds = static_cast(nNanoSeconds); + if (bHaveTimezone) + { + if (pTimeZoneOffset) + { + *pTimeZoneOffset = nTimezoneOffset; + rDateTime.IsUTC = (0 == nTimezoneOffset); + } + else + { + lcl_ConvertToUTC(rDateTime.Year, rDateTime.Month, + rDateTime.Day, rDateTime.Hours, rDateTime.Minutes, + nTimezoneOffset); + rDateTime.IsUTC = true; + } + } + else + { + if (pTimeZoneOffset) + { + pTimeZoneOffset->reset(); + } + rDateTime.IsUTC = false; + } + rbDateTime = bHaveTime; + } + else + { + pDate->Year = + (isNegative ? -1 : +1) * static_cast(nYear); + pDate->Month = static_cast(nMonth); + pDate->Day = static_cast(nDay); + if (bHaveTimezone) + { + if (pTimeZoneOffset) + { + *pTimeZoneOffset = nTimezoneOffset; + } + else + { + // a Date cannot be adjusted + SAL_INFO("sax", "dropping timezone"); + } + } + else + { + if (pTimeZoneOffset) + { + pTimeZoneOffset->reset(); + } + } + rbDateTime = false; + } + } + return bSuccess; +} + +/** convert ISO "time" or "dateTime" string to util::DateTime */ +bool Converter::parseTimeOrDateTime( + util::DateTime & rDateTime, + const OUString & rString) +{ + bool dummy; + return lcl_parseDateTime( + nullptr, rDateTime, dummy, nullptr, rString, true); +} + +/** convert ISO "date" or "dateTime" string to util::DateTime or util::Date */ +bool Converter::parseDateOrDateTime( + util::Date *const pDate, util::DateTime & rDateTime, + bool & rbDateTime, + std::optional *const pTimeZoneOffset, + const OUString & rString ) +{ + return lcl_parseDateTime( + pDate, rDateTime, rbDateTime, pTimeZoneOffset, rString, false); +} + + +/** gets the position of the 
first comma after npos in the string + rStr. Commas inside '"' pairs are not matched */ +sal_Int32 Converter::indexOfComma( const OUString& rStr, + sal_Int32 nPos ) +{ + sal_Unicode cQuote = 0; + sal_Int32 nLen = rStr.getLength(); + for( ; nPos < nLen; nPos++ ) + { + sal_Unicode c = rStr[nPos]; + switch( c ) + { + case u'\'': + if( 0 == cQuote ) + cQuote = c; + else if( '\'' == cQuote ) + cQuote = 0; + break; + + case u'"': + if( 0 == cQuote ) + cQuote = c; + else if( '\"' == cQuote ) + cQuote = 0; + break; + + case u',': + if( 0 == cQuote ) + return nPos; + break; + } + } + + return -1; +} + +double Converter::GetConversionFactor(OUStringBuffer& rUnit, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit) +{ + double fRetval(1.0); + rUnit.setLength(0); + + + if(nSourceUnit != nTargetUnit) + { + const char* psUnit = nullptr; + + switch(nSourceUnit) + { + case MeasureUnit::TWIP: + { + switch(nTargetUnit) + { + case MeasureUnit::MM_100TH: + { + // 0.01mm = 0.57twip (exactly) + fRetval = ((25400.0 / 1440.0) / 10.0); + break; + } + case MeasureUnit::MM_10TH: + { + // 0.01mm = 0.57twip (exactly) + fRetval = ((25400.0 / 1440.0) / 100.0); + break; + } + case MeasureUnit::MM: + { + // 0.01mm = 0.57twip (exactly) + fRetval = ((25400.0 / 1440.0) / 1000.0); + psUnit = gpsMM; + break; + } + case MeasureUnit::CM: + { + // 0.001cm = 0.57twip (exactly) + fRetval = ((25400.0 / 1440.0) / 10000.0); + psUnit = gpsCM; + break; + } + case MeasureUnit::POINT: + { + // 0.01pt = 0.2twip (exactly) + fRetval = ((1000.0 / 20.0) / 1000.0); + psUnit = gpsPT; + break; + } + case MeasureUnit::INCH: + default: + { + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit, "output unit not supported for twip values"); + // 0.0001in = 0.144twip (exactly) + fRetval = ((100000.0 / 1440.0) / 100000.0); + psUnit = gpsINCH; + break; + } + } + break; + } + case MeasureUnit::POINT: + { + switch(nTargetUnit) + { + case MeasureUnit::MM_100TH: + { + // 1mm = 72 / 25.4 pt (exactly) + fRetval = ( 2540.0 / 72.0 ); + break; + } + 
case MeasureUnit::MM_10TH: + { + // 1mm = 72 / 25.4 pt (exactly) + fRetval = ( 254.0 / 72.0 ); + break; + } + case MeasureUnit::MM: + { + // 1mm = 72 / 25.4 pt (exactly) + fRetval = ( 25.4 / 72.0 ); + psUnit = gpsMM; + break; + + } + case MeasureUnit::CM: + { + // 1cm = 72 / 2.54 pt (exactly) + fRetval = ( 2.54 / 72.0 ); + psUnit = gpsCM; + break; + } + case MeasureUnit::TWIP: + { + // 1twip = 72 / 1440 pt (exactly) + fRetval = 20.0; // 1440.0 / 72.0 + psUnit = gpsPC; + break; + } + case MeasureUnit::INCH: + default: + { + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit, "output unit not supported for pt values"); + // 1in = 72 pt (exactly) + fRetval = ( 1.0 / 72.0 ); + psUnit = gpsINCH; + break; + } + } + break; + } + case MeasureUnit::MM_10TH: + { + switch(nTargetUnit) + { + case MeasureUnit::MM_100TH: + { + fRetval = 10.0; + break; + } + case MeasureUnit::MM: + { + // 0.01mm = 1 mm/100 (exactly) + fRetval = ((10.0 / 1.0) / 100.0); + psUnit = gpsMM; + break; + } + case MeasureUnit::CM: + { + fRetval = ((10.0 / 1.0) / 1000.0); + psUnit = gpsCM; + break; + } + case MeasureUnit::POINT: + { + // 0.01pt = 0.35 mm/100 (exactly) + fRetval = ((72000.0 / 2540.0) / 100.0); + psUnit = gpsPT; + break; + } + case MeasureUnit::TWIP: + { + fRetval = ((20.0 * 72000.0 / 2540.0) / 100.0); + psUnit = gpsPC; + break; + } + case MeasureUnit::INCH: + default: + { + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit, "output unit not supported for 1/10mm values"); + // 0.0001in = 0.254 mm/100 (exactly) + fRetval = ((100000.0 / 2540.0) / 10000.0); + psUnit = gpsINCH; + break; + } + } + break; + } + case MeasureUnit::MM_100TH: + { + switch(nTargetUnit) + { + case MeasureUnit::MM_10TH: + { + fRetval = ((10.0 / 1.0) / 100.0); + break; + } + case MeasureUnit::MM: + { + // 0.01mm = 1 mm/100 (exactly) + fRetval = ((10.0 / 1.0) / 1000.0); + psUnit = gpsMM; + break; + } + case MeasureUnit::CM: + { + fRetval = ((10.0 / 1.0) / 10000.0); + psUnit = gpsCM; + break; + } + case MeasureUnit::POINT: + { + // 
0.01pt = 0.35 mm/100 (exactly) + fRetval = ((72000.0 / 2540.0) / 1000.0); + psUnit = gpsPT; + break; + } + case MeasureUnit::TWIP: + { + fRetval = ((20.0 * 72000.0 / 2540.0) / 1000.0); + psUnit = gpsPC; + break; + } + case MeasureUnit::INCH: + default: + { + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit, "output unit not supported for 1/100mm values"); + // 0.0001in = 0.254 mm/100 (exactly) + fRetval = ((100000.0 / 2540.0) / 100000.0); + psUnit = gpsINCH; + break; + } + } + break; + } + case MeasureUnit::MM: + { + switch(nTargetUnit) + { + case MeasureUnit::MM_100TH: + { + fRetval = 100.0; + break; + } + case MeasureUnit::MM_10TH: + { + fRetval = 10.0; + break; + } + case MeasureUnit::CM: + { + fRetval = 0.1; + psUnit = gpsCM; + break; + } + case MeasureUnit::POINT: + { + fRetval = 72.0 / (2.54 * 10); + psUnit = gpsPT; + break; + } + case MeasureUnit::TWIP: + { + fRetval = (20.0 * 72.0) / (2.54 * 10); + psUnit = gpsPC; + break; + } + case MeasureUnit::INCH: + default: + { + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit, "output unit not supported for cm values"); + fRetval = 1 / (2.54 * 10); + psUnit = gpsINCH; + break; + } + } + break; + } + case MeasureUnit::CM: + { + switch(nTargetUnit) + { + case MeasureUnit::MM_100TH: + { + fRetval = 1000.0; + break; + } + case MeasureUnit::MM_10TH: + { + fRetval = 100.0; + break; + } + case MeasureUnit::MM: + { + fRetval = 10.0; + psUnit = gpsMM; + break; + } + case MeasureUnit::CM: + { + break; + } + case MeasureUnit::POINT: + { + fRetval = 72.0 / 2.54; + psUnit = gpsPT; + break; + } + case MeasureUnit::TWIP: + { + fRetval = (20.0 * 72.0) / 2.54; + psUnit = gpsPC; + break; + } + case MeasureUnit::INCH: + default: + { + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit, "output unit not supported for cm values"); + fRetval = 1 / 2.54; + psUnit = gpsINCH; + break; + } + } + break; + } + case MeasureUnit::INCH: + { + switch (nTargetUnit) + { + case MeasureUnit::MM_100TH: + { + fRetval = 2540; + break; + } + case MeasureUnit::MM_10TH: 
+ { + fRetval = 254; + break; + } + case MeasureUnit::MM: + { + fRetval = 25.4; + psUnit = gpsMM; + break; + } + case MeasureUnit::CM: + { + fRetval = 2.54; + psUnit = gpsCM; + break; + } + case MeasureUnit::POINT: + { + fRetval = 72.0; + psUnit = gpsPT; + break; + } + case MeasureUnit::TWIP: + { + fRetval = 72.0 * 20.0; + psUnit = gpsPC; + break; + } + default: + { + OSL_FAIL("output unit not supported for in values"); + fRetval = 1; + psUnit = gpsINCH; + break; + } + } + break; + } + default: + OSL_ENSURE(false, "sax::Converter::GetConversionFactor(): " + "source unit not supported"); + break; + } + + if( psUnit ) + rUnit.appendAscii( psUnit ); + } + + return fRetval; +} + +sal_Int16 Converter::GetUnitFromString(const OUString& rString, sal_Int16 nDefaultUnit) +{ + sal_Int32 nPos = 0; + sal_Int32 nLen = rString.getLength(); + sal_Int16 nRetUnit = nDefaultUnit; + + // skip white space + while( nPos < nLen && ' ' == rString[nPos] ) + nPos++; + + // skip negative + if( nPos < nLen && '-' == rString[nPos] ) + nPos++; + + // skip number + while( nPos < nLen && '0' <= rString[nPos] && '9' >= rString[nPos] ) + nPos++; + + if( nPos < nLen && '.' 
== rString[nPos] ) + { + nPos++; + while( nPos < nLen && '0' <= rString[nPos] && '9' >= rString[nPos] ) + nPos++; + } + + // skip white space + while( nPos < nLen && ' ' == rString[nPos] ) + nPos++; + + if( nPos < nLen ) + { + switch(rString[nPos]) + { + case u'%' : + { + nRetUnit = MeasureUnit::PERCENT; + break; + } + case u'c': + case u'C': + { + if(nPos+1 < nLen && (rString[nPos+1] == 'm' + || rString[nPos+1] == 'M')) + nRetUnit = MeasureUnit::CM; + break; + } + case u'e': + case u'E': + { + // CSS1_EMS or CSS1_EMX later + break; + } + case u'i': + case u'I': + { + if(nPos+1 < nLen && (rString[nPos+1] == 'n' + || rString[nPos+1] == 'N')) + nRetUnit = MeasureUnit::INCH; + break; + } + case u'm': + case u'M': + { + if(nPos+1 < nLen && (rString[nPos+1] == 'm' + || rString[nPos+1] == 'M')) + nRetUnit = MeasureUnit::MM; + break; + } + case u'p': + case u'P': + { + if(nPos+1 < nLen && (rString[nPos+1] == 't' + || rString[nPos+1] == 'T')) + nRetUnit = MeasureUnit::POINT; + if(nPos+1 < nLen && (rString[nPos+1] == 'c' + || rString[nPos+1] == 'C')) + nRetUnit = MeasureUnit::TWIP; + break; + } + } + } + + return nRetUnit; +} + + +bool Converter::convertAny(OUStringBuffer& rsValue, + OUStringBuffer& rsType , + const css::uno::Any& rValue) +{ + bool bConverted = false; + + rsValue.setLength(0); + rsType.setLength (0); + + switch (rValue.getValueTypeClass()) + { + case css::uno::TypeClass_BYTE : + case css::uno::TypeClass_SHORT : + case css::uno::TypeClass_UNSIGNED_SHORT : + case css::uno::TypeClass_LONG : + case css::uno::TypeClass_UNSIGNED_LONG : + { + sal_Int32 nTempValue = 0; + if (rValue >>= nTempValue) + { + rsType.append("integer"); + bConverted = true; + rsValue.append(nTempValue); + } + } + break; + + case css::uno::TypeClass_BOOLEAN : + { + bool bTempValue = false; + if (rValue >>= bTempValue) + { + rsType.append("boolean"); + bConverted = true; + ::sax::Converter::convertBool(rsValue, bTempValue); + } + } + break; + + case css::uno::TypeClass_FLOAT : + case 
css::uno::TypeClass_DOUBLE : + { + double fTempValue = 0.0; + if (rValue >>= fTempValue) + { + rsType.append("float"); + bConverted = true; + ::sax::Converter::convertDouble(rsValue, fTempValue); + } + } + break; + + case css::uno::TypeClass_STRING : + { + OUString sTempValue; + if (rValue >>= sTempValue) + { + rsType.append("string"); + bConverted = true; + rsValue.append(sTempValue); + } + } + break; + + case css::uno::TypeClass_STRUCT : + { + css::util::Date aDate ; + css::util::Time aTime ; + css::util::DateTime aDateTime; + + if (rValue >>= aDate) + { + rsType.append("date"); + bConverted = true; + css::util::DateTime aTempValue; + aTempValue.Day = aDate.Day; + aTempValue.Month = aDate.Month; + aTempValue.Year = aDate.Year; + aTempValue.NanoSeconds = 0; + aTempValue.Seconds = 0; + aTempValue.Minutes = 0; + aTempValue.Hours = 0; + ::sax::Converter::convertDateTime(rsValue, aTempValue, nullptr); + } + else + if (rValue >>= aTime) + { + rsType.append("time"); + bConverted = true; + css::util::Duration aTempValue; + aTempValue.Days = 0; + aTempValue.Months = 0; + aTempValue.Years = 0; + aTempValue.NanoSeconds = aTime.NanoSeconds; + aTempValue.Seconds = aTime.Seconds; + aTempValue.Minutes = aTime.Minutes; + aTempValue.Hours = aTime.Hours; + ::sax::Converter::convertDuration(rsValue, aTempValue); + } + else + if (rValue >>= aDateTime) + { + rsType.append("date"); + bConverted = true; + ::sax::Converter::convertDateTime(rsValue, aDateTime, nullptr); + } + } + break; + default: + break; + } + + return bConverted; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/tools/fastattribs.cxx b/sax/source/tools/fastattribs.cxx new file mode 100644 index 000000000..3df391c86 --- /dev/null +++ b/sax/source/tools/fastattribs.cxx @@ -0,0 +1,281 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include + +#include +#include +#include + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::xml; +using namespace ::com::sun::star::xml::sax; +namespace sax_fastparser +{ + +// wastage to keep MSVC happy vs. 
an in-line {} +FastTokenHandlerBase::~FastTokenHandlerBase() +{ +} + +UnknownAttribute::UnknownAttribute( const OUString& rNamespaceURL, const OString& rName, const OString& value ) + : maNamespaceURL( rNamespaceURL ), maName( rName ), maValue( value ) +{ +} + +UnknownAttribute::UnknownAttribute( const OString& rName, const OString& value ) + : maName( rName ), maValue( value ) +{ +} + +void UnknownAttribute::FillAttribute( Attribute* pAttrib ) const +{ + if( pAttrib ) + { + pAttrib->Name = OStringToOUString( maName, RTL_TEXTENCODING_UTF8 ); + pAttrib->NamespaceURL = maNamespaceURL; + pAttrib->Value = OStringToOUString( maValue, RTL_TEXTENCODING_UTF8 ); + } +} + +FastAttributeList::FastAttributeList( sax_fastparser::FastTokenHandlerBase *pTokenHandler) +: mpTokenHandler( pTokenHandler ) +{ + // random initial size of buffer to store attribute values + mnChunkLength = 58; + mpChunk = static_cast(malloc( mnChunkLength )); + maAttributeValues.push_back( 0 ); +} + +FastAttributeList::~FastAttributeList() +{ + free( mpChunk ); +} + +void FastAttributeList::clear() +{ + maAttributeTokens.clear(); + maAttributeValues.resize(1); + assert(maAttributeValues[0] == 0); + maUnknownAttributes.clear(); +} + +void FastAttributeList::add( sal_Int32 nToken, const char* pValue, size_t nValueLength ) +{ + assert(nToken != -1); + maAttributeTokens.push_back( nToken ); + sal_Int32 nWritePosition = maAttributeValues.back(); + maAttributeValues.push_back( maAttributeValues.back() + nValueLength + 1 ); + if (maAttributeValues.back() > mnChunkLength) + { + const sal_Int32 newLen = std::max(mnChunkLength * 2, maAttributeValues.back()); + if (auto p = static_cast(realloc(mpChunk, newLen))) + { + mnChunkLength = newLen; + mpChunk = p; + } + else + throw std::bad_alloc(); + } + strncpy(mpChunk + nWritePosition, pValue, nValueLength); + mpChunk[nWritePosition + nValueLength] = '\0'; +} + +void FastAttributeList::add( sal_Int32 nToken, const char* pValue ) +{ + add( nToken, pValue, strlen( pValue 
)); +} + +void FastAttributeList::add( sal_Int32 nToken, const OString& rValue ) +{ + add( nToken, rValue.getStr(), rValue.getLength() ); +} + +void FastAttributeList::addNS( sal_Int32 nNamespaceToken, sal_Int32 nToken, const OString& rValue ) +{ + sal_Int32 nCombinedToken = (nNamespaceToken << 16) | nToken; + add( nCombinedToken, rValue ); +} + +void FastAttributeList::addUnknown( const OUString& rNamespaceURL, const OString& rName, const OString& value ) +{ + maUnknownAttributes.emplace_back( rNamespaceURL, rName, value ); +} + +void FastAttributeList::addUnknown( const OString& rName, const OString& value ) +{ + maUnknownAttributes.emplace_back( rName, value ); +} + +// XFastAttributeList +sal_Bool FastAttributeList::hasAttribute( ::sal_Int32 Token ) +{ + for (sal_Int32 i : maAttributeTokens) + if (i == Token) + return true; + + return false; +} + +sal_Int32 FastAttributeList::getValueToken( ::sal_Int32 Token ) +{ + for (size_t i = 0; i < maAttributeTokens.size(); ++i) + if (maAttributeTokens[i] == Token) + return FastTokenHandlerBase::getTokenFromChars( + mpTokenHandler, + getFastAttributeValue(i), + AttributeValueLength( i ) ); + + throw SAXException("FastAttributeList::getValueToken: unknown token " + OUString::number(Token), nullptr, Any()); +} + +sal_Int32 FastAttributeList::getOptionalValueToken( ::sal_Int32 Token, ::sal_Int32 Default ) +{ + for (size_t i = 0; i < maAttributeTokens.size(); ++i) + if (maAttributeTokens[i] == Token) + return FastTokenHandlerBase::getTokenFromChars( + mpTokenHandler, + getFastAttributeValue(i), + AttributeValueLength( i ) ); + + return Default; +} + +// performance sensitive shortcuts to avoid allocation ... 
+bool FastAttributeList::getAsInteger( sal_Int32 nToken, sal_Int32 &rInt) const +{ + rInt = 0; + for (size_t i = 0; i < maAttributeTokens.size(); ++i) + if (maAttributeTokens[i] == nToken) + { + rInt = rtl_str_toInt32( getFastAttributeValue(i), 10 ); + return true; + } + return false; +} + +sal_Int32 FastAttributeList::getAsIntegerByIndex( sal_Int32 nTokenIndex ) const +{ + return rtl_str_toInt32( getFastAttributeValue(nTokenIndex), 10 ); +} + +bool FastAttributeList::getAsDouble( sal_Int32 nToken, double &rDouble) const +{ + rDouble = 0.0; + for (size_t i = 0; i < maAttributeTokens.size(); ++i) + if (maAttributeTokens[i] == nToken) + { + rDouble = rtl_str_toDouble( getFastAttributeValue(i) ); + return true; + } + return false; +} + +bool FastAttributeList::getAsChar( sal_Int32 nToken, const char*& rPos ) const +{ + for (size_t i = 0, n = maAttributeTokens.size(); i < n; ++i) + { + if (maAttributeTokens[i] != nToken) + continue; + + sal_Int32 nOffset = maAttributeValues[i]; + rPos = mpChunk + nOffset; + return true; + } + + return false; +} + +const char* FastAttributeList::getAsCharByIndex( sal_Int32 nTokenIndex ) const +{ + sal_Int32 nOffset = maAttributeValues[nTokenIndex]; + return mpChunk + nOffset; +} + +OUString FastAttributeList::getValue( ::sal_Int32 Token ) +{ + for (size_t i = 0; i < maAttributeTokens.size(); ++i) + if (maAttributeTokens[i] == Token) + return OUString( getFastAttributeValue(i), AttributeValueLength(i), RTL_TEXTENCODING_UTF8 ); + + throw SAXException("FastAttributeList::getValue: unknown token " + OUString::number(Token), nullptr, Any()); +} + +OUString FastAttributeList::getValueByIndex( ::sal_Int32 nTokenIndex ) const +{ + return OUString( getFastAttributeValue(nTokenIndex), AttributeValueLength(nTokenIndex), RTL_TEXTENCODING_UTF8 ); +} + +OUString FastAttributeList::getOptionalValue( ::sal_Int32 Token ) +{ + for (size_t i = 0; i < maAttributeTokens.size(); ++i) + if (maAttributeTokens[i] == Token) + return OUString( 
getFastAttributeValue(i), AttributeValueLength(i), RTL_TEXTENCODING_UTF8 ); + + return OUString(); +} +Sequence< Attribute > FastAttributeList::getUnknownAttributes( ) +{ + auto nSize = maUnknownAttributes.size(); + if (nSize == 0) + return {}; + Sequence< Attribute > aSeq( nSize ); + Attribute* pAttr = aSeq.getArray(); + for( const auto& rAttr : maUnknownAttributes ) + rAttr.FillAttribute( pAttr++ ); + return aSeq; +} +Sequence< FastAttribute > FastAttributeList::getFastAttributes( ) +{ + Sequence< FastAttribute > aSeq( maAttributeTokens.size() ); + FastAttribute* pAttr = aSeq.getArray(); + for (size_t i = 0; i < maAttributeTokens.size(); ++i) + { + pAttr->Token = maAttributeTokens[i]; + pAttr->Value = OUString( getFastAttributeValue(i), AttributeValueLength(i), RTL_TEXTENCODING_UTF8 ); + pAttr++; + } + return aSeq; +} + +FastAttributeList::FastAttributeIter FastAttributeList::find( sal_Int32 nToken ) const +{ + for (size_t i = 0; i < maAttributeTokens.size(); ++i) + if( maAttributeTokens[i] == nToken ) + return FastAttributeIter(*this, i); + return end(); +} + +sal_Int32 FastTokenHandlerBase::getTokenFromChars( + const FastTokenHandlerBase *pTokenHandler, + const char *pToken, size_t nLen /* = 0 */ ) +{ + sal_Int32 nRet; + + if( !nLen ) + nLen = strlen( pToken ); + + nRet = pTokenHandler->getTokenDirect( pToken, static_cast(nLen) ); + + return nRet; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx new file mode 100644 index 000000000..202641999 --- /dev/null +++ b/sax/source/tools/fastserializer.cxx @@ -0,0 +1,813 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include "fastserializer.hxx" + +#include +#include +#include +#include +#include + +#include + +#if OSL_DEBUG_LEVEL > 0 +#include +#include +#endif + +using ::std::vector; +using ::com::sun::star::uno::Sequence; +using ::com::sun::star::io::XOutputStream; + +#define HAS_NAMESPACE(x) ((x & 0xffff0000) != 0) +#define NAMESPACE(x) (x >> 16) +#define TOKEN(x) (x & 0xffff) +// number of characters without terminating 0 +#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1) + +static const char sClosingBracket[] = ">"; +static const char sSlashAndClosingBracket[] = "/>"; +static const char sColon[] = ":"; +static const char sOpeningBracket[] = "<"; +static const char sOpeningBracketAndSlash[] = "\n"; + +namespace sax_fastparser { + FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream ) + : maCachedOutputStream() + , maMarkStack() + , mbMarkStackEmpty(true) + , mpDoubleStr(nullptr) + , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE) + , mbXescape(true) + { + rtl_string_new_WithLength(&mpDoubleStr, mnDoubleStrCapacity); + mxFastTokenHandler = css::xml::sax::FastTokenHandler::create( + ::comphelper::getProcessComponentContext()); + assert(xOutputStream.is()); // cannot do anything without that + maCachedOutputStream.setOutputStream( xOutputStream ); + } + + FastSaxSerializer::~FastSaxSerializer() + { + rtl_string_release(mpDoubleStr); + } + + void 
FastSaxSerializer::startDocument() + { + writeBytes(sXmlHeader, N_CHARS(sXmlHeader)); + } + + void FastSaxSerializer::write( double value ) + { + rtl_math_doubleToString( + &mpDoubleStr, &mnDoubleStrCapacity, 0, value, rtl_math_StringFormat_G, + RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', nullptr, + 0, true); + + write(mpDoubleStr->buffer, mpDoubleStr->length); + // and "clear" the string + mpDoubleStr->length = 0; + mnDoubleStrCapacity = RTL_STR_MAX_VALUEOFDOUBLE; + } + + void FastSaxSerializer::write( const OUString& sOutput, bool bEscape ) + { + write( OUStringToOString(sOutput, RTL_TEXTENCODING_UTF8), bEscape ); + + } + + void FastSaxSerializer::write( const OString& sOutput, bool bEscape ) + { + write( sOutput.getStr(), sOutput.getLength(), bEscape ); + } + + /** Characters not allowed in XML 1.0 + XML 1.1 would exclude only U+0000 + */ + static bool invalidChar( char c ) + { + if (static_cast(c) >= 0x20) + return false; + + switch (c) + { + case 0x09: + case 0x0a: + case 0x0d: + return false; + } + return true; + } + + static bool isHexDigit( char c ) + { + return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f'); + } + + void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape ) + { + if (nLen == -1) + nLen = pStr ? strlen(pStr) : 0; + + if (!bEscape) + { + writeBytes( pStr, nLen ); + return; + } + + bool bGood = true; + const sal_Int32 kXescapeLen = 7; + char bufXescape[kXescapeLen+1]; + sal_Int32 nNextXescape = 0; + for (sal_Int32 i = 0; i < nLen; ++i) + { + char c = pStr[ i ]; + switch( c ) + { + case '<': writeBytes( "<", 4 ); break; + case '>': writeBytes( ">", 4 ); break; + case '&': writeBytes( "&", 5 ); break; + case '\'': writeBytes( "'", 6 ); break; + case '"': writeBytes( """, 6 ); break; + case '\t': +#if 0 + // Seems OOXML prefers the _xHHHH_ escape over the + // entity in *some* cases, apparently in attribute + // values but not in element data. 
+ // Would need to distinguish at a higher level. + if (mbXescape) + { + snprintf( bufXescape, kXescapeLen+1, "_x%04x_", + static_cast(static_cast(c))); + writeBytes( bufXescape, kXescapeLen); + } + else +#endif + { + writeBytes( " ", 4 ); + } + break; + case '\n': +#if 0 + if (mbXescape) + { + snprintf( bufXescape, kXescapeLen+1, "_x%04x_", + static_cast(static_cast(c))); + writeBytes( bufXescape, kXescapeLen); + } + else +#endif + { + writeBytes( " ", 5 ); + } + break; + case '\r': +#if 0 + if (mbXescape) + { + snprintf( bufXescape, kXescapeLen+1, "_x%04x_", + static_cast(static_cast(c))); + writeBytes( bufXescape, kXescapeLen); + } + else +#endif + { + writeBytes( " ", 5 ); + } + break; + default: + if (mbXescape) + { + char c1, c2, c3, c4; + // Escape characters not valid in XML 1.0 as + // _xHHHH_. A literal "_xHHHH_" has to be + // escaped as _x005F_xHHHH_ (effectively + // escaping the leading '_'). + // See ECMA-376-1:2016 page 3736, + // 22.4.2.4 bstr (Basic String) + // for reference. + if (c == '_' && i >= nNextXescape && i <= nLen - kXescapeLen && + pStr[i+6] == '_' && + ((pStr[i+1] | 0x20) == 'x') && + isHexDigit( c1 = pStr[i+2] ) && + isHexDigit( c2 = pStr[i+3] ) && + isHexDigit( c3 = pStr[i+4] ) && + isHexDigit( c4 = pStr[i+5] )) + { + // OOXML has the odd habit to write some + // names using this that when re-saving + // should *not* be escaped, specifically + // _x0020_ for blanks in w:xpath values. + if (!(c1 == '0' && c2 == '0' && c3 == '2' && c4 == '0')) + { + // When encountering "_x005F_xHHHH_" + // assume that is an already escaped + // sequence that was not unescaped and + // shall be written as is, to not end + // up with "_x005F_x005F_xHHHH_" and + // repeated... 
+ if (c1 == '0' && c2 == '0' && c3 == '5' && (c4 | 0x20) == 'f' && + i + kXescapeLen <= nLen - 6 && + pStr[i+kXescapeLen+5] == '_' && + ((pStr[i+kXescapeLen+0] | 0x20) == 'x') && + isHexDigit( pStr[i+kXescapeLen+1] ) && + isHexDigit( pStr[i+kXescapeLen+2] ) && + isHexDigit( pStr[i+kXescapeLen+3] ) && + isHexDigit( pStr[i+kXescapeLen+4] )) + { + writeBytes( &c, 1 ); + // Remember this fake escapement. + nNextXescape = i + kXescapeLen + 6; + } + else + { + writeBytes( "_x005F_", kXescapeLen); + // Remember this escapement so in + // _xHHHH_xHHHH_ only the first '_' + // is escaped. + nNextXescape = i + kXescapeLen; + } + break; + } + } + if (invalidChar(c)) + { + snprintf( bufXescape, kXescapeLen+1, "_x%04x_", + static_cast(static_cast(c))); + writeBytes( bufXescape, kXescapeLen); + break; + } + /* TODO: also U+FFFE and U+FFFF are not allowed + * in XML 1.0, assuming we're writing UTF-8 + * those should be escaped as well to be + * conformant. Likely that would involve + * scanning for both encoded sequences and + * write as _xHHHH_? */ + } +#if OSL_DEBUG_LEVEL > 0 + else + { + if (bGood && invalidChar(pStr[i])) + { + bGood = false; + // The SAL_WARN() for the single character is + // issued in writeBytes(), just gather for the + // SAL_WARN_IF() below. 
+ } + } +#endif + writeBytes( &c, 1 ); + break; + } + } + SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min(nLen,42)) << "'"); + } + + void FastSaxSerializer::endDocument() + { + assert(mbMarkStackEmpty && maMarkStack.empty()); + maCachedOutputStream.flush(); + } + + void FastSaxSerializer::writeId( ::sal_Int32 nElement ) + { + if( HAS_NAMESPACE( nElement ) ) { + auto const Namespace(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement))); + assert(Namespace.hasElements()); + writeBytes(Namespace); + writeBytes(sColon, N_CHARS(sColon)); + auto const Element(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement))); + assert(Element.hasElements()); + writeBytes(Element); + } else { + auto const Element(mxFastTokenHandler->getUTF8Identifier(nElement)); + assert(Element.hasElements()); + writeBytes(Element); + } + } + +#ifdef DBG_UTIL + OString FastSaxSerializer::getId( ::sal_Int32 nElement ) + { + if (HAS_NAMESPACE(nElement)) { + Sequence const ns( + mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement))); + Sequence const name( + mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement))); + return OString(reinterpret_cast(ns.getConstArray()), ns.getLength()) + + OString(sColon, N_CHARS(sColon)) + + OString(reinterpret_cast(name.getConstArray()), name.getLength()); + } else { + Sequence const name( + mxFastTokenHandler->getUTF8Identifier(nElement)); + return OString(reinterpret_cast(name.getConstArray()), name.getLength()); + } + } +#endif + + void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList ) + { + if ( !mbMarkStackEmpty ) + { + maCachedOutputStream.flush(); + maMarkStack.top()->setCurrentElement( Element ); + } + +#ifdef DBG_UTIL + if (mbMarkStackEmpty) + m_DebugStartedElements.push(Element); + else + maMarkStack.top()->m_DebugStartedElements.push_back(Element); +#endif + + writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket)); + + writeId(Element); + if (pAttrList) + 
writeFastAttributeList(*pAttrList); + else + writeTokenValueList(); + + writeBytes(sClosingBracket, N_CHARS(sClosingBracket)); + } + + void FastSaxSerializer::endFastElement( ::sal_Int32 Element ) + { +#ifdef DBG_UTIL + // Well-formedness constraint: Element Type Match + if (mbMarkStackEmpty) + { + assert(!m_DebugStartedElements.empty()); + assert(Element == m_DebugStartedElements.top()); + m_DebugStartedElements.pop(); + } + else + { + if (dynamic_cast(maMarkStack.top().get())) + { + // Sort is always well-formed fragment + assert(!maMarkStack.top()->m_DebugStartedElements.empty()); + } + if (maMarkStack.top()->m_DebugStartedElements.empty()) + { + maMarkStack.top()->m_DebugEndedElements.push_back(Element); + } + else + { + assert(Element == maMarkStack.top()->m_DebugStartedElements.back()); + maMarkStack.top()->m_DebugStartedElements.pop_back(); + } + } +#endif + + writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash)); + + writeId(Element); + + writeBytes(sClosingBracket, N_CHARS(sClosingBracket)); + } + + void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList ) + { + if ( !mbMarkStackEmpty ) + { + maCachedOutputStream.flush(); + maMarkStack.top()->setCurrentElement( Element ); + } + + writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket)); + + writeId(Element); + if (pAttrList) + writeFastAttributeList(*pAttrList); + else + writeTokenValueList(); + + writeBytes(sSlashAndClosingBracket, N_CHARS(sSlashAndClosingBracket)); + } + + css::uno::Reference< css::io::XOutputStream > const & FastSaxSerializer::getOutputStream() const + { + return maCachedOutputStream.getOutputStream(); + } + + void FastSaxSerializer::writeTokenValueList() + { +#ifdef DBG_UTIL + ::std::set DebugAttributes; +#endif + for (const TokenValue & rTokenValue : maTokenValues) + { + writeBytes(sSpace, N_CHARS(sSpace)); + + sal_Int32 nToken = rTokenValue.nToken; + writeId(nToken); + +#ifdef DBG_UTIL + // Well-formedness constraint: Unique 
Att Spec + OString const nameId(getId(nToken)); + assert(DebugAttributes.find(nameId) == DebugAttributes.end()); + DebugAttributes.insert(nameId); +#endif + + writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote)); + + write(rTokenValue.pValue, -1, true); + + writeBytes(sQuote, N_CHARS(sQuote)); + } + maTokenValues.clear(); + } + + void FastSaxSerializer::writeFastAttributeList(FastAttributeList const & rAttrList) + { +#ifdef DBG_UTIL + ::std::set DebugAttributes; +#endif + const std::vector< sal_Int32 >& Tokens = rAttrList.getFastAttributeTokens(); + for (size_t j = 0; j < Tokens.size(); j++) + { + writeBytes(sSpace, N_CHARS(sSpace)); + + sal_Int32 nToken = Tokens[j]; + writeId(nToken); + +#ifdef DBG_UTIL + // Well-formedness constraint: Unique Att Spec + OString const nameId(getId(nToken)); + SAL_WARN_IF(DebugAttributes.find(nameId) != DebugAttributes.end(), "sax", "Duplicate attribute: " << nameId ); + assert(DebugAttributes.find(nameId) == DebugAttributes.end()); + DebugAttributes.insert(nameId); +#endif + + writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote)); + + const char* pAttributeValue = rAttrList.getFastAttributeValue(j); + + // tdf#127274 don't escape the special VML shape type id "#_x0000_t202" + bool bEscape = !(pAttributeValue && strcmp(pAttributeValue, "#_x0000_t202") == 0); + + write(pAttributeValue, rAttrList.AttributeValueLength(j), bEscape); + + writeBytes(sQuote, N_CHARS(sQuote)); + } + } + + void FastSaxSerializer::mark(sal_Int32 const nTag, const Int32Sequence& rOrder) + { + if (rOrder.hasElements()) + { + auto pSort = std::make_shared(nTag, rOrder); + maMarkStack.push( pSort ); + maCachedOutputStream.setOutput( pSort ); + } + else + { + auto pMerge = std::make_shared(nTag); + maMarkStack.push( pMerge ); + maCachedOutputStream.setOutput( pMerge ); + } + mbMarkStackEmpty = false; + } + +#ifdef DBG_UTIL + static void lcl_DebugMergeAppend( + std::deque & rLeftEndedElements, + std::deque & rLeftStartedElements, + std::deque & 
rRightEndedElements, + std::deque & rRightStartedElements) + { + while (!rRightEndedElements.empty()) + { + if (rLeftStartedElements.empty()) + { + rLeftEndedElements.push_back(rRightEndedElements.front()); + } + else + { + assert(rLeftStartedElements.back() == rRightEndedElements.front()); + rLeftStartedElements.pop_back(); + } + rRightEndedElements.pop_front(); + } + while (!rRightStartedElements.empty()) + { + rLeftStartedElements.push_back(rRightStartedElements.front()); + rRightStartedElements.pop_front(); + } + } + + static void lcl_DebugMergePrepend( + std::deque & rLeftEndedElements, + std::deque & rLeftStartedElements, + std::deque & rRightEndedElements, + std::deque & rRightStartedElements) + { + while (!rLeftStartedElements.empty()) + { + if (rRightEndedElements.empty()) + { + rRightStartedElements.push_front(rLeftStartedElements.back()); + } + else + { + assert(rRightEndedElements.front() == rLeftStartedElements.back()); + rRightEndedElements.pop_front(); + } + rLeftStartedElements.pop_back(); + } + while (!rLeftEndedElements.empty()) + { + rRightEndedElements.push_front(rLeftEndedElements.back()); + rLeftEndedElements.pop_back(); + } + } +#endif + + void FastSaxSerializer::mergeTopMarks( + sal_Int32 const nTag, sax_fastparser::MergeMarks const eMergeType) + { + SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge"); + assert(!mbMarkStackEmpty); // should never happen + if ( mbMarkStackEmpty ) + return; + + assert(maMarkStack.top()->m_Tag == nTag && "mark/merge tag mismatch!"); + (void) nTag; +#ifdef DBG_UTIL + if (dynamic_cast(maMarkStack.top().get())) + { + // Sort is always well-formed fragment + assert(maMarkStack.top()->m_DebugStartedElements.empty()); + assert(maMarkStack.top()->m_DebugEndedElements.empty()); + } + lcl_DebugMergeAppend( + maMarkStack.top()->m_DebugEndedElements, + maMarkStack.top()->m_DebugStartedElements, + maMarkStack.top()->m_DebugPostponedEndedElements, + 
maMarkStack.top()->m_DebugPostponedStartedElements); +#endif + + // flush, so that we get everything in getData() + maCachedOutputStream.flush(); + + if (maMarkStack.size() == 1) + { +#ifdef DBG_UTIL + while (!maMarkStack.top()->m_DebugEndedElements.empty()) + { + assert(maMarkStack.top()->m_DebugEndedElements.front() == m_DebugStartedElements.top()); + maMarkStack.top()->m_DebugEndedElements.pop_front(); + m_DebugStartedElements.pop(); + } + while (!maMarkStack.top()->m_DebugStartedElements.empty()) + { + m_DebugStartedElements.push(maMarkStack.top()->m_DebugStartedElements.front()); + maMarkStack.top()->m_DebugStartedElements.pop_front(); + } +#endif + Sequence aSeq( maMarkStack.top()->getData() ); + maMarkStack.pop(); + mbMarkStackEmpty = true; + maCachedOutputStream.resetOutputToStream(); + maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() ); + return; + } + +#ifdef DBG_UTIL + ::std::deque topDebugStartedElements(maMarkStack.top()->m_DebugStartedElements); + ::std::deque topDebugEndedElements(maMarkStack.top()->m_DebugEndedElements); +#endif + const Int8Sequence aMerge( maMarkStack.top()->getData() ); + maMarkStack.pop(); +#ifdef DBG_UTIL + switch (eMergeType) + { + case MergeMarks::APPEND: + lcl_DebugMergeAppend( + maMarkStack.top()->m_DebugEndedElements, + maMarkStack.top()->m_DebugStartedElements, + topDebugEndedElements, + topDebugStartedElements); + break; + case MergeMarks::PREPEND: + if (dynamic_cast(maMarkStack.top().get())) // argh... 
+ { + lcl_DebugMergeAppend( + maMarkStack.top()->m_DebugEndedElements, + maMarkStack.top()->m_DebugStartedElements, + topDebugEndedElements, + topDebugStartedElements); + } + else + { + lcl_DebugMergePrepend( + topDebugEndedElements, + topDebugStartedElements, + maMarkStack.top()->m_DebugEndedElements, + maMarkStack.top()->m_DebugStartedElements); + } + break; + case MergeMarks::POSTPONE: + lcl_DebugMergeAppend( + maMarkStack.top()->m_DebugPostponedEndedElements, + maMarkStack.top()->m_DebugPostponedStartedElements, + topDebugEndedElements, + topDebugStartedElements); + break; + } +#endif + if (maMarkStack.empty()) + { + mbMarkStackEmpty = true; + maCachedOutputStream.resetOutputToStream(); + } + else + { + maCachedOutputStream.setOutput( maMarkStack.top() ); + } + + switch ( eMergeType ) + { + case MergeMarks::APPEND: maMarkStack.top()->append( aMerge ); break; + case MergeMarks::PREPEND: maMarkStack.top()->prepend( aMerge ); break; + case MergeMarks::POSTPONE: maMarkStack.top()->postpone( aMerge ); break; + } + } + + void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData ) + { + maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() ); + } + + void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen ) + { +#if OSL_DEBUG_LEVEL > 0 + { + bool bGood = true; + for (size_t i=0; i < nLen; ++i) + { + if (invalidChar(pStr[i])) + { + bGood = false; + SAL_WARN("sax", "FastSaxSerializer::writeBytes - illegal XML character 0x" << + std::hex << int(static_cast(pStr[i]))); + } + } + SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min(nLen,42)) << "'"); + } +#endif + maCachedOutputStream.writeBytes( reinterpret_cast(pStr), nLen ); + } + + FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData() + { + merge( maData, maPostponed, true ); + maPostponed.realloc( 0 ); + + return maData; + } + +#if OSL_DEBUG_LEVEL > 0 + void FastSaxSerializer::ForMerge::print( ) + { + std::cerr << "Data: "; + for ( 
sal_Int32 i=0, len=maData.getLength(); i < len; i++ ) + { + std::cerr << maData[i]; + } + + std::cerr << "\nPostponed: "; + for ( sal_Int32 i=0, len=maPostponed.getLength(); i < len; i++ ) + { + std::cerr << maPostponed[i]; + } + + std::cerr << "\n"; + } +#endif + + void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat ) + { + merge( maData, rWhat, false ); + } + + void FastSaxSerializer::ForMerge::append( const css::uno::Sequence &rWhat ) + { + merge( maData, rWhat, true ); + } + + void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat ) + { + merge( maPostponed, rWhat, true ); + } + + void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend ) + { + sal_Int32 nMergeLen = rMerge.getLength(); + if ( nMergeLen <= 0 ) + return; + + sal_Int32 nTopLen = rTop.getLength(); + + rTop.realloc( nTopLen + nMergeLen ); + if ( bAppend ) + { + // append the rMerge to the rTop + memcpy( rTop.getArray() + nTopLen, rMerge.getConstArray(), nMergeLen ); + } + else + { + // prepend the rMerge to the rTop + memmove( rTop.getArray() + nMergeLen, rTop.getConstArray(), nTopLen ); + memcpy( rTop.getArray(), rMerge.getConstArray(), nMergeLen ); + } + } + + void FastSaxSerializer::ForMerge::resetData( ) + { + maData = Int8Sequence(); + } + + void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement ) + { + vector< sal_Int32 > aOrder( comphelper::sequenceToContainer >(maOrder) ); + if( std::find( aOrder.begin(), aOrder.end(), nElement ) != aOrder.end() ) + { + mnCurrentElement = nElement; + if ( maData.find( nElement ) == maData.end() ) + maData[ nElement ] = Int8Sequence(); + } + } + + void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat ) + { + append( rWhat ); + } + + void FastSaxSerializer::ForSort::append( const css::uno::Sequence &rWhat ) + { + merge( maData[mnCurrentElement], rWhat, true ); + } + + void FastSaxSerializer::ForSort::sort() + { + // Clear the ForMerge data to avoid 
duplicate items + resetData(); + + // Sort it all + std::map< sal_Int32, Int8Sequence >::iterator iter; + for ( const auto nIndex : std::as_const(maOrder) ) + { + iter = maData.find( nIndex ); + if ( iter != maData.end() ) + ForMerge::append( iter->second ); + } + } + + FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData() + { + sort( ); + return ForMerge::getData(); + } + +#if OSL_DEBUG_LEVEL > 0 + void FastSaxSerializer::ForSort::print( ) + { + for ( const auto& [rElement, rData] : maData ) + { + std::cerr << "pair: " << rElement; + for ( sal_Int32 i=0, len=rData.getLength(); i < len; ++i ) + std::cerr << rData[i]; + std::cerr << "\n"; + } + + sort( ); + ForMerge::print(); + } +#endif + +} // namespace sax_fastparser + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx new file mode 100644 index 000000000..0b7ad72d8 --- /dev/null +++ b/sax/source/tools/fastserializer.hxx @@ -0,0 +1,254 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_SAX_SOURCE_TOOLS_FASTSERIALIZER_HXX +#define INCLUDED_SAX_SOURCE_TOOLS_FASTSERIALIZER_HXX + +#include +#include + +#include +#include +#include "CachedOutputStream.hxx" + +#include +#include +#include + +namespace sax_fastparser { + +struct TokenValue +{ + sal_Int32 nToken; + const char *pValue; + TokenValue(sal_Int32 _nToken, const char *_pValue) : nToken(_nToken), pValue(_pValue) {} +}; +typedef std::vector TokenValueList; + +/// Receives notification of sax document events to write into an XOutputStream. +class FastSaxSerializer +{ + typedef css::uno::Sequence< ::sal_Int8 > Int8Sequence; + typedef css::uno::Sequence< ::sal_Int32 > Int32Sequence; + +public: + explicit FastSaxSerializer(const css::uno::Reference< css::io::XOutputStream >& xOutputStream); + ~FastSaxSerializer(); + + css::uno::Reference< css::io::XOutputStream > const & getOutputStream() const; + /// called by FSHelper to put data in for writeTokenValueList + TokenValueList& getTokenValueList() { return maTokenValues; } + + /** called by the parser when parsing of an XML stream is started. + */ + void startDocument(); + + /** called by the parser after the last XML element of a stream is processed. + */ + void endDocument(); + + /** receives notification of the beginning of an element. + + @param Element + contains the integer token from the XFastTokenHandler + registered at the XFastParser.
+ + If the element has a namespace that was registered with the + XFastParser, Element contains the integer + token of the elements local name from the XFastTokenHandler + and the integer token of the namespace combined with an arithmetic + or operation. + + @param pAttrList + Contains a FastAttributeList to access the attributes + from the element. + + */ + void startFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList = nullptr ); + + /** receives notification of the end of a known element. + @see startFastElement + */ + void endFastElement( ::sal_Int32 Element ); + + /** receives notification of the beginning of a single element. + + @param Element + contains the integer token from the XFastTokenHandler + registered at the XFastParser.
+ + If the element has a namespace that was registered with the + XFastParser, Element contains the integer + token of the elements local name from the XFastTokenHandler + and the integer token of the namespace combined with an arithmetic + or operation. + + @param pAttrList + Contains a FastAttributeList to access the attributes + from the element. + + */ + void singleFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList = nullptr ); + + // C++ helpers + void writeId( ::sal_Int32 Element ); + OString getId( ::sal_Int32 Element ); + + void write( double value ); + void write( const OUString& s, bool bEscape = false ); + void write( const OString& s, bool bEscape = false ); + void write( const char* pStr, sal_Int32 nLen, bool bEscape = false ); + +public: + /** From now on, don't write directly to the stream, but to top of a stack. + + This is to be able to change the order of the data being written. + If you need to write eg. + p, r, rPr, [something], /rPr, t, [text], /t, /r, /p, + but get it in order + p, r, t, [text], /t, rPr, [something], /rPr, /r, /p, + simply do + p, r, mark(), t, [text], /t, mark(), rPr, [something], /rPr, + mergeTopMarks( MergeMarks::PREPEND ), mergeTopMarks( MergeMarks::APPEND ), /r, /p + and you are done. + + @param nTag debugging aid to ensure mark and merge match in LIFO order + */ + void mark(sal_Int32 nTag, const Int32Sequence& rOrder); + + /** Merge 2 topmost marks. + + The possibilities: prepend the top before the second top-most + mark, append it, append it later or ignore; prepending brings the possibility + to switch parts of the output, appending later allows to write some + output in advance. + + Writes the result to the output stream if the mark stack becomes empty + by the operation. + + When the MergeMarks::POSTPONE is specified, the merge happens just + before the next merge. 
+ + @param nTag debugging aid to ensure mark and merge match in LIFO order + + @see mark() + */ + void mergeTopMarks(sal_Int32 nTag, + sax_fastparser::MergeMarks eMergeType); + +private: + /** Helper class to cache data and write in chunks to XOutputStream or ForMerge::append. + * Its flush method needs to be called before touching maMarkStack + * to ensure correct order of ForSort methods. + */ + CachedOutputStream maCachedOutputStream; + css::uno::Reference< css::xml::sax::XFastTokenHandler > mxFastTokenHandler; + + class ForMerge : public ForMergeBase + { + Int8Sequence maData; + Int8Sequence maPostponed; + + public: + sal_Int32 const m_Tag; +#ifdef DBG_UTIL + // pending close tags, followed by pending open tags + std::deque m_DebugEndedElements; + std::deque m_DebugStartedElements; + // ... and another buffer for maPostponed ... + std::deque m_DebugPostponedEndedElements; + std::deque m_DebugPostponedStartedElements; +#endif + + explicit ForMerge(sal_Int32 const nTag) : m_Tag(nTag) {} + + virtual void setCurrentElement( ::sal_Int32 /*nToken*/ ) {} + virtual Int8Sequence& getData(); +#if OSL_DEBUG_LEVEL > 0 + virtual void print(); +#endif + + virtual void prepend( const Int8Sequence &rWhat ); + virtual void append( const css::uno::Sequence &rWhat ) override; + void postpone( const Int8Sequence &rWhat ); + + protected: + void resetData( ); + static void merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend ); + }; + + class ForSort : public ForMerge + { + std::map< ::sal_Int32, Int8Sequence > maData; + sal_Int32 mnCurrentElement; + + Int32Sequence maOrder; + + public: + ForSort(sal_Int32 const nTag, const Int32Sequence& rOrder) + : ForMerge(nTag) + , mnCurrentElement( 0 ) + , maOrder( rOrder ) + {} + + void setCurrentElement( ::sal_Int32 nToken ) override; + + virtual Int8Sequence& getData() override; + +#if OSL_DEBUG_LEVEL > 0 + virtual void print() override; +#endif + + virtual void prepend( const Int8Sequence &rWhat ) override; + virtual void 
append( const css::uno::Sequence &rWhat ) override; + private: + void sort(); + }; + + std::stack< std::shared_ptr< ForMerge > > maMarkStack; + bool mbMarkStackEmpty; + // Would be better to use OStringBuffer instead of these two + // but then we couldn't get the rtl_String* member :-( + rtl_String *mpDoubleStr; + sal_Int32 mnDoubleStrCapacity; + TokenValueList maTokenValues; + bool mbXescape; ///< whether to escape invalid XML characters as _xHHHH_ in write(const char*,sal_Int32,true) + /* TODO: make that configurable from the outside for + * some specific cases? */ + +#ifdef DBG_UTIL + std::stack m_DebugStartedElements; +#endif + + void writeTokenValueList(); + void writeFastAttributeList(FastAttributeList const & rAttrList); + + /** Forward the call to the output stream, or write to the stack. + + The latter in the case that we are inside a mark(). + */ + void writeBytes( const css::uno::Sequence< ::sal_Int8 >& aData ); + void writeBytes( const char* pStr, size_t nLen ); +}; + +} // namespace sax_fastparser + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/tools/fshelper.cxx b/sax/source/tools/fshelper.cxx new file mode 100644 index 000000000..41857c95e --- /dev/null +++ b/sax/source/tools/fshelper.cxx @@ -0,0 +1,158 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include +#include "fastserializer.hxx" +#include + +using namespace ::com::sun::star; +using namespace ::com::sun::star::uno; + +namespace sax_fastparser { + +FastSerializerHelper::FastSerializerHelper(const Reference< io::XOutputStream >& xOutputStream, bool bWriteHeader ) : + mpSerializer(new FastSaxSerializer(xOutputStream)) +{ + if( bWriteHeader ) + mpSerializer->startDocument(); +} + +FastSerializerHelper::~FastSerializerHelper() +{ + mpSerializer->endDocument(); + delete mpSerializer; +} + +void FastSerializerHelper::startElement(sal_Int32 elementTokenId) +{ + mpSerializer->startFastElement(elementTokenId); +} +void FastSerializerHelper::pushAttributeValue(sal_Int32 attribute, const char* value) +{ + mpSerializer->getTokenValueList().emplace_back(attribute, value); +} +void FastSerializerHelper::pushAttributeValue(sal_Int32 attribute, const OString& value) +{ + mpSerializer->getTokenValueList().emplace_back(attribute, value.getStr()); +} +void FastSerializerHelper::singleElement(sal_Int32 elementTokenId) +{ + mpSerializer->singleFastElement(elementTokenId); +} + +void FastSerializerHelper::endElement(sal_Int32 elementTokenId) +{ + mpSerializer->endFastElement(elementTokenId); +} + +void FastSerializerHelper::startElement(sal_Int32 elementTokenId, const XFastAttributeListRef& xAttrList) +{ + FastAttributeList* pAttrList = dynamic_cast< FastAttributeList* >(xAttrList.get()); + assert(pAttrList); + mpSerializer->startFastElement(elementTokenId, pAttrList); +} + +void FastSerializerHelper::singleElement(sal_Int32 elementTokenId, const XFastAttributeListRef& xAttrList) +{ + FastAttributeList* pAttrList = dynamic_cast< FastAttributeList* >(xAttrList.get()); + assert(pAttrList); + 
mpSerializer->singleFastElement(elementTokenId, pAttrList); +} + +FastSerializerHelper* FastSerializerHelper::write(const char* value) +{ + mpSerializer->write(value, -1); + return this; +} + +FastSerializerHelper* FastSerializerHelper::write(const OString& value) +{ + mpSerializer->write(value); + return this; +} + +FastSerializerHelper* FastSerializerHelper::write(const OUString& value) +{ + mpSerializer->write(value); + return this; +} + +FastSerializerHelper* FastSerializerHelper::write(sal_Int32 value) +{ + mpSerializer->write(OString::number(value)); + return this; +} + +FastSerializerHelper* FastSerializerHelper::write(sal_Int64 value) +{ + mpSerializer->write(OString::number(value)); + return this; +} + +FastSerializerHelper* FastSerializerHelper::write(double value) +{ + mpSerializer->write(value); + return this; +} + +FastSerializerHelper* FastSerializerHelper::writeEscaped(const char* value) +{ + mpSerializer->write(value, -1, true); + return this; +} + +FastSerializerHelper* FastSerializerHelper::writeEscaped(const OUString& value) +{ + if (!value.isEmpty()) + mpSerializer->write(value, true); + return this; +} + +FastSerializerHelper* FastSerializerHelper::writeId(sal_Int32 tokenId) +{ + mpSerializer->writeId(tokenId); + return this; +} + +css::uno::Reference< css::io::XOutputStream > const & FastSerializerHelper::getOutputStream() const +{ + return mpSerializer->getOutputStream(); +} + +void FastSerializerHelper::mark( + sal_Int32 const nTag, const Sequence& rOrder) +{ + mpSerializer->mark(nTag, rOrder); +} + +void FastSerializerHelper::mergeTopMarks( + sal_Int32 const nTag, MergeMarks const eMergeType) +{ + mpSerializer->mergeTopMarks(nTag, eMergeType); +} + +FastAttributeList * FastSerializerHelper::createAttrList() +{ + return new FastAttributeList( nullptr ); +} + + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ -- cgit v1.2.3