summaryrefslogtreecommitdiffstats
path: root/sax/source/tools
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--sax/source/tools/CachedOutputStream.hxx118
-rw-r--r--sax/source/tools/converter.cxx2535
-rw-r--r--sax/source/tools/fastattribs.cxx336
-rw-r--r--sax/source/tools/fastserializer.cxx845
-rw-r--r--sax/source/tools/fastserializer.hxx255
-rw-r--r--sax/source/tools/fshelper.cxx155
6 files changed, 4244 insertions, 0 deletions
diff --git a/sax/source/tools/CachedOutputStream.hxx b/sax/source/tools/CachedOutputStream.hxx
new file mode 100644
index 000000000..7d9e514c3
--- /dev/null
+++ b/sax/source/tools/CachedOutputStream.hxx
@@ -0,0 +1,118 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_SAX_SOURCE_TOOLS_CACHEDOUTPUTSTREAM_HXX
+#define INCLUDED_SAX_SOURCE_TOOLS_CACHEDOUTPUTSTREAM_HXX
+
+#include <sal/types.h>
+
+#include <com/sun/star/io/XOutputStream.hpp>
+#include <com/sun/star/uno/Sequence.hxx>
+
+#include <cstring>
+#include <memory>
+
+namespace sax_fastparser {
+
+/// Abstract receiver for byte chunks; CachedOutputStream flushes into it
+/// while its output is redirected away from the stream.
+class ForMergeBase
+{
+public:
+    virtual ~ForMergeBase() = default;
+
+    /// Takes over the bytes contained in rWhat.
+    virtual void append(const css::uno::Sequence<sal_Int8>& rWhat) = 0;
+};
+
+/**
+ * Collects written bytes in a fixed-size cache and passes them on in large
+ * chunks, either to an XOutputStream or to a ForMergeBase target.
+ */
+class CachedOutputStream
+{
+    /// When buffer hits this size, it's written to mxOutputStream
+    static const sal_Int32 mnMaximumSize = 0x100000; // 1Mbyte
+
+    /// ForMerge structure is used for sorting elements in Writer
+    std::shared_ptr< ForMergeBase > mpForMerge;
+    /// Cache storage, constructed with mnMaximumSize elements.
+    const css::uno::Sequence<sal_Int8> mpCache;
+    /// Output stream, usually writing data into files.
+    css::uno::Reference< css::io::XOutputStream > mxOutputStream;
+    /// Raw handle of mpCache; used to fill the cache and to set its length in flush().
+    uno_Sequence *pSeq;
+    /// Number of bytes currently stored in the cache.
+    sal_Int32 mnCacheWrittenSize;
+    /// true: flush() writes to mxOutputStream; false: flush() appends to mpForMerge.
+    bool mbWriteToOutStream;
+
+public:
+    CachedOutputStream() : mpCache(mnMaximumSize)
+                         , pSeq(mpCache.get())
+                         , mnCacheWrittenSize(0)
+                         , mbWriteToOutStream(true)
+    {}
+
+    const css::uno::Reference< css::io::XOutputStream >& getOutputStream() const
+    {
+        return mxOutputStream;
+    }
+
+    void setOutputStream( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
+    {
+        mxOutputStream = xOutputStream;
+    }
+
+    /// Flushes pending bytes to the current target, then redirects output to pForMerge.
+    void setOutput( std::shared_ptr< ForMergeBase > pForMerge )
+    {
+        flush();
+        mbWriteToOutStream = false;
+        mpForMerge = pForMerge;
+    }
+
+    /// Flushes pending bytes to the current target, then directs output back to the stream.
+    void resetOutputToStream()
+    {
+        flush();
+        mbWriteToOutStream = true;
+        mpForMerge.reset();
+    }
+
+    /// cache string and if limit is hit, flush
+    void writeBytes( const sal_Int8* pStr, sal_Int32 nLen )
+    {
+        // Nothing to store; this also keeps memcpy away from a zero or
+        // negative length.
+        if (nLen <= 0)
+            return;
+
+        // Write when the buffer gets big enough. Compare against the free
+        // room instead of summing, so a very large nLen cannot overflow the
+        // signed addition.
+        if (nLen > mnMaximumSize - mnCacheWrittenSize)
+        {
+            flush();
+
+            // Writer does some elements sorting, so it can accumulate
+            // pretty big strings in FastSaxSerializer::ForMerge.
+            // In that case, just flush data and write immediately.
+            if (nLen > mnMaximumSize)
+            {
+                if (mbWriteToOutStream)
+                    mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(pStr, nLen) );
+                else
+                    mpForMerge->append( css::uno::Sequence<sal_Int8>(pStr, nLen) );
+                return;
+            }
+        }
+
+        memcpy(pSeq->elements + mnCacheWrittenSize, pStr, nLen);
+        mnCacheWrittenSize += nLen;
+    }
+
+    /// immediately write buffer into mxOutputStream and clear
+    void flush()
+    {
+        // resize the Sequence to written size
+        pSeq->nElements = mnCacheWrittenSize;
+        if (mbWriteToOutStream)
+            mxOutputStream->writeBytes( mpCache );
+        else
+            mpForMerge->append( mpCache );
+        // and next time write to the beginning
+        mnCacheWrittenSize = 0;
+    }
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/tools/converter.cxx b/sax/source/tools/converter.cxx
new file mode 100644
index 000000000..2abfe3575
--- /dev/null
+++ b/sax/source/tools/converter.cxx
@@ -0,0 +1,2535 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sax/tools/converter.hxx>
+
+#include <com/sun/star/i18n/UnicodeType.hpp>
+#include <com/sun/star/util/DateTime.hpp>
+#include <com/sun/star/util/Date.hpp>
+#include <com/sun/star/util/Duration.hpp>
+#include <com/sun/star/util/Time.hpp>
+#include <optional>
+
+#include <rtl/ustrbuf.hxx>
+#include <rtl/math.hxx>
+#include <rtl/character.hxx>
+#include <sal/log.hxx>
+#include <o3tl/typed_flags_set.hxx>
+#include <o3tl/unit_conversion.hxx>
+#include <osl/diagnose.h>
+#include <tools/long.hxx>
+
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <string_view>
+
+using namespace com::sun::star;
+using namespace com::sun::star::uno;
+using namespace com::sun::star::util;
+using namespace ::com::sun::star::i18n;
+
+
+namespace sax {
+
+const std::string_view gpsMM = "mm"; // unit suffix returned for MeasureUnit::MM
+const std::string_view gpsCM = "cm"; // unit suffix returned for MeasureUnit::CM
+const std::string_view gpsPT = "pt"; // unit suffix returned for MeasureUnit::POINT
+const std::string_view gpsINCH = "in"; // unit suffix returned for MeasureUnit::INCH and as fallback
+const std::string_view gpsPC = "pc"; // unit suffix returned by Measure2UnitString for MeasureUnit::TWIP
+
+const sal_Int8 XML_MAXDIGITSCOUNT_TIME = 14; // convertDuration uses this minus 5 as its number of decimal places
+
+/// UTF-16 overload: forwards to rtl_ustr_toInt64_WithLength.
+static sal_Int64 toInt64_WithLength(const sal_Unicode * str, sal_Int16 radix, sal_Int32 nStrLength )
+{
+    sal_Int64 const nParsed = rtl_ustr_toInt64_WithLength(str, radix, nStrLength);
+    return nParsed;
+}
+/// 8-bit overload: forwards to rtl_str_toInt64_WithLength.
+static sal_Int64 toInt64_WithLength(const char * str, sal_Int16 radix, sal_Int32 nStrLength )
+{
+    sal_Int64 const nParsed = rtl_str_toInt64_WithLength(str, radix, nStrLength);
+    return nParsed;
+}
+
+namespace
+{
+/// Maps a MeasureUnit constant to the matching o3tl::Length unit.
+/// Units without a mapping are logged and treated as inches.
+o3tl::Length Measure2O3tlUnit(sal_Int16 nUnit)
+{
+    switch (nUnit)
+    {
+        case MeasureUnit::INCH:
+            return o3tl::Length::in;
+        case MeasureUnit::CM:
+            return o3tl::Length::cm;
+        case MeasureUnit::MM:
+            return o3tl::Length::mm;
+        case MeasureUnit::MM_100TH:
+            return o3tl::Length::mm100;
+        case MeasureUnit::MM_10TH:
+            return o3tl::Length::mm10;
+        case MeasureUnit::POINT:
+            return o3tl::Length::pt;
+        case MeasureUnit::TWIP:
+            return o3tl::Length::twip;
+        default:
+            break;
+    }
+
+    // no mapping available: warn and fall back to inches
+    SAL_WARN("sax", "unit not supported for length");
+    return o3tl::Length::in;
+}
+
+/// Returns the textual suffix for a MeasureUnit constant; empty for the
+/// 1/10 mm and 1/100 mm units, "in" for INCH and anything not listed.
+std::string_view Measure2UnitString(sal_Int16 nUnit)
+{
+    if (nUnit == MeasureUnit::MM_10TH || nUnit == MeasureUnit::MM_100TH)
+        return {};
+    if (nUnit == MeasureUnit::TWIP)
+        return gpsPC; // ??
+    if (nUnit == MeasureUnit::POINT)
+        return gpsPT;
+    if (nUnit == MeasureUnit::MM)
+        return gpsMM;
+    if (nUnit == MeasureUnit::CM)
+        return gpsCM;
+
+    // MeasureUnit::INCH and every other value
+    return gpsINCH;
+}
+
+/// True when `string` starts with `expected` (ASCII case-insensitively on the
+/// `string` side) and that prefix is followed by the end of input or a space.
+template <typename V> bool wordEndsWith(V string, std::string_view expected)
+{
+    V const aHead = string.substr(0, expected.size());
+    auto const matchesLowered
+        = [](sal_uInt32 c1, sal_uInt32 c2) { return rtl::toAsciiLowerCase(c1) == c2; };
+
+    if (!std::equal(aHead.begin(), aHead.end(), expected.begin(), expected.end(), matchesLowered))
+        return false;
+
+    if (string.size() == expected.size())
+        return true;
+    return string[expected.size()] == ' ';
+}
+
+}
+
+/** convert string to measure using optional min and max values
+ *
+ * Accepts leading white space, an optional '-', decimal digits with an
+ * optional '.' fraction, white space and then an optional unit suffix.
+ * For MeasureUnit::PERCENT the suffix must start with '%', for
+ * MeasureUnit::PIXEL with "px" (either case); for the other target units
+ * the recognised suffix is converted with o3tl::convert.
+ * Returns false when text follows the number that is not an accepted unit;
+ * a number without any suffix is accepted unconverted. On success rValue
+ * receives the rounded result limited to [nMin, nMax].
+ */
+template<typename V>
+static bool lcl_convertMeasure( sal_Int32& rValue,
+ V rString,
+ sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */,
+ sal_Int32 nMin /* = SAL_MIN_INT32 */,
+ sal_Int32 nMax /* = SAL_MAX_INT32 */ )
+{
+ bool bNeg = false;
+ double nVal = 0;
+
+ sal_Int32 nPos = 0;
+ sal_Int32 const nLen = rString.size();
+
+ // skip white space
+ while( (nPos < nLen) && (rString[nPos] <= ' ') )
+ nPos++;
+
+ if( nPos < nLen && '-' == rString[nPos] )
+ {
+ bNeg = true;
+ nPos++;
+ }
+
+ // get number
+ while( nPos < nLen &&
+ '0' <= rString[nPos] &&
+ '9' >= rString[nPos] )
+ {
+ // TODO: check overflow!
+ nVal *= 10;
+ nVal += (rString[nPos] - '0');
+ nPos++;
+ }
+ if( nPos < nLen && '.' == rString[nPos] )
+ {
+ nPos++;
+ double nDiv = 1.;
+
+ while( nPos < nLen &&
+ '0' <= rString[nPos] &&
+ '9' >= rString[nPos] )
+ {
+ // TODO: check overflow!
+ nDiv *= 10;
+ nVal += ( static_cast<double>(rString[nPos] - '0') / nDiv );
+ nPos++;
+ }
+ }
+
+ // skip white space
+ while( (nPos < nLen) && (rString[nPos] <= ' ') )
+ nPos++;
+
+ if( nPos < nLen ) // text follows the number: it has to be an accepted unit
+ {
+
+ if( MeasureUnit::PERCENT == nTargetUnit )
+ {
+ if( '%' != rString[nPos] )
+ return false;
+ }
+ else if( MeasureUnit::PIXEL == nTargetUnit )
+ {
+ if( nPos + 1 >= nLen ||
+ ('p' != rString[nPos] &&
+ 'P' != rString[nPos])||
+ ('x' != rString[nPos+1] &&
+ 'X' != rString[nPos+1]) )
+ return false;
+ }
+ else
+ {
+ OSL_ENSURE( MeasureUnit::TWIP == nTargetUnit || MeasureUnit::POINT == nTargetUnit ||
+ MeasureUnit::MM_100TH == nTargetUnit || MeasureUnit::MM_10TH == nTargetUnit ||
+ MeasureUnit::PIXEL == nTargetUnit, "unit is not supported"); // PIXEL never reaches this branch; it is handled above
+
+ o3tl::Length eFrom = o3tl::Length::invalid;
+
+ if( MeasureUnit::TWIP == nTargetUnit )
+ {
+ switch (rtl::toAsciiLowerCase<sal_uInt32>(rString[nPos]))
+ {
+ case u'c':
+ if (wordEndsWith(rString.substr(nPos + 1), "m"))
+ eFrom = o3tl::Length::cm;
+ break;
+ case u'i':
+ if (wordEndsWith(rString.substr(nPos + 1), "n"))
+ eFrom = o3tl::Length::in;
+ break;
+ case u'm':
+ if (wordEndsWith(rString.substr(nPos + 1), "m"))
+ eFrom = o3tl::Length::mm;
+ break;
+ case u'p':
+ if (wordEndsWith(rString.substr(nPos + 1), "t"))
+ eFrom = o3tl::Length::pt;
+ else if (wordEndsWith(rString.substr(nPos + 1), "c"))
+ eFrom = o3tl::Length::pc;
+ break;
+ }
+ }
+ else if( MeasureUnit::MM_100TH == nTargetUnit || MeasureUnit::MM_10TH == nTargetUnit )
+ {
+ switch (rtl::toAsciiLowerCase<sal_uInt32>(rString[nPos]))
+ {
+ case u'c':
+ if (wordEndsWith(rString.substr(nPos + 1), "m"))
+ eFrom = o3tl::Length::cm;
+ break;
+ case u'i':
+ if (wordEndsWith(rString.substr(nPos + 1), "n"))
+ eFrom = o3tl::Length::in;
+ break;
+ case u'm':
+ if (wordEndsWith(rString.substr(nPos + 1), "m"))
+ eFrom = o3tl::Length::mm;
+ break;
+ case u'p':
+ if (wordEndsWith(rString.substr(nPos + 1), "t"))
+ eFrom = o3tl::Length::pt;
+ else if (wordEndsWith(rString.substr(nPos + 1), "c"))
+ eFrom = o3tl::Length::pc;
+ else if (wordEndsWith(rString.substr(nPos + 1), "x"))
+ eFrom = o3tl::Length::px;
+ break;
+ }
+ }
+ else if( MeasureUnit::POINT == nTargetUnit )
+ {
+ if (wordEndsWith(rString.substr(nPos), "pt"))
+ eFrom = o3tl::Length::pt;
+ }
+
+ if (eFrom == o3tl::Length::invalid) // no suffix was recognised for this target unit
+ return false;
+
+ // TODO: check overflow
+ nVal = o3tl::convert(nVal, eFrom, Measure2O3tlUnit(nTargetUnit));
+ }
+ }
+
+ nVal += .5; // rounding offset, added to the magnitude before the sign is applied
+ if( bNeg )
+ nVal = -nVal;
+
+ if( nVal <= static_cast<double>(nMin) )
+ rValue = nMin;
+ else if( nVal >= static_cast<double>(nMax) )
+ rValue = nMax;
+ else
+ rValue = static_cast<sal_Int32>(nVal); // conversion truncates toward zero
+
+ return true;
+}
+
+/** UTF-16 entry point: parse a measure string, limited to [nMin, nMax] */
+bool Converter::convertMeasure( sal_Int32& rValue,
+        std::u16string_view rString,
+        sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */,
+        sal_Int32 nMin /* = SAL_MIN_INT32 */,
+        sal_Int32 nMax /* = SAL_MAX_INT32 */ )
+{
+    bool const bConverted = lcl_convertMeasure(rValue, rString, nTargetUnit, nMin, nMax);
+    return bConverted;
+}
+
+/** 8-bit entry point: parse a measure string, limited to [nMin, nMax] */
+bool Converter::convertMeasure( sal_Int32& rValue,
+        std::string_view rString,
+        sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */,
+        sal_Int32 nMin /* = SAL_MIN_INT32 */,
+        sal_Int32 nMax /* = SAL_MAX_INT32 */ )
+{
+    bool const bConverted = lcl_convertMeasure(rValue, rString, nTargetUnit, nMin, nMax);
+    return bConverted;
+}
+
+
+/** convert measure in given unit to string with given unit
+ *
+ * A PERCENT source is written as the number followed by '%' and returns
+ * early. Otherwise a '-' is emitted for negative input, the magnitude is
+ * multiplied by nFac and converted from eFrom to eTo, and the result is
+ * written as integer part, an optional fraction without trailing zeros,
+ * and the unit suffix (if one was selected).
+ */
+void Converter::convertMeasure( OUStringBuffer& rBuffer,
+ sal_Int32 nMeasure,
+ sal_Int16 nSourceUnit /* = MeasureUnit::MM_100TH */,
+ sal_Int16 nTargetUnit /* = MeasureUnit::INCH */ )
+{
+ if( nSourceUnit == MeasureUnit::PERCENT )
+ {
+ OSL_ENSURE( nTargetUnit == MeasureUnit::PERCENT,
+ "MeasureUnit::PERCENT only maps to MeasureUnit::PERCENT!" );
+
+ rBuffer.append( nMeasure );
+ rBuffer.append( '%' );
+
+ return;
+ }
+ sal_Int64 nValue(nMeasure); // extend to 64-bit first to avoid overflow
+ // the sign is processed separately
+ if (nValue < 0)
+ {
+ nValue = -nValue;
+ rBuffer.append( '-' );
+ }
+
+ o3tl::Length eFrom = o3tl::Length::in, eTo = o3tl::Length::in; // the INCH/default target cases below rely on eTo staying "in"
+ int nFac = 100; // used to get specific number of decimals (2 by default)
+ std::string_view psUnit;
+ switch( nSourceUnit )
+ {
+ case MeasureUnit::TWIP:
+ eFrom = o3tl::Length::twip;
+ switch( nTargetUnit )
+ {
+ case MeasureUnit::MM_100TH:
+ case MeasureUnit::MM_10TH:
+ OSL_ENSURE( MeasureUnit::INCH == nTargetUnit,"output unit not supported for twip values" ); // condition is false in this case: reports the unsupported target, then writes mm
+ [[fallthrough]];
+ case MeasureUnit::MM:
+ eTo = o3tl::Length::mm;
+ nFac = 100;
+ psUnit = gpsMM;
+ break;
+
+ case MeasureUnit::CM:
+ eTo = o3tl::Length::cm;
+ nFac = 1000;
+ psUnit = gpsCM;
+ break;
+
+ case MeasureUnit::POINT:
+ eTo = o3tl::Length::pt;
+ nFac = 100;
+ psUnit = gpsPT;
+ break;
+
+ case MeasureUnit::INCH:
+ default:
+ OSL_ENSURE( MeasureUnit::INCH == nTargetUnit,
+ "output unit not supported for twip values" );
+ nFac = 10000;
+ psUnit = gpsINCH;
+ break;
+ }
+ break;
+
+ case MeasureUnit::POINT:
+ // 1pt = 1pt (exactly)
+ OSL_ENSURE( MeasureUnit::POINT == nTargetUnit,
+ "output unit not supported for pt values" );
+ eFrom = eTo = o3tl::Length::pt;
+ nFac = 1;
+ psUnit = gpsPT;
+ break;
+ case MeasureUnit::MM_10TH:
+ case MeasureUnit::MM_100TH:
+ {
+ int nFac2 = (MeasureUnit::MM_100TH == nSourceUnit) ? 100 : 10;
+ eFrom = Measure2O3tlUnit(nSourceUnit);
+ switch( nTargetUnit )
+ {
+ case MeasureUnit::MM_100TH:
+ case MeasureUnit::MM_10TH:
+ OSL_ENSURE( MeasureUnit::INCH == nTargetUnit,
+ "output unit not supported for 1/100mm values" ); // condition is false in this case: reports the unsupported target, then writes mm
+ [[fallthrough]];
+ case MeasureUnit::MM:
+ eTo = o3tl::Length::mm;
+ nFac = nFac2;
+ psUnit = gpsMM;
+ break;
+
+ case MeasureUnit::CM:
+ eTo = o3tl::Length::cm;
+ nFac = 10*nFac2;
+ psUnit = gpsCM;
+ break;
+
+ case MeasureUnit::POINT:
+ eTo = o3tl::Length::pt;
+ nFac = nFac2;
+ psUnit = gpsPT;
+ break;
+
+ case MeasureUnit::INCH:
+ default:
+ OSL_ENSURE( MeasureUnit::INCH == nTargetUnit,
+ "output unit not supported for 1/100mm values" );
+ nFac = 100*nFac2;
+ psUnit = gpsINCH;
+ break;
+ }
+ break;
+ }
+ default:
+ OSL_ENSURE(false, "sax::Converter::convertMeasure(): "
+ "source unit not supported");
+ break;
+ }
+
+ nValue = o3tl::convert(nValue * nFac, eFrom, eTo); // value in target unit, pre-multiplied by nFac to keep the decimals
+
+ rBuffer.append( static_cast<sal_Int64>(nValue / nFac) );
+ if (nFac > 1 && (nValue % nFac) != 0)
+ {
+ rBuffer.append( '.' );
+ while (nFac > 1 && (nValue % nFac) != 0) // one digit per pass; stops once only zeros would follow
+ {
+ nFac /= 10;
+ rBuffer.append( static_cast<sal_Int32>((nValue / nFac) % 10) );
+ }
+ }
+
+ if (psUnit.length() > 0)
+ rBuffer.appendAscii(psUnit.data(), psUnit.length());
+}
+
+/** parse "true"/"false" (UTF-16); any other text yields false and sets rBool to false */
+bool Converter::convertBool( bool& rBool, std::u16string_view rString )
+{
+    if (rString == u"true")
+    {
+        rBool = true;
+        return true;
+    }
+
+    rBool = false;
+    return rString == u"false";
+}
+
+/** parse "true"/"false" (8-bit); any other text yields false and sets rBool to false */
+bool Converter::convertBool( bool& rBool, std::string_view rString )
+{
+    if (rString == "true")
+    {
+        rBool = true;
+        return true;
+    }
+
+    rBool = false;
+    return rString == "false";
+}
+
+/** append the textual form of bValue to rBuffer */
+void Converter::convertBool( OUStringBuffer& rBuffer, bool bValue )
+{
+    rBuffer.append(bValue);
+}
+
+/** parse a percent value (UTF-16) by delegating to convertMeasure with MeasureUnit::PERCENT */
+bool Converter::convertPercent( sal_Int32& rPercent, std::u16string_view rString )
+{
+    bool const bConverted = convertMeasure(rPercent, rString, MeasureUnit::PERCENT);
+    return bConverted;
+}
+
+/** parse a percent value (8-bit) by delegating to convertMeasure with MeasureUnit::PERCENT */
+bool Converter::convertPercent( sal_Int32& rPercent, std::string_view rString )
+{
+    bool const bConverted = convertMeasure(rPercent, rString, MeasureUnit::PERCENT);
+    return bConverted;
+}
+
+/** append nValue followed by '%' to rBuffer */
+void Converter::convertPercent( OUStringBuffer& rBuffer, sal_Int32 nValue )
+{
+    rBuffer.append(nValue).append('%');
+}
+
+/** parse a pixel measure (UTF-16) by delegating to convertMeasure with MeasureUnit::PIXEL */
+bool Converter::convertMeasurePx( sal_Int32& rPixel, std::u16string_view rString )
+{
+    bool const bConverted = convertMeasure(rPixel, rString, MeasureUnit::PIXEL);
+    return bConverted;
+}
+
+/** parse a pixel measure (8-bit) by delegating to convertMeasure with MeasureUnit::PIXEL */
+bool Converter::convertMeasurePx( sal_Int32& rPixel, std::string_view rString )
+{
+    bool const bConverted = convertMeasure(rPixel, rString, MeasureUnit::PIXEL);
+    return bConverted;
+}
+
+/** append nValue followed by the characters 'p' and 'x' to rBuffer */
+void Converter::convertMeasurePx( OUStringBuffer& rBuffer, sal_Int32 nValue )
+{
+    rBuffer.append(nValue).append('p').append('x');
+}
+
+/// Value of a single hexadecimal digit (either case); 0 for any other character.
+static int lcl_gethex( int nChar )
+{
+    if( '0' <= nChar && nChar <= '9' )
+        return nChar - '0';
+
+    if( 'a' <= nChar && nChar <= 'f' )
+        return 10 + (nChar - 'a');
+
+    if( 'A' <= nChar && nChar <= 'F' )
+        return 10 + (nChar - 'A');
+
+    // not a hex digit
+    return 0;
+}
+
+/** parse "#rrggbb" into a 24-bit colour value
+ *
+ * Only the length (7) and the leading '#' are validated; characters that are
+ * not hex digits count as 0 (see lcl_gethex).
+ */
+template<typename V>
+static bool lcl_convertColor( sal_Int32& rColor, V rValue )
+{
+    if( rValue.size() != 7 || rValue[0] != '#' )
+        return false;
+
+    // combines the two hex digits starting at nIdx into one 0..255 channel value
+    auto const readChannel = [&rValue](int nIdx)
+    {
+        return lcl_gethex( rValue[nIdx] ) * 16 + lcl_gethex( rValue[nIdx + 1] );
+    };
+
+    int const nRed = readChannel(1);
+    int const nGreen = readChannel(3);
+    int const nBlue = readChannel(5);
+
+    rColor = (nRed << 16) | (nGreen << 8) | nBlue;
+    return true;
+}
+
+/** parse a "#rrggbb" colour (UTF-16) */
+bool Converter::convertColor( sal_Int32& rColor, std::u16string_view rValue )
+{
+    bool const bConverted = lcl_convertColor(rColor, rValue);
+    return bConverted;
+}
+
+/** parse a "#rrggbb" colour (8-bit) */
+bool Converter::convertColor( sal_Int32& rColor, std::string_view rValue )
+{
+    bool const bConverted = lcl_convertColor(rColor, rValue);
+    return bConverted;
+}
+
+/// lower-case hex digits, indexed by nibble value
+const char aHexTab[] = "0123456789abcdef";
+
+/** write the low 24 bits of nColor as "#rrggbb" with lower-case hex digits */
+void Converter::convertColor( OUStringBuffer& rBuffer, sal_Int32 nColor )
+{
+    rBuffer.append( '#' );
+
+    // bits 16-23, then 8-15, then 0-7
+    for (int nShift = 16; nShift >= 0; nShift -= 8)
+    {
+        sal_uInt8 const nChannel = static_cast<sal_uInt8>(nColor >> nShift);
+        rBuffer.append( sal_Unicode( aHexTab[ nChannel >> 4 ] ) );
+        rBuffer.append( sal_Unicode( aHexTab[ nChannel & 0xf ] ) );
+    }
+}
+
+/** parse a 32-bit number (UTF-16) via convertNumber64; rValue is 0 on failure */
+bool Converter::convertNumber( sal_Int32& rValue,
+        std::u16string_view aString,
+        sal_Int32 nMin, sal_Int32 nMax )
+{
+    sal_Int64 nWide = 0;
+    bool const bParsed = convertNumber64(nWide, aString, nMin, nMax);
+    rValue = bParsed ? static_cast<sal_Int32>(nWide) : 0;
+    return bParsed;
+}
+
+/** parse a 32-bit number (8-bit) via convertNumber64; rValue is 0 on failure */
+bool Converter::convertNumber( sal_Int32& rValue,
+        std::string_view aString,
+        sal_Int32 nMin, sal_Int32 nMax )
+{
+    sal_Int64 nWide = 0;
+    bool const bParsed = convertNumber64(nWide, aString, nMin, nMax);
+    rValue = bParsed ? static_cast<sal_Int32>(nWide) : 0;
+    return bParsed;
+}
+
+/** parse an optionally signed decimal integer, limited to [nMin, nMax]
+ *
+ * Leading characters <= ' ' are skipped. rValue always receives the limited
+ * value of the digits that were found; the result is true only when the
+ * number extends to the end of the input and rValue lies within
+ * [nMin, nMax].
+ */
+template<typename V>
+static bool lcl_convertNumber64( sal_Int64& rValue,
+        V aString,
+        sal_Int64 nMin, sal_Int64 nMax )
+{
+    sal_Int32 const nLen = aString.size();
+    sal_Int32 nCursor = 0;
+
+    // leading white space
+    while( nCursor < nLen && aString[nCursor] <= ' ' )
+        ++nCursor;
+
+    sal_Int32 const nNumberStart = nCursor;
+
+    // optional sign
+    if( nCursor < nLen && aString[nCursor] == '-' )
+        ++nCursor;
+
+    // digits
+    while( nCursor < nLen && aString[nCursor] >= '0' && aString[nCursor] <= '9' )
+        ++nCursor;
+
+    rValue = toInt64_WithLength(aString.data() + nNumberStart, 10, nCursor - nNumberStart);
+
+    if( rValue < nMin )
+        rValue = nMin;
+    else if( rValue > nMax )
+        rValue = nMax;
+
+    bool const bConsumedAll = (nCursor == nLen);
+    return bConsumedAll && rValue >= nMin && rValue <= nMax;
+}
+
+/** parse a 64-bit number (UTF-16), limited to [nMin, nMax] */
+bool Converter::convertNumber64( sal_Int64& rValue,
+        std::u16string_view aString,
+        sal_Int64 nMin, sal_Int64 nMax )
+{
+    bool const bParsed = lcl_convertNumber64(rValue, aString, nMin, nMax);
+    return bParsed;
+}
+
+/** parse a 64-bit number (8-bit), limited to [nMin, nMax] */
+bool Converter::convertNumber64( sal_Int64& rValue,
+        std::string_view aString,
+        sal_Int64 nMin, sal_Int64 nMax )
+{
+    bool const bParsed = lcl_convertNumber64(rValue, aString, nMin, nMax);
+    return bParsed;
+}
+
+
+/** write fNumber to rBuffer, scaled from nSourceUnit to nTargetUnit
+ *
+ * A PERCENT source is written unscaled; the unit text ('%' or the one
+ * supplied by GetConversionFactor) is appended only when bWriteUnits is set.
+ */
+void Converter::convertDouble( OUStringBuffer& rBuffer,
+        double fNumber,
+        bool bWriteUnits,
+        sal_Int16 nSourceUnit,
+        sal_Int16 nTargetUnit)
+{
+    if (MeasureUnit::PERCENT == nSourceUnit)
+    {
+        OSL_ENSURE( nTargetUnit == MeasureUnit::PERCENT, "MeasureUnit::PERCENT only maps to MeasureUnit::PERCENT!" );
+        ::rtl::math::doubleToUStringBuffer( rBuffer, fNumber, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true);
+        if (bWriteUnits)
+            rBuffer.append('%');
+        return;
+    }
+
+    OUStringBuffer sUnit;
+    double const fFactor = GetConversionFactor(sUnit, nSourceUnit, nTargetUnit);
+    double const fScaled = (fFactor != 1.0) ? fNumber * fFactor : fNumber;
+    ::rtl::math::doubleToUStringBuffer( rBuffer, fScaled, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true);
+    if (bWriteUnits)
+        rBuffer.append(sUnit);
+}
+
+/** write fNumber to rBuffer in automatic format with '.' as decimal separator */
+void Converter::convertDouble( OUStringBuffer& rBuffer, double fNumber)
+{
+    ::rtl::math::doubleToUStringBuffer(
+        rBuffer, fNumber,
+        rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max,
+        '.', true);
+}
+
+/** parse a double (UTF-16) and rescale it between the given units
+ *
+ * The parsed value is divided by the target-to-source conversion factor
+ * unless that factor is 1.0 or 0.0.
+ */
+bool Converter::convertDouble(double& rValue,
+    std::u16string_view rString, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit)
+{
+    bool const bParsed = convertDouble(rValue, rString);
+    if (bParsed)
+    {
+        OUStringBuffer sUnit;
+        // fdo#48969: switch source and target because factor is used to divide!
+        double const fFactor =
+            GetConversionFactor(sUnit, nTargetUnit, nSourceUnit);
+        bool const bSkipScaling = (fFactor == 1.0 || fFactor == 0.0);
+        if (!bSkipScaling)
+            rValue /= fFactor;
+    }
+    return bParsed;
+}
+
+/** parse a double (8-bit) and rescale it between the given units
+ *
+ * The parsed value is divided by the target-to-source conversion factor
+ * unless that factor is 1.0 or 0.0.
+ */
+bool Converter::convertDouble(double& rValue,
+    std::string_view rString, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit)
+{
+    bool const bParsed = convertDouble(rValue, rString);
+    if (bParsed)
+    {
+        OStringBuffer sUnit;
+        // fdo#48969: switch source and target because factor is used to divide!
+        double const fFactor =
+            GetConversionFactor(sUnit, nTargetUnit, nSourceUnit);
+        bool const bSkipScaling = (fFactor == 1.0 || fFactor == 0.0);
+        if (!bSkipScaling)
+            rValue /= fFactor;
+    }
+    return bParsed;
+}
+
+/** parse a double (UTF-16) with '.' as decimal and ',' as group separator;
+ *  true only when the conversion status is rtl_math_ConversionStatus_Ok */
+bool Converter::convertDouble(double& rValue, std::u16string_view rString)
+{
+    auto const pBegin = rString.data();
+    auto const pEnd = pBegin + rString.size();
+
+    rtl_math_ConversionStatus eStatus;
+    rValue = rtl_math_uStringToDouble(pBegin, pEnd,
+                                      /*cDecSeparator*/'.', /*cGroupSeparator*/',',
+                                      &eStatus, nullptr);
+    return eStatus == rtl_math_ConversionStatus_Ok;
+}
+
+/** parse a double (8-bit) with '.' as decimal and ',' as group separator;
+ *  true only when the conversion status is rtl_math_ConversionStatus_Ok */
+bool Converter::convertDouble(double& rValue, std::string_view rString)
+{
+    auto const pBegin = rString.data();
+    auto const pEnd = pBegin + rString.size();
+
+    rtl_math_ConversionStatus eStatus;
+    rValue = rtl_math_stringToDouble(pBegin, pEnd,
+                                     /*cDecSeparator*/'.', /*cGroupSeparator*/',',
+                                     &eStatus, nullptr);
+    return eStatus == rtl_math_ConversionStatus_Ok;
+}
+
+/** convert number, 10th of degrees with range [0..3600] to SVG angle
+ *
+ * Versions before ODF 1.2 and the 1.2 "extended compat" mode get the raw
+ * 10th-of-degree integer; all others get degrees with a "deg" suffix.
+ */
+void Converter::convertAngle(OUStringBuffer& rBuffer, sal_Int16 const nAngle,
+        SvtSaveOptions::ODFSaneDefaultVersion const nVersion)
+{
+    bool const bLegacyFormat = nVersion < SvtSaveOptions::ODFSVER_012
+                               || nVersion == SvtSaveOptions::ODFSVER_012_EXT_COMPAT;
+    if (bLegacyFormat)
+    {
+        // wrong, but backward compatible with OOo/LO < 4.4
+        rBuffer.append(static_cast<sal_Int32>(nAngle));
+        return;
+    }
+
+    // OFFICE-3774 tdf#89475 write valid ODF 1.2 angle; needs LO 4.4 to import
+    double const fDegrees = double(nAngle) / 10.0;
+    ::sax::Converter::convertDouble(rBuffer, fDegrees);
+    rBuffer.append("deg");
+}
+
+/** convert SVG angle to number, 10th of degrees with range [0..3600]
+ *
+ * The unit is detected by searching rString for "deg", "grad" or "rad" (in
+ * that order, since "grad" contains "rad"). Without a unit the number is
+ * taken as 10th of degrees when isWrongOOo10thDegAngle is set, otherwise as
+ * degrees.
+ *
+ * @return false, leaving rAngle untouched, when the number cannot be parsed
+ *         or is not finite
+ */
+bool Converter::convertAngle(sal_Int16& rAngle, std::u16string_view rString,
+        bool const isWrongOOo10thDegAngle)
+{
+    // ODF 1.1 leaves it undefined what the number means, but ODF 1.2 says it's
+    // degrees, while OOo has historically used 10th of degrees :(
+    // So import degrees when we see the "deg" suffix but continue with 10th of
+    // degrees for now for the sake of existing OOo/LO documents, until the
+    // new versions that can read "deg" suffix are widely deployed and we can
+    // start to write the "deg" suffix.
+    double fValue(0.0);
+    bool const bRet = ::sax::Converter::convertDouble(fValue, rString);
+
+    // scale to 10th of degrees, still as double
+    if (std::u16string_view::npos != rString.find(u"deg"))
+    {
+        fValue = fValue * 10.0;
+    }
+    else if (std::u16string_view::npos != rString.find(u"grad"))
+    {
+        fValue = (fValue * 9.0 / 10.0) * 10.0;
+    }
+    else if (std::u16string_view::npos != rString.find(u"rad"))
+    {
+        fValue = basegfx::rad2deg<10>(fValue);
+    }
+    else if (!isWrongOOo10thDegAngle)
+    {
+        fValue = fValue * 10.0; // ODF 1.2
+    }
+    // else: no explicit unit, value is already in 10th of degrees;
+    // wrong, but backward compatible with OOo/LO < 7.0
+
+    // Converting an out-of-range or non-finite double to an integer is
+    // undefined, so reject those inputs before any integer conversion.
+    if (!bRet || !std::isfinite(fValue))
+        return false;
+
+    // limit to valid range [0..3600]; fmod is exact, so truncating afterwards
+    // gives the same result as truncating first and taking the remainder
+    sal_Int32 nValue = static_cast<sal_Int32>(std::fmod(fValue, 3600.0));
+    if (nValue < 0)
+    {
+        nValue += 3600;
+    }
+    assert(0 <= nValue && nValue <= 3600);
+    rAngle = sal::static_int_cast<sal_Int16>(nValue);
+    return true;
+}
+
+/** convert SVG angle to number, 10th of degrees with range [0..3600]
+ *
+ * The unit is detected by searching rString for "deg", "grad" or "rad" (in
+ * that order, since "grad" contains "rad"). Without a unit the number is
+ * taken as 10th of degrees when isWrongOOo10thDegAngle is set, otherwise as
+ * degrees.
+ *
+ * @return false, leaving rAngle untouched, when the number cannot be parsed
+ *         or is not finite
+ */
+bool Converter::convertAngle(sal_Int16& rAngle, std::string_view rString,
+        bool const isWrongOOo10thDegAngle)
+{
+    // ODF 1.1 leaves it undefined what the number means, but ODF 1.2 says it's
+    // degrees, while OOo has historically used 10th of degrees :(
+    // So import degrees when we see the "deg" suffix but continue with 10th of
+    // degrees for now for the sake of existing OOo/LO documents, until the
+    // new versions that can read "deg" suffix are widely deployed and we can
+    // start to write the "deg" suffix.
+    double fValue(0.0);
+    bool const bRet = ::sax::Converter::convertDouble(fValue, rString);
+
+    // scale to 10th of degrees, still as double
+    if (std::string_view::npos != rString.find("deg"))
+    {
+        fValue = fValue * 10.0;
+    }
+    else if (std::string_view::npos != rString.find("grad"))
+    {
+        fValue = (fValue * 9.0 / 10.0) * 10.0;
+    }
+    else if (std::string_view::npos != rString.find("rad"))
+    {
+        fValue = basegfx::rad2deg<10>(fValue);
+    }
+    else if (!isWrongOOo10thDegAngle)
+    {
+        fValue = fValue * 10.0; // ODF 1.2
+    }
+    // else: no explicit unit, value is already in 10th of degrees;
+    // wrong, but backward compatible with OOo/LO < 7.0
+
+    // Converting an out-of-range or non-finite double to an integer is
+    // undefined, so reject those inputs before any integer conversion.
+    if (!bRet || !std::isfinite(fValue))
+        return false;
+
+    // limit to valid range [0..3600]; fmod is exact, so truncating afterwards
+    // gives the same result as truncating first and taking the remainder
+    sal_Int32 nValue = static_cast<sal_Int32>(std::fmod(fValue, 3600.0));
+    if (nValue < 0)
+    {
+        nValue += 3600;
+    }
+    assert(0 <= nValue && nValue <= 3600);
+    rAngle = sal::static_int_cast<sal_Int16>(nValue);
+    return true;
+}
+
+/** convert double to ISO "duration" string; negative durations allowed
+ *
+ * fTime is multiplied by 24 to obtain hours, i.e. it is treated as a number
+ * of days. The output has the form [-]PTnnHnnMnn[.fraction]S, with hours,
+ * minutes and seconds padded to at least two digits.
+ */
+void Converter::convertDuration(OUStringBuffer& rBuffer,
+ const double fTime)
+{
+ double fValue = fTime;
+
+ // take care of negative durations as specified in:
+ // XML Schema, W3C Working Draft 07 April 2000, section 3.2.6.1
+ if (fValue < 0.0)
+ {
+ rBuffer.append('-');
+ fValue = - fValue;
+ }
+
+ rBuffer.append( "PT" );
+ fValue *= 24; // days -> hours
+ double fHoursValue = ::rtl::math::approxFloor (fValue);
+ fValue -= fHoursValue;
+ fValue *= 60; // remaining fraction of an hour -> minutes
+ double fMinsValue = ::rtl::math::approxFloor (fValue);
+ fValue -= fMinsValue;
+ fValue *= 60; // remaining fraction of a minute -> seconds
+ double fSecsValue = ::rtl::math::approxFloor (fValue);
+ fValue -= fSecsValue; // fValue now holds the fractional second
+ double fNanoSecsValue;
+ if (fValue > 0.00000000001)
+ fNanoSecsValue = ::rtl::math::round( fValue, XML_MAXDIGITSCOUNT_TIME - 5);
+ else
+ fNanoSecsValue = 0.0;
+
+ if (fNanoSecsValue == 1.0) // fraction rounded up to a full second: carry into seconds
+ {
+ fNanoSecsValue = 0.0;
+ fSecsValue += 1.0;
+ }
+ if (fSecsValue >= 60.0) // carry into minutes
+ {
+ fSecsValue -= 60.0;
+ fMinsValue += 1.0;
+ }
+ if (fMinsValue >= 60.0) // carry into hours
+ {
+ fMinsValue -= 60.0;
+ fHoursValue += 1.0;
+ }
+
+ if (fHoursValue < 10)
+ rBuffer.append( '0');
+ rBuffer.append( sal_Int32( fHoursValue));
+ rBuffer.append( 'H');
+ if (fMinsValue < 10)
+ rBuffer.append( '0');
+ rBuffer.append( sal_Int32( fMinsValue));
+ rBuffer.append( 'M');
+ if (fSecsValue < 10)
+ rBuffer.append( '0');
+ rBuffer.append( sal_Int32( fSecsValue));
+ if (fNanoSecsValue > 0.0)
+ {
+ OUString aNS( ::rtl::math::doubleToUString( fValue,
+ rtl_math_StringFormat_F, XML_MAXDIGITSCOUNT_TIME - 5, '.',
+ true)); // note: formats the unrounded fValue, not fNanoSecsValue
+ if ( aNS.getLength() > 2 )
+ {
+ rBuffer.append( '.');
+ rBuffer.append( aNS.subView(2) ); // strip "0."
+ }
+ }
+ rBuffer.append( 'S');
+}
+
+/// Returns `in` without leading and trailing white space; an empty
+/// (default-constructed) view when `in` is empty or consists of white space only.
+static std::u16string_view trim(std::u16string_view in) {
+    // isspace() is only defined for values representable as unsigned char,
+    // so larger UTF-16 code units are never passed to it and count as
+    // non-blank.
+    auto const isBlank = [](char16_t ch) {
+        return ch <= 0xFF && isspace(static_cast<unsigned char>(ch)) != 0;
+    };
+
+    std::size_t first = 0;
+    while (first < in.size() && isBlank(in[first]))
+        ++first;
+    if (first == in.size())
+        return std::u16string_view();
+
+    // in[first] is non-blank, so this loop cannot run below `first`
+    std::size_t last = in.size() - 1;
+    while (last > first && isBlank(in[last]))
+        --last;
+
+    return in.substr(first, last - first + 1);
+}
+
+/// Returns `in` without leading and trailing white space; an empty
+/// (default-constructed) view when `in` is empty or consists of white space only.
+static std::string_view trim(std::string_view in) {
+    // isspace() must not receive a negative char value, so convert to
+    // unsigned char first.
+    auto const isBlank = [](char ch) {
+        return isspace(static_cast<unsigned char>(ch)) != 0;
+    };
+
+    std::size_t first = 0;
+    while (first < in.size() && isBlank(in[first]))
+        ++first;
+    if (first == in.size())
+        return std::string_view();
+
+    // in[first] is non-blank, so this loop cannot run below `first`
+    std::size_t last = in.size() - 1;
+    while (last > first && isBlank(in[last]))
+        --last;
+
+    return in.substr(first, last - first + 1);
+}
+
+/** helper function of Converter::convertDuration
+ *
+ * Parses a NUL-terminated duration of the form [-]P[nD][T[nH][nM][n[.f]S]]
+ * (designators in either case, ',' accepted as decimal separator) and
+ * stores it in rfTime as a number of days. Year and month components are
+ * not supported and make the conversion fail.
+ *
+ * @param pStr pointer to the first character; it is read until the
+ *             terminating NUL
+ * @return false, leaving rfTime untouched, on malformed input
+ */
+template<typename V>
+static bool convertDurationHelper(double& rfTime, V pStr)
+{
+    // negative time duration?
+    bool bIsNegativeDuration = false;
+    if ( '-' == (*pStr) )
+    {
+        bIsNegativeDuration = true;
+        pStr++;
+    }
+
+    if ( *pStr != 'P' && *pStr != 'p' ) // duration must start with "P"
+        return false;
+    pStr++;
+
+    OUStringBuffer sDoubleStr;
+    bool bSuccess = true;
+    bool bDone = false;
+    bool bTimePart = false;
+    bool bIsFraction = false;
+    sal_Int32 nDays = 0;
+    sal_Int32 nHours = 0;
+    sal_Int32 nMins = 0;
+    sal_Int32 nSecs = 0;
+    sal_Int32 nTemp = 0; // number currently being read, until its designator arrives
+
+    while ( bSuccess && !bDone )
+    {
+        sal_Unicode c = *(pStr++);
+        if ( !c ) // end
+            bDone = true;
+        else if ( '0' <= c && '9' >= c )
+        {
+            if ( nTemp >= SAL_MAX_INT32 / 10 )
+                bSuccess = false;
+            else
+            {
+                if ( !bIsFraction )
+                {
+                    nTemp *= 10;
+                    nTemp += (c - u'0');
+                }
+                else
+                {
+                    sDoubleStr.append(c);
+                }
+            }
+        }
+        else if ( bTimePart )
+        {
+            if ( c == 'H' || c == 'h' )
+            {
+                nHours = nTemp;
+                nTemp = 0;
+            }
+            else if ( c == 'M' || c == 'm')
+            {
+                nMins = nTemp;
+                nTemp = 0;
+            }
+            else if ( (c == ',') || (c == '.') )
+            {
+                // whole seconds are complete; following digits form the fraction
+                nSecs = nTemp;
+                nTemp = 0;
+                bIsFraction = true;
+                sDoubleStr = "0.";
+            }
+            else if ( c == 'S' || c == 's' )
+            {
+                if ( !bIsFraction )
+                {
+                    nSecs = nTemp;
+                    nTemp = 0;
+                    sDoubleStr = "0.0";
+                }
+            }
+            else
+                bSuccess = false; // invalid character
+        }
+        else
+        {
+            if ( c == 'T' || c == 't' ) // "T" starts time part
+                bTimePart = true;
+            else if ( c == 'D' || c == 'd')
+            {
+                nDays = nTemp;
+                nTemp = 0;
+            }
+            else if ( c == 'Y' || c == 'y' || c == 'M' || c == 'm' )
+            {
+                //! how many days is a year or month?
+
+                OSL_FAIL( "years or months in duration: not implemented");
+                bSuccess = false;
+            }
+            else
+                bSuccess = false; // invalid character
+        }
+    }
+
+    if ( !bSuccess )
+        return false;
+
+    // add the days to the hours part; done in double because nDays * 24
+    // can exceed the sal_Int32 range for large day counts
+    double const fHour = static_cast<double>(nHours) + static_cast<double>(nDays) * 24.0;
+    double const fMin = nMins;
+    double const fSec = nSecs;
+    double const fFraction = sDoubleStr.makeStringAndClear().toDouble();
+    double fTempTime = fHour / 24;
+    fTempTime += fMin / (24 * 60);
+    fTempTime += fSec / (24 * 60 * 60);
+    fTempTime += fFraction / (24 * 60 * 60);
+
+    // negative duration?
+    if ( bIsNegativeDuration )
+    {
+        fTempTime = -fTempTime;
+    }
+
+    rfTime = fTempTime;
+    return true;
+}
+
+/** convert ISO "duration" string to double; negative durations allowed
+ *
+ * Surrounding white space is ignored. Returns false for empty or malformed
+ * input.
+ */
+bool Converter::convertDuration(double& rfTime,
+        std::string_view rString)
+{
+    // convertDurationHelper reads until a terminating NUL, which a
+    // string_view does not guarantee (and an empty view may have a null
+    // data pointer), so hand it a NUL-terminated copy of the trimmed text.
+    std::string const aTrimmed(trim(rString));
+
+    return convertDurationHelper(rfTime, aTrimmed.c_str());
+}
+
+/** convert util::Duration to ISO8601 "duration" string
+ *
+ * Components that are zero are left out; a duration with no non-zero
+ * component is written as "P0D". Nanoseconds are written as nine
+ * zero-padded digits after the seconds.
+ */
+void Converter::convertDuration(OUStringBuffer& rBuffer,
+        const ::util::Duration& rDuration)
+{
+    // writes "<number><designator>", e.g. "3D"
+    auto const appendComponent = [&rBuffer](sal_Int32 nNumber, char cDesignator)
+    {
+        rBuffer.append(nNumber);
+        rBuffer.append(cDesignator);
+    };
+
+    if (rDuration.Negative)
+        rBuffer.append('-');
+    rBuffer.append('P');
+
+    // date part
+    if (rDuration.Years)
+        appendComponent(static_cast<sal_Int32>(rDuration.Years), 'Y');
+    if (rDuration.Months)
+        appendComponent(static_cast<sal_Int32>(rDuration.Months), 'M');
+    if (rDuration.Days)
+        appendComponent(static_cast<sal_Int32>(rDuration.Days), 'D');
+
+    bool const bHaveDate = rDuration.Years != 0
+                           || rDuration.Months != 0
+                           || rDuration.Days != 0;
+    bool const bHaveTime = rDuration.Hours != 0
+                           || rDuration.Minutes != 0
+                           || rDuration.Seconds != 0
+                           || rDuration.NanoSeconds != 0;
+
+    if (!bHaveTime)
+    {
+        if (!bHaveDate)
+        {
+            // zero duration: XMLSchema-2 says there must be at least one component
+            rBuffer.append('0');
+            rBuffer.append('D');
+        }
+        return;
+    }
+
+    // time part
+    rBuffer.append('T'); // time separator
+    if (rDuration.Hours)
+        appendComponent(static_cast<sal_Int32>(rDuration.Hours), 'H');
+    if (rDuration.Minutes)
+        appendComponent(static_cast<sal_Int32>(rDuration.Minutes), 'M');
+
+    if (rDuration.Seconds == 0 && rDuration.NanoSeconds == 0)
+        return;
+
+    // seconds must not be omitted (i.e. ".42S" is not valid)
+    rBuffer.append(static_cast<sal_Int32>(rDuration.Seconds));
+    if (rDuration.NanoSeconds)
+    {
+        OSL_ENSURE(rDuration.NanoSeconds < 1000000000,"NanoSeconds cannot be more than 999 999 999");
+        rBuffer.append('.');
+        std::ostringstream ostr;
+        ostr.fill('0');
+        ostr.width(9);
+        ostr << rDuration.NanoSeconds;
+        rBuffer.append(OUString::createFromAscii(ostr.str().c_str()));
+    }
+    rBuffer.append('S');
+}
+
+namespace {
+
+enum Result { R_NOTHING, R_OVERFLOW, R_SUCCESS }; /* outcome of the
+   readUnsignedNumber / readUnsignedNumberMaxDigits helpers:
+   R_NOTHING  - there was no digit at the start position,
+   R_OVERFLOW - digits were read but the value reached SAL_MAX_INT32,
+   R_SUCCESS  - a number was read */
+
+}
+
+/** Read a run of decimal digits starting at io_rnPos.
+
+    @param rString    text to scan
+    @param io_rnPos   in: index of the first candidate digit;
+                      out: index just past the digits (unchanged if there
+                      were none)
+    @param o_rNumber  out: the value read, converted to sal_Int32, or -1 if
+                      there was no digit
+    @return R_NOTHING if no digit was found, R_OVERFLOW if the value read is
+            SAL_MAX_INT32 or larger, R_SUCCESS otherwise
+ */
+template <typename V>
+static Result
+readUnsignedNumber(V rString,
+    size_t & io_rnPos, sal_Int32 & o_rNumber)
+{
+    const size_t nBegin(io_rnPos);
+    size_t nEnd(nBegin);
+
+    // advance over the leading digits
+    for (; nEnd < rString.size(); ++nEnd)
+    {
+        const typename V::value_type c = rString[nEnd];
+        if (!(('0' <= c) && (c <= '9')))
+            break;
+    }
+
+    if (nEnd == nBegin) // nothing read
+    {
+        o_rNumber = -1;
+        return R_NOTHING;
+    }
+
+    const sal_Int64 nValue = toInt64_WithLength(rString.data() + nBegin, 10, nEnd - nBegin);
+
+    io_rnPos = nEnd;
+    o_rNumber = nValue;
+    return (nValue >= SAL_MAX_INT32) ? R_OVERFLOW : R_SUCCESS;
+}
+
+/** Read a run of decimal digits, accumulating at most maxDigits of them.
+
+    All consecutive digits are consumed, but only the first maxDigits
+    contribute to the value; any further digits are skipped.
+
+    @param maxDigits  number of leading digits that form the value
+    @param rString    text to scan
+    @param io_rnPos   in: index of the first candidate digit;
+                      out: index just past the last digit (unchanged if
+                      there were none)
+    @param o_rNumber  out: the accumulated value, or -1 if there was no digit
+    @return R_NOTHING if no digit was found, R_OVERFLOW if the accumulated
+            value reached SAL_MAX_INT32, R_SUCCESS otherwise
+ */
+template<typename V>
+static Result
+readUnsignedNumberMaxDigits(int maxDigits,
+                            V rString, size_t & io_rnPos,
+                            sal_Int32 & o_rNumber)
+{
+    OSL_ENSURE(maxDigits >= 0, "negative amount of digits makes no sense");
+
+    const size_t nBegin(io_rnPos);
+    size_t nCur(nBegin);
+    sal_Int64 nValue(0);
+    bool bTooBig(false);
+
+    for (; nCur < rString.size(); ++nCur)
+    {
+        const sal_Unicode c = rString[nCur];
+        if ((c < '0') || ('9' < c))
+            break;
+        if (maxDigits <= 0)
+            continue; // surplus digit: consumed but not accumulated
+
+        nValue *= 10;
+        nValue += (c - u'0');
+        if (nValue >= SAL_MAX_INT32)
+            bTooBig = true;
+        --maxDigits;
+    }
+
+    if (nCur == nBegin) // nothing read
+    {
+        o_rNumber = -1;
+        return R_NOTHING;
+    }
+
+    io_rnPos = nCur;
+    o_rNumber = nValue;
+    return bTooBig ? R_OVERFLOW : R_SUCCESS;
+}
+
+/** Consume the time designator of a duration, if present.
+
+    @param rString   text to scan
+    @param io_rnPos  advanced by one if the character at this index is 'T'
+                     or 't'; otherwise left unchanged
+    @return true if a designator was consumed
+ */
+template<typename V>
+static bool
+readDurationT(V rString, size_t & io_rnPos)
+{
+    if (io_rnPos >= rString.size())
+        return false;
+
+    const auto c = rString[io_rnPos];
+    if (c != 'T' && c != 't')
+        return false;
+
+    ++io_rnPos;
+    return true;
+}
+
+/** Finish one "<number><designator>" component of a duration.
+
+    If the character at io_rnPos is the expected designator, the pending
+    number in io_rnTemp is stored in o_rnTarget, a following 'T' is consumed
+    (unless the time part has already started) and the next number is read
+    into io_rnTemp. If the character is something else, nothing happens.
+
+    @param rString        text to scan
+    @param io_rnPos       current index; advanced past what is consumed
+    @param io_rnTemp      in: pending number, -1 if there is none;
+                          out: the next number read, or -1
+    @param io_rbTimePart  whether the 'T' designator has been seen
+    @param o_rnTarget     receives the pending number on a match
+    @param cLower         lower-case form of the designator
+    @param cUpper         upper-case form of the designator
+    @return false if the designator had no number in front of it or the
+            following number overflowed; true otherwise
+ */
+template<typename V>
+static bool
+readDurationComponent(V rString,
+    size_t & io_rnPos, sal_Int32 & io_rnTemp, bool & io_rbTimePart,
+    sal_Int32 & o_rnTarget, const sal_Unicode cLower, const sal_Unicode cUpper)
+{
+    if (io_rnPos >= rString.size())
+        return true; // end of input: nothing to do
+
+    const auto c = rString[io_rnPos];
+    if (c != cLower && c != cUpper)
+        return true; // some other designator: leave it to the caller
+
+    ++io_rnPos;
+    if (-1 == io_rnTemp)
+        return false; // designator without a number
+
+    o_rnTarget = io_rnTemp;
+    io_rnTemp = -1;
+    if (!io_rbTimePart)
+    {
+        io_rbTimePart = readDurationT(rString, io_rnPos);
+    }
+    const Result eNext = readUnsignedNumber(rString, io_rnPos, io_rnTemp);
+    return eNext != R_OVERFLOW;
+}
+
+/** Parse an ISO8601 "duration" string into a util::Duration.
+
+    Single implementation shared by the two Converter::convertDuration()
+    string overloads, so that the UTF-16 and the 8-bit parser cannot drift
+    apart.
+
+    Accepted form, after trimming: an optional '-', then 'P', then optional
+    <n>Y, <n>M and <n>D components, then optionally 'T' followed by <n>H,
+    <n>M and <n>[.<fraction>]S components. Designators are accepted in either
+    case and ',' is accepted as well as '.' in front of the fraction. The
+    whole string has to be consumed and no number may be left without a
+    designator.
+
+    NOTE(review): the components are narrowed with static_cast<sal_Int16>
+    before they are stored; a value outside that range is not rejected here -
+    confirm whether callers need a range check.
+
+    @tparam V         string view type of the input
+    @param rDuration  written only if the string was parsed successfully
+    @param rString    the text to parse
+    @return true if rString is a duration in the form described above
+ */
+template<typename V>
+static bool lcl_parseDuration(util::Duration& rDuration, V rString)
+{
+    const V aText = trim(rString);
+    size_t nPos(0);
+
+    bool bIsNegativeDuration(false);
+    if (!aText.empty() && ('-' == aText[0]))
+    {
+        bIsNegativeDuration = true;
+        ++nPos;
+    }
+
+    // duration must start with "P"; an empty or sign-only string has none
+    if (nPos >= aText.size()
+        || (aText[nPos] != 'P' && aText[nPos] != 'p'))
+    {
+        return false;
+    }
+
+    ++nPos;
+
+    /// last read number; -1 == no valid number! always reset after using!
+    sal_Int32 nTemp(-1);
+    sal_Int32 nYears(0);
+    sal_Int32 nMonths(0);
+    sal_Int32 nDays(0);
+    sal_Int32 nHours(0);
+    sal_Int32 nMinutes(0);
+    sal_Int32 nSeconds(0);
+    sal_Int32 nNanoSeconds(0);
+
+    bool bTimePart = readDurationT(aText, nPos); // have we read 'T'?
+    bool bSuccess = (R_SUCCESS == readUnsignedNumber(aText, nPos, nTemp));
+
+    // date components; each call may switch to the time part on a 'T'
+    if (!bTimePart && bSuccess)
+    {
+        bSuccess = readDurationComponent(aText, nPos, nTemp, bTimePart,
+                     nYears, 'y', 'Y');
+    }
+
+    if (!bTimePart && bSuccess)
+    {
+        bSuccess = readDurationComponent(aText, nPos, nTemp, bTimePart,
+                     nMonths, 'm', 'M');
+    }
+
+    if (!bTimePart && bSuccess)
+    {
+        bSuccess = readDurationComponent(aText, nPos, nTemp, bTimePart,
+                     nDays, 'd', 'D');
+    }
+
+    if (bTimePart)
+    {
+        if (-1 == nTemp) // a 'T' must be followed by a component
+        {
+            bSuccess = false;
+        }
+
+        if (bSuccess)
+        {
+            bSuccess = readDurationComponent(aText, nPos, nTemp, bTimePart,
+                         nHours, 'h', 'H');
+        }
+
+        if (bSuccess)
+        {
+            bSuccess = readDurationComponent(aText, nPos, nTemp, bTimePart,
+                         nMinutes, 'm', 'M');
+        }
+
+        // eeek! seconds are icky.
+        if ((nPos < aText.size()) && bSuccess)
+        {
+            if (aText[nPos] == '.' ||
+                aText[nPos] == ',')
+            {
+                ++nPos;
+                if (-1 != nTemp)
+                {
+                    nSeconds = nTemp;
+                    nTemp = -1;
+                    const size_t nStart(nPos);
+                    bSuccess = readUnsignedNumberMaxDigits(9, aText, nPos, nTemp) == R_SUCCESS;
+                    if ((nPos < aText.size()) && bSuccess)
+                    {
+                        if (-1 != nTemp)
+                        {
+                            nNanoSeconds = nTemp;
+                            // scale a fraction of fewer than nine digits up
+                            // to nanoseconds
+                            for (size_t nDigits = nPos - nStart; nDigits < 9; ++nDigits)
+                            {
+                                nNanoSeconds *= 10;
+                            }
+                            nTemp = -1;
+                            if ('S' == aText[nPos] || 's' == aText[nPos])
+                            {
+                                ++nPos;
+                            }
+                            else
+                            {
+                                bSuccess = false;
+                            }
+                        }
+                        else
+                        {
+                            bSuccess = false;
+                        }
+                    }
+                }
+                else
+                {
+                    bSuccess = false;
+                }
+            }
+            else if ('S' == aText[nPos] || 's' == aText[nPos])
+            {
+                ++nPos;
+                if (-1 != nTemp)
+                {
+                    nSeconds = nTemp;
+                    nTemp = -1;
+                }
+                else
+                {
+                    bSuccess = false;
+                }
+            }
+        }
+    }
+
+    if (nPos != aText.size()) // string not processed completely?
+    {
+        bSuccess = false;
+    }
+
+    if (nTemp != -1) // unprocessed number?
+    {
+        bSuccess = false;
+    }
+
+    if (bSuccess)
+    {
+        rDuration.Negative = bIsNegativeDuration;
+        rDuration.Years = static_cast<sal_Int16>(nYears);
+        rDuration.Months = static_cast<sal_Int16>(nMonths);
+        rDuration.Days = static_cast<sal_Int16>(nDays);
+        rDuration.Hours = static_cast<sal_Int16>(nHours);
+        rDuration.Minutes = static_cast<sal_Int16>(nMinutes);
+        rDuration.Seconds = static_cast<sal_Int16>(nSeconds);
+        rDuration.NanoSeconds = nNanoSeconds;
+    }
+
+    return bSuccess;
+}
+
+/** convert ISO8601 "duration" string to util::Duration */
+bool Converter::convertDuration(util::Duration& rDuration,
+                                std::u16string_view rString)
+{
+    return lcl_parseDuration(rDuration, rString);
+}
+
+/** convert ISO8601 "duration" string to util::Duration */
+bool Converter::convertDuration(util::Duration& rDuration,
+                                std::string_view rString)
+{
+    return lcl_parseDuration(rDuration, rString);
+}
+
+/** Append a timezone designator to i_rBuffer.
+
+    An offset of 0 is written as "Z"; any other offset as a sign followed by
+    hh:mm, each field padded to two digits.
+
+    @param i_rBuffer  buffer the designator is appended to
+    @param nOffset    offset in minutes; the sign selects '+' or '-'
+ */
+static void
+lcl_AppendTimezone(OUStringBuffer & i_rBuffer, int const nOffset)
+{
+    if (0 == nOffset)
+    {
+        i_rBuffer.append('Z');
+        return;
+    }
+
+    i_rBuffer.append((0 < nOffset) ? '+' : '-');
+
+    const sal_Int32 nHours  (abs(nOffset) / 60);
+    const sal_Int32 nMinutes(abs(nOffset) % 60);
+    SAL_WARN_IF(nHours > 14 || (nHours == 14 && nMinutes > 0),
+            "sax", "convertDateTime: timezone overflow");
+
+    if (nHours < 10)
+        i_rBuffer.append('0');
+    i_rBuffer.append(nHours);
+    i_rBuffer.append(':');
+    if (nMinutes < 10)
+        i_rBuffer.append('0');
+    i_rBuffer.append(nMinutes);
+}
+
+/** Convert a util::Date to an ISO "date" string.
+
+    Wraps the date in a util::DateTime whose four leading constructor
+    arguments are zero and whose last argument is false, and hands it to
+    convertDateTime() together with pTimeZoneOffset.
+ */
+void Converter::convertDate(
+        OUStringBuffer& i_rBuffer,
+        const util::Date& i_rDate,
+        sal_Int16 const*const pTimeZoneOffset)
+{
+    const util::DateTime aDateOnly(0, 0, 0, 0,
+            i_rDate.Day, i_rDate.Month, i_rDate.Year, false);
+    convertDateTime(i_rBuffer, aDateOnly, pTimeZoneOffset);
+}
+
+/** Append the time of day of i_rDateTime as hh:mm:ss[.nnnnnnnnn].
+
+    Hours, minutes and seconds are padded to two digits. The fraction is
+    written only if NanoSeconds is greater than zero, as nine zero-padded
+    digits.
+ */
+static void convertTime(
+        OUStringBuffer& i_rBuffer,
+        const css::util::DateTime& i_rDateTime)
+{
+    // writes one field with a leading zero where needed
+    const auto appendTwoDigits = [&i_rBuffer](const sal_Int32 nField)
+    {
+        if (nField < 10)
+            i_rBuffer.append('0');
+        i_rBuffer.append(nField);
+    };
+
+    appendTwoDigits(static_cast<sal_Int32>(i_rDateTime.Hours));
+    i_rBuffer.append(':');
+    appendTwoDigits(static_cast<sal_Int32>(i_rDateTime.Minutes));
+    i_rBuffer.append(':');
+    appendTwoDigits(static_cast<sal_Int32>(i_rDateTime.Seconds));
+
+    if (i_rDateTime.NanoSeconds > 0)
+    {
+        OSL_ENSURE(i_rDateTime.NanoSeconds < 1000000000,"NanoSeconds cannot be more than 999 999 999");
+        std::ostringstream aFraction;
+        aFraction.width(9);
+        aFraction.fill('0');
+        aFraction << i_rDateTime.NanoSeconds;
+        i_rBuffer.append('.');
+        i_rBuffer.append(OUString::createFromAscii(aFraction.str().c_str()));
+    }
+}
+
+/** Append the timezone designator for i_rDateTime, if there is one.
+
+    An explicit offset in *pTimeZoneOffset takes precedence; without it, "Z"
+    is written if the DateTime is flagged as UTC, and nothing otherwise.
+ */
+static void convertTimeZone(
+        OUStringBuffer& i_rBuffer,
+        const css::util::DateTime& i_rDateTime,
+        sal_Int16 const* pTimeZoneOffset)
+{
+    if (pTimeZoneOffset)
+    {
+        lcl_AppendTimezone(i_rBuffer, *pTimeZoneOffset);
+        return;
+    }
+    if (i_rDateTime.IsUTC)
+    {
+        lcl_AppendTimezone(i_rBuffer, 0);
+    }
+}
+
+/** Convert a util::DateTime to an ISO "time" or "dateTime" string.
+
+    If the year is non-zero, the month is 1..12 and the day is 1..31, a full
+    "dateTime" is written (always including the time part). Otherwise only
+    the time and the timezone designator are written.
+ */
+void Converter::convertTimeOrDateTime(
+        OUStringBuffer& i_rBuffer,
+        const css::util::DateTime& i_rDateTime)
+{
+    const bool bHasDate =
+           i_rDateTime.Year != 0
+        && i_rDateTime.Month >= 1 && i_rDateTime.Month <= 12
+        && i_rDateTime.Day >= 1 && i_rDateTime.Day <= 31;
+
+    if (bHasDate)
+    {
+        convertDateTime(i_rBuffer, i_rDateTime, nullptr, true);
+        return;
+    }
+
+    convertTime(i_rBuffer, i_rDateTime);
+    convertTimeZone(i_rBuffer, i_rDateTime, nullptr);
+}
+
+/** Convert a util::DateTime to an ISO "date" or "dateTime" string.
+
+    Writes [-]YYYY-MM-DD with the year padded to at least four digits and
+    month and day padded to two. The 'T' and the time follow only if hours,
+    minutes or seconds are non-zero or i_bAddTimeIf0AM is set; NanoSeconds on
+    their own do not trigger the time part. The timezone designator, if any,
+    comes last.
+
+    @param i_rBuffer        buffer the text is appended to
+    @param i_rDateTime      the value to serialize
+    @param pTimeZoneOffset  optional explicit offset, passed on to
+                            convertTimeZone()
+    @param i_bAddTimeIf0AM  write the time part even when it is 00:00:00
+ */
+void Converter::convertDateTime(
+        OUStringBuffer& i_rBuffer,
+        const css::util::DateTime& i_rDateTime,
+        sal_Int16 const*const pTimeZoneOffset,
+        bool i_bAddTimeIf0AM )
+{
+    const sal_Unicode dash('-');
+    const sal_Unicode zero('0');
+
+    if (i_rDateTime.Year < 0) {
+        i_rBuffer.append(dash); // negative
+    }
+    sal_Int32 const nYear(abs(i_rDateTime.Year));
+    // one leading zero for every digit the year is short of four
+    for (sal_Int32 nLimit = 1000; nLimit >= 10; nLimit /= 10) {
+        if (nYear < nLimit) {
+            i_rBuffer.append(zero);
+        }
+    }
+    i_rBuffer.append( OUString::number(nYear) + OUStringChar(dash) );
+
+    if( i_rDateTime.Month < 10 ) {
+        i_rBuffer.append(zero);
+    }
+    i_rBuffer.append( OUString::number(i_rDateTime.Month) + OUStringChar(dash) );
+
+    if( i_rDateTime.Day < 10 ) {
+        i_rBuffer.append(zero);
+    }
+    i_rBuffer.append( static_cast<sal_Int32>(i_rDateTime.Day) );
+
+    const bool bWriteTime = i_bAddTimeIf0AM
+                         || i_rDateTime.Hours != 0
+                         || i_rDateTime.Minutes != 0
+                         || i_rDateTime.Seconds != 0;
+    if (bWriteTime)
+    {
+        i_rBuffer.append('T');
+        convertTime(i_rBuffer, i_rDateTime);
+    }
+
+    convertTimeZone(i_rBuffer, i_rDateTime, pTimeZoneOffset);
+}
+
+/** Convert an ISO "date" or "dateTime" string (UTF-16) to util::DateTime.
+
+    Forwards to parseDateOrDateTime() without a Date target and without a
+    timezone target; the "is it a dateTime" flag is discarded.
+ */
+bool Converter::parseDateTime(   util::DateTime& rDateTime,
+                                 std::u16string_view rString )
+{
+    bool bIsDateTimeUnused;
+    const bool bParsed = parseDateOrDateTime(nullptr, rDateTime,
+            bIsDateTimeUnused, nullptr, rString);
+    return bParsed;
+}
+
+/** Convert an ISO "date" or "dateTime" string (8-bit) to util::DateTime.
+
+    Forwards to parseDateOrDateTime() without a Date target and without a
+    timezone target; the "is it a dateTime" flag is discarded.
+ */
+bool Converter::parseDateTime(   util::DateTime& rDateTime,
+                                 std::string_view rString )
+{
+    bool bIsDateTimeUnused;
+    const bool bParsed = parseDateOrDateTime(nullptr, rDateTime,
+            bIsDateTimeUnused, nullptr, rString);
+    return bParsed;
+}
+
+/** Leap year test: divisible by 4, and by 400 if divisible by 100. */
+static bool lcl_isLeapYear(const sal_uInt32 nYear)
+{
+    if ((nYear % 4) != 0)
+        return false;
+    if ((nYear % 100) != 0)
+        return true;
+    return (nYear % 400) == 0;
+}
+
+/** Number of days in the given month.
+
+    @param nMonth  month, 1..12 (asserted)
+    @param nYear   year, used to decide whether February has 29 days
+ */
+static sal_uInt16
+lcl_MaxDaysPerMonth(const sal_Int32 nMonth, const sal_Int32 nYear)
+{
+    static const sal_uInt16 s_MaxDaysPerMonth[12] =
+        { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
+    assert(0 < nMonth && nMonth <= 12);
+
+    const bool bLeapFebruary = (2 == nMonth) && lcl_isLeapYear(nYear);
+    return bLeapFebruary ? 29 : s_MaxDaysPerMonth[nMonth - 1];
+}
+
+/** Adjust a date/time that carries a timezone offset to UTC, in place.
+
+    nSourceOffset is the offset of the source time in minutes; a positive
+    offset is subtracted and a negative one is added. Carries and borrows
+    ripple from the minutes into the hours, day, month and year. If o_rDay is
+    0 the value is a time without a date, and only the minutes and hours are
+    adjusted.
+
+    The minutes of the offset are applied exactly once, in the branch that
+    matches the sign of the offset. An additional unconditional
+    "o_rMinutes += nOffsetMinutes" ahead of the branches used to apply them
+    twice for negative offsets and to cancel them out for positive ones, so
+    that e.g. 12:10+05:30 came out as 07:10 instead of 06:40.
+
+    @param o_rYear        year, adjusted in place
+    @param o_rMonth       month, adjusted in place
+    @param o_rDay         day, adjusted in place; 0 means "no date"
+    @param o_rHours       hours, adjusted in place
+    @param o_rMinutes     minutes, adjusted in place
+    @param nSourceOffset  timezone offset of the source time, in minutes
+ */
+static void lcl_ConvertToUTC(
+        sal_Int16 & o_rYear, sal_uInt16 & o_rMonth, sal_uInt16 & o_rDay,
+        sal_uInt16 & o_rHours, sal_uInt16 & o_rMinutes,
+        int const nSourceOffset)
+{
+    sal_Int16 nOffsetHours(abs(nSourceOffset) / 60);
+    sal_Int16 const nOffsetMinutes(abs(nSourceOffset) % 60);
+    if (nSourceOffset < 0)
+    {
+        // source is behind UTC: add the offset
+        o_rMinutes += nOffsetMinutes;
+        if (60 <= o_rMinutes)
+        {
+            o_rMinutes -= 60;
+            ++nOffsetHours;
+        }
+        o_rHours += nOffsetHours;
+        if (o_rHours < 24)
+        {
+            return;
+        }
+        sal_Int16 nDayAdd(0);
+        while (24 <= o_rHours)
+        {
+            o_rHours -= 24;
+            ++nDayAdd;
+        }
+        if (o_rDay == 0)
+        {
+            return; // handle time without date - don't adjust what isn't there
+        }
+        o_rDay += nDayAdd;
+        sal_Int16 const nDaysInMonth(lcl_MaxDaysPerMonth(o_rMonth, o_rYear));
+        if (o_rDay <= nDaysInMonth)
+        {
+            return;
+        }
+        // ran past the end of the month
+        o_rDay -= nDaysInMonth;
+        ++o_rMonth;
+        if (o_rMonth <= 12)
+        {
+            return;
+        }
+        o_rMonth = 1;
+        ++o_rYear; // works for negative year too
+    }
+    else if (0 < nSourceOffset)
+    {
+        // source is ahead of UTC: subtract the offset
+        // argh everything is unsigned
+        if (o_rMinutes < nOffsetMinutes)
+        {
+            // borrow an hour so that the subtraction cannot wrap
+            o_rMinutes += 60;
+            ++nOffsetHours;
+        }
+        o_rMinutes -= nOffsetMinutes;
+        sal_Int16 nDaySubtract(0);
+        while (o_rHours < nOffsetHours)
+        {
+            o_rHours += 24;
+            ++nDaySubtract;
+        }
+        o_rHours -= nOffsetHours;
+        if (o_rDay == 0)
+        {
+            return; // handle time without date - don't adjust what isn't there
+        }
+        if (nDaySubtract < o_rDay)
+        {
+            o_rDay -= nDaySubtract;
+            return;
+        }
+        // ran past the start of the month: continue in the previous one
+        sal_Int16 const nPrevMonth((o_rMonth == 1) ? 12 : o_rMonth - 1);
+        sal_Int16 const nDaysInMonth(lcl_MaxDaysPerMonth(nPrevMonth, o_rYear));
+        o_rDay += nDaysInMonth;
+        --o_rMonth;
+        if (0 == o_rMonth)
+        {
+            o_rMonth = 12;
+            --o_rYear; // works for negative year too
+        }
+        o_rDay -= nDaySubtract;
+    }
+}
+
+/** Read one numeric component of a date or time and check its length.
+
+    @param rString       text to scan
+    @param io_rnPos      in: index of the first digit; out: index reached by
+                         readUnsignedNumber()
+    @param o_rnTarget    receives the value; written only on success
+    @param nMinLength    minimum number of digits
+    @param bExactLength  if true, nMinLength is also the maximum
+    @return true if a number of acceptable length was read without overflow
+ */
+template <typename V>
+static bool
+readDateTimeComponent(V rString,
+    size_t & io_rnPos, sal_Int32 & o_rnTarget,
+    const sal_Int32 nMinLength, const bool bExactLength)
+{
+    const size_t nStart(io_rnPos);
+    sal_Int32 nValue(0);
+    const Result eRead = readUnsignedNumber<V>(rString, io_rnPos, nValue);
+    if (eRead != R_SUCCESS)
+    {
+        return false;
+    }
+
+    const sal_Int32 nDigits(io_rnPos - nStart);
+    const bool bTooShort = nDigits < nMinLength;
+    const bool bTooLong = bExactLength && (nDigits > nMinLength);
+    if (bTooShort || bTooLong)
+    {
+        return false; // bad length
+    }
+
+    o_rnTarget = nValue;
+    return true;
+}
+
+/** Parse the date part ([-]Y...-MM-DD) of an ISO "date" or "dateTime"
+    string, plus the 'T' that may follow it.
+
+    @param isNegative  set to true if the date starts with '-'
+    @param nYear       receives the year (one or more digits)
+    @param nMonth      receives the month (exactly two digits, at most 12)
+    @param nDay        receives the day (exactly two digits, at most the
+                       length of the month when the month is non-zero)
+    @param bHaveTime   set to true if the date is followed by 'T' or 't'
+    @param nPos        in: index where the date starts; out: index reached
+    @param string      the text to parse
+    @param bIgnoreInvalidOrMissingDate  if true, a year, month or day of 0
+                       is accepted
+    @return true if a complete date was read
+ */
+template<typename V>
+static bool lcl_parseDate(
+                bool & isNegative,
+                sal_Int32 & nYear, sal_Int32 & nMonth, sal_Int32 & nDay,
+                bool & bHaveTime,
+                size_t & nPos,
+                V string,
+                bool const bIgnoreInvalidOrMissingDate)
+{
+    if ((nPos < string.size()) && ('-' == string[nPos]))
+    {
+        isNegative = true;
+        ++nPos;
+    }
+
+    // While W3C XMLSchema specifies years with a minimum of 4 digits, be
+    // lenient in what we accept for years < 1000. One digit is acceptable
+    // if the remainders match.
+    if (!readDateTimeComponent<V>(string, nPos, nYear, 1, false))
+        return false;
+    if (!bIgnoreInvalidOrMissingDate && !(0 < nYear))
+        return false;
+    if (nPos >= string.size()) // must not be the last token
+        return false;
+    if ('-' != string[nPos]) // separator
+        return false;
+    ++nPos;
+
+    if (!readDateTimeComponent<V>(string, nPos, nMonth, 2, true))
+        return false;
+    if (!bIgnoreInvalidOrMissingDate && !(0 < nMonth))
+        return false;
+    if (nMonth > 12)
+        return false;
+    if (nPos >= string.size()) // must not be the last token
+        return false;
+    if ('-' != string[nPos]) // separator
+        return false;
+    ++nPos;
+
+    if (!readDateTimeComponent(string, nPos, nDay, 2, true))
+        return false;
+    if (!bIgnoreInvalidOrMissingDate && !(0 < nDay))
+        return false;
+    if (nMonth > 0) // not possible to check if month was missing
+    {
+        if (nDay > lcl_MaxDaysPerMonth(nMonth, nYear))
+            return false;
+    }
+    else assert(bIgnoreInvalidOrMissingDate);
+
+    if ((nPos < string.size())
+        && ('T' == string[nPos] || 't' == string[nPos])) // time separator
+    {
+        bHaveTime = true;
+        ++nPos;
+    }
+
+    return true;
+}
+
+/** convert ISO "date" or "dateTime" string to util::DateTime or util::Date
+
+    Parses, in this order: the date (via lcl_parseDate), an optional time
+    hh:mm:ss with an optional fraction introduced by '.' or ',', and an
+    optional timezone ('Z'/'z', or +hh:mm / -hh:mm up to 14:00). The whole
+    trimmed string has to be consumed.
+
+    @param pDate            if non-null and the string has no time part, the
+                            result is stored here instead of in rDateTime
+    @param rDateTime        receives the result in all other cases
+    @param rbDateTime       out: whether a time part was read
+    @param pTimeZoneOffset  if non-null, receives the timezone offset in
+                            minutes, or is reset if the string has none; if
+                            null, a DateTime with a timezone is converted to
+                            UTC and the timezone of a Date is dropped
+    @param string           the text to parse
+    @param bIgnoreInvalidOrMissingDate  if true, a string that contains ':'
+                            with no '-' in front of it is parsed as a time
+                            only, and lcl_parseDate accepts zero components
+    @return true on success; the output parameters are only written then
+ */
+template <typename V>
+static bool lcl_parseDateTime(
+ util::Date *const pDate, util::DateTime & rDateTime,
+ bool & rbDateTime,
+ std::optional<sal_Int16> *const pTimeZoneOffset,
+ V string,
+ bool const bIgnoreInvalidOrMissingDate)
+{
+ bool bSuccess = true;
+
+ string = trim(string);
+
+ bool isNegative(false);
+ sal_Int32 nYear(0);
+ sal_Int32 nMonth(0);
+ sal_Int32 nDay(0);
+ size_t nPos(0);
+ bool bHaveTime(false);
+
+ if ( !bIgnoreInvalidOrMissingDate
+ || string.find(':') == V::npos // no time?
+ || (string.find('-') != V::npos
+ && string.find('-') < string.find(':')))
+ {
+ bSuccess &= lcl_parseDate<V>(isNegative, nYear, nMonth, nDay,
+ bHaveTime, nPos, string, bIgnoreInvalidOrMissingDate);
+ }
+ else
+ {
+ bHaveTime = true; // no date part: the whole string is parsed as a time
+ }
+
+ sal_Int32 nHours(0);
+ sal_Int32 nMinutes(0);
+ sal_Int32 nSeconds(0);
+ sal_Int32 nNanoSeconds(0);
+ if (bSuccess && bHaveTime)
+ {
+ {
+ bSuccess = readDateTimeComponent(string, nPos, nHours, 2, true);
+ bSuccess &= (0 <= nHours) && (nHours <= 24);
+ bSuccess &= (nPos < string.size()); // not last token
+ }
+ if (bSuccess && (':' != string[nPos])) // separator
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ ++nPos;
+
+ bSuccess = readDateTimeComponent(string, nPos, nMinutes, 2, true);
+ bSuccess &= (0 <= nMinutes) && (nMinutes < 60);
+ bSuccess &= (nPos < string.size()); // not last token
+ }
+ if (bSuccess && (':' != string[nPos])) // separator
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ ++nPos;
+
+ bSuccess = readDateTimeComponent(string, nPos, nSeconds, 2, true);
+ bSuccess &= (0 <= nSeconds) && (nSeconds < 60);
+ }
+ if (bSuccess && (nPos < string.size()) &&
+ ('.' == string[nPos] || ',' == string[nPos])) // fraction separator
+ {
+ ++nPos;
+ const sal_Int32 nStart(nPos);
+ sal_Int32 nTemp(0);
+ if (R_NOTHING == readUnsignedNumberMaxDigits<V>(9, string, nPos, nTemp))
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ sal_Int32 nDigits = std::min<sal_Int32>(nPos - nStart, 9); // digits beyond the ninth were consumed but not accumulated
+ assert(nDigits > 0);
+ for (; nDigits < 9; ++nDigits) // scale a short fraction up to nanoseconds
+ {
+ nTemp *= 10;
+ }
+ nNanoSeconds = nTemp;
+ }
+ }
+
+ if (bSuccess && (nHours == 24))
+ {
+ if (!((0 == nMinutes) && (0 == nSeconds) && (0 == nNanoSeconds)))
+ {
+ bSuccess = false; // only 24:00:00 is valid
+ }
+ }
+ }
+
+ bool bHaveTimezone(false);
+ bool bHaveTimezonePlus(false);
+ bool bHaveTimezoneMinus(false);
+ if (bSuccess && (nPos < string.size()))
+ {
+ const sal_Unicode c(string[nPos]);
+ if ('+' == c)
+ {
+ bHaveTimezone = true;
+ bHaveTimezonePlus = true;
+ ++nPos;
+ }
+ else if ('-' == c)
+ {
+ bHaveTimezone = true;
+ bHaveTimezoneMinus = true;
+ ++nPos;
+ }
+ else if ('Z' == c || 'z' == c)
+ {
+ bHaveTimezone = true;
+ ++nPos;
+ }
+ else
+ {
+ bSuccess = false;
+ }
+ }
+ sal_Int32 nTimezoneHours(0);
+ sal_Int32 nTimezoneMinutes(0);
+ if (bSuccess && (bHaveTimezonePlus || bHaveTimezoneMinus))
+ {
+ bSuccess = readDateTimeComponent<V>(
+ string, nPos, nTimezoneHours, 2, true);
+ bSuccess &= (0 <= nTimezoneHours) && (nTimezoneHours <= 14);
+ bSuccess &= (nPos < string.size()); // not last token
+ if (bSuccess && (':' != string[nPos])) // separator
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ ++nPos;
+
+ bSuccess = readDateTimeComponent<V>(
+ string, nPos, nTimezoneMinutes, 2, true);
+ bSuccess &= (0 <= nTimezoneMinutes) && (nTimezoneMinutes < 60);
+ }
+ if (bSuccess && (nTimezoneHours == 14))
+ {
+ if (0 != nTimezoneMinutes)
+ {
+ bSuccess = false; // only +-14:00 is valid
+ }
+ }
+ }
+
+ bSuccess &= (nPos == string.size()); // trailing junk?
+
+ if (bSuccess)
+ {
+ sal_Int16 const nTimezoneOffset = (bHaveTimezoneMinus ? -1 : +1)
+ * ((nTimezoneHours * 60) + nTimezoneMinutes);
+ if (!pDate || bHaveTime) // time is optional
+ {
+ rDateTime.Year =
+ (isNegative ? -1 : +1) * static_cast<sal_Int16>(nYear);
+ rDateTime.Month = static_cast<sal_uInt16>(nMonth);
+ rDateTime.Day = static_cast<sal_uInt16>(nDay);
+ rDateTime.Hours = static_cast<sal_uInt16>(nHours);
+ rDateTime.Minutes = static_cast<sal_uInt16>(nMinutes);
+ rDateTime.Seconds = static_cast<sal_uInt16>(nSeconds);
+ rDateTime.NanoSeconds = static_cast<sal_uInt32>(nNanoSeconds);
+ if (bHaveTimezone)
+ {
+ if (pTimeZoneOffset)
+ {
+ *pTimeZoneOffset = nTimezoneOffset;
+ rDateTime.IsUTC = (0 == nTimezoneOffset);
+ }
+ else
+ {
+ lcl_ConvertToUTC(rDateTime.Year, rDateTime.Month,
+ rDateTime.Day, rDateTime.Hours, rDateTime.Minutes,
+ nTimezoneOffset);
+ rDateTime.IsUTC = true; // the fields were just passed through lcl_ConvertToUTC
+ }
+ }
+ else
+ {
+ if (pTimeZoneOffset)
+ {
+ pTimeZoneOffset->reset();
+ }
+ rDateTime.IsUTC = false;
+ }
+ rbDateTime = bHaveTime;
+ }
+ else
+ {
+ pDate->Year =
+ (isNegative ? -1 : +1) * static_cast<sal_Int16>(nYear);
+ pDate->Month = static_cast<sal_uInt16>(nMonth);
+ pDate->Day = static_cast<sal_uInt16>(nDay);
+ if (bHaveTimezone)
+ {
+ if (pTimeZoneOffset)
+ {
+ *pTimeZoneOffset = nTimezoneOffset;
+ }
+ else
+ {
+ // a Date cannot be adjusted
+ SAL_INFO("sax", "dropping timezone");
+ }
+ }
+ else
+ {
+ if (pTimeZoneOffset)
+ {
+ pTimeZoneOffset->reset();
+ }
+ }
+ rbDateTime = false;
+ }
+ }
+ return bSuccess;
+}
+
+/** Convert an ISO "time" or "dateTime" string (UTF-16) to util::DateTime.
+
+    Forwards to lcl_parseDateTime() with no Date target, no timezone target
+    and bIgnoreInvalidOrMissingDate set; the "is it a dateTime" flag is
+    discarded.
+ */
+bool Converter::parseTimeOrDateTime(
+                util::DateTime & rDateTime,
+                std::u16string_view rString)
+{
+    bool bIsDateTimeUnused;
+    const bool bParsed = lcl_parseDateTime(
+            nullptr, rDateTime, bIsDateTimeUnused, nullptr, rString, true);
+    return bParsed;
+}
+
+/** Convert an ISO "time" or "dateTime" string (8-bit) to util::DateTime.
+
+    Forwards to lcl_parseDateTime() with no Date target, no timezone target
+    and bIgnoreInvalidOrMissingDate set; the "is it a dateTime" flag is
+    discarded.
+ */
+bool Converter::parseTimeOrDateTime(
+                util::DateTime & rDateTime,
+                std::string_view rString)
+{
+    bool bIsDateTimeUnused;
+    const bool bParsed = lcl_parseDateTime(
+            nullptr, rDateTime, bIsDateTimeUnused, nullptr, rString, true);
+    return bParsed;
+}
+
+/** Convert an ISO "date" or "dateTime" string (UTF-16) to util::DateTime or
+    util::Date.
+
+    Forwards all arguments to lcl_parseDateTime() with
+    bIgnoreInvalidOrMissingDate cleared.
+ */
+bool Converter::parseDateOrDateTime(
+                util::Date *const pDate, util::DateTime & rDateTime,
+                bool & rbDateTime,
+                std::optional<sal_Int16> *const pTimeZoneOffset,
+                std::u16string_view rString )
+{
+    const bool bParsed = lcl_parseDateTime(
+            pDate, rDateTime, rbDateTime, pTimeZoneOffset, rString, false);
+    return bParsed;
+}
+
+/** Convert an ISO "date" or "dateTime" string (8-bit) to util::DateTime or
+    util::Date.
+
+    Forwards all arguments to lcl_parseDateTime() with
+    bIgnoreInvalidOrMissingDate cleared.
+ */
+bool Converter::parseDateOrDateTime(
+                util::Date *const pDate, util::DateTime & rDateTime,
+                bool & rbDateTime,
+                std::optional<sal_Int16> *const pTimeZoneOffset,
+                std::string_view rString )
+{
+    const bool bParsed = lcl_parseDateTime(
+            pDate, rDateTime, rbDateTime, pTimeZoneOffset, rString, false);
+    return bParsed;
+}
+
+/** Find the first comma at or after nPos that is not inside quotes.
+
+    A quoted section is opened by ' or " and closed by the same character;
+    the other quote character has no effect while a section is open.
+
+    @param rStr  the string to search
+    @param nPos  index at which the search starts
+    @return index of the comma, or -1 if there is none
+ */
+sal_Int32 Converter::indexOfComma( std::u16string_view rStr,
+                                   sal_Int32 nPos )
+{
+    const sal_Int32 nLen = rStr.size();
+    // 0 while outside quotes, otherwise the character that opened the section
+    sal_Unicode cOpenQuote = 0;
+
+    while( nPos < nLen )
+    {
+        const sal_Unicode c = rStr[nPos];
+        if( 0 == cOpenQuote )
+        {
+            if( u',' == c )
+                return nPos;
+            if( u'\'' == c || u'"' == c )
+                cOpenQuote = c;
+        }
+        else if( c == cOpenQuote )
+        {
+            cOpenQuote = 0;
+        }
+        nPos++;
+    }
+
+    return -1;
+}
+
+/** Get the factor that converts a value from nSourceUnit to nTargetUnit.
+
+    rUnit is always cleared first. If the two units differ, it receives the
+    unit string of the target unit (when there is one) and the factor
+    computed by o3tl::convert() is returned; if they are equal, rUnit stays
+    empty and the factor is 1.0.
+ */
+double Converter::GetConversionFactor(OUStringBuffer& rUnit, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit)
+{
+    rUnit.setLength(0);
+
+    if (nSourceUnit == nTargetUnit)
+        return 1.0;
+
+    const o3tl::Length eFrom = Measure2O3tlUnit(nSourceUnit);
+    const o3tl::Length eTo = Measure2O3tlUnit(nTargetUnit);
+    const double fFactor = o3tl::convert(1.0, eFrom, eTo);
+
+    if (const auto sUnit = Measure2UnitString(nTargetUnit); sUnit.size() > 0)
+        rUnit.appendAscii(sUnit.data(), sUnit.size());
+
+    return fFactor;
+}
+
+/** Get the factor that converts a value from nSourceUnit to nTargetUnit
+    (8-bit unit string).
+
+    rUnit is always cleared first. If the two units differ, it receives the
+    unit string of the target unit (when there is one) and the factor
+    computed by o3tl::convert() is returned; if they are equal, rUnit stays
+    empty and the factor is 1.0.
+ */
+double Converter::GetConversionFactor(OStringBuffer& rUnit, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit)
+{
+    rUnit.setLength(0);
+
+    if (nSourceUnit == nTargetUnit)
+        return 1.0;
+
+    const o3tl::Length eFrom = Measure2O3tlUnit(nSourceUnit);
+    const o3tl::Length eTo = Measure2O3tlUnit(nTargetUnit);
+    const double fFactor = o3tl::convert(1.0, eFrom, eTo);
+
+    if (const auto sUnit = Measure2UnitString(nTargetUnit); sUnit.size() > 0)
+        rUnit.append(sUnit.data(), sUnit.size());
+
+    return fFactor;
+}
+
+/** Determine the measure unit named in a string such as "-1.5 cm".
+
+    Skips leading blanks, an optional '-', digits, an optional '.' with more
+    digits and further blanks, then inspects the next one or two characters
+    (case-insensitively): "%" is PERCENT, "cm" CM, "in" INCH, "mm" MM,
+    "pt" POINT and "pc" TWIP.
+
+    @param rString       the text to inspect
+    @param nDefaultUnit  returned when no unit is recognised
+    @return the MeasureUnit constant found, or nDefaultUnit
+ */
+template<typename V>
+static sal_Int16 lcl_GetUnitFromString(V rString, sal_Int16 nDefaultUnit)
+{
+    const sal_Int32 nLen = rString.size();
+    sal_Int32 nPos = 0;
+
+    const auto skipBlanks = [&]
+    {
+        while( nPos < nLen && ' ' == rString[nPos] )
+            nPos++;
+    };
+    const auto skipDigits = [&]
+    {
+        while( nPos < nLen && '0' <= rString[nPos] && '9' >= rString[nPos] )
+            nPos++;
+    };
+    // is the character after the current one cLower or cUpper?
+    const auto nextIs = [&](const char cLower, const char cUpper)
+    {
+        if( nPos+1 >= nLen )
+            return false;
+        const auto c = rString[nPos+1];
+        return c == cLower || c == cUpper;
+    };
+
+    skipBlanks();
+    if( nPos < nLen && '-' == rString[nPos] ) // negative number
+        nPos++;
+    skipDigits();
+    if( nPos < nLen && '.' == rString[nPos] )
+    {
+        nPos++;
+        skipDigits();
+    }
+    skipBlanks();
+
+    if( nPos >= nLen )
+        return nDefaultUnit;
+
+    sal_Int16 nUnit = nDefaultUnit;
+    switch(rString[nPos])
+    {
+        case '%' :
+            nUnit = MeasureUnit::PERCENT;
+            break;
+        case 'c':
+        case 'C':
+            if( nextIs('m', 'M') )
+                nUnit = MeasureUnit::CM;
+            break;
+        case 'e':
+        case 'E':
+            // CSS1_EMS or CSS1_EMX later
+            break;
+        case 'i':
+        case 'I':
+            if( nextIs('n', 'N') )
+                nUnit = MeasureUnit::INCH;
+            break;
+        case 'm':
+        case 'M':
+            if( nextIs('m', 'M') )
+                nUnit = MeasureUnit::MM;
+            break;
+        case 'p':
+        case 'P':
+            if( nextIs('t', 'T') )
+                nUnit = MeasureUnit::POINT;
+            if( nextIs('c', 'C') )
+                nUnit = MeasureUnit::TWIP;
+            break;
+    }
+
+    return nUnit;
+}
+
+/** UTF-16 entry point; see lcl_GetUnitFromString(). */
+sal_Int16 Converter::GetUnitFromString(std::u16string_view rString, sal_Int16 nDefaultUnit)
+{
+    const sal_Int16 nUnit = lcl_GetUnitFromString(rString, nDefaultUnit);
+    return nUnit;
+}
+/** 8-bit entry point; see lcl_GetUnitFromString(). */
+sal_Int16 Converter::GetUnitFromString(std::string_view rString, sal_Int16 nDefaultUnit)
+{
+    const sal_Int16 nUnit = lcl_GetUnitFromString(rString, nDefaultUnit);
+    return nUnit;
+}
+
+/** Convert the value held in an Any to a string plus a type name.
+
+    Both buffers are cleared first. Depending on the type class of rValue,
+    rsType receives "integer", "boolean", "float", "string", "date" or
+    "time" and rsValue the matching textual form. A util::Date and a
+    util::DateTime are both reported as "date"; a util::Time is written as a
+    duration.
+
+    @param rsValue  receives the textual value
+    @param rsType   receives the type name
+    @param rValue   the value to convert
+    @return true if the value could be extracted and converted; otherwise
+            both buffers are left empty
+ */
+bool Converter::convertAny(OUStringBuffer& rsValue,
+                           OUStringBuffer& rsType ,
+                           const css::uno::Any& rValue)
+{
+    rsValue.setLength(0);
+    rsType.setLength (0);
+
+    switch (rValue.getValueTypeClass())
+    {
+        case css::uno::TypeClass_BYTE :
+        case css::uno::TypeClass_SHORT :
+        case css::uno::TypeClass_UNSIGNED_SHORT :
+        case css::uno::TypeClass_LONG :
+        case css::uno::TypeClass_UNSIGNED_LONG :
+            {
+                sal_Int32 nNumber = 0;
+                if (rValue >>= nNumber)
+                {
+                    rsType.append("integer");
+                    rsValue.append(nNumber);
+                    return true;
+                }
+            }
+            break;
+
+        case css::uno::TypeClass_BOOLEAN :
+            {
+                bool bFlag = false;
+                if (rValue >>= bFlag)
+                {
+                    rsType.append("boolean");
+                    ::sax::Converter::convertBool(rsValue, bFlag);
+                    return true;
+                }
+            }
+            break;
+
+        case css::uno::TypeClass_FLOAT :
+        case css::uno::TypeClass_DOUBLE :
+            {
+                double fNumber = 0.0;
+                if (rValue >>= fNumber)
+                {
+                    rsType.append("float");
+                    ::sax::Converter::convertDouble(rsValue, fNumber);
+                    return true;
+                }
+            }
+            break;
+
+        case css::uno::TypeClass_STRING :
+            {
+                OUString sText;
+                if (rValue >>= sText)
+                {
+                    rsType.append("string");
+                    rsValue.append(sText);
+                    return true;
+                }
+            }
+            break;
+
+        case css::uno::TypeClass_STRUCT :
+            {
+                css::util::Date     aDate    ;
+                css::util::Time     aTime    ;
+                css::util::DateTime aDateTime;
+
+                if (rValue >>= aDate)
+                {
+                    rsType.append("date");
+                    // midnight on the given day
+                    css::util::DateTime aMidnight;
+                    aMidnight.Year        = aDate.Year;
+                    aMidnight.Month       = aDate.Month;
+                    aMidnight.Day         = aDate.Day;
+                    aMidnight.Hours       = 0;
+                    aMidnight.Minutes     = 0;
+                    aMidnight.Seconds     = 0;
+                    aMidnight.NanoSeconds = 0;
+                    ::sax::Converter::convertDateTime(rsValue, aMidnight, nullptr);
+                    return true;
+                }
+                if (rValue >>= aTime)
+                {
+                    rsType.append("time");
+                    // a time of day is written as a duration with no date part
+                    css::util::Duration aSpan;
+                    aSpan.Years       = 0;
+                    aSpan.Months      = 0;
+                    aSpan.Days        = 0;
+                    aSpan.Hours       = aTime.Hours;
+                    aSpan.Minutes     = aTime.Minutes;
+                    aSpan.Seconds     = aTime.Seconds;
+                    aSpan.NanoSeconds = aTime.NanoSeconds;
+                    ::sax::Converter::convertDuration(rsValue, aSpan);
+                    return true;
+                }
+                if (rValue >>= aDateTime)
+                {
+                    rsType.append("date");
+                    ::sax::Converter::convertDateTime(rsValue, aDateTime, nullptr);
+                    return true;
+                }
+            }
+            break;
+
+        default:
+            break;
+    }
+
+    return false;
+}
+
+/** Write a byte sequence as hexadecimal digits, two per byte.
+
+    rBuffer is cleared first; each byte is padded with a leading '0' where
+    needed.
+
+    @param rBuffer  receives the hexadecimal text
+    @param pBytes   start of the data
+    @param nBytes   number of bytes to convert
+ */
+void Converter::convertBytesToHexBinary(OUStringBuffer& rBuffer, const void* pBytes,
+                                        sal_Int32 nBytes)
+{
+    rBuffer.setLength(0);
+    rBuffer.ensureCapacity(nBytes * 2);
+
+    const unsigned char* const pData = static_cast<const unsigned char*>(pBytes);
+    for (sal_Int32 n = 0; n < nBytes; ++n)
+    {
+        sal_Int32 nByte = pData[n];
+        if (nByte < 16)
+            rBuffer.append('0');
+        rBuffer.append(nByte, 16);
+    }
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/tools/fastattribs.cxx b/sax/source/tools/fastattribs.cxx
new file mode 100644
index 000000000..9b309d5fb
--- /dev/null
+++ b/sax/source/tools/fastattribs.cxx
@@ -0,0 +1,336 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <algorithm>
+
+#include <com/sun/star/xml/sax/SAXException.hpp>
+#include <rtl/math.h>
+#include <sax/fastattribs.hxx>
+#include <utility>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::xml;
+using namespace ::com::sun::star::xml::sax;
+namespace sax_fastparser
+{
+
+// Defined out of line on purpose: MSVC is not happy with an in-line {}
+FastTokenHandlerBase::~FastTokenHandlerBase() = default;
+
+/// Builds an unknown attribute with a namespace URL; all three arguments are moved into the members.
+UnknownAttribute::UnknownAttribute( OUString aNamespaceURL, OString aName, OString value )
+    : maNamespaceURL(std::move( aNamespaceURL ))
+    , maName(std::move( aName ))
+    , maValue(std::move( value ))
+{
+}
+
+/// Builds an unknown attribute without a namespace URL (maNamespaceURL stays default-constructed).
+UnknownAttribute::UnknownAttribute( OString aName, OString value )
+    : maName(std::move( aName ))
+    , maValue(std::move( value ))
+{
+}
+
+/// Copies this attribute into *pAttrib, converting name and value from UTF-8.
+/// A null pAttrib is silently ignored.
+void UnknownAttribute::FillAttribute( Attribute* pAttrib ) const
+{
+    if( !pAttrib )
+        return;
+
+    pAttrib->Name = OStringToOUString( maName, RTL_TEXTENCODING_UTF8 );
+    pAttrib->NamespaceURL = maNamespaceURL;
+    pAttrib->Value = OStringToOUString( maValue, RTL_TEXTENCODING_UTF8 );
+}
+
+/** Creates an empty attribute list that resolves value tokens through pTokenHandler.
+
+    @throws std::bad_alloc if the initial value buffer cannot be allocated
+ */
+FastAttributeList::FastAttributeList( sax_fastparser::FastTokenHandlerBase *pTokenHandler)
+: mpTokenHandler( pTokenHandler )
+{
+    // random initial size of buffer to store attribute values
+    mnChunkLength = 58;
+    mpChunk = static_cast<char *>(malloc( mnChunkLength ));
+    // add() reports a failed realloc() the same way; without this check a
+    // null mpChunk would only surface later as a write through a null pointer
+    if (!mpChunk)
+        throw std::bad_alloc();
+    // offset of the first value; every add() appends the end offset of its value
+    maAttributeValues.push_back( 0 );
+}
+
+/** Creates a deep copy of the FastAttributeList behind xAttrList.
+
+    The value buffer is duplicated byte for byte; tokens, offsets and unknown
+    attributes are copied.
+
+    @throws std::bad_alloc if the value buffer cannot be allocated
+ */
+FastAttributeList::FastAttributeList( const css::uno::Reference< css::xml::sax::XFastAttributeList > & xAttrList )
+{
+    const auto& rOther = castToFastAttributeList(xAttrList);
+    mpTokenHandler = rOther.mpTokenHandler;
+    mpChunk = static_cast<char *>(malloc( rOther.mnChunkLength ));
+    // never hand a failed allocation to memcpy(); add() throws the same
+    // exception when its realloc() fails
+    if (!mpChunk)
+        throw std::bad_alloc();
+    mnChunkLength = rOther.mnChunkLength;
+    memcpy(mpChunk, rOther.mpChunk, rOther.mnChunkLength);
+    maAttributeValues = rOther.maAttributeValues;
+    maAttributeTokens = rOther.maAttributeTokens;
+    maUnknownAttributes = rOther.maUnknownAttributes;
+}
+
+css::uno::Reference< ::css::util::XCloneable > FastAttributeList::createClone() // Returns a newly allocated FastAttributeList constructed from this one.
+{
+ return new FastAttributeList(this);
+}
+
+FastAttributeList::~FastAttributeList() // Releases the value buffer.
+{
+ free( mpChunk ); // mpChunk comes from malloc()/realloc(), hence free()
+}
+
+/// Forgets all known and unknown attributes; the value buffer itself is kept.
+void FastAttributeList::clear()
+{
+    maUnknownAttributes.clear();
+    maAttributeTokens.clear();
+    // only the leading offset entry survives, and it has to be 0
+    maAttributeValues.resize(1);
+    assert(maAttributeValues[0] == 0);
+}
+
+/** Appends an attribute whose value is the nValueLength bytes starting at pValue.
+
+    The value is copied into the internal buffer and NUL-terminated there.
+
+    @param nToken       attribute token; must be neither -1 nor 0
+    @param pValue       value bytes
+    @param nValueLength number of bytes at pValue, without a terminating NUL
+
+    @throws std::bad_alloc if the value buffer cannot be grown; in that case
+            no token or offset has been recorded yet
+ */
+void FastAttributeList::add( sal_Int32 nToken, const char* pValue, size_t nValueLength )
+{
+    assert(nToken != -1);
+    assert(nToken != 0);
+    assert(nValueLength < SAL_MAX_INT32); // protect against absurd values
+
+    const sal_Int32 nWritePosition = maAttributeValues.back();
+    // end offset of the new value, including its terminating NUL
+    const sal_Int32 nNewEnd = static_cast<sal_Int32>(nWritePosition + nValueLength + 1);
+
+    // Grow the buffer *before* recording the attribute: if realloc() fails
+    // the list must not contain a token whose value storage does not exist.
+    if (nNewEnd > mnChunkLength)
+    {
+        const sal_Int32 newLen = std::max(mnChunkLength * 2, nNewEnd);
+        auto p = static_cast<char*>(realloc(mpChunk, newLen));
+        if (!p)
+            throw std::bad_alloc();
+
+        mnChunkLength = newLen;
+        mpChunk = p;
+    }
+
+    maAttributeTokens.push_back( nToken );
+    maAttributeValues.push_back( nNewEnd );
+
+    memcpy(mpChunk + nWritePosition, pValue, nValueLength);
+    mpChunk[nWritePosition + nValueLength] = '\0';
+}
+
+void FastAttributeList::add( sal_Int32 nToken, const char* pValue ) // Adds a NUL-terminated value; its length is taken with strlen().
+{
+ add( nToken, pValue, strlen( pValue ));
+}
+
+void FastAttributeList::add( sal_Int32 nToken, const OString& rValue ) // Adds the bytes of rValue as the attribute value.
+{
+ add( nToken, rValue.getStr(), rValue.getLength() );
+}
+
+void FastAttributeList::add(sal_Int32 nToken, std::u16string_view sValue) // Adds sValue after converting it to UTF-8.
+{
+ add(nToken, OUStringToOString(sValue, RTL_TEXTENCODING_UTF8));
+}
+
+/// Adds an attribute whose token combines the namespace token (shifted left by 16 bits)
+/// with the local token.
+void FastAttributeList::addNS( sal_Int32 nNamespaceToken, sal_Int32 nToken, const OString& rValue )
+{
+    add( (nNamespaceToken << 16) | nToken, rValue );
+}
+
+/// Same as the OString overload, for a UTF-16 value: the namespace token is
+/// shifted left by 16 bits and OR-ed with the local token.
+void FastAttributeList::addNS(sal_Int32 nNamespaceToken, sal_Int32 nToken,
+                              std::u16string_view sValue)
+{
+    add((nNamespaceToken << 16) | nToken, sValue);
+}
+
+void FastAttributeList::addUnknown( const OUString& rNamespaceURL, const OString& rName, const OString& value ) // Records an attribute without a token, with its namespace URL.
+{
+ maUnknownAttributes.emplace_back( rNamespaceURL, rName, value );
+}
+
+void FastAttributeList::addUnknown( const OString& rName, const OString& value ) // Records an attribute without a token and without a namespace URL.
+{
+ maUnknownAttributes.emplace_back( rName, value );
+}
+
+/// Appends every attribute of the FastAttributeList behind xAttrList to this list.
+void FastAttributeList::add( const css::uno::Reference<css::xml::sax::XFastAttributeList>& xAttrList )
+{
+    add(castToFastAttributeList(xAttrList));
+}
+
+/// Appends all known attributes of rOther, followed by all of its unknown attributes.
+void FastAttributeList::add( const FastAttributeList& rOther )
+{
+    // the size is re-read on every round, exactly like an index-based for loop
+    size_t nPos = 0;
+    while (nPos < rOther.maAttributeTokens.size())
+    {
+        add(rOther.maAttributeTokens[nPos], rOther.getFastAttributeValue(nPos), rOther.AttributeValueLength(nPos));
+        ++nPos;
+    }
+
+    for (const auto & rUnknown : rOther.maUnknownAttributes)
+    {
+        addUnknown(rUnknown.maNamespaceURL, rUnknown.maName, rUnknown.maValue);
+    }
+}
+
+// XFastAttributeList
+/// Tells whether an attribute with the given token has been added.
+sal_Bool FastAttributeList::hasAttribute( ::sal_Int32 Token )
+{
+    const auto aEnd = maAttributeTokens.end();
+    return std::find(maAttributeTokens.begin(), aEnd, Token) != aEnd;
+}
+
+/** Looks up the first attribute with the given token and returns the token
+    that the token handler assigns to its value.
+
+    @throws SAXException if no attribute with that token exists
+ */
+sal_Int32 FastAttributeList::getValueToken( ::sal_Int32 Token )
+{
+    for (size_t nPos = 0; nPos < maAttributeTokens.size(); ++nPos)
+    {
+        if (maAttributeTokens[nPos] != Token)
+            continue;
+
+        return FastTokenHandlerBase::getTokenFromChars(
+            mpTokenHandler,
+            getFastAttributeValue(nPos),
+            AttributeValueLength( nPos ) );
+    }
+
+    throw SAXException("FastAttributeList::getValueToken: unknown token " + OUString::number(Token), nullptr, Any());
+}
+
+/// Like getValueToken(), but returns Default when no attribute with the given token exists.
+sal_Int32 FastAttributeList::getOptionalValueToken( ::sal_Int32 Token, ::sal_Int32 Default )
+{
+    for (size_t nPos = 0; nPos < maAttributeTokens.size(); ++nPos)
+    {
+        if (maAttributeTokens[nPos] != Token)
+            continue;
+
+        return FastTokenHandlerBase::getTokenFromChars(
+            mpTokenHandler,
+            getFastAttributeValue(nPos),
+            AttributeValueLength( nPos ) );
+    }
+
+    return Default;
+}
+
+// performance sensitive shortcuts to avoid allocation ...
+/** Parses the value of the first attribute with token nToken as a decimal integer.
+
+    @param rInt receives the parsed number; it is set to 0 when the attribute
+                is missing or when the number does not fit into sal_Int32
+    @return true if an attribute with that token exists
+ */
+bool FastAttributeList::getAsInteger( sal_Int32 nToken, sal_Int32 &rInt) const
+{
+    rInt = 0;
+
+    const auto aFirst = maAttributeTokens.begin();
+    const auto aLast = maAttributeTokens.end();
+    const auto aFound = std::find(aFirst, aLast, nToken);
+    if (aFound == aLast)
+        return false;
+
+    const size_t nPos = static_cast<size_t>(aFound - aFirst);
+    const sal_Int64 nParsed = rtl_str_toInt64_WithLength( getFastAttributeValue(nPos), 10, AttributeValueLength(nPos) );
+    const bool bFits = nParsed >= SAL_MIN_INT32 && nParsed <= SAL_MAX_INT32;
+    rInt = bFits ? static_cast<sal_Int32>(nParsed) : 0;
+    return true;
+}
+
+/// Parses the value at position nTokenIndex as a decimal integer; numbers
+/// outside the sal_Int32 range yield 0.
+sal_Int32 FastAttributeList::getAsIntegerByIndex( sal_Int32 nTokenIndex ) const
+{
+    const sal_Int64 nParsed = rtl_str_toInt64_WithLength( getFastAttributeValue(nTokenIndex), 10, AttributeValueLength(nTokenIndex) );
+    const bool bFits = nParsed >= SAL_MIN_INT32 && nParsed <= SAL_MAX_INT32;
+    return bFits ? static_cast<sal_Int32>(nParsed) : 0;
+}
+
+/** Parses the value of the first attribute with token nToken as a double,
+    using '.' as decimal separator and no group separator.
+
+    @param rDouble receives the parsed number, or 0.0 when the attribute is missing
+    @return true if an attribute with that token exists
+ */
+bool FastAttributeList::getAsDouble( sal_Int32 nToken, double &rDouble) const
+{
+    rDouble = 0.0;
+
+    const auto aFirst = maAttributeTokens.begin();
+    const auto aLast = maAttributeTokens.end();
+    const auto aFound = std::find(aFirst, aLast, nToken);
+    if (aFound == aLast)
+        return false;
+
+    const size_t nPos = static_cast<size_t>(aFound - aFirst);
+    const char* const pBegin = getFastAttributeValue(nPos);
+    const char* const pEnd = pBegin + AttributeValueLength(nPos);
+    rDouble = rtl_math_stringToDouble( pBegin, pEnd, '.', 0, nullptr, nullptr );
+    return true;
+}
+
+/** Gives direct access to the stored value of the first attribute with token nToken.
+
+    @param rPos set to point into the internal value buffer on success,
+                left untouched otherwise
+    @return true if an attribute with that token exists
+ */
+bool FastAttributeList::getAsChar( sal_Int32 nToken, const char*& rPos ) const
+{
+    const auto aFirst = maAttributeTokens.begin();
+    const auto aLast = maAttributeTokens.end();
+    const auto aFound = std::find(aFirst, aLast, nToken);
+    if (aFound == aLast)
+        return false;
+
+    rPos = mpChunk + maAttributeValues[aFound - aFirst];
+    return true;
+}
+
+/// Returns a pointer into the internal value buffer for the attribute at nTokenIndex.
+const char* FastAttributeList::getAsCharByIndex( sal_Int32 nTokenIndex ) const
+{
+    return mpChunk + maAttributeValues[nTokenIndex];
+}
+
+/** Returns the value of the first attribute with the given token, decoded from UTF-8.
+
+    @throws SAXException if no attribute with that token exists
+ */
+OUString FastAttributeList::getValue( ::sal_Int32 Token )
+{
+    for (size_t nPos = 0; nPos < maAttributeTokens.size(); ++nPos)
+    {
+        if (maAttributeTokens[nPos] != Token)
+            continue;
+        return OUString( getFastAttributeValue(nPos), AttributeValueLength(nPos), RTL_TEXTENCODING_UTF8 );
+    }
+
+    throw SAXException("FastAttributeList::getValue: unknown token " + OUString::number(Token), nullptr, Any());
+}
+
+OUString FastAttributeList::getValueByIndex( ::sal_Int32 nTokenIndex ) const // Returns the value at position nTokenIndex, decoded from UTF-8.
+{
+ return OUString( getFastAttributeValue(nTokenIndex), AttributeValueLength(nTokenIndex), RTL_TEXTENCODING_UTF8 );
+}
+
+/// Like getValue(), but returns an empty string when no attribute with the given token exists.
+OUString FastAttributeList::getOptionalValue( ::sal_Int32 Token )
+{
+    for (size_t nPos = 0; nPos < maAttributeTokens.size(); ++nPos)
+    {
+        if (maAttributeTokens[nPos] != Token)
+            continue;
+        return OUString( getFastAttributeValue(nPos), AttributeValueLength(nPos), RTL_TEXTENCODING_UTF8 );
+    }
+
+    return OUString();
+}
+/// Returns all unknown attributes as a sequence of Attribute structs
+/// (an empty sequence when there are none).
+Sequence< Attribute > FastAttributeList::getUnknownAttributes(  )
+{
+    if (maUnknownAttributes.empty())
+        return {};
+
+    Sequence< Attribute > aResult( maUnknownAttributes.size() );
+    Attribute* pOut = aResult.getArray();
+    for( const auto& rUnknown : maUnknownAttributes )
+    {
+        rUnknown.FillAttribute( pOut );
+        ++pOut;
+    }
+    return aResult;
+}
+/// Returns all known attributes as token/value pairs; values are decoded from UTF-8.
+Sequence< FastAttribute > FastAttributeList::getFastAttributes(  )
+{
+    const size_t nCount = maAttributeTokens.size();
+    Sequence< FastAttribute > aResult( nCount );
+    FastAttribute* const pOut = aResult.getArray();
+    for (size_t nPos = 0; nPos != nCount; ++nPos)
+    {
+        FastAttribute& rOut = pOut[nPos];
+        rOut.Token = maAttributeTokens[nPos];
+        rOut.Value = OUString( getFastAttributeValue(nPos), AttributeValueLength(nPos), RTL_TEXTENCODING_UTF8 );
+    }
+    return aResult;
+}
+
+/// Returns an iterator to the first attribute with token nToken, or end() if there is none.
+FastAttributeList::FastAttributeIter FastAttributeList::find( sal_Int32 nToken ) const
+{
+    const auto aFirst = maAttributeTokens.begin();
+    const auto aLast = maAttributeTokens.end();
+    const auto aFound = std::find(aFirst, aLast, nToken);
+    if (aFound == aLast)
+        return end();
+    return FastAttributeIter(*this, static_cast<size_t>(aFound - aFirst));
+}
+
+/** Asks pTokenHandler for the token of the character range pToken.
+
+    @param nLen number of bytes at pToken; 0 means "measure with strlen()"
+ */
+sal_Int32 FastTokenHandlerBase::getTokenFromChars(
+    const FastTokenHandlerBase *pTokenHandler,
+    const char *pToken, size_t nLen /* = 0 */ )
+{
+    const size_t nEffectiveLen = nLen ? nLen : strlen( pToken );
+    return pTokenHandler->getTokenDirect( pToken, static_cast<sal_Int32>(nEffectiveLen) );
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
new file mode 100644
index 000000000..b4b9de8fb
--- /dev/null
+++ b/sax/source/tools/fastserializer.cxx
@@ -0,0 +1,845 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include "fastserializer.hxx"
+
+#include <com/sun/star/xml/sax/FastTokenHandler.hpp>
+#include <rtl/math.h>
+#include <sal/log.hxx>
+#include <comphelper/processfactory.hxx>
+#include <comphelper/sequence.hxx>
+
+#include <cassert>
+#include <optional>
+#include <string.h>
+#include <string_view>
+#include <utility>
+
+#if OSL_DEBUG_LEVEL > 0
+#include <iostream>
+#include <set>
+#endif
+
+using ::std::vector;
+using ::com::sun::star::uno::Sequence;
+using ::com::sun::star::io::XOutputStream;
+
+// A combined token carries the namespace token in its upper 16 bits and the
+// local token in its lower 16 bits. The macro argument is parenthesized so
+// that expressions such as TOKEN(a | b) expand with the intended precedence.
+#define HAS_NAMESPACE(x) (((x) & 0xffff0000) != 0)
+#define NAMESPACE(x) ((x) >> 16)
+#define TOKEN(x) ((x) & 0xffff)
+// number of characters without terminating 0
+#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1)
+
+const char sClosingBracket[] = ">"; // markup fragments written by the serializer functions in this file
+const char sSlashAndClosingBracket[] = "/>"; // ends an empty-element tag (singleFastElement)
+constexpr OStringLiteral sColon = ":"; // separates namespace and local name in writeId()/getId()
+const char sOpeningBracket[] = "<";
+const char sOpeningBracketAndSlash[] = "</"; // starts an end tag (endFastElement)
+const char sQuote[] = "\"";
+const char sEqualSignAndQuote[] = "=\"";
+const char sSpace[] = " ";
+const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"; // written by startDocument()
+
+namespace sax_fastparser {
+ FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream ) // Sets up the scratch string for doubles, the token handler and the output stream.
+ : mbMarkStackEmpty(true)
+ , mpDoubleStr(nullptr)
+ , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE)
+ , mbXescape(true)
+ {
+ rtl_string_new_WithLength(&mpDoubleStr, mnDoubleStrCapacity); // released again in the destructor
+ mxFastTokenHandler = css::xml::sax::FastTokenHandler::create(
+ ::comphelper::getProcessComponentContext());
+ assert(xOutputStream.is()); // cannot do anything without that
+ maCachedOutputStream.setOutputStream( xOutputStream );
+ }
+
+ FastSaxSerializer::~FastSaxSerializer() // Releases the scratch string created in the constructor.
+ {
+ rtl_string_release(mpDoubleStr);
+ }
+
+ void FastSaxSerializer::startDocument() // Writes the XML declaration (sXmlHeader).
+ {
+ writeBytes(sXmlHeader, N_CHARS(sXmlHeader));
+ }
+
+ void FastSaxSerializer::write( double value ) // Formats value into the reusable mpDoubleStr (format G, '.' as decimal separator) and writes it.
+ {
+ rtl_math_doubleToString(
+ &mpDoubleStr, &mnDoubleStrCapacity, 0, value, rtl_math_StringFormat_G,
+ RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', nullptr,
+ 0, true);
+
+ write(mpDoubleStr->buffer, mpDoubleStr->length);
+ // and "clear" the string
+ mpDoubleStr->length = 0;
+ mnDoubleStrCapacity = RTL_STR_MAX_VALUEOFDOUBLE; // same capacity value the constructor started with
+ }
+
+ void FastSaxSerializer::write( std::u16string_view sOutput, bool bEscape ) // Converts sOutput to UTF-8 and forwards to the OString overload.
+ {
+ write( OUStringToOString(sOutput, RTL_TEXTENCODING_UTF8), bEscape );
+
+ }
+
+ void FastSaxSerializer::write( const OString& sOutput, bool bEscape ) // Forwards the bytes of sOutput to the pointer/length overload.
+ {
+ write( sOutput.getStr(), sOutput.getLength(), bEscape );
+ }
+
+ /** Detects characters that are not allowed in XML 1.0
+     (XML 1.1 would exclude only U+0000).
+
+     Inspects the character starting at string[index]. If it is a control
+     character below 0x20 other than TAB, LF and CR, or the UTF-8 encoding of
+     U+FFFE or U+FFFF, the result holds the code point and the number of bytes
+     it occupies; otherwise the result is empty.
+
+     This assumes that `string` is UTF-8, which appears to be generally the case: the only
+     user of this FastSaxSerializer code is FastSerializerHelper, and when its constructor
+     (sax/source/tools/fshelper.cxx) is called with bWriteHeader being true, it calls
+     FastSaxSerializer::startDocument, which writes sXmlHeader claiming encoding="UTF-8". The
+     only place that appears to construct FastSerializerHelper is
+     XmlFilterBase::openFragmentStreamWithSerializer (oox/source/core/xmlfilterbase.cxx), and it
+     only passes false for bWriteHeader when the given rMediaType contains "vml" but not "+xml"
+     (see <https://git.libreoffice.org/core/+/6a11add2c4ea975356cfb7bab02301788c79c904%5E!/>
+     "XLSX VML Export fixes", stating "Don't write xml headers for vml files"). But let's assume
+     that even such Vector Markup Language files are written as UTF-8.
+ */
+ template<typename Int> static std::optional<std::pair<unsigned, Int>> invalidChar(
+     char const * string, Int length, Int index )
+ {
+     assert(index < length);
+     auto const lead = static_cast<unsigned char>(string[index]);
+
+     if (lead == 0xEF)
+     {
+         // U+FFFE is EF BF BE and U+FFFF is EF BF BF in UTF-8
+         if (length - index < 3 || static_cast<unsigned char>(string[index + 1]) != 0xBF)
+             return std::nullopt;
+
+         auto const last = static_cast<unsigned char>(string[index + 2]);
+         if (last == 0xBE)
+             return std::pair(0xFFFE, 3);
+         if (last == 0xBF)
+             return std::pair(0xFFFF, 3);
+         return std::nullopt;
+     }
+
+     // everything from 0x20 upwards, plus TAB, LF and CR, is fine
+     if (lead >= 0x20 || lead == 0x09 || lead == 0x0a || lead == 0x0d)
+         return std::nullopt;
+
+     return std::pair(lead, 1);
+ }
+
+ /// True for the ASCII characters 0-9, A-F and a-f; independent of any locale.
+ static bool isHexDigit( char c )
+ {
+     if (c >= '0' && c <= '9')
+         return true;
+     if (c >= 'A' && c <= 'F')
+         return true;
+     return c >= 'a' && c <= 'f';
+ }
+
+ void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape ) // Writes nLen bytes of pStr; with bEscape, XML special characters are replaced by entities / _xHHHH_ escapes.
+ {
+ if (nLen == -1) // -1 means: measure with strlen(); a null pStr then counts as empty
+ nLen = pStr ? strlen(pStr) : 0;
+
+ if (!bEscape) // raw pass-through
+ {
+ writeBytes( pStr, nLen );
+ return;
+ }
+
+ bool bGood = true; // only cleared in debug builds, when an invalid char is seen while mbXescape is off
+ const sal_Int32 kXescapeLen = 7; // length of "_xHHHH_"
+ char bufXescape[kXescapeLen+1];
+ sal_Int32 nNextXescape = 0; // a literal "_xHHHH_" is only escaped at positions >= this index
+ for (sal_Int32 i = 0; i < nLen;) // i advances at the bottom of the loop, or before a `continue`
+ {
+ char c = pStr[ i ];
+ switch( c )
+ {
+ case '<': writeBytes( "&lt;", 4 ); break;
+ case '>': writeBytes( "&gt;", 4 ); break;
+ case '&': writeBytes( "&amp;", 5 ); break;
+ case '\'': writeBytes( "&apos;", 6 ); break;
+ case '"': writeBytes( "&quot;", 6 ); break;
+ case '\t':
+#if 0
+ // Seems OOXML prefers the _xHHHH_ escape over the
+ // entity in *some* cases, apparently in attribute
+ // values but not in element data.
+ // Would need to distinguish at a higher level.
+ if (mbXescape)
+ {
+ snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
+ static_cast<unsigned int>(static_cast<unsigned char>(c)));
+ writeBytes( bufXescape, kXescapeLen);
+ }
+ else
+#endif
+ {
+ writeBytes( "&#9;", 4 );
+ }
+ break;
+ case '\n':
+#if 0
+ if (mbXescape)
+ {
+ snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
+ static_cast<unsigned int>(static_cast<unsigned char>(c)));
+ writeBytes( bufXescape, kXescapeLen);
+ }
+ else
+#endif
+ {
+ writeBytes( "&#10;", 5 );
+ }
+ break;
+ case '\r':
+#if 0
+ if (mbXescape)
+ {
+ snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
+ static_cast<unsigned int>(static_cast<unsigned char>(c)));
+ writeBytes( bufXescape, kXescapeLen);
+ }
+ else
+#endif
+ {
+ writeBytes( "&#13;", 5 );
+ }
+ break;
+ default:
+ if (mbXescape)
+ {
+ char c1, c2, c3, c4; // the four hex digits of a candidate "_xHHHH_"
+ // Escape characters not valid in XML 1.0 as
+ // _xHHHH_. A literal "_xHHHH_" has to be
+ // escaped as _x005F_xHHHH_ (effectively
+ // escaping the leading '_').
+ // See ECMA-376-1:2016 page 3736,
+ // 22.4.2.4 bstr (Basic String)
+ // for reference.
+ if (c == '_' && i >= nNextXescape && i <= nLen - kXescapeLen &&
+ pStr[i+6] == '_' &&
+ ((pStr[i+1] | 0x20) == 'x') &&
+ isHexDigit( c1 = pStr[i+2] ) &&
+ isHexDigit( c2 = pStr[i+3] ) &&
+ isHexDigit( c3 = pStr[i+4] ) &&
+ isHexDigit( c4 = pStr[i+5] ))
+ {
+ // OOXML has the odd habit to write some
+ // names using this that when re-saving
+ // should *not* be escaped, specifically
+ // _x0020_ for blanks in w:xpath values.
+ if (!(c1 == '0' && c2 == '0' && c3 == '2' && c4 == '0'))
+ {
+ // When encountering "_x005F_xHHHH_"
+ // assume that is an already escaped
+ // sequence that was not unescaped and
+ // shall be written as is, to not end
+ // up with "_x005F_x005F_xHHHH_" and
+ // repeated...
+ if (c1 == '0' && c2 == '0' && c3 == '5' && (c4 | 0x20) == 'f' &&
+ i + kXescapeLen <= nLen - 6 &&
+ pStr[i+kXescapeLen+5] == '_' &&
+ ((pStr[i+kXescapeLen+0] | 0x20) == 'x') &&
+ isHexDigit( pStr[i+kXescapeLen+1] ) &&
+ isHexDigit( pStr[i+kXescapeLen+2] ) &&
+ isHexDigit( pStr[i+kXescapeLen+3] ) &&
+ isHexDigit( pStr[i+kXescapeLen+4] ))
+ {
+ writeBytes( &c, 1 );
+ // Remember this fake escapement.
+ nNextXescape = i + kXescapeLen + 6;
+ }
+ else
+ {
+ writeBytes( "_x005F_", kXescapeLen);
+ // Remember this escapement so in
+ // _xHHHH_xHHHH_ only the first '_'
+ // is escaped.
+ nNextXescape = i + kXescapeLen;
+ }
+ break;
+ }
+ }
+ if (auto const inv = invalidChar(pStr, nLen, i))
+ {
+ snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
+ inv->first);
+ writeBytes( bufXescape, kXescapeLen);
+ i += inv->second; // skip every byte of the replaced character
+ continue;
+ }
+ }
+#if OSL_DEBUG_LEVEL > 0
+ else
+ {
+ if (bGood && invalidChar(pStr, nLen, i))
+ {
+ bGood = false;
+ // The SAL_WARN() for the single character is
+ // issued in writeBytes(), just gather for the
+ // SAL_WARN_IF() below.
+ }
+ }
+#endif
+ writeBytes( &c, 1 ); // ordinary byte: copied unchanged
+ break;
+ }
+ ++i;
+ }
+ SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
+ }
+
+ void FastSaxSerializer::endDocument() // Flushes the cached output; all marks must have been merged by now.
+ {
+ assert(mbMarkStackEmpty && maMarkStack.empty());
+ maCachedOutputStream.flush();
+ }
+
+ /// Writes the UTF-8 name of token nElement, as "namespace:name" when the
+ /// token carries a namespace part and as a plain name otherwise.
+ void FastSaxSerializer::writeId( ::sal_Int32 nElement )
+ {
+     if( !HAS_NAMESPACE( nElement ) )
+     {
+         auto const Element(mxFastTokenHandler->getUTF8Identifier(nElement));
+         assert(Element.hasElements());
+         writeBytes(Element);
+         return;
+     }
+
+     auto const Namespace(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
+     assert(Namespace.hasElements());
+     writeBytes(Namespace);
+
+     writeBytes(sColon.getStr(), sColon.getLength());
+
+     auto const Element(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
+     assert(Element.hasElements());
+     writeBytes(Element);
+ }
+
+#ifdef DBG_UTIL
+ OString FastSaxSerializer::getId( ::sal_Int32 nElement ) // DBG_UTIL only: returns the name that writeId() would write for nElement.
+ {
+ if (HAS_NAMESPACE(nElement)) {
+ Sequence<sal_Int8> const ns(
+ mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
+ Sequence<sal_Int8> const name(
+ mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
+ return std::string_view(
+ reinterpret_cast<char const*>(ns.getConstArray()), ns.getLength())
+ + sColon
+ + std::string_view(
+ reinterpret_cast<char const*>(name.getConstArray()), name.getLength());
+ } else {
+ Sequence<sal_Int8> const name(
+ mxFastTokenHandler->getUTF8Identifier(nElement));
+ return OString(reinterpret_cast<char const*>(name.getConstArray()), name.getLength());
+ }
+ }
+#endif
+
+ void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList ) // Writes the start tag "<name attrs>" for Element.
+ {
+ if ( !mbMarkStackEmpty )
+ {
+ maCachedOutputStream.flush(); // flush pending bytes before the top mark is told about the new element
+ maMarkStack.top()->setCurrentElement( Element );
+ }
+
+#ifdef DBG_UTIL
+ if (mbMarkStackEmpty)
+ m_DebugStartedElements.push(Element);
+ else
+ maMarkStack.top()->m_DebugStartedElements.push_back(Element);
+#endif
+
+ writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
+
+ writeId(Element);
+ if (pAttrList)
+ writeFastAttributeList(*pAttrList);
+ else
+ writeTokenValueList(); // no explicit list: use (and clear) the values collected in maTokenValues
+
+ writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
+ }
+
+ void FastSaxSerializer::endFastElement( ::sal_Int32 Element ) // Writes the end tag "</name>" for Element; DBG_UTIL builds also check that it matches the open element.
+ {
+#ifdef DBG_UTIL
+ // Well-formedness constraint: Element Type Match
+ if (mbMarkStackEmpty)
+ {
+ assert(!m_DebugStartedElements.empty());
+ assert(Element == m_DebugStartedElements.top());
+ m_DebugStartedElements.pop();
+ }
+ else
+ {
+ if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
+ {
+ // Sort is always well-formed fragment
+ assert(!maMarkStack.top()->m_DebugStartedElements.empty());
+ }
+ if (maMarkStack.top()->m_DebugStartedElements.empty())
+ {
+ maMarkStack.top()->m_DebugEndedElements.push_back(Element); // end tag without a start inside this mark: remember it for the merge
+ }
+ else
+ {
+ assert(Element == maMarkStack.top()->m_DebugStartedElements.back());
+ maMarkStack.top()->m_DebugStartedElements.pop_back();
+ }
+ }
+#endif
+
+ writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash));
+
+ writeId(Element);
+
+ writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
+ }
+
+ void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList ) // Writes the empty-element tag "<name attrs/>" for Element.
+ {
+ if ( !mbMarkStackEmpty )
+ {
+ maCachedOutputStream.flush(); // flush pending bytes before the top mark is told about the new element
+ maMarkStack.top()->setCurrentElement( Element );
+ }
+
+ writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
+
+ writeId(Element);
+ if (pAttrList)
+ writeFastAttributeList(*pAttrList);
+ else
+ writeTokenValueList(); // no explicit list: use (and clear) the values collected in maTokenValues
+
+ writeBytes(sSlashAndClosingBracket, N_CHARS(sSlashAndClosingBracket));
+ }
+
+ css::uno::Reference< css::io::XOutputStream > const & FastSaxSerializer::getOutputStream() const // Returns the stream held by the cached output stream.
+ {
+ return maCachedOutputStream.getOutputStream();
+ }
+
+ void FastSaxSerializer::writeTokenValueList() // Writes every entry of maTokenValues as ` name="value"` (value escaped), then empties the list.
+ {
+#ifdef DBG_UTIL
+ ::std::set<OString> DebugAttributes;
+#endif
+ for (const TokenValue & rTokenValue : maTokenValues)
+ {
+ writeBytes(sSpace, N_CHARS(sSpace));
+
+ sal_Int32 nToken = rTokenValue.nToken;
+ writeId(nToken);
+
+#ifdef DBG_UTIL
+ // Well-formedness constraint: Unique Att Spec
+ OString const nameId(getId(nToken));
+ assert(DebugAttributes.find(nameId) == DebugAttributes.end());
+ DebugAttributes.insert(nameId);
+#endif
+
+ writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
+
+ write(rTokenValue.pValue, -1, true); // -1: the value is NUL-terminated, write() measures it
+
+ writeBytes(sQuote, N_CHARS(sQuote));
+ }
+ maTokenValues.clear();
+ }
+
+ void FastSaxSerializer::writeFastAttributeList(FastAttributeList const & rAttrList) // Writes every token attribute of rAttrList as ` name="value"`.
+ {
+#ifdef DBG_UTIL
+ ::std::set<OString> DebugAttributes;
+#endif
+ const std::vector< sal_Int32 >& Tokens = rAttrList.getFastAttributeTokens();
+ for (size_t j = 0; j < Tokens.size(); j++)
+ {
+ writeBytes(sSpace, N_CHARS(sSpace));
+
+ sal_Int32 nToken = Tokens[j];
+ writeId(nToken);
+
+#ifdef DBG_UTIL
+ // Well-formedness constraint: Unique Att Spec
+ OString const nameId(getId(nToken));
+ SAL_WARN_IF(DebugAttributes.find(nameId) != DebugAttributes.end(), "sax", "Duplicate attribute: " << nameId );
+ assert(DebugAttributes.find(nameId) == DebugAttributes.end());
+ DebugAttributes.insert(nameId);
+#endif
+
+ writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
+
+ const char* pAttributeValue = rAttrList.getFastAttributeValue(j);
+
+ // tdf#117274 don't escape the special VML shape type id "#_x0000_t202"
+ bool bEscape = !(pAttributeValue
+ && *pAttributeValue != '\0'
+ && (*pAttributeValue == '#'
+ ? strncmp(pAttributeValue, "#_x0000_t", 9) == 0
+ : strncmp(pAttributeValue, "_x0000_t", 8) == 0)); // i.e. escape unless the value starts with "_x0000_t", optionally preceded by '#'
+
+ write(pAttributeValue, rAttrList.AttributeValueLength(j), bEscape);
+
+ writeBytes(sQuote, N_CHARS(sQuote));
+ }
+ }
+
+ void FastSaxSerializer::mark(sal_Int32 const nTag, const Int32Sequence& rOrder) // Pushes a new mark and redirects subsequent output into it.
+ {
+ if (rOrder.hasElements()) // an element order was given: collect into a ForSort
+ {
+ auto pSort = std::make_shared<ForSort>(nTag, rOrder);
+ maMarkStack.push( pSort );
+ maCachedOutputStream.setOutput( pSort );
+ }
+ else // no order: plain ForMerge
+ {
+ auto pMerge = std::make_shared<ForMerge>(nTag);
+ maMarkStack.push( pMerge );
+ maCachedOutputStream.setOutput( pMerge );
+ }
+ mbMarkStackEmpty = false;
+ }
+
+#ifdef DBG_UTIL
+ static void lcl_DebugMergeAppend( // DBG_UTIL bookkeeping: folds the "right" ended/started element lists into the "left" ones; both right lists end up empty.
+ std::deque<sal_Int32> & rLeftEndedElements,
+ std::deque<sal_Int32> & rLeftStartedElements,
+ std::deque<sal_Int32> & rRightEndedElements,
+ std::deque<sal_Int32> & rRightStartedElements)
+ {
+ while (!rRightEndedElements.empty())
+ {
+ if (rLeftStartedElements.empty())
+ {
+ rLeftEndedElements.push_back(rRightEndedElements.front()); // nothing open on the left: the end tag stays unmatched
+ }
+ else
+ {
+ assert(rLeftStartedElements.back() == rRightEndedElements.front()); // the end tag must close the innermost open element
+ rLeftStartedElements.pop_back();
+ }
+ rRightEndedElements.pop_front();
+ }
+ while (!rRightStartedElements.empty())
+ {
+ rLeftStartedElements.push_back(rRightStartedElements.front());
+ rRightStartedElements.pop_front();
+ }
+ }
+
+ static void lcl_DebugMergePrepend( // DBG_UTIL bookkeeping: folds the "left" ended/started element lists into the front of the "right" ones; both left lists end up empty.
+ std::deque<sal_Int32> & rLeftEndedElements,
+ std::deque<sal_Int32> & rLeftStartedElements,
+ std::deque<sal_Int32> & rRightEndedElements,
+ std::deque<sal_Int32> & rRightStartedElements)
+ {
+ while (!rLeftStartedElements.empty())
+ {
+ if (rRightEndedElements.empty())
+ {
+ rRightStartedElements.push_front(rLeftStartedElements.back()); // no end tag on the right: the element stays open
+ }
+ else
+ {
+ assert(rRightEndedElements.front() == rLeftStartedElements.back()); // the first end tag on the right must close it
+ rRightEndedElements.pop_front();
+ }
+ rLeftStartedElements.pop_back();
+ }
+ while (!rLeftEndedElements.empty())
+ {
+ rRightEndedElements.push_front(rLeftEndedElements.back());
+ rLeftEndedElements.pop_back();
+ }
+ }
+#endif
+
+ void FastSaxSerializer::mergeTopMarks(
+ sal_Int32 const nTag, sax_fastparser::MergeMarks const eMergeType) // Pops the top mark; its data goes to the output stream if it was the only mark, else into the mark below per eMergeType.
+ {
+ SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
+ assert(!mbMarkStackEmpty); // should never happen
+ if ( mbMarkStackEmpty ) // with assert() compiled out: return instead of touching an empty stack
+ return;
+
+ assert(maMarkStack.top()->m_Tag == nTag && "mark/merge tag mismatch!");
+ (void) nTag;
+#ifdef DBG_UTIL
+ if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
+ {
+ // Sort is always well-formed fragment
+ assert(maMarkStack.top()->m_DebugStartedElements.empty());
+ assert(maMarkStack.top()->m_DebugEndedElements.empty());
+ }
+ lcl_DebugMergeAppend(
+ maMarkStack.top()->m_DebugEndedElements,
+ maMarkStack.top()->m_DebugStartedElements,
+ maMarkStack.top()->m_DebugPostponedEndedElements,
+ maMarkStack.top()->m_DebugPostponedStartedElements);
+#endif
+
+ // flush, so that we get everything in getData()
+ maCachedOutputStream.flush();
+
+ if (maMarkStack.size() == 1) // last mark: its data is written to the output stream
+ {
+#ifdef DBG_UTIL
+ while (!maMarkStack.top()->m_DebugEndedElements.empty())
+ {
+ assert(maMarkStack.top()->m_DebugEndedElements.front() == m_DebugStartedElements.top());
+ maMarkStack.top()->m_DebugEndedElements.pop_front();
+ m_DebugStartedElements.pop();
+ }
+ while (!maMarkStack.top()->m_DebugStartedElements.empty())
+ {
+ m_DebugStartedElements.push(maMarkStack.top()->m_DebugStartedElements.front());
+ maMarkStack.top()->m_DebugStartedElements.pop_front();
+ }
+#endif
+ Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() ); // copy taken before the mark is popped
+ maMarkStack.pop();
+ mbMarkStackEmpty = true;
+ maCachedOutputStream.resetOutputToStream();
+ maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() );
+ return;
+ }
+
+#ifdef DBG_UTIL
+ ::std::deque<sal_Int32> topDebugStartedElements(maMarkStack.top()->m_DebugStartedElements);
+ ::std::deque<sal_Int32> topDebugEndedElements(maMarkStack.top()->m_DebugEndedElements);
+#endif
+ const Int8Sequence aMerge( maMarkStack.top()->getData() ); // copy taken before the mark is popped
+ maMarkStack.pop();
+#ifdef DBG_UTIL
+ switch (eMergeType)
+ {
+ case MergeMarks::APPEND:
+ lcl_DebugMergeAppend(
+ maMarkStack.top()->m_DebugEndedElements,
+ maMarkStack.top()->m_DebugStartedElements,
+ topDebugEndedElements,
+ topDebugStartedElements);
+ break;
+ case MergeMarks::PREPEND:
+ if (dynamic_cast<ForSort*>(maMarkStack.top().get())) // argh...
+ {
+ lcl_DebugMergeAppend(
+ maMarkStack.top()->m_DebugEndedElements,
+ maMarkStack.top()->m_DebugStartedElements,
+ topDebugEndedElements,
+ topDebugStartedElements);
+ }
+ else
+ {
+ lcl_DebugMergePrepend(
+ topDebugEndedElements,
+ topDebugStartedElements,
+ maMarkStack.top()->m_DebugEndedElements,
+ maMarkStack.top()->m_DebugStartedElements);
+ }
+ break;
+ case MergeMarks::POSTPONE:
+ lcl_DebugMergeAppend(
+ maMarkStack.top()->m_DebugPostponedEndedElements,
+ maMarkStack.top()->m_DebugPostponedStartedElements,
+ topDebugEndedElements,
+ topDebugStartedElements);
+ break;
+ }
+#endif
+ if (maMarkStack.empty()) // NOTE(review): looks unreachable, the size() == 1 case returned above - confirm
+ {
+ mbMarkStackEmpty = true;
+ maCachedOutputStream.resetOutputToStream();
+ }
+ else
+ {
+ maCachedOutputStream.setOutput( maMarkStack.top() ); // subsequent output goes into the mark below
+ }
+
+ switch ( eMergeType ) // hand the popped data to the new top mark
+ {
+ case MergeMarks::APPEND: maMarkStack.top()->append( aMerge ); break;
+ case MergeMarks::PREPEND: maMarkStack.top()->prepend( aMerge ); break;
+ case MergeMarks::POSTPONE: maMarkStack.top()->postpone( aMerge ); break;
+ }
+ }
+
+ void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData ) // Passes the bytes of rData to the cached output stream unchanged.
+ {
+ maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() );
+ }
+
+ void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen ) // Passes nLen bytes of pStr to the cached output stream; debug builds warn about invalid XML characters first.
+ {
+#if OSL_DEBUG_LEVEL > 0
+ {
+ bool bGood = true;
+ for (size_t i=0; i < nLen;)
+ {
+ if (auto const inv = invalidChar(pStr, nLen, i))
+ {
+ bGood = false;
+ SAL_WARN("sax", "FastSaxSerializer::writeBytes - illegal XML character 0x" <<
+ std::hex << inv->first);
+ i += inv->second; // skip every byte of the reported character
+ continue;
+ }
+ ++i;
+ }
+ SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
+ }
+#endif
+ maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen ); // the data is written regardless of the check above
+ }
+
+ FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData() // Appends the postponed bytes to maData, empties maPostponed and returns maData.
+ {
+ merge( maData, maPostponed, true );
+ maPostponed.realloc( 0 );
+
+ return maData;
+ }
+
+#if OSL_DEBUG_LEVEL > 0
+ void FastSaxSerializer::ForMerge::print( ) // Debug aid: dumps maData and maPostponed to std::cerr.
+ {
+ std::cerr << "Data: ";
+ for ( sal_Int32 i=0, len=maData.getLength(); i < len; i++ )
+ {
+ std::cerr << maData[i];
+ }
+
+ std::cerr << "\nPostponed: ";
+ for ( sal_Int32 i=0, len=maPostponed.getLength(); i < len; i++ )
+ {
+ std::cerr << maPostponed[i];
+ }
+
+ std::cerr << "\n";
+ }
+#endif
+
+ void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat ) // Puts rWhat in front of the collected data.
+ {
+ merge( maData, rWhat, false );
+ }
+
+ void FastSaxSerializer::ForMerge::append( const css::uno::Sequence<sal_Int8> &rWhat ) // Puts rWhat behind the collected data.
+ {
+ merge( maData, rWhat, true );
+ }
+
+ void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat ) // Collects rWhat in maPostponed; getData() appends that to maData later.
+ {
+ merge( maPostponed, rWhat, true );
+ }
+
+ /** Grows rTop by the bytes of rMerge.
+
+     @param rTop    sequence that receives the bytes
+     @param rMerge  bytes to add; an empty sequence leaves rTop untouched
+     @param bAppend true to add rMerge behind the existing content of rTop,
+                    false to insert it in front
+  */
+ void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend )
+ {
+     const sal_Int32 nAddLen = rMerge.getLength();
+     if ( nAddLen <= 0 )
+         return;
+
+     const sal_Int32 nOldLen = rTop.getLength();
+     rTop.realloc( nOldLen + nAddLen );
+     sal_Int8* const pTop = rTop.getArray();
+
+     if ( !bAppend )
+     {
+         // make room at the front: shift the old content up, then copy rMerge in
+         memmove( pTop + nAddLen, pTop, nOldLen );
+         memcpy( pTop, rMerge.getConstArray(), nAddLen );
+         return;
+     }
+
+     // the new bytes go right behind the old content
+     memcpy( pTop + nOldLen, rMerge.getConstArray(), nAddLen );
+ }
+
+ void FastSaxSerializer::ForMerge::resetData( ) // Replaces maData with an empty sequence.
+ {
+ maData = Int8Sequence();
+ }
+
+ /// Makes nElement the element that subsequent append() calls collect data
+ /// for, provided it occurs in the sort order; elements that are not part of
+ /// the order are ignored. A data slot is created on first use.
+ void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement )
+ {
+     // read-only view of the order, as in sort()
+     const auto & rSortOrder = std::as_const(maOrder);
+     const bool bListed = std::find( rSortOrder.begin(), rSortOrder.end(), nElement ) != rSortOrder.end();
+     if( !bListed )
+         return;
+
+     mnCurrentElement = nElement;
+     // inserts an empty sequence only if there is no entry for nElement yet
+     maData.try_emplace( nElement );
+ }
+
+ void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat ) // For a ForSort, prepending is the same as appending.
+ {
+ append( rWhat );
+ }
+
+ void FastSaxSerializer::ForSort::append( const css::uno::Sequence<sal_Int8> &rWhat ) // Adds rWhat to the data slot of the current element.
+ {
+ merge( maData[mnCurrentElement], rWhat, true ); // operator[] creates the slot if it does not exist yet
+ }
+
+ /// Rebuilds the ForMerge data by concatenating the per-element slots in the
+ /// sequence given by maOrder; elements without a slot are skipped.
+ void FastSaxSerializer::ForSort::sort()
+ {
+     // Clear the ForMerge data to avoid duplicate items
+     resetData();
+
+     // Sort it all
+     for ( const auto nElement : std::as_const(maOrder) )
+     {
+         const auto aSlot = maData.find( nElement );
+         if ( aSlot == maData.end() )
+             continue;
+         ForMerge::append( aSlot->second );
+     }
+ }
+
+ FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData() // Sorts the collected slots, then returns the merged data via ForMerge::getData().
+ {
+ sort( );
+ return ForMerge::getData();
+ }
+
+#if OSL_DEBUG_LEVEL > 0
+ void FastSaxSerializer::ForSort::print( ) // Debug aid: dumps every element slot to std::cerr, then sorts and prints the merged data.
+ {
+ for ( const auto& [rElement, rData] : maData )
+ {
+ std::cerr << "pair: " << rElement;
+ for ( sal_Int32 i=0, len=rData.getLength(); i < len; ++i )
+ std::cerr << rData[i];
+ std::cerr << "\n";
+ }
+
+ sort( ); // note: this rebuilds the ForMerge data as a side effect
+ ForMerge::print();
+ }
+#endif
+
+} // namespace sax_fastparser
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx
new file mode 100644
index 000000000..109ada3c7
--- /dev/null
+++ b/sax/source/tools/fastserializer.hxx
@@ -0,0 +1,255 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SAX_SOURCE_TOOLS_FASTSERIALIZER_HXX
+#define INCLUDED_SAX_SOURCE_TOOLS_FASTSERIALIZER_HXX
+
+#include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
+#include <com/sun/star/io/XOutputStream.hpp>
+
+#include <sax/fastattribs.hxx>
+#include <sax/fshelper.hxx>
+#include "CachedOutputStream.hxx"
+
+#include <stack>
+#include <string_view>
+#include <map>
+#include <memory>
+
+namespace sax_fastparser {
+
+/// An attribute token paired with its value.
+/// Only the pointer is stored in pValue; the character data is not copied, so it has
+/// to stay valid for as long as the TokenValue is used.
+struct TokenValue
+{
+ sal_Int32 nToken;
+ const char *pValue;
+ TokenValue(sal_Int32 _nToken, const char *_pValue) : nToken(_nToken), pValue(_pValue) {}
+};
+/// List of token/value pairs, see FastSaxSerializer::getTokenValueList().
+typedef std::vector<TokenValue> TokenValueList;
+
+/// Receives notification of sax document events to write into an XOutputStream.
+class FastSaxSerializer
+{
+ typedef css::uno::Sequence< ::sal_Int8 > Int8Sequence;
+ typedef css::uno::Sequence< ::sal_Int32 > Int32Sequence;
+
+public:
+ explicit FastSaxSerializer(const css::uno::Reference< css::io::XOutputStream >& xOutputStream);
+ ~FastSaxSerializer();
+
+ css::uno::Reference< css::io::XOutputStream > const & getOutputStream() const;
+ /// called by FSHelper to put data in for writeTokenValueList
+ TokenValueList& getTokenValueList() { return maTokenValues; }
+
+ /** called by the parser when parsing of an XML stream is started.
+ */
+ void startDocument();
+
+ /** called by the parser after the last XML element of a stream is processed.
+ */
+ void endDocument();
+
+ /** receives notification of the beginning of an element.
+
+ @param Element
+ contains the integer token from the <type>XFastTokenHandler</type>
+ registered at the <type>XFastParser</type>.<br>
+
+ If the element has a namespace that was registered with the
+ <type>XFastParser</type>, <param>Element</param> contains the integer
+ token of the element's local name from the <type>XFastTokenHandler</type>
+ and the integer token of the namespace combined with an arithmetic
+ <b>or</b> operation.
+
+ @param pAttrList
+ Contains a <type>FastAttributeList</type> to access the attributes
+ from the element.
+
+ */
+ void startFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList = nullptr );
+
+ /** receives notification of the end of a known element.
+ @see startFastElement
+ */
+ void endFastElement( ::sal_Int32 Element );
+
+ /** receives notification of the beginning of a single element.
+
+ @param Element
+ contains the integer token from the <type>XFastTokenHandler</type>
+ registered at the <type>XFastParser</type>.<br>
+
+ If the element has a namespace that was registered with the
+ <type>XFastParser</type>, <param>Element</param> contains the integer
+ token of the element's local name from the <type>XFastTokenHandler</type>
+ and the integer token of the namespace combined with an arithmetic
+ <b>or</b> operation.
+
+ @param pAttrList
+ Contains a <type>FastAttributeList</type> to access the attributes
+ from the element.
+
+ */
+ void singleFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList = nullptr );
+
+ // C++ helpers
+ void writeId( ::sal_Int32 Element );
+ OString getId( ::sal_Int32 Element );
+
+ void write( double value );
+ void write( std::u16string_view s, bool bEscape = false );
+ void write( const OString& s, bool bEscape = false );
+ void write( const char* pStr, sal_Int32 nLen, bool bEscape = false );
+
+public:
+ /** From now on, don't write directly to the stream, but to top of a stack.
+
+ This is to be able to change the order of the data being written.
+ If you need to write e.g.
+ p, r, rPr, [something], /rPr, t, [text], /t, /r, /p,
+ but get it in order
+ p, r, t, [text], /t, rPr, [something], /rPr, /r, /p,
+ simply do
+ p, r, mark(), t, [text], /t, mark(), rPr, [something], /rPr,
+ mergeTopMarks( MergeMarks::PREPEND ), mergeTopMarks( MergeMarks::APPEND ), /r, /p
+ and you are done.
+
+ @param nTag debugging aid to ensure mark and merge match in LIFO order
+ @param rOrder element tokens handed to the new mark.
+ NOTE(review): presumably a non-empty sequence selects the ForSort
+ behaviour (output re-ordered according to this list) - confirm in
+ the implementation of mark().
+ */
+ void mark(sal_Int32 nTag, const Int32Sequence& rOrder);
+
+ /** Merge 2 topmost marks.
+
+ The possibilities: prepend the top before the second top-most
+ mark, append it, append it later or ignore; prepending brings the possibility
+ to switch parts of the output, appending later allows to write some
+ output in advance.
+
+ Writes the result to the output stream if the mark stack becomes empty
+ by the operation.
+
+ When the MergeMarks::POSTPONE is specified, the merge happens just
+ before the next merge.
+
+ @param nTag debugging aid to ensure mark and merge match in LIFO order
+ @param eMergeType selects which of the possibilities described above is applied
+
+ @see mark()
+ */
+ void mergeTopMarks(sal_Int32 nTag,
+ sax_fastparser::MergeMarks eMergeType);
+
+private:
+ /** Helper class to cache data and write in chunks to XOutputStream or ForMerge::append.
+ * Its flush method needs to be called before touching maMarkStack
+ * to ensure correct order of ForSort methods.
+ */
+ CachedOutputStream maCachedOutputStream;
+ css::uno::Reference< css::xml::sax::XFastTokenHandler > mxFastTokenHandler;
+
+ /** Buffer type stored in maMarkStack (one object per mark).
+
+ NOTE(review): maData presumably holds the regular output of the mark and
+ maPostponed the output handed to postpone() - confirm in the implementation.
+ */
+ class ForMerge : public ForMergeBase
+ {
+ Int8Sequence maData;
+ Int8Sequence maPostponed;
+
+ public:
+ /// tag passed to the constructor, see the nTag parameter of mark() / mergeTopMarks()
+ sal_Int32 const m_Tag;
+#ifdef DBG_UTIL
+ // pending close tags, followed by pending open tags
+ std::deque<sal_Int32> m_DebugEndedElements;
+ std::deque<sal_Int32> m_DebugStartedElements;
+ // ... and another buffer for maPostponed ...
+ std::deque<sal_Int32> m_DebugPostponedEndedElements;
+ std::deque<sal_Int32> m_DebugPostponedStartedElements;
+#endif
+
+ explicit ForMerge(sal_Int32 const nTag) : m_Tag(nTag) {}
+
+ /// no-op in the base class; ForSort overrides it to select the per-element buffer
+ virtual void setCurrentElement( ::sal_Int32 /*nToken*/ ) {}
+ virtual Int8Sequence& getData();
+#if OSL_DEBUG_LEVEL > 0
+ virtual void print();
+#endif
+
+ virtual void prepend( const Int8Sequence &rWhat );
+ virtual void append( const css::uno::Sequence<sal_Int8> &rWhat ) override;
+ void postpone( const Int8Sequence &rWhat );
+
+ protected:
+ /// replaces maData with an empty sequence
+ void resetData( );
+ /// adds rMerge to rTop, at the end if bAppend is true, otherwise at the front
+ static void merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend );
+ };
+
+ /** ForMerge variant that collects the output per element token.
+
+ Data is gathered in maData under the token chosen via setCurrentElement()
+ (only tokens contained in maOrder are accepted); getData() concatenates
+ the per-token buffers in the order given by maOrder.
+ */
+ class ForSort : public ForMerge
+ {
+ std::map< ::sal_Int32, Int8Sequence > maData;
+ sal_Int32 mnCurrentElement;
+
+ Int32Sequence maOrder;
+
+ public:
+ ForSort(sal_Int32 const nTag, const Int32Sequence& rOrder)
+ : ForMerge(nTag)
+ , mnCurrentElement( 0 )
+ , maOrder( rOrder )
+ {}
+
+ void setCurrentElement( ::sal_Int32 nToken ) override;
+
+ virtual Int8Sequence& getData() override;
+
+#if OSL_DEBUG_LEVEL > 0
+ virtual void print() override;
+#endif
+
+ virtual void prepend( const Int8Sequence &rWhat ) override;
+ virtual void append( const css::uno::Sequence<sal_Int8> &rWhat ) override;
+ private:
+ void sort();
+ };
+
+ std::stack< std::shared_ptr< ForMerge > > maMarkStack;
+ bool mbMarkStackEmpty;
+ // Would be better to use OStringBuffer instead of these two
+ // but then we couldn't get the rtl_String* member :-(
+ rtl_String *mpDoubleStr;
+ sal_Int32 mnDoubleStrCapacity;
+ TokenValueList maTokenValues;
+ bool mbXescape; ///< whether to escape invalid XML characters as _xHHHH_ in write(const char*,sal_Int32,true)
+ /* TODO: make that configurable from the outside for
+ * some specific cases? */
+
+#ifdef DBG_UTIL
+ std::stack<sal_Int32> m_DebugStartedElements;
+#endif
+
+ void writeTokenValueList();
+ void writeFastAttributeList(FastAttributeList const & rAttrList);
+
+ /** Forward the call to the output stream, or write to the stack.
+
+ The latter in the case that we are inside a mark().
+ */
+ void writeBytes( const css::uno::Sequence< ::sal_Int8 >& aData );
+ void writeBytes( const char* pStr, size_t nLen );
+};
+
+} // namespace sax_fastparser
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/tools/fshelper.cxx b/sax/source/tools/fshelper.cxx
new file mode 100644
index 000000000..a560e5bd1
--- /dev/null
+++ b/sax/source/tools/fshelper.cxx
@@ -0,0 +1,155 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sax/fshelper.hxx>
+#include "fastserializer.hxx"
+
+using namespace ::com::sun::star;
+using namespace ::com::sun::star::uno;
+
+namespace sax_fastparser {
+
+/// Creates the FastSaxSerializer that writes to xOutputStream and, if bWriteHeader is set,
+/// immediately calls startDocument() on it.
+FastSerializerHelper::FastSerializerHelper(const Reference< io::XOutputStream >& xOutputStream, bool bWriteHeader ) :
+ mpSerializer(new FastSaxSerializer(xOutputStream))
+{
+ if( bWriteHeader )
+ mpSerializer->startDocument();
+}
+
+/// Finishes the document via endDocument() (unconditionally, independent of whether
+/// startDocument() was called by the constructor) and deletes the owned serializer.
+FastSerializerHelper::~FastSerializerHelper()
+{
+ mpSerializer->endDocument();
+ delete mpSerializer;
+}
+
+/// Starts the element elementTokenId without passing a FastAttributeList
+/// (startFastElement() then uses its default of nullptr).
+void FastSerializerHelper::startElement(sal_Int32 elementTokenId)
+{
+ mpSerializer->startFastElement(elementTokenId);
+}
+/// Queues an attribute token/value pair in the serializer's token value list.
+/// Only the pointer is stored, the characters are not copied, so value has to stay
+/// valid until the list has been written out.
+void FastSerializerHelper::pushAttributeValue(sal_Int32 attribute, const char* value)
+{
+ mpSerializer->getTokenValueList().emplace_back(attribute, value);
+}
+/// Queues an attribute token/value pair in the serializer's token value list.
+/// Only value.getStr() is stored, i.e. a pointer into the OString's buffer; the string
+/// has to outlive the queued entry (do not pass a temporary that dies before the
+/// list has been written out).
+void FastSerializerHelper::pushAttributeValue(sal_Int32 attribute, const OString& value)
+{
+ mpSerializer->getTokenValueList().emplace_back(attribute, value.getStr());
+}
+/// Writes the single element elementTokenId without passing a FastAttributeList
+/// (singleFastElement() then uses its default of nullptr).
+void FastSerializerHelper::singleElement(sal_Int32 elementTokenId)
+{
+ mpSerializer->singleFastElement(elementTokenId);
+}
+
+/// Ends the element elementTokenId by forwarding to endFastElement().
+void FastSerializerHelper::endElement(sal_Int32 elementTokenId)
+{
+ mpSerializer->endFastElement(elementTokenId);
+}
+
+/// Starts the element elementTokenId with the attributes from xAttrList.
+/// xAttrList must not be empty (asserted).
+void FastSerializerHelper::startElement(sal_Int32 elementTokenId, const rtl::Reference<FastAttributeList>& xAttrList)
+{
+ assert(xAttrList);
+ mpSerializer->startFastElement(elementTokenId, xAttrList.get());
+}
+
+/// Writes the single element elementTokenId with the attributes from xAttrList.
+/// xAttrList must not be empty (asserted).
+void FastSerializerHelper::singleElement(sal_Int32 elementTokenId, const rtl::Reference<FastAttributeList>& xAttrList)
+{
+ assert(xAttrList);
+ mpSerializer->singleFastElement(elementTokenId, xAttrList.get());
+}
+
+/// Writes value unescaped (bEscape is left at its default of false) and returns this
+/// so that calls can be chained.
+/// NOTE(review): the length -1 presumably makes the serializer determine the length of
+/// the NUL-terminated string itself - confirm in FastSaxSerializer::write().
+FastSerializerHelper* FastSerializerHelper::write(const char* value)
+{
+ mpSerializer->write(value, -1);
+ return this;
+}
+
+/// Writes value unescaped (bEscape is left at its default of false) and returns this
+/// so that calls can be chained.
+FastSerializerHelper* FastSerializerHelper::write(const OString& value)
+{
+ mpSerializer->write(value);
+ return this;
+}
+
+/// Writes the UTF-16 string value unescaped (bEscape is left at its default of false)
+/// and returns this so that calls can be chained.
+FastSerializerHelper* FastSerializerHelper::write(std::u16string_view value)
+{
+ mpSerializer->write(value);
+ return this;
+}
+
+/// Writes the textual representation of value as produced by OString::number()
+/// and returns this so that calls can be chained.
+FastSerializerHelper* FastSerializerHelper::write(sal_Int32 value)
+{
+ mpSerializer->write(OString::number(value));
+ return this;
+}
+
+/// Writes the textual representation of value as produced by OString::number()
+/// and returns this so that calls can be chained.
+FastSerializerHelper* FastSerializerHelper::write(sal_Int64 value)
+{
+ mpSerializer->write(OString::number(value));
+ return this;
+}
+
+/// Forwards value to the serializer's write(double), which does the formatting,
+/// and returns this so that calls can be chained.
+FastSerializerHelper* FastSerializerHelper::write(double value)
+{
+ mpSerializer->write(value);
+ return this;
+}
+
+/// Writes value with escaping enabled (bEscape = true) and returns this so that calls
+/// can be chained.
+/// NOTE(review): the length -1 presumably makes the serializer determine the length of
+/// the NUL-terminated string itself - confirm in FastSaxSerializer::write().
+FastSerializerHelper* FastSerializerHelper::writeEscaped(const char* value)
+{
+ mpSerializer->write(value, -1, true);
+ return this;
+}
+
+/// Writes the UTF-16 string value with escaping enabled (bEscape = true) and returns
+/// this so that calls can be chained. An empty value is not passed on to the serializer.
+FastSerializerHelper* FastSerializerHelper::writeEscaped(std::u16string_view value)
+{
+ if (!value.empty())
+ mpSerializer->write(value, true);
+ return this;
+}
+
+/// Forwards tokenId to the serializer's writeId() and returns this so that calls can
+/// be chained.
+FastSerializerHelper* FastSerializerHelper::writeId(sal_Int32 tokenId)
+{
+ mpSerializer->writeId(tokenId);
+ return this;
+}
+
+/// Returns the output stream reference as reported by the underlying serializer.
+css::uno::Reference< css::io::XOutputStream > const & FastSerializerHelper::getOutputStream() const
+{
+ return mpSerializer->getOutputStream();
+}
+
+/// Forwards to FastSaxSerializer::mark(); see there for the meaning of nTag and rOrder.
+void FastSerializerHelper::mark(
+ sal_Int32 const nTag, const Sequence<sal_Int32>& rOrder)
+{
+ mpSerializer->mark(nTag, rOrder);
+}
+
+/// Forwards to FastSaxSerializer::mergeTopMarks(); see there for the meaning of nTag
+/// and eMergeType.
+void FastSerializerHelper::mergeTopMarks(
+ sal_Int32 const nTag, MergeMarks const eMergeType)
+{
+ mpSerializer->mergeTopMarks(nTag, eMergeType);
+}
+
+/// Creates a new, reference-counted FastAttributeList.
+/// NOTE(review): the nullptr constructor argument is presumably the (absent) token
+/// handler - confirm against the FastAttributeList constructor.
+rtl::Reference<FastAttributeList> FastSerializerHelper::createAttrList()
+{
+ return new FastAttributeList( nullptr );
+}
+
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */