1465 lines
45 KiB
C++
1465 lines
45 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*
|
|
* This file is part of the LibreOffice project.
|
|
*
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
*
|
|
* This file incorporates work covered by the following license notice:
|
|
*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed
|
|
* with this work for additional information regarding copyright
|
|
* ownership. The ASF licenses this file to you under the Apache
|
|
* License, Version 2.0 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
|
|
*/
|
|
|
|
#include <string.h>
|
|
|
|
#include <cassert>
|
|
#include <set>
|
|
#include <stack>
|
|
#include <vector>
|
|
|
|
#include <com/sun/star/io/IOException.hpp>
|
|
#include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
|
|
#include <com/sun/star/lang/XServiceInfo.hpp>
|
|
#include <com/sun/star/uno/XComponentContext.hpp>
|
|
#include <com/sun/star/util/XCloneable.hpp>
|
|
#include <com/sun/star/xml/sax/SAXInvalidCharacterException.hpp>
|
|
#include <com/sun/star/xml/sax/XWriter.hpp>
|
|
|
|
#include <cppuhelper/exc_hlp.hxx>
|
|
#include <cppuhelper/weak.hxx>
|
|
#include <cppuhelper/implbase.hxx>
|
|
#include <cppuhelper/supportsservice.hxx>
|
|
|
|
#include <osl/diagnose.h>
|
|
#include <rtl/character.hxx>
|
|
#include <sal/log.hxx>
|
|
|
|
#include <memory>
|
|
|
|
using namespace ::cppu;
|
|
using namespace ::com::sun::star::uno;
|
|
using namespace ::com::sun::star::lang;
|
|
using namespace ::com::sun::star::xml::sax;
|
|
using namespace ::com::sun::star::io;
|
|
|
|
#define LINEFEED 10
|
|
#define SEQUENCESIZE 1024
|
|
#define MAXCOLUMNCOUNT 72
|
|
|
|
/******
|
|
*
|
|
*
|
|
* Character conversion functions
|
|
*
|
|
*
|
|
*****/
|
|
|
|
namespace
|
|
{
|
|
/// Result of writing a start tag (see SaxWriterHelper::startElement).
enum SaxInvalidCharacterError
{
    SAX_NONE = 0, ///< no invalid character was found
    SAX_WARNING = 1, ///< an attribute value contained an invalid character
    SAX_ERROR = 2 ///< the element name or an attribute name contained an invalid character
};
|
|
|
|
// Stuff for custom entity names
|
|
struct ReplacementPair
|
|
{
|
|
OUString name;
|
|
OUString replacement;
|
|
};
|
|
inline bool operator<(const ReplacementPair& lhs, const ReplacementPair& rhs)
|
|
{
|
|
return lhs.replacement.compareTo(rhs.replacement) < 0;
|
|
}
|
|
|
|
class SaxWriterHelper
|
|
{
|
|
#ifdef DBG_UTIL
|
|
public:
|
|
::std::stack<OUString> m_DebugStartedElements;
|
|
#endif
|
|
|
|
private:
|
|
Reference<XOutputStream> m_out;
|
|
Sequence<sal_Int8> m_Sequence;
|
|
sal_Int8* mp_Sequence;
|
|
|
|
sal_Int32 nLastLineFeedPos; // is negative after writing a sequence
|
|
sal_uInt32 nCurrentPos;
|
|
bool m_bStartElementFinished;
|
|
|
|
std::vector<ReplacementPair> m_Replacements;
|
|
|
|
/// @throws SAXException
|
|
sal_uInt32 writeSequence();
|
|
|
|
// use only if to insert the bytes more space in the sequence is needed and
|
|
// so the sequence has to write out and reset rPos to 0
|
|
// writes sequence only on overflow, sequence could be full on the end (rPos == SEQUENCESIZE)
|
|
/// @throws SAXException
|
|
void AddBytes(sal_Int8* pTarget, sal_uInt32& rPos, const sal_Int8* pBytes,
|
|
sal_uInt32 nBytesCount);
|
|
/// @throws SAXException
|
|
bool convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen, bool bDoNormalization,
|
|
bool bNormalizeWhitespace, sal_Int8* pTarget, sal_uInt32& rPos);
|
|
/// @throws SAXException
|
|
void FinishStartElement();
|
|
|
|
// Search for the correct replacement
|
|
const ReplacementPair* findXMLReplacement(const sal_Unicode* pStr, sal_Int32 nStrLen);
|
|
|
|
public:
|
|
explicit SaxWriterHelper(Reference<XOutputStream> const& m_TempOut)
|
|
: m_out(m_TempOut)
|
|
, m_Sequence(SEQUENCESIZE)
|
|
, mp_Sequence(nullptr)
|
|
, nLastLineFeedPos(0)
|
|
, nCurrentPos(0)
|
|
, m_bStartElementFinished(true)
|
|
{
|
|
OSL_ENSURE(SEQUENCESIZE > 50, "Sequence cache size too small");
|
|
mp_Sequence = m_Sequence.getArray();
|
|
}
|
|
~SaxWriterHelper()
|
|
{
|
|
OSL_ENSURE(!nCurrentPos, "cached Sequence not written");
|
|
OSL_ENSURE(m_bStartElementFinished, "StartElement not completely written");
|
|
}
|
|
|
|
/// @throws SAXException
|
|
void insertIndentation(sal_uInt32 m_nLevel);
|
|
|
|
// returns whether it works correct or invalid characters were in the string
|
|
// If there are invalid characters in the string it returns sal_False.
|
|
// Than the calling method has to throw the needed Exception.
|
|
/// @throws SAXException
|
|
bool writeString(const OUString& rWriteOutString, bool bDoNormalization,
|
|
bool bNormalizeWhitespace);
|
|
|
|
sal_uInt32 GetLastColumnCount() const noexcept
|
|
{
|
|
return static_cast<sal_uInt32>(nCurrentPos - nLastLineFeedPos);
|
|
}
|
|
|
|
/// @throws SAXException
|
|
void startDocument();
|
|
|
|
// returns whether it works correct or invalid characters were in the strings
|
|
// If there are invalid characters in one of the strings it returns sal_False.
|
|
// Than the calling method has to throw the needed Exception.
|
|
/// @throws SAXException
|
|
SaxInvalidCharacterError startElement(const OUString& rName,
|
|
const Reference<XAttributeList>& xAttribs);
|
|
/// @throws SAXException
|
|
bool FinishEmptyElement();
|
|
|
|
// returns whether it works correct or invalid characters were in the string
|
|
// If there are invalid characters in the string it returns sal_False.
|
|
// Than the calling method has to throw the needed Exception.
|
|
/// @throws SAXException
|
|
bool endElement(const OUString& rName);
|
|
/// @throws SAXException
|
|
void endDocument();
|
|
|
|
// returns whether it works correct or invalid characters were in the strings
|
|
// If there are invalid characters in the string it returns sal_False.
|
|
// Than the calling method has to throw the needed Exception.
|
|
/// @throws SAXException
|
|
bool processingInstruction(const OUString& rTarget, const OUString& rData);
|
|
/// @throws SAXException
|
|
void startCDATA();
|
|
/// @throws SAXException
|
|
void endCDATA();
|
|
|
|
// returns whether it works correct or invalid characters were in the strings
|
|
// If there are invalid characters in the string it returns sal_False.
|
|
// Than the calling method has to throw the needed Exception.
|
|
/// @throws SAXException
|
|
bool comment(const OUString& rComment);
|
|
|
|
/// @throws SAXException
|
|
void clearBuffer();
|
|
|
|
// Use custom entity names
|
|
void setCustomEntityNames(
|
|
const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>&
|
|
replacements);
|
|
|
|
// Calculate length for convertToXML
|
|
sal_Int32 calcXMLByteLength(const OUString& rStr, bool bDoNormalization,
|
|
bool bNormalizeWhitespace);
|
|
};
|
|
|
|
const bool g_bValidCharsBelow32[32] = {
|
|
// clang-format off
|
|
// 0 1 2 3 4 5 6 7
|
|
false, false, false, false, false, false, false, false, //0
|
|
false, true, true, false, false, true, false, false, //8
|
|
false, false, false, false, false, false, false, false, //16
|
|
false, false, false, false, false, false, false, false
|
|
// clang-format on
|
|
};
|
|
|
|
bool IsInvalidChar(const sal_Unicode aChar)
|
|
{
|
|
bool bRet(false);
|
|
// check first for the most common characters
|
|
if (aChar < 32 || aChar >= 0xd800)
|
|
bRet = ((aChar < 32 && !g_bValidCharsBelow32[aChar]) || aChar == 0xffff || aChar == 0xfffe);
|
|
return bRet;
|
|
}
|
|
|
|
/********
|
|
* write through to the output stream
|
|
*
|
|
*****/
|
|
sal_uInt32 SaxWriterHelper::writeSequence()
|
|
{
|
|
try
|
|
{
|
|
m_out->writeBytes(m_Sequence);
|
|
}
|
|
catch (const IOException&)
|
|
{
|
|
css::uno::Any anyEx = cppu::getCaughtException();
|
|
throw SAXException(u"IO exception during writing"_ustr, Reference<XInterface>(), anyEx);
|
|
}
|
|
nLastLineFeedPos -= SEQUENCESIZE;
|
|
return 0;
|
|
}
|
|
|
|
/** Appends bytes that do not fit into the remaining space of the buffer.

    The buffer is filled up and written out as often as necessary; the rest is
    stored at the start of the buffer. On return rPos points behind the last
    stored byte and may equal SEQUENCESIZE.

    @param pTarget     buffer to store into
    @param rPos        in/out: write position inside pTarget
    @param pBytes      bytes to append
    @param nBytesCount number of bytes; rPos + nBytesCount is expected to
                       exceed SEQUENCESIZE
    @throws SAXException
*/
void SaxWriterHelper::AddBytes(sal_Int8* pTarget, sal_uInt32& rPos, const sal_Int8* pBytes,
                               sal_uInt32 nBytesCount)
{
    for (;;)
    {
        OSL_ENSURE((rPos + nBytesCount) > SEQUENCESIZE, "wrong use of AddBytesMethod");

        // fill the space that is left in the buffer ...
        const sal_uInt32 nFit(SEQUENCESIZE - rPos);
        memcpy(&(pTarget[rPos]), pBytes, nFit);

        OSL_ENSURE(rPos + nFit == SEQUENCESIZE, "the position should be the at the end");

        // ... write it out and go on behind the bytes just consumed
        rPos = writeSequence();
        pBytes += nFit;
        nBytesCount -= nFit;

        if ((rPos + nBytesCount) <= SEQUENCESIZE)
        {
            memcpy(&(pTarget[rPos]), pBytes, nBytesCount);
            rPos += nBytesCount;
            return;
        }
    }
}
|
|
|
|
void SaxWriterHelper::setCustomEntityNames(
|
|
const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
|
|
{
|
|
m_Replacements.resize(replacements.size());
|
|
for (size_t i = 0; i < replacements.size(); ++i)
|
|
{
|
|
m_Replacements[i].name = replacements[i].First;
|
|
m_Replacements[i].replacement = replacements[i].Second;
|
|
}
|
|
if (replacements.size() > 1)
|
|
std::sort(m_Replacements.begin(), m_Replacements.end());
|
|
}
|
|
|
|
/** Converts a UTF-16 string to UTF-8 and does XML normalization
|
|
|
|
@param pTarget
|
|
Pointer to a piece of memory, to where the output should be written. The caller
|
|
must call calcXMLByteLength on the same string, to ensure,
|
|
that there is enough memory for converting.
|
|
*/
|
|
bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen,
|
|
bool bDoNormalization, bool bNormalizeWhitespace,
|
|
sal_Int8* pTarget, sal_uInt32& rPos)
|
|
{
|
|
bool bRet(true);
|
|
sal_uInt32 nSurrogate = 0;
|
|
|
|
for (sal_Int32 i = 0; i < nStrLen; i++)
|
|
{
|
|
sal_Unicode c = pStr[i];
|
|
if (IsInvalidChar(c))
|
|
bRet = false;
|
|
else if ((c >= 0x0001) && (c <= 0x007F)) // Deal with ascii
|
|
{
|
|
if (bDoNormalization)
|
|
{
|
|
switch (c)
|
|
{
|
|
case '&': // resemble to &
|
|
{
|
|
if ((rPos + 5) > SEQUENCESIZE)
|
|
AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("&"), 5);
|
|
else
|
|
{
|
|
memcpy(&(pTarget[rPos]), "&", 5);
|
|
rPos += 5;
|
|
}
|
|
}
|
|
break;
|
|
case '<':
|
|
{
|
|
if ((rPos + 4) > SEQUENCESIZE)
|
|
AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("<"), 4);
|
|
else
|
|
{
|
|
memcpy(&(pTarget[rPos]), "<", 4);
|
|
rPos += 4; // <
|
|
}
|
|
}
|
|
break;
|
|
case '>':
|
|
{
|
|
if ((rPos + 4) > SEQUENCESIZE)
|
|
AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>(">"), 4);
|
|
else
|
|
{
|
|
memcpy(&(pTarget[rPos]), ">", 4);
|
|
rPos += 4; // >
|
|
}
|
|
}
|
|
break;
|
|
case '\'':
|
|
{
|
|
if ((rPos + 6) > SEQUENCESIZE)
|
|
AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("'"), 6);
|
|
else
|
|
{
|
|
memcpy(&(pTarget[rPos]), "'", 6);
|
|
rPos += 6; // '
|
|
}
|
|
}
|
|
break;
|
|
case '"':
|
|
{
|
|
if ((rPos + 6) > SEQUENCESIZE)
|
|
AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("""), 6);
|
|
else
|
|
{
|
|
memcpy(&(pTarget[rPos]), """, 6);
|
|
rPos += 6; // "
|
|
}
|
|
}
|
|
break;
|
|
case 13:
|
|
{
|
|
if ((rPos + 6) > SEQUENCESIZE)
|
|
AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("
"), 6);
|
|
else
|
|
{
|
|
memcpy(&(pTarget[rPos]), "
", 6);
|
|
rPos += 6;
|
|
}
|
|
}
|
|
break;
|
|
case LINEFEED:
|
|
{
|
|
if (bNormalizeWhitespace)
|
|
{
|
|
if ((rPos + 6) > SEQUENCESIZE)
|
|
AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("
"),
|
|
6);
|
|
else
|
|
{
|
|
memcpy(&(pTarget[rPos]), "
", 6);
|
|
rPos += 6;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
pTarget[rPos] = LINEFEED;
|
|
nLastLineFeedPos = rPos;
|
|
rPos++;
|
|
}
|
|
}
|
|
break;
|
|
case 9:
|
|
{
|
|
if (bNormalizeWhitespace)
|
|
{
|
|
if ((rPos + 6) > SEQUENCESIZE)
|
|
AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("	"),
|
|
6);
|
|
else
|
|
{
|
|
memcpy(&(pTarget[rPos]), "	", 6);
|
|
rPos += 6;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
pTarget[rPos] = 9;
|
|
rPos++;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
{
|
|
pTarget[rPos] = static_cast<sal_Int8>(c);
|
|
rPos++;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
pTarget[rPos] = static_cast<sal_Int8>(c);
|
|
if (static_cast<sal_Int8>(c) == LINEFEED)
|
|
nLastLineFeedPos = rPos;
|
|
rPos++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Deal with replacements
|
|
if (bDoNormalization && !m_Replacements.empty())
|
|
{
|
|
// search
|
|
const ReplacementPair* it = findXMLReplacement(&pStr[i], nStrLen - i);
|
|
|
|
// replace
|
|
if (it != nullptr)
|
|
{
|
|
OString name = ::rtl::OUStringToOString(it->name, RTL_TEXTENCODING_UTF8);
|
|
if (rPos + name.getLength() > SEQUENCESIZE)
|
|
AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>(name.getStr()),
|
|
name.getLength());
|
|
else
|
|
{
|
|
memcpy(&(pTarget[rPos]), name.getStr(), name.getLength());
|
|
rPos += name.getLength();
|
|
}
|
|
i += it->replacement.getLength() - 1;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// Deal with other unicode cases
|
|
if (rtl::isHighSurrogate(c))
|
|
{
|
|
// 1. surrogate: save (until 2. surrogate)
|
|
if (nSurrogate != 0) // left-over lone 1st Unicode surrogate
|
|
{
|
|
OSL_FAIL("left-over Unicode surrogate");
|
|
bRet = false;
|
|
}
|
|
nSurrogate = c;
|
|
}
|
|
else if (rtl::isLowSurrogate(c))
|
|
{
|
|
// 2. surrogate: write as UTF-8
|
|
if (nSurrogate) // can only be 1st surrogate
|
|
{
|
|
nSurrogate = rtl::combineSurrogates(nSurrogate, c);
|
|
sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
|
|
sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
|
|
sal_Int8(0x80 | ((nSurrogate >> 6) & 0x3F)),
|
|
sal_Int8(0x80 | ((nSurrogate >> 0) & 0x3F)) };
|
|
if ((rPos + 4) > SEQUENCESIZE)
|
|
AddBytes(pTarget, rPos, aBytes, 4);
|
|
else
|
|
{
|
|
pTarget[rPos] = aBytes[0];
|
|
rPos++;
|
|
pTarget[rPos] = aBytes[1];
|
|
rPos++;
|
|
pTarget[rPos] = aBytes[2];
|
|
rPos++;
|
|
pTarget[rPos] = aBytes[3];
|
|
rPos++;
|
|
}
|
|
}
|
|
else // lone 2nd surrogate
|
|
{
|
|
OSL_FAIL("illegal Unicode character");
|
|
bRet = false;
|
|
}
|
|
|
|
// reset surrogate
|
|
nSurrogate = 0;
|
|
}
|
|
else if (c > 0x07FF)
|
|
{
|
|
sal_Int8 aBytes[]
|
|
= { sal_Int8(0xE0 | ((c >> 12) & 0x0F)), sal_Int8(0x80 | ((c >> 6) & 0x3F)),
|
|
sal_Int8(0x80 | ((c >> 0) & 0x3F)) };
|
|
if ((rPos + 3) > SEQUENCESIZE)
|
|
AddBytes(pTarget, rPos, aBytes, 3);
|
|
else
|
|
{
|
|
pTarget[rPos] = aBytes[0];
|
|
rPos++;
|
|
pTarget[rPos] = aBytes[1];
|
|
rPos++;
|
|
pTarget[rPos] = aBytes[2];
|
|
rPos++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
sal_Int8 aBytes[]
|
|
= { sal_Int8(0xC0 | ((c >> 6) & 0x1F)), sal_Int8(0x80 | ((c >> 0) & 0x3F)) };
|
|
if ((rPos + 2) > SEQUENCESIZE)
|
|
AddBytes(pTarget, rPos, aBytes, 2);
|
|
else
|
|
{
|
|
pTarget[rPos] = aBytes[0];
|
|
rPos++;
|
|
pTarget[rPos] = aBytes[1];
|
|
rPos++;
|
|
}
|
|
}
|
|
}
|
|
|
|
OSL_ENSURE(rPos <= SEQUENCESIZE, "not reset current position");
|
|
if (rPos == SEQUENCESIZE)
|
|
rPos = writeSequence();
|
|
|
|
// reset left-over surrogate
|
|
if ((nSurrogate != 0) && !rtl::isHighSurrogate(c))
|
|
{
|
|
OSL_FAIL("left-over Unicode surrogate");
|
|
nSurrogate = 0;
|
|
bRet = false;
|
|
}
|
|
}
|
|
if (nSurrogate != 0) // trailing lone 1st surrogate
|
|
{
|
|
OSL_FAIL("left-over Unicode surrogate");
|
|
bRet = false;
|
|
}
|
|
return bRet;
|
|
}
|
|
|
|
void SaxWriterHelper::FinishStartElement()
|
|
{
|
|
if (!m_bStartElementFinished)
|
|
{
|
|
mp_Sequence[nCurrentPos] = '>';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
m_bStartElementFinished = true;
|
|
}
|
|
}
|
|
|
|
/** Finishes a pending start tag and writes a line feed followed by m_nLevel
    blanks.

    nLastLineFeedPos is set to the position of the written line feed.

    @throws SAXException
*/
void SaxWriterHelper::insertIndentation(sal_uInt32 m_nLevel)
{
    FinishStartElement();

    // one line feed plus m_nLevel blanks
    const sal_uInt32 nTotal(m_nLevel + 1);

    if (m_nLevel == 0 || (nCurrentPos + nTotal) <= SEQUENCESIZE)
    {
        // everything fits into the buffer
        mp_Sequence[nCurrentPos] = LINEFEED;
        nLastLineFeedPos = nCurrentPos;
        nCurrentPos++;
        if (m_nLevel > 0)
        {
            memset(&(mp_Sequence[nCurrentPos]), 32, m_nLevel);
            nCurrentPos += m_nLevel;
        }
    }
    else
    {
        // build the bytes separately and let AddBytes spread them over the
        // buffer write-outs
        std::vector<sal_Int8> aBytes(nTotal, sal_Int8(32));
        aBytes[0] = LINEFEED;
        AddBytes(mp_Sequence, nCurrentPos, aBytes.data(), nTotal);
        nLastLineFeedPos = nCurrentPos - nTotal;
    }

    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
}
|
|
|
|
bool SaxWriterHelper::writeString(const OUString& rWriteOutString, bool bDoNormalization,
|
|
bool bNormalizeWhitespace)
|
|
{
|
|
FinishStartElement();
|
|
return convertToXML(rWriteOutString.getStr(), rWriteOutString.getLength(), bDoNormalization,
|
|
bNormalizeWhitespace, mp_Sequence, nCurrentPos);
|
|
}
|
|
|
|
void SaxWriterHelper::startDocument()
|
|
{
|
|
const char pc[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
|
const int nLen = strlen(pc);
|
|
if ((nCurrentPos + nLen) <= SEQUENCESIZE)
|
|
{
|
|
memcpy(mp_Sequence, pc, nLen);
|
|
nCurrentPos += nLen;
|
|
}
|
|
else
|
|
{
|
|
AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const*>(pc), nLen);
|
|
}
|
|
OSL_ENSURE(nCurrentPos <= SEQUENCESIZE, "not reset current position");
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
mp_Sequence[nCurrentPos] = LINEFEED;
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
/// @returns true if c lies in the inclusive range [start, end]
bool inrange(sal_Unicode c, sal_Unicode start, sal_Unicode end)
{
    return !(c < start || end < c);
}
|
|
#endif
|
|
|
|
/** Debug check of an element or attribute name.

    Does nothing when NDEBUG is defined. Otherwise it asserts that the name is
    not empty, contains at most one colon and consists only of the accepted
    name characters.
*/
void CheckValidName(OUString const& rName)
{
#ifdef NDEBUG
    (void)rName;
#else
    // inclusive ranges accepted in addition to ASCII alphanumerics, '_', '-',
    // '.' and 0x00B7
    static constexpr struct
    {
        sal_Unicode first;
        sal_Unicode last;
    } aNameCharRanges[] = {
        { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 }, { 0x00F8, 0x02FF }, { 0x0370, 0x037D },
        { 0x037F, 0x1FFF }, { 0x200C, 0x200D }, { 0x2070, 0x218F }, { 0x2C00, 0x2FEF },
        { 0x3001, 0xD7FF }, { 0xF900, 0xFDCF }, { 0xFDF0, 0xFFFD }, { 0x0300, 0x036F },
        { 0x203F, 0x2040 },
    };

    assert(!rName.isEmpty());
    bool bSeenColon(false);
    const sal_Int32 nNameLen = rName.getLength();
    for (sal_Int32 nPos = 0; nPos < nNameLen; ++nPos)
    {
        const sal_Unicode c = rName[nPos];
        if (c == ':')
        {
            // see https://www.w3.org/TR/REC-xml-names/#ns-qualnames
            SAL_WARN_IF(bSeenColon, "sax", "only one colon allowed: " << rName);
            assert(!bSeenColon && "only one colon allowed");
            bSeenColon = true;
            continue;
        }

        if (rtl::isAsciiAlphanumeric(c) || c == '_' || c == '-' || c == '.' || c == 0x00B7)
            continue;

        bool bAccepted(false);
        for (const auto& rRange : aNameCharRanges)
        {
            if (inrange(c, rRange.first, rRange.last))
            {
                bAccepted = true;
                break;
            }
        }
        if (!bAccepted)
        {
            // https://www.w3.org/TR/xml11/#NT-NameChar
            // (currently we don't warn about invalid start chars)
            SAL_WARN("sax", "unexpected character in attribute name: " << rName);
            assert(!"unexpected character in attribute name");
        }
    }
#endif
}
|
|
|
|
/** Writes the start tag "<name attr="value" ..." without the closing '>'.

    The '>' stays pending (m_bStartElementFinished = false), because the
    element may still be closed with "/>" by FinishEmptyElement().

    @returns SAX_ERROR if the element name or an attribute name contained
             invalid characters, SAX_WARNING if only an attribute value did,
             SAX_NONE otherwise
    @throws SAXException
*/
SaxInvalidCharacterError SaxWriterHelper::startElement(const OUString& rName,
                                                       const Reference<XAttributeList>& xAttribs)
{
    // stores one byte and writes the buffer out as soon as it is full
    const auto putByte = [this](char cByte) {
        mp_Sequence[nCurrentPos] = cByte;
        nCurrentPos++;
        if (nCurrentPos == SEQUENCESIZE)
            nCurrentPos = writeSequence();
    };

    FinishStartElement();

#ifdef DBG_UTIL
    m_DebugStartedElements.push(rName);
    ::std::set<OUString> DebugAttributes;
#endif

    putByte('<');

    SaxInvalidCharacterError eRet(SAX_NONE);
    CheckValidName(rName);
    if (!writeString(rName, false, false))
        eRet = SAX_ERROR;

    const sal_Int16 nAttribCount = xAttribs.is() ? xAttribs->getLength() : 0;
    for (sal_Int16 nAttr = 0; nAttr < nAttribCount; ++nAttr)
    {
        putByte(' ');

        OUString const aAttrName(xAttribs->getNameByIndex(nAttr));
#ifdef DBG_UTIL
        // Well-formedness constraint: Unique Att Spec
        assert(DebugAttributes.find(aAttrName) == DebugAttributes.end());
        DebugAttributes.insert(aAttrName);
#endif
        CheckValidName(aAttrName);
        if (!writeString(aAttrName, false, false))
            eRet = SAX_ERROR;

        putByte('=');
        putByte('"');

        // an invalid value only downgrades to a warning; an error stays an error
        const bool bValueValid = writeString(xAttribs->getValueByIndex(nAttr), true, true);
        if (!bValueValid && eRet != SAX_ERROR)
            eRet = SAX_WARNING;

        putByte('"');
    }

    m_bStartElementFinished = false; // the '>' is not written yet, because
    // the element may have to be closed with "/>"
    return eRet;
}
|
|
|
|
bool SaxWriterHelper::FinishEmptyElement()
|
|
{
|
|
if (m_bStartElementFinished)
|
|
return false;
|
|
|
|
mp_Sequence[nCurrentPos] = '/';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
mp_Sequence[nCurrentPos] = '>';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
|
|
m_bStartElementFinished = true;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool SaxWriterHelper::endElement(const OUString& rName)
|
|
{
|
|
FinishStartElement();
|
|
|
|
mp_Sequence[nCurrentPos] = '<';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
mp_Sequence[nCurrentPos] = '/';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
|
|
CheckValidName(rName);
|
|
bool bRet(writeString(rName, false, false));
|
|
|
|
mp_Sequence[nCurrentPos] = '>';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
|
|
return bRet;
|
|
}
|
|
|
|
void SaxWriterHelper::endDocument()
|
|
{
|
|
if (nCurrentPos > 0)
|
|
{
|
|
m_Sequence.realloc(nCurrentPos);
|
|
nCurrentPos = writeSequence();
|
|
//m_Sequence.realloc(SEQUENCESIZE);
|
|
}
|
|
}
|
|
|
|
void SaxWriterHelper::clearBuffer()
|
|
{
|
|
FinishStartElement();
|
|
if (nCurrentPos > 0)
|
|
{
|
|
m_Sequence.realloc(nCurrentPos);
|
|
nCurrentPos = writeSequence();
|
|
m_Sequence.realloc(SEQUENCESIZE);
|
|
// Be sure to update the array pointer after the reallocation.
|
|
mp_Sequence = m_Sequence.getArray();
|
|
}
|
|
}
|
|
|
|
bool SaxWriterHelper::processingInstruction(const OUString& rTarget, const OUString& rData)
|
|
{
|
|
FinishStartElement();
|
|
mp_Sequence[nCurrentPos] = '<';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
mp_Sequence[nCurrentPos] = '?';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
|
|
bool bRet(writeString(rTarget, false, false));
|
|
|
|
mp_Sequence[nCurrentPos] = ' ';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
|
|
if (!writeString(rData, false, false))
|
|
bRet = false;
|
|
|
|
mp_Sequence[nCurrentPos] = '?';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
mp_Sequence[nCurrentPos] = '>';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
|
|
return bRet;
|
|
}
|
|
|
|
void SaxWriterHelper::startCDATA()
|
|
{
|
|
FinishStartElement();
|
|
if ((nCurrentPos + 9) <= SEQUENCESIZE)
|
|
{
|
|
memcpy(&(mp_Sequence[nCurrentPos]), "<![CDATA[", 9);
|
|
nCurrentPos += 9;
|
|
}
|
|
else
|
|
AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const*>("<![CDATA["), 9);
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
}
|
|
|
|
void SaxWriterHelper::endCDATA()
|
|
{
|
|
FinishStartElement();
|
|
if ((nCurrentPos + 3) <= SEQUENCESIZE)
|
|
{
|
|
memcpy(&(mp_Sequence[nCurrentPos]), "]]>", 3);
|
|
nCurrentPos += 3;
|
|
}
|
|
else
|
|
AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const*>("]]>"), 3);
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
}
|
|
|
|
bool SaxWriterHelper::comment(const OUString& rComment)
|
|
{
|
|
FinishStartElement();
|
|
mp_Sequence[nCurrentPos] = '<';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
mp_Sequence[nCurrentPos] = '!';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
mp_Sequence[nCurrentPos] = '-';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
mp_Sequence[nCurrentPos] = '-';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
|
|
bool bRet(writeString(rComment, false, false));
|
|
|
|
mp_Sequence[nCurrentPos] = '-';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
mp_Sequence[nCurrentPos] = '-';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
mp_Sequence[nCurrentPos] = '>';
|
|
nCurrentPos++;
|
|
if (nCurrentPos == SEQUENCESIZE)
|
|
nCurrentPos = writeSequence();
|
|
|
|
return bRet;
|
|
}
|
|
|
|
/** Calculates the number of bytes needed to write the string as UTF-8 XML.

    @param rStr                 the string to measure
    @param bDoNormalization     count & < > ' " and carriage return with the
                                length of their entity / character reference
                                and apply the custom entity replacements
    @param bNormalizeWhitespace count line feed and tab with the length of
                                their character reference as well
    @returns the calculated number of bytes
*/
sal_Int32 SaxWriterHelper::calcXMLByteLength(const OUString& rStr, bool bDoNormalization,
                                             bool bNormalizeWhitespace)
{
    // number of bytes written for one character in the range 0x01..0x7F
    const auto asciiLength = [bDoNormalization, bNormalizeWhitespace](sal_uInt16 ch) -> sal_Int32 {
        if (!bDoNormalization)
            return 1;
        switch (ch)
        {
            case '&': // ampersand entity
                return 5;
            case '<': // less-than entity
            case '>': // greater-than entity
                return 4;
            case '\'': // apostrophe entity
            case '"': // quotation mark entity
            case 13: // character reference for carriage return
                return 6;
            case 10: // character reference for line feed
            case 9: // character reference for tab
                return bNormalizeWhitespace ? 6 : 1;
            default:
                return 1;
        }
    };

    sal_Int32 nBytes = 0;
    sal_uInt32 nPendingSurrogate = 0;

    const sal_Unicode* const pChars = rStr.getStr();
    const sal_Int32 nChars = rStr.getLength();
    for (sal_Int32 nPos = 0; nPos < nChars; nPos++)
    {
        const sal_uInt16 c = pChars[nPos];
        if (!IsInvalidChar(c) && (c >= 0x0001) && (c <= 0x007F))
        {
            nBytes += asciiLength(c);
        }
        else
        {
            // Deal with replacements
            if (bDoNormalization && !m_Replacements.empty())
            {
                const ReplacementPair* pFound = findXMLReplacement(&pChars[nPos], nChars - nPos);
                if (pFound != nullptr)
                {
                    nBytes
                        += ::rtl::OUStringToOString(pFound->name, RTL_TEXTENCODING_UTF8).getLength();
                    // skip the replaced text (the loop increment adds the last step)
                    nPos += pFound->replacement.getLength() - 1;
                    continue;
                }
            }

            // Deal with other unicode cases; characters rejected by
            // IsInvalidChar end up here too and are counted like the rest
            if (rtl::isHighSurrogate(c))
            {
                // 1. surrogate: remember it, it is counted with the 2. one
                nPendingSurrogate = c;
            }
            else if (rtl::isLowSurrogate(c))
            {
                // 2. surrogate: a complete pair takes four bytes
                if (nPendingSurrogate)
                    nBytes += 4;
                nPendingSurrogate = 0;
            }
            else
            {
                nBytes += (c > 0x07FF) ? 3 : 2;
            }
        }

        // a 1. surrogate not followed by its partner is dropped
        if ((nPendingSurrogate != 0) && !rtl::isHighSurrogate(c))
            nPendingSurrogate = 0;
    }

    return nBytes;
}
|
|
|
|
/** Searches the custom entity whose replacement text starts the given text.

    m_Replacements is sorted by replacement text, so the search stops at the
    first entry that compares greater than the text.

    Entries with an empty replacement text are skipped: they would match at
    every position without consuming any input, and the callers advance by
    the length of the matched replacement.

    @param pStr    start of the remaining text
    @param nStrLen number of code units available at pStr
    @returns the matching entry, or nullptr if there is none
*/
const ReplacementPair* SaxWriterHelper::findXMLReplacement(const sal_Unicode* pStr,
                                                           sal_Int32 nStrLen)
{
    for (const ReplacementPair& rCandidate : m_Replacements)
    {
        const sal_Int32 nLen = rCandidate.replacement.getLength();
        // skip empty entries and entries longer than the remaining text
        if (nLen == 0 || nLen > nStrLen)
            continue;
        const sal_Int32 matches
            = rCandidate.replacement.compareTo(std::u16string_view(pStr, nLen));
        if (matches == 0)
            return &rCandidate;
        if (matches > 0)
            return nullptr;
    }
    return nullptr;
}
|
|
|
|
/** UNO component implementing XWriter and XServiceInfo.

    Checks the order of the incoming events and forwards the actual output to
    a SaxWriterHelper, which is created when an output stream is set.
*/
class SAXWriter : public WeakImplHelper<XWriter, XServiceInfo>
{
public:
    SAXWriter()
        : m_bDocStarted(false)
        , m_bIsCDATA(false)
        , m_bForceLineBreak(false)
        , m_bAllowLineBreak(false)
        , m_nLevel(0)
    {
    }

public: // XActiveDataSource
    /// Sets the stream to write to. Setting the current stream again after the
    /// document was started only writes the buffer out; otherwise a new helper
    /// is created and the document state is reset.
    virtual void SAL_CALL setOutputStream(const Reference<XOutputStream>& aStream) override
    {
        try
        {
            // temporary: set same stream again to clear buffer
            const bool bOnlyFlush = m_out == aStream && m_pSaxWriterHelper && m_bDocStarted;
            if (bOnlyFlush)
            {
                m_pSaxWriterHelper->clearBuffer();
                return;
            }

            m_out = aStream;
            m_pSaxWriterHelper = std::make_unique<SaxWriterHelper>(m_out);
            m_bDocStarted = false;
            m_nLevel = 0;
            m_bIsCDATA = false;
        }
        catch (const SAXException& e)
        {
            throw css::lang::WrappedTargetRuntimeException(e.Message, getXWeak(),
                                                           e.WrappedException);
        }
    }

    virtual Reference<XOutputStream> SAL_CALL getOutputStream() override { return m_out; }

public: // XDocumentHandler
    virtual void SAL_CALL startDocument() override;

    virtual void SAL_CALL endDocument() override;

    virtual void SAL_CALL startElement(const OUString& aName,
                                       const Reference<XAttributeList>& xAttribs) override;

    virtual void SAL_CALL endElement(const OUString& aName) override;

    virtual void SAL_CALL characters(const OUString& aChars) override;

    virtual void SAL_CALL ignorableWhitespace(const OUString& aWhitespaces) override;
    virtual void SAL_CALL processingInstruction(const OUString& aTarget,
                                                const OUString& aData) override;
    virtual void SAL_CALL setDocumentLocator(const Reference<XLocator>& xLocator) override;
    virtual void SAL_CALL setCustomEntityNames(
        const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>&
            replacements) override;

public: // XExtendedDocumentHandler
    virtual void SAL_CALL startCDATA() override;
    virtual void SAL_CALL endCDATA() override;
    virtual void SAL_CALL comment(const OUString& sComment) override;
    virtual void SAL_CALL unknown(const OUString& sString) override;
    virtual void SAL_CALL allowLineBreak() override;

public: // XServiceInfo
    OUString SAL_CALL getImplementationName() override;
    Sequence<OUString> SAL_CALL getSupportedServiceNames() override;
    sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override;

private:
    /// @returns the indentation to insert before the next output, or -1 if
    ///          no line break is to be inserted; resets the line break flags
    sal_Int32 getIndentPrefixLength(sal_Int32 nFirstLineBreakOccurrence) noexcept;

    Reference<XOutputStream> m_out; ///< the current output stream
    std::unique_ptr<SaxWriterHelper> m_pSaxWriterHelper; ///< does the actual writing

    // Status information
    bool m_bDocStarted : 1; ///< set by startDocument()
    bool m_bIsCDATA : 1; ///< startElement() is rejected while this is set
    bool m_bForceLineBreak : 1; ///< consumed by getIndentPrefixLength()
    bool m_bAllowLineBreak : 1; ///< consumed by getIndentPrefixLength()
    sal_Int32 m_nLevel; ///< current element nesting depth
};
|
|
|
|
/** Decides whether a line break with indentation precedes the next output.

    A break is inserted if one was forced, or if one is allowed and the given
    length added to the current column count exceeds MAXCOLUMNCOUNT. Both line
    break flags are reset.

    @returns the current nesting level as indentation, or -1 for no line break
*/
sal_Int32 SAXWriter::getIndentPrefixLength(sal_Int32 nFirstLineBreakOccurrence) noexcept
{
    const bool bInsertBreak
        = m_pSaxWriterHelper
          && (m_bForceLineBreak
              || (m_bAllowLineBreak
                  && ((nFirstLineBreakOccurrence + m_pSaxWriterHelper->GetLastColumnCount())
                      > MAXCOLUMNCOUNT)));

    m_bForceLineBreak = false;
    m_bAllowLineBreak = false;

    return bInsertBreak ? m_nLevel : -1;
}
|
|
|
|
/// @returns true if the first code unit at p is a blank (' ')
bool isFirstCharWhitespace(const sal_Unicode* p) noexcept
{
    return p[0] == ' ';
}
|
|
|
|
// XServiceInfo
|
|
/// @returns the implementation name of this component
OUString SAXWriter::getImplementationName()
{
    OUString aImplementationName(u"com.sun.star.extensions.xml.sax.Writer"_ustr);
    return aImplementationName;
}
|
|
|
|
// XServiceInfo
|
|
/// @returns the result of cppu::supportsService for this object and the
///          given service name
sal_Bool SAXWriter::supportsService(const OUString& ServiceName)
{
    const bool bSupported = cppu::supportsService(this, ServiceName);
    return bSupported;
}
|
|
|
|
// XServiceInfo
|
|
/// @returns the single service name supported by this component
Sequence<OUString> SAXWriter::getSupportedServiceNames()
{
    Sequence<OUString> aServiceNames{ u"com.sun.star.xml.sax.Writer"_ustr };
    return aServiceNames;
}
|
|
|
|
void SAXWriter::startDocument()
|
|
{
|
|
if (m_bDocStarted || !m_out.is() || !m_pSaxWriterHelper)
|
|
{
|
|
throw SAXException();
|
|
}
|
|
m_bDocStarted = true;
|
|
m_pSaxWriterHelper->startDocument();
|
|
}
|
|
|
|
void SAXWriter::endDocument()
|
|
{
|
|
if (!m_bDocStarted)
|
|
{
|
|
throw SAXException(u"endDocument called before startDocument"_ustr, Reference<XInterface>(),
|
|
Any());
|
|
}
|
|
if (m_nLevel)
|
|
{
|
|
throw SAXException(u"unexpected end of document"_ustr, Reference<XInterface>(), Any());
|
|
}
|
|
m_pSaxWriterHelper->endDocument();
|
|
try
|
|
{
|
|
m_out->closeOutput();
|
|
}
|
|
catch (const IOException&)
|
|
{
|
|
css::uno::Any anyEx = cppu::getCaughtException();
|
|
throw SAXException(u"IO exception during closing the IO Stream"_ustr,
|
|
Reference<XInterface>(), anyEx);
|
|
}
|
|
}
|
|
|
|
void SAXWriter::startElement(const OUString& aName, const Reference<XAttributeList>& xAttribs)
|
|
{
|
|
if (!m_bDocStarted)
|
|
{
|
|
throw SAXException(u"startElement called before startDocument"_ustr, {}, {});
|
|
}
|
|
if (m_bIsCDATA)
|
|
{
|
|
throw SAXException(u"startElement call not allowed with CDATA sections"_ustr, {}, {});
|
|
}
|
|
|
|
sal_Int32 nLength(0);
|
|
if (m_bAllowLineBreak)
|
|
{
|
|
sal_Int32 nAttribCount = xAttribs.is() ? xAttribs->getLength() : 0;
|
|
|
|
nLength++; // "<"
|
|
nLength += m_pSaxWriterHelper->calcXMLByteLength(aName, false, false); // the tag name
|
|
|
|
sal_Int16 n;
|
|
for (n = 0; n < static_cast<sal_Int16>(nAttribCount); n++)
|
|
{
|
|
nLength++; // " "
|
|
OUString tmp = xAttribs->getNameByIndex(n);
|
|
|
|
nLength += m_pSaxWriterHelper->calcXMLByteLength(tmp, false, false);
|
|
|
|
nLength += 2; // ="
|
|
|
|
tmp = xAttribs->getValueByIndex(n);
|
|
|
|
nLength += m_pSaxWriterHelper->calcXMLByteLength(tmp, true, true);
|
|
|
|
nLength += 1; // "
|
|
}
|
|
|
|
nLength++; // '>'
|
|
}
|
|
|
|
// Is there a new indentation necessary ?
|
|
sal_Int32 nPrefix(getIndentPrefixLength(nLength));
|
|
|
|
// write into sequence
|
|
if (nPrefix >= 0)
|
|
m_pSaxWriterHelper->insertIndentation(nPrefix);
|
|
|
|
SaxInvalidCharacterError eRet(m_pSaxWriterHelper->startElement(aName, xAttribs));
|
|
|
|
m_nLevel++;
|
|
|
|
if (eRet == SAX_WARNING)
|
|
{
|
|
throw SAXInvalidCharacterException(
|
|
u"Invalid character during XML-Export in an attribute value"_ustr, {}, {});
|
|
}
|
|
else if (eRet == SAX_ERROR)
|
|
{
|
|
throw SAXException(u"Invalid character during XML-Export"_ustr, {}, {});
|
|
}
|
|
}
|
|
|
|
void SAXWriter::endElement(const OUString& aName)
|
|
{
|
|
if (!m_bDocStarted)
|
|
{
|
|
throw SAXException();
|
|
}
|
|
m_nLevel--;
|
|
|
|
if (m_nLevel < 0)
|
|
{
|
|
throw SAXException();
|
|
}
|
|
bool bRet(true);
|
|
|
|
// check here because Helper's endElement is not always called
|
|
#ifdef DBG_UTIL
|
|
assert(!m_pSaxWriterHelper->m_DebugStartedElements.empty());
|
|
// Well-formedness constraint: Element Type Match
|
|
assert(aName == m_pSaxWriterHelper->m_DebugStartedElements.top());
|
|
m_pSaxWriterHelper->m_DebugStartedElements.pop();
|
|
#endif
|
|
|
|
if (m_pSaxWriterHelper->FinishEmptyElement())
|
|
m_bForceLineBreak = false;
|
|
else
|
|
{
|
|
// only ascii chars allowed
|
|
sal_Int32 nLength(0);
|
|
if (m_bAllowLineBreak)
|
|
nLength = 3 + m_pSaxWriterHelper->calcXMLByteLength(aName, false, false);
|
|
sal_Int32 nPrefix = getIndentPrefixLength(nLength);
|
|
|
|
if (nPrefix >= 0)
|
|
m_pSaxWriterHelper->insertIndentation(nPrefix);
|
|
|
|
bRet = m_pSaxWriterHelper->endElement(aName);
|
|
}
|
|
|
|
if (!bRet)
|
|
{
|
|
throw SAXException(u"Invalid character during XML-Export"_ustr, {}, {});
|
|
}
|
|
}
|
|
|
|
void SAXWriter::characters(const OUString& aChars)
|
|
{
|
|
if (!m_bDocStarted)
|
|
{
|
|
throw SAXException(u"characters method called before startDocument"_ustr, {}, {});
|
|
}
|
|
|
|
bool bThrowException(false);
|
|
if (!aChars.isEmpty())
|
|
{
|
|
if (m_bIsCDATA)
|
|
bThrowException = !m_pSaxWriterHelper->writeString(aChars, false, false);
|
|
else
|
|
{
|
|
// Note : nFirstLineBreakOccurrence is not exact, because we don't know, how
|
|
// many 2 and 3 byte chars are inbetween. However this whole stuff
|
|
// is eitherway for pretty printing only, so it does not need to be exact.
|
|
sal_Int32 nLength(0);
|
|
sal_Int32 nIndentPrefix(-1);
|
|
if (m_bAllowLineBreak)
|
|
{
|
|
// returns position of first ascii 10 within the string, -1 when no 10 in string.
|
|
sal_Int32 nFirstLineBreakOccurrence = aChars.indexOf(LINEFEED);
|
|
|
|
nLength = m_pSaxWriterHelper->calcXMLByteLength(aChars, !m_bIsCDATA, false);
|
|
nIndentPrefix = getIndentPrefixLength(
|
|
nFirstLineBreakOccurrence >= 0 ? nFirstLineBreakOccurrence : nLength);
|
|
}
|
|
else
|
|
nIndentPrefix = getIndentPrefixLength(nLength);
|
|
|
|
// insert indentation
|
|
if (nIndentPrefix >= 0)
|
|
{
|
|
if (isFirstCharWhitespace(aChars.getStr()))
|
|
m_pSaxWriterHelper->insertIndentation(nIndentPrefix - 1);
|
|
else
|
|
m_pSaxWriterHelper->insertIndentation(nIndentPrefix);
|
|
}
|
|
bThrowException = !m_pSaxWriterHelper->writeString(aChars, true, false);
|
|
}
|
|
}
|
|
if (bThrowException)
|
|
{
|
|
throw SAXInvalidCharacterException(u"Invalid character during XML-Export"_ustr, {}, {});
|
|
}
|
|
}
|
|
|
|
void SAXWriter::ignorableWhitespace(const OUString&)
|
|
{
|
|
if (!m_bDocStarted)
|
|
{
|
|
throw SAXException();
|
|
}
|
|
|
|
m_bForceLineBreak = true;
|
|
}
|
|
|
|
void SAXWriter::processingInstruction(const OUString& aTarget, const OUString& aData)
|
|
{
|
|
if (!m_bDocStarted || m_bIsCDATA)
|
|
{
|
|
throw SAXException();
|
|
}
|
|
|
|
sal_Int32 nLength(0);
|
|
if (m_bAllowLineBreak)
|
|
{
|
|
nLength = 2; // "<?"
|
|
nLength += m_pSaxWriterHelper->calcXMLByteLength(aTarget, false, false);
|
|
|
|
nLength += 1; // " "
|
|
|
|
nLength += m_pSaxWriterHelper->calcXMLByteLength(aData, false, false);
|
|
|
|
nLength += 2; // "?>"
|
|
}
|
|
|
|
sal_Int32 nPrefix = getIndentPrefixLength(nLength);
|
|
|
|
if (nPrefix >= 0)
|
|
m_pSaxWriterHelper->insertIndentation(nPrefix);
|
|
|
|
if (!m_pSaxWriterHelper->processingInstruction(aTarget, aData))
|
|
{
|
|
throw SAXException(u"Invalid character during XML-Export"_ustr, {}, {});
|
|
}
|
|
}
|
|
|
|
/** Intentionally empty: the locator passed in is ignored by this writer. */
void SAXWriter::setDocumentLocator(const Reference<XLocator>&)
{
}
|
|
|
|
void SAXWriter::setCustomEntityNames(
|
|
const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
|
|
{
|
|
m_pSaxWriterHelper->setCustomEntityNames(replacements);
|
|
}
|
|
|
|
void SAXWriter::startCDATA()
|
|
{
|
|
if (!m_bDocStarted || m_bIsCDATA)
|
|
{
|
|
throw SAXException();
|
|
}
|
|
|
|
sal_Int32 nPrefix = getIndentPrefixLength(9);
|
|
if (nPrefix >= 0)
|
|
m_pSaxWriterHelper->insertIndentation(nPrefix);
|
|
|
|
m_pSaxWriterHelper->startCDATA();
|
|
|
|
m_bIsCDATA = true;
|
|
}
|
|
|
|
void SAXWriter::endCDATA()
|
|
{
|
|
if (!m_bDocStarted || !m_bIsCDATA)
|
|
{
|
|
throw SAXException(u"endCDATA was called without startCDATA"_ustr, {}, {});
|
|
}
|
|
|
|
sal_Int32 nPrefix = getIndentPrefixLength(3);
|
|
if (nPrefix >= 0)
|
|
m_pSaxWriterHelper->insertIndentation(nPrefix);
|
|
|
|
m_pSaxWriterHelper->endCDATA();
|
|
|
|
m_bIsCDATA = false;
|
|
}
|
|
|
|
void SAXWriter::comment(const OUString& sComment)
|
|
{
|
|
if (!m_bDocStarted || m_bIsCDATA)
|
|
{
|
|
throw SAXException();
|
|
}
|
|
|
|
sal_Int32 nLength(0);
|
|
if (m_bAllowLineBreak)
|
|
{
|
|
nLength = 4; // "<!--"
|
|
nLength += m_pSaxWriterHelper->calcXMLByteLength(sComment, false, false);
|
|
|
|
nLength += 3;
|
|
}
|
|
|
|
sal_Int32 nPrefix = getIndentPrefixLength(nLength);
|
|
if (nPrefix >= 0)
|
|
m_pSaxWriterHelper->insertIndentation(nPrefix);
|
|
|
|
if (!m_pSaxWriterHelper->comment(sComment))
|
|
{
|
|
throw SAXException(u"Invalid character during XML-Export"_ustr, {}, {});
|
|
}
|
|
}
|
|
|
|
void SAXWriter::allowLineBreak()
|
|
{
|
|
if (!m_bDocStarted || m_bAllowLineBreak)
|
|
{
|
|
throw SAXException();
|
|
}
|
|
|
|
m_bAllowLineBreak = true;
|
|
}
|
|
|
|
void SAXWriter::unknown(const OUString& sString)
|
|
{
|
|
if (!m_bDocStarted)
|
|
{
|
|
throw SAXException();
|
|
}
|
|
if (m_bIsCDATA)
|
|
{
|
|
throw SAXException();
|
|
}
|
|
|
|
if (sString.startsWith("<?xml"))
|
|
return;
|
|
|
|
sal_Int32 nLength(0);
|
|
if (m_bAllowLineBreak)
|
|
nLength = m_pSaxWriterHelper->calcXMLByteLength(sString, false, false);
|
|
|
|
sal_Int32 nPrefix = getIndentPrefixLength(nLength);
|
|
if (nPrefix >= 0)
|
|
m_pSaxWriterHelper->insertIndentation(nPrefix);
|
|
|
|
if (!m_pSaxWriterHelper->writeString(sString, false, false))
|
|
{
|
|
throw SAXException(u"Invalid character during XML-Export"_ustr, {}, {});
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
|
|
/**
 * Exported component factory: creates a new SAXWriter and returns it after
 * passing it through cppu::acquire. Both arguments are unused.
 */
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
com_sun_star_extensions_xml_sax_Writer_get_implementation(css::uno::XComponentContext*,
                                                          css::uno::Sequence<css::uno::Any> const&)
{
    SAXWriter* const pWriter = new SAXWriter;
    return cppu::acquire(pWriter);
}
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|