summaryrefslogtreecommitdiffstats
path: root/l10ntools/source/xmlparse.cxx
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--l10ntools/source/xmlparse.cxx1108
1 files changed, 1108 insertions, 0 deletions
diff --git a/l10ntools/source/xmlparse.cxx b/l10ntools/source/xmlparse.cxx
new file mode 100644
index 0000000000..397e071a30
--- /dev/null
+++ b/l10ntools/source/xmlparse.cxx
@@ -0,0 +1,1108 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+#include <sal/config.h>
+
+#include <cassert>
+#include <stdio.h>
+#include <string_view>
+
+#include <helper.hxx>
+#include <utility>
+#include <xmlparse.hxx>
+#include <fstream>
+#include <iostream>
+#include <osl/file.hxx>
+#include <osl/process.h>
+#include <o3tl/string_view.hxx>
+#include <rtl/ustring.hxx>
+#include <rtl/strbuf.hxx>
+#include <unicode/regex.h>
+
+using namespace osl;
+
+constexpr OString XML_LANG = "xml-lang"_ostr; // attribute name compared against when reading, skipping or setting an element's language
+
+
+
+
+// Creates a child node and, when a parent is supplied, appends the new node
+// to that parent's child list.
+XMLChildNode::XMLChildNode( XMLParentNode *pPar )
+    : m_pParent( pPar )
+{
+    if ( m_pParent != nullptr )
+    {
+        m_pParent->AddChild( this );
+    }
+}
+
+
+// Copy constructor: the copy records the same parent pointer as rObj.
+// AddChild is not called here, so the parent's child list is left as it is.
+XMLChildNode::XMLChildNode( const XMLChildNode& rObj )
+    : XMLNode( rObj )
+    , m_pParent( rObj.m_pParent )
+{
+}
+
+// Assignment: takes over rObj's parent pointer; self-assignment is a no-op.
+XMLChildNode& XMLChildNode::operator=(const XMLChildNode& rObj)
+{
+    if ( this == &rObj )
+        return *this;
+
+    m_pParent = rObj.m_pParent;
+    return *this;
+}
+
+
+
+
+// Destructor: deletes every child held in the child list, if there is one.
+XMLParentNode::~XMLParentNode()
+{
+    if ( !m_pChildList )
+        return;
+
+    RemoveAndDeleteAllChildren();
+}
+
+/*
+ * Copy constructor: when rObj has a child list, builds a fresh list holding
+ * newly allocated copies of rObj's element, data, comment and default
+ * children. Null entries are skipped; a child of any other node type is only
+ * reported on stdout and not copied.
+ */
+XMLParentNode::XMLParentNode( const XMLParentNode& rObj)
+    : XMLChildNode( rObj )
+{
+    if( rObj.m_pChildList )
+    {
+        m_pChildList.reset( new XMLChildNodeList );
+        for ( XMLChildNode* pSource : *rObj.m_pChildList )
+        {
+            if( pSource == nullptr )
+                continue;
+
+            const auto eType = pSource->GetNodeType();
+            if ( eType == XMLNodeType::ELEMENT )
+                AddChild( new XMLElement( *static_cast<XMLElement*>(pSource) ) );
+            else if ( eType == XMLNodeType::DATA )
+                AddChild( new XMLData( *static_cast<XMLData*>(pSource) ) );
+            else if ( eType == XMLNodeType::COMMENT )
+                AddChild( new XMLComment( *static_cast<XMLComment*>(pSource) ) );
+            else if ( eType == XMLNodeType::DEFAULT )
+                AddChild( new XMLDefault( *static_cast<XMLDefault*>(pSource) ) );
+            else
+                fprintf(stdout,"XMLParentNode::XMLParentNode( const XMLParentNode& rObj) strange obj");
+        }
+    }
+}
+
+/*
+ * Assignment: replaces this node's children with deep copies of rObj's
+ * children, mirroring what the copy constructor does.
+ *
+ * Children are owned by their parent node (the destructor deletes them), so
+ * storing rObj's raw child pointers here would make two parents delete the
+ * same nodes. Each child is therefore cloned according to its node type.
+ * Null entries are skipped; a child of an unexpected type is reported on
+ * stdout and not copied.
+ *
+ * Returns *this. Self-assignment is a no-op.
+ */
+XMLParentNode& XMLParentNode::operator=(const XMLParentNode& rObj)
+{
+    if( this == &rObj )
+        return *this;
+
+    XMLChildNode::operator=(rObj);
+
+    // Free the children currently held; a no-op when there is no list yet.
+    RemoveAndDeleteAllChildren();
+
+    if( !rObj.m_pChildList )
+    {
+        m_pChildList.reset();
+        return *this;
+    }
+
+    m_pChildList.reset( new XMLChildNodeList );
+    for ( XMLChildNode* pNode : *rObj.m_pChildList )
+    {
+        if( pNode == nullptr )
+            continue;
+
+        switch( pNode->GetNodeType() )
+        {
+            case XMLNodeType::ELEMENT:
+                AddChild( new XMLElement( *static_cast<XMLElement*>(pNode) ) );
+                break;
+            case XMLNodeType::DATA:
+                AddChild( new XMLData( *static_cast<XMLData*>(pNode) ) );
+                break;
+            case XMLNodeType::COMMENT:
+                AddChild( new XMLComment( *static_cast<XMLComment*>(pNode) ) );
+                break;
+            case XMLNodeType::DEFAULT:
+                AddChild( new XMLDefault( *static_cast<XMLDefault*>(pNode) ) );
+                break;
+            default:
+                fprintf(stdout,"XMLParentNode::operator=( const XMLParentNode& rObj) strange obj");
+                break;
+        }
+    }
+    return *this;
+}
+// Appends pChild to the child list, creating the list on first use.
+void XMLParentNode::AddChild( XMLChildNode *pChild )
+{
+    if ( m_pChildList == nullptr )
+    {
+        m_pChildList.reset( new XMLChildNodeList );
+    }
+    m_pChildList->push_back( pChild );
+}
+
+// Deletes every child node and empties the list; the list object itself is
+// kept. Does nothing when no list exists.
+void XMLParentNode::RemoveAndDeleteAllChildren()
+{
+    if ( !m_pChildList )
+        return;
+
+    for ( XMLChildNode* pChild : *m_pChildList )
+        delete pChild;
+    m_pChildList->clear();
+}
+
+
+
+
+// Serialises the whole tree into the file aFilename, truncating any existing
+// content. Terminates the process with EXIT_FAILURE when the file cannot be
+// opened for writing.
+void XMLFile::Write( OString const &aFilename )
+{
+    std::ofstream aOut(
+        aFilename.getStr(), std::ios_base::out | std::ios_base::trunc);
+    if ( aOut.is_open() )
+    {
+        Write( aOut );
+        aOut.close();
+        return;
+    }
+
+    std::cerr
+        << "Error: helpex cannot create file " << aFilename
+        << '\n';
+    std::exit(EXIT_FAILURE);
+}
+
+/*
+ * Writes pCur and everything below it to rStream as XML text.
+ *
+ * A null pCur means "start at this file node". Attribute names, attribute
+ * values and character data go through XMLUtil::QuotHTML; comment and
+ * default nodes are written as stored. An element without a child list is
+ * written as an empty-element tag.
+ */
+void XMLFile::Write( std::ofstream &rStream , XMLNode *pCur )
+{
+    if ( pCur == nullptr )
+    {
+        Write( rStream, this );
+        return;
+    }
+
+    switch( pCur->GetNodeType() )
+    {
+        case XMLNodeType::XFILE:
+        {
+            // The file node itself produces no text, only its children do.
+            if ( XMLChildNodeList* pTopLevel = GetChildList() )
+            {
+                for ( XMLChildNode* pChild : *pTopLevel )
+                    Write( rStream, pChild );
+            }
+        }
+        break;
+        case XMLNodeType::ELEMENT:
+        {
+            XMLElement *pElement = static_cast<XMLElement*>(pCur);
+            rStream << "<" << pElement->GetName();
+
+            if ( XMLAttributeList* pAttrs = pElement->GetAttributeList() )
+            {
+                for ( XMLAttribute* pAttr : *pAttrs )
+                {
+                    OString sText( pAttr->GetName() );
+                    rStream << " " << XMLUtil::QuotHTML( sText ) << "=\"";
+                    sText = pAttr->GetValue();
+                    rStream << XMLUtil::QuotHTML( sText ) << "\"";
+                }
+            }
+
+            XMLChildNodeList* pChildren = pElement->GetChildList();
+            if ( pChildren == nullptr )
+            {
+                rStream << "/>";
+            }
+            else
+            {
+                rStream << ">";
+                for ( XMLChildNode* pChild : *pChildren )
+                    Write( rStream, pChild );
+                rStream << "</" << pElement->GetName() << ">";
+            }
+        }
+        break;
+        case XMLNodeType::DATA:
+        {
+            const OString sText( static_cast<const XMLData*>(pCur)->GetData() );
+            rStream << XMLUtil::QuotHTML( sText );
+        }
+        break;
+        case XMLNodeType::COMMENT:
+        {
+            rStream << "<!--"
+                    << static_cast<const XMLComment*>(pCur)->GetComment()
+                    << "-->";
+        }
+        break;
+        case XMLNodeType::DEFAULT:
+        {
+            rStream << static_cast<const XMLDefault*>(pCur)->GetDefault();
+        }
+        break;
+    }
+}
+
+/*
+ * Dumps pCur and everything below it to stdout, without any escaping.
+ *
+ * A null pCur means "start at this file node". The xml-lang attribute is
+ * left out of the output. nLevel is passed on (incremented) to the children
+ * of an element but does not influence the printed text.
+ */
+void XMLFile::Print( XMLNode *pCur, sal_uInt16 nLevel )
+{
+    if ( pCur == nullptr )
+    {
+        Print( this );
+        return;
+    }
+
+    switch( pCur->GetNodeType() )
+    {
+        case XMLNodeType::XFILE:
+        {
+            if ( XMLChildNodeList* pTopLevel = GetChildList() )
+            {
+                for ( XMLChildNode* pChild : *pTopLevel )
+                    Print( pChild );
+            }
+        }
+        break;
+        case XMLNodeType::ELEMENT:
+        {
+            XMLElement *pElement = static_cast<XMLElement*>(pCur);
+
+            fprintf( stdout, "<%s", pElement->GetName().getStr());
+            if ( XMLAttributeList* pAttrs = pElement->GetAttributeList() )
+            {
+                for ( XMLAttribute* pAttr : *pAttrs )
+                {
+                    const OString sAttrName( pAttr->GetName() );
+                    if ( sAttrName == XML_LANG )
+                        continue;
+                    fprintf( stdout, " %s=\"%s\"",
+                        sAttrName.getStr(),
+                        pAttr->GetValue().getStr());
+                }
+            }
+
+            XMLChildNodeList* pChildren = pElement->GetChildList();
+            if ( pChildren == nullptr )
+            {
+                fprintf( stdout, "/>" );
+            }
+            else
+            {
+                fprintf( stdout, ">" );
+                for ( XMLChildNode* pChild : *pChildren )
+                    Print( pChild, nLevel + 1 );
+                fprintf( stdout, "</%s>", pElement->GetName().getStr());
+            }
+        }
+        break;
+        case XMLNodeType::DATA:
+        {
+            fprintf( stdout, "%s",
+                static_cast<const XMLData*>(pCur)->GetData().getStr());
+        }
+        break;
+        case XMLNodeType::COMMENT:
+        {
+            fprintf( stdout, "<!--%s-->",
+                static_cast<const XMLComment*>(pCur)->GetComment().getStr());
+        }
+        break;
+        case XMLNodeType::DEFAULT:
+        {
+            fprintf( stdout, "%s",
+                static_cast<const XMLDefault*>(pCur)->GetDefault().getStr());
+        }
+        break;
+    }
+}
+// Destructor: frees the per-id language maps stored in m_pXMLStrings. Only
+// the maps are deleted here, not the elements they point to.
+XMLFile::~XMLFile()
+{
+    if( !m_pXMLStrings )
+        return;
+
+    for ( auto const& rEntry : *m_pXMLStrings )
+    {
+        delete rEntry.second; // Check and delete content also ?
+    }
+}
+
+// Creates an empty file node without a parent and registers the element
+// names that SearchL10NElements treats as localizable.
+XMLFile::XMLFile( OString _sFileName ) // the file name, empty if created from memory stream
+    : XMLParentNode( nullptr )
+    , m_sFileName(std::move( _sFileName ))
+{
+    const OString aLocalizableNames[] = {
+        "bookmark"_ostr, "variable"_ostr, "paragraph"_ostr,
+        "h1"_ostr, "h2"_ostr, "h3"_ostr, "h4"_ostr, "h5"_ostr, "h6"_ostr,
+        "note"_ostr, "tip"_ostr, "warning"_ostr,
+        "alt"_ostr, "caption"_ostr, "title"_ostr, "link"_ostr
+    };
+
+    for ( const OString& rName : aLocalizableNames )
+        m_aNodes_localize.emplace( rName, true );
+}
+
+void XMLFile::Extract() // builds the id -> per-language element map by walking the tree from this node
+{
+ m_pXMLStrings.reset( new XMLHashMap ); // NOTE(review): the LangHashMap values of a previously built map are not deleted here and m_vOrder is not cleared - confirm Extract() runs only once per file
+ SearchL10NElements( this ); // localizable elements found are recorded through InsertL10NElement
+}
+
+/*
+ * Records pElement in m_pXMLStrings under its "id" attribute and its
+ * language (the xml-lang attribute, "en-US" when absent).
+ *
+ * The first element seen for an id creates the per-id language map and
+ * appends the id to m_vOrder. A second element with the same id and
+ * language is reported and terminates the process. An element without an
+ * attribute list is dumped to stdout and then stored under the empty id.
+ */
+void XMLFile::InsertL10NElement( XMLElement* pElement )
+{
+    OString sId;
+    OString sLanguage("en-US"_ostr);
+
+    if ( XMLAttributeList* pAttrs = pElement->GetAttributeList() )
+    {
+        for ( XMLAttribute* pAttr : *pAttrs )
+        {
+            const OString sAttrName( pAttr->GetName() );
+            if ( sAttrName == "id" )
+                sId = pAttr->GetValue();
+            if ( sAttrName == XML_LANG )
+                sLanguage = pAttr->GetValue();
+        }
+    }
+    else
+    {
+        fprintf(stdout,"XMLFile::InsertL10NElement: No AttributeList found");
+        fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++");
+        Print( pElement );
+        fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++");
+    }
+
+    const auto itKnown = m_pXMLStrings->find( sId );
+    if ( itKnown != m_pXMLStrings->end() )
+    {
+        // The id is already known: add this language unless it is a duplicate.
+        LangHashMap* pLangMap = itKnown->second;
+        if ( pLangMap->count(sLanguage) )
+        {
+            fprintf(stdout,"Error: Duplicated entry. ID = %s LANG = %s in File %s\n", sId.getStr(), sLanguage.getStr(), m_sFileName.getStr() );
+            exit( -1 );
+        }
+        (*pLangMap)[ sLanguage ] = pElement;
+        return;
+    }
+
+    // First occurrence of this id.
+    LangHashMap* pLangMap = new LangHashMap;
+    (*pLangMap)[ sLanguage ] = pElement;
+    m_pXMLStrings->emplace( sId, pLangMap );
+    m_vOrder.push_back( sId );
+}
+
+// Copy constructor: copies the node tree (through XMLParentNode), the file
+// name, the set of localizable element names and the id order.
+// m_pXMLStrings is not copied here.
+XMLFile::XMLFile( const XMLFile& rObj )
+    : XMLParentNode( rObj )
+    , m_sFileName( rObj.m_sFileName )
+{
+    if( this == &rObj )
+        return;
+
+    m_aNodes_localize = rObj.m_aNodes_localize;
+    m_vOrder = rObj.m_vOrder;
+}
+
+/*
+ * Assignment: copies the child nodes (through XMLParentNode::operator=),
+ * the set of localizable element names, the id order and, when rObj has
+ * one, the id -> language -> element map.
+ *
+ * The LangHashMap values of the current map are owned by this object (the
+ * destructor deletes them), so they are freed before the map is replaced;
+ * otherwise they would leak. The elements stored in the new map are fresh
+ * copies of rObj's elements.
+ *
+ * Returns *this. Self-assignment is a no-op.
+ */
+XMLFile& XMLFile::operator=(const XMLFile& rObj)
+{
+    if( this == &rObj )
+        return *this;
+
+    XMLParentNode::operator=(rObj);
+
+    m_aNodes_localize = rObj.m_aNodes_localize;
+    m_vOrder = rObj.m_vOrder;
+
+    if( m_pXMLStrings )
+    {
+        // Same clean-up as in the destructor: only the per-id maps are freed.
+        for (auto const& rOld : *m_pXMLStrings)
+            delete rOld.second;
+        m_pXMLStrings.reset();
+    }
+
+    if( rObj.m_pXMLStrings )
+    {
+        m_pXMLStrings.reset( new XMLHashMap );
+        for (auto const& rIdEntry : *rObj.m_pXMLStrings)
+        {
+            const LangHashMap* pSourceMap = rIdEntry.second;
+            LangHashMap* pCopiedMap = new LangHashMap;
+            for (auto const& rLangEntry : *pSourceMap)
+            {
+                (*pCopiedMap)[ rLangEntry.first ] = new XMLElement( *rLangEntry.second );
+            }
+            (*m_pXMLStrings)[ rIdEntry.first ] = pCopiedMap;
+        }
+    }
+    return *this;
+}
+
+/*
+ * Walks the tree below pCur and hands every localizable element to
+ * InsertL10NElement.
+ *
+ * A null pCur means "start at this file node". An element that carries a
+ * "localize" attribute is skipped together with its whole subtree. An
+ * element whose lower-cased name is registered in m_aNodes_localize is
+ * inserted and not descended into; any other element is searched
+ * recursively. Non-element nodes are ignored.
+ */
+void XMLFile::SearchL10NElements( XMLChildNode *pCur )
+{
+    if ( pCur == nullptr )
+    {
+        SearchL10NElements( this );
+        return;
+    }
+
+    switch( pCur->GetNodeType() )
+    {
+        case XMLNodeType::XFILE:
+        {
+            if ( XMLChildNodeList* pTopLevel = GetChildList() )
+            {
+                for ( XMLChildNode* pChild : *pTopLevel )
+                {
+                    if( pChild->GetNodeType() == XMLNodeType::ELEMENT )
+                        SearchL10NElements( pChild );
+                }
+            }
+        }
+        break;
+        case XMLNodeType::ELEMENT:
+        {
+            XMLElement *pElement = static_cast<XMLElement*>(pCur);
+            const OString sLowerName( pElement->GetName().toAsciiLowerCase() );
+
+            bool bHasLocalizeAttr = false;
+            if ( XMLAttributeList* pAttrs = pElement->GetAttributeList() )
+            {
+                for ( XMLAttribute* pAttr : *pAttrs )
+                {
+                    if ( pAttr->GetName() == "localize" )
+                    {
+                        bHasLocalizeAttr = true;
+                        break;
+                    }
+                }
+            }
+
+            if ( !bHasLocalizeAttr )
+            {
+                if ( m_aNodes_localize.find( sLowerName ) != m_aNodes_localize.end() )
+                {
+                    InsertL10NElement( pElement );
+                }
+                else if ( XMLChildNodeList* pChildren = pElement->GetChildList() )
+                {
+                    for ( XMLChildNode* pChild : *pChildren )
+                        SearchL10NElements( pChild );
+                }
+            }
+        }
+        break;
+        default:
+        break;
+    }
+}
+
+/*
+ * Reports whether the document may be exported.
+ *
+ * The tree below pCur is searched for "topic" elements (name compared
+ * case-insensitively). A "status" attribute on such an element whose value
+ * is neither "PUBLISH" nor "DEPRECATED" (again case-insensitive) makes the
+ * result false. Children of a topic element are not examined.
+ *
+ * A null pCur means "check this whole file". The result is accumulated in a
+ * function-local static across the recursion; it is reset on that null
+ * entry path so that the outcome for one file does not leak into the check
+ * of the next file handled by the same process.
+ *
+ * Returns true when no disqualifying status was found.
+ */
+bool XMLFile::CheckExportStatus( XMLChildNode *pCur )
+{
+    static bool bStatusExport = true;
+
+    if ( !pCur )
+    {
+        // Fresh top-level query: forget what an earlier query found.
+        bStatusExport = true;
+        CheckExportStatus( this );
+        return bStatusExport;
+    }
+
+    switch( pCur->GetNodeType())
+    {
+        case XMLNodeType::XFILE:
+        {
+            if ( XMLChildNodeList* pTopLevel = GetChildList() )
+            {
+                for ( XMLChildNode* pChild : *pTopLevel )
+                {
+                    if( pChild->GetNodeType() == XMLNodeType::ELEMENT )
+                        CheckExportStatus( pChild );
+                }
+            }
+        }
+        break;
+        case XMLNodeType::ELEMENT:
+        {
+            XMLElement *pElement = static_cast<XMLElement*>(pCur);
+            if (pElement->GetName().equalsIgnoreAsciiCase("TOPIC"))
+            {
+                if ( XMLAttributeList* pAttrs = pElement->GetAttributeList() )
+                {
+                    for ( XMLAttribute* pAttr : *pAttrs )
+                    {
+                        const OString sAttrName( pAttr->GetName() );
+                        if ( !sAttrName.equalsIgnoreAsciiCase("STATUS") )
+                            continue;
+
+                        const OString sStatus( pAttr->GetValue() );
+                        if (!sStatus.equalsIgnoreAsciiCase("PUBLISH") &&
+                            !sStatus.equalsIgnoreAsciiCase("DEPRECATED"))
+                        {
+                            bStatusExport = false;
+                        }
+                    }
+                }
+            }
+            else if ( XMLChildNodeList* pChildren = pElement->GetChildList() )
+            {
+                for ( XMLChildNode* pChild : *pChildren )
+                    CheckExportStatus( pChild );
+            }
+        }
+        break;
+        default:
+        break;
+    }
+    return bStatusExport;
+}
+
+// Creates an element called _sName and hands pParent to the XMLParentNode
+// base, which registers the new node with a non-null parent.
+XMLElement::XMLElement( OString _sName, XMLParentNode *pParent )
+    : XMLParentNode( pParent )
+    , m_sElementName( std::move( _sName ) )
+{
+}
+
+// Copy constructor: copies the children (through XMLParentNode), the
+// element name and, when rObj has an attribute list, a newly allocated copy
+// of every attribute.
+XMLElement::XMLElement(const XMLElement& rObj)
+    : XMLParentNode( rObj )
+    , m_sElementName( rObj.m_sElementName )
+{
+    if ( !rObj.m_pAttributes )
+        return;
+
+    // Created up front so an empty source list still yields an empty list.
+    m_pAttributes.reset( new XMLAttributeList );
+    for ( XMLAttribute* pAttr : *rObj.m_pAttributes )
+        AddAttribute( pAttr->GetName(), pAttr->GetValue() );
+}
+
+// Assignment: takes over rObj's children (through XMLParentNode::operator=)
+// and name, deletes the attributes held so far and replaces them with
+// newly allocated copies of rObj's attributes. Self-assignment is a no-op.
+XMLElement& XMLElement::operator=(const XMLElement& rObj)
+{
+    if( this == &rObj )
+        return *this;
+
+    XMLParentNode::operator=(rObj);
+    m_sElementName = rObj.m_sElementName;
+
+    if ( m_pAttributes )
+    {
+        for ( XMLAttribute* pOld : *m_pAttributes )
+            delete pOld;
+        m_pAttributes.reset();
+    }
+
+    if ( rObj.m_pAttributes )
+    {
+        m_pAttributes.reset( new XMLAttributeList );
+        for ( XMLAttribute* pAttr : *rObj.m_pAttributes )
+            AddAttribute( pAttr->GetName(), pAttr->GetValue() );
+    }
+    return *this;
+}
+
+// Appends a new attribute rAttribute="rValue", creating the attribute list
+// on first use. An existing attribute of the same name is not looked for.
+void XMLElement::AddAttribute( const OString &rAttribute, const OString &rValue )
+{
+    if ( m_pAttributes == nullptr )
+    {
+        m_pAttributes.reset( new XMLAttributeList );
+    }
+    m_pAttributes->push_back( new XMLAttribute( rAttribute, rValue ) );
+}
+
+/*
+ * Sets the xml-lang attribute of this element and of every element below it
+ * to rValue.
+ *
+ * Every existing xml-lang attribute is overwritten; if the element has an
+ * attribute list without one, the attribute is added. An element with no
+ * attribute list at all is left without the attribute, but its child
+ * elements are still processed.
+ */
+void XMLElement::ChangeLanguageTag( const OString &rValue )
+{
+    if ( m_pAttributes )
+    {
+        bool bFound = false;
+        for ( XMLAttribute* pAttr : *m_pAttributes )
+        {
+            if ( pAttr->GetName() == XML_LANG )
+            {
+                pAttr->setValue(rValue);
+                bFound = true;
+            }
+        }
+
+        if ( !bFound )
+            AddAttribute(XML_LANG, rValue);
+    }
+
+    XMLChildNodeList* pChildren = GetChildList();
+    if( pChildren == nullptr )
+        return;
+
+    for ( XMLChildNode* pChild : *pChildren )
+    {
+        if( pChild && pChild->GetNodeType() == XMLNodeType::ELEMENT )
+            static_cast< XMLElement* >(pChild)->ChangeLanguageTag( rValue );
+    }
+}
+
+// Destructor: deletes the attribute objects held in the attribute list.
+XMLElement::~XMLElement()
+{
+    if ( !m_pAttributes )
+        return;
+
+    for ( XMLAttribute* pAttr : *m_pAttributes )
+        delete pAttr;
+}
+
+// Returns the text produced by Print for this element in root mode, that is
+// the serialised children without the element's own tag.
+OString XMLElement::ToOString()
+{
+    OStringBuffer aText;
+    Print( this, aText, true );
+    return aText.makeStringAndClear();
+}
+
+/*
+ * Appends the textual form of pCur to rBuffer.
+ *
+ * With bRootelement set, pCur is treated as an element whose own tag is not
+ * written: only its children are serialised, and only when the element has
+ * both an attribute list and a child list. Otherwise the node is written
+ * according to its type; elements named "comment" (case-insensitive) are
+ * dropped together with their content, and the xml-lang attribute is left
+ * out. Nothing is escaped. A null pCur is reported on stdout.
+ */
+void XMLElement::Print(XMLNode *pCur, OStringBuffer& rBuffer, bool bRootelement ) const
+{
+    if( pCur == nullptr )
+    {
+        fprintf(stdout,"\n#+------Error: NULL Pointer in XMLELement::Print------+#\n");
+        return;
+    }
+
+    if( bRootelement )
+    {
+        XMLElement *pRoot = static_cast<XMLElement*>(pCur);
+        if ( pRoot->GetAttributeList() && pRoot->GetChildList() )
+        {
+            for ( XMLChildNode* pChild : *pRoot->GetChildList() )
+                Print( pChild, rBuffer, false );
+        }
+        return;
+    }
+
+    switch( pCur->GetNodeType())
+    {
+        case XMLNodeType::ELEMENT:
+        {
+            XMLElement *pElement = static_cast<XMLElement*>(pCur);
+            if( pElement->GetName().equalsIgnoreAsciiCase("comment") )
+                break;
+
+            rBuffer.append( "<" );
+            rBuffer.append( pElement->GetName() );
+            if ( XMLAttributeList* pAttrs = pElement->GetAttributeList() )
+            {
+                for ( XMLAttribute* pAttr : *pAttrs )
+                {
+                    const OString sAttrName( pAttr->GetName() );
+                    if ( sAttrName == XML_LANG )
+                        continue;
+                    rBuffer.append(
+                        " " + sAttrName + "=\"" +
+                        pAttr->GetValue() + "\"" );
+                }
+            }
+
+            XMLChildNodeList* pChildren = pElement->GetChildList();
+            if ( pChildren == nullptr )
+            {
+                rBuffer.append( "/>" );
+            }
+            else
+            {
+                rBuffer.append( ">" );
+                for ( XMLChildNode* pChild : *pChildren )
+                    Print( pChild, rBuffer, false );
+                rBuffer.append( "</" + pElement->GetName() + ">" );
+            }
+        }
+        break;
+        case XMLNodeType::DATA:
+        {
+            rBuffer.append( static_cast<const XMLData*>(pCur)->GetData() );
+        }
+        break;
+        case XMLNodeType::COMMENT:
+        {
+            rBuffer.append(
+                "<!--" + static_cast<const XMLComment*>(pCur)->GetComment() + "-->" );
+        }
+        break;
+        case XMLNodeType::DEFAULT:
+        {
+            rBuffer.append( static_cast<const XMLDefault*>(pCur)->GetDefault() );
+        }
+        break;
+        default:
+        break;
+    }
+}
+
+
+
+
+namespace
+{
+
+// Converts a UTF-8 system path into an absolute file URL, resolved against
+// the process working directory. Any failing step prints a message to
+// stderr and terminates the process with EXIT_FAILURE.
+OUString lcl_pathnameToAbsoluteUrl(std::string_view rPathname)
+{
+    const OUString sSystemPath = OStringToOUString(rPathname, RTL_TEXTENCODING_UTF8 );
+
+    OUString sUrl;
+    const auto eToUrl = osl::FileBase::getFileURLFromSystemPath(sSystemPath, sUrl);
+    if (eToUrl != osl::FileBase::E_None)
+    {
+        std::cerr << "Error: Cannot convert input pathname to URL\n";
+        std::exit(EXIT_FAILURE);
+    }
+
+    OUString sWorkingDir;
+    const auto eCwd = osl_getProcessWorkingDir(&sWorkingDir.pData);
+    if (eCwd != osl_Process_E_None)
+    {
+        std::cerr << "Error: Cannot determine cwd\n";
+        std::exit(EXIT_FAILURE);
+    }
+
+    const auto eToAbsolute = osl::FileBase::getAbsoluteFileURL(sWorkingDir, sUrl, sUrl);
+    if (eToAbsolute != osl::FileBase::E_None)
+    {
+        std::cerr << "Error: Cannot convert input URL to absolute URL\n";
+        std::exit(EXIT_FAILURE);
+    }
+    return sUrl;
+}
+}
+
+
+// Creates the expat parser and routes its callbacks to the static handler
+// functions of this class, passing this object as the user data.
+SimpleXMLParser::SimpleXMLParser()
+    : m_pCurNode(nullptr)
+    , m_pCurData(nullptr)
+{
+    m_aParser = XML_ParserCreate( nullptr );
+    XML_SetUserData( m_aParser, this );
+
+    // Anything not covered by a specific handler ends up in DefaultHandler.
+    XML_SetDefaultHandler( m_aParser, DefaultHandler );
+    XML_SetCommentHandler( m_aParser, CommentHandler );
+    XML_SetCharacterDataHandler( m_aParser, CharacterDataHandler );
+    XML_SetElementHandler( m_aParser, StartElementHandler, EndElementHandler );
+}
+
+// Releases the expat parser created in the constructor.
+SimpleXMLParser::~SimpleXMLParser()
+{
+    XML_ParserFree( m_aParser );
+}
+
+// expat callback: forwards a start tag to the parser object in userData.
+void SimpleXMLParser::StartElementHandler(
+    void *userData, const XML_Char *name, const XML_Char **atts )
+{
+    SimpleXMLParser* pSelf = static_cast<SimpleXMLParser *>(userData);
+    pSelf->StartElement( name, atts );
+}
+
+// expat callback: forwards an end tag to the parser object in userData; the
+// element name is not needed.
+void SimpleXMLParser::EndElementHandler(
+    void *userData, const XML_Char * /*name*/ )
+{
+    SimpleXMLParser* pSelf = static_cast<SimpleXMLParser *>(userData);
+    pSelf->EndElement();
+}
+
+// expat callback: forwards len characters starting at s to the parser
+// object in userData.
+void SimpleXMLParser::CharacterDataHandler(
+    void *userData, const XML_Char *s, int len )
+{
+    SimpleXMLParser* pSelf = static_cast<SimpleXMLParser *>(userData);
+    pSelf->CharacterData( s, len );
+}
+
+// expat callback: forwards a comment's text to the parser object in userData.
+void SimpleXMLParser::CommentHandler(
+    void *userData, const XML_Char *data )
+{
+    SimpleXMLParser* pSelf = static_cast<SimpleXMLParser *>(userData);
+    pSelf->Comment( data );
+}
+
+// expat callback for input no other handler takes: forwards len characters
+// starting at s to the parser object in userData.
+void SimpleXMLParser::DefaultHandler(
+    void *userData, const XML_Char *s, int len )
+{
+    SimpleXMLParser* pSelf = static_cast<SimpleXMLParser *>(userData);
+    pSelf->Default( s, len );
+}
+
+// Opens a new element below the current node, makes it the current node and
+// copies its attributes. atts is read as a null-terminated sequence of
+// name/value pairs.
+void SimpleXMLParser::StartElement(
+    const XML_Char *name, const XML_Char **atts )
+{
+    XMLElement *pNewElement = new XMLElement( OString(name), m_pCurNode );
+    m_pCurNode = pNewElement;
+    m_pCurData = nullptr; // following text starts a new data node
+
+    for ( int nIdx = 0; atts[nIdx]; nIdx += 2 )
+        pNewElement->AddAttribute( atts[ nIdx ], atts[ nIdx + 1 ] );
+}
+
+// Closes the current element: its parent becomes the current node again and
+// following text starts a new data node.
+void SimpleXMLParser::EndElement()
+{
+    m_pCurData = nullptr;
+    m_pCurNode = m_pCurNode->GetParent();
+}
+
+// Stores a run of character data after passing it through
+// helper::UnQuotHTML. The first run after a tag, comment or default chunk
+// creates a new data node below the current node; later runs are appended
+// to that node.
+void SimpleXMLParser::CharacterData( const XML_Char *s, int len )
+{
+    OString sChunk( s, len );
+    if ( m_pCurData )
+    {
+        m_pCurData->AddData( helper::UnQuotHTML(sChunk) );
+        return;
+    }
+    m_pCurData = new XMLData( helper::UnQuotHTML(sChunk) , m_pCurNode );
+}
+
+// Adds a comment node below the current node; following text starts a new
+// data node.
+void SimpleXMLParser::Comment( const XML_Char *data )
+{
+    m_pCurData = nullptr;
+    // The new node is constructed with m_pCurNode as its parent.
+    new XMLComment( OString( data ), m_pCurNode );
+}
+
+// Adds a default node holding the len characters at s below the current
+// node; following text starts a new data node.
+void SimpleXMLParser::Default( const XML_Char *s, int len )
+{
+    m_pCurData = nullptr;
+    // The new node is constructed with m_pCurNode as its parent.
+    new XMLDefault( OString( s, len ), m_pCurNode );
+}
+
+/*
+ * Parses the file rFileName into pXMLFile.
+ *
+ * The file is opened and memory-mapped, pXMLFile is given the name
+ * rFileName and made the current node, and the mapped content is handed to
+ * expat in a single XML_Parse call.
+ *
+ * Returns true when parsing succeeded. Returns false when the file could
+ * not be opened or mapped, or when expat reported an error.
+ * m_aErrorInformation always describes the outcome: an "unable to open"
+ * message, a success message, or
+ * "ERROR: <name> (<line>,<column>): <reason>" for a parse error.
+ *
+ * NOTE(review): the mapped size is a sal_uInt64 but is passed to XML_Parse,
+ * whose length parameter is an int.
+ */
+bool SimpleXMLParser::Execute( const OString &rFileName, XMLFile* pXMLFile )
+{
+    // Pessimistic default, left in place by the early returns below.
+    m_aErrorInformation.m_eCode = XML_ERROR_NONE;
+    m_aErrorInformation.m_nLine = 0;
+    m_aErrorInformation.m_nColumn = 0;
+    m_aErrorInformation.m_sMessage = "ERROR: Unable to open file "_ostr;
+    m_aErrorInformation.m_sMessage += rFileName;
+
+    OUString aFileURL(lcl_pathnameToAbsoluteUrl(rFileName));
+
+    oslFileHandle h;
+    if (osl_openFile(aFileURL.pData, &h, osl_File_OpenFlag_Read)
+        != osl_File_E_None)
+    {
+        return false;
+    }
+
+    sal_uInt64 s;
+    oslFileError e = osl_getFileSize(h, &s);
+    void * p = nullptr;
+    if (e == osl_File_E_None)
+    {
+        e = osl_mapFile(h, &p, s, 0, 0);
+    }
+    if (e != osl_File_E_None)
+    {
+        osl_closeFile(h);
+        return false;
+    }
+
+    pXMLFile->SetName( rFileName );
+
+    m_pCurNode = pXMLFile;
+    m_pCurData = nullptr;
+
+    // Code, line and column still hold the "no error" values set above, so
+    // only the message has to change before parsing.
+    if ( !pXMLFile->GetName().isEmpty())
+    {
+        m_aErrorInformation.m_sMessage = "File " + pXMLFile->GetName() + " parsed successfully";
+    }
+    else
+        m_aErrorInformation.m_sMessage = "XML-File parsed successfully"_ostr;
+
+    bool result = XML_Parse(m_aParser, static_cast< char * >(p), s, true);
+    if (!result)
+    {
+        m_aErrorInformation.m_eCode = XML_GetErrorCode( m_aParser );
+        m_aErrorInformation.m_nLine = XML_GetErrorLineNumber( m_aParser );
+        m_aErrorInformation.m_nColumn = XML_GetErrorColumnNumber( m_aParser );
+
+        m_aErrorInformation.m_sMessage = "ERROR: "_ostr;
+        if ( !pXMLFile->GetName().isEmpty())
+            m_aErrorInformation.m_sMessage += pXMLFile->GetName();
+        else
+            m_aErrorInformation.m_sMessage += "XML-File";
+
+        // The opening " (" is added for both branches, so a named file also
+        // gets a well-formed "<name> (<line>,<column>): " prefix.
+        m_aErrorInformation.m_sMessage +=
+            " (" +
+            OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nLine)) + "," +
+            OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nColumn)) + "): ";
+
+        switch (m_aErrorInformation.m_eCode)
+        {
+            case XML_ERROR_NO_MEMORY:
+                m_aErrorInformation.m_sMessage += "No memory";
+                break;
+            case XML_ERROR_SYNTAX:
+                m_aErrorInformation.m_sMessage += "Syntax";
+                break;
+            case XML_ERROR_NO_ELEMENTS:
+                m_aErrorInformation.m_sMessage += "No elements";
+                break;
+            case XML_ERROR_INVALID_TOKEN:
+                m_aErrorInformation.m_sMessage += "Invalid token";
+                break;
+            case XML_ERROR_UNCLOSED_TOKEN:
+                m_aErrorInformation.m_sMessage += "Unclosed token";
+                break;
+            case XML_ERROR_PARTIAL_CHAR:
+                m_aErrorInformation.m_sMessage += "Partial char";
+                break;
+            case XML_ERROR_TAG_MISMATCH:
+                m_aErrorInformation.m_sMessage += "Tag mismatch";
+                break;
+            case XML_ERROR_DUPLICATE_ATTRIBUTE:
+                m_aErrorInformation.m_sMessage += "Duplicated attribute";
+                break;
+            case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
+                m_aErrorInformation.m_sMessage += "Junk after doc element";
+                break;
+            case XML_ERROR_PARAM_ENTITY_REF:
+                m_aErrorInformation.m_sMessage += "Param entity ref";
+                break;
+            case XML_ERROR_UNDEFINED_ENTITY:
+                m_aErrorInformation.m_sMessage += "Undefined entity";
+                break;
+            case XML_ERROR_RECURSIVE_ENTITY_REF:
+                m_aErrorInformation.m_sMessage += "Recursive entity ref";
+                break;
+            case XML_ERROR_ASYNC_ENTITY:
+                m_aErrorInformation.m_sMessage += "Async_entity";
+                break;
+            case XML_ERROR_BAD_CHAR_REF:
+                m_aErrorInformation.m_sMessage += "Bad char ref";
+                break;
+            case XML_ERROR_BINARY_ENTITY_REF:
+                m_aErrorInformation.m_sMessage += "Binary entity";
+                break;
+            case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
+                m_aErrorInformation.m_sMessage += "Attribute external entity ref";
+                break;
+            case XML_ERROR_MISPLACED_XML_PI:
+                m_aErrorInformation.m_sMessage += "Misplaced xml pi";
+                break;
+            case XML_ERROR_UNKNOWN_ENCODING:
+                m_aErrorInformation.m_sMessage += "Unknown encoding";
+                break;
+            case XML_ERROR_INCORRECT_ENCODING:
+                m_aErrorInformation.m_sMessage += "Incorrect encoding";
+                break;
+            case XML_ERROR_UNCLOSED_CDATA_SECTION:
+                m_aErrorInformation.m_sMessage += "Unclosed cdata section";
+                break;
+            case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
+                m_aErrorInformation.m_sMessage += "External entity handling";
+                break;
+            case XML_ERROR_NOT_STANDALONE:
+                m_aErrorInformation.m_sMessage += "Not standalone";
+                break;
+            case XML_ERROR_NONE:
+                break;
+            default:
+                // Other expat codes get no textual reason.
+                break;
+        }
+    }
+
+    osl_unmapMappedFile(h, p, s);
+    osl_closeFile(h);
+
+    return result;
+}
+
+namespace
+{
+
+// Returns the characters [nStart, nEnd) of rString with '<' and '>' replaced
+// by entities. A '&' that already starts "&amp;" is copied unchanged, any
+// other '&' becomes "&amp;". A double quote becomes "&quot;" unless
+// bInsideTag is set, in which case it is copied unchanged.
+icu::UnicodeString lcl_QuotRange(
+    const icu::UnicodeString& rString, const sal_Int32 nStart,
+    const sal_Int32 nEnd, bool bInsideTag = false )
+{
+    icu::UnicodeString sQuoted;
+    assert( nStart < nEnd );
+    assert( nStart >= 0 );
+    assert( nEnd <= rString.length() );
+    for (sal_Int32 nPos = nStart; nPos < nEnd; ++nPos)
+    {
+        const UChar cCur = rString[nPos];
+        if ( cCur == '<' )
+        {
+            sQuoted.append("&lt;");
+        }
+        else if ( cCur == '>' )
+        {
+            sQuoted.append("&gt;");
+        }
+        else if ( cCur == '"' )
+        {
+            if( bInsideTag )
+                sQuoted.append(cCur);
+            else
+                sQuoted.append("&quot;");
+        }
+        else if ( cCur == '&' )
+        {
+            // Keep the '&' of an existing "&amp;"; the rest of it follows as
+            // ordinary characters.
+            if (rString.startsWith("&amp;", nPos, 5))
+                sQuoted.append('&');
+            else
+                sQuoted.append("&amp;");
+        }
+        else
+        {
+            sQuoted.append(cCur);
+        }
+    }
+    return sQuoted;
+}
+
+// Tells whether rString is one of the known inline tags: it starts with
+// "<" followed by a known tag name, equals the closing tag of one, or is
+// exactly "<br/>" or "<help-id-missing/>".
+bool lcl_isTag( const icu::UnicodeString& rString )
+{
+    static const icu::UnicodeString aKnownTags[] = {
+        "ahelp", "link", "item", "emph", "defaultinline",
+        "switchinline", "caseinline", "variable",
+        "bookmark_value", "image", "object",
+        "embedvar", "alt", "sup", "sub",
+        "menuitem", "keycode", "input", "literal", "widget"
+    };
+
+    for( const icu::UnicodeString& rTag : aKnownTags )
+    {
+        if( rString.startsWith("<" + rTag) )
+            return true;
+        if( rString == "</" + rTag + ">" )
+            return true;
+    }
+
+    if( rString == "<br/>" )
+        return true;
+    return rString =="<help-id-missing/>";
+}
+
+} /// anonymous namespace
+
+/*
+ * Escapes rString (UTF-8) for output while leaving the known inline tags
+ * in place.
+ *
+ * A string that is empty after trimming is returned unchanged. Otherwise a
+ * regular expression locates tag-like spans. The text between spans is
+ * escaped with lcl_QuotRange. A span accepted by lcl_isTag keeps its
+ * surrounding '<' and '>' and has only its inside escaped (double quotes
+ * preserved); any other span is escaped in full.
+ */
+OString XMLUtil::QuotHTML( const OString &rString )
+{
+    if( o3tl::trim(rString).empty() )
+        return rString;
+
+    static const sal_uInt32 nRegexFlags =
+        UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE;
+    static const icu::UnicodeString sTagPattern( "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>" );
+
+    const OUString sWide = OStringToOUString(rString, RTL_TEXTENCODING_UTF8);
+    icu::UnicodeString sSource(
+        reinterpret_cast<const UChar*>(
+            sWide.getStr()), sWide.getLength() );
+
+    UErrorCode nIcuErr = U_ZERO_ERROR;
+    icu::RegexMatcher aMatcher( sTagPattern, nRegexFlags, nIcuErr );
+    aMatcher.reset( sSource );
+
+    icu::UnicodeString sResult;
+    int32_t nHandledUpTo = 0; // end of the part of sSource already emitted
+    int32_t nSearchFrom = 0;  // where the next regex search starts
+    while( aMatcher.find(nSearchFrom, nIcuErr) && U_SUCCESS(nIcuErr) )
+    {
+        const int32_t nMatchBegin = aMatcher.start(nIcuErr);
+        // Plain text between the previous match and this one.
+        if ( nHandledUpTo < nMatchBegin )
+            sResult.append(lcl_QuotRange(sSource, nHandledUpTo, nMatchBegin));
+
+        const int32_t nMatchEnd = aMatcher.end(nIcuErr);
+        icu::UnicodeString sCandidate = aMatcher.group(nIcuErr);
+        if( !lcl_isTag(sCandidate) )
+        {
+            sResult.append(lcl_QuotRange(sSource, nMatchBegin, nMatchEnd));
+        }
+        else
+        {
+            sResult.append("<");
+            sResult.append(lcl_QuotRange(sSource, nMatchBegin+1, nMatchEnd-1, true));
+            sResult.append(">");
+        }
+        nHandledUpTo = nMatchEnd;
+        nSearchFrom = nMatchEnd;
+    }
+
+    // Whatever follows the last match.
+    if( nHandledUpTo < sSource.length() )
+        sResult.append(lcl_QuotRange(sSource, nHandledUpTo, sSource.length()));
+
+    // A terminating NUL is appended before the buffer is read as a string.
+    sResult.append('\0');
+    return
+        OUStringToOString(
+            reinterpret_cast<const sal_Unicode*>(sResult.getBuffer()),
+            RTL_TEXTENCODING_UTF8);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */