summaryrefslogtreecommitdiffstats
path: root/filter/source/xsltfilter/LibXSLTTransformer.cxx
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:06:44 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:06:44 +0000
commited5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch)
tree7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /filter/source/xsltfilter/LibXSLTTransformer.cxx
parentInitial commit. (diff)
downloadlibreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.tar.xz
libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.zip
Adding upstream version 4:7.4.7.upstream/4%7.4.7upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'filter/source/xsltfilter/LibXSLTTransformer.cxx')
-rw-r--r--filter/source/xsltfilter/LibXSLTTransformer.cxx571
1 files changed, 571 insertions, 0 deletions
diff --git a/filter/source/xsltfilter/LibXSLTTransformer.cxx b/filter/source/xsltfilter/LibXSLTTransformer.cxx
new file mode 100644
index 000000000..1a7c34805
--- /dev/null
+++ b/filter/source/xsltfilter/LibXSLTTransformer.cxx
@@ -0,0 +1,571 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <algorithm>
+#include <cstring>
+#include <map>
+#include <optional>
+#include <string_view>
+#include <utility>
+#include <vector>
+#include <libxml/parser.h>
+#include <libxml/xmlIO.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+#include <libxml/xmlstring.h>
+#include <libxslt/transform.h>
+#include <libxslt/xsltutils.h>
+#include <libxslt/variables.h>
+#include <libxslt/extensions.h>
+#include <libexslt/exslt.h>
+
+#include <cppuhelper/factory.hxx>
+#include <cppuhelper/supportsservice.hxx>
+
+#include <osl/file.hxx>
+#include <com/sun/star/uno/Any.hxx>
+#include <com/sun/star/beans/NamedValue.hpp>
+#include <com/sun/star/io/XInputStream.hpp>
+#include <com/sun/star/io/XOutputStream.hpp>
+#include <com/sun/star/io/XStreamListener.hpp>
+
+#include "LibXSLTTransformer.hxx"
+#include "OleHandler.hxx"
+
+using namespace ::cppu;
+using namespace ::osl;
+using namespace ::com::sun::star::beans;
+using namespace ::com::sun::star::io;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::registry;
+using ::std::pair;
+
+namespace XSLT
+{
+ const char* const LibXSLTTransformer::PARAM_SOURCE_URL = "sourceURL";
+ const char* const LibXSLTTransformer::PARAM_SOURCE_BASE_URL =
+ "sourceBaseURL";
+ const char* const LibXSLTTransformer::PARAM_TARGET_URL = "targetURL";
+ const char* const LibXSLTTransformer::PARAM_TARGET_BASE_URL =
+ "targetBaseURL";
+ const char* const LibXSLTTransformer::PARAM_DOCTYPE_PUBLIC = "publicType";
+
+ const sal_Int32 Reader::OUTPUT_BUFFER_SIZE = 4096;
+
+ const sal_Int32 Reader::INPUT_BUFFER_SIZE = 4096;
+
+ namespace {
+
+ /**
+ * ParserInputBufferCallback forwards IO call-backs to libxml stream IO.
+ */
+ struct ParserInputBufferCallback
+ {
+ static int
+ on_read(void * context, char * buffer, int len)
+ {
+ Reader * tmp = static_cast<Reader*> (context);
+ return tmp->read(buffer, len);
+ }
+ static int
+ on_close(void * )
+ {
+ return 0;
+ }
+ };
+ /**
+ * ParserOutputBufferCallback forwards IO call-backs to libxml stream IO.
+ */
+ struct ParserOutputBufferCallback
+ {
+ static int
+ on_write(void * context, const char * buffer, int len)
+ {
+ Reader * tmp = static_cast<Reader*> (context);
+ return tmp->write(buffer, len);
+ }
+ static int
+ on_close(void * context)
+ {
+ Reader * tmp = static_cast<Reader*> (context);
+ tmp->closeOutput();
+ return 0;
+ }
+ };
+ /**
+ * ExtFuncOleCB forwards XPath extension function calls registered with libxslt to the OleHandler instance that actually
+ * provides the implementation for those functions.
+ *
+ * The OLE extension module currently supplies two functions
+ * insertByName: registers an OLE object to be later inserted into the output tree.
+ * getByName: reads a previously registered OLE object and returns a base64 encoded string representation.
+ */
+ struct ExtFuncOleCB
+ {
+ static void *
+ init(xsltTransformContextPtr, const xmlChar*)
+ {
+ return nullptr;
+ }
+ static void
+ insertByName(xmlXPathParserContextPtr ctxt, int nargs)
+ {
+ xsltTransformContextPtr tctxt;
+ void *data;
+ if (nargs != 2) {
+ xsltGenericError(xsltGenericErrorContext,
+ "insertByName: requires exactly 2 arguments\n");
+ return;
+ }
+ tctxt = xsltXPathGetTransformContext(ctxt);
+ if (tctxt == nullptr) {
+ xsltGenericError(xsltGenericErrorContext,
+ "xsltExtFunctionTest: failed to get the transformation context\n");
+ return;
+ }
+ // XXX: someone with better knowledge of libxslt might come up with a better
+ // idea to pass the OleHandler than by attaching it to tctxt->_private. See also
+ // below.
+ data = tctxt->_private;
+ if (data == nullptr) {
+ xsltGenericError(xsltGenericErrorContext,
+ "xsltExtFunctionTest: failed to get module data\n");
+ return;
+ }
+ OleHandler * oh = static_cast<OleHandler*> (data);
+
+ xmlXPathObjectPtr value = valuePop(ctxt);
+ value = ensureStringValue(value, ctxt);
+ xmlXPathObjectPtr streamName = valuePop(ctxt);
+ streamName = ensureStringValue(streamName, ctxt);
+
+ oh->insertByName(OStringToOUString(reinterpret_cast<char*>(streamName->stringval), RTL_TEXTENCODING_UTF8),
+ std::string_view(reinterpret_cast<char*>(value->stringval)));
+ valuePush(ctxt, xmlXPathNewCString(""));
+ }
+
+ static xmlXPathObjectPtr ensureStringValue(xmlXPathObjectPtr obj, const xmlXPathParserContextPtr ctxt)
+ {
+ if (obj->type != XPATH_STRING) {
+ valuePush(ctxt, obj);
+ xmlXPathStringFunction(ctxt, 1);
+ obj = valuePop(ctxt);
+ }
+ return obj;
+ }
+
+ static void getByName(xmlXPathParserContextPtr ctxt, int nargs)
+ {
+ xsltTransformContextPtr tctxt;
+ void *data;
+ if (nargs != 1) {
+ xsltGenericError(xsltGenericErrorContext,
+ "getByName: requires exactly 1 argument\n");
+ return;
+ }
+
+ tctxt = xsltXPathGetTransformContext(ctxt);
+ if (tctxt == nullptr) {
+ xsltGenericError(xsltGenericErrorContext,
+ "xsltExtFunctionTest: failed to get the transformation context\n");
+ return;
+ }
+ // XXX: someone with better knowledge of libxslt might come up with a better
+ // idea to pass the OleHandler than by attaching it to tctxt->_private
+ data = tctxt->_private;
+ if (data == nullptr) {
+ xsltGenericError(xsltGenericErrorContext,
+ "xsltExtFunctionTest: failed to get module data\n");
+ return;
+ }
+ OleHandler * oh = static_cast<OleHandler*> (data);
+ xmlXPathObjectPtr streamName = valuePop(ctxt);
+ streamName = ensureStringValue(streamName, ctxt);
+ const OString content = oh->getByName(OStringToOUString(reinterpret_cast<char*>(streamName->stringval), RTL_TEXTENCODING_UTF8));
+ valuePush(ctxt, xmlXPathNewCString(content.getStr()));
+ xmlXPathFreeObject(streamName);
+ }
+ };
+
+ }
+
+ Reader::Reader(LibXSLTTransformer* transformer) :
+ Thread("LibXSLTTransformer"), m_transformer(transformer),
+ m_readBuf(INPUT_BUFFER_SIZE), m_writeBuf(OUTPUT_BUFFER_SIZE),
+ m_tcontext(nullptr)
+ {
+ LIBXML_TEST_VERSION;
+ }
+ ;
+
+ int
+ Reader::read(char * buffer, int len)
+ {
+ // const char *ptr = (const char *) context;
+ if (buffer == nullptr || len < 0)
+ return -1;
+ sal_Int32 n;
+ css::uno::Reference<XInputStream> xis = m_transformer->getInputStream();
+ n = xis->readBytes(m_readBuf, len);
+ if (n > 0)
+ {
+ memcpy(buffer, m_readBuf.getArray(), n);
+ }
+ return n;
+ }
+
+ int
+ Reader::write(const char * buffer, int len)
+ {
+ if (buffer == nullptr || len < 0)
+ return -1;
+ if (len > 0)
+ {
+ css::uno::Reference<XOutputStream> xos = m_transformer->getOutputStream();
+ sal_Int32 writeLen = len;
+ sal_Int32 bufLen = ::std::min(writeLen, OUTPUT_BUFFER_SIZE);
+ const sal_uInt8* memPtr =
+ reinterpret_cast<const sal_uInt8*> (buffer);
+ while (writeLen > 0)
+ {
+ sal_Int32 n = ::std::min(writeLen, bufLen);
+ m_writeBuf.realloc(n);
+ memcpy(m_writeBuf.getArray(), memPtr,
+ static_cast<size_t> (n));
+ xos->writeBytes(m_writeBuf);
+ memPtr += n;
+ writeLen -= n;
+ }
+ }
+ return len;
+ }
+
+ void
+ Reader::closeOutput()
+ {
+ css::uno::Reference<XOutputStream> xos = m_transformer->getOutputStream();
+ if (xos.is())
+ {
+ xos->flush();
+ xos->closeOutput();
+ }
+ m_transformer->done();
+ }
+
+ void
+ Reader::execute()
+ {
+ OSL_ASSERT(m_transformer != nullptr);
+ OSL_ASSERT(m_transformer->getInputStream().is());
+ OSL_ASSERT(m_transformer->getOutputStream().is());
+ OSL_ASSERT(!m_transformer->getStyleSheetURL().isEmpty() || !m_transformer->getStyleSheetText().isEmpty());
+ ::std::map<const char*, OString> pmap = m_transformer->getParameters();
+ ::std::vector< const char* > params( pmap.size() * 2 + 1 ); // build parameters
+ int paramIndex = 0;
+ for (auto const& elem : pmap)
+ {
+ params[paramIndex++] = elem.first;
+ params[paramIndex++] = elem.second.getStr();
+ }
+ params[paramIndex] = nullptr;
+ xmlDocPtr doc = xmlReadIO(&ParserInputBufferCallback::on_read,
+ &ParserInputBufferCallback::on_close,
+ static_cast<void*> (this), nullptr, nullptr, 0);
+ xsltStylesheetPtr styleSheet = nullptr;
+ if (m_transformer->getStyleSheetURL().getLength())
+ styleSheet = xsltParseStylesheetFile(
+ reinterpret_cast<const xmlChar *>(m_transformer->getStyleSheetURL().getStr()));
+ else if (m_transformer->getStyleSheetText().getLength())
+ {
+ xmlDocPtr styleSheetDoc = xmlReadMemory(
+ m_transformer->getStyleSheetText().getStr(),
+ m_transformer->getStyleSheetText().getLength(),
+ "noname.xml", nullptr, 0);
+
+ styleSheet = xsltParseStylesheetDoc(styleSheetDoc);
+ }
+
+ if (!styleSheet)
+ {
+ m_transformer->error("No stylesheet was created");
+ }
+
+ xmlDocPtr result = nullptr;
+ exsltRegisterAll();
+ registerExtensionModule();
+#ifdef DEBUG_FILTER_LIBXSLTTRANSFORMER
+ xsltSetGenericDebugFunc(stderr, NULL);
+ xsltDebugDumpExtensions(NULL);
+#endif
+ std::optional<OleHandler> oh(std::in_place, m_transformer->getComponentContext());
+ if (styleSheet)
+ {
+ xsltTransformContextPtr tcontext = xsltNewTransformContext(
+ styleSheet, doc);
+ {
+ std::scoped_lock<std::mutex> g(m_mutex);
+ m_tcontext = tcontext;
+ }
+ oh->registercontext(m_tcontext);
+ xsltQuoteUserParams(m_tcontext, params.data());
+ result = xsltApplyStylesheetUser(styleSheet, doc, nullptr, nullptr, nullptr,
+ m_tcontext);
+ }
+
+ if (result)
+ {
+ xmlCharEncodingHandlerPtr encoder = xmlGetCharEncodingHandler(
+ XML_CHAR_ENCODING_UTF8);
+ xmlOutputBufferPtr outBuf = xmlAllocOutputBuffer(encoder);
+ outBuf->context = static_cast<void *> (this);
+ outBuf->writecallback = &ParserOutputBufferCallback::on_write;
+ outBuf->closecallback = &ParserOutputBufferCallback::on_close;
+ xsltSaveResultTo(outBuf, result, styleSheet);
+ (void)xmlOutputBufferClose(outBuf);
+ }
+ else
+ {
+ xmlErrorPtr lastErr = xmlGetLastError();
+ OUString msg;
+ if (lastErr)
+ msg = OStringToOUString(lastErr->message, RTL_TEXTENCODING_UTF8);
+ else
+ msg = "Unknown XSLT transformation error";
+
+ m_transformer->error(msg);
+ }
+ oh.reset();
+ xsltFreeStylesheet(styleSheet);
+ xsltTransformContextPtr tcontext = nullptr;
+ {
+ std::scoped_lock<std::mutex> g(m_mutex);
+ std::swap(m_tcontext, tcontext);
+ }
+ xsltFreeTransformContext(tcontext);
+ xmlFreeDoc(doc);
+ xmlFreeDoc(result);
+ }
+
+ void
+ Reader::registerExtensionModule()
+ {
+ const xmlChar* oleModuleURI = reinterpret_cast<const xmlChar *>(EXT_MODULE_OLE_URI);
+ xsltRegisterExtModule(oleModuleURI, &ExtFuncOleCB::init, nullptr);
+ xsltRegisterExtModuleFunction(
+ reinterpret_cast<const xmlChar*>("insertByName"),
+ oleModuleURI,
+ &ExtFuncOleCB::insertByName);
+ xsltRegisterExtModuleFunction(
+ reinterpret_cast<const xmlChar*>("getByName"),
+ oleModuleURI,
+ &ExtFuncOleCB::getByName);
+
+ }
+
+ void Reader::forceStateStopped()
+ {
+ std::scoped_lock<std::mutex> g(m_mutex);
+ if (!m_tcontext)
+ return;
+ //tdf#100057 If we force a cancel, libxslt will of course just keep on going unless something
+ //tells it to stop. Here we force the stopped state so that libxslt will stop processing
+ //and so Reader::execute will complete and we can join cleanly
+ m_tcontext->state = XSLT_STATE_STOPPED;
+ }
+
+ Reader::~Reader()
+ {
+ }
+
+ LibXSLTTransformer::LibXSLTTransformer(
+ css::uno::Reference<XComponentContext> xContext) :
+ m_xContext(std::move(xContext))
+ {
+ }
+
+ // XServiceInfo
+ sal_Bool LibXSLTTransformer::supportsService(const OUString& sServiceName)
+ {
+ return cppu::supportsService(this, sServiceName);
+ }
+ OUString LibXSLTTransformer::getImplementationName()
+ {
+ return "com.sun.star.comp.documentconversion.XSLTFilter";
+ }
+ css::uno::Sequence< OUString > LibXSLTTransformer::getSupportedServiceNames()
+ {
+ return { "com.sun.star.documentconversion.XSLTFilter" };
+ }
+
+ void
+ LibXSLTTransformer::setInputStream(
+ const css::uno::Reference<XInputStream>& inputStream)
+ {
+ m_rInputStream = inputStream;
+ }
+
+ css::uno::Reference<XInputStream>
+ LibXSLTTransformer::getInputStream()
+ {
+ return m_rInputStream;
+ }
+
+ void
+ LibXSLTTransformer::setOutputStream(
+ const css::uno::Reference<XOutputStream>& outputStream)
+ {
+ m_rOutputStream = outputStream;
+ }
+
+ css::uno::Reference<XOutputStream>
+ LibXSLTTransformer::getOutputStream()
+ {
+ return m_rOutputStream;
+ }
+
+ void
+ LibXSLTTransformer::addListener(const css::uno::Reference<XStreamListener>& listener)
+ {
+ m_listeners.push_front(listener);
+ }
+
+ void
+ LibXSLTTransformer::removeListener(
+ const css::uno::Reference<XStreamListener>& listener)
+ {
+ m_listeners.erase( std::remove(m_listeners.begin(), m_listeners.end(), listener ), m_listeners.end() );
+ }
+
+ void
+ LibXSLTTransformer::start()
+ {
+ for (const css::uno::Reference<XStreamListener>& xl : m_listeners)
+ {
+ xl->started();
+ }
+ OSL_ENSURE(!m_Reader.is(), "Somebody forgot to call terminate *and* holds a reference to this LibXSLTTransformer instance");
+ m_Reader = new Reader(this);
+ m_Reader->launch();
+ }
+
+ void
+ LibXSLTTransformer::error(const OUString& msg)
+ {
+ Any arg;
+ arg <<= Exception(msg, *this);
+ for (const css::uno::Reference<XStreamListener>& xl : m_listeners)
+ {
+ if (xl.is())
+ {
+ xl->error(arg);
+ }
+ }
+ }
+
+ void
+ LibXSLTTransformer::done()
+ {
+ for (const css::uno::Reference<XStreamListener>& xl : m_listeners)
+ {
+ if (xl.is())
+ {
+ xl->closed();
+ }
+ }
+ }
+
+ void
+ LibXSLTTransformer::terminate()
+ {
+ if (m_Reader.is())
+ {
+ m_Reader->terminate();
+ m_Reader->forceStateStopped();
+ m_Reader->join();
+ }
+ m_Reader.clear();
+ m_parameters.clear();
+ }
+
+ void
+ LibXSLTTransformer::initialize(const Sequence<Any>& args)
+ {
+ Sequence<Any> params;
+ if (!(args[0] >>= params))
+ { // backward compatibility for old clients using createInstance
+ params = args;
+ }
+ xmlSubstituteEntitiesDefault(0);
+ m_parameters.clear();
+ for (const Any& p : std::as_const(params))
+ {
+ NamedValue nv;
+ p >>= nv;
+ OString nameUTF8 = OUStringToOString(nv.Name,
+ RTL_TEXTENCODING_UTF8);
+ OUString value;
+ OString valueUTF8;
+ if (nv.Value >>= value)
+ {
+ valueUTF8 = OUStringToOString(value,
+ RTL_TEXTENCODING_UTF8);
+ }
+ else
+ {
+ // ignore non-string parameters
+ continue;
+ }
+ if (nameUTF8 == "StylesheetURL")
+ {
+ m_styleSheetURL = valueUTF8;
+ }
+ if (nameUTF8 == "StylesheetText")
+ {
+ m_styleSheetText = valueUTF8;
+ }
+ else if (nameUTF8 == "SourceURL")
+ {
+ m_parameters.insert(pair<const char*, OString> (
+ PARAM_SOURCE_URL, valueUTF8));
+ }
+ else if (nameUTF8 == "SourceBaseURL")
+ {
+ m_parameters.insert(pair<const char*, OString> (
+ PARAM_SOURCE_BASE_URL, valueUTF8));
+ }
+ else if (nameUTF8 == "TargetURL")
+ {
+ m_parameters.insert(pair<const char*, OString> (
+ PARAM_TARGET_URL, valueUTF8));
+ }
+ else if (nameUTF8 == "TargetBaseURL")
+ {
+ m_parameters.insert(pair<const char*, OString> (
+ PARAM_TARGET_BASE_URL, valueUTF8));
+ }
+ else if (nameUTF8 == "DoctypePublic")
+ {
+ m_parameters.insert(pair<const char*, OString> (
+ PARAM_DOCTYPE_PUBLIC, valueUTF8));
+ }
+ }
+ }
+}
+
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+filter_LibXSLTTransformer_get_implementation(
+ css::uno::XComponentContext* context, css::uno::Sequence<css::uno::Any> const&)
+{
+ return cppu::acquire(new XSLT::LibXSLTTransformer(context));
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
+