From ed5640d8b587fbcfed7dd7967f3de04b37a76f26 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 11:06:44 +0200 Subject: Adding upstream version 4:7.4.7. Signed-off-by: Daniel Baumann --- vcl/ios/HtmlFmtFlt.cxx | 172 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 vcl/ios/HtmlFmtFlt.cxx (limited to 'vcl/ios/HtmlFmtFlt.cxx') diff --git a/vcl/ios/HtmlFmtFlt.cxx b/vcl/ios/HtmlFmtFlt.cxx new file mode 100644 index 000000000..4f90ced3b --- /dev/null +++ b/vcl/ios/HtmlFmtFlt.cxx @@ -0,0 +1,172 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include "HtmlFmtFlt.hxx" + +#include +#include + +#include +#include +#include +#include +#include + +using namespace com::sun::star::uno; + +// converts the openoffice text/html clipboard format to the HTML Format +// well known under MS Windows +// the MS HTML Format has a header before the real html data + +// Version:1.0 Version number of the clipboard. 
//                       Starting is 0.9
// StartHTML:            Byte count from the beginning of the clipboard to the start
//                       of the context, or -1 if no context
// EndHTML:              Byte count from the beginning of the clipboard to the end
//                       of the context, or -1 if no context
// StartFragment:        Byte count from the beginning of the clipboard to the
//                       start of the fragment
// EndFragment:          Byte count from the beginning of the clipboard to the
//                       end of the fragment
// StartSelection:       Byte count from the beginning of the clipboard to the
//                       start of the selection
// EndSelection:         Byte count from the beginning of the clipboard to the
//                       end of the selection

// StartSelection and EndSelection are optional
// The fragment should be preceded and followed by the HTML comments
// <!--StartFragment--> and <!--EndFragment--> (no space between !-- and the
// text)

namespace
{
/** Builds the textual header that precedes the HTML data in the MS "HTML Format".

    The header consists of a "Version:1.0" line followed by the four offset
    lines StartHTML, EndHTML, StartFragment and EndFragment, each terminated
    by CR LF. Every offset is written as a decimal number zero-padded to at
    least 10 digits, so for offsets of up to 10 digits the header length does
    not depend on the values (callers use a header built from zeros to learn
    that length). Larger values are not truncated and make the header longer.

    @param startHtml      value written after "StartHTML:"
    @param endHtml        value written after "EndHTML:"
    @param startFragment  value written after "StartFragment:"
    @param endFragment    value written after "EndFragment:"
    @return the complete header text
*/
std::string GetHtmlFormatHeader(size_t startHtml, size_t endHtml, size_t startFragment,
                                size_t endFragment)
{
    std::ostringstream htmlHeader;

    // One "Key:0000000000\r\n" line; std::setw only applies to the number
    // that immediately follows it.
    const auto appendOffsetLine = [&htmlHeader](const char* key, size_t offset) {
        htmlHeader << key << std::setw(10) << std::setfill('0') << std::dec << offset << "\r\n";
    };

    htmlHeader << "Version:1.0\r\n";
    appendOffsetLine("StartHTML:", startHtml);
    appendOffsetLine("EndHTML:", endHtml);
    appendOffsetLine("StartFragment:", startFragment);
    appendOffsetLine("EndFragment:", endFragment);

    return htmlHeader.str();
}
}

// the office always writes the start and end html tag in upper cases and
// without spaces; both tags don't allow parameters
const std::string TAG_HTML = std::string("<HTML>");
const std::string TAG_END_HTML = std::string("</HTML>");

// The body tag may have parameters so we need to search for the
// closing '>' manually, e.g. <BODY ...>
#92840# +const std::string TAG_BODY = std::string(" SAL_CALL TextHtmlToHTMLFormat(Sequence const& aTextHtml) +{ + OSL_ASSERT(aTextHtml.getLength() > 0); + + if (aTextHtml.getLength() <= 0) + return Sequence(); + + // fill the buffer with dummy values to calc the exact length + std::string dummyHtmlHeader = GetHtmlFormatHeader(0, 0, 0, 0); + size_t lHtmlFormatHeader = dummyHtmlHeader.length(); + + std::string textHtml(reinterpret_cast(aTextHtml.getConstArray()), + reinterpret_cast(aTextHtml.getConstArray()) + + aTextHtml.getLength()); + + std::string::size_type nStartHtml = textHtml.find(TAG_HTML) + lHtmlFormatHeader + - 1; // we start one before '' Word 2000 does also so + std::string::size_type nEndHtml = textHtml.find(TAG_END_HTML) + lHtmlFormatHeader + + TAG_END_HTML.length() + + 1; // our SOffice 5.2 wants 2 behind ? + + // The body tag may have parameters so we need to search for the + // closing '>' manually e.g. #92840# + std::string::size_type nStartFragment + = textHtml.find(">", textHtml.find(TAG_BODY)) + lHtmlFormatHeader + 1; + std::string::size_type nEndFragment = textHtml.find(TAG_END_BODY) + lHtmlFormatHeader; + + std::string htmlFormat + = GetHtmlFormatHeader(nStartHtml, nEndHtml, nStartFragment, nEndFragment); + htmlFormat += textHtml; + + Sequence byteSequence(htmlFormat.length() + 1); // space the trailing '\0' + memset(byteSequence.getArray(), 0, byteSequence.getLength()); + + memcpy(static_cast(byteSequence.getArray()), + static_cast(htmlFormat.c_str()), htmlFormat.length()); + + return byteSequence; +} + +const char* const HtmlStartTag = " HTMLFormatToTextHtml(const Sequence& aHTMLFormat) +{ + assert(isHTMLFormat(aHTMLFormat) && "No HTML Format provided"); + + Sequence& nonconstHTMLFormatRef = const_cast&>(aHTMLFormat); + char* dataStart = reinterpret_cast(nonconstHTMLFormatRef.getArray()); + char* dataEnd = dataStart + nonconstHTMLFormatRef.getLength() - 1; + const char* htmlStartTag = strcasestr(dataStart, HtmlStartTag); + + 
assert(htmlStartTag && "Seems to be no HTML at all"); + + // It doesn't seem to be HTML? Well then simply return what has been + // provided in non-debug builds + if (htmlStartTag == nullptr) + { + return aHTMLFormat; + } + + sal_Int32 len = dataEnd - htmlStartTag; + Sequence plainHtmlData(len); + + memcpy(static_cast(plainHtmlData.getArray()), htmlStartTag, len); + + return plainHtmlData; +} + +/* A simple format detection. We are just comparing the first few bytes + of the provided byte sequence to see whether or not it is the MS + Office Html format. If it shows that this is not reliable enough we + can improve this +*/ +const char HtmlFormatStart[] = "Version:"; +int const HtmlFormatStartLen = (sizeof(HtmlFormatStart) - 1); + +bool isHTMLFormat(const Sequence& aHtmlSequence) +{ + if (aHtmlSequence.getLength() < HtmlFormatStartLen) + return false; + + return rtl_str_compareIgnoreAsciiCase_WithLength( + HtmlFormatStart, HtmlFormatStartLen, + reinterpret_cast(aHtmlSequence.getConstArray()), HtmlFormatStartLen) + == 0; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ -- cgit v1.2.3