diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:06:44 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:06:44 +0000 |
commit | ed5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch) | |
tree | 7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /filter/source/xmlfilterdetect | |
parent | Initial commit. (diff) | |
download | libreoffice-upstream.tar.xz libreoffice-upstream.zip |
Adding upstream version 4:7.4.7. (refs: upstream/4%7.4.7, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'filter/source/xmlfilterdetect')
-rw-r--r-- | filter/source/xmlfilterdetect/filterdetect.cxx | 246 | ||||
-rw-r--r-- | filter/source/xmlfilterdetect/filterdetect.hxx | 59 | ||||
-rw-r--r-- | filter/source/xmlfilterdetect/xmlfd.component | 26 |
3 files changed, 331 insertions, 0 deletions
diff --git a/filter/source/xmlfilterdetect/filterdetect.cxx b/filter/source/xmlfilterdetect/filterdetect.cxx
new file mode 100644
index 000000000..e7cab3323
--- /dev/null
+++ b/filter/source/xmlfilterdetect/filterdetect.cxx
@@ -0,0 +1,246 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include "filterdetect.hxx"
+#include <com/sun/star/io/XInputStream.hpp>
+#include <com/sun/star/uno/XComponentContext.hpp>
+#include <com/sun/star/container/XNameAccess.hpp>
+#include <com/sun/star/beans/PropertyState.hpp>
+#include <cppuhelper/supportsservice.hxx>
+#include <tools/diagnose_ex.h>
+#include <ucbhelper/content.hxx>
+#include <unotools/ucbstreamhelper.hxx>
+#include <svl/inettype.hxx>
+#include <memory>
+#include <o3tl/string_view.hxx>
+
+using namespace com::sun::star::container;
+using namespace com::sun::star::uno;
+using namespace com::sun::star::beans;
+
+namespace {
+// Returns checkType if clipBoardFormat is "doctype:<needle>" and <needle> occurs in resultString; otherwise an empty string.
+OUString supportedByType( std::u16string_view clipBoardFormat, std::u16string_view resultString, const OUString& checkType)
+{
+    OUString sTypeName;
+    if ( o3tl::starts_with(clipBoardFormat, u"doctype:") )
+    {
+        std::u16string_view tryStr = clipBoardFormat.substr(8); // text after the 8-character "doctype:" prefix
+        if (resultString.find(tryStr) != std::u16string_view::npos)
+        {
+            sTypeName = checkType;
+        }
+    }
+    return sTypeName;
+}
+// Returns true if mediaType parses as "application/xml" or "application/<something>+xml"; false otherwise (including empty input).
+bool IsMediaTypeXML( const OUString& mediaType )
+{
+    if (!mediaType.isEmpty())
+    {
+        OUString sType, sSubType;
+        if (INetContentTypes::parse(mediaType, sType, sSubType)
+            && sType == "application")
+        {
+            // RFC 3023: application/xml; don't detect text/xml
+            if (sSubType == "xml")
+                return true;
+            // Registered media types: application/XXXX+xml
+            if (sSubType.endsWith("+xml"))
+                return true;
+        }
+    }
+    return false;
+}
+
+}
+// XExtendedFilterDetection: sniffs the start of the document (nSize = 4000 units) for a registered "doctype:" marker; returns the matching type name (also stored in aArguments as "TypeName") or an empty string.
+OUString SAL_CALL FilterDetect::detect( css::uno::Sequence< css::beans::PropertyValue >& aArguments )
+{
+    OUString sTypeName;
+    OUString sUrl;
+    Sequence<PropertyValue > lProps ;
+
+    css::uno::Reference< css::io::XInputStream > xInStream;
+    const PropertyValue * pValue = aArguments.getConstArray();
+    sal_Int32 nLength;
+    OUString resultString;
+
+    nLength = aArguments.getLength();
+    sal_Int32 location=nLength; // index of the "TypeName" entry; stays one past the end if there is none
+    for (sal_Int32 i = 0 ; i < nLength; i++)
+    {
+        if ( pValue[i].Name == "TypeName" )
+        {
+            location=i;
+        }
+        else if ( pValue[i].Name == "URL" )
+        {
+            pValue[i].Value >>= sUrl;
+        }
+        else if ( pValue[i].Name == "InputStream" )
+        {
+            pValue[i].Value >>= xInStream ;
+        }
+    }
+    try
+    {
+        if (!xInStream.is())
+        {
+            ::ucbhelper::Content aContent(
+                sUrl, Reference< css::ucb::XCommandEnvironment >(),
+                mxCtx);
+            xInStream = aContent.openStream();
+            if (!xInStream.is())
+            {
+                return sTypeName;
+            }
+        }
+
+        std::unique_ptr< SvStream > pInStream( ::utl::UcbStreamHelper::CreateStream( xInStream ) );
+        pInStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW );
+        sal_uInt64 const nUniPos = pInStream->Tell(); // stream position after BOM handling: 0, 2 or 3 are distinguished below
+
+        const sal_uInt16 nSize = 4000;
+        bool bTryUtf16 = false;
+
+        if ( nUniPos == 0 ) // No BOM detected, try to guess UTF-16 endianness
+        {
+            sal_uInt16 nHeader = 0;
+            pInStream->ReadUInt16( nHeader );
+            if ( nHeader == 0x003C ) // '<' as a 16-bit unit in the stream's current endianness
+                bTryUtf16 = true;
+            else if ( nHeader == 0x3C00 ) // '<' with swapped bytes: flip the stream's endianness
+            {
+                bTryUtf16 = true;
+                pInStream->SetEndian( pInStream->GetEndian() == SvStreamEndian::LITTLE ? SvStreamEndian::BIG : SvStreamEndian::LITTLE );
+            }
+            pInStream->Seek( STREAM_SEEK_TO_BEGIN );
+        }
+
+        if ( nUniPos == 3 || ( nUniPos == 0 && !bTryUtf16 ) ) // UTF-8 or non-Unicode
+        {
+            OString const str(read_uInt8s_ToOString(*pInStream, nSize));
+            resultString = OUString(str.getStr(), str.getLength(),
+                RTL_TEXTENCODING_ASCII_US,
+                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT);
+        }
+        else if ( nUniPos == 2 || bTryUtf16 ) // UTF-16
+            resultString = read_uInt16s_ToOUString( *pInStream, nSize );
+
+        if ( !resultString.startsWith( "<?xml" ) )
+        {
+            // Check the content type; XML declaration is optional in XML files according to XML 1.0 ch.2.8
+            // (see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd)
+            OUString sMediaType;
+            try
+            {
+                ::ucbhelper::Content aContent(
+                    sUrl, Reference< css::ucb::XCommandEnvironment >(),
+                    mxCtx);
+                aContent.getPropertyValue("MediaType") >>= sMediaType;
+                if (sMediaType.isEmpty())
+                {
+                    aContent.getPropertyValue("Content-Type") >>= sMediaType;
+                }
+            }
+            catch (...) {} // best effort: with no media type the check below rejects the stream
+
+            if (!IsMediaTypeXML(sMediaType))
+            {
+                // This is not an XML stream. It makes no sense to try to detect
+                // a non-XML file type here.
+                return OUString();
+            }
+        }
+
+        // Scan the registered types for a "doctype:" ClipboardFormat found in the sniffed text; a missing service throws and is logged below
+        Reference <XNameAccess> xTypeCont(mxCtx->getServiceManager()->createInstanceWithContext("com.sun.star.document.TypeDetection", mxCtx), UNO_QUERY_THROW);
+        Sequence < OUString > myTypes= xTypeCont->getElementNames();
+        nLength = myTypes.getLength(); // NB: from here on nLength counts types, not arguments
+
+        sal_Int32 new_nlength=0;
+        sal_Int32 i = 0 ;
+        while ((i < nLength) && (sTypeName.isEmpty()))
+        {
+            Any elem = xTypeCont->getByName(myTypes[i]);
+            elem >>=lProps;
+            new_nlength = lProps.getLength();
+            sal_Int32 j =0;
+            while (j < new_nlength && (sTypeName.isEmpty()))
+            {
+                OUString tmpStr;
+                lProps[j].Value >>=tmpStr;
+                if ( lProps[j].Name == "ClipboardFormat" && !tmpStr.isEmpty() )
+                {
+                    sTypeName = supportedByType(tmpStr,resultString, myTypes[i]);
+                }
+                j++;
+            }
+            i++;
+        }
+    }
+    catch (const Exception &)
+    {
+        TOOLS_WARN_EXCEPTION("filter.xmlfd", "An Exception occurred while opening File stream");
+    }
+
+    if (!sTypeName.isEmpty())
+    {
+        if (location == aArguments.getLength())
+        {
+            aArguments.realloc(location+1); // grow by exactly one slot; nLength holds the type count by now and must not be used
+            aArguments.getArray()[location].Name = "TypeName";
+        }
+        aArguments.getArray()[location].Value <<=sTypeName;
+    }
+
+    return sTypeName;
+}
+
+// XInitialization
+void SAL_CALL FilterDetect::initialize( const Sequence< Any >& /*aArguments*/ )
+{
+}
+
+// XServiceInfo
+OUString SAL_CALL FilterDetect::getImplementationName( )
+{
+    return "com.sun.star.comp.filters.XMLFilterDetect";
+}
+
+sal_Bool SAL_CALL FilterDetect::supportsService( const OUString& rServiceName )
+{
+    return cppu::supportsService( this, rServiceName );
+}
+
+Sequence< OUString > SAL_CALL FilterDetect::getSupportedServiceNames( )
+{
+    return { "com.sun.star.document.ExtendedTypeDetection" };
+}
+
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+filter_XMLFilterDetect_get_implementation(
+    css::uno::XComponentContext* context, css::uno::Sequence<css::uno::Any> const&)
+{
+    return cppu::acquire(new FilterDetect(context));
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/filter/source/xmlfilterdetect/filterdetect.hxx b/filter/source/xmlfilterdetect/filterdetect.hxx
new file mode 100644
index 000000000..c84b9f066
--- /dev/null
+++ b/filter/source/xmlfilterdetect/filterdetect.hxx
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#pragma once
+
+#include <com/sun/star/document/XExtendedFilterDetection.hpp>
+#include <com/sun/star/lang/XInitialization.hpp>
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <cppuhelper/implbase.hxx>
+#include <utility>
+
+namespace com::sun::star::uno {
+    class XComponentContext;
+}
+// UNO component "com.sun.star.comp.filters.XMLFilterDetect"; provides the com.sun.star.document.ExtendedTypeDetection service.
+class FilterDetect final : public cppu::WeakImplHelper <
+    css::document::XExtendedFilterDetection,
+    css::lang::XInitialization,
+    css::lang::XServiceInfo
+>
+{
+    // Component context handed to the constructor; detect() uses it for UCB content access and to create the TypeDetection service.
+    css::uno::Reference< css::uno::XComponentContext > mxCtx;
+
+public:
+    explicit FilterDetect( css::uno::Reference< css::uno::XComponentContext > xCtx)
+        : mxCtx(std::move( xCtx )) {}
+
+    //XExtendedFilterDetection
+    virtual OUString SAL_CALL detect( css::uno::Sequence< css::beans::PropertyValue >& lDescriptor ) override;
+
+    // XInitialization
+    virtual void SAL_CALL initialize( const css::uno::Sequence< css::uno::Any >& aArguments ) override;
+
+    // XServiceInfo
+    virtual OUString SAL_CALL getImplementationName( ) override;
+
+    virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) override;
+
+    virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames( ) override;
+};
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/filter/source/xmlfilterdetect/xmlfd.component b/filter/source/xmlfilterdetect/xmlfd.component
new file mode 100644
index 000000000..77a8ea0fd
--- /dev/null
+++ b/filter/source/xmlfilterdetect/xmlfd.component
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ -->
+
+<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@"
+    xmlns="http://openoffice.org/2010/uno-components">
+  <implementation name="com.sun.star.comp.filters.XMLFilterDetect"
+    constructor="filter_XMLFilterDetect_get_implementation">
+    <service name="com.sun.star.document.ExtendedTypeDetection"/>
+  </implementation>
+</component> |