diff options
Diffstat (limited to 'filter/source/xmlfilterdetect/filterdetect.cxx')
-rw-r--r-- | filter/source/xmlfilterdetect/filterdetect.cxx | 246 |
1 files changed, 246 insertions, 0 deletions
diff --git a/filter/source/xmlfilterdetect/filterdetect.cxx b/filter/source/xmlfilterdetect/filterdetect.cxx new file mode 100644 index 000000000..e7cab3323 --- /dev/null +++ b/filter/source/xmlfilterdetect/filterdetect.cxx @@ -0,0 +1,246 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include "filterdetect.hxx" +#include <com/sun/star/io/XInputStream.hpp> +#include <com/sun/star/uno/XComponentContext.hpp> +#include <com/sun/star/container/XNameAccess.hpp> +#include <com/sun/star/beans/PropertyState.hpp> +#include <cppuhelper/supportsservice.hxx> +#include <tools/diagnose_ex.h> +#include <ucbhelper/content.hxx> +#include <unotools/ucbstreamhelper.hxx> +#include <svl/inettype.hxx> +#include <memory> +#include <o3tl/string_view.hxx> + +using namespace com::sun::star::container; +using namespace com::sun::star::uno; +using namespace com::sun::star::beans; + +namespace { + +OUString supportedByType( std::u16string_view clipBoardFormat, std::u16string_view resultString, const OUString& checkType) +{ + OUString sTypeName; + if ( o3tl::starts_with(clipBoardFormat, u"doctype:") ) + { + std::u16string_view tryStr = clipBoardFormat.substr(8); + if (resultString.find(tryStr) != std::u16string_view::npos) + { + sTypeName = checkType; + } + } + return sTypeName; +} + +bool IsMediaTypeXML( const OUString& mediaType ) +{ + if (!mediaType.isEmpty()) + { + OUString sType, sSubType; + if (INetContentTypes::parse(mediaType, sType, sSubType) + && sType == "application") + { + // RFC 3023: application/xml; don't detect text/xml + if (sSubType == "xml") + return true; + // Registered media types: application/XXXX+xml + if (sSubType.endsWith("+xml")) + return true; + } + } + return false; +} + +} + +OUString SAL_CALL FilterDetect::detect( css::uno::Sequence< css::beans::PropertyValue >& aArguments ) +{ + OUString sTypeName; + OUString sUrl; + Sequence<PropertyValue > lProps ; + + css::uno::Reference< css::io::XInputStream > xInStream; + const PropertyValue * pValue = aArguments.getConstArray(); + sal_Int32 nLength; + OUString resultString; + + nLength = aArguments.getLength(); + sal_Int32 location=nLength; + for (sal_Int32 i = 0 ; i < nLength; i++) + { + if ( pValue[i].Name == "TypeName" ) + { + location=i; + } + else if ( pValue[i].Name == "URL" ) + { + pValue[i].Value >>= sUrl; + } + else if ( pValue[i].Name == "InputStream" ) + { + pValue[i].Value >>= xInStream ; + } + } + try + { + if (!xInStream.is()) + { + ::ucbhelper::Content aContent( + sUrl, Reference< css::ucb::XCommandEnvironment >(), + mxCtx); + xInStream = aContent.openStream(); + if (!xInStream.is()) + { + return sTypeName; + } + } + + std::unique_ptr< SvStream > pInStream( ::utl::UcbStreamHelper::CreateStream( xInStream ) ); + pInStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW ); + sal_uInt64 const nUniPos = pInStream->Tell(); + + const sal_uInt16 nSize = 4000; + bool bTryUtf16 = false; + + if ( nUniPos == 0 ) // No BOM detected, try to guess UTF-16 endianness + { + sal_uInt16 nHeader = 0; + pInStream->ReadUInt16( nHeader ); + if ( nHeader == 0x003C ) + bTryUtf16 = true; + else if ( nHeader == 0x3C00 ) + { + bTryUtf16 = true; + pInStream->SetEndian( pInStream->GetEndian() == SvStreamEndian::LITTLE ? SvStreamEndian::BIG : SvStreamEndian::LITTLE ); + } + pInStream->Seek( STREAM_SEEK_TO_BEGIN ); + } + + if ( nUniPos == 3 || ( nUniPos == 0 && !bTryUtf16 ) ) // UTF-8 or non-Unicode + { + OString const str(read_uInt8s_ToOString(*pInStream, nSize)); + resultString = OUString(str.getStr(), str.getLength(), + RTL_TEXTENCODING_ASCII_US, + RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT); + } + else if ( nUniPos == 2 || bTryUtf16 ) // UTF-16 + resultString = read_uInt16s_ToOUString( *pInStream, nSize ); + + if ( !resultString.startsWith( "<?xml" ) ) + { + // Check the content type; XML declaration is optional in XML files according to XML 1.0 ch.2.8 + // (see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd) + OUString sMediaType; + try + { + ::ucbhelper::Content aContent( + sUrl, Reference< css::ucb::XCommandEnvironment >(), + mxCtx); + aContent.getPropertyValue("MediaType") >>= sMediaType; + if (sMediaType.isEmpty()) + { + aContent.getPropertyValue("Content-Type") >>= sMediaType; + } + } + catch (...) {} + + if (!IsMediaTypeXML(sMediaType)) + { + // This is not an XML stream. It makes no sense to try to detect + // a non-XML file type here. + return OUString(); + } + } + + // test typedetect code + Reference <XNameAccess> xTypeCont(mxCtx->getServiceManager()->createInstanceWithContext("com.sun.star.document.TypeDetection", mxCtx), UNO_QUERY); + Sequence < OUString > myTypes= xTypeCont->getElementNames(); + nLength = myTypes.getLength(); + + sal_Int32 new_nlength=0; + sal_Int32 i = 0 ; + while ((i < nLength) && (sTypeName.isEmpty())) + { + Any elem = xTypeCont->getByName(myTypes[i]); + elem >>=lProps; + new_nlength = lProps.getLength(); + sal_Int32 j =0; + while (j < new_nlength && (sTypeName.isEmpty())) + { + OUString tmpStr; + lProps[j].Value >>=tmpStr; + if ( lProps[j].Name == "ClipboardFormat" && !tmpStr.isEmpty() ) + { + sTypeName = supportedByType(tmpStr,resultString, myTypes[i]); + } + j++; + } + i++; + } + } + catch (const Exception &) + { + TOOLS_WARN_EXCEPTION("filter.xmlfd", "An Exception occurred while opening File stream"); + } + + if (!sTypeName.isEmpty()) + { + if (location == aArguments.getLength()) + { + aArguments.realloc(nLength+1); + aArguments.getArray()[location].Name = "TypeName"; + } + aArguments.getArray()[location].Value <<=sTypeName; + } + + return sTypeName; +} + +// XInitialization +void SAL_CALL FilterDetect::initialize( const Sequence< Any >& /*aArguments*/ ) +{ +} + +// XServiceInfo +OUString SAL_CALL FilterDetect::getImplementationName( ) +{ + return "com.sun.star.comp.filters.XMLFilterDetect"; +} + +sal_Bool SAL_CALL FilterDetect::supportsService( const OUString& rServiceName ) +{ + return cppu::supportsService( this, rServiceName ); +} + +Sequence< OUString > SAL_CALL FilterDetect::getSupportedServiceNames( ) +{ + return { "com.sun.star.document.ExtendedTypeDetection" }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +filter_XMLFilterDetect_get_implementation( + css::uno::XComponentContext* context, css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new FilterDetect(context)); +} + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |