summaryrefslogtreecommitdiffstats
path: root/filter/source/config/cache/typedetection.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'filter/source/config/cache/typedetection.cxx')
-rw-r--r--filter/source/config/cache/typedetection.cxx1204
1 files changed, 1204 insertions, 0 deletions
diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx
new file mode 100644
index 000000000..ae8c194fa
--- /dev/null
+++ b/filter/source/config/cache/typedetection.cxx
@@ -0,0 +1,1204 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include "typedetection.hxx"
+#include "constant.hxx"
+
+#include <com/sun/star/document/XExtendedFilterDetection.hpp>
+#include <com/sun/star/frame/Desktop.hpp>
+#include <com/sun/star/util/URLTransformer.hpp>
+#include <com/sun/star/util/XURLTransformer.hpp>
+
+#include <com/sun/star/io/XInputStream.hpp>
+#include <com/sun/star/io/XSeekable.hpp>
+#include <com/sun/star/task/XInteractionHandler.hpp>
+#include <o3tl/string_view.hxx>
+#include <tools/wldcrd.hxx>
+#include <sal/log.hxx>
+#include <framework/interaction.hxx>
+#include <tools/diagnose_ex.h>
+#include <tools/urlobj.hxx>
+#include <comphelper/fileurl.hxx>
+#include <comphelper/sequence.hxx>
+#include <utility>
+
+#define DEBUG_TYPE_DETECTION 0
+
+#if DEBUG_TYPE_DETECTION
+#include <iostream>
+using std::cout;
+using std::endl;
+#endif
+
+using namespace com::sun::star;
+
+namespace filter::config{
+
+/**
+ * Constructs the type detection service.
+ *
+ * Stores the component context, creates a TerminateDetection listener for
+ * this instance, registers that listener at the desktop and finally
+ * initializes the base container for the type part of the filter cache.
+ *
+ * @param rxContext  component context used to create the desktop service.
+ */
+TypeDetection::TypeDetection(const css::uno::Reference< css::uno::XComponentContext >& rxContext)
+    : m_xContext(rxContext)
+    , m_xTerminateListener(new TerminateDetection(this))
+    , m_bCancel(false)
+{
+    // Register the terminate listener first, then set up the container.
+    const auto xDesktop = css::frame::Desktop::create(m_xContext);
+    xDesktop->addTerminateListener(m_xTerminateListener);
+
+    BaseContainer::init("com.sun.star.comp.filter.config.TypeDetection",
+                        { "com.sun.star.document.TypeDetection" },
+                        FilterCache::E_TYPE);
+}
+
+
+/**
+ * Removes the terminate listener that the constructor registered at the
+ * desktop.
+ *
+ * A destructor is implicitly noexcept, so a UNO exception escaping from the
+ * desktop creation or from the listener removal would terminate the process.
+ * The deregistration is therefore best effort only: failures are logged and
+ * otherwise ignored.
+ */
+TypeDetection::~TypeDetection()
+{
+    try
+    {
+        css::frame::Desktop::create(m_xContext)->removeTerminateListener(m_xTerminateListener);
+    }
+    catch (const css::uno::Exception&)
+    {
+        TOOLS_WARN_EXCEPTION("filter.config", "could not remove terminate listener");
+    }
+}
+
+
+/**
+ * Flat detection: determines a type for the given URL without looking at
+ * the document content.
+ *
+ * @param sURL  the URL to be classified.
+ * @return the first type the filter cache reports for the URL, or an empty
+ *         string if the cache reports none.
+ */
+OUString SAL_CALL TypeDetection::queryTypeByURL(const OUString& sURL)
+{
+    // SAFE -> (the guard is held until the function returns)
+    osl::MutexGuard aGuard(m_aMutex);
+
+    css::util::URL aParsedURL;
+    aParsedURL.Complete = sURL;
+    css::util::URLTransformer::create(m_xContext)->parseStrict(aParsedURL);
+
+    // Ask with whatever types are already loaded first. Only if that yields
+    // nothing and the cache does not contain all types yet, load the
+    // remaining ones and ask a second time.
+    auto & rCache = GetTheFilterCache();
+    FlatDetection aMatches;
+    rCache.detectFlatForURL(aParsedURL, aMatches);
+
+    const bool bRetryWithAllTypes =
+        aMatches.empty() && !rCache.isFillState(FilterCache::E_CONTAINS_TYPES);
+    if (bRetryWithAllTypes)
+    {
+        rCache.load(FilterCache::E_CONTAINS_TYPES);
+        rCache.detectFlatForURL(aParsedURL, aMatches);
+    }
+
+    if (aMatches.empty())
+        return OUString();
+
+    // first item is guaranteed as "preferred" one!
+    return aMatches.front().sType;
+    // <- SAFE
+}
+
+namespace {
+
+/**
+ * Maps a type name to a numeric rank that reflects how complex the format
+ * is. The higher the rank, the earlier the type gets tested, so complex
+ * formats are probed before simple ones.
+ *
+ * Guidelines to determine how complex a format is (subject to change):
+ *
+ * 1) compressed text (XML, HTML, etc)
+ * 2) binary
+ * 3) non-compressed text
+ * 3.1) structured text
+ * 3.1.1) dialect of a structured text (e.g. docbook XML)
+ * 3.1.2) generic structured text (e.g. generic XML)
+ * 3.2) non-structured text
+ *
+ * In each category, rank them from strictly-structured to
+ * loosely-structured.
+ *
+ * @param rType  internal type name to look up.
+ * @return (table size - position - 1) for a listed type, i.e. the first
+ *         entry gets the highest value of all listed types; the table size
+ *         itself for a type that is not listed.
+ */
+int getFlatTypeRank(std::u16string_view rType)
+{
+    // List formats from more complex to less complex.
+    // TODO: Add more.
+    static const char* const ranks[] = {
+
+        // Compressed XML (ODF XML zip formats)
+        "writer8_template",
+        "writer8",
+        "calc8_template",
+        "calc8",
+        "impress8_template",
+        "impress8",
+        "draw8_template",
+        "draw8",
+        "chart8",
+        "math8",
+        "writerglobal8_template",
+        "writerglobal8",
+        "writerweb8_writer_template",
+        "StarBase",
+
+        // Compressed XML (OOXML)
+        "writer_OOXML_Text_Template",
+        "writer_OOXML",
+        "writer_MS_Word_2007_Template",
+        "writer_MS_Word_2007",
+        "Office Open XML Spreadsheet Template",
+        "Office Open XML Spreadsheet",
+        "MS Excel 2007 XML Template",
+        "MS Excel 2007 XML",
+        "MS PowerPoint 2007 XML Template",
+        "MS PowerPoint 2007 XML AutoPlay",
+        "MS PowerPoint 2007 XML",
+
+        // Compressed XML (Uniform/Unified Office Format)
+        "Unified_Office_Format_text",
+        "Unified_Office_Format_spreadsheet",
+        "Unified_Office_Format_presentation",
+
+        // Compressed XML (StarOffice XML zip formats)
+        "calc_StarOffice_XML_Calc",
+        "calc_StarOffice_XML_Calc_Template",
+        "chart_StarOffice_XML_Chart",
+        "draw_StarOffice_XML_Draw",
+        "draw_StarOffice_XML_Draw_Template",
+        "impress_StarOffice_XML_Impress",
+        "impress_StarOffice_XML_Impress_Template",
+        "math_StarOffice_XML_Math",
+        "writer_StarOffice_XML_Writer",
+        "writer_StarOffice_XML_Writer_Template",
+        "writer_globaldocument_StarOffice_XML_Writer_GlobalDocument",
+        "writer_web_StarOffice_XML_Writer_Web_Template",
+
+        // Compressed text
+        "pdf_Portable_Document_Format",
+
+        // Binary
+        "writer_T602_Document",
+        "writer_WordPerfect_Document",
+        "writer_MS_Works_Document",
+        "writer_MS_Word_97_Vorlage",
+        "writer_MS_Word_97",
+        "writer_MS_Word_95_Vorlage",
+        "writer_MS_Word_95",
+        "writer_MS_WinWord_60",
+        "writer_MS_WinWord_5",
+        "MS Excel 2007 Binary",
+        "calc_MS_Excel_97_VorlageTemplate",
+        "calc_MS_Excel_97",
+        "calc_MS_Excel_95_VorlageTemplate",
+        "calc_MS_Excel_95",
+        "calc_MS_Excel_5095_VorlageTemplate",
+        "calc_MS_Excel_5095",
+        "calc_MS_Excel_40_VorlageTemplate",
+        "calc_MS_Excel_40",
+        "calc_Pocket_Excel_File",
+        "impress_MS_PowerPoint_97_Vorlage",
+        "impress_MS_PowerPoint_97_AutoPlay",
+        "impress_MS_PowerPoint_97",
+        "calc_Lotus",
+        "calc_QPro",
+        "calc_SYLK",
+        "calc_DIF",
+        "calc_dBase",
+
+        // Binary (raster and vector image files)
+        "emf_MS_Windows_Metafile",
+        "wmf_MS_Windows_Metafile",
+        "met_OS2_Metafile",
+        "svm_StarView_Metafile",
+        "sgv_StarDraw_20",
+        "tif_Tag_Image_File",
+        "tga_Truevision_TARGA",
+        "sgf_StarOffice_Writer_SGF",
+        "ras_Sun_Rasterfile",
+        "psd_Adobe_Photoshop",
+        "png_Portable_Network_Graphic",
+        "jpg_JPEG",
+        "mov_MOV",
+        "gif_Graphics_Interchange",
+        "bmp_MS_Windows",
+        "pcx_Zsoft_Paintbrush",
+        "pct_Mac_Pict",
+        "pcd_Photo_CD_Base",
+        "pcd_Photo_CD_Base4",
+        "pcd_Photo_CD_Base16",
+        "webp_WebP",
+        "impress_CGM_Computer_Graphics_Metafile", // There is binary and ascii variants ?
+        "draw_WordPerfect_Graphics",
+        "draw_Visio_Document",
+        "draw_Publisher_Document",
+        "draw_Corel_Presentation_Exchange",
+        "draw_CorelDraw_Document",
+        "writer_LotusWordPro_Document",
+        "writer_MIZI_Hwp_97", // Hanword (Hancom Office)
+
+        // Non-compressed XML
+        "writer_ODT_FlatXML",
+        "calc_ODS_FlatXML",
+        "impress_ODP_FlatXML",
+        "draw_ODG_FlatXML",
+        "calc_ADO_rowset_XML",
+        "calc_MS_Excel_2003_XML",
+        "writer_MS_Word_2003_XML",
+        "writer_DocBook_File",
+        "XHTML_File",
+        "svg_Scalable_Vector_Graphics",
+        "math_MathML_XML_Math",
+
+        // Non-compressed text
+        "dxf_AutoCAD_Interchange",
+        "eps_Encapsulated_PostScript",
+        "pbm_Portable_Bitmap", // There is 'raw' and 'ascii' variants.
+        "ppm_Portable_Pixelmap", // There is 'raw' and 'ascii' variants.
+        "pgm_Portable_Graymap", // There is 'raw' and 'ascii' variants.
+        "xpm_XPM",
+        "xbm_X_Consortium",
+        "writer_Rich_Text_Format",
+        "writer_web_HTML_help",
+        "generic_HTML",
+
+        "generic_Text", // Plain text (catch all)
+
+        // Anything ranked lower than generic_Text will never be used during
+        // type detection (since generic_Text catches all).
+
+        // Export only
+        "writer_layout_dump_xml",
+        "writer_indexing_export",
+        "graphic_HTML",
+
+        // Internal use only
+        "StarBaseReportChart",
+        "StarBaseReport",
+        "math_MathType_3x", // MathType equation embedded in Word doc.
+    };
+
+    const size_t nCount = SAL_N_ELEMENTS(ranks);
+
+    // Walk the table until the name matches or the table is exhausted.
+    size_t nPos = 0;
+    while (nPos < nCount && !o3tl::equalsAscii(rType, ranks[nPos]))
+        ++nPos;
+
+    // Not ranked. Treat them equally. Unranked formats have higher priority
+    // than the ranked internal ones since they may be defined externally.
+    if (nPos == nCount)
+        return nCount;
+
+    // Earlier table position => higher rank.
+    return nCount - nPos - 1;
+}
+
+/**
+ * Ordering predicate that puts the most promising detection candidates
+ * first. The criteria are applied in this order, the first one that differs
+ * decides:
+ *
+ * 1) matched by URL pattern
+ * 2) matched by file extension
+ * 3) higher rank as reported by getFlatTypeRank()
+ * 4) preselected by the document service
+ * 5) type name, in descending order
+ */
+struct SortByPriority
+{
+    bool operator() (const FlatDetectionInfo& rLeft, const FlatDetectionInfo& rRight) const
+    {
+        if (rLeft.bMatchByPattern != rRight.bMatchByPattern)
+            return rLeft.bMatchByPattern;
+
+        if (rLeft.bMatchByExtension != rRight.bMatchByExtension)
+            return rLeft.bMatchByExtension;
+
+        // The rank lookup is only reached when both flags above are equal.
+        const int nLeftRank = getFlatTypeRank(rLeft.sType);
+        const int nRightRank = getFlatTypeRank(rRight.sType);
+        if (nLeftRank != nRightRank)
+            return nRightRank < nLeftRank;
+
+        if (rLeft.bPreselectedByDocumentService != rRight.bPreselectedByDocumentService)
+            return rLeft.bPreselectedByDocumentService;
+
+        // All things being equal, fall back to the type name.
+        return rRight.sType < rLeft.sType;
+    }
+};
+
+/**
+ * Ordering predicate that compares the type names only; an entry with the
+ * greater name is ordered before one with a smaller name.
+ */
+struct SortByType
+{
+    bool operator() (const FlatDetectionInfo& rLeft, const FlatDetectionInfo& rRight) const
+    {
+        return rRight.sType < rLeft.sType;
+    }
+};
+
+/**
+ * Equality predicate: two entries are considered equal if their type names
+ * are equal, regardless of their flags.
+ */
+struct EqualByType
+{
+    bool operator() (const FlatDetectionInfo& rLeft, const FlatDetectionInfo& rRight) const
+    {
+        return rLeft.sType == rRight.sType;
+    }
+};
+
+/**
+ * Unary predicate that matches the entry carrying the type name given at
+ * construction time.
+ */
+class FindByType
+{
+    OUString m_sWantedType;
+public:
+    explicit FindByType(OUString aType) : m_sWantedType(std::move(aType)) {}
+    bool operator() (const FlatDetectionInfo& rCandidate) const
+    {
+        return rCandidate.sType == m_sWantedType;
+    }
+};
+
+#if DEBUG_TYPE_DETECTION
+/**
+ * Debug helper (only compiled if DEBUG_TYPE_DETECTION is non-zero): writes
+ * the caption, the number of entries and one line per entry with its type
+ * name and flags to standard output.
+ */
+void printFlatDetectionList(const char* caption, const FlatDetection& types)
+{
+    cout << "-- " << caption << " (size=" << types.size() << ")" << endl;
+    for (const auto& rEntry : types)
+    {
+        cout << " type='" << rEntry.sType << "'; match by extension (" << rEntry.bMatchByExtension
+             << "); match by pattern (" << rEntry.bMatchByPattern << "); pre-selected by doc service ("
+             << rEntry.bPreselectedByDocumentService << ")" << endl;
+    }
+    cout << "--" << endl;
+}
+#endif
+
+}
+
+/**
+ * Detects the type of the document described by the given media descriptor.
+ *
+ * Steps:
+ * - if the descriptor names a filter that can be validated, the type stored
+ *   on the descriptor for it is returned immediately
+ * - otherwise all candidate types are collected, sorted by priority and
+ *   checked flat and (if allowed) deep
+ * - if that yields nothing, the user is asked (if possible), and as a last
+ *   resort the "last chance" type reported by the flat/deep step is used
+ * - finally the descriptor is completed with filter and type and written
+ *   back to lDescriptor
+ *
+ * @param lDescriptor  in/out media descriptor.
+ * @param bAllowDeep   whether deep detection services may be called.
+ * @return the detected type name, or an empty string.
+ * @throws css::uno::RuntimeException  passed through unchanged; any other
+ *         css::uno::Exception is logged and results in an empty type.
+ */
+OUString SAL_CALL TypeDetection::queryTypeByDescriptor(css::uno::Sequence< css::beans::PropertyValue >& lDescriptor,
+ sal_Bool bAllowDeep )
+{
+ // make the descriptor more usable :-)
+ utl::MediaDescriptor stlDescriptor(lDescriptor);
+ OUString sType, sURL;
+
+ try
+ {
+ // SAFE -> ----------------------------------
+ osl::ClearableMutexGuard aLock(m_aMutex);
+
+ // parse given URL to split it into e.g. main and jump marks ...
+ sURL = stlDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_URL, OUString());
+
+#if OSL_DEBUG_LEVEL > 0
+ if (stlDescriptor.find( "FileName" ) != stlDescriptor.end())
+ OSL_FAIL("Detect using of deprecated and already unsupported MediaDescriptor property \"FileName\"!");
+#endif
+
+ css::util::URL aURL;
+ aURL.Complete = sURL;
+ css::uno::Reference< css::util::XURLTransformer > xParser(css::util::URLTransformer::create(m_xContext));
+ xParser->parseStrict(aURL);
+
+ OUString aSelectedFilter = stlDescriptor.getUnpackedValueOrDefault(
+ utl::MediaDescriptor::PROP_FILTERNAME, OUString());
+ if (!aSelectedFilter.isEmpty())
+ {
+ // Caller specified the filter type. Honor it. Just get the default
+ // type for that filter, and bail out.
+ // NOTE(review): this early return is taken before the
+ // "stlDescriptor >> lDescriptor" write-back at the end of the
+ // function - confirm that this is intended.
+ if (impl_validateAndSetFilterOnDescriptor(stlDescriptor, aSelectedFilter))
+ return stlDescriptor[utl::MediaDescriptor::PROP_TYPENAME].get<OUString>();
+ }
+
+ FlatDetection lFlatTypes;
+ impl_getAllFormatTypes(aURL, stlDescriptor, lFlatTypes);
+
+ aLock.clear();
+ // <- SAFE ----------------------------------
+
+ // Properly prioritize all candidate types.
+ // std::unique() only drops adjacent entries with the same type name.
+ std::stable_sort(lFlatTypes.begin(), lFlatTypes.end(), SortByPriority());
+ auto last = std::unique(lFlatTypes.begin(), lFlatTypes.end(), EqualByType());
+ lFlatTypes.erase(last, lFlatTypes.end());
+
+ OUString sLastChance;
+
+ // verify every flat detected (or preselected!) type
+ // by calling its registered deep detection service.
+ // But break this loop if a type match to the given descriptor
+ // by a URL pattern(!) or if deep detection isn't allowed from
+ // outside (bAllowDeep=sal_False) or break the whole detection by
+ // throwing an exception if creation of the might needed input
+ // stream failed by e.g. an IO exception ...
+ if (!lFlatTypes.empty())
+ sType = impl_detectTypeFlatAndDeep(stlDescriptor, lFlatTypes, bAllowDeep, sLastChance);
+
+ // flat detection failed
+ // pure deep detection failed
+ // => ask might existing InteractionHandler
+ // means: ask user for its decision
+ if (sType.isEmpty() && !m_bCancel)
+ sType = impl_askUserForTypeAndFilterIfAllowed(stlDescriptor);
+
+
+ // no real detected type - but a might valid one.
+ // update descriptor and set last chance for return.
+ if (sType.isEmpty() && !sLastChance.isEmpty() && !m_bCancel)
+ {
+ OSL_FAIL("set first flat detected type without a registered deep detection service as \"last chance\" ... nevertheless some other deep detections said \"NO\". I TRY IT!");
+ sType = sLastChance;
+ }
+ }
+ catch(const css::uno::RuntimeException&)
+ {
+ // runtime exceptions are not handled here; pass them on to the caller
+ throw;
+ }
+ catch(const css::uno::Exception&)
+ {
+ // every other UNO exception is logged and means "no type detected"
+ TOOLS_WARN_EXCEPTION("filter.config", "caught exception while querying type of " << sURL);
+ sType.clear();
+ }
+
+ // adapt media descriptor, so it contains the right values
+ // for type/filter name/document service/ etcpp.
+ impl_checkResultsAndAddBestFilter(stlDescriptor, sType); // Attention: sType is used as IN/OUT param here and will might be changed inside this method !!!
+ impl_validateAndSetTypeOnDescriptor(stlDescriptor, sType);
+
+ // write the (possibly extended) descriptor back to the in/out parameter
+ stlDescriptor >> lDescriptor;
+ return sType;
+}
+
+
+/**
+ * Completes the descriptor with a filter that fits the detected type.
+ *
+ * The following steps are tried in order; the first one that finds a filter
+ * sets type name and filter name on the descriptor and returns:
+ *
+ * a) a filter is already set on the descriptor => nothing to do
+ * b) a document service is set => look for a filter registered for that
+ *    service and the given type
+ * c) use the preferred filter registered at the type
+ * d) use any import filter registered for the type
+ *
+ * If no step succeeds the descriptor is left untouched.
+ *
+ * The cache is accessed under m_aMutex. Inside the loops the mutex is taken
+ * with a scoped guard per iteration, so it is released on every way out of
+ * the iteration (normal end, break, exception). The former reset()/clear()
+ * pairing skipped the clear() on the exception path of step d), which made
+ * the following reset() acquire the recursive mutex a second time without a
+ * matching release.
+ *
+ * @param rDescriptor  media descriptor to be completed.
+ * @param sType        detected type; still declared as in/out parameter but
+ *                     not changed by this implementation.
+ */
+void TypeDetection::impl_checkResultsAndAddBestFilter(utl::MediaDescriptor& rDescriptor,
+                                                      OUString& sType )
+{
+    // a)
+    // Don't overwrite a might preselected filter!
+    OUString sFilter = rDescriptor.getUnpackedValueOrDefault(
+        utl::MediaDescriptor::PROP_FILTERNAME,
+        OUString());
+    if (!sFilter.isEmpty())
+        return;
+
+    auto & cache = GetTheFilterCache();
+
+    // b)
+    // check a preselected document service too.
+    // Then we have to search a suitable filter within this module.
+    OUString sDocumentService = rDescriptor.getUnpackedValueOrDefault(
+        utl::MediaDescriptor::PROP_DOCUMENTSERVICE,
+        OUString());
+    if (!sDocumentService.isEmpty())
+    {
+        try
+        {
+            std::vector<OUString> lFilters;
+            {
+                // SAFE ->
+                osl::MutexGuard aLock(m_aMutex);
+
+                // Attention: For executing next lines of code, We must be sure that
+                // all filters already loaded :-(
+                // That can disturb our "load on demand feature". But we have no other chance!
+                cache.load(FilterCache::E_CONTAINS_FILTERS);
+
+                css::beans::NamedValue lIProps[] {
+                    { PROPNAME_DOCUMENTSERVICE, uno::Any(sDocumentService) },
+                    { PROPNAME_TYPE, uno::Any(sType) } };
+                lFilters = cache.getMatchingItemsByProps(FilterCache::E_FILTER, lIProps);
+                // <- SAFE
+            }
+
+            for (auto const& filter : lFilters)
+            {
+                try
+                {
+                    // SAFE -> (released at the end of each iteration)
+                    osl::MutexGuard aItemLock(m_aMutex);
+
+                    CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, filter);
+                    sal_Int32 nFlags = 0;
+                    aFilter[PROPNAME_FLAGS] >>= nFlags;
+
+                    // remember every import filter, stop at the preferred one
+                    if (static_cast<SfxFilterFlags>(nFlags) & SfxFilterFlags::IMPORT)
+                        sFilter = filter;
+                    if (static_cast<SfxFilterFlags>(nFlags) & SfxFilterFlags::PREFERED)
+                        break;
+                    // <- SAFE
+                }
+                catch(const css::uno::Exception&)
+                {
+                    // filter can't be read from the cache => try the next one
+                }
+            }
+
+            if (!sFilter.isEmpty())
+            {
+                rDescriptor[utl::MediaDescriptor::PROP_TYPENAME ] <<= sType;
+                rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME] <<= sFilter;
+                return;
+            }
+        }
+        catch(const css::uno::Exception&)
+        {}
+    }
+
+    // c)
+    // We can use the preferred filter for the specified type.
+    // Such preferred filter points:
+    // - to the default filter of the preferred application
+    // - or to any other filter if no preferred filter was set.
+    // Note: It's an optimization only!
+    // It's not guaranteed, that such preferred filter exists.
+    sFilter.clear();
+    try
+    {
+        {
+            // SAFE ->
+            osl::MutexGuard aLock(m_aMutex);
+
+            CacheItem aType = cache.getItem(FilterCache::E_TYPE, sType);
+            aType[PROPNAME_PREFERREDFILTER] >>= sFilter;
+            // Existence check only: getItem() throws for an unknown filter.
+            cache.getItem(FilterCache::E_FILTER, sFilter);
+            // <- SAFE
+        }
+
+        // no exception => found valid type and filter => set it on the given descriptor
+        rDescriptor[utl::MediaDescriptor::PROP_TYPENAME ] <<= sType ;
+        rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME] <<= sFilter;
+        return;
+    }
+    catch(const css::uno::Exception&)
+    {}
+
+    // d)
+    // Search for any import(!) filter, which is registered for this type.
+    sFilter.clear();
+    try
+    {
+        std::vector<OUString> lFilters;
+        {
+            // SAFE ->
+            osl::MutexGuard aLock(m_aMutex);
+
+            // Attention: For executing next lines of code, We must be sure that
+            // all filters already loaded :-(
+            // That can disturb our "load on demand feature". But we have no other chance!
+            cache.load(FilterCache::E_CONTAINS_FILTERS);
+
+            css::beans::NamedValue lIProps[] {
+                { PROPNAME_TYPE, uno::Any(sType) } };
+            lFilters = cache.getMatchingItemsByProps(FilterCache::E_FILTER, lIProps);
+            // <- SAFE
+        }
+
+        for (auto const& filter : lFilters)
+        {
+            try
+            {
+                // SAFE -> (released at the end of each iteration)
+                osl::MutexGuard aItemLock(m_aMutex);
+
+                CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, filter);
+                sal_Int32 nFlags = 0;
+                aFilter[PROPNAME_FLAGS] >>= nFlags;
+
+                // sFilter is assigned only after the filter was verified to
+                // be an import filter, so a failing lookup can never leave
+                // an unverified name behind.
+                if (static_cast<SfxFilterFlags>(nFlags) & SfxFilterFlags::IMPORT)
+                {
+                    sFilter = filter;
+                    break;
+                }
+                // <- SAFE
+            }
+            catch(const css::uno::Exception&)
+            {
+                // filter can't be read from the cache => try the next one
+            }
+        }
+
+        if (!sFilter.isEmpty())
+        {
+            rDescriptor[utl::MediaDescriptor::PROP_TYPENAME ] <<= sType ;
+            rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME] <<= sFilter;
+            return;
+        }
+    }
+    catch(const css::uno::Exception&)
+    {}
+}
+
+
+/**
+ * Checks a preselected type against the given URL and marks the matching
+ * entry of rFlatTypes accordingly.
+ *
+ * @param sPreSelType  name of the preselected type.
+ * @param aParsedURL   the already parsed URL of the document.
+ * @param rFlatTypes   candidate list; flags of the entry with the same type
+ *                     name are updated, no entry is added.
+ * @param bDocService  true if the preselection comes from a document
+ *                     service.
+ * @return false if the type is unknown to the cache or its name is empty,
+ *         true otherwise (also if rFlatTypes has no entry for the type).
+ */
+bool TypeDetection::impl_getPreselectionForType(
+    const OUString& sPreSelType, const util::URL& aParsedURL, FlatDetection& rFlatTypes, bool bDocService)
+{
+    // validate type
+    CacheItem aTypeProps;
+    try
+    {
+        // SAFE -> --------------------------
+        osl::MutexGuard aGuard(m_aMutex);
+        aTypeProps = GetTheFilterCache().getItem(FilterCache::E_TYPE, sPreSelType);
+        // <- SAFE --------------------------
+    }
+    catch(const css::container::NoSuchElementException&)
+    {
+        // unknown type => not a valid preselection
+        return false;
+    }
+
+    // A match by pattern suppresses deep detection later on, a match by
+    // extension makes the preselection preferred.
+    bool bHitByPattern = false;
+    bool bHitByExtension = false;
+
+    // We can't check a preselected type for a given stream!
+    // So we must believe, that it can work ...
+    if (aParsedURL.Complete != "private:stream")
+    {
+        // extract extension from URL .. to check it case-insensitive !
+        INetURLObject aURLObject(aParsedURL.Main);
+        const OUString sURLExtension =
+            aURLObject.getExtension(INetURLObject::LAST_SEGMENT,
+                                    true,
+                                    INetURLObject::DecodeMechanism::WithCharset)
+                .toAsciiLowerCase();
+
+        // Both registration lists are read before any of them is evaluated.
+        const css::uno::Sequence<OUString> lExtensions = aTypeProps[PROPNAME_EXTENSIONS].get<css::uno::Sequence<OUString> >();
+        const css::uno::Sequence<OUString> lURLPattern = aTypeProps[PROPNAME_URLPATTERN].get<css::uno::Sequence<OUString> >();
+
+        bHitByExtension = std::any_of(
+            lExtensions.begin(), lExtensions.end(),
+            [&sURLExtension](const OUString& rRegistered)
+            { return rRegistered.toAsciiLowerCase() == sURLExtension; });
+
+        // The URL patterns are consulted only if no extension matched.
+        if (!bHitByExtension)
+        {
+            bHitByPattern = std::any_of(
+                lURLPattern.begin(), lURLPattern.end(),
+                [&aParsedURL](const OUString& rPattern)
+                {
+                    WildCard aCheck(rPattern);
+                    return aCheck.Matches(aParsedURL.Main);
+                });
+        }
+    }
+
+    // not valid!
+    if (sPreSelType.isEmpty())
+        return false;
+
+    // it's a valid type - transfer the results to the matching list entry
+    const auto itEntry = std::find_if(
+        rFlatTypes.begin(), rFlatTypes.end(),
+        [&sPreSelType](const FlatDetectionInfo& rInfo) { return rInfo.sType == sPreSelType; });
+    if (itEntry != rFlatTypes.end())
+    {
+        // flags are only ever switched on here, never off
+        if (bHitByExtension)
+            itEntry->bMatchByExtension = true;
+        if (bHitByPattern)
+            itEntry->bMatchByPattern = true;
+        if (bDocService)
+            itEntry->bPreselectedByDocumentService = true;
+    }
+
+    return true;
+}
+
+/**
+ * Marks all types in rFlatTypes that belong to filters registered for the
+ * given document service.
+ *
+ * @param sPreSelDocumentService  name of the preselected document service.
+ * @param aParsedURL              the already parsed URL of the document.
+ * @param rFlatTypes              candidate list whose entries get updated.
+ */
+void TypeDetection::impl_getPreselectionForDocumentService(
+    const OUString& sPreSelDocumentService, const util::URL& aParsedURL, FlatDetection& rFlatTypes)
+{
+    // get all filters, which match to this doc service
+    std::vector<OUString> aFilterNames;
+    try
+    {
+        // SAFE -> --------------------------
+        osl::MutexGuard aGuard(m_aMutex);
+
+        // Attention: For executing next lines of code, We must be sure that
+        // all filters already loaded :-(
+        // That can disturb our "load on demand feature". But we have no other chance!
+        auto & rCache = GetTheFilterCache();
+        rCache.load(FilterCache::E_CONTAINS_FILTERS);
+
+        css::beans::NamedValue aQuery[] {
+            { PROPNAME_DOCUMENTSERVICE, css::uno::Any(sPreSelDocumentService) } };
+        aFilterNames = rCache.getMatchingItemsByProps(FilterCache::E_FILTER, aQuery);
+        // <- SAFE --------------------------
+    }
+    catch (const css::container::NoSuchElementException&)
+    {
+        aFilterNames.clear();
+    }
+
+    // For every filter found: look up its type and let the type check mark
+    // the corresponding entry as preselected by the document service.
+    for (const OUString& rFilterName : aFilterNames)
+    {
+        const OUString sFilterType = impl_getTypeFromFilter(rFilterName);
+        if (!sFilterType.isEmpty())
+            impl_getPreselectionForType(sFilterType, aParsedURL, rFlatTypes, true);
+    }
+}
+
+/**
+ * Looks up the type a filter is registered for.
+ *
+ * @param rFilterName  name of the filter.
+ * @return the value of the filter's type property; an empty string if the
+ *         filter is unknown to the cache or the property holds no string.
+ */
+OUString TypeDetection::impl_getTypeFromFilter(const OUString& rFilterName)
+{
+    CacheItem aFilterProps;
+    try
+    {
+        // only the cache access itself happens under the mutex
+        osl::MutexGuard aGuard(m_aMutex);
+        aFilterProps = GetTheFilterCache().getItem(FilterCache::E_FILTER, rFilterName);
+    }
+    catch (const container::NoSuchElementException&)
+    {
+        return OUString();
+    }
+
+    OUString sFilterType;
+    aFilterProps[PROPNAME_TYPE] >>= sFilterType;
+    return sFilterType;
+}
+
+/**
+ * Builds the list of all candidate types for a detection run.
+ *
+ * The list consists of the types of all known filters plus the types the
+ * cache reports for the URL; duplicates are removed. Afterwards the entries
+ * for a preselected type and for the types of a preselected document
+ * service (both taken from the descriptor) get marked.
+ *
+ * @param aParsedURL   the already parsed URL of the document.
+ * @param rDescriptor  media descriptor providing the preselections.
+ * @param rFlatTypes   receives the result; previous content is discarded.
+ */
+void TypeDetection::impl_getAllFormatTypes(
+    const util::URL& aParsedURL, utl::MediaDescriptor const & rDescriptor, FlatDetection& rFlatTypes)
+{
+    rFlatTypes.clear();
+
+    // Get all filters that we have.
+    std::vector<OUString> aAllFilters;
+    try
+    {
+        osl::MutexGuard aGuard(m_aMutex);
+        auto & rCache = GetTheFilterCache();
+        rCache.load(FilterCache::E_CONTAINS_FILTERS);
+        aAllFilters = rCache.getItemNames(FilterCache::E_FILTER);
+    }
+    catch (const container::NoSuchElementException&)
+    {
+        return;
+    }
+
+    // Retrieve the default type for each of these filters, and store them.
+    for (const OUString& rFilterName : aAllFilters)
+    {
+        const OUString sFilterType = impl_getTypeFromFilter(rFilterName);
+        if (sFilterType.isEmpty())
+            continue;
+
+        FlatDetectionInfo aEntry; // all flags set to false by default.
+        aEntry.sType = sFilterType;
+        rFlatTypes.push_back(aEntry);
+    }
+
+    // Get all types that match the URL alone and merge them into the list.
+    FlatDetection aURLMatches;
+    GetTheFilterCache().detectFlatForURL(aParsedURL, aURLMatches);
+    for (const FlatDetectionInfo& rURLMatch : aURLMatches)
+    {
+        const auto itKnown = std::find_if(rFlatTypes.begin(), rFlatTypes.end(), FindByType(rURLMatch.sType));
+        if (itKnown == rFlatTypes.end())
+        {
+            // Not in the list yet.
+            rFlatTypes.push_back(rURLMatch);
+            continue;
+        }
+
+        // Already in the list. Switch on every flag the URL match carries.
+        if (rURLMatch.bMatchByExtension)
+            itKnown->bMatchByExtension = true;
+        if (rURLMatch.bMatchByPattern)
+            itKnown->bMatchByPattern = true;
+        if (rURLMatch.bPreselectedByDocumentService)
+            itKnown->bPreselectedByDocumentService = true;
+    }
+
+    // Remove duplicates: sort by name so equal names become neighbours.
+    std::stable_sort(rFlatTypes.begin(), rFlatTypes.end(), SortByType());
+    rFlatTypes.erase(std::unique(rFlatTypes.begin(), rFlatTypes.end(), EqualByType()),
+                     rFlatTypes.end());
+
+    // Mark pre-selected type (if any) to have it prioritized.
+    const OUString sPreselectedType =
+        rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_TYPENAME, OUString());
+    if (!sPreselectedType.isEmpty())
+        impl_getPreselectionForType(sPreselectedType, aParsedURL, rFlatTypes, false);
+
+    // Mark all types preferred by the current document service, to have it prioritized.
+    const OUString sPreselectedService =
+        rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_DOCUMENTSERVICE, OUString());
+    if (!sPreselectedService.isEmpty())
+        impl_getPreselectionForDocumentService(sPreselectedService, aParsedURL, rFlatTypes);
+}
+
+
+/**
+ * Walks the (already prioritized) candidate list and returns the first type
+ * that can be confirmed.
+ *
+ * For every candidate:
+ * a) it is skipped if it can't be validated and set on the descriptor
+ * b) it is returned as is if deep detection is not allowed or the
+ *    candidate matched by URL pattern
+ * c) if the type has no detect service, the first such type is remembered
+ *    in rLastChance and the search goes on
+ * d) otherwise the detect service is asked; a non-empty answer is returned
+ * e) an empty answer or a type missing in the cache => next candidate
+ *
+ * The walk stops early once m_bCancel is set.
+ *
+ * @param rDescriptor  media descriptor, updated during the checks.
+ * @param lFlatTypes   candidate types in the order they are to be checked.
+ * @param bAllowDeep   whether detect services may be called.
+ * @param rLastChance  reset on entry; receives the first candidate without
+ *                     a detect service (see c).
+ * @return the detected type or an empty string.
+ */
+OUString TypeDetection::impl_detectTypeFlatAndDeep( utl::MediaDescriptor& rDescriptor ,
+                                                    const FlatDetection& lFlatTypes ,
+                                                    bool bAllowDeep ,
+                                                    OUString& rLastChance )
+{
+    // reset it every time, so the outside code can distinguish between
+    // a set and a not set value.
+    rLastChance.clear();
+
+    for (const FlatDetectionInfo& rCandidate : lFlatTypes)
+    {
+        if (m_bCancel)
+            break;
+
+        OUString sCandidateType = rCandidate.sType;
+
+        // a)
+        if (!impl_validateAndSetTypeOnDescriptor(rDescriptor, sCandidateType))
+            continue;
+
+        // b)
+        const bool bFlatResultIsFinal = !bAllowDeep || rCandidate.bMatchByPattern;
+        if (bFlatResultIsFinal)
+            return sCandidateType;
+
+        try
+        {
+            CacheItem aTypeProps;
+            {
+                // SAFE -> ----------------------------------
+                osl::MutexGuard aGuard(m_aMutex);
+                aTypeProps = GetTheFilterCache().getItem(FilterCache::E_TYPE, sCandidateType);
+                // <- SAFE ----------------------------------
+            }
+
+            OUString sDetectService;
+            aTypeProps[PROPNAME_DETECTSERVICE] >>= sDetectService;
+
+            if (!sDetectService.isEmpty())
+            {
+                // d)
+                OUString sDeepType = impl_askDetectService(sDetectService, rDescriptor);
+                if (!sDeepType.isEmpty())
+                    return sDeepType;
+            }
+            else if (rLastChance.isEmpty())
+            {
+                // c)
+                // flat detected types without any registered deep detection service and not
+                // preselected by the user can be used as LAST CHANCE in case no other type could
+                // be detected. Of course only the first type without deep detector can be used.
+                // Further ones has to be ignored.
+                rLastChance = sCandidateType;
+            }
+        }
+        catch(const css::container::NoSuchElementException&)
+        {
+            // e) type is not (any longer) part of the cache
+        }
+    }
+
+    return OUString();
+}
+
+/**
+ * Rewinds the input stream of the descriptor to position 0, if there is a
+ * stream and it supports seeking.
+ *
+ * @param rDescriptor  media descriptor that may carry an input stream.
+ * @throws css::uno::RuntimeException  passed through from the seek call;
+ *         every other css::uno::Exception is ignored.
+ */
+void TypeDetection::impl_seekStreamToZero(utl::MediaDescriptor const & rDescriptor)
+{
+    const css::uno::Reference< css::io::XInputStream > xInput =
+        rDescriptor.getUnpackedValueOrDefault(
+            utl::MediaDescriptor::PROP_INPUTSTREAM,
+            css::uno::Reference< css::io::XInputStream >());
+
+    // XSeekable is an optional interface ... so just try it .-)
+    const css::uno::Reference< css::io::XSeekable > xSeekable(xInput, css::uno::UNO_QUERY);
+    if (xSeekable.is())
+    {
+        try
+        {
+            xSeekable->seek(0);
+        }
+        catch(const css::uno::RuntimeException&)
+        {
+            throw;
+        }
+        catch(const css::uno::Exception&)
+        {
+            // seeking is a best-effort step only
+        }
+    }
+}
+
+/**
+ * Runs one deep detection service on the document.
+ *
+ * Makes sure the descriptor has an input stream, rewinds it, creates the
+ * named detect service and calls its detect() method with the descriptor.
+ * The stream is rewound again afterwards and the answer is validated.
+ *
+ * @param sDetectService  service name of the detector to be created.
+ * @param rDescriptor     media descriptor; replaced by what the detector
+ *                        returns in its in/out sequence.
+ * @return the type reported by the detector if it passes validation,
+ *         otherwise an empty string (also if the detector can't be created
+ *         or throws).
+ */
+OUString TypeDetection::impl_askDetectService(const OUString& sDetectService,
+ utl::MediaDescriptor& rDescriptor )
+{
+ // Open the stream and add it to the media descriptor if this method is called for the first time.
+ // All following requests to this method will detect, that there already exists a stream .-)
+ // Attention: This method throws an exception if the stream could not be opened.
+ // It's important to break any further detection in such case.
+ // Catch it on the highest detection level only !!!
+ impl_openStream(rDescriptor);
+
+ // seek to 0 is an optional feature to be more robust against
+ // "simple implemented detect services" .-)
+ impl_seekStreamToZero(rDescriptor);
+
+ css::uno::Reference< css::document::XExtendedFilterDetection > xDetector;
+ css::uno::Reference< css::uno::XComponentContext > xContext;
+
+ // SAFE ->
+ {
+ // take a copy of the context reference under the mutex
+ osl::MutexGuard aLock(m_aMutex);
+ xContext = m_xContext;
+ }
+ // <- SAFE
+
+ try
+ {
+ // Attention! If e.g. an office module was not installed sometimes we
+ // find a registered detect service, which is referred inside the
+ // configuration ... but not really installed. On the other side we use
+ // third party components here, which can make trouble anyway. So we
+ // should handle errors during creation of such services more
+ // gracefully .-)
+ xDetector.set(
+ xContext->getServiceManager()->createInstanceWithContext(sDetectService, xContext),
+ css::uno::UNO_QUERY_THROW);
+ }
+ catch (...)
+ {
+ // creation failed => xDetector stays empty and is handled below
+ }
+
+ if ( ! xDetector.is())
+ return OUString();
+
+ OUString sDeepType;
+ try
+ {
+ // start deep detection
+ // Don't forget to convert stl descriptor to its uno representation.
+
+ /* Attention!
+ You have to use an explicit instance of this uno sequence...
+ Because it's used as an in out parameter. And in case of a temp. used object
+ we will run into memory corruptions!
+ */
+ css::uno::Sequence< css::beans::PropertyValue > lDescriptor;
+ rDescriptor >> lDescriptor;
+ sDeepType = xDetector->detect(lDescriptor);
+ // take over whatever the detector changed in the sequence
+ rDescriptor << lDescriptor;
+ }
+ catch (...)
+ {
+ // We should ignore errors here.
+ // Thrown exceptions mostly will end in crash recovery...
+ // But might be we find another deep detection service which can detect the same
+ // document without a problem .-)
+ sDeepType.clear();
+ }
+
+ // seek to 0 is an optional feature to be more robust against
+ // "simple implemented detect services" .-)
+ impl_seekStreamToZero(rDescriptor);
+
+ // analyze the results
+ // a) detect service returns "" => return "" too and remove TYPE/FILTER prop from descriptor
+ // b) returned type is unknown => return "" too and remove TYPE/FILTER prop from descriptor
+ // c) returned type is valid => check TYPE/FILTER props inside descriptor and return the type
+
+ // this special helper checks for a valid type
+ // and set right values on the descriptor!
+ bool bValidType = impl_validateAndSetTypeOnDescriptor(rDescriptor, sDeepType);
+ if (bValidType)
+ return sDeepType;
+
+ return OUString();
+}
+
+
+/**
+ * Lets the user choose a filter via the interaction handler of the
+ * descriptor and returns the type that belongs to the chosen filter.
+ *
+ * The user is only asked if the descriptor provides an interaction handler,
+ * a non-empty URL other than "private:stream" and an input stream.
+ *
+ * @param rDescriptor  media descriptor; gets the chosen filter set if the
+ *                     filter can be validated.
+ * @return the type name found on the descriptor after the filter was set;
+ *         an empty string if the user was not asked, aborted the request,
+ *         chose a filter that can't be validated, or a
+ *         css::uno::Exception occurred.
+ */
+OUString TypeDetection::impl_askUserForTypeAndFilterIfAllowed(utl::MediaDescriptor& rDescriptor)
+{
+    const css::uno::Reference< css::task::XInteractionHandler > xHandler =
+        rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_INTERACTIONHANDLER,
+                                              css::uno::Reference< css::task::XInteractionHandler >());
+    if (!xHandler.is())
+        return OUString();
+
+    const OUString sDocumentURL =
+        rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_URL,
+                                              OUString());
+
+    const css::uno::Reference< css::io::XInputStream > xInput =
+        rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_INPUTSTREAM,
+                                              css::uno::Reference< css::io::XInputStream >());
+
+    // Don't disturb the user for "non existing files - means empty URLs" or
+    // if we were forced to detect a stream.
+    // Reason behind: we must be sure to ask user for "unknown contents" only...
+    // and not for "missing files". Especially if detection is done by a stream only
+    // we can't check if the stream points to an "existing content"!
+    const bool bWorthAsking =
+        !sDocumentURL.isEmpty()
+        && xInput.is()
+        && !sDocumentURL.equalsIgnoreAsciiCase("private:stream");
+    if (!bWorthAsking)
+        return OUString();
+
+    OUString sChosenType;
+    try
+    {
+        // create a new request to ask user for its decision about the usable filter
+        ::framework::RequestFilterSelect aRequest(sDocumentURL);
+        xHandler->handle(aRequest.GetRequest());
+
+        // "Cancel" pressed? => nothing selected.
+        // "OK" pressed => verify the selected filter and fetch the type that
+        // was set on the descriptor together with it. The user selected a
+        // filter explicitly, so this very filter has to be used and no
+        // ambiguous filter registration may get in the way .-)
+        if (!aRequest.isAbort())
+        {
+            OUString sChosenFilter = aRequest.getFilter();
+            if (impl_validateAndSetFilterOnDescriptor(rDescriptor, sChosenFilter))
+                rDescriptor[utl::MediaDescriptor::PROP_TYPENAME] >>= sChosenType;
+        }
+    }
+    catch(const css::uno::Exception&)
+    {}
+
+    return sChosenType;
+}
+
+
+/**
+ * Makes sure the descriptor carries an input stream for its URL.
+ *
+ * File URLs are opened via addInputStreamOwnLock(), everything else via
+ * addInputStream().
+ *
+ * @param rDescriptor
+ *          media descriptor that receives the stream. A ReadOnly entry is
+ *          erased again afterwards unless the caller had set it to true
+ *          before the stream was opened.
+ *
+ * @throws css::uno::Exception
+ *          if the stream could not be added to the descriptor.
+ */
+void TypeDetection::impl_openStream(utl::MediaDescriptor& rDescriptor)
+{
+    // Remember the caller's wishes before opening the stream changes the descriptor.
+    const OUString sURL = rDescriptor.getUnpackedValueOrDefault( utl::MediaDescriptor::PROP_URL, OUString() );
+    const bool bReadOnlyWanted = rDescriptor.getUnpackedValueOrDefault( utl::MediaDescriptor::PROP_READONLY, false );
+
+    // OOo uses own file locking mechanics in case of local file
+    const bool bOpened = comphelper::isFileUrl( sURL )
+        ? rDescriptor.addInputStreamOwnLock()
+        : rDescriptor.addInputStream();
+
+    if ( !bOpened )
+        throw css::uno::Exception(
+            "Could not open stream for <" + sURL + ">",
+            static_cast<OWeakObject *>(this));
+
+    // An explicitly requested read-only state has to survive.
+    if ( bReadOnlyWanted )
+        return;
+
+    // The MediaDescriptor implementation adds the ReadOnly argument on its own
+    // if the file can not be opened for writing. Either that argument must be
+    // removed or a second one is needed, so that the application can tell this
+    // apart from a user explicitly requesting a readonly document.
+    // The current solution is to remove it here.
+    rDescriptor.erase( utl::MediaDescriptor::PROP_READONLY );
+}
+
+
+/**
+ * Drops the type name and the filter name entries from the descriptor.
+ * Entries that are not present are simply skipped.
+ *
+ * @param rDescriptor
+ *          media descriptor to be cleaned up.
+ */
+void TypeDetection::impl_removeTypeFilterFromDescriptor(utl::MediaDescriptor& rDescriptor)
+{
+    // Erasing by key is a no-op for a missing entry, so no lookup is needed first.
+    rDescriptor.erase(utl::MediaDescriptor::PROP_TYPENAME  );
+    rDescriptor.erase(utl::MediaDescriptor::PROP_FILTERNAME);
+}
+
+
+/**
+ * Checks whether the given type is registered in the filter cache and
+ * updates the descriptor accordingly.
+ *
+ * @param rDescriptor
+ *          media descriptor; receives sType as its type name if the type is
+ *          known, otherwise its type and filter entries are removed.
+ * @param sType
+ *          internal name of the type to be checked.
+ *
+ * @return true if the cache knows the type, false otherwise.
+ */
+bool TypeDetection::impl_validateAndSetTypeOnDescriptor(      utl::MediaDescriptor& rDescriptor,
+                                                        const OUString&             sType      )
+{
+    // SAFE ->
+    osl::ClearableMutexGuard aGuard(m_aMutex);
+
+    if (!GetTheFilterCache().hasItem(FilterCache::E_TYPE, sType))
+    {
+        aGuard.clear();
+        // <- SAFE
+
+        // unknown type => remove all related information from the descriptor
+        impl_removeTypeFilterFromDescriptor(rDescriptor);
+        return false;
+    }
+
+    // known type => publish it on the descriptor (guard is released on return)
+    rDescriptor[utl::MediaDescriptor::PROP_TYPENAME] <<= sType;
+    return true;
+    // <- SAFE
+}
+
+
+/**
+ * Looks the given filter up in the filter cache and, if it is found, stores
+ * the filter together with its type on the descriptor.
+ *
+ * @param rDescriptor
+ *          media descriptor; receives the type and filter name on success,
+ *          otherwise its type and filter entries are removed.
+ * @param sFilter
+ *          internal name of the filter to be checked.
+ *
+ * @return true if the descriptor was updated; false if the cache lookup
+ *         raised a css::container::NoSuchElementException.
+ */
+bool TypeDetection::impl_validateAndSetFilterOnDescriptor(      utl::MediaDescriptor& rDescriptor,
+                                                          const OUString&             sFilter    )
+{
+    try
+    {
+        OUString sFilterType;
+
+        // SAFE ->
+        {
+            osl::MutexGuard aGuard(m_aMutex);
+
+            CacheItem aItem = GetTheFilterCache().getItem(FilterCache::E_FILTER, sFilter);
+            aItem[PROPNAME_TYPE] >>= sFilterType;
+        }
+        // <- SAFE
+
+        // found valid type and filter => set it on the given descriptor
+        rDescriptor[utl::MediaDescriptor::PROP_TYPENAME  ] <<= sFilterType;
+        rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME] <<= sFilter;
+        return true;
+    }
+    catch(const css::container::NoSuchElementException&)
+    {
+        // unknown filter - handled by the cleanup below
+    }
+
+    // remove all related information from the descriptor
+    impl_removeTypeFilterFromDescriptor(rDescriptor);
+    return false;
+}
+
+} // namespace filter
+
+/**
+ * Exported C entry point that creates a new filter::config::TypeDetection
+ * for the given component context and hands it out via cppu::acquire().
+ * The argument sequence is ignored.
+ */
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+filter_TypeDetection_get_implementation(
+    css::uno::XComponentContext* context, css::uno::Sequence<css::uno::Any> const&)
+{
+    auto* const pDetection = new filter::config::TypeDetection(context);
+    return cppu::acquire(pDetection);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */