summaryrefslogtreecommitdiffstats
path: root/sc/source/ui/unoobj/scdetect.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'sc/source/ui/unoobj/scdetect.cxx')
-rw-r--r--sc/source/ui/unoobj/scdetect.cxx353
1 files changed, 353 insertions, 0 deletions
diff --git a/sc/source/ui/unoobj/scdetect.cxx b/sc/source/ui/unoobj/scdetect.cxx
new file mode 100644
index 0000000000..e5fc5848e1
--- /dev/null
+++ b/sc/source/ui/unoobj/scdetect.cxx
@@ -0,0 +1,353 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include "scdetect.hxx"
+
+#include <sal/macros.h>
+
+#include <com/sun/star/beans/PropertyValue.hpp>
+#include <com/sun/star/uno/XComponentContext.hpp>
+#include <cppuhelper/supportsservice.hxx>
+#include <com/sun/star/io/XInputStream.hpp>
+#include <unotools/mediadescriptor.hxx>
+#include <sfx2/docfile.hxx>
+#include <sfx2/docfilt.hxx>
+#include <sfx2/fcontnr.hxx>
+
+using namespace ::com::sun::star;
+using utl::MediaDescriptor;
+
+namespace {
+
+// Search pattern tables used by detectThisFormat().
+// Each 16-bit table entry is interpreted as follows:
+// 0x00??: the stream byte at this position must be exactly 0x??
+// 0x0100: skip one stream byte without comparing it (don't care)
+// 0x02nn: the next nn table entries are alternatives; the stream byte must equal one of them
+// 0x8000: end of pattern, the format is recognized
+
+#define M_DC 0x0100 // don't care: any byte matches at this position
+#define M_ALT(CNT) (0x0200+(CNT)) // CNT alternative byte values follow this entry
+#define M_END 0x8000 // pattern terminator
+
+const sal_uInt16 pLotus[] = // Lotus 1/1A/2
+ { 0x0000, 0x0000, 0x0002, 0x0000,
+ M_ALT(2), 0x0004, 0x0006, // fifth byte is either 0x04 or 0x06
+ 0x0004, M_END };
+
+const sal_uInt16 pLotusNew[] = // Lotus >= 9.7
+ { 0x0000, 0x0000, M_DC, 0x0000, // Rec# + Len (0x1a); the third byte is not compared (M_DC)
+ M_ALT(3), 0x0003, 0x0004, 0x0005, // File Revision Code 97->ME
+ 0x0010, 0x0004, 0x0000, 0x0000,
+ M_END };
+
+const sal_uInt16 pLotus2[] = // Lotus >3
+ { 0x0000, 0x0000, 0x001A, 0x0000, // Rec# + Len (26)
+ M_ALT(2), 0x0000, 0x0002, // File Revision Code
+ 0x0010,
+ 0x0004, 0x0000, // File Revision Subcode
+ M_END };
+
+const sal_uInt16 pQPro[] = // Quattro Pro, checked for type "calc_QPro"
+ { 0x0000, 0x0000, 0x0002, 0x0000,
+ M_ALT(4), 0x0001, 0x0002, // WB1, WB2
+ 0x0006, 0x0007, // QPro 6/7 (?)
+ 0x0010,
+ M_END };
+
+const sal_uInt16 pDIF1[] = // DIF with CR-LF
+ {
+ 'T', 'A', 'B', 'L', 'E',
+ M_DC, M_DC, // two-byte line end, not compared
+ '0', ',', '1',
+ M_DC, M_DC, // two-byte line end, not compared
+ '\"',
+ M_END };
+
+const sal_uInt16 pDIF2[] = // DIF with CR or LF
+ {
+ 'T', 'A', 'B', 'L', 'E',
+ M_DC, // one-byte line end, not compared
+ '0', ',', '1',
+ M_DC, // one-byte line end, not compared
+ '\"',
+ M_END };
+
+const sal_uInt16 pSylk[] = // Sylk
+ {
+ 'I', 'D', ';',
+ M_ALT(3), 'P', 'N', 'E', // 'P' plus undocumented Excel extensions 'N' and 'E'
+ M_END };
+
+// Checks whether the beginning of a stream matches a search pattern table.
+//
+// rStr     stream to inspect; it is rewound to position 0 before matching and
+//          is left wherever matching stopped.
+// pSearch  pattern table made of literal bytes (< 0x0100), M_DC, M_ALT(n)
+//          followed by n alternatives, and a terminating M_END.
+//
+// Returns true if M_END is reached while every preceding entry matched,
+// false on the first mismatch or when the stream reports EOF first.
+//
+// NOTE(review): the loop is left as soon as the stream reports EOF, so a
+// stream that ends exactly on the last pattern byte is presumably never
+// accepted, because M_END is only evaluated after one more successful read.
+bool detectThisFormat(SvStream& rStr, const sal_uInt16* pSearch)
+{
+    // Initialized so the comparison below never sees an indeterminate value
+    // should the first read fail without the stream reporting EOF.
+    sal_uInt8 nByte = 0;
+    rStr.Seek( 0 ); // always match from the very beginning of the stream
+    rStr.ReadUChar( nByte );
+    bool bSync = true;
+    while( !rStr.eof() && bSync )
+    {
+        const sal_uInt16 nMuster = *pSearch;
+
+        if( nMuster < 0x0100 )
+        {   // literal entry: the stream byte must be identical
+            bSync = ( static_cast<sal_uInt8>(nMuster) == nByte );
+        }
+        else if( nMuster & M_DC )
+        {   // don't care: accept whatever byte is there
+        }
+        else if( nMuster & M_ALT(0) )
+        {   // alternatives: the low byte holds the number of candidates that follow
+            bool bFound = false;
+            for( sal_uInt8 nCntAlt = static_cast<sal_uInt8>(nMuster); nCntAlt > 0; --nCntAlt )
+            {
+                // Every candidate is consumed, even after a hit, so that
+                // pSearch ends up on the last alternative.
+                ++pSearch;
+                if( static_cast<sal_uInt8>(*pSearch) == nByte )
+                    bFound = true;
+            }
+            bSync = bFound;
+        }
+        else if( nMuster & M_END )
+        {   // whole pattern matched: format detected
+            return true;
+        }
+
+        ++pSearch;
+        rStr.ReadUChar( nByte );
+    }
+
+    return false;
+}
+
+}
+
+// Nothing to set up: the constructor body is empty.
+ScFilterDetect::ScFilterDetect() = default;
+
+// Nothing to tear down: the destructor body is empty.
+ScFilterDetect::~ScFilterDetect() = default;
+
+#if 0
+// This function is compiled out (#if 0) and no longer used. It is kept for now
+// to see whether this check could be transferred to the now centralized ASCII
+// detection code in the filter module.
+static sal_Bool lcl_MayBeAscii( SvStream& rStream )
+{
+ // ASCII/CSV is considered possible if there are no null bytes, or a Byte
+ // Order Mark is present, or if, for Unicode UCS2/UTF-16, all null bytes
+ // are on either even or odd byte positions.
+
+ rStream.Seek(STREAM_SEEK_TO_BEGIN);
+
+ const size_t nBufSize = 2048;
+ sal_uInt16 aBuffer[ nBufSize ];
+ sal_uInt8* pByte = reinterpret_cast<sal_uInt8*>(aBuffer);
+ sal_uLong nBytesRead = rStream.Read( pByte, nBufSize*2); // request the whole buffer (nBufSize 16-bit units) from the stream start
+
+ if ( nBytesRead >= 2 && (aBuffer[0] == 0xfffe || aBuffer[0] == 0xfeff) )
+ {
+ // Unicode BOM file may contain null bytes.
+ return sal_True;
+ }
+
+ const sal_uInt16* p = aBuffer;
+ sal_uInt16 nMask = 0xffff; // each half stays set while that byte lane has not yet contained a null byte
+ nBytesRead /= 2; // from here on counts 16-bit units; an odd trailing byte is ignored
+ while( nBytesRead-- && nMask )
+ {
+ sal_uInt16 nVal = *p++ & nMask;
+ if (!(nVal & 0x00ff))
+ nMask &= 0xff00; // null seen in the low byte lane
+ if (!(nVal & 0xff00))
+ nMask &= 0x00ff; // null seen in the high byte lane
+ }
+
+ return nMask != 0; // true if at least one byte lane never contained a null byte
+}
+#endif
+
+// Heuristically decides whether a stream may contain a dBase (.dbf) table.
+//
+// The checks, in order: the first byte must be a known dbf type marker, the
+// stream must be at least as large as an empty dbf, the header length and
+// record size read from the header must be plausible for the stream size, and
+// a 0x0d header terminator must be found on a 32-byte boundary.
+//
+// rStream  stream to inspect; its position is changed by this function.
+//
+// Returns true if all checks pass, false otherwise.
+static bool lcl_MayBeDBase( SvStream& rStream )
+{
+    // Look for dbf marker, see connectivity/source/inc/dbase/DTable.hxx
+    // DBFType for values.
+    const sal_uInt8 nValidMarks[] = {
+        0x03, 0x04, 0x05, 0x30, 0x31, 0x43, 0xB3, 0x83, 0x8b, 0x8e, 0xf5 };
+    // 0 is not in nValidMarks, so a failed read ends in rejection instead of
+    // comparing an indeterminate value.
+    sal_uInt8 nMark = 0;
+    rStream.Seek(STREAM_SEEK_TO_BEGIN);
+    rStream.ReadUChar( nMark );
+    bool bValidMark = false;
+    for (const sal_uInt8 nValid : nValidMarks)
+    {
+        if (nValid == nMark)
+        {
+            bValidMark = true;
+            break;
+        }
+    }
+    if ( !bValidMark )
+        return false;
+
+    const size_t nHeaderBlockSize = 32;
+    // Empty dbf is >= 32*2+1 bytes in size.
+    const size_t nEmptyDbf = nHeaderBlockSize * 2 + 1;
+
+    sal_uInt64 nSize = rStream.TellEnd();
+    if ( nSize < nEmptyDbf )
+        return false;
+
+    // count of records at 4
+    rStream.Seek(4);
+    sal_uInt32 nRecords(0);
+    rStream.ReadUInt32(nRecords);
+
+    // length of header starts at 8
+    rStream.Seek(8);
+    // Initialized like nRecords/nRecordSize; 0 fails the header length check below.
+    sal_uInt16 nHeaderLen(0);
+    rStream.ReadUInt16( nHeaderLen );
+
+    // size of record at 10
+    sal_uInt16 nRecordSize(0);
+    rStream.ReadUInt16(nRecordSize);
+
+    if ( nHeaderLen < nEmptyDbf || nSize < nHeaderLen )
+        return false;
+
+    // see DTable.cxx ODbaseTable::readHeader()
+    if (0 == nRecordSize)
+        return false;
+
+    // see DTable.cxx ODbaseTable::construct() line 546
+    if (0 == nRecords)
+    {
+        // No record count stored: derive it from the space behind the header.
+        nRecords = (nSize - nHeaderLen) / nRecordSize;
+    }
+
+    // tdf#84834 sanity check of size
+    // tdf#106423: a dbf file can have 0 record, so no need to check nRecords
+    if (nSize < nHeaderLen + nRecords * sal_uInt64(nRecordSize))
+        return false;
+
+    // Last byte of header must be 0x0d, this is how it's specified.
+    // #i9581#,#i26407# but some applications don't follow the specification
+    // and pad the header with one byte 0x00 to reach an
+    // even boundary. Some (#i88577# ) even pad more or pad using a 0x1a ^Z
+    // control character (#i8857#). This results in:
+    // Last byte of header must be 0x0d on 32 bytes boundary.
+    sal_uInt16 nBlocks = (nHeaderLen - 1) / nHeaderBlockSize;
+    sal_uInt8 nEndFlag = 0;
+    // Walk backwards over the 32-byte boundaries inside the header, down to
+    // offset 64, until a 0x0d terminator is found.
+    while ( nBlocks > 1 && nEndFlag != 0x0d ) {
+        rStream.Seek( nBlocks-- * nHeaderBlockSize );
+        rStream.ReadUChar( nEndFlag );
+    }
+
+    return ( 0x0d == nEndFlag );
+}
+
+// Confirms the type name found in the media descriptor by sniffing the input
+// stream.
+//
+// lDescriptor  media descriptor; read for the type name and the input stream,
+//              and written back with the filter name when detection succeeds.
+//
+// Returns the type name from the descriptor if the stream content matches that
+// type and a corresponding "scalc" filter is found; otherwise an empty string.
+OUString SAL_CALL ScFilterDetect::detect( uno::Sequence<beans::PropertyValue>& lDescriptor )
+{
+    MediaDescriptor aMediaDesc( lDescriptor );
+    OUString aTypeName = aMediaDesc.getUnpackedValueOrDefault( MediaDescriptor::PROP_TYPENAME, OUString() );
+    uno::Reference< io::XInputStream > xStream ( aMediaDesc[MediaDescriptor::PROP_INPUTSTREAM], uno::UNO_QUERY );
+    if ( !xStream.is() )
+        return OUString();
+
+    SfxMedium aMedium;
+    aMedium.UseInteractionHandler( false );
+    aMedium.setStreamToLoadFrom( xStream, true );
+
+    SvStream* pStream = aMedium.GetInStream();
+    if ( !pStream || pStream->GetError() )
+    {
+        // No stream, no detection.
+        return OUString();
+    }
+
+    // Pick the filter name for the requested type and run its content check.
+    // An unknown type name leaves bRecognized false.
+    const char* pSearchFilterName = nullptr;
+    bool bRecognized = false;
+    if (aTypeName == "calc_Lotus")
+    {
+        pSearchFilterName = "Lotus";
+        bRecognized = detectThisFormat(*pStream, pLotus)
+                      || detectThisFormat(*pStream, pLotusNew)
+                      || detectThisFormat(*pStream, pLotus2);
+    }
+    else if (aTypeName == "calc_QPro")
+    {
+        pSearchFilterName = "Quattro Pro 6.0";
+        bRecognized = detectThisFormat(*pStream, pQPro);
+    }
+    else if (aTypeName == "calc_SYLK")
+    {
+        pSearchFilterName = "SYLK";
+        bRecognized = detectThisFormat(*pStream, pSylk);
+    }
+    else if (aTypeName == "calc_DIF")
+    {
+        pSearchFilterName = "DIF";
+        bRecognized = detectThisFormat(*pStream, pDIF1)
+                      || detectThisFormat(*pStream, pDIF2);
+    }
+    else if (aTypeName == "calc_dBase")
+    {
+        pSearchFilterName = "dBase";
+        bRecognized = lcl_MayBeDBase(*pStream);
+    }
+
+    if (!bRecognized)
+        return OUString();
+
+    SfxFilterMatcher aMatcher("scalc");
+    const OUString aFilterName = OUString::createFromAscii(pSearchFilterName);
+    std::shared_ptr<const SfxFilter> pFilter = aMatcher.GetFilter4FilterName(aFilterName);
+    if (!pFilter)
+        return OUString();
+
+    // Publish the matched filter through the caller's descriptor.
+    aMediaDesc[MediaDescriptor::PROP_FILTERNAME] <<= pFilter->GetName();
+    aMediaDesc >> lDescriptor;
+    return aTypeName;
+}
+
+// Returns the fixed implementation name of this component.
+OUString SAL_CALL ScFilterDetect::getImplementationName()
+{
+    OUString aImplName("com.sun.star.comp.calc.FormatDetector");
+    return aImplName;
+}
+
+// Delegates the service-name lookup for this object to cppu::supportsService().
+sal_Bool ScFilterDetect::supportsService( const OUString& sServiceName )
+{
+    const bool bSupported = cppu::supportsService(this, sServiceName);
+    return bSupported;
+}
+
+// Returns the single service name this component reports.
+css::uno::Sequence<OUString> ScFilterDetect::getSupportedServiceNames()
+{
+    css::uno::Sequence<OUString> aServiceNames { "com.sun.star.frame.ExtendedTypeDetection" };
+    return aServiceNames;
+}
+
+// Exported C entry point that creates a new ScFilterDetect and returns it
+// through cppu::acquire(). Both arguments are unused.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+com_sun_star_comp_calc_FormatDetector_get_implementation(css::uno::XComponentContext* /*context*/,
+                                                         css::uno::Sequence<css::uno::Any> const &)
+{
+    ScFilterDetect* pDetector = new ScFilterDetect;
+    return cppu::acquire(pDetector);
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */