summaryrefslogtreecommitdiffstats
path: root/sc/source/filter/dif/difimp.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'sc/source/filter/dif/difimp.cxx')
-rw-r--r--sc/source/filter/dif/difimp.cxx674
1 files changed, 674 insertions, 0 deletions
diff --git a/sc/source/filter/dif/difimp.cxx b/sc/source/filter/dif/difimp.cxx
new file mode 100644
index 000000000..fd88cdf4c
--- /dev/null
+++ b/sc/source/filter/dif/difimp.cxx
@@ -0,0 +1,674 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <svl/numformat.hxx>
+#include <tools/stream.hxx>
+#include <osl/diagnose.h>
+#include <dif.hxx>
+#include <docpool.hxx>
+#include <document.hxx>
+#include <fprogressbar.hxx>
+#include <ftools.hxx>
+#include <patattr.hxx>
+#include <scerrors.hxx>
+#include <scitems.hxx>
+#include <stringutil.hxx>
+#include <table.hxx>
+#include <memory>
+
+const std::u16string_view pKeyTABLE = u"TABLE"; // header keyword; DifParser::GetNextTopic() maps it to T_TABLE
+const std::u16string_view pKeyVECTORS = u"VECTORS"; // header keyword; mapped to T_VECTORS
+const std::u16string_view pKeyTUPLES = u"TUPLES"; // header keyword; mapped to T_TUPLES
+const std::u16string_view pKeyDATA = u"DATA"; // header keyword; mapped to T_DATA, which ends the header loop in ScImportDif()
+const std::u16string_view pKeyBOT = u"BOT"; // presumably the marker behind D_BOT records; not referenced in the code visible here - TODO confirm
+const std::u16string_view pKeyEOD = u"EOD"; // presumably the marker behind D_EOD records; not referenced in the code visible here - TODO confirm
+
+/**
+ * Import a DIF stream into one sheet of a document.
+ *
+ * The header is read topic by topic until the DATA topic (or the end of the
+ * stream) is reached; a non-empty TABLE string renames the target sheet.
+ * The data records that follow are written cell by cell. The first cell goes
+ * to rInsPos; every BOT record seen after a column has been set moves on to
+ * the next row and back to the base column.
+ *
+ * @param rIn      stream to read; it is rewound to position 0 before parsing
+ * @param pDoc     target document; dereferenced unconditionally
+ * @param rInsPos  sheet, column and row of the first imported cell
+ * @param eVon     text encoding handed to the DifParser
+ * @return SCERR_IMPORT_FORMAT if the header ended without a DATA topic,
+ *         SCWARN_IMPORT_RANGE_OVERFLOW if a cell fell outside the sheet or a
+ *         header topic carried unexpected vector/value numbers,
+ *         ERRCODE_NONE otherwise
+ */
+ErrCode ScFormatFilterPluginImpl::ScImportDif(SvStream& rIn, ScDocument* pDoc, const ScAddress& rInsPos,
+                        const rtl_TextEncoding eVon )
+{
+    DifParser aDifParser( rIn, *pDoc, eVon );
+
+    SCTAB nBaseTab = rInsPos.Tab();
+
+    TOPIC eTopic = T_UNKNOWN;
+    bool bSyntErrWarn = false;
+    bool bOverflowWarn = false;
+
+    OUStringBuffer& rData = aDifParser.m_aData;
+
+    rIn.Seek( 0 );
+
+    ScfStreamProgressBar aPrgrsBar( rIn, pDoc->GetDocumentShell() );
+
+    // --- header: consume topics until DATA or the end of the stream ---
+    while( eTopic != T_DATA && eTopic != T_END )
+    {
+        eTopic = aDifParser.GetNextTopic();
+
+        aPrgrsBar.Progress();
+
+        const bool bData = !rData.isEmpty();
+
+        switch( eTopic )
+        {
+            case T_TABLE:
+                if( aDifParser.nVector != 0 || aDifParser.nVal != 1 )
+                    bSyntErrWarn = true;
+                if( bData )
+                    pDoc->RenameTab(nBaseTab, rData.toString());
+                break;
+            case T_VECTORS:
+            case T_TUPLES:
+                if( aDifParser.nVector != 0 )
+                    bSyntErrWarn = true;
+                break;
+            case T_DATA:
+                if( aDifParser.nVector != 0 || aDifParser.nVal != 0 )
+                    bSyntErrWarn = true;
+                break;
+            case T_LABEL:
+            case T_COMMENT:
+            case T_SIZE:
+            case T_PERIODICITY:
+            case T_MAJORSTART:
+            case T_MINORSTART:
+            case T_TRUELENGTH:
+            case T_UINITS:
+            case T_DISPLAYUNITS:
+            case T_END:
+            case T_UNKNOWN:
+                break;
+            default:
+                OSL_FAIL( "ScImportDif - missing enum" );
+        }
+    }
+
+    if( eTopic != T_DATA )
+        return SCERR_IMPORT_FORMAT;
+
+    // --- data starts here ---
+    SCCOL nBaseCol = rInsPos.Col();
+
+    // SCCOL_MAX means "no column set yet", i.e. no BOT or cell has been seen
+    SCCOL nColCnt = SCCOL_MAX;
+    SCROW nRowCnt = rInsPos.Row();
+    DifAttrCache aAttrCache;
+
+    DATASET eCurrent = D_UNKNOWN;
+
+    ScSetStringParam aStrParam; // used to set string value without number detection.
+    aStrParam.setTextInput();
+
+    while( eCurrent != D_EOD )
+    {
+        eCurrent = aDifParser.GetNextDataset();
+
+        aPrgrsBar.Progress();
+        const OUString aData = rData.makeStringAndClear();
+
+        switch( eCurrent )
+        {
+            case D_BOT:
+                // the very first BOT stays on the start row
+                if( nColCnt < SCCOL_MAX )
+                    nRowCnt++;
+                nColCnt = nBaseCol;
+                break;
+            case D_EOD:
+                break;
+            case D_NUMERIC:                 // Number cell
+            {
+                if( nColCnt == SCCOL_MAX )
+                    nColCnt = nBaseCol;
+
+                if( pDoc->ValidCol(nColCnt) && pDoc->ValidRow(nRowCnt) )
+                {
+                    // Build the address only after the column has been
+                    // normalised: a record arriving before any BOT would
+                    // otherwise be addressed at column SCCOL_MAX.
+                    const ScAddress aPos(nColCnt, nRowCnt, nBaseTab);
+
+                    pDoc->EnsureTable(nBaseTab);
+
+                    if( DifParser::IsV( aData.getStr() ) || aData == "TRUE" || aData == "FALSE" )
+                    {
+                        pDoc->SetValue(aPos, aDifParser.fVal);
+                        aAttrCache.SetNumFormat( pDoc, nColCnt, nRowCnt,
+                            aDifParser.nNumFormat );
+                    }
+                    else if( aData == "NA" || aData == "ERROR" )
+                    {
+                        pDoc->SetString(aPos, aData, &aStrParam);
+                    }
+                    else
+                    {
+                        OUString aTmp = "#IND:" + aData + "?";
+                        pDoc->SetString(aPos, aTmp, &aStrParam);
+                    }
+                }
+                else
+                    bOverflowWarn = true;
+
+                nColCnt++;
+            }
+            break;
+            case D_STRING:                  // Text cell
+            {
+                if( nColCnt == SCCOL_MAX )
+                    nColCnt = nBaseCol;
+
+                if( pDoc->ValidCol(nColCnt) && pDoc->ValidRow(nRowCnt) )
+                {
+                    // empty strings leave the cell untouched
+                    if (!aData.isEmpty())
+                    {
+                        const ScAddress aPos(nColCnt, nRowCnt, nBaseTab);
+                        pDoc->EnsureTable(nBaseTab);
+                        pDoc->SetTextCell(aPos, aData);
+                    }
+                }
+                else
+                    bOverflowWarn = true;
+
+                nColCnt++;
+            }
+            break;
+            case D_UNKNOWN:
+                break;
+            case D_SYNT_ERROR:
+                break;
+            default:
+                OSL_FAIL( "ScImportDif - missing enum" );
+        }
+    }
+
+    aAttrCache.Apply( *pDoc, nBaseTab );
+
+    // FIXME: Add proper warning! Header syntax problems are currently
+    // reported with the same code as a range overflow.
+    if( bSyntErrWarn || bOverflowWarn )
+        return SCWARN_IMPORT_RANGE_OVERFLOW;
+
+    return ERRCODE_NONE;
+}
+
+/**
+ * Create a parser reading from rNewIn.
+ *
+ * The number formatter is taken from rDoc. If the stream's character set
+ * differs from eCharSet, the stream is switched to eCharSet (and a debug
+ * failure is raised) before Unicode text reading is started.
+ */
+DifParser::DifParser( SvStream& rNewIn, const ScDocument& rDoc, rtl_TextEncoding eCharSet )
+    : fVal(0.0)
+    , nVector(0)
+    , nVal(0)
+    , nNumFormat(0)
+    , pNumFormatter(rDoc.GetFormatTable())
+    , rIn(rNewIn)
+{
+    const bool bCharSetDiffers = ( rIn.GetStreamCharSet() != eCharSet );
+    if ( bCharSetDiffers )
+    {
+        // the caller's choice wins over whatever the stream was set to
+        OSL_FAIL( "CharSet passed overrides and modifies StreamCharSet" );
+        rIn.SetStreamCharSet( eCharSet );
+    }
+
+    rIn.StartReadingUnicodeText( eCharSet );
+}
+
+/**
+ * Read one header topic from the stream.
+ *
+ * A topic is treated as three lines: the keyword, a "vector,value" pair of
+ * integers and a quoted string. nVector and nVal are reset to 0 and then
+ * filled from the second line. m_aData is replaced by the third line without
+ * its first and last character, or emptied if that line has at most two
+ * characters.
+ *
+ * After an unrecognised keyword the remaining two lines of the topic are
+ * dropped; after a malformed second line the third line is dropped. m_aData
+ * is left untouched in both cases.
+ *
+ * @return the TOPIC belonging to the keyword, T_UNKNOWN for an unrecognised
+ *         keyword, or T_END as soon as a line cannot be read
+ */
+TOPIC DifParser::GetNextTopic()
+{
+    enum STATE { S_VectorVal, S_Data, S_END, S_START, S_UNKNOWN, S_ERROR_L2 };
+
+    struct TopicKey
+    {
+        std::u16string_view aKey;
+        TOPIC eTopic;
+    };
+
+    static const TopicKey aTopicKeys[] =
+    {
+        { pKeyTABLE, T_TABLE },
+        { pKeyVECTORS, T_VECTORS },
+        { pKeyTUPLES, T_TUPLES },
+        { pKeyDATA, T_DATA },
+        { u"LABEL", T_LABEL },
+        { u"COMMENT", T_COMMENT },
+        { u"SIZE", T_SIZE },
+        { u"PERIODICITY", T_PERIODICITY },
+        { u"MAJORSTART", T_MAJORSTART },
+        { u"MINORSTART", T_MINORSTART },
+        { u"TRUELENGTH", T_TRUELENGTH },
+        { u"UINITS", T_UINITS },
+        { u"DISPLAYUNITS", T_DISPLAYUNITS }
+    };
+
+    STATE eS = S_START;
+    OUString aLine;
+
+    nVector = 0;
+    nVal = 0;
+    TOPIC eRet = T_UNKNOWN;
+
+    // Exactly one line is consumed per iteration; the state says which line
+    // of the topic it is.
+    while( eS != S_END )
+    {
+        if( !ReadNextLine( aLine ) )
+        {
+            // end of stream: leave directly instead of entering the switch
+            // in state S_END
+            eRet = T_END;
+            break;
+        }
+
+        switch( eS )
+        {
+            case S_START:
+            {
+                // line 1: keyword
+                eS = S_UNKNOWN;
+                for( const TopicKey& rEntry : aTopicKeys )
+                {
+                    if( aLine == rEntry.aKey )
+                    {
+                        eRet = rEntry.eTopic;
+                        eS = S_VectorVal;
+                        break;
+                    }
+                }
+            }
+            break;
+            case S_VectorVal:
+            {
+                // line 2: "<vector>,<value>"
+                const sal_Unicode* pCur = aLine.getStr();
+
+                pCur = ScanIntVal( pCur, nVector );
+
+                if( pCur && *pCur == ',' )
+                {
+                    pCur++;
+                    // nVal keeps its reset value 0 if no number follows
+                    ScanIntVal( pCur, nVal );
+                    eS = S_Data;
+                }
+                else
+                    eS = S_ERROR_L2;
+            }
+            break;
+            case S_Data:
+                // line 3: quoted string; replace (not extend) the previous
+                // topic's data
+                OSL_ENSURE( aLine.getLength() >= 2,
+                    "+GetNextTopic(): <String> is too short!" );
+                if( aLine.getLength() > 2 )
+                    m_aData = aLine.subView(1, aLine.getLength() - 2);
+                else
+                    m_aData.truncate();
+                eS = S_END;
+                break;
+            case S_UNKNOWN:
+                // line 2 of an unknown topic was just read and is dropped;
+                // line 3 is dropped by the next iteration
+                eS = S_ERROR_L2;
+                break;
+            case S_ERROR_L2:                // error happened in line 2
+                // line 3 was just read and is dropped
+                eS = S_END;
+                break;
+            case S_END:
+                OSL_FAIL( "DifParser::GetNextTopic - unexpected state" );
+                break;
+            default:
+                OSL_FAIL( "DifParser::GetNextTopic - missing enum" );
+        }
+    }
+
+    return eRet;
+}
+
+// DIF import helper: collapse every doubled quote ("") in rString into a
+// single quote. Lone quote characters stay as they are, because older
+// versions didn't escape quotes in strings (and Excel doesn't when using the
+// clipboard). The caller strips the quotes enclosing the string beforehand.
+static void lcl_DeEscapeQuotesDif(OUStringBuffer& rString)
+{
+    const OUString aUnescaped = rString.makeStringAndClear().replaceAll("\"\"", "\"");
+    rString = aUnescaped;
+}
+
+// Ask the number formatter whether the given text is a number.
+// On success fVal and nNumFormat are updated and D_NUMERIC is returned;
+// otherwise both members keep their values and D_SYNT_ERROR is returned.
+DATASET DifParser::GetNumberDataset( const sal_Unicode* pPossibleNumericData )
+{
+    OSL_ENSURE( pNumFormatter, "-DifParser::GetNumberDataset(): No Formatter, more fun!" );
+
+    const OUString aCandidate( pPossibleNumericData );
+    sal_uInt32 nDetectedFormat = 0;
+    double fParsed;
+
+    if( !pNumFormatter->IsNumberFormat( aCandidate, nDetectedFormat, fParsed ) )
+        return D_SYNT_ERROR;
+
+    fVal = fParsed;
+    nNumFormat = nDetectedFormat;
+    return D_NUMERIC;
+}
+
+// Deliver the next input line in rStr. A non-empty line buffered by
+// LookAhead() is handed out first (and the buffer cleared); only otherwise is
+// the stream read. Returns true if a line was delivered.
+bool DifParser::ReadNextLine( OUString& rStr )
+{
+    if( !aLookAheadLine.isEmpty() )
+    {
+        rStr = aLookAheadLine;
+        aLookAheadLine.clear();
+        return true;
+    }
+
+    return rIn.ReadUniOrByteStringLine( rStr, rIn.GetStreamCharSet() );
+}
+
+// Read the next stream line into aLookAheadLine and report whether it looks
+// like the first line of a data record:
+//   "-" followed by text accepted by Is1_0()     (special record)
+//   "0," followed by text the formatter accepts  (numeric record)
+//   text starting with '1' accepted by Is1_0()   (string record)
+// Checking a numeric line goes through GetNumberDataset() and may therefore
+// update fVal and nNumFormat.
+bool DifParser::LookAhead()
+{
+    OSL_ENSURE( aLookAheadLine.isEmpty(), "*DifParser::LookAhead(): LookAhead called twice in a row" );
+    rIn.ReadUniOrByteStringLine( aLookAheadLine, rIn.GetStreamCharSet() );
+
+    const sal_Unicode* pLine = aLookAheadLine.getStr();
+
+    switch( pLine[0] )
+    {
+        case '-':                   // Special Datatype
+            if( Is1_0( pLine + 1 ) )
+                return true;
+            return false;
+        case '0':                   // Numeric Data
+            if( pLine[1] != ',' )
+                return false;
+            return GetNumberDataset( pLine + 2 ) != D_SYNT_ERROR;
+        case '1':                   // String Data
+            if( Is1_0( pLine ) )
+                return true;
+            return false;
+    }
+
+    return false;
+}
+
+/**
+ * Read the next data record and classify it by the first character of its
+ * first line.
+ *
+ *  '-'  special record: if the rest of the line passes Is1_0(), the next
+ *       line decides between D_BOT and D_EOD.
+ *  '0'  numeric record ("0,<number>"): the number is parsed into fVal /
+ *       nNumFormat and the second line is stored in m_aData. If the number
+ *       cannot be parsed, m_aData receives an "#ERR: ..." text and the
+ *       record is reported as D_STRING.
+ *  '1'  string record: the quoted text (possibly spanning several lines) is
+ *       stored in m_aData with the enclosing quotes removed and doubled
+ *       quotes resolved.
+ *
+ * If nothing was recognised, one further line is read and discarded. When
+ * the stream reports eof afterwards, the result is forced to D_EOD.
+ */
+DATASET DifParser::GetNextDataset()
+{
+    DATASET eRet = D_UNKNOWN;
+    OUString aRecord;
+
+    ReadNextLine( aRecord );
+
+    const sal_Unicode* pFirst = aRecord.getStr();
+
+    switch( pFirst[0] )
+    {
+        case '-':                   // Special Datatype
+            if( Is1_0( pFirst + 1 ) )
+            {
+                ReadNextLine( aRecord );
+                if( IsBOT( aRecord.getStr() ) )
+                    eRet = D_BOT;
+                else if( IsEOD( aRecord.getStr() ) )
+                    eRet = D_EOD;
+            }
+            break;
+        case '0':                   // Numeric Data
+            // value in fVal, 2. line in m_aData
+            if( pFirst[1] == ',' )
+            {
+                const sal_Unicode* pValue = pFirst + 2;
+                eRet = GetNumberDataset(pValue);
+                OUString aSecondLine;
+                ReadNextLine( aSecondLine );
+                if ( eRet == D_SYNT_ERROR )
+                {   // for broken records write "#ERR: data" to cell
+                    m_aData = OUString::Concat("#ERR: ") + pValue + " (" + aSecondLine + ")";
+                    eRet = D_STRING;
+                }
+                else
+                {
+                    m_aData = aSecondLine;
+                }
+            }
+            break;
+        case '1':                   // String Data
+        {
+            if( !Is1_0( aRecord.getStr() ) )
+                break;
+
+            ReadNextLine( aRecord );
+            sal_Int32 nLen = aRecord.getLength();
+            const sal_Unicode* pText = aRecord.getStr();
+
+            // the text has to open with a quote
+            if( nLen < 1 || *pText != '"' )
+                break;
+
+            // Quotes are not always escaped (duplicated), see lcl_DeEscapeQuotesDif
+            // A look ahead into the next line is needed in order to deal with
+            // multiline strings containing quotes
+            if( LookAhead() )
+            {
+                // Single line string: needs a closing quote as well
+                if( nLen >= 2 && pText[nLen - 1] == '"' )
+                {
+                    m_aData = aRecord.subView( 1, nLen - 2 );
+                    lcl_DeEscapeQuotesDif(m_aData);
+                    eRet = D_STRING;
+                }
+                break;
+            }
+
+            // Multiline string: collect lines until one is followed by the
+            // start of a new record
+            m_aData = aRecord.subView( 1 );
+            for (;;)
+            {
+                m_aData.append("\n");
+
+                if( rIn.eof() || !ReadNextLine( aRecord ) )
+                    break;
+
+                nLen = aRecord.getLength();
+                if( nLen < 1 )
+                    continue;       // empty line inside the string
+
+                pText = aRecord.getStr();
+                if( !LookAhead() )
+                {
+                    m_aData.append(aRecord);
+                    continue;
+                }
+
+                // last line of the string: accepted only with a closing quote
+                if( pText[nLen - 1] == '"' )
+                {
+                    m_aData.append(aRecord.subView(0, nLen - 1));
+                    lcl_DeEscapeQuotesDif(m_aData);
+                    eRet = D_STRING;
+                }
+                break;
+            }
+        }
+        break;
+    }
+
+    if( eRet == D_UNKNOWN )
+        ReadNextLine( aRecord );
+
+    if( rIn.eof() )
+        eRet = D_EOD;
+
+    return eRet;
+}
+
+// Parse an unsigned decimal number at pStart into rRet.
+// Leading blanks and tabs are skipped. Returns nullptr (rRet untouched) if no
+// digit follows; otherwise returns the position after the last digit taken.
+// Scanning stops early once rRet has reached 0xFFFFFFFF / 10, so that the
+// next multiplication cannot overflow.
+const sal_Unicode* DifParser::ScanIntVal( const sal_Unicode* pStart, sal_uInt32& rRet )
+{
+    // eat leading whitespace, not specified, but seen in the wild
+    while (*pStart == ' ' || *pStart == '\t')
+        ++pStart;
+
+    if( !IsNumber( *pStart ) )
+        return nullptr;
+
+    rRet = static_cast<sal_uInt32>( *pStart - '0' );
+
+    for( ++pStart; IsNumber( *pStart ) && rRet < ( 0xFFFFFFFF / 10 ); ++pStart )
+        rRet = rRet * 10 + static_cast<sal_uInt32>( *pStart - '0' );
+
+    return pStart;
+}
+
+// A new column has no current format run yet.
+DifColumn::DifColumn() : mpCurrent(nullptr) {}
+
+// Record that row nRow uses number format nNumFormat.
+// Format 0 only ends the current run. Any other format either extends the
+// current run (same format, directly following row) or starts a new one.
+void DifColumn::SetNumFormat( const ScDocument* pDoc, SCROW nRow, const sal_uInt32 nNumFormat )
+{
+    OSL_ENSURE( pDoc->ValidRow(nRow), "*DifColumn::SetNumFormat(): Row too big!" );
+
+    if( nNumFormat == 0 )
+    {
+        mpCurrent = nullptr;
+        return;
+    }
+
+    if( !mpCurrent )
+    {
+        NewEntry( nRow, nNumFormat );
+        return;
+    }
+
+    OSL_ENSURE( nRow > 0,
+        "*DifColumn::SetNumFormat(): more cannot be zero!" );
+    OSL_ENSURE( nRow > mpCurrent->nEnd,
+        "*DifColumn::SetNumFormat(): start from scratch?" );
+
+    const bool bExtendsRun = mpCurrent->nNumFormat == nNumFormat && mpCurrent->nEnd == nRow - 1;
+    if( bExtendsRun )
+        mpCurrent->nEnd = nRow;
+    else
+        NewEntry( nRow, nNumFormat );
+}
+
+// Append a run covering only row nPos with format nNumFormat and make it the
+// current run.
+void DifColumn::NewEntry( const SCROW nPos, const sal_uInt32 nNumFormat )
+{
+    auto& rNew = maEntries.emplace_back();
+    rNew.nStart = nPos;
+    rNew.nEnd = nPos;
+    rNew.nNumFormat = nNumFormat;
+    mpCurrent = &rNew;
+}
+
+// Apply every recorded run as an ATTR_VALUE_FORMAT pattern to rows
+// nStart..nEnd of column nCol on sheet nTab. One pattern object is reused;
+// its item set is cleared after each run.
+void DifColumn::Apply( ScDocument& rDoc, const SCCOL nCol, const SCTAB nTab )
+{
+    ScPatternAttr aPattern( rDoc.GetPool() );
+    SfxItemSet& rItems = aPattern.GetItemSet();
+
+    for (const auto& rRun : maEntries)
+    {
+        OSL_ENSURE( rRun.nNumFormat > 0,
+            "+DifColumn::Apply(): Number format must not be 0!" );
+
+        rItems.Put( SfxUInt32Item( ATTR_VALUE_FORMAT, rRun.nNumFormat ) );
+        rDoc.ApplyPatternAreaTab( nCol, rRun.nStart, nCol, rRun.nEnd, nTab, aPattern );
+        rItems.ClearItem();
+    }
+}
+
+// Nothing to set up beyond default member construction.
+DifAttrCache::DifAttrCache() = default;
+
+// Members clean up after themselves.
+DifAttrCache::~DifAttrCache() = default;
+
+// Record number format nNumFormat for cell (nCol, nRow). The per-column
+// store is created the first time a column is touched.
+void DifAttrCache::SetNumFormat( const ScDocument* pDoc, const SCCOL nCol, const SCROW nRow, const sal_uInt32 nNumFormat )
+{
+    OSL_ENSURE( pDoc->ValidCol(nCol), "-DifAttrCache::SetNumFormat(): Col too big!" );
+
+    // one lookup: operator[] creates an empty slot for a new column
+    auto& rpColumn = maColMap[ nCol ];
+    if( !rpColumn )
+        rpColumn.reset( new DifColumn );
+
+    rpColumn->SetNumFormat( pDoc, nRow, nNumFormat );
+}
+
+// Apply the recorded number formats of every column in the writable column
+// range of sheet nTab; columns without recorded formats are skipped.
+void DifAttrCache::Apply( ScDocument& rDoc, SCTAB nTab )
+{
+    for( SCCOL nCol : rDoc.GetWritableColumnsRange(nTab, 0, rDoc.MaxCol()) )
+    {
+        const auto itColumn = maColMap.find(nCol);
+        if( itColumn == maColMap.end() )
+            continue;
+
+        itColumn->second->Apply( rDoc, nCol, nTab );
+    }
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */