diff options
Diffstat (limited to 'sc/source/filter/dif/difimp.cxx')
-rw-r--r-- | sc/source/filter/dif/difimp.cxx | 674 |
1 files changed, 674 insertions, 0 deletions
diff --git a/sc/source/filter/dif/difimp.cxx b/sc/source/filter/dif/difimp.cxx new file mode 100644 index 000000000..fd88cdf4c --- /dev/null +++ b/sc/source/filter/dif/difimp.cxx @@ -0,0 +1,674 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <svl/numformat.hxx> +#include <tools/stream.hxx> +#include <osl/diagnose.h> +#include <dif.hxx> +#include <docpool.hxx> +#include <document.hxx> +#include <fprogressbar.hxx> +#include <ftools.hxx> +#include <patattr.hxx> +#include <scerrors.hxx> +#include <scitems.hxx> +#include <stringutil.hxx> +#include <table.hxx> +#include <memory> + +const std::u16string_view pKeyTABLE = u"TABLE"; +const std::u16string_view pKeyVECTORS = u"VECTORS"; +const std::u16string_view pKeyTUPLES = u"TUPLES"; +const std::u16string_view pKeyDATA = u"DATA"; +const std::u16string_view pKeyBOT = u"BOT"; +const std::u16string_view pKeyEOD = u"EOD"; + +ErrCode ScFormatFilterPluginImpl::ScImportDif(SvStream& rIn, ScDocument* pDoc, const ScAddress& rInsPos, + const rtl_TextEncoding eVon ) +{ + DifParser aDifParser( rIn, *pDoc, eVon ); + + SCTAB nBaseTab = rInsPos.Tab(); + + TOPIC eTopic = T_UNKNOWN; + bool bSyntErrWarn = false; + bool bOverflowWarn = false; + + OUStringBuffer& rData = aDifParser.m_aData; + + rIn.Seek( 0 ); + + ScfStreamProgressBar aPrgrsBar( rIn, pDoc->GetDocumentShell() ); + + while( eTopic != T_DATA && eTopic != T_END ) + { + eTopic = aDifParser.GetNextTopic(); + + aPrgrsBar.Progress(); + + const bool bData = !rData.isEmpty(); + + switch( eTopic ) + { + case T_TABLE: + { + if( aDifParser.nVector != 0 || aDifParser.nVal != 1 ) + bSyntErrWarn = true; + if( bData ) + pDoc->RenameTab(nBaseTab, rData.toString()); + } + break; + case T_VECTORS: + { + if( aDifParser.nVector != 0 ) + bSyntErrWarn = true; + } + break; + case T_TUPLES: + { + if( aDifParser.nVector != 0 ) + bSyntErrWarn = true; + } + break; + case T_DATA: + { + if( aDifParser.nVector != 0 || aDifParser.nVal != 0 ) + bSyntErrWarn = true; + } + break; + case T_LABEL: + case T_COMMENT: + case T_SIZE: + case T_PERIODICITY: + case T_MAJORSTART: + case T_MINORSTART: + case T_TRUELENGTH: + case T_UINITS: + case T_DISPLAYUNITS: + case T_END: + case T_UNKNOWN: + break; + default: + OSL_FAIL( "ScImportDif - missing enum" ); + } + + } + + if( eTopic == T_DATA ) + { // data starts here + SCCOL nBaseCol = rInsPos.Col(); + + SCCOL nColCnt = SCCOL_MAX; + SCROW nRowCnt = rInsPos.Row(); + DifAttrCache aAttrCache; + + DATASET eCurrent = D_UNKNOWN; + + ScSetStringParam aStrParam; // used to set string value without number detection. + aStrParam.setTextInput(); + + while( eCurrent != D_EOD ) + { + eCurrent = aDifParser.GetNextDataset(); + + aPrgrsBar.Progress(); + ScAddress aPos(nColCnt, nRowCnt, nBaseTab); + const OUString aData = rData.makeStringAndClear(); + + switch( eCurrent ) + { + case D_BOT: + if( nColCnt < SCCOL_MAX ) + nRowCnt++; + nColCnt = nBaseCol; + break; + case D_EOD: + break; + case D_NUMERIC: // Number cell + if( nColCnt == SCCOL_MAX ) + nColCnt = nBaseCol; + + if( pDoc->ValidCol(nColCnt) && pDoc->ValidRow(nRowCnt) ) + { + pDoc->EnsureTable(nBaseTab); + + if( DifParser::IsV( aData.getStr() ) ) + { + pDoc->SetValue(aPos, aDifParser.fVal); + aAttrCache.SetNumFormat( pDoc, nColCnt, nRowCnt, + aDifParser.nNumFormat ); + } + else if( aData == "TRUE" || aData == "FALSE" ) + { + pDoc->SetValue(aPos, aDifParser.fVal); + aAttrCache.SetNumFormat( pDoc, nColCnt, nRowCnt, + aDifParser.nNumFormat ); + } + else if( aData == "NA" || aData == "ERROR" ) + { + pDoc->SetString(aPos, aData, &aStrParam); + } + else + { + OUString aTmp = "#IND:" + aData + "?"; + pDoc->SetString(aPos, aTmp, &aStrParam); + } + } + else + bOverflowWarn = true; + + nColCnt++; + break; + case D_STRING: // Text cell + if( nColCnt == SCCOL_MAX ) + nColCnt = nBaseCol; + + if( pDoc->ValidCol(nColCnt) && pDoc->ValidRow(nRowCnt) ) + { + if (!aData.isEmpty()) + { + pDoc->EnsureTable(nBaseTab); + pDoc->SetTextCell(aPos, aData); + } + } + else + bOverflowWarn = true; + + nColCnt++; + break; + case D_UNKNOWN: + break; + case D_SYNT_ERROR: + break; + default: + OSL_FAIL( "ScImportDif - missing enum" ); + } + } + + aAttrCache.Apply( *pDoc, nBaseTab ); + } + else + return SCERR_IMPORT_FORMAT; + + if( bSyntErrWarn ) + + // FIXME: Add proper warning! + return SCWARN_IMPORT_RANGE_OVERFLOW; + + else if( bOverflowWarn ) + return SCWARN_IMPORT_RANGE_OVERFLOW; + else + return ERRCODE_NONE; +} + +DifParser::DifParser( SvStream& rNewIn, const ScDocument& rDoc, rtl_TextEncoding eCharSet ) + : fVal(0.0) + , nVector(0) + , nVal(0) + , nNumFormat(0) + , pNumFormatter(rDoc.GetFormatTable()) + , rIn(rNewIn) +{ + if ( rIn.GetStreamCharSet() != eCharSet ) + { + OSL_FAIL( "CharSet passed overrides and modifies StreamCharSet" ); + rIn.SetStreamCharSet( eCharSet ); + } + rIn.StartReadingUnicodeText( eCharSet ); +} + +TOPIC DifParser::GetNextTopic() +{ + enum STATE { S_VectorVal, S_Data, S_END, S_START, S_UNKNOWN, S_ERROR_L2 }; + + static const std::u16string_view ppKeys[] = + { + pKeyTABLE, // 0 + pKeyVECTORS, + pKeyTUPLES, + pKeyDATA, + u"LABEL", + u"COMMENT", // 5 + u"SIZE", + u"PERIODICITY", + u"MAJORSTART", + u"MINORSTART", + u"TRUELENGTH", // 10 + u"UINITS", + u"DISPLAYUNITS", + u"" // 13 + }; + + static const TOPIC pTopics[] = + { + T_TABLE, // 0 + T_VECTORS, + T_TUPLES, + T_DATA, + T_LABEL, + T_COMMENT, // 5 + T_SIZE, + T_PERIODICITY, + T_MAJORSTART, + T_MINORSTART, + T_TRUELENGTH, // 10 + T_UINITS, + T_DISPLAYUNITS, + T_UNKNOWN // 13 + }; + + STATE eS = S_START; + OUString aLine; + + nVector = 0; + nVal = 0; + TOPIC eRet = T_UNKNOWN; + + while( eS != S_END ) + { + if( !ReadNextLine( aLine ) ) + { + eS = S_END; + eRet = T_END; + } + + switch( eS ) + { + case S_START: + { + const std::u16string_view* pRef; + sal_uInt16 nCnt = 0; + bool bSearch = true; + + pRef = &ppKeys[ nCnt ]; + + while( bSearch ) + { + if( aLine == *pRef ) + { + eRet = pTopics[ nCnt ]; + bSearch = false; + } + else + { + nCnt++; + pRef = &ppKeys[ nCnt ]; + if( pRef->empty() ) + bSearch = false; + } + } + + if( !pRef->empty() ) + eS = S_VectorVal; + else + eS = S_UNKNOWN; + } + break; + case S_VectorVal: + { + const sal_Unicode* pCur = aLine.getStr(); + + pCur = ScanIntVal( pCur, nVector ); + + if( pCur && *pCur == ',' ) + { + pCur++; + ScanIntVal( pCur, nVal ); + eS = S_Data; + } + else + eS = S_ERROR_L2; + } + break; + case S_Data: + OSL_ENSURE( aLine.getLength() >= 2, + "+GetNextTopic(): <String> is too short!" ); + if( aLine.getLength() > 2 ) + m_aData.append(aLine.subView(1, aLine.getLength() - 2)); + else + m_aData.truncate(); + eS = S_END; + break; + case S_END: + OSL_FAIL( "DifParser::GetNextTopic - unexpected state" ); + break; + case S_UNKNOWN: + // skip 2 lines + ReadNextLine( aLine ); + [[fallthrough]]; + case S_ERROR_L2: // error happened in line 2 + // skip 1 line + ReadNextLine( aLine ); + eS = S_END; + break; + default: + OSL_FAIL( "DifParser::GetNextTopic - missing enum" ); + } + } + + return eRet; +} + +static void lcl_DeEscapeQuotesDif(OUStringBuffer& rString) +{ + // Special handling for DIF import: Escaped (duplicated) quotes are resolved. + // Single quote characters are left in place because older versions didn't + // escape quotes in strings (and Excel doesn't when using the clipboard). + // The quotes around the string are removed before this function is called. + + rString = rString.makeStringAndClear().replaceAll("\"\"", "\""); +} + +// Determine if passed in string is numeric data and set fVal/nNumFormat if so +DATASET DifParser::GetNumberDataset( const sal_Unicode* pPossibleNumericData ) +{ + DATASET eRet = D_SYNT_ERROR; + + OSL_ENSURE( pNumFormatter, "-DifParser::GetNumberDataset(): No Formatter, more fun!" ); + OUString aTestVal( pPossibleNumericData ); + sal_uInt32 nFormat = 0; + double fTmpVal; + if( pNumFormatter->IsNumberFormat( aTestVal, nFormat, fTmpVal ) ) + { + fVal = fTmpVal; + nNumFormat = nFormat; + eRet = D_NUMERIC; + } + else + eRet = D_SYNT_ERROR; + + return eRet; +} + +bool DifParser::ReadNextLine( OUString& rStr ) +{ + if( aLookAheadLine.isEmpty() ) + { + return rIn.ReadUniOrByteStringLine( rStr, rIn.GetStreamCharSet() ); + } + else + { + rStr = aLookAheadLine; + aLookAheadLine.clear(); + return true; + } +} + +// Look ahead in the stream to determine if the next line is the first line of +// a valid data record structure +bool DifParser::LookAhead() +{ + const sal_Unicode* pCurrentBuffer; + bool bValidStructure = false; + + OSL_ENSURE( aLookAheadLine.isEmpty(), "*DifParser::LookAhead(): LookAhead called twice in a row" ); + rIn.ReadUniOrByteStringLine( aLookAheadLine, rIn.GetStreamCharSet() ); + + pCurrentBuffer = aLookAheadLine.getStr(); + + switch( *pCurrentBuffer ) + { + case '-': // Special Datatype + pCurrentBuffer++; + + if( Is1_0( pCurrentBuffer ) ) + { + bValidStructure = true; + } + break; + case '0': // Numeric Data + pCurrentBuffer++; + if( *pCurrentBuffer == ',' ) + { + pCurrentBuffer++; + bValidStructure = ( GetNumberDataset(pCurrentBuffer) != D_SYNT_ERROR ); + } + break; + case '1': // String Data + if( Is1_0( aLookAheadLine.getStr() ) ) + { + bValidStructure = true; + } + break; + } + return bValidStructure; +} + +DATASET DifParser::GetNextDataset() +{ + DATASET eRet = D_UNKNOWN; + OUString aLine; + const sal_Unicode* pCurrentBuffer; + + ReadNextLine( aLine ); + + pCurrentBuffer = aLine.getStr(); + + switch( *pCurrentBuffer ) + { + case '-': // Special Datatype + pCurrentBuffer++; + + if( Is1_0( pCurrentBuffer ) ) + { + ReadNextLine( aLine ); + if( IsBOT( aLine.getStr() ) ) + eRet = D_BOT; + else if( IsEOD( aLine.getStr() ) ) + eRet = D_EOD; + } + break; + case '0': // Numeric Data + pCurrentBuffer++; // value in fVal, 2. line in m_aData + if( *pCurrentBuffer == ',' ) + { + pCurrentBuffer++; + eRet = GetNumberDataset(pCurrentBuffer); + OUString aTmpLine; + ReadNextLine( aTmpLine ); + if ( eRet == D_SYNT_ERROR ) + { // for broken records write "#ERR: data" to cell + m_aData = OUString::Concat("#ERR: ") + pCurrentBuffer + " (" + aTmpLine + ")"; + eRet = D_STRING; + } + else + { + m_aData = aTmpLine; + } + } + break; + case '1': // String Data + if( Is1_0( aLine.getStr() ) ) + { + ReadNextLine( aLine ); + sal_Int32 nLineLength = aLine.getLength(); + const sal_Unicode* pLine = aLine.getStr(); + + if( nLineLength >= 1 && *pLine == '"' ) + { + // Quotes are not always escaped (duplicated), see lcl_DeEscapeQuotesDif + // A look ahead into the next line is needed in order to deal with + // multiline strings containing quotes + if( LookAhead() ) + { + // Single line string + if( nLineLength >= 2 && pLine[nLineLength - 1] == '"' ) + { + m_aData = aLine.subView( 1, nLineLength - 2 ); + lcl_DeEscapeQuotesDif(m_aData); + eRet = D_STRING; + } + } + else + { + // Multiline string + m_aData = aLine.subView( 1 ); + bool bContinue = true; + while ( bContinue ) + { + m_aData.append("\n"); + bContinue = !rIn.eof() && ReadNextLine( aLine ); + if( bContinue ) + { + nLineLength = aLine.getLength(); + if( nLineLength >= 1 ) + { + pLine = aLine.getStr(); + bContinue = !LookAhead(); + if( bContinue ) + { + m_aData.append(aLine); + } + else if( pLine[nLineLength - 1] == '"' ) + { + m_aData.append(aLine.subView(0, nLineLength -1)); + lcl_DeEscapeQuotesDif(m_aData); + eRet = D_STRING; + } + } + } + } + } + } + } + break; + } + + if( eRet == D_UNKNOWN ) + ReadNextLine( aLine ); + + if( rIn.eof() ) + eRet = D_EOD; + + return eRet; +} + +const sal_Unicode* DifParser::ScanIntVal( const sal_Unicode* pStart, sal_uInt32& rRet ) +{ + // eat leading whitespace, not specified, but seen in the wild + while (*pStart == ' ' || *pStart == '\t') + ++pStart; + + sal_Unicode cCurrent = *pStart; + + if( IsNumber( cCurrent ) ) + rRet = static_cast<sal_uInt32>( cCurrent - '0' ); + else + return nullptr; + + pStart++; + cCurrent = *pStart; + + while( IsNumber( cCurrent ) && rRet < ( 0xFFFFFFFF / 10 ) ) + { + rRet *= 10; + rRet += static_cast<sal_uInt32>( cCurrent - '0' ); + + pStart++; + cCurrent = *pStart; + } + + return pStart; +} + +DifColumn::DifColumn () + : mpCurrent(nullptr) +{ +} + +void DifColumn::SetNumFormat( const ScDocument* pDoc, SCROW nRow, const sal_uInt32 nNumFormat ) +{ + OSL_ENSURE( pDoc->ValidRow(nRow), "*DifColumn::SetNumFormat(): Row too big!" ); + + if( nNumFormat > 0 ) + { + if(mpCurrent) + { + OSL_ENSURE( nRow > 0, + "*DifColumn::SetNumFormat(): more cannot be zero!" ); + OSL_ENSURE( nRow > mpCurrent->nEnd, + "*DifColumn::SetNumFormat(): start from scratch?" ); + + if( mpCurrent->nNumFormat == nNumFormat && mpCurrent->nEnd == nRow - 1 ) + mpCurrent->nEnd = nRow; + else + NewEntry( nRow, nNumFormat ); + } + else + NewEntry(nRow,nNumFormat ); + } + else + mpCurrent = nullptr; +} + +void DifColumn::NewEntry( const SCROW nPos, const sal_uInt32 nNumFormat ) +{ + maEntries.emplace_back(); + mpCurrent = &maEntries.back(); + mpCurrent->nStart = mpCurrent->nEnd = nPos; + mpCurrent->nNumFormat = nNumFormat; + +} + +void DifColumn::Apply( ScDocument& rDoc, const SCCOL nCol, const SCTAB nTab ) +{ + ScPatternAttr aAttr( rDoc.GetPool() ); + SfxItemSet &rItemSet = aAttr.GetItemSet(); + + for (const auto& rEntry : maEntries) + { + OSL_ENSURE( rEntry.nNumFormat > 0, + "+DifColumn::Apply(): Number format must not be 0!" ); + + rItemSet.Put( SfxUInt32Item( ATTR_VALUE_FORMAT, rEntry.nNumFormat ) ); + + rDoc.ApplyPatternAreaTab( nCol, rEntry.nStart, nCol, rEntry.nEnd, nTab, aAttr ); + + rItemSet.ClearItem(); + } +} + +DifAttrCache::DifAttrCache() +{ +} + +DifAttrCache::~DifAttrCache() +{ +} + +void DifAttrCache::SetNumFormat( const ScDocument* pDoc, const SCCOL nCol, const SCROW nRow, const sal_uInt32 nNumFormat ) +{ + OSL_ENSURE( pDoc->ValidCol(nCol), "-DifAttrCache::SetNumFormat(): Col too big!" ); + + if( !maColMap.count(nCol) ) + maColMap[ nCol ].reset( new DifColumn ); + + maColMap[ nCol ]->SetNumFormat( pDoc, nRow, nNumFormat ); +} + +void DifAttrCache::Apply( ScDocument& rDoc, SCTAB nTab ) +{ + for( SCCOL nCol : rDoc.GetWritableColumnsRange(nTab, 0, rDoc.MaxCol()) ) + { + if( maColMap.count(nCol) ) + maColMap[ nCol ]->Apply( rDoc, nCol, nTab ); + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |