diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
commit | 267c6f2ac71f92999e969232431ba04678e7437e (patch) | |
tree | 358c9467650e1d0a1d7227a21dac2e3d08b622b2 /sc/source/ui/docshell/impex.cxx | |
parent | Initial commit. (diff) | |
download | libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip |
Adding upstream version 4:24.2.0.upstream/4%24.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sc/source/ui/docshell/impex.cxx')
-rw-r--r-- | sc/source/ui/docshell/impex.cxx | 2894 |
1 files changed, 2894 insertions, 0 deletions
diff --git a/sc/source/ui/docshell/impex.cxx b/sc/source/ui/docshell/impex.cxx new file mode 100644 index 0000000000..4a585657de --- /dev/null +++ b/sc/source/ui/docshell/impex.cxx @@ -0,0 +1,2894 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <comphelper/processfactory.hxx> +#include <i18nlangtag/languagetag.hxx> +#include <i18nutil/unicode.hxx> +#include <sot/formats.hxx> +#include <sfx2/mieclip.hxx> +#include <com/sun/star/i18n/CalendarFieldIndex.hpp> +#include <sal/log.hxx> +#include <unotools/charclass.hxx> +#include <osl/module.hxx> +#include <o3tl/string_view.hxx> + +#include <global.hxx> +#include <docsh.hxx> +#include <undoblk.hxx> +#include <rangenam.hxx> +#include <tabvwsh.hxx> +#include <filter.hxx> +#include <asciiopt.hxx> +#include <formulacell.hxx> +#include <cellform.hxx> +#include <progress.hxx> +#include <scitems.hxx> +#include <editable.hxx> +#include <compiler.hxx> +#include <warnbox.hxx> +#include <clipparam.hxx> +#include <impex.hxx> +#include <editutil.hxx> +#include <patattr.hxx> +#include <docpool.hxx> +#include <stringutil.hxx> +#include <cellvalue.hxx> +#include <tokenarray.hxx> +#include <documentimport.hxx> +#include <refundo.hxx> +#include <mtvelements.hxx> + +#include <globstr.hrc> +#include <scresid.hxx> +#include <o3tl/safeint.hxx> +#include <tools/svlibrary.h> +#include <unotools/configmgr.hxx> +#include <vcl/svapp.hxx> +#include <vcl/weld.hxx> +#include <editeng/editobj.hxx> +#include <svl/numformat.hxx> +#include <rtl/character.hxx> +#include <rtl/math.hxx> +#include <sax/tools/converter.hxx> + +#include <memory> +#include <string_view> + +#include <unicode/uchar.h> + +#include <osl/endian.h> + +// We don't want to end up with 2GB read in one line just because of malformed +// multiline fields, so chop it _somewhere_, which is twice supported columns +// times arbitrary maximum cell content length, 2*1024*64K=128M, and because +// it's sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of +// luck anyway. 
+constexpr sal_Int32 nArbitraryCellLengthLimit = SAL_MAX_UINT16; +constexpr sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * nArbitraryCellLengthLimit; + +namespace +{ + const char SYLK_LF[] = "\x1b :"; + + bool lcl_IsEndianSwap( const SvStream& rStrm ) + { + #ifdef OSL_BIGENDIAN + return rStrm.GetEndian() != SvStreamEndian::BIG; + #else + return rStrm.GetEndian() != SvStreamEndian::LITTLE; + #endif + } +} + +namespace { + +enum class SylkVersion +{ + SCALC3, // Wrote wrongly quoted strings and unescaped semicolons. + OOO32, // Correct strings, plus multiline content. + OWN, // Place our new versions, if any, before this value. + OTHER // Assume that aliens wrote correct strings. +}; + +} + +// Whole document without Undo +ScImportExport::ScImportExport( ScDocument& r ) + : pDocSh( r.GetDocumentShell() ), rDoc( r ), + nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K), + cSep( '\t' ), cStr( '"' ), + bFormulas( false ), bIncludeFiltered( true ), + bAll( true ), bSingle( true ), bUndo( false ), + bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), + mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false) +{ + pUndoDoc = nullptr; + pExtOptions = nullptr; +} + +// Insert am current cell without range(es) +ScImportExport::ScImportExport( ScDocument& r, const ScAddress& rPt ) + : pDocSh( r.GetDocumentShell() ), rDoc( r ), + aRange( rPt ), + nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K), + cSep( '\t' ), cStr( '"' ), + bFormulas( false ), bIncludeFiltered( true ), + bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ), + bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), + mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false) +{ + pUndoDoc = nullptr; + pExtOptions = nullptr; +} + +// ctor with a range is only used for export +//! 
ctor with a string (and bSingle=true) is also used for DdeSetData +ScImportExport::ScImportExport( ScDocument& r, const ScRange& rRange ) + : pDocSh( r.GetDocumentShell() ), rDoc( r ), + aRange( rRange ), + nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K), + cSep( '\t' ), cStr( '"' ), + bFormulas( false ), bIncludeFiltered( true ), + bAll( false ), bSingle( false ), bUndo( pDocSh != nullptr ), + bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), + mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false) +{ + pUndoDoc = nullptr; + pExtOptions = nullptr; + // Only one sheet (table) supported + aRange.aEnd.SetTab( aRange.aStart.Tab() ); +} + +// Evaluate input string - either range, cell or the whole document (when error) +// If a View exists, the TabNo of the view will be used. +ScImportExport::ScImportExport( ScDocument& r, const OUString& rPos ) + : pDocSh( r.GetDocumentShell() ), rDoc( r ), + nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K), + cSep( '\t' ), cStr( '"' ), + bFormulas( false ), bIncludeFiltered( true ), + bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ), + bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), + mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false) +{ + pUndoDoc = nullptr; + pExtOptions = nullptr; + + SCTAB nTab = ScDocShell::GetCurTab(); + aRange.aStart.SetTab( nTab ); + OUString aPos( rPos ); + // Named range? 
+ ScRangeName* pRange = rDoc.GetRangeName(); + if (pRange) + { + const ScRangeData* pData = pRange->findByUpperName(ScGlobal::getCharClass().uppercase(aPos)); + if (pData) + { + if( pData->HasType( ScRangeData::Type::RefArea ) + || pData->HasType( ScRangeData::Type::AbsArea ) + || pData->HasType( ScRangeData::Type::AbsPos ) ) + { + aPos = pData->GetSymbol(); + } + } + } + formula::FormulaGrammar::AddressConvention eConv = rDoc.GetAddressConvention(); + // Range? + if (aRange.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID) + bSingle = false; + // Cell? + else if (aRange.aStart.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID) + aRange.aEnd = aRange.aStart; + else + bAll = true; +} + +ScImportExport::~ScImportExport() COVERITY_NOEXCEPT_FALSE +{ + pUndoDoc.reset(); + pExtOptions.reset(); +} + +void ScImportExport::SetExtOptions( const ScAsciiOptions& rOpt ) +{ + if ( pExtOptions ) + *pExtOptions = rOpt; + else + pExtOptions.reset(new ScAsciiOptions( rOpt )); + + // "normal" Options + + cSep = ScAsciiOptions::GetWeightedFieldSep( rOpt.GetFieldSeps(), false); + cStr = rOpt.GetTextSep(); +} + +void ScImportExport::SetFilterOptions(const OUString& rFilterOptions) +{ + maFilterOptions = rFilterOptions; +} + +bool ScImportExport::IsFormatSupported( SotClipboardFormatId nFormat ) +{ + return nFormat == SotClipboardFormatId::STRING + || nFormat == SotClipboardFormatId::STRING_TSVC + || nFormat == SotClipboardFormatId::SYLK + || nFormat == SotClipboardFormatId::LINK + || nFormat == SotClipboardFormatId::HTML + || nFormat == SotClipboardFormatId::HTML_SIMPLE + || nFormat == SotClipboardFormatId::DIF; +} + +// Prepare for Undo +bool ScImportExport::StartPaste() +{ + if ( !bAll ) + { + ScEditableTester aTester( rDoc, aRange ); + if ( !aTester.IsEditable() ) + { + std::unique_ptr<weld::MessageDialog> xInfoBox(Application::CreateMessageDialog(ScDocShell::GetActiveDialogParent(), + VclMessageType::Info, VclButtonsType::Ok, + ScResId(aTester.GetMessageId()))); + xInfoBox->run(); + return 
false; + } + } + if( bUndo && pDocSh && rDoc.IsUndoEnabled()) + { + pUndoDoc.reset(new ScDocument( SCDOCMODE_UNDO )); + pUndoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() ); + rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pUndoDoc); + } + return true; +} + +// Create Undo/Redo actions, Invalidate/Repaint +void ScImportExport::EndPaste(bool bAutoRowHeight) +{ + bool bHeight = bAutoRowHeight && pDocSh && pDocSh->AdjustRowHeight( + aRange.aStart.Row(), aRange.aEnd.Row(), aRange.aStart.Tab() ); + + if( pUndoDoc && rDoc.IsUndoEnabled() && pDocSh ) + { + ScDocumentUniquePtr pRedoDoc(new ScDocument( SCDOCMODE_UNDO )); + pRedoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() ); + rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pRedoDoc); + ScMarkData aDestMark(pRedoDoc->GetSheetLimits()); + aDestMark.SetMarkArea(aRange); + pDocSh->GetUndoManager()->AddUndoAction( + std::make_unique<ScUndoPaste>(pDocSh, aRange, aDestMark, std::move(pUndoDoc), std::move(pRedoDoc), InsertDeleteFlags::ALL, nullptr)); + } + pUndoDoc.reset(); + if( pDocSh ) + { + if (!bHeight) + pDocSh->PostPaint( aRange, PaintPartFlags::Grid ); + pDocSh->SetDocumentModified(); + } + ScTabViewShell* pViewSh = ScTabViewShell::GetActiveViewShell(); + if ( pViewSh ) + pViewSh->UpdateInputHandler(); + +} + +bool ScImportExport::ExportData( std::u16string_view rMimeType, + css::uno::Any & rValue ) +{ + SvMemoryStream aStrm; + SotClipboardFormatId fmtId = SotExchange::GetFormatIdFromMimeType(rMimeType); + if (fmtId == SotClipboardFormatId::STRING) + aStrm.SetStreamCharSet(RTL_TEXTENCODING_UNICODE); + // mba: no BaseURL for data exchange + if (ExportStream(aStrm, OUString(), fmtId)) + { + if (fmtId == SotClipboardFormatId::STRING) + { + assert(aStrm.TellEnd() % sizeof(sal_Unicode) == 0); + rValue <<= OUString(static_cast<const sal_Unicode*>(aStrm.GetData()), + aStrm.TellEnd() / sizeof(sal_Unicode)); + } + 
else + { + aStrm.WriteUChar(0); + rValue <<= css::uno::Sequence<sal_Int8>(static_cast<sal_Int8 const*>(aStrm.GetData()), + aStrm.TellEnd()); + } + return true; + } + return false; +} + +bool ScImportExport::ImportString( const OUString& rText, SotClipboardFormatId nFmt ) +{ + switch ( nFmt ) + { + // formats supporting unicode + case SotClipboardFormatId::STRING : + case SotClipboardFormatId::STRING_TSVC : + { + ScImportStringStream aStrm( rText); + return ImportStream( aStrm, OUString(), nFmt ); + // ImportStream must handle RTL_TEXTENCODING_UNICODE + } + default: + { + rtl_TextEncoding eEnc = osl_getThreadTextEncoding(); + OString aTmp( rText.getStr(), rText.getLength(), eEnc ); + SvMemoryStream aStrm( const_cast<char *>(aTmp.getStr()), aTmp.getLength() * sizeof(char), StreamMode::READ ); + aStrm.SetStreamCharSet( eEnc ); + SetNoEndianSwap( aStrm ); //! no swapping in memory + return ImportStream( aStrm, OUString(), nFmt ); + } + } +} + +bool ScImportExport::ExportString( OUString& rText, SotClipboardFormatId nFmt ) +{ + if ( nFmt != SotClipboardFormatId::STRING && nFmt != SotClipboardFormatId::STRING_TSVC ) + { + SAL_WARN("sc.ui", "ScImportExport::ExportString: Unicode not supported for other formats than SotClipboardFormatId::STRING[_TSV]"); + rtl_TextEncoding eEnc = osl_getThreadTextEncoding(); + OString aTmp; + bool bOk = ExportByteString( aTmp, eEnc, nFmt ); + rText = OStringToOUString( aTmp, eEnc ); + return bOk; + } + // nSizeLimit not needed for OUString + + SvMemoryStream aStrm; + aStrm.SetStreamCharSet( RTL_TEXTENCODING_UNICODE ); + SetNoEndianSwap( aStrm ); //! 
no swapping in memory + // mba: no BaseURL for data exc + if( ExportStream( aStrm, OUString(), nFmt ) ) + { + aStrm.WriteUInt16( 0 ); + rText = OUString( static_cast<const sal_Unicode*>(aStrm.GetData()) ); + return true; + } + rText.clear(); + return false; + + // ExportStream must handle RTL_TEXTENCODING_UNICODE +} + +bool ScImportExport::ExportByteString( OString& rText, rtl_TextEncoding eEnc, SotClipboardFormatId nFmt ) +{ + OSL_ENSURE( eEnc != RTL_TEXTENCODING_UNICODE, "ScImportExport::ExportByteString: Unicode not supported" ); + if ( eEnc == RTL_TEXTENCODING_UNICODE ) + eEnc = osl_getThreadTextEncoding(); + + if (!nSizeLimit) + nSizeLimit = SAL_MAX_UINT16; + + SvMemoryStream aStrm; + aStrm.SetStreamCharSet( eEnc ); + SetNoEndianSwap( aStrm ); //! no swapping in memory + // mba: no BaseURL for data exchange + if( ExportStream( aStrm, OUString(), nFmt ) ) + { + aStrm.WriteChar( 0 ); + if( aStrm.TellEnd() <= nSizeLimit ) + { + rText = static_cast<const char*>(aStrm.GetData()); + return true; + } + } + rText.clear(); + return false; +} + +bool ScImportExport::ImportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt ) +{ + if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC ) + { + if( ExtText2Doc( rStrm ) ) // evaluate pExtOptions + return true; + } + if( nFmt == SotClipboardFormatId::SYLK ) + { + if( Sylk2Doc( rStrm ) ) + return true; + } + if( nFmt == SotClipboardFormatId::DIF ) + { + if( Dif2Doc( rStrm ) ) + return true; + } + if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT ) + { + if( RTF2Doc( rStrm, rBaseURL ) ) + return true; + } + if( nFmt == SotClipboardFormatId::LINK ) + return true; // Link-Import? 
+ if ( nFmt == SotClipboardFormatId::HTML ) + { + if( HTML2Doc( rStrm, rBaseURL ) ) + return true; + } + if ( nFmt == SotClipboardFormatId::HTML_SIMPLE ) + { + MSE40HTMLClipFormatObj aMSE40ClpObj; // needed to skip the header data + SvStream* pHTML = aMSE40ClpObj.IsValid( rStrm ); + if ( pHTML && HTML2Doc( *pHTML, rBaseURL ) ) + return true; + } + + return false; +} + +bool ScImportExport::ExportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt ) +{ + if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC ) + { + if( Doc2Text( rStrm ) ) + return true; + } + if( nFmt == SotClipboardFormatId::SYLK ) + { + if( Doc2Sylk( rStrm ) ) + return true; + } + if( nFmt == SotClipboardFormatId::DIF ) + { + if( Doc2Dif( rStrm ) ) + return true; + } + if( nFmt == SotClipboardFormatId::LINK && !bAll ) + { + OUString aDocName; + if ( rDoc.IsClipboard() ) + aDocName = ScGlobal::GetClipDocName(); + else + { + ScDocShell* pShell = rDoc.GetDocumentShell(); + if (pShell) + aDocName = pShell->GetTitle( SFX_TITLE_FULLNAME ); + } + + OSL_ENSURE( !aDocName.isEmpty(), "ClipBoard document has no name! :-/" ); + if( !aDocName.isEmpty() ) + { + // Always use Calc A1 syntax for paste link. + OUString aRefName; + ScRefFlags nFlags = ScRefFlags::VALID | ScRefFlags::TAB_3D; + if( bSingle ) + aRefName = aRange.aStart.Format(nFlags, &rDoc, formula::FormulaGrammar::CONV_OOO); + else + { + if( aRange.aStart.Tab() != aRange.aEnd.Tab() ) + nFlags |= ScRefFlags::TAB2_3D; + aRefName = aRange.Format(rDoc, nFlags, formula::FormulaGrammar::CONV_OOO); + } + OUString aAppName = Application::GetAppName(); + + // extra bits are used to tell the client to prefer external + // reference link. 
+ + WriteUnicodeOrByteString( rStrm, aAppName, true ); + WriteUnicodeOrByteString( rStrm, aDocName, true ); + WriteUnicodeOrByteString( rStrm, aRefName, true ); + WriteUnicodeOrByteString( rStrm, u"calc:extref", true ); + if ( rStrm.GetStreamCharSet() == RTL_TEXTENCODING_UNICODE ) + rStrm.WriteUInt16( 0 ); + else + rStrm.WriteChar( 0 ); + return rStrm.GetError() == ERRCODE_NONE; + } + } + if( nFmt == SotClipboardFormatId::HTML ) + { + if( Doc2HTML( rStrm, rBaseURL ) ) + return true; + } + if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT ) + { + if( Doc2RTF( rStrm ) ) + return true; + } + + return false; +} + +void ScImportExport::WriteUnicodeOrByteString( SvStream& rStrm, std::u16string_view rString, bool bZero ) +{ + rtl_TextEncoding eEnc = rStrm.GetStreamCharSet(); + if ( eEnc == RTL_TEXTENCODING_UNICODE ) + { + if ( !lcl_IsEndianSwap( rStrm ) ) + rStrm.WriteBytes(rString.data(), rString.size() * sizeof(sal_Unicode)); + else + { + const sal_Unicode* p = rString.data(); + const sal_Unicode* const pStop = p + rString.size(); + while ( p < pStop ) + { + rStrm.WriteUInt16( *p ); + } + } + if ( bZero ) + rStrm.WriteUInt16( 0 ); + } + else + { + OString aByteStr(OUStringToOString(rString, eEnc)); + rStrm.WriteOString( aByteStr ); + if ( bZero ) + rStrm.WriteChar( 0 ); + } +} + +// This function could be replaced by endlub() +void ScImportExport::WriteUnicodeOrByteEndl( SvStream& rStrm ) +{ + if ( rStrm.GetStreamCharSet() == RTL_TEXTENCODING_UNICODE ) + { // same as endl() but unicode + switch ( rStrm.GetLineDelimiter() ) + { + case LINEEND_CR : + rStrm.WriteUInt16( '\r' ); + break; + case LINEEND_LF : + rStrm.WriteUInt16( '\n' ); + break; + default: + rStrm.WriteUInt16( '\r' ).WriteUInt16( '\n' ); + } + } + else + endl( rStrm ); +} + +// tdf#104927 +// http://www.unicode.org/reports/tr11/ +sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth) +{ + sal_Int32 nWidth = 0; + while(nIdx < 
rStr.getLength() && nWidth < nMaxWidth) + { + sal_uInt32 nCode = rStr.iterateCodePoints(&nIdx); + + auto nEaWidth = u_getIntPropertyValue(nCode, UCHAR_EAST_ASIAN_WIDTH); + if (nEaWidth == U_EA_FULLWIDTH || nEaWidth == U_EA_WIDE) + nWidth += 2; + else if (!u_getIntPropertyValue(nCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) + nWidth += 1; + } + + if (nIdx < rStr.getLength()) + { + sal_Int32 nTmpIdx = nIdx; + sal_uInt32 nCode = rStr.iterateCodePoints(&nTmpIdx); + + if (u_getIntPropertyValue(nCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) + nIdx = nTmpIdx; + } + return nWidth; +} + +sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr) +{ + sal_Int32 nIdx = 0; + return CountVisualWidth(rStr, nIdx, SAL_MAX_INT32); +} + +void ScImportExport::SetNoEndianSwap( SvStream& rStrm ) +{ +#ifdef OSL_BIGENDIAN + rStrm.SetEndian( SvStreamEndian::BIG ); +#else + rStrm.SetEndian( SvStreamEndian::LITTLE ); +#endif +} + +static inline bool lcl_isFieldEnd( sal_Unicode c, const sal_Unicode* pSeps ) +{ + return !c || ScGlobal::UnicodeStrChr( pSeps, c); +} + +namespace { + +enum QuoteType +{ + FIELDSTART_QUOTE, + FIRST_QUOTE, + SECOND_QUOTE, + FIELDEND_QUOTE, + DONTKNOW_QUOTE +}; + +} + +/** Determine if *p is a quote that ends a quoted field. + + Precondition: we are parsing a quoted field already and *p is a quote. + + @return + FIELDEND_QUOTE if end of field quote + DONTKNOW_QUOTE anything else + */ +static QuoteType lcl_isFieldEndQuote( const sal_Unicode* p, const sal_Unicode* pSeps, sal_Unicode& rcDetectSep ) +{ + // Due to broken CSV generators that don't double embedded quotes check if + // a field separator immediately or with trailing spaces follows the quote, + // only then end the field, or at end of string. + constexpr sal_Unicode cBlank = ' '; + if (p[1] == cBlank && ScGlobal::UnicodeStrChr( pSeps, cBlank)) + return FIELDEND_QUOTE; + // Detect a possible blank separator if it's not already in the list (which + // was checked right above for p[1]==cBlank). 
+ const bool bBlankSep = (p[1] == cBlank && !rcDetectSep && p[2] && p[2] != cBlank); + while (p[1] == cBlank) + ++p; + if (lcl_isFieldEnd( p[1], pSeps)) + return FIELDEND_QUOTE; + // Extended separator detection after a closing quote (with or without + // blanks). Note that nQuotes is incremented *after* the call so is not yet + // even here, and that with separator detection we reach here only if + // lcl_isEscapedOrFieldEndQuote() did not already detect FIRST_QUOTE or + // SECOND_QUOTE for an escaped embedded quote, thus nQuotes does not have + // to be checked. + if (!rcDetectSep) + { + constexpr sal_Unicode vSep[] = { ',', '\t', ';' }; + for (const sal_Unicode c : vSep) + { + if (p[1] == c) + { + rcDetectSep = c; + return FIELDEND_QUOTE; + } + } + } + // Blank separator is least significant, after others. + if (bBlankSep) + { + rcDetectSep = cBlank; + return FIELDEND_QUOTE; + } + return DONTKNOW_QUOTE; +} + +/** Determine if *p is a quote that is escaped by being doubled or ends a + quoted field. + + Precondition: *p is a quote. + + @param nQuotes + Quote characters encountered so far. + Odd (after opening quote) means either no embedded quotes or only quote + pairs so far. + Even means either not in a quoted field or already one quote + encountered, the first of a pair. + + @return + FIELDSTART_QUOTE if first quote in a field, either starting content or + embedded so caller should check beforehand. + FIRST_QUOTE if first of a doubled quote + SECOND_QUOTE if second of a doubled quote + FIELDEND_QUOTE if end of field quote + DONTKNOW_QUOTE if an unescaped quote we don't consider as end of field, + do not increment nQuotes in caller then! 
+ */ +static QuoteType lcl_isEscapedOrFieldEndQuote( sal_Int32 nQuotes, const sal_Unicode* p, + const sal_Unicode* pSeps, sal_Unicode cStr, sal_Unicode& rcDetectSep ) +{ + if ((nQuotes & 1) == 0) + { + if (p[-1] == cStr) + return SECOND_QUOTE; + else + { + SAL_WARN( "sc", "lcl_isEscapedOrFieldEndQuote: really want a FIELDSTART_QUOTE?"); + return FIELDSTART_QUOTE; + } + } + if (p[1] == cStr) + return FIRST_QUOTE; + return lcl_isFieldEndQuote( p, pSeps, rcDetectSep); +} + +/** Append characters of [p1,p2) to rField. + + @returns TRUE if ok; FALSE if data overflow, truncated + */ +static bool lcl_appendLineData( OUString& rField, const sal_Unicode* p1, const sal_Unicode* p2 ) +{ + if (rField.getLength() + (p2 - p1) <= nArbitraryCellLengthLimit) + { + rField += std::u16string_view( p1, p2 - p1 ); + return true; + } + else + { + SAL_WARN( "sc", "lcl_appendLineData: data overflow"); + rField += std::u16string_view( p1, nArbitraryCellLengthLimit - rField.getLength() ); + return false; + } +} + +namespace { + +enum class DoubledQuoteMode +{ + KEEP_ALL, // both are taken, additionally start and end quote are included in string + ESCAPE, // escaped quote, one is taken, one ignored +}; + +} + +/** Scan for a quoted string. + + Precondition: initial current position *p is a cStr quote. + + For DoubledQuoteMode::ESCAPE, if after the closing quote there is a field + end (with or without trailing blanks and as determined by + lcl_isFieldEndQuote()), then the content is appended to rField with quotes + processed and removed. Else if no field end after the quoted string was + detected, nothing is appended and processing continues and is repeated + until the next quote. If no closing quote at a field end was found at all, + nothing is appended and the initial position is returned and caller has to + decide, usually just taking all as literal data. 
+ + For DoubledQuoteMode::KEEP_ALL, the string up to and including the closing + quote is appended to rField and the next position returned, regardless + whether there is a field separator following or not. + + */ +static const sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rField, + const sal_Unicode* pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool& rbOverflowCell ) +{ + OUString aString; + bool bClosingQuote = (eMode == DoubledQuoteMode::KEEP_ALL); + const sal_Unicode* const pStart = p; + if (eMode != DoubledQuoteMode::KEEP_ALL) + p++; //! jump over opening quote + bool bCont; + do + { + bCont = false; + const sal_Unicode* p0 = p; + for( ;; ) + { + if (!*p) + { + // Encountering end of data after an opening quote is not a + // quoted string, ReadCsvLine() concatenated lines with '\n' + // for a properly quoted embedded linefeed. + if (eMode == DoubledQuoteMode::KEEP_ALL) + // Caller would append that data anyway, so we can do it + // already here. + break; + + return pStart; + } + + if( *p == cStr ) + { + if ( *++p != cStr ) + { + // break or continue for loop + if (eMode == DoubledQuoteMode::ESCAPE) + { + sal_Unicode cDetectSep = 0xffff; // No separator detection here. + if (lcl_isFieldEndQuote( p-1, pSeps, cDetectSep) == FIELDEND_QUOTE) + { + bClosingQuote = true; + break; + } + else + continue; + } + else + break; + } + // doubled quote char + switch ( eMode ) + { + case DoubledQuoteMode::KEEP_ALL : + p++; // both for us (not breaking for-loop) + break; + case DoubledQuoteMode::ESCAPE : + p++; // one for us (breaking for-loop) + bCont = true; // and more + break; + } + if ( eMode == DoubledQuoteMode::ESCAPE ) + break; + } + else + p++; + } + if ( p0 < p ) + { + if (!lcl_appendLineData( aString, p0, ((eMode != DoubledQuoteMode::KEEP_ALL && (*p || *(p-1) == cStr)) ? 
p-1 : p))) + rbOverflowCell = true; + } + } while ( bCont ); + + if (!bClosingQuote) + return pStart; + + if (!aString.isEmpty()) + rField += aString; + + return p; +} + +static void lcl_UnescapeSylk( OUString & rString, SylkVersion eVersion ) +{ + // Older versions didn't escape the semicolon. + // Older versions quoted the string and doubled embedded quotes, but not + // the semicolons, which was plain wrong. + if (eVersion >= SylkVersion::OOO32) + rString = rString.replaceAll(";;", ";"); + else + rString = rString.replaceAll("\"\"", "\""); + + rString = rString.replaceAll(SYLK_LF, "\n"); +} + +static const sal_Unicode* lcl_ScanSylkString( const sal_Unicode* p, + OUString& rString, SylkVersion eVersion ) +{ + const sal_Unicode* pStartQuote = p; + const sal_Unicode* pEndQuote = nullptr; + while( *(++p) ) + { + if( *p == '"' ) + { + pEndQuote = p; + if (eVersion >= SylkVersion::OOO32) + { + if (*(p+1) == ';') + { + if (*(p+2) == ';') + { + p += 2; // escaped ';' + pEndQuote = nullptr; + } + else + break; // end field + } + } + else + { + if (*(p+1) == '"') + { + ++p; // escaped '"' + pEndQuote = nullptr; + } + else if (*(p+1) == ';') + break; // end field + } + } + } + if (!pEndQuote) + pEndQuote = p; // Take all data as string. + rString += std::u16string_view(pStartQuote + 1, pEndQuote - pStartQuote - 1 ); + lcl_UnescapeSylk( rString, eVersion); + return p; +} + +static const sal_Unicode* lcl_ScanSylkFormula( const sal_Unicode* p, + OUString& rString, SylkVersion eVersion ) +{ + const sal_Unicode* pStart = p; + if (eVersion >= SylkVersion::OOO32) + { + while (*p) + { + if (*p == ';') + { + if (*(p+1) == ';') + ++p; // escaped ';' + else + break; // end field + } + ++p; + } + rString += std::u16string_view( pStart, p - pStart); + lcl_UnescapeSylk( rString, eVersion); + } + else + { + // Nasty. If in old versions the formula contained a semicolon, it was + // quoted and embedded quotes were doubled, but semicolons were not. 
If + // there was no semicolon, it could still contain quotes and doubled + // embedded quotes if it was something like ="a""b", which was saved as + // E"a""b" as is and has to be preserved, even if older versions + // couldn't even load it correctly. However, theoretically another + // field might follow and thus the line contain a semicolon again, such + // as ...;E"a""b";... + bool bQuoted = false; + if (*p == '"') + { + // May be a quoted expression or just a string constant expression + // with quotes. + while (*(++p)) + { + if (*p == '"') + { + if (*(p+1) == '"') + ++p; // escaped '"' + else + break; // closing '"', had no ';' yet + } + else if (*p == ';') + { + bQuoted = true; // ';' within quoted expression + break; + } + } + p = pStart; + } + if (bQuoted) + p = lcl_ScanSylkString( p, rString, eVersion); + else + { + while (*p && *p != ';') + ++p; + rString += std::u16string_view( pStart, p - pStart); + } + } + return p; +} + +static void lcl_WriteString( SvStream& rStrm, OUString& rString, sal_Unicode cQuote, sal_Unicode cEsc ) +{ + if (cEsc) + { + // the goal is to replace cStr by cStr+cStr + OUString strFrom(cEsc); + OUString strTo = strFrom + strFrom; + rString = rString.replaceAll(strFrom, strTo); + } + + if (cQuote) + { + rString = OUStringChar(cQuote) + rString + OUStringChar(cQuote); + } + + ScImportExport::WriteUnicodeOrByteString( rStrm, rString ); +} + +static void lcl_WriteSimpleString( SvStream& rStrm, std::u16string_view rString ) +{ + ScImportExport::WriteUnicodeOrByteString( rStrm, rString ); +} + +bool ScImportExport::Text2Doc( SvStream& rStrm ) +{ + bool bOk = true; + + sal_Unicode pSeps[2]; + pSeps[0] = cSep; + pSeps[1] = 0; + + ScSetStringParam aSetStringParam; + aSetStringParam.mbCheckLinkFormula = true; + + SCCOL nStartCol = aRange.aStart.Col(); + SCROW nStartRow = aRange.aStart.Row(); + SCCOL nEndCol = aRange.aEnd.Col(); + SCROW nEndRow = aRange.aEnd.Row(); + sal_uInt64 nOldPos = rStrm.Tell(); + rStrm.StartReadingUnicodeText( 
rStrm.GetStreamCharSet() ); + bool bData = !bSingle; + if( !bSingle) + bOk = StartPaste(); + + while( bOk ) + { + OUString aLine; + OUString aCell; + SCROW nRow = nStartRow; + rStrm.Seek( nOldPos ); + for( ;; ) + { + rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet(), nArbitraryLineLengthLimit ); + // tdf#125440 When inserting tab separated string, consider quotes as field markers + DoubledQuoteMode mode = aLine.indexOf("\t") >= 0 ? DoubledQuoteMode::ESCAPE : DoubledQuoteMode::KEEP_ALL; + if( rStrm.eof() ) + break; + SCCOL nCol = nStartCol; + const sal_Unicode* p = aLine.getStr(); + while( *p ) + { + aCell.clear(); + const sal_Unicode* q = p; + if (*p == cStr) + { + // Look for a pairing quote. + q = p = lcl_ScanString( p, aCell, pSeps, cStr, mode, bOverflowCell ); + } + // All until next separator. + while (*p && *p != cSep) + ++p; + if (!lcl_appendLineData( aCell, q, p)) + bOverflowCell = true; // display warning on import + if (*p) + ++p; + if (rDoc.ValidCol(nCol) && rDoc.ValidRow(nRow) ) + { + if( bSingle ) + { + if (nCol>nEndCol) nEndCol = nCol; + if (nRow>nEndRow) nEndRow = nRow; + } + if( bData && nCol <= nEndCol && nRow <= nEndRow ) + rDoc.SetString( nCol, nRow, aRange.aStart.Tab(), aCell, &aSetStringParam ); + } + else // too many columns/rows + { + if (!rDoc.ValidRow(nRow)) + bOverflowRow = true; // display warning on import + if (!rDoc.ValidCol(nCol)) + bOverflowCol = true; // display warning on import + } + ++nCol; + } + ++nRow; + } + + if( !bData ) + { + aRange.aEnd.SetCol( nEndCol ); + aRange.aEnd.SetRow( nEndRow ); + bOk = StartPaste(); + bData = true; + } + else + break; + } + + EndPaste(); + if (bOk && mbImportBroadcast) + { + rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged); + pDocSh->PostDataChanged(); + } + + return bOk; +} + +// Extended Ascii-Import + +static bool lcl_PutString( + ScDocumentImport& rDocImport, bool bUseDocImport, + SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString& rStr, sal_uInt8 nColFormat, + 
SvNumberFormatter* pFormatter, bool bDetectNumFormat, bool bDetectSciNumFormat, bool bEvaluateFormulas, bool bSkipEmptyCells, + const ::utl::TransliterationWrapper& rTransliteration, CalendarWrapper& rCalendar, + const ::utl::TransliterationWrapper* pSecondTransliteration, CalendarWrapper* pSecondCalendar ) +{ + ScDocument& rDoc = rDocImport.getDoc(); + bool bMultiLine = false; + if ( nColFormat == SC_COL_SKIP || !rDoc.ValidCol(nCol) || !rDoc.ValidRow(nRow) ) + return bMultiLine; + if ( rStr.isEmpty() ) + { + if ( !bSkipEmptyCells ) + { // delete destination cell + if ( bUseDocImport ) + rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr ); + else + rDoc.SetString( nCol, nRow, nTab, rStr ); + } + return false; + } + + const bool bForceFormulaText = (!bEvaluateFormulas && rStr[0] == '='); + if (nColFormat == SC_COL_TEXT || bForceFormulaText) + { + if ( bUseDocImport ) + { + double fDummy; + sal_uInt32 nIndex = 0; + if (bForceFormulaText || rDoc.GetFormatTable()->IsNumberFormat(rStr, nIndex, fDummy)) + { + // Set the format of this cell to Text. + // This is only necessary for ScDocumentImport, + // ScDocument::SetTextCell() forces it by ScSetStringParam. + sal_uInt32 nFormat = rDoc.GetFormatTable()->GetStandardFormat(SvNumFormatType::TEXT); + ScPatternAttr aNewAttrs(rDoc.GetPool()); + SfxItemSet& rSet = aNewAttrs.GetItemSet(); + rSet.Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nFormat) ); + rDoc.ApplyPattern(nCol, nRow, nTab, aNewAttrs); + } + if (ScStringUtil::isMultiline(rStr)) + { + ScFieldEditEngine& rEngine = rDoc.GetEditEngine(); + rEngine.SetTextCurrentDefaults(rStr); + rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject()); + return true; + } + else + { + rDocImport.setStringCell(ScAddress(nCol, nRow, nTab), rStr); + return false; + } + } + else + { + rDoc.SetTextCell(ScAddress(nCol, nRow, nTab), rStr); + return bMultiLine; + } + } + + if ( nColFormat == SC_COL_ENGLISH ) + { + //! SetString with Extra-Flag ??? 
+ + SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable(); + sal_uInt32 nEnglish = pDocFormatter->GetStandardIndex(LANGUAGE_ENGLISH_US); + double fVal; + if ( pDocFormatter->IsNumberFormat( rStr, nEnglish, fVal ) ) + { + // Numberformat will not be set to English + if ( bUseDocImport ) + rDocImport.setNumericCell( ScAddress( nCol, nRow, nTab ), fVal ); + else + rDoc.SetValue( nCol, nRow, nTab, fVal ); + return bMultiLine; + } + // else, continue with SetString + } + else if ( nColFormat != SC_COL_STANDARD ) // Datumformats + { + const sal_uInt16 nMaxNumberParts = 7; // Y-M-D h:m:s.t + const sal_Int32 nLen = rStr.getLength(); + sal_Int32 nStart[nMaxNumberParts]; + sal_Int32 nEnd[nMaxNumberParts]; + + bool bIso; + sal_uInt16 nDP, nMP, nYP; + switch ( nColFormat ) + { + case SC_COL_YMD: nDP = 2; nMP = 1; nYP = 0; bIso = true; break; + case SC_COL_MDY: nDP = 1; nMP = 0; nYP = 2; bIso = false; break; + case SC_COL_DMY: + default: nDP = 0; nMP = 1; nYP = 2; bIso = false; break; + } + + sal_uInt16 nFound = 0; + bool bInNum = false; + for (sal_Int32 nPos = 0; nPos < nLen && (bInNum || nFound < nMaxNumberParts); ++nPos) + { + bool bLetter = false; + if (rtl::isAsciiDigit(rStr[nPos]) || + (((!bInNum && nFound==nMP) || (bInNum && nFound==nMP+1)) + && (bLetter = ScGlobal::getCharClass().isLetterNumeric( rStr, nPos)))) + { + if (!bInNum) + { + bInNum = true; + nStart[nFound] = nPos; + ++nFound; + } + nEnd[nFound-1] = nPos; + if (bIso && (bLetter || (2 <= nFound && nFound <= 6 && nPos > nStart[nFound-1] + 1))) + // Each M,D,h,m,s at most 2 digits. + bIso = false; + } + else + { + bInNum = false; + if (bIso) + { + // ([+-])YYYY-MM-DD([T ]hh:mm(:ss(.fff)))(([+-])TZ) + // XXX NOTE: timezone is accepted here, but number + // formatter parser will not, so the end result will be + // type Text to preserve timezone information. + switch (rStr[nPos]) + { + case '+': + if (nFound >= 5 && nPos == nEnd[nFound-1] + 1) + // Accept timezone offset. 
+ ; + else if (nPos > 0) + // Accept one leading sign. + bIso = false; + break; + case '-': + if (nFound >= 5 && nPos == nEnd[nFound-1] + 1) + // Accept timezone offset. + ; + else if (nFound == 0 && nPos > 0) + // Accept one leading sign. + bIso = false; + else if (nFound < 1 || 2 < nFound || nPos != nEnd[nFound-1] + 1) + // Not immediately after 1 or 1-2 + bIso = false; + break; + case 'T': + case ' ': + if (nFound != 3 || nPos != nEnd[nFound-1] + 1) + // Not immediately after 1-2-3 + bIso = false; + break; + case ':': + if (nFound < 4 || 5 < nFound || nPos != nEnd[nFound-1] + 1) + // Not at 1-2-3T4:5: + bIso = false; + break; + case '.': + case ',': + if (nFound != 6 || nPos != nEnd[nFound-1] + 1) + // Not at 1-2-3T4:5:6. + bIso = false; + break; + case 'Z': + if (nFound >= 5 && nPos == nEnd[nFound-1] + 1) + // Accept Zero timezone. + ; + else + bIso = false; + break; + default: + bIso = false; + } + } + } + } + + if (nFound < 3) + bIso = false; + + if (bIso) + { + // Leave conversion and detection of various possible number + // formats to the number formatter. ISO is recognized in any locale + // so we can directly use the document's formatter. + sal_uInt32 nFormat = 0; + double fVal = 0.0; + SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable(); + if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal)) + { + if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE) + { + ScAddress aPos(nCol,nRow,nTab); + if (bUseDocImport) + rDocImport.setNumericCell(aPos, fVal); + else + rDoc.SetValue(aPos, fVal); + rDoc.SetNumberFormat(aPos, nFormat); + + return bMultiLine; // success + } + } + // If we reach here it is type Text (e.g. timezone or trailing + // characters). Handled below. 
+ } + + if ( nFound == 1 ) + { + // try to break one number (without separators) into date fields + + sal_Int32 nDateStart = nStart[0]; + sal_Int32 nDateLen = nEnd[0] + 1 - nDateStart; + + if ( nDateLen >= 5 && nDateLen <= 8 && + ScGlobal::getCharClass().isNumeric( rStr.copy( nDateStart, nDateLen ) ) ) + { + // 6 digits: 2 each for day, month, year + // 8 digits: 4 for year, 2 each for day and month + // 5 or 7 digits: first field is shortened by 1 + + bool bLongYear = ( nDateLen >= 7 ); + bool bShortFirst = ( nDateLen == 5 || nDateLen == 7 ); + + sal_uInt16 nFieldStart = nDateStart; + for (sal_uInt16 nPos=0; nPos<3; nPos++) + { + sal_uInt16 nFieldEnd = nFieldStart + 1; // default: 2 digits + if ( bLongYear && nPos == nYP ) + nFieldEnd += 2; // 2 extra digits for long year + if ( bShortFirst && nPos == 0 ) + --nFieldEnd; // first field shortened? + + nStart[nPos] = nFieldStart; + nEnd[nPos] = nFieldEnd; + nFieldStart = nFieldEnd + 1; + } + nFound = 3; + } + } + + if (!bIso && nFound >= 3) + { + using namespace ::com::sun::star; + bool bSecondCal = false; + sal_uInt16 nDay = static_cast<sal_uInt16>(o3tl::toInt32(rStr.subView( nStart[nDP], nEnd[nDP]+1-nStart[nDP] ))); + sal_uInt16 nYear = static_cast<sal_uInt16>(o3tl::toInt32(rStr.subView( nStart[nYP], nEnd[nYP]+1-nStart[nYP] ))); + OUString aMStr = rStr.copy( nStart[nMP], nEnd[nMP]+1-nStart[nMP] ); + sal_Int16 nMonth = static_cast<sal_Int16>(aMStr.toInt32()); + if (!nMonth) + { + static constexpr OUString aSepShortened = u"SEP"_ustr; + uno::Sequence< i18n::CalendarItem2 > xMonths; + sal_Int32 i, nMonthCount; + // first test all month names from local international + xMonths = rCalendar.getMonths(); + nMonthCount = xMonths.getLength(); + for (i=0; i<nMonthCount && !nMonth; i++) + { + if ( rTransliteration.isEqual( aMStr, xMonths[i].FullName ) || + rTransliteration.isEqual( aMStr, xMonths[i].AbbrevName ) ) + nMonth = sal::static_int_cast<sal_Int16>( i+1 ); + else if ( i == 8 && rTransliteration.isEqual( "SEPT", + 
xMonths[i].AbbrevName ) && + rTransliteration.isEqual( aMStr, aSepShortened ) ) + { // correct English abbreviation is SEPT, + // but data mostly contains SEP only + nMonth = sal::static_int_cast<sal_Int16>( i+1 ); + } + } + // if none found, then test english month names + if ( !nMonth && pSecondCalendar && pSecondTransliteration ) + { + xMonths = pSecondCalendar->getMonths(); + nMonthCount = xMonths.getLength(); + for (i=0; i<nMonthCount && !nMonth; i++) + { + if ( pSecondTransliteration->isEqual( aMStr, xMonths[i].FullName ) || + pSecondTransliteration->isEqual( aMStr, xMonths[i].AbbrevName ) ) + { + nMonth = sal::static_int_cast<sal_Int16>( i+1 ); + bSecondCal = true; + } + else if ( i == 8 && pSecondTransliteration->isEqual( + aMStr, aSepShortened ) ) + { // correct English abbreviation is SEPT, + // but data mostly contains SEP only + nMonth = sal::static_int_cast<sal_Int16>( i+1 ); + bSecondCal = true; + } + } + } + } + + SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable(); + if ( nYear < 100 ) + nYear = pDocFormatter->ExpandTwoDigitYear( nYear ); + + CalendarWrapper* pCalendar = (bSecondCal ? 
pSecondCalendar : &rCalendar); + sal_Int16 nNumMonths = pCalendar->getNumberOfMonthsInYear(); + if ( nDay && nMonth && nDay<=31 && nMonth<=nNumMonths ) + { + --nMonth; + pCalendar->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH, nDay ); + pCalendar->setValue( i18n::CalendarFieldIndex::MONTH, nMonth ); + pCalendar->setValue( i18n::CalendarFieldIndex::YEAR, nYear ); + sal_Int16 nHour, nMinute, nSecond; + // #i14974# The imported value should have no fractional value, so set the + // time fields to zero (ICU calendar instance defaults to current date/time) + nHour = nMinute = nSecond = 0; + if (nFound > 3) + nHour = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[3], nEnd[3]+1-nStart[3]))); + if (nFound > 4) + nMinute = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[4], nEnd[4]+1-nStart[4]))); + if (nFound > 5) + nSecond = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[5], nEnd[5]+1-nStart[5]))); + // do not use calendar's milliseconds, to avoid fractional part truncation + double fFrac = 0.0; + if (nFound > 6) + { + sal_Unicode cDec = '.'; + OUString aT = OUStringChar(cDec) + rStr.subView( nStart[6], nEnd[6]+1-nStart[6]); + rtl_math_ConversionStatus eStatus; + double fV = rtl::math::stringToDouble( aT, cDec, 0, &eStatus ); + if (eStatus == rtl_math_ConversionStatus_Ok) + fFrac = fV / 86400.0; + } + sal_Int32 nPos; + if (nFound > 3 && 1 <= nHour && nHour <= 12 // nHour 0 and >=13 can't be AM/PM + && (nPos = nEnd[nFound-1] + 1) < nLen) + { + // Dreaded AM/PM may be following. + while (nPos < nLen && rStr[nPos] == ' ') + ++nPos; + if (nPos < nLen) + { + sal_Int32 nStop = nPos; + while (nStop < nLen && rStr[nStop] != ' ') + ++nStop; + OUString aAmPm = rStr.copy( nPos, nStop - nPos); + // For AM only 12 needs to be treated, whereas for PM + // it must not. Check both, locale and second/English + // strings. 
+ if (nHour == 12 && + (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimeAM()) || + (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, "AM")))) + { + nHour = 0; + } + else if (nHour < 12 && + (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimePM()) || + (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, "PM")))) + { + nHour += 12; + } + } + } + pCalendar->setValue( i18n::CalendarFieldIndex::HOUR, nHour ); + pCalendar->setValue( i18n::CalendarFieldIndex::MINUTE, nMinute ); + pCalendar->setValue( i18n::CalendarFieldIndex::SECOND, nSecond ); + pCalendar->setValue( i18n::CalendarFieldIndex::MILLISECOND, 0 ); + if ( pCalendar->isValid() ) + { + // Whole days diff. + double fDiff = DateTime::Sub( DateTime(pDocFormatter->GetNullDate()), + pCalendar->getEpochStart()); + // #i14974# must use getLocalDateTime to get the same + // date values as set above + double fDays = pCalendar->getLocalDateTime() + fFrac; + fDays -= fDiff; + + LanguageType eLatin, eCjk, eCtl; + rDoc.GetLanguage( eLatin, eCjk, eCtl ); + LanguageType eDocLang = eLatin; //! which language for date formats? + + SvNumFormatType nType = (nFound > 3 ? SvNumFormatType::DATETIME : SvNumFormatType::DATE); + sal_uLong nFormat = pDocFormatter->GetStandardFormat( nType, eDocLang ); + // maybe there is a special format including seconds or milliseconds + if (nFound > 5) + nFormat = pDocFormatter->GetStandardFormat( fDays, nFormat, nType, eDocLang); + + ScAddress aPos(nCol,nRow,nTab); + if ( bUseDocImport ) + rDocImport.setNumericCell(aPos, fDays); + else + rDoc.SetValue( aPos, fDays ); + rDoc.SetNumberFormat(aPos, nFormat); + + return bMultiLine; // success + } + } + } + } + + // Standard or date not determined -> SetString / EditCell + if( rStr.indexOf( '\n' ) == -1 ) + { + if (!bDetectNumFormat && nColFormat == SC_COL_STANDARD) + { + // Import a strict ISO 8601 date(+time) string even without + // "Detect special numbers" or "Date (YMD)". 
+ do + { + // Simple pre-check before calling more expensive parser. + // ([+-])(Y)YYYY-MM-DD + if (rStr.getLength() < 10) + break; + const sal_Int32 n1 = rStr.indexOf('-', 1); + if (n1 < 4) + break; + const sal_Int32 n2 = rStr.indexOf('-', n1 + 1); + if (n2 < 7 || n1 + 3 < n2) + break; + + css::util::DateTime aDateTime; + if (!sax::Converter::parseDateTime( aDateTime, rStr)) + break; + + sal_uInt32 nFormat = 0; + double fVal = 0.0; + SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable(); + if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal)) + { + if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE) + { + ScAddress aPos(nCol,nRow,nTab); + if (bUseDocImport) + rDocImport.setNumericCell(aPos, fVal); + else + rDoc.SetValue(aPos, fVal); + rDoc.SetNumberFormat(aPos, nFormat); + + return bMultiLine; // success + } + } + } + while(false); + } + + ScSetStringParam aParam; + aParam.mpNumFormatter = pFormatter; + aParam.mbDetectNumberFormat = bDetectNumFormat; + aParam.mbDetectScientificNumberFormat = bDetectSciNumFormat; + aParam.meSetTextNumFormat = ScSetStringParam::SpecialNumberOnly; + aParam.mbHandleApostrophe = false; + aParam.mbCheckLinkFormula = true; + if ( bUseDocImport ) + rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr, &aParam); + else + rDoc.SetString( nCol, nRow, nTab, rStr, &aParam ); + } + else + { + bMultiLine = true; + ScFieldEditEngine& rEngine = rDoc.GetEditEngine(); + rEngine.SetTextCurrentDefaults(rStr); + if ( bUseDocImport ) + rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject()); + else + rDoc.SetEditText( ScAddress( nCol, nRow, nTab ), rEngine.CreateTextObject() ); + } + return bMultiLine; +} + +static OUString lcl_GetFixed( const OUString& rLine, sal_Int32 nStart, sal_Int32 nNext, + bool& rbIsQuoted, bool& rbOverflowCell ) +{ + sal_Int32 nLen = rLine.getLength(); + if (nNext > nLen) + nNext = nLen; + if ( nNext <= nStart ) + return OUString(); + + const sal_Unicode* pStr = rLine.getStr(); + + 
sal_Int32 nSpace = nNext; + while ( nSpace > nStart && pStr[nSpace-1] == ' ' ) + --nSpace; + + rbIsQuoted = (pStr[nStart] == '"' && pStr[nSpace-1] == '"'); + if (rbIsQuoted) + { + bool bFits = (nSpace - nStart - 3 <= nArbitraryCellLengthLimit); + if (bFits) + return rLine.copy(nStart+1, std::max< sal_Int32 >(0, nSpace-nStart-2)); + else + { + SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data"); + rbOverflowCell = true; + return rLine.copy(nStart+1, nArbitraryCellLengthLimit); + } + } + else + { + bool bFits = (nSpace - nStart <= nArbitraryCellLengthLimit); + if (bFits) + return rLine.copy(nStart, nSpace-nStart); + else + { + SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data"); + rbOverflowCell = true; + return rLine.copy(nStart, nArbitraryCellLengthLimit); + } + } +} + +bool ScImportExport::ExtText2Doc( SvStream& rStrm ) +{ + if (!pExtOptions) + return Text2Doc( rStrm ); + + sal_uInt64 const nOldPos = rStrm.Tell(); + sal_uInt64 const nRemaining = rStrm.remainingSize(); + std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh, + ScResId( STR_LOAD_DOC ), nRemaining, true )); + rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() ); + // tdf#82254 - check whether to include a byte-order-mark in the output + if (nOldPos != rStrm.Tell()) + mbIncludeBOM = true; + + SCCOL nStartCol = aRange.aStart.Col(); + SCCOL nEndCol = aRange.aEnd.Col(); + SCROW nStartRow = aRange.aStart.Row(); + const SCTAB nTab = aRange.aStart.Tab(); + + bool bFixed = pExtOptions->IsFixedLen(); + OUString aSeps = pExtOptions->GetFieldSeps(); // Need non-const for ReadCsvLine(), + const sal_Unicode* pSeps = aSeps.getStr(); // but it will be const anyway (asserted below). 
+ bool bMerge = pExtOptions->IsMergeSeps(); + bool bRemoveSpace = pExtOptions->IsRemoveSpace(); + sal_uInt16 nInfoCount = pExtOptions->GetInfoCount(); + const sal_Int32* pColStart = pExtOptions->GetColStart(); + const sal_uInt8* pColFormat = pExtOptions->GetColFormat(); + tools::Long nSkipLines = pExtOptions->GetStartRow(); + + LanguageType eDocLang = pExtOptions->GetLanguage(); + SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eDocLang); + bool bDetectNumFormat = pExtOptions->IsDetectSpecialNumber(); + bool bDetectSciNumFormat = pExtOptions->IsDetectScientificNumber(); + bool bEvaluateFormulas = pExtOptions->IsEvaluateFormulas(); + bool bSkipEmptyCells = pExtOptions->IsSkipEmptyCells(); + + // For date recognition + ::utl::TransliterationWrapper aTransliteration( + comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE ); + aTransliteration.loadModuleIfNeeded( eDocLang ); + CalendarWrapper aCalendar( comphelper::getProcessComponentContext() ); + aCalendar.loadDefaultCalendar( + LanguageTag::convertToLocale( eDocLang ) ); + std::unique_ptr< ::utl::TransliterationWrapper > pEnglishTransliteration; + std::unique_ptr< CalendarWrapper > pEnglishCalendar; + if ( eDocLang != LANGUAGE_ENGLISH_US ) + { + pEnglishTransliteration.reset(new ::utl::TransliterationWrapper ( + comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE )); + aTransliteration.loadModuleIfNeeded( LANGUAGE_ENGLISH_US ); + pEnglishCalendar.reset(new CalendarWrapper ( comphelper::getProcessComponentContext() )); + pEnglishCalendar->loadDefaultCalendar( + LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US ) ); + } + + OUString aLine; + OUString aCell; + sal_uInt16 i; + SCROW nRow = nStartRow; + sal_Unicode cDetectSep = 0xffff; // No separator detection here. + + while(--nSkipLines>0) + { + aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); // content is ignored + if ( rStrm.eof() ) + break; + } + + // Determine range for Undo. 
+ // We don't need this during import of a file to a new sheet or document... + bool bDetermineRange = bUndo; + bool bColumnsAreDetermined = false; + + // Row heights don't need to be adjusted on the fly if EndPaste() is called + // afterwards, which happens only if bDetermineRange. This variable also + // survives the toggle of bDetermineRange down at the end of the do{} loop. + bool bRangeIsDetermined = bDetermineRange; + + bool bQuotedAsText = pExtOptions && pExtOptions->IsQuotedAsText(); + + sal_uInt64 nOriginalStreamPos = rStrm.Tell(); + + SCROW nFirstUpdateRowHeight = SCROW_MAX; + SCROW nLastUpdateRowHeight = -1; + + ScDocumentImport aDocImport(rDoc); + do + { + for( ;; ) + { + aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); + if ( rStrm.eof() && aLine.isEmpty() ) + break; + + assert(pSeps == aSeps.getStr()); + + if ( nRow > rDoc.MaxRow() ) + { + bOverflowRow = true; // display warning on import + break; // for + } + + if (!bDetermineRange) + EmbeddedNullTreatment( aLine); + + sal_Int32 nLineLen = aLine.getLength(); + SCCOL nCol = nStartCol; + bool bMultiLine = false; + if ( bFixed ) // Fixed line length + { + if (bDetermineRange) + { + if (!bColumnsAreDetermined) + { + // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it + // is only an overflow if there is really data following to + // be put behind the last column, which doesn't happen if + // info is SC_COL_SKIP. + for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i) + { + const sal_uInt8 nFmt = pColFormat[i]; + if (nFmt != SC_COL_SKIP) // otherwise don't increment nCol either + { + if (nCol > rDoc.MaxCol()) + bOverflowCol = true; // display warning on import + ++nCol; + } + } + bColumnsAreDetermined = true; + } + } + else + { + sal_Int32 nStartIdx = 0; + // Same maxcol+1 check reason as above. 
+ for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i) + { + sal_Int32 nNextIdx = nStartIdx; + if (i + 1 < nInfoCount) + CountVisualWidth( aLine, nNextIdx, pColStart[i+1] - pColStart[i] ); + else + nNextIdx = nLineLen; + sal_uInt8 nFmt = pColFormat[i]; + if (nFmt != SC_COL_SKIP) // otherwise don't increment nCol either + { + if (nCol > rDoc.MaxCol()) + bOverflowCol = true; // display warning on import + else + { + bool bIsQuoted = false; + aCell = lcl_GetFixed( aLine, nStartIdx, nNextIdx, bIsQuoted, bOverflowCell ); + if (bIsQuoted && bQuotedAsText) + nFmt = SC_COL_TEXT; + + bMultiLine |= lcl_PutString( + aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt, + &aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells, + aTransliteration, aCalendar, + pEnglishTransliteration.get(), pEnglishCalendar.get()); + } + ++nCol; + } + nStartIdx = nNextIdx; + } + } + } + else // Search for the separator + { + SCCOL nSourceCol = 0; + sal_uInt16 nInfoStart = 0; + const sal_Unicode* p = aLine.getStr(); + // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an + // overflow if there is really data following to be put behind + // the last column, which doesn't happen if info is + // SC_COL_SKIP. 
+ while (*p && nCol <= rDoc.MaxCol()+1) + { + bool bIsQuoted = false; + p = ScImportExport::ScanNextFieldFromString( p, aCell, + cStr, pSeps, bMerge, bIsQuoted, bOverflowCell, bRemoveSpace ); + + sal_uInt8 nFmt = SC_COL_STANDARD; + for ( i=nInfoStart; i<nInfoCount; i++ ) + { + if ( pColStart[i] == nSourceCol + 1 ) // pColStart is 1-based + { + nFmt = pColFormat[i]; + nInfoStart = i + 1; // ColInfos are in succession + break; // for + } + } + if ( nFmt != SC_COL_SKIP ) + { + if (nCol > rDoc.MaxCol()) + bOverflowCol = true; // display warning on import + else if (!bDetermineRange) + { + if (bIsQuoted && bQuotedAsText) + nFmt = SC_COL_TEXT; + + bMultiLine |= lcl_PutString( + aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt, + &aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells, + aTransliteration, aCalendar, + pEnglishTransliteration.get(), pEnglishCalendar.get()); + } + ++nCol; + } + + ++nSourceCol; + } + } + if (nEndCol < nCol) + nEndCol = nCol; //! points to the next free or even rDoc.MaxCol()+2 + + if (!bDetermineRange) + { + if (bMultiLine && !bRangeIsDetermined && pDocSh) + { // Adjust just once at the end for a whole range. 
+ nFirstUpdateRowHeight = std::min( nFirstUpdateRowHeight, nRow ); + nLastUpdateRowHeight = std::max( nLastUpdateRowHeight, nRow ); + } + xProgress->SetStateOnPercent( rStrm.Tell() - nOldPos ); + } + ++nRow; + } + // so far nRow/nEndCol pointed to the next free + if (nRow > nStartRow) + --nRow; + if (nEndCol > nStartCol) + nEndCol = ::std::min( static_cast<SCCOL>(nEndCol - 1), rDoc.MaxCol()); + + if (bDetermineRange) + { + aRange.aEnd.SetCol( nEndCol ); + aRange.aEnd.SetRow( nRow ); + + if ( !mbApi && nStartCol != nEndCol && + !rDoc.IsBlockEmpty( nStartCol + 1, nStartRow, nEndCol, nRow, nTab ) ) + { + ScReplaceWarnBox aBox(ScDocShell::GetActiveDialogParent()); + if (aBox.run() != RET_YES) + { + return false; + } + } + + rStrm.Seek( nOriginalStreamPos ); + nRow = nStartRow; + if (!StartPaste()) + { + EndPaste(false); + return false; + } + } + + bDetermineRange = !bDetermineRange; // toggle + } while (!bDetermineRange); + + if ( !mbOverwriting ) + aDocImport.finalize(); + + xProgress.reset(); // make room for AdjustRowHeight progress + + if( nFirstUpdateRowHeight < nLastUpdateRowHeight && pDocSh ) + pDocSh->AdjustRowHeight( nFirstUpdateRowHeight, nLastUpdateRowHeight, nTab); + + if (bRangeIsDetermined) + EndPaste(false); + + if (mbImportBroadcast && !mbOverwriting) + { + rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged); + pDocSh->PostDataChanged(); + } + return true; +} + +void ScImportExport::EmbeddedNullTreatment( OUString & rStr ) +{ + // A nasty workaround for data with embedded NULL characters. As long as we + // can't handle them properly as cell content (things assume 0-terminated + // strings at too many places) simply strip all NULL characters from raw + // data. Excel does the same. See fdo#57841 for sample data. + + // The normal case is no embedded NULL, check first before de-/allocating + // ustring stuff. 
+ sal_Unicode cNull = 0; + if (sal_Int32 pos = rStr.indexOf(cNull); pos >= 0) + { + rStr = rStr.replaceAll(std::u16string_view(&cNull, 1), u"", pos); + } +} + +const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p, + OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps, bool bMergeSeps, bool& rbIsQuoted, + bool& rbOverflowCell, bool bRemoveSpace ) +{ + rbIsQuoted = false; + rField.clear(); + const sal_Unicode cBlank = ' '; + if (cStr && !ScGlobal::UnicodeStrChr(pSeps, cBlank)) + { + // Cope with broken generators that put leading blanks before a quoted + // field, like "field1", "field2", "..." + // NOTE: this is not in conformance with http://tools.ietf.org/html/rfc4180 + const sal_Unicode* pb = p; + while (*pb == cBlank) + ++pb; + if (*pb == cStr) + p = pb; + } + if (cStr && *p == cStr) // String in quotes + { + rbIsQuoted = true; + const sal_Unicode* p1; + p1 = p = lcl_ScanString( p, rField, pSeps, cStr, DoubledQuoteMode::ESCAPE, rbOverflowCell ); + while (!lcl_isFieldEnd( *p, pSeps)) + p++; + // Append remaining unquoted and undelimited data (dirty, dirty) to + // this field. 
+ if (p > p1) + { + const sal_Unicode* ptrim_f = p; + if ( bRemoveSpace ) + { + while ( ptrim_f > p1 && ( *(ptrim_f - 1) == cBlank ) ) + --ptrim_f; + } + if (!lcl_appendLineData( rField, p1, ptrim_f)) + rbOverflowCell = true; + } + if( *p ) + p++; + } + else // up to delimiter + { + const sal_Unicode* p0 = p; + while (!lcl_isFieldEnd( *p, pSeps)) + p++; + const sal_Unicode* ptrim_i = p0; + const sal_Unicode* ptrim_f = p; // [ptrim_i,ptrim_f) is cell data after trimming + if ( bRemoveSpace ) + { + while ( ptrim_i < ptrim_f && *ptrim_i == cBlank ) + ++ptrim_i; + while ( ptrim_f > ptrim_i && ( *(ptrim_f - 1) == cBlank ) ) + --ptrim_f; + } + if (!lcl_appendLineData( rField, ptrim_i, ptrim_f)) + rbOverflowCell = true; + if( *p ) + p++; + } + if ( bMergeSeps ) // skip following delimiters + { + while (*p && ScGlobal::UnicodeStrChr( pSeps, *p)) + p++; + } + return p; +} + +namespace { + +/** + * Check if a given string has any line break characters or separators. + * + * @param rStr string to inspect. + * @param cSep separator character. + */ +bool hasLineBreaksOrSeps( const OUString& rStr, sal_Unicode cSep ) +{ + const sal_Unicode* p = rStr.getStr(); + for (sal_Int32 i = 0, n = rStr.getLength(); i < n; ++i, ++p) + { + sal_Unicode c = *p; + if (c == cSep) + // separator found. + return true; + + switch (c) + { + case '\n': + case '\r': + // line break found. 
+ return true; + default: + ; + } + } + return false; +} + +} + +bool ScImportExport::Doc2Text( SvStream& rStrm ) +{ + SCCOL nCol; + SCROW nRow; + SCCOL nStartCol = aRange.aStart.Col(); + SCROW nStartRow = aRange.aStart.Row(); + SCTAB nStartTab = aRange.aStart.Tab(); + SCCOL nEndCol = aRange.aEnd.Col(); + SCROW nEndRow = aRange.aEnd.Row(); + SCTAB nEndTab = aRange.aEnd.Tab(); + + if (!rDoc.GetClipParam().isMultiRange() && nStartTab == nEndTab) + if (!rDoc.ShrinkToDataArea( nStartTab, nStartCol, nStartRow, nEndCol, nEndRow )) + return false; + + OUString aCellStr; + + bool bConvertLF = (GetSystemLineEnd() != LINEEND_LF); + + // We need to cache sc::ColumnBlockPosition per each column, tab is always nStartTab. + std::vector< sc::ColumnBlockPosition > blockPos( nEndCol - nStartCol + 1 ); + for( SCCOL i = nStartCol; i <= nEndCol; ++i ) + rDoc.InitColumnBlockPosition( blockPos[ i - nStartCol ], nStartTab, i ); + for (nRow = nStartRow; nRow <= nEndRow; nRow++) + { + if (bIncludeFiltered || !rDoc.RowFiltered( nRow, nStartTab )) + { + for (nCol = nStartCol; nCol <= nEndCol; nCol++) + { + ScAddress aPos(nCol, nRow, nStartTab); + sal_uInt32 nNumFmt = rDoc.GetNumberFormat(aPos); + SvNumberFormatter* pFormatter = rDoc.GetFormatTable(); + + ScRefCellValue aCell(rDoc, aPos, blockPos[ nCol - nStartCol ]); + switch (aCell.getType()) + { + case CELLTYPE_FORMULA: + { + if (bFormulas) + { + aCellStr = aCell.getFormula()->GetFormula(); + if( aCellStr.indexOf( cSep ) != -1 ) + lcl_WriteString( rStrm, aCellStr, cStr, cStr ); + else + lcl_WriteSimpleString( rStrm, aCellStr ); + } + else + { + const Color* pColor; + aCellStr = ScCellFormat::GetString(aCell, nNumFmt, &pColor, *pFormatter, rDoc); + + bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 ); + if( bMultiLineText ) + { + if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace ) + aCellStr = aCellStr.replaceAll( "\n", " " ); + else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem 
&& bConvertLF ) + aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd()); + } + + if( mExportTextOptions.mcSeparatorConvertTo && cSep ) + aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) ); + + if( mExportTextOptions.mbAddQuotes && ( aCellStr.indexOf( cSep ) != -1 ) ) + lcl_WriteString( rStrm, aCellStr, cStr, cStr ); + else + lcl_WriteSimpleString( rStrm, aCellStr ); + } + } + break; + case CELLTYPE_VALUE: + { + const Color* pColor; + aCellStr = ScCellFormat::GetString(aCell, nNumFmt, &pColor, *pFormatter, rDoc); + lcl_WriteSimpleString( rStrm, aCellStr ); + } + break; + case CELLTYPE_NONE: + break; + default: + { + const Color* pColor; + aCellStr = ScCellFormat::GetString(aCell, nNumFmt, &pColor, *pFormatter, rDoc); + + bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 ); + if( bMultiLineText ) + { + if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace ) + aCellStr = aCellStr.replaceAll( "\n", " " ); + else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF ) + aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd()); + } + + if( mExportTextOptions.mcSeparatorConvertTo && cSep ) + aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) ); + + if( mExportTextOptions.mbAddQuotes && hasLineBreaksOrSeps(aCellStr, cSep) ) + lcl_WriteString( rStrm, aCellStr, cStr, cStr ); + else + lcl_WriteSimpleString( rStrm, aCellStr ); + } + } + if( nCol < nEndCol ) + lcl_WriteSimpleString( rStrm, rtl::OUStringChar(cSep) ); + } + // Do not append a line feed for one single cell. + // NOTE: this Doc2Text() is only called for clipboard via + // ScImportExport::ExportStream(). 
+ if (nStartRow != nEndRow || nStartCol != nEndCol) + WriteUnicodeOrByteEndl( rStrm ); + if( rStrm.GetError() != ERRCODE_NONE ) + break; + if( nSizeLimit && rStrm.Tell() > nSizeLimit ) + break; + } + } + + return rStrm.GetError() == ERRCODE_NONE; +} + +bool ScImportExport::Sylk2Doc( SvStream& rStrm ) +{ + bool bOk = true; + bool bMyDoc = false; + SylkVersion eVersion = SylkVersion::OTHER; + + // US-English separators for StringToDouble + sal_Unicode const cDecSep = '.'; + sal_Unicode const cGrpSep = ','; + + SCCOL nStartCol = aRange.aStart.Col(); + SCROW nStartRow = aRange.aStart.Row(); + SCCOL nEndCol = aRange.aEnd.Col(); + SCROW nEndRow = aRange.aEnd.Row(); + sal_uInt64 nOldPos = rStrm.Tell(); + bool bData = !bSingle; + ::std::vector< sal_uInt32 > aFormats; + + if( !bSingle) + bOk = StartPaste(); + + while( bOk ) + { + OUString aLine; + OUString aText; + OStringBuffer aByteLine; + SCCOL nCol = nStartCol; + SCROW nRow = nStartRow; + SCCOL nRefCol = nCol; + SCROW nRefRow = nRow; + rStrm.Seek( nOldPos ); + for( ;; ) + { + //! 
allow unicode + rStrm.ReadLine( aByteLine ); + aLine = OStringToOUString(aByteLine, rStrm.GetStreamCharSet()); + if( rStrm.eof() ) + break; + bool bInvalidCol = false; + bool bInvalidRow = false; + const sal_Unicode* p = aLine.getStr(); + sal_Unicode cTag = *p++; + if( cTag == 'C' ) // Content + { + if( *p++ != ';' ) + return false; + + bool bInvalidRefCol = false; + bool bInvalidRefRow = false; + while( *p ) + { + sal_Unicode ch = *p++; + ch = ScGlobal::ToUpperAlpha( ch ); + switch( ch ) + { + case 'X': + { + bInvalidCol = false; + bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(p)), nStartCol - 1, nCol); + if (bFail || nCol < 0 || rDoc.MaxCol() < nCol) + { + SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol); + nCol = std::clamp<SCCOL>(nCol, 0, rDoc.MaxCol()); + bInvalidCol = bOverflowCol = true; + } + break; + } + case 'Y': + { + bInvalidRow = false; + bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(p)), nStartRow - 1, nRow); + if (bFail || nRow < 0 || nMaxImportRow < nRow) + { + SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow); + nRow = std::clamp<SCROW>(nRow, 0, nMaxImportRow); + bInvalidRow = bOverflowRow = true; + } + break; + } + case 'C': + { + bInvalidRefCol = false; + bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(p)), nStartCol - 1, nRefCol); + if (bFail || nRefCol < 0 || rDoc.MaxCol() < nRefCol) + { + SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;C invalid nRefCol=" << nRefCol); + nRefCol = std::clamp<SCCOL>(nRefCol, 0, rDoc.MaxCol()); + bInvalidRefCol = bOverflowCol = true; + } + break; + } + case 'R': + { + bInvalidRefRow = false; + bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(p)), nStartRow - 1, nRefRow); + if (bFail || nRefRow < 0 || nMaxImportRow < nRefRow) + { + SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;R invalid nRefRow=" << nRefRow); + nRefRow = std::clamp<SCROW>(nRefRow, 0, nMaxImportRow); + bInvalidRefRow = 
bOverflowRow = true; + } + break; + } + case 'K': + { + if( !bSingle && + ( nCol < nStartCol || nCol > nEndCol + || nRow < nStartRow || nRow > nEndRow + || nCol > rDoc.MaxCol() || nRow > nMaxImportRow + || bInvalidCol || bInvalidRow ) ) + break; + if( !bData ) + { + if( nRow > nEndRow ) + nEndRow = nRow; + if( nCol > nEndCol ) + nEndCol = nCol; + break; + } + bool bText; + if( *p == '"' ) + { + bText = true; + aText.clear(); + p = lcl_ScanSylkString( p, aText, eVersion); + } + else + bText = false; + const sal_Unicode* q = p; + while( *q && *q != ';' ) + q++; + if ( (*q != ';' || *(q+1) != 'I') && !bInvalidCol && !bInvalidRow ) + { // don't ignore value + if( bText ) + { + rDoc.EnsureTable(aRange.aStart.Tab()); + rDoc.SetTextCell( + ScAddress(nCol, nRow, aRange.aStart.Tab()), aText); + } + else + { + double fVal = rtl_math_uStringToDouble( p, + aLine.getStr() + aLine.getLength(), + cDecSep, cGrpSep, nullptr, nullptr ); + rDoc.SetValue( nCol, nRow, aRange.aStart.Tab(), fVal ); + } + } + } + break; + case 'E': + case 'M': + { + if ( ch == 'M' ) + { + if ( nRefCol < nCol ) + nRefCol = nCol; + if ( nRefRow < nRow ) + nRefRow = nRow; + if ( !bData ) + { + if( nRefRow > nEndRow ) + nEndRow = nRefRow; + if( nRefCol > nEndCol ) + nEndCol = nRefCol; + } + } + if( !bMyDoc || !bData ) + break; + aText = "="; + p = lcl_ScanSylkFormula( p, aText, eVersion); + + if (bInvalidCol || bInvalidRow || (ch == 'M' && (bInvalidRefCol || bInvalidRefRow))) + break; + + ScAddress aPos( nCol, nRow, aRange.aStart.Tab() ); + /* FIXME: do we want GRAM_ODFF_A1 instead? At the + * end it probably should be GRAM_ODFF_R1C1, since + * R1C1 is what Excel writes in SYLK, or even + * better GRAM_ENGLISH_XL_R1C1. 
*/ + const formula::FormulaGrammar::Grammar eGrammar = formula::FormulaGrammar::GRAM_PODF_A1; + ScCompiler aComp(rDoc, aPos, eGrammar); + std::unique_ptr<ScTokenArray> xCode(aComp.CompileString(aText)); // ctor/InsertMatrixFormula did copy TokenArray + rDoc.CheckLinkFormulaNeedingCheck(*xCode); + if ( ch == 'M' ) + { + ScMarkData aMark(rDoc.GetSheetLimits()); + aMark.SelectTable( aPos.Tab(), true ); + rDoc.InsertMatrixFormula( nCol, nRow, nRefCol, + nRefRow, aMark, OUString(), xCode.get() ); + } + else + { + ScFormulaCell* pFCell = new ScFormulaCell( + rDoc, aPos, *xCode, eGrammar, ScMatrixMode::NONE); + rDoc.SetFormulaCell(aPos, pFCell); + } + } + break; + } + while( *p && *p != ';' ) + p++; + if( *p ) + p++; + } + } + else if( cTag == 'F' ) // Format + { + if( *p++ != ';' ) + return false; + sal_Int32 nFormat = -1; + while( *p ) + { + sal_Unicode ch = *p++; + ch = ScGlobal::ToUpperAlpha( ch ); + switch( ch ) + { + case 'X': + { + bInvalidCol = false; + bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(p)), nStartCol - 1, nCol); + if (bFail || nCol < 0 || rDoc.MaxCol() < nCol) + { + SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol); + nCol = std::clamp<SCCOL>(nCol, 0, rDoc.MaxCol()); + bInvalidCol = bOverflowCol = true; + } + break; + } + case 'Y': + { + bInvalidRow = false; + bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(p)), nStartRow - 1, nRow); + if (bFail || nRow < 0 || nMaxImportRow < nRow) + { + SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow); + nRow = std::clamp<SCROW>(nRow, 0, nMaxImportRow); + bInvalidRow = bOverflowRow = true; + } + break; + } + case 'P' : + if ( bData ) + { + // F;P<n> sets format code of P;P<code> at + // current position, or at ;X;Y if specified. 
+ // Note that ;X;Y may appear after ;P + const sal_Unicode* p0 = p; + while( *p && *p != ';' ) + p++; + OUString aNumber(p0, p - p0); + nFormat = aNumber.toInt32(); + } + break; + } + while( *p && *p != ';' ) + p++; + if( *p ) + p++; + } + if ( !bData ) + { + if( nRow > nEndRow ) + nEndRow = nRow; + if( nCol > nEndCol ) + nEndCol = nCol; + } + if ( 0 <= nFormat && o3tl::make_unsigned(nFormat) < aFormats.size() && !bInvalidCol && !bInvalidRow ) + { + sal_uInt32 nKey = aFormats[nFormat]; + rDoc.ApplyAttr( nCol, nRow, aRange.aStart.Tab(), + SfxUInt32Item( ATTR_VALUE_FORMAT, nKey ) ); + } + } + else if( cTag == 'P' ) + { + if ( bData && *p == ';' && *(p+1) == 'P' ) + { + OUString aCode( p+2 ); + + sal_uInt32 nKey; + sal_Int32 nCheckPos; + + if (aCode.getLength() > 2048 && utl::ConfigManager::IsFuzzing()) + { + // consider an excessive length as a failure when fuzzing + nCheckPos = 1; + } + else + { + // unescape doubled semicolons + aCode = aCode.replaceAll(";;", ";"); + // get rid of Xcl escape characters + aCode = aCode.replaceAll("\x1b", ""); + SvNumFormatType nType; + rDoc.GetFormatTable()->PutandConvertEntry( aCode, nCheckPos, nType, nKey, + LANGUAGE_ENGLISH_US, ScGlobal::eLnge, false); + } + + if ( nCheckPos ) + nKey = 0; + + aFormats.push_back( nKey ); + } + } + else if (cTag == 'I' && *p == 'D' && aLine.getLength() > 4) + { + aLine = aLine.copy(4); + if (aLine == "CALCOOO32") + eVersion = SylkVersion::OOO32; + else if (aLine == "SCALC3") + eVersion = SylkVersion::SCALC3; + bMyDoc = (eVersion <= SylkVersion::OWN); + } + else if( cTag == 'E' ) // End + break; + } + if( !bData ) + { + aRange.aEnd.SetCol( nEndCol ); + aRange.aEnd.SetRow( nEndRow ); + bOk = StartPaste(); + bData = true; + } + else + break; + } + + EndPaste(); + return bOk; +} + +bool ScImportExport::Doc2Sylk( SvStream& rStrm ) +{ + SCCOL nCol; + SCROW nRow; + SCCOL nStartCol = aRange.aStart.Col(); + SCROW nStartRow = aRange.aStart.Row(); + SCCOL nEndCol = aRange.aEnd.Col(); + SCROW nEndRow = 
aRange.aEnd.Row(); + OUString aCellStr; + OUString aValStr; + lcl_WriteSimpleString( rStrm, u"ID;PCALCOOO32" ); + WriteUnicodeOrByteEndl( rStrm ); + + for (nRow = nStartRow; nRow <= nEndRow; nRow++) + { + for (nCol = nStartCol; nCol <= nEndCol; nCol++) + { + OUString aBufStr; + double nVal; + bool bForm = false; + SCROW r = nRow - nStartRow + 1; + SCCOL c = nCol - nStartCol + 1; + ScRefCellValue aCell(rDoc, ScAddress(nCol, nRow, aRange.aStart.Tab())); + CellType eType = aCell.getType(); + switch( eType ) + { + case CELLTYPE_FORMULA: + bForm = bFormulas; + if( rDoc.HasValueData( nCol, nRow, aRange.aStart.Tab()) ) + goto hasvalue; + else + goto hasstring; + + case CELLTYPE_VALUE: + hasvalue: + nVal = rDoc.GetValue( nCol, nRow, aRange.aStart.Tab() ); + + aValStr = ::rtl::math::doubleToUString( nVal, + rtl_math_StringFormat_Automatic, + rtl_math_DecimalPlaces_Max, '.', true ); + + aBufStr = "C;X" + + OUString::number( c ) + + ";Y" + + OUString::number( r ) + + ";K" + + aValStr; + lcl_WriteSimpleString( rStrm, aBufStr ); + goto checkformula; + + case CELLTYPE_STRING: + case CELLTYPE_EDIT: + hasstring: + aCellStr = rDoc.GetString(nCol, nRow, aRange.aStart.Tab()); + aCellStr = aCellStr.replaceAll("\n", SYLK_LF); + + aBufStr = "C;X" + + OUString::number( c ) + + ";Y" + + OUString::number( r ) + + ";K"; + lcl_WriteSimpleString( rStrm, aBufStr ); + lcl_WriteString( rStrm, aCellStr, '"', ';' ); + + checkformula: + if( bForm ) + { + const ScFormulaCell* pFCell = aCell.getFormula(); + switch ( pFCell->GetMatrixFlag() ) + { + case ScMatrixMode::Reference : + aCellStr.clear(); + break; + default: + aCellStr = pFCell->GetFormula( formula::FormulaGrammar::GRAM_PODF_A1); + /* FIXME: do we want GRAM_ODFF_A1 instead? At + * the end it probably should be + * GRAM_ODFF_R1C1, since R1C1 is what Excel + * writes in SYLK, or even better + * GRAM_ENGLISH_XL_R1C1. 
*/ + } + if ( pFCell->GetMatrixFlag() != ScMatrixMode::NONE && + aCellStr.startsWith("{") && + aCellStr.endsWith("}") ) + { // cut off matrix {} characters + aCellStr = aCellStr.copy(1, aCellStr.getLength()-2); + } + if ( aCellStr[0] == '=' ) + aCellStr = aCellStr.copy(1); + OUString aPrefix; + switch ( pFCell->GetMatrixFlag() ) + { + case ScMatrixMode::Formula : + { // diff expression with 'M' M$-extension + SCCOL nC; + SCROW nR; + pFCell->GetMatColsRows( nC, nR ); + nC += c - 1; + nR += r - 1; + aPrefix = ";R" + + OUString::number( nR ) + + ";C" + + OUString::number( nC ) + + ";M"; + } + break; + case ScMatrixMode::Reference : + { // diff expression with 'I' M$-extension + ScAddress aPos; + (void)pFCell->GetMatrixOrigin( rDoc, aPos ); + aPrefix = ";I;R" + + OUString::number( aPos.Row() - nStartRow + 1 ) + + ";C" + + OUString::number( aPos.Col() - nStartCol + 1 ); + } + break; + default: + // formula Expression + aPrefix = ";E"; + } + lcl_WriteSimpleString( rStrm, aPrefix ); + if ( !aCellStr.isEmpty() ) + lcl_WriteString( rStrm, aCellStr, 0, ';' ); + } + WriteUnicodeOrByteEndl( rStrm ); + break; + + default: + { + // added to avoid warnings + } + } + } + } + lcl_WriteSimpleString( rStrm, rtl::OUStringChar( 'E' ) ); + WriteUnicodeOrByteEndl( rStrm ); + return rStrm.GetError() == ERRCODE_NONE; +} + +bool ScImportExport::Doc2HTML( SvStream& rStrm, const OUString& rBaseURL ) +{ + // rtl_TextEncoding is ignored in ScExportHTML, read from Load/Save HTML options + ScFormatFilter::Get().ScExportHTML( rStrm, rBaseURL, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW, bAll, + aStreamPath, aNonConvertibleChars, maFilterOptions ); + return rStrm.GetError() == ERRCODE_NONE; +} + +bool ScImportExport::Doc2RTF( SvStream& rStrm ) +{ + // rtl_TextEncoding is ignored in ScExportRTF + ScFormatFilter::Get().ScExportRTF( rStrm, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW ); + return rStrm.GetError() == ERRCODE_NONE; +} + +bool ScImportExport::Doc2Dif( SvStream& rStrm ) +{ + // for DIF in the 
clipboard, IBM_850 is always used + ScFormatFilter::Get().ScExportDif( rStrm, &rDoc, aRange, RTL_TEXTENCODING_IBM_850 ); + return true; +} + +bool ScImportExport::Dif2Doc( SvStream& rStrm ) +{ + SCTAB nTab = aRange.aStart.Tab(); + ScDocumentUniquePtr pImportDoc( new ScDocument( SCDOCMODE_UNDO ) ); + pImportDoc->InitUndo( rDoc, nTab, nTab ); + + // for DIF in the clipboard, IBM_850 is always used + ScFormatFilter::Get().ScImportDif( rStrm, pImportDoc.get(), aRange.aStart, RTL_TEXTENCODING_IBM_850 ); + + SCCOL nEndCol; + SCROW nEndRow; + pImportDoc->GetCellArea( nTab, nEndCol, nEndRow ); + // if there are no cells in the imported content, nEndCol/nEndRow may be before the start + if ( nEndCol < aRange.aStart.Col() ) + nEndCol = aRange.aStart.Col(); + if ( nEndRow < aRange.aStart.Row() ) + nEndRow = aRange.aStart.Row(); + aRange.aEnd = ScAddress( nEndCol, nEndRow, nTab ); + + bool bOk = StartPaste(); + if (bOk) + { + InsertDeleteFlags nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES; + rDoc.DeleteAreaTab( aRange, nFlags ); + pImportDoc->CopyToDocument(aRange, nFlags, false, rDoc); + EndPaste(); + } + + return bOk; +} + +bool ScImportExport::RTF2Doc( SvStream& rStrm, const OUString& rBaseURL ) +{ + std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateRTFImport( &rDoc, aRange ); + if (!pImp) + return false; + pImp->Read( rStrm, rBaseURL ); + aRange = pImp->GetRange(); + + bool bOk = StartPaste(); + if (bOk) + { + InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES; + rDoc.DeleteAreaTab( aRange, nFlags ); + pImp->WriteToDocument(); + EndPaste(); + } + return bOk; +} + +bool ScImportExport::HTML2Doc( SvStream& rStrm, const OUString& rBaseURL ) +{ + std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateHTMLImport( &rDoc, rBaseURL, aRange); + if (!pImp) + return false; + pImp->Read( rStrm, rBaseURL ); + aRange = pImp->GetRange(); + + bool bOk = StartPaste(); + if (bOk) + { + // ScHTMLImport may call 
ScDocument::InitDrawLayer, resulting in + // a Draw Layer but no Draw View -> create Draw Layer and View here + if (pDocSh) + pDocSh->MakeDrawLayer(); + + InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES; + rDoc.DeleteAreaTab( aRange, nFlags ); + + if (pExtOptions) + { + // Pick up import options if available. + LanguageType eLang = pExtOptions->GetLanguage(); + SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eLang); + bool bSpecialNumber = pExtOptions->IsDetectSpecialNumber(); + bool bScientificNumber = pExtOptions->IsDetectScientificNumber(); + pImp->WriteToDocument(false, 1.0, &aNumFormatter, bSpecialNumber, bScientificNumber); + } + else + // Regular import, with no options. + pImp->WriteToDocument(); + + EndPaste(); + } + return bOk; +} + +#ifndef DISABLE_DYNLOADING + +extern "C" { static void thisModule() {} } + +#else + +extern "C" { +ScFormatFilterPlugin* ScFilterCreate(); +} + +#endif + +typedef ScFormatFilterPlugin * (*FilterFn)(); +ScFormatFilterPlugin &ScFormatFilter::Get() +{ + static ScFormatFilterPlugin *plugin = []() + { +#ifndef DISABLE_DYNLOADING + OUString sFilterLib(SVLIBRARY("scfilt")); + static ::osl::Module aModule; + bool bLoaded = aModule.is(); + if (!bLoaded) + bLoaded = aModule.loadRelative(&thisModule, sFilterLib); + if (!bLoaded) + bLoaded = aModule.load(sFilterLib); + if (bLoaded) + { + oslGenericFunction fn = aModule.getFunctionSymbol( "ScFilterCreate" ); + if (fn != nullptr) + return reinterpret_cast<FilterFn>(fn)(); + } + assert(false); + return static_cast<ScFormatFilterPlugin*>(nullptr); +#else + return ScFilterCreate(); +#endif + }(); + + return *plugin; +} + +// Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated +// array. 
// Returns a pointer to the first occurrence of c in pStr, or nullptr if c
// does not occur before the terminating 0.
static const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
        sal_Unicode c )
{
    while (*pStr)
    {
        if (*pStr == c)
            return pStr;
        ++pStr;
    }
    return nullptr;
}

/** Construct a read-only memory stream over the characters of rStr.

    The stream's character set is set to RTL_TEXTENCODING_UNICODE and its
    endianness to the platform's (selected via OSL_BIGENDIAN).

    NOTE(review): the base class is handed rStr's own buffer; presumably no
    copy is made, so rStr would have to outlive the stream - confirm against
    SvMemoryStream.
 */
ScImportStringStream::ScImportStringStream( const OUString& rStr )
    : SvMemoryStream( const_cast<sal_Unicode *>(rStr.getStr()),
            rStr.getLength() * sizeof(sal_Unicode), StreamMode::READ)
{
    SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
#ifdef OSL_BIGENDIAN
    SetEndian(SvStreamEndian::BIG);
#else
    SetEndian(SvStreamEndian::LITTLE);
#endif
}

/** Read one logical CSV line from rStream.

    One physical line is always read. If bEmbeddedLineBreak is set, the text
    is scanned for field quotes, and as long as a quoted field is still open
    at the end of what was read, the next physical line is appended, joined
    with "\n". Appending stops at end of stream, when the accumulated length
    reaches nArbitraryLineLengthLimit, when an unescaped embedded quote was
    seen (DONTKNOW_QUOTE), or when nMaxSourceLines lines were consumed.

    If a quoted field is still open when the loop ends, the result is cut back
    to the length recorded when the last field-opening quote was seen, and the
    stream is repositioned to the position recorded at that time.

    @param rStream             stream to read from
    @param bEmbeddedLineBreak  whether quoted fields may span physical lines
    @param rFieldSeparators    separator characters; a detected separator is
                               appended to it (see rcDetectSep)
    @param cFieldQuote         the field quote character
    @param rcDetectSep         if 0 on entry (and bEmbeddedLineBreak is set),
                               one retry is allowed: once
                               lcl_isEscapedOrFieldEndQuote() has set it to a
                               non-zero value, that character is appended to
                               rFieldSeparators, the stream is rewound to where
                               this call started and the line is read again
    @param nMaxSourceLines     maximum number of physical lines to combine;
                               0 means no limit
    @return the line read, possibly spanning several physical lines
 */
OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak,
        OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep, sal_uInt32 nMaxSourceLines )
{
    enum RetryState
    {
        FORBID,
        ALLOW,
        RETRY,
        RETRIED
    } eRetryState = (bEmbeddedLineBreak && rcDetectSep == 0 ? RetryState::ALLOW : RetryState::FORBID);

    // Start position is only needed if a retry may rewind to it.
    sal_uInt64 nStreamPos = (eRetryState == RetryState::ALLOW ? rStream.Tell() : 0);

Label_RetryWithNewSep:

    if (eRetryState == RetryState::RETRY)
    {
        // RETRIED is neither ALLOW nor RETRY, so this happens at most once.
        eRetryState = RetryState::RETRIED;
        rStream.Seek( nStreamPos);
    }

    OUString aStr;
    rStream.ReadUniOrByteStringLine(aStr, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);

    if (bEmbeddedLineBreak)
    {
        // Fallback result (length and stream position) used if a quoted field
        // is never closed; updated whenever a field-opening quote is seen.
        sal_Int32 nFirstLineLength = aStr.getLength();
        sal_uInt64 nFirstLineStreamPos = rStream.Tell();
        sal_uInt32 nLine = 0;

        const sal_Unicode* pSeps = rFieldSeparators.getStr();

        QuoteType eQuoteState = FIELDEND_QUOTE;
        bool bFieldStart = true;

        // Offset in aStr where scanning resumes after a line was appended.
        sal_Int32 nLastOffset = 0;
        // Number of quotes counted for pairing; odd means a quoted field is open.
        sal_Int32 nQuotes = 0;
        while (!rStream.eof() && aStr.getLength() < nArbitraryLineLengthLimit)
        {
            const sal_Unicode * p = aStr.getStr() + nLastOffset;
            const sal_Unicode * const pStop = aStr.getStr() + aStr.getLength();
            while (p < pStop)
            {
                if (!*p)
                {
                    // Skip embedded null-characters. They don't change
                    // anything and are handled at a higher level.
                    ++p;
                    continue;
                }

                if (nQuotes)
                {
                    if (*p == cFieldQuote)
                    {
                        if (bFieldStart)
                        {
                            ++nQuotes;
                            bFieldStart = false;
                            eQuoteState = FIELDSTART_QUOTE;
                            nFirstLineLength = aStr.getLength();
                            nFirstLineStreamPos = rStream.Tell();
                        }
                        // Do not detect a FIELDSTART_QUOTE if not in
                        // bFieldStart mode, in which case for unquoted content
                        // we are in FIELDEND_QUOTE state.
                        else if (eQuoteState != FIELDEND_QUOTE)
                        {
                            eQuoteState = lcl_isEscapedOrFieldEndQuote( nQuotes, p, pSeps, cFieldQuote, rcDetectSep);

                            if (eRetryState == RetryState::ALLOW && rcDetectSep)
                            {
                                // A separator was detected: add it to the
                                // separators and rescan from the start.
                                eRetryState = RetryState::RETRY;
                                rFieldSeparators += OUStringChar(rcDetectSep);
                                pSeps = rFieldSeparators.getStr();
                                goto Label_RetryWithNewSep;
                            }

                            // DONTKNOW_QUOTE is an embedded unescaped quote we
                            // don't count for pairing.
                            if (eQuoteState != DONTKNOW_QUOTE)
                                ++nQuotes;
                        }
                    }
                    else if (eQuoteState == FIELDEND_QUOTE)
                    {
                        if (bFieldStart)
                            // If blank is a separator it starts a field, if it
                            // is not and thus maybe leading before quote we
                            // are still at start of field regarding quotes.
                            bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                        else
                            bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                    }
                }
                else
                {
                    if (*p == cFieldQuote && bFieldStart)
                    {
                        nQuotes = 1;
                        eQuoteState = FIELDSTART_QUOTE;
                        bFieldStart = false;
                        nFirstLineLength = aStr.getLength();
                        nFirstLineStreamPos = rStream.Tell();
                    }
                    else if (eQuoteState == FIELDEND_QUOTE)
                    {
                        // This also skips leading blanks at beginning of line
                        // if followed by a quote. It's debatable whether we
                        // actually want that or not, but congruent with what
                        // ScanNextFieldFromString() does.
                        if (bFieldStart)
                            bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                        else
                            bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                    }
                }
                // A quote character inside a field content does not start
                // a quote.
                ++p;
            }

            if ((nQuotes & 1) == 0)
                // We still have a (theoretical?) problem here if due to
                // nArbitraryLineLengthLimit (or nMaxSourceLines below) we
                // split a string right between a doubled quote pair.
                break;
            else if (eQuoteState == DONTKNOW_QUOTE)
                // A single unescaped quote somewhere in a quote started
                // field, most likely that was not meant to have embedded
                // linefeeds either.
                break;
            else if (++nLine >= nMaxSourceLines && nMaxSourceLines > 0)
                // Unconditionally increment nLine even if nMaxSourceLines==0
                // so it can be observed in debugger.
                break;
            else
            {
                // Quoted field still open: append the next physical line and
                // continue scanning at the joining "\n".
                nLastOffset = aStr.getLength();
                OUString aNext;
                rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);
                if (!rStream.eof())
                    aStr += "\n" + aNext;
            }
        }
        if (nQuotes & 1)
        {
            // No closing quote at all. A single quote at field start => no
            // embedded linefeeds for that field, take only first logical line.
            aStr = aStr.copy( 0, nFirstLineLength);
            rStream.Seek( nFirstLineStreamPos);
        }
    }
    return aStr;
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */