2860 lines
108 KiB
C++
2860 lines
108 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*
|
|
* This file is part of the LibreOffice project.
|
|
*
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
*
|
|
* This file incorporates work covered by the following license notice:
|
|
*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed
|
|
* with this work for additional information regarding copyright
|
|
* ownership. The ASF licenses this file to you under the Apache
|
|
* License, Version 2.0 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
|
|
*/
|
|
|
|
#include <comphelper/processfactory.hxx>
|
|
#include <i18nlangtag/languagetag.hxx>
|
|
#include <i18nutil/unicode.hxx>
|
|
#include <sot/formats.hxx>
|
|
#include <sfx2/mieclip.hxx>
|
|
#include <com/sun/star/i18n/CalendarFieldIndex.hpp>
|
|
#include <sal/log.hxx>
|
|
#include <unotools/charclass.hxx>
|
|
#include <osl/module.hxx>
|
|
#include <o3tl/string_view.hxx>
|
|
|
|
#include <global.hxx>
|
|
#include <docsh.hxx>
|
|
#include <undoblk.hxx>
|
|
#include <rangenam.hxx>
|
|
#include <tabvwsh.hxx>
|
|
#include <filter.hxx>
|
|
#include <asciiopt.hxx>
|
|
#include <formulacell.hxx>
|
|
#include <cellform.hxx>
|
|
#include <progress.hxx>
|
|
#include <scitems.hxx>
|
|
#include <editable.hxx>
|
|
#include <compiler.hxx>
|
|
#include <warnbox.hxx>
|
|
#include <clipparam.hxx>
|
|
#include <impex.hxx>
|
|
#include <editutil.hxx>
|
|
#include <patattr.hxx>
|
|
#include <docpool.hxx>
|
|
#include <stringutil.hxx>
|
|
#include <cellvalue.hxx>
|
|
#include <tokenarray.hxx>
|
|
#include <documentimport.hxx>
|
|
#include <refundo.hxx>
|
|
#include <mtvelements.hxx>
|
|
|
|
#include <globstr.hrc>
|
|
#include <scresid.hxx>
|
|
#include <o3tl/safeint.hxx>
|
|
#include <tools/svlibrary.h>
|
|
#include <comphelper/configuration.hxx>
|
|
#include <vcl/svapp.hxx>
|
|
#include <vcl/weld.hxx>
|
|
#include <editeng/editobj.hxx>
|
|
#include <svl/numformat.hxx>
|
|
#include <rtl/character.hxx>
|
|
#include <rtl/math.hxx>
|
|
#include <sax/tools/converter.hxx>
|
|
|
|
#include <memory>
|
|
#include <string_view>
|
|
|
|
#include <unicode/uchar.h>
|
|
|
|
#include <osl/endian.h>
|
|
#include <osl/file.hxx>
|
|
|
|
// We don't want to end up with 2GB read in one line just because of malformed
|
|
// multiline fields, so chop it _somewhere_, which is twice supported columns
|
|
// times arbitrary maximum cell content length, 2*1024*64K=128M, and because
|
|
// it's sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of
|
|
// luck anyway.
|
|
constexpr sal_Int32 nArbitraryCellLengthLimit = SAL_MAX_UINT16;
|
|
constexpr sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * nArbitraryCellLengthLimit;
|
|
|
|
namespace
{

/// Escape sequence that lcl_UnescapeSylk() converts into a "\n" line feed.
const char SYLK_LF[] = "\x1b :";

/** Which generation of writer produced a SYLK file.

    The enumerators are ordered; parsing code compares against
    SylkVersion::OOO32 with >= to select the escaping rules.
 */
enum class SylkVersion
{
    SCALC3,     // Wrote wrongly quoted strings and unescaped semicolons.
    OOO32,      // Correct strings, plus multiline content.
    OWN,        // Place our new versions, if any, before this value.
    OTHER       // Assume that aliens wrote correct strings.
};

} // namespace
|
|
|
|
// Whole document without Undo
|
|
ScImportExport::ScImportExport( ScDocument& r )
|
|
: pDocSh( r.GetDocumentShell() ), rDoc( r ),
|
|
nSizeLimit( 0 ), nMaxImportRow(!comphelper::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
|
|
cSep( '\t' ), cStr( '"' ),
|
|
bFormulas( false ), bIncludeFiltered( true ),
|
|
bAll( true ), bSingle( true ), bUndo( false ),
|
|
bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
|
|
mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
|
|
{
|
|
pUndoDoc = nullptr;
|
|
pExtOptions = nullptr;
|
|
}
|
|
|
|
// Insert am current cell without range(es)
|
|
ScImportExport::ScImportExport( ScDocument& r, const ScAddress& rPt )
|
|
: pDocSh( r.GetDocumentShell() ), rDoc( r ),
|
|
aRange( rPt ),
|
|
nSizeLimit( 0 ), nMaxImportRow(!comphelper::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
|
|
cSep( '\t' ), cStr( '"' ),
|
|
bFormulas( false ), bIncludeFiltered( true ),
|
|
bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
|
|
bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
|
|
mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
|
|
{
|
|
pUndoDoc = nullptr;
|
|
pExtOptions = nullptr;
|
|
}
|
|
|
|
// ctor with a range is only used for export
|
|
//! ctor with a string (and bSingle=true) is also used for DdeSetData
|
|
ScImportExport::ScImportExport( ScDocument& r, const ScRange& rRange )
|
|
: pDocSh( r.GetDocumentShell() ), rDoc( r ),
|
|
aRange( rRange ),
|
|
nSizeLimit( 0 ), nMaxImportRow(!comphelper::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
|
|
cSep( '\t' ), cStr( '"' ),
|
|
bFormulas( false ), bIncludeFiltered( true ),
|
|
bAll( false ), bSingle( false ), bUndo( pDocSh != nullptr ),
|
|
bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
|
|
mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
|
|
{
|
|
pUndoDoc = nullptr;
|
|
pExtOptions = nullptr;
|
|
// Only one sheet (table) supported
|
|
aRange.aEnd.SetTab( aRange.aStart.Tab() );
|
|
}
|
|
|
|
// Evaluate input string - either range, cell or the whole document (when error)
|
|
// If a View exists, the TabNo of the view will be used.
|
|
ScImportExport::ScImportExport( ScDocument& r, const OUString& rPos )
|
|
: pDocSh( r.GetDocumentShell() ), rDoc( r ),
|
|
nSizeLimit( 0 ), nMaxImportRow(!comphelper::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
|
|
cSep( '\t' ), cStr( '"' ),
|
|
bFormulas( false ), bIncludeFiltered( true ),
|
|
bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
|
|
bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
|
|
mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
|
|
{
|
|
pUndoDoc = nullptr;
|
|
pExtOptions = nullptr;
|
|
|
|
SCTAB nTab = ScDocShell::GetCurTab();
|
|
aRange.aStart.SetTab( nTab );
|
|
OUString aPos( rPos );
|
|
// Named range?
|
|
ScRangeName* pRange = rDoc.GetRangeName();
|
|
if (pRange)
|
|
{
|
|
const ScRangeData* pData = pRange->findByUpperName(ScGlobal::getCharClass().uppercase(aPos));
|
|
if (pData)
|
|
{
|
|
if( pData->HasType( ScRangeData::Type::RefArea )
|
|
|| pData->HasType( ScRangeData::Type::AbsArea )
|
|
|| pData->HasType( ScRangeData::Type::AbsPos ) )
|
|
{
|
|
aPos = pData->GetSymbol();
|
|
}
|
|
}
|
|
}
|
|
formula::FormulaGrammar::AddressConvention eConv = rDoc.GetAddressConvention();
|
|
// Range?
|
|
if (aRange.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID)
|
|
bSingle = false;
|
|
// Cell?
|
|
else if (aRange.aStart.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID)
|
|
aRange.aEnd = aRange.aStart;
|
|
else
|
|
bAll = true;
|
|
}
|
|
|
|
/// Releases the undo document first, then the extended ASCII options.
ScImportExport::~ScImportExport() COVERITY_NOEXCEPT_FALSE
{
    pUndoDoc = nullptr;
    pExtOptions = nullptr;
}
|
|
|
|
void ScImportExport::SetExtOptions( const ScAsciiOptions& rOpt )
|
|
{
|
|
if ( pExtOptions )
|
|
*pExtOptions = rOpt;
|
|
else
|
|
pExtOptions.reset(new ScAsciiOptions( rOpt ));
|
|
|
|
// "normal" Options
|
|
|
|
cSep = ScAsciiOptions::GetWeightedFieldSep( rOpt.GetFieldSeps(), false);
|
|
cStr = rOpt.GetTextSep();
|
|
}
|
|
|
|
void ScImportExport::SetFilterOptions(const OUString& rFilterOptions)
|
|
{
|
|
maFilterOptions = rFilterOptions;
|
|
}
|
|
|
|
/** Tell whether nFormat is one of the clipboard formats this class lists as
    supported.

    @return true for STRING, STRING_TSVC, SYLK, LINK, HTML, HTML_SIMPLE and
            DIF; false for every other format id.
 */
bool ScImportExport::IsFormatSupported( SotClipboardFormatId nFormat )
{
    switch ( nFormat )
    {
        case SotClipboardFormatId::STRING :
        case SotClipboardFormatId::STRING_TSVC :
        case SotClipboardFormatId::SYLK :
        case SotClipboardFormatId::LINK :
        case SotClipboardFormatId::HTML :
        case SotClipboardFormatId::HTML_SIMPLE :
        case SotClipboardFormatId::DIF :
            return true;
        default:
            return false;
    }
}
|
|
|
|
// Prepare for Undo
|
|
bool ScImportExport::StartPaste()
|
|
{
|
|
if ( !bAll )
|
|
{
|
|
ScEditableTester aTester( rDoc, aRange, sc::EditAction::Unknown );
|
|
if ( !aTester.IsEditable() )
|
|
{
|
|
std::unique_ptr<weld::MessageDialog> xInfoBox(Application::CreateMessageDialog(ScDocShell::GetActiveDialogParent(),
|
|
VclMessageType::Info, VclButtonsType::Ok,
|
|
ScResId(aTester.GetMessageId())));
|
|
xInfoBox->run();
|
|
return false;
|
|
}
|
|
}
|
|
if( bUndo && pDocSh && rDoc.IsUndoEnabled())
|
|
{
|
|
pUndoDoc.reset(new ScDocument( SCDOCMODE_UNDO ));
|
|
pUndoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
|
|
rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pUndoDoc);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Create Undo/Redo actions, Invalidate/Repaint
|
|
void ScImportExport::EndPaste(bool bAutoRowHeight)
|
|
{
|
|
bool bHeight = bAutoRowHeight && pDocSh && pDocSh->AdjustRowHeight(
|
|
aRange.aStart.Row(), aRange.aEnd.Row(), aRange.aStart.Tab() );
|
|
|
|
if( pUndoDoc && rDoc.IsUndoEnabled() && pDocSh )
|
|
{
|
|
ScDocumentUniquePtr pRedoDoc(new ScDocument( SCDOCMODE_UNDO ));
|
|
pRedoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
|
|
rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pRedoDoc);
|
|
ScMarkData aDestMark(pRedoDoc->GetSheetLimits());
|
|
aDestMark.SetMarkArea(aRange);
|
|
pDocSh->GetUndoManager()->AddUndoAction(
|
|
std::make_unique<ScUndoPaste>(pDocSh, aRange, aDestMark, std::move(pUndoDoc), std::move(pRedoDoc), InsertDeleteFlags::ALL, nullptr));
|
|
}
|
|
pUndoDoc.reset();
|
|
if( pDocSh )
|
|
{
|
|
if (!bHeight)
|
|
pDocSh->PostPaint( aRange, PaintPartFlags::Grid );
|
|
pDocSh->SetDocumentModified();
|
|
}
|
|
ScTabViewShell* pViewSh = ScTabViewShell::GetActiveViewShell();
|
|
if ( pViewSh )
|
|
pViewSh->UpdateInputHandler();
|
|
|
|
}
|
|
|
|
bool ScImportExport::ExportData( std::u16string_view rMimeType,
|
|
css::uno::Any & rValue )
|
|
{
|
|
SvMemoryStream aStrm;
|
|
SotClipboardFormatId fmtId = SotExchange::GetFormatIdFromMimeType(rMimeType);
|
|
if (fmtId == SotClipboardFormatId::STRING)
|
|
aStrm.SetStreamCharSet(RTL_TEXTENCODING_UNICODE);
|
|
// mba: no BaseURL for data exchange
|
|
if (ExportStream(aStrm, OUString(), fmtId))
|
|
{
|
|
if (fmtId == SotClipboardFormatId::STRING)
|
|
{
|
|
assert(aStrm.TellEnd() % sizeof(sal_Unicode) == 0);
|
|
rValue <<= OUString(static_cast<const sal_Unicode*>(aStrm.GetData()),
|
|
aStrm.TellEnd() / sizeof(sal_Unicode));
|
|
}
|
|
else
|
|
{
|
|
aStrm.WriteUChar(0);
|
|
rValue <<= css::uno::Sequence<sal_Int8>(static_cast<sal_Int8 const*>(aStrm.GetData()),
|
|
aStrm.TellEnd());
|
|
}
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool ScImportExport::ImportString( const OUString& rText, SotClipboardFormatId nFmt )
|
|
{
|
|
switch ( nFmt )
|
|
{
|
|
// formats supporting unicode
|
|
case SotClipboardFormatId::STRING :
|
|
case SotClipboardFormatId::STRING_TSVC :
|
|
{
|
|
ScImportStringStream aStrm( rText);
|
|
return ImportStream( aStrm, OUString(), nFmt );
|
|
// ImportStream must handle RTL_TEXTENCODING_UNICODE
|
|
}
|
|
default:
|
|
{
|
|
rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
|
|
OString aTmp( rText.getStr(), rText.getLength(), eEnc );
|
|
SvMemoryStream aStrm( const_cast<char *>(aTmp.getStr()), aTmp.getLength() * sizeof(char), StreamMode::READ );
|
|
aStrm.SetStreamCharSet( eEnc );
|
|
SetNoEndianSwap( aStrm ); //! no swapping in memory
|
|
return ImportStream( aStrm, OUString(), nFmt );
|
|
}
|
|
}
|
|
}
|
|
|
|
/** Export into an OUString.

    Only STRING and STRING_TSVC are exported as Unicode. For any other format
    a warning is logged and the result of ExportByteString() in the thread's
    text encoding is converted to an OUString.

    @param rText
    Receives the exported text; cleared if the Unicode export fails.
    @return true on success.
 */
bool ScImportExport::ExportString( OUString& rText, SotClipboardFormatId nFmt )
{
    const bool bUnicodeFormat = nFmt == SotClipboardFormatId::STRING
                             || nFmt == SotClipboardFormatId::STRING_TSVC;
    if ( !bUnicodeFormat )
    {
        SAL_WARN("sc.ui", "ScImportExport::ExportString: Unicode not supported for other formats than SotClipboardFormatId::STRING[_TSV]");
        const rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
        OString aTmp;
        const bool bOk = ExportByteString( aTmp, eEnc, nFmt );
        rText = OStringToOUString( aTmp, eEnc );
        return bOk;
    }

    // nSizeLimit not needed for OUString
    // ExportStream must handle RTL_TEXTENCODING_UNICODE
    SvMemoryStream aStrm;
    aStrm.SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
    SetNoEndianSwap( aStrm );       //! no swapping in memory

    // mba: no BaseURL for data exchange
    if( !ExportStream( aStrm, OUString(), nFmt ) )
    {
        rText.clear();
        return false;
    }

    // Terminate the buffer so it can be read as a zero-terminated string.
    aStrm.WriteUInt16( 0 );
    rText = OUString( static_cast<const sal_Unicode*>(aStrm.GetData()) );
    return true;
}
|
|
|
|
/** Export into a byte string in the given (non-Unicode) encoding.

    RTL_TEXTENCODING_UNICODE is not supported and is replaced by the thread's
    text encoding. A size limit of 0 is replaced by SAL_MAX_UINT16 (the member
    is updated).

    @param rText
    Receives the exported bytes; cleared if the export fails or if the
    exported data, including the appended terminating zero, exceeds the
    size limit.
    @return true on success.
 */
bool ScImportExport::ExportByteString( OString& rText, rtl_TextEncoding eEnc, SotClipboardFormatId nFmt )
{
    OSL_ENSURE( eEnc != RTL_TEXTENCODING_UNICODE, "ScImportExport::ExportByteString: Unicode not supported" );
    if ( eEnc == RTL_TEXTENCODING_UNICODE )
        eEnc = osl_getThreadTextEncoding();

    if (nSizeLimit == 0)
        nSizeLimit = SAL_MAX_UINT16;

    SvMemoryStream aStrm;
    aStrm.SetStreamCharSet( eEnc );
    SetNoEndianSwap( aStrm );       //! no swapping in memory

    // mba: no BaseURL for data exchange
    bool bWithinLimit = false;
    if( ExportStream( aStrm, OUString(), nFmt ) )
    {
        aStrm.WriteChar( 0 );
        bWithinLimit = ( aStrm.TellEnd() <= nSizeLimit );
    }

    if ( !bWithinLimit )
    {
        rText.clear();
        return false;
    }

    rText = static_cast<const char*>(aStrm.GetData());
    return true;
}
|
|
|
|
/** Import from a stream, dispatching on the clipboard format.

    @param rStrm     Stream to read from.
    @param rBaseURL  Passed on to the RTF and HTML importers.
    @param nFmt      Format of the stream's data.
    @return the result of the format's importer; always true for LINK; false
            for formats not handled here.
 */
bool ScImportExport::ImportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
{
    switch ( nFmt )
    {
        case SotClipboardFormatId::STRING :
        case SotClipboardFormatId::STRING_TSVC :
            return ExtText2Doc( rStrm );        // evaluate pExtOptions

        case SotClipboardFormatId::SYLK :
            return Sylk2Doc( rStrm );

        case SotClipboardFormatId::DIF :
            return Dif2Doc( rStrm );

        case SotClipboardFormatId::RTF :
        case SotClipboardFormatId::RICHTEXT :
            return RTF2Doc( rStrm, rBaseURL );

        case SotClipboardFormatId::LINK :
            return true;                        // Link-Import?

        case SotClipboardFormatId::HTML :
            return HTML2Doc( rStrm, rBaseURL );

        case SotClipboardFormatId::HTML_SIMPLE :
        {
            MSE40HTMLClipFormatObj aMSE40ClpObj;    // needed to skip the header data
            SvStream* pHTML = aMSE40ClpObj.IsValid( rStrm );
            return pHTML && HTML2Doc( *pHTML, rBaseURL );
        }

        default:
            return false;
    }
}
|
|
|
|
/** Export to a stream, dispatching on the clipboard format.

    @param rStrm     Stream to write to.
    @param rBaseURL  Passed on to the HTML exporter.
    @param nFmt      Format to write.
    @return the result of the format's exporter. For LINK a DDE link to
            aRange is written, unless the whole document is addressed (bAll)
            or no document name could be determined, in which case false is
            returned. false for formats not handled here.
 */
bool ScImportExport::ExportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
{
    switch ( nFmt )
    {
        case SotClipboardFormatId::STRING :
        case SotClipboardFormatId::STRING_TSVC :
            return Doc2Text( rStrm );

        case SotClipboardFormatId::SYLK :
            return Doc2Sylk( rStrm );

        case SotClipboardFormatId::DIF :
            return Doc2Dif( rStrm );

        case SotClipboardFormatId::HTML :
            return Doc2HTML( rStrm, rBaseURL );

        case SotClipboardFormatId::RTF :
        case SotClipboardFormatId::RICHTEXT :
            return Doc2RTF( rStrm );

        case SotClipboardFormatId::LINK :
            break;      // handled below

        default:
            return false;
    }

    // SotClipboardFormatId::LINK from here on.
    if ( bAll )
        return false;

    OUString aDocName;
    if ( rDoc.IsClipboard() )
        aDocName = ScGlobal::GetClipDocName();
    else if ( ScDocShell* pShell = rDoc.GetDocumentShell() )
        aDocName = pShell->GetTitle( SFX_TITLE_FULLNAME );

    OSL_ENSURE( !aDocName.isEmpty(), "ClipBoard document has no name! :-/" );
    if ( aDocName.isEmpty() )
        return false;

    // Always use Calc A1 syntax for paste link.
    OUString aRefName;
    ScRefFlags nFlags = ScRefFlags::VALID | ScRefFlags::TAB_3D;
    if ( bSingle )
    {
        aRefName = aRange.aStart.Format(nFlags, &rDoc, formula::FormulaGrammar::CONV_OOO);
    }
    else
    {
        if ( aRange.aStart.Tab() != aRange.aEnd.Tab() )
            nFlags |= ScRefFlags::TAB2_3D;
        aRefName = aRange.Format(rDoc, nFlags, formula::FormulaGrammar::CONV_OOO);
    }
    OUString aAppName = Application::GetAppName();

    // extra bits are used to tell the client to prefer external
    // reference link.
    return TransferableDataHelper::WriteDDELink(rStrm, aAppName, aDocName, aRefName,
                                                u"calc:extref");
}
|
|
|
|
// tdf#104927
|
|
// http://www.unicode.org/reports/tr11/
|
|
sal_Int32 ScImportExport::CountVisualWidth(std::u16string_view rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth)
|
|
{
|
|
sal_Int32 nWidth = 0;
|
|
while(nIdx < static_cast<sal_Int32>(rStr.size()) && nWidth < nMaxWidth)
|
|
{
|
|
sal_uInt32 nCode = o3tl::iterateCodePoints(rStr, &nIdx);
|
|
|
|
auto nEaWidth = u_getIntPropertyValue(nCode, UCHAR_EAST_ASIAN_WIDTH);
|
|
if (nEaWidth == U_EA_FULLWIDTH || nEaWidth == U_EA_WIDE)
|
|
nWidth += 2;
|
|
else if (!u_getIntPropertyValue(nCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
|
|
nWidth += 1;
|
|
}
|
|
|
|
if (nIdx < static_cast<sal_Int32>(rStr.size()))
|
|
{
|
|
sal_Int32 nTmpIdx = nIdx;
|
|
sal_uInt32 nCode = o3tl::iterateCodePoints(rStr, &nTmpIdx);
|
|
|
|
if (u_getIntPropertyValue(nCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
|
|
nIdx = nTmpIdx;
|
|
}
|
|
return nWidth;
|
|
}
|
|
|
|
/// Count the visual width of the whole string, i.e. from index 0 with a
/// maximum width of SAL_MAX_INT32.
sal_Int32 ScImportExport::CountVisualWidth(std::u16string_view rStr)
{
    sal_Int32 nFromStart = 0;
    const sal_Int32 nTotalWidth = CountVisualWidth(rStr, nFromStart, SAL_MAX_INT32);
    return nTotalWidth;
}
|
|
|
|
/// Set the stream's endianness to BIG if OSL_BIGENDIAN is defined and to
/// LITTLE otherwise, so that no swapping takes place in memory.
void ScImportExport::SetNoEndianSwap( SvStream& rStrm )
{
#ifdef OSL_BIGENDIAN
    const SvStreamEndian eNative = SvStreamEndian::BIG;
#else
    const SvStreamEndian eNative = SvStreamEndian::LITTLE;
#endif
    rStrm.SetEndian( eNative );
}
|
|
|
|
/// A field ends at the end of data (c is 0) or at any character contained in
/// the zero-terminated separator list pSeps.
static inline bool lcl_isFieldEnd( sal_Unicode c, const sal_Unicode* pSeps )
{
    if (c == 0)
        return true;
    return ScGlobal::UnicodeStrChr( pSeps, c) != nullptr;
}
|
|
|
|
namespace {

/// Classification of a quote character met while scanning a field.
enum QuoteType
{
    FIELDSTART_QUOTE,   // first quote in a field
    FIRST_QUOTE,        // first of a doubled quote
    SECOND_QUOTE,       // second of a doubled quote
    FIELDEND_QUOTE,     // quote that ends the field
    DONTKNOW_QUOTE      // unescaped quote not considered as end of field
};

}
|
|
|
|
/** Determine if *p is a quote that ends a quoted field.
|
|
|
|
Precondition: we are parsing a quoted field already and *p is a quote.
|
|
|
|
@return
|
|
FIELDEND_QUOTE if end of field quote
|
|
DONTKNOW_QUOTE anything else
|
|
*/
|
|
static QuoteType lcl_isFieldEndQuote( const sal_Unicode* p, const sal_Unicode* pSeps, sal_Unicode& rcDetectSep )
|
|
{
|
|
// Due to broken CSV generators that don't double embedded quotes check if
|
|
// a field separator immediately or with trailing spaces follows the quote,
|
|
// only then end the field, or at end of string.
|
|
constexpr sal_Unicode cBlank = ' ';
|
|
if (p[1] == cBlank && ScGlobal::UnicodeStrChr( pSeps, cBlank))
|
|
return FIELDEND_QUOTE;
|
|
// Detect a possible blank separator if it's not already in the list (which
|
|
// was checked right above for p[1]==cBlank).
|
|
const bool bBlankSep = (p[1] == cBlank && !rcDetectSep && p[2] && p[2] != cBlank);
|
|
while (p[1] == cBlank)
|
|
++p;
|
|
if (lcl_isFieldEnd( p[1], pSeps))
|
|
return FIELDEND_QUOTE;
|
|
// Extended separator detection after a closing quote (with or without
|
|
// blanks). Note that nQuotes is incremented *after* the call so is not yet
|
|
// even here, and that with separator detection we reach here only if
|
|
// lcl_isEscapedOrFieldEndQuote() did not already detect FIRST_QUOTE or
|
|
// SECOND_QUOTE for an escaped embedded quote, thus nQuotes does not have
|
|
// to be checked.
|
|
if (!rcDetectSep)
|
|
{
|
|
static constexpr sal_Unicode vSep[] = { ',', '\t', ';' };
|
|
for (const sal_Unicode c : vSep)
|
|
{
|
|
if (p[1] == c)
|
|
{
|
|
rcDetectSep = c;
|
|
return FIELDEND_QUOTE;
|
|
}
|
|
}
|
|
}
|
|
// Blank separator is least significant, after others.
|
|
if (bBlankSep)
|
|
{
|
|
rcDetectSep = cBlank;
|
|
return FIELDEND_QUOTE;
|
|
}
|
|
return DONTKNOW_QUOTE;
|
|
}
|
|
|
|
/** Determine if *p is a quote that is escaped by being doubled or ends a
|
|
quoted field.
|
|
|
|
Precondition: *p is a quote.
|
|
|
|
@param nQuotes
|
|
Quote characters encountered so far.
|
|
Odd (after opening quote) means either no embedded quotes or only quote
|
|
pairs so far.
|
|
Even means either not in a quoted field or already one quote
|
|
encountered, the first of a pair.
|
|
|
|
@return
|
|
FIELDSTART_QUOTE if first quote in a field, either starting content or
|
|
embedded so caller should check beforehand.
|
|
FIRST_QUOTE if first of a doubled quote
|
|
SECOND_QUOTE if second of a doubled quote
|
|
FIELDEND_QUOTE if end of field quote
|
|
DONTKNOW_QUOTE if an unescaped quote we don't consider as end of field,
|
|
do not increment nQuotes in caller then!
|
|
*/
|
|
static QuoteType lcl_isEscapedOrFieldEndQuote( sal_Int32 nQuotes, const sal_Unicode* p,
|
|
const sal_Unicode* pSeps, sal_Unicode cStr, sal_Unicode& rcDetectSep )
|
|
{
|
|
if ((nQuotes & 1) == 0)
|
|
{
|
|
if (p[-1] == cStr)
|
|
return SECOND_QUOTE;
|
|
else
|
|
{
|
|
SAL_WARN( "sc", "lcl_isEscapedOrFieldEndQuote: really want a FIELDSTART_QUOTE?");
|
|
return FIELDSTART_QUOTE;
|
|
}
|
|
}
|
|
if (p[1] == cStr)
|
|
return FIRST_QUOTE;
|
|
return lcl_isFieldEndQuote( p, pSeps, rcDetectSep);
|
|
}
|
|
|
|
/** Append characters of [p1,p2) to rField.
|
|
|
|
@returns TRUE if ok; FALSE if data overflow, truncated
|
|
*/
|
|
static bool lcl_appendLineData( OUString& rField, const sal_Unicode* p1, const sal_Unicode* p2 )
|
|
{
|
|
if (rField.getLength() + (p2 - p1) <= nArbitraryCellLengthLimit)
|
|
{
|
|
rField += std::u16string_view( p1, p2 - p1 );
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
SAL_WARN( "sc", "lcl_appendLineData: data overflow");
|
|
rField += std::u16string_view( p1, nArbitraryCellLengthLimit - rField.getLength() );
|
|
return false;
|
|
}
|
|
}
|
|
|
|
namespace {

/// How lcl_ScanString() treats doubled quote characters.
enum class DoubledQuoteMode
{
    KEEP_ALL,   // both are taken, additionally start and end quote are included in string
    ESCAPE,     // escaped quote, one is taken, one ignored
};

}
|
|
|
|
/** Scan for a quoted string.
|
|
|
|
Precondition: initial current position *p is a cStr quote.
|
|
|
|
For DoubledQuoteMode::ESCAPE, if after the closing quote there is a field
|
|
end (with or without trailing blanks and as determined by
|
|
lcl_isFieldEndQuote()), then the content is appended to rField with quotes
|
|
processed and removed. Else if no field end after the quoted string was
|
|
detected, nothing is appended and processing continues and is repeated
|
|
until the next quote. If no closing quote at a field end was found at all,
|
|
nothing is appended and the initial position is returned and caller has to
|
|
decide, usually just taking all as literal data.
|
|
|
|
For DoubledQuoteMode::KEEP_ALL, the string up to and including the closing
|
|
quote is appended to rField and the next position returned, regardless
|
|
whether there is a field separator following or not.
|
|
|
|
*/
|
|
static const sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rField,
|
|
const sal_Unicode* pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool& rbOverflowCell )
|
|
{
|
|
OUString aString;
|
|
bool bClosingQuote = (eMode == DoubledQuoteMode::KEEP_ALL);
|
|
const sal_Unicode* const pStart = p;
|
|
if (eMode != DoubledQuoteMode::KEEP_ALL)
|
|
p++; //! jump over opening quote
|
|
bool bCont;
|
|
do
|
|
{
|
|
bCont = false;
|
|
const sal_Unicode* p0 = p;
|
|
for( ;; )
|
|
{
|
|
if (!*p)
|
|
{
|
|
// Encountering end of data after an opening quote is not a
|
|
// quoted string, ReadCsvLine() concatenated lines with '\n'
|
|
// for a properly quoted embedded linefeed.
|
|
if (eMode == DoubledQuoteMode::KEEP_ALL)
|
|
// Caller would append that data anyway, so we can do it
|
|
// already here.
|
|
break;
|
|
|
|
return pStart;
|
|
}
|
|
|
|
if( *p == cStr )
|
|
{
|
|
if ( *++p != cStr )
|
|
{
|
|
// break or continue for loop
|
|
if (eMode == DoubledQuoteMode::ESCAPE)
|
|
{
|
|
sal_Unicode cDetectSep = 0xffff; // No separator detection here.
|
|
if (lcl_isFieldEndQuote( p-1, pSeps, cDetectSep) == FIELDEND_QUOTE)
|
|
{
|
|
bClosingQuote = true;
|
|
break;
|
|
}
|
|
else
|
|
continue;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
// doubled quote char
|
|
switch ( eMode )
|
|
{
|
|
case DoubledQuoteMode::KEEP_ALL :
|
|
p++; // both for us (not breaking for-loop)
|
|
break;
|
|
case DoubledQuoteMode::ESCAPE :
|
|
p++; // one for us (breaking for-loop)
|
|
bCont = true; // and more
|
|
break;
|
|
}
|
|
if ( eMode == DoubledQuoteMode::ESCAPE )
|
|
break;
|
|
}
|
|
else
|
|
p++;
|
|
}
|
|
if ( p0 < p )
|
|
{
|
|
if (!lcl_appendLineData( aString, p0, ((eMode != DoubledQuoteMode::KEEP_ALL && (*p || *(p-1) == cStr)) ? p-1 : p)))
|
|
rbOverflowCell = true;
|
|
}
|
|
} while ( bCont );
|
|
|
|
if (!bClosingQuote)
|
|
return pStart;
|
|
|
|
if (!aString.isEmpty())
|
|
rField += aString;
|
|
|
|
return p;
|
|
}
|
|
|
|
/** Undo SYLK escaping in rString, in place.

    From SylkVersion::OOO32 on, doubled semicolons are reduced to one.
    Older versions didn't escape the semicolon; they quoted the string and
    doubled embedded quotes, but not the semicolons, which was plain wrong, so
    for those doubled quotes are reduced to one instead.
    In both cases SYLK_LF sequences are then replaced by "\n".
 */
static void lcl_UnescapeSylk( OUString & rString, SylkVersion eVersion )
{
    const bool bEscapedSemicolons = (eVersion >= SylkVersion::OOO32);
    if (bEscapedSemicolons)
    {
        rString = rString.replaceAll(";;", ";");
    }
    else
    {
        rString = rString.replaceAll("\"\"", "\"");
    }

    rString = rString.replaceAll(SYLK_LF, "\n");
}
|
|
|
|
/** Scan a quoted SYLK string starting at the opening quote *p.

    The characters between the opening quote and the end quote are appended to
    rString, then lcl_UnescapeSylk() is applied to the whole of rString. If no
    end quote was determined, all data up to where scanning stopped is taken.

    From SylkVersion::OOO32 on, a quote followed by ";;" is an escaped
    semicolon and a quote followed by a single ';' ends the field. For older
    versions a doubled quote is an escaped quote and a quote followed by ';'
    ends the field.

    @return the position where scanning stopped: the field-ending quote, or
            the end of data.
 */
static const sal_Unicode* lcl_ScanSylkString( const sal_Unicode* p,
        OUString& rString, SylkVersion eVersion )
{
    const sal_Unicode* const pOpeningQuote = p;
    const sal_Unicode* pClosingQuote = nullptr;
    const bool bNewEscaping = (eVersion >= SylkVersion::OOO32);

    for (++p; *p; ++p)
    {
        if (*p != '"')
            continue;

        pClosingQuote = p;
        const sal_Unicode cNext = *(p+1);
        if (bNewEscaping)
        {
            if (cNext != ';')
                continue;
            if (*(p+2) != ';')
                break;          // end field
            p += 2;             // escaped ';'
            pClosingQuote = nullptr;
        }
        else if (cNext == '"')
        {
            ++p;                // escaped '"'
            pClosingQuote = nullptr;
        }
        else if (cNext == ';')
        {
            break;              // end field
        }
    }

    if (!pClosingQuote)
        pClosingQuote = p;      // Take all data as string.

    rString += std::u16string_view(pOpeningQuote + 1, pClosingQuote - pOpeningQuote - 1 );
    lcl_UnescapeSylk( rString, eVersion);
    return p;
}
|
|
|
|
/** Scan a SYLK formula expression starting at p and append it to rString.

    From SylkVersion::OOO32 on, the expression extends to the first ';' that
    is not doubled and is unescaped via lcl_UnescapeSylk(). For older versions
    see the comment in the body.

    @return the position where scanning stopped.
 */
static const sal_Unicode* lcl_ScanSylkFormula( const sal_Unicode* p,
        OUString& rString, SylkVersion eVersion )
{
    const sal_Unicode* const pBegin = p;

    if (eVersion >= SylkVersion::OOO32)
    {
        for ( ; *p; ++p)
        {
            if (*p != ';')
                continue;
            if (*(p+1) != ';')
                break;      // end field
            ++p;            // escaped ';'
        }
        rString += std::u16string_view( pBegin, p - pBegin);
        lcl_UnescapeSylk( rString, eVersion);
        return p;
    }

    // Nasty. If in old versions the formula contained a semicolon, it was
    // quoted and embedded quotes were doubled, but semicolons were not. If
    // there was no semicolon, it could still contain quotes and doubled
    // embedded quotes if it was something like ="a""b", which was saved as
    // E"a""b" as is and has to be preserved, even if older versions
    // couldn't even load it correctly. However, theoretically another
    // field might follow and thus the line contain a semicolon again, such
    // as ...;E"a""b";...
    bool bSemicolonInQuotes = false;
    if (*p == '"')
    {
        // May be a quoted expression or just a string constant expression
        // with quotes. Look ahead only, then rewind.
        for (++p; *p; ++p)
        {
            if (*p == '"')
            {
                if (*(p+1) != '"')
                    break;          // closing '"', had no ';' yet
                ++p;                // escaped '"'
            }
            else if (*p == ';')
            {
                bSemicolonInQuotes = true;  // ';' within quoted expression
                break;
            }
        }
        p = pBegin;
    }

    if (bSemicolonInQuotes)
        return lcl_ScanSylkString( p, rString, eVersion);

    while (*p && *p != ';')
        ++p;
    rString += std::u16string_view( pBegin, p - pBegin);
    return p;
}
|
|
|
|
/** Escape and quote rString, then write it to the stream.

    @param rString
    Modified in place: if cEsc is not 0 every cEsc is doubled, and if cQuote
    is not 0 the result is enclosed in cQuote characters.
 */
static void lcl_WriteString( SvStream& rStrm, OUString& rString, sal_Unicode cQuote, sal_Unicode cEsc )
{
    if (cEsc)
    {
        // the goal is to replace cEsc by cEsc+cEsc
        const OUString aSingle(cEsc);
        const OUString aDoubled = aSingle + aSingle;
        rString = rString.replaceAll(aSingle, aDoubled);
    }

    if (cQuote)
        rString = OUStringChar(cQuote) + rString + OUStringChar(cQuote);

    rStrm.WriteUnicodeOrByteText(rString);
}
|
|
|
|
/** Import separator delimited text lines from rStrm into the document.

    Each line is split at cSep; a field starting with cStr is first handed to
    lcl_ScanString(). Cells are set via ScDocument::SetString() with link
    formula checking enabled.

    If bSingle is set, the stream is read twice from the same position: the
    first pass only determines how far the data extends, then aRange's end is
    adjusted, StartPaste() is called and the second pass writes the cells.
    Otherwise StartPaste() is called up front and there is one pass, writing
    only cells within aRange.

    Sets bOverflowCell, bOverflowRow and bOverflowCol if data was truncated or
    did not fit into the sheet.

    @return false if StartPaste() refused the paste, true otherwise.
 */
bool ScImportExport::Text2Doc( SvStream& rStrm )
{
    bool bOk = true;

    // Zero-terminated separator list containing just the field separator.
    const sal_Unicode pSeps[2] = { cSep, 0 };

    ScSetStringParam aSetStringParam;
    aSetStringParam.mbCheckLinkFormula = true;

    const SCCOL nStartCol = aRange.aStart.Col();
    const SCROW nStartRow = aRange.aStart.Row();
    SCCOL nEndCol = aRange.aEnd.Col();
    SCROW nEndRow = aRange.aEnd.Row();
    const sal_uInt64 nOldPos = rStrm.Tell();
    rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
    bool bData = !bSingle;
    if( !bSingle)
        bOk = StartPaste();

    while( bOk )
    {
        OUString aLine;
        OUString aCell;
        SCROW nRow = nStartRow;
        rStrm.Seek( nOldPos );
        for( ;; )
        {
            rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet(), nArbitraryLineLengthLimit );
            if( rStrm.eof() )
                break;

            // tdf#125440 When inserting tab separated string, consider quotes as field markers
            const DoubledQuoteMode mode = aLine.indexOf("\t") >= 0 ? DoubledQuoteMode::ESCAPE : DoubledQuoteMode::KEEP_ALL;

            SCCOL nCol = nStartCol;
            const sal_Unicode* p = aLine.getStr();
            while( *p )
            {
                aCell.clear();
                const sal_Unicode* q = p;
                if (*p == cStr)
                {
                    // Look for a pairing quote.
                    q = p = lcl_ScanString( p, aCell, pSeps, cStr, mode, bOverflowCell );
                }
                // All until next separator.
                while (*p && *p != cSep)
                    ++p;
                if (!lcl_appendLineData( aCell, q, p))
                    bOverflowCell = true;   // display warning on import
                if (*p)
                    ++p;    // skip the separator

                if (rDoc.ValidCol(nCol) && rDoc.ValidRow(nRow) )
                {
                    if( bSingle )
                    {
                        // Grow the extent to cover everything that was read.
                        if (nCol>nEndCol) nEndCol = nCol;
                        if (nRow>nEndRow) nEndRow = nRow;
                    }
                    if( bData && nCol <= nEndCol && nRow <= nEndRow )
                        rDoc.SetString( nCol, nRow, aRange.aStart.Tab(), aCell, &aSetStringParam );
                }
                else                            // too many columns/rows
                {
                    if (!rDoc.ValidRow(nRow))
                        bOverflowRow = true;    // display warning on import
                    if (!rDoc.ValidCol(nCol))
                        bOverflowCol = true;    // display warning on import
                }
                ++nCol;
            }
            ++nRow;
        }

        if( bData )
            break;

        // The extent is known now; prepare the paste and read again, this
        // time writing the data.
        aRange.aEnd.SetCol( nEndCol );
        aRange.aEnd.SetRow( nEndRow );
        bOk = StartPaste();
        bData = true;
    }

    EndPaste();
    if (bOk && mbImportBroadcast)
    {
        rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged);
        // The document may have no shell, as the constructors and EndPaste()
        // account for, so don't dereference it unchecked.
        if (pDocSh)
            pDocSh->PostDataChanged();
    }

    return bOk;
}
|
|
|
|
// Extended Ascii-Import
|
|
|
|
/**
 * Store one imported text field in the cell nCol/nRow/nTab, interpreting it
 * according to the column format chosen for that column.
 *
 * Handling by nColFormat, as implemented below:
 *  - SC_COL_SKIP, or a column/row outside the document: nothing is written.
 *  - Empty rStr: the destination cell is overwritten with the empty string
 *    unless bSkipEmptyCells is set.
 *  - SC_COL_TEXT, or a leading '=' while bEvaluateFormulas is false: the
 *    content is stored as text.
 *  - SC_COL_ENGLISH: the string is parsed with the US-English standard number
 *    format; on failure the generic path is used.
 *  - Any other non-standard format (the date formats): the string is scanned
 *    for up to 7 numeric parts (Y-M-D h:m:s.t); ISO 8601 shaped input is
 *    handed to the document's number formatter, other input is assembled
 *    manually through the calendar wrappers.
 *  - Everything that was not consumed above goes through SetString /
 *    setAutoInput, or becomes an edit cell if rStr contains '\n'.
 *
 * @param rDocImport       import helper; also provides the ScDocument.
 * @param bUseDocImport    write through rDocImport instead of the ScDocument
 *                         setters.
 * @param nCol,nRow,nTab   destination cell.
 * @param rStr             field content.
 * @param nColFormat       one of the SC_COL_* column formats.
 * @param pFormatter       formatter passed on for number detection and used
 *                         for the locale's AM/PM strings.
 * @param bDetectNumFormat,bDetectSciNumFormat
 *                         forwarded to ScSetStringParam on the generic path.
 * @param bEvaluateFormulas  if false, content starting with '=' is forced to
 *                         text.
 * @param bSkipEmptyCells  if true, empty fields leave the destination as is.
 * @param rTransliteration,rCalendar
 *                         locale dependent month name / AM-PM matching.
 * @param pSecondTransliteration,pSecondCalendar
 *                         optional second set tried when the locale one does
 *                         not match; may be nullptr.
 *
 * @return true if the content was stored as an edit-engine text object
 *         because it contains a line break (ScDocumentImport text path, or
 *         the generic path when rStr contains '\n'); false otherwise.
 *         NOTE(review): the SC_COL_TEXT path without ScDocumentImport always
 *         returns false, even for multi-line content - confirm whether that
 *         is intended.
 */
static bool lcl_PutString(
    ScDocumentImport& rDocImport, bool bUseDocImport,
    SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString& rStr, sal_uInt8 nColFormat,
    SvNumberFormatter* pFormatter, bool bDetectNumFormat, bool bDetectSciNumFormat, bool bEvaluateFormulas, bool bSkipEmptyCells,
    const ::utl::TransliterationWrapper& rTransliteration, CalendarWrapper& rCalendar,
    const ::utl::TransliterationWrapper* pSecondTransliteration, CalendarWrapper* pSecondCalendar )
{
    ScDocument& rDoc = rDocImport.getDoc();
    bool bMultiLine = false;
    if ( nColFormat == SC_COL_SKIP || !rDoc.ValidCol(nCol) || !rDoc.ValidRow(nRow) )
        return bMultiLine;
    if ( rStr.isEmpty() )
    {
        if ( !bSkipEmptyCells )
        {   // delete destination cell
            if ( bUseDocImport )
                rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr );
            else
                rDoc.SetString( nCol, nRow, nTab, rStr );
        }
        return false;
    }

    // A leading '=' must not become a formula when evaluation is disabled.
    const bool bForceFormulaText = (!bEvaluateFormulas && rStr[0] == '=');
    if (nColFormat == SC_COL_TEXT || bForceFormulaText)
    {
        if ( bUseDocImport )
        {
            double fDummy;
            sal_uInt32 nIndex = 0;
            if (bForceFormulaText || rDoc.GetFormatTable()->IsNumberFormat(rStr, nIndex, fDummy))
            {
                // Set the format of this cell to Text.
                // This is only necessary for ScDocumentImport,
                // ScDocument::SetTextCell() forces it by ScSetStringParam.
                sal_uInt32 nFormat = rDoc.GetFormatTable()->GetStandardFormat(SvNumFormatType::TEXT);
                ScPatternAttr aNewAttrs(rDoc.getCellAttributeHelper());
                SfxItemSet& rSet = aNewAttrs.GetItemSet();
                rSet.Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nFormat) );
                rDoc.ApplyPattern(nCol, nRow, nTab, aNewAttrs);
            }
            if (ScStringUtil::isMultiline(rStr))
            {
                ScFieldEditEngine& rEngine = rDoc.GetEditEngine();
                rEngine.SetTextCurrentDefaults(rStr);
                rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
                return true;
            }
            else
            {
                rDocImport.setStringCell(ScAddress(nCol, nRow, nTab), rStr);
                return false;
            }
        }
        else
        {
            rDoc.SetTextCell(ScAddress(nCol, nRow, nTab), rStr);
            return bMultiLine;
        }
    }

    if ( nColFormat == SC_COL_ENGLISH )
    {
        //! SetString with Extra-Flag ???

        SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
        sal_uInt32 nEnglish = pDocFormatter->GetStandardIndex(LANGUAGE_ENGLISH_US);
        double fVal;
        if ( pDocFormatter->IsNumberFormat( rStr, nEnglish, fVal ) )
        {
            // Numberformat will not be set to English
            if ( bUseDocImport )
                rDocImport.setNumericCell( ScAddress( nCol, nRow, nTab ), fVal );
            else
                rDoc.SetValue( nCol, nRow, nTab, fVal );
            return bMultiLine;
        }
        // else, continue with SetString
    }
    else if ( nColFormat != SC_COL_STANDARD ) // date formats
    {
        const sal_uInt16 nMaxNumberParts = 7; // Y-M-D h:m:s.t
        const sal_Int32 nLen = rStr.getLength();
        // First and last (inclusive) string index of each numeric part found.
        sal_Int32 nStart[nMaxNumberParts];
        sal_Int32 nEnd[nMaxNumberParts];

        // Index of the day, month and year part within nStart[]/nEnd[].
        bool bIso;
        sal_uInt16 nDP, nMP, nYP;
        switch ( nColFormat )
        {
            case SC_COL_YMD: nDP = 2; nMP = 1; nYP = 0; bIso = true; break;
            case SC_COL_MDY: nDP = 1; nMP = 0; nYP = 2; bIso = false; break;
            case SC_COL_DMY:
            default: nDP = 0; nMP = 1; nYP = 2; bIso = false; break;
        }

        sal_uInt16 nFound = 0;
        bool bInNum = false;
        for (sal_Int32 nPos = 0; nPos < nLen && (bInNum || nFound < nMaxNumberParts); ++nPos)
        {
            bool bLetter = false;
            // Digits always belong to a part; letters are accepted only while
            // at the month position, so that month names can be collected.
            if (rtl::isAsciiDigit(rStr[nPos]) ||
                    (((!bInNum && nFound==nMP) || (bInNum && nFound==nMP+1))
                     && (bLetter = ScGlobal::getCharClass().isLetterNumeric( rStr, nPos))))
            {
                if (!bInNum)
                {
                    bInNum = true;
                    nStart[nFound] = nPos;
                    ++nFound;
                }
                nEnd[nFound-1] = nPos;
                if (bIso && (bLetter || (2 <= nFound && nFound <= 6 && nPos > nStart[nFound-1] + 1)))
                    // Each M,D,h,m,s at most 2 digits.
                    bIso = false;
            }
            else
            {
                bInNum = false;
                if (bIso)
                {
                    // ([+-])YYYY-MM-DD([T ]hh:mm(:ss(.fff)))(([+-])TZ)
                    // XXX NOTE: timezone is accepted here, but number
                    // formatter parser will not, so the end result will be
                    // type Text to preserve timezone information.
                    switch (rStr[nPos])
                    {
                        case '+':
                            if (nFound >= 5 && nPos == nEnd[nFound-1] + 1)
                                // Accept timezone offset.
                                ;
                            else if (nPos > 0)
                                // Accept one leading sign.
                                bIso = false;
                            break;
                        case '-':
                            if (nFound >= 5 && nPos == nEnd[nFound-1] + 1)
                                // Accept timezone offset.
                                ;
                            else if (nFound == 0 && nPos > 0)
                                // Accept one leading sign.
                                bIso = false;
                            else if (nFound < 1 || 2 < nFound || nPos != nEnd[nFound-1] + 1)
                                // Not immediately after 1 or 1-2
                                bIso = false;
                            break;
                        case 'T':
                        case ' ':
                            if (nFound != 3 || nPos != nEnd[nFound-1] + 1)
                                // Not immediately after 1-2-3
                                bIso = false;
                            break;
                        case ':':
                            if (nFound < 4 || 5 < nFound || nPos != nEnd[nFound-1] + 1)
                                // Not at 1-2-3T4:5:
                                bIso = false;
                            break;
                        case '.':
                        case ',':
                            if (nFound != 6 || nPos != nEnd[nFound-1] + 1)
                                // Not at 1-2-3T4:5:6.
                                bIso = false;
                            break;
                        case 'Z':
                            if (nFound >= 5 && nPos == nEnd[nFound-1] + 1)
                                // Accept Zero timezone.
                                ;
                            else
                                bIso = false;
                            break;
                        default:
                            bIso = false;
                    }
                }
            }
        }

        if (nFound < 3)
            bIso = false;

        if (bIso)
        {
            // Leave conversion and detection of various possible number
            // formats to the number formatter. ISO is recognized in any locale
            // so we can directly use the document's formatter.
            sal_uInt32 nFormat = 0;
            double fVal = 0.0;
            SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
            if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal))
            {
                if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE)
                {
                    ScAddress aPos(nCol,nRow,nTab);
                    if (bUseDocImport)
                        rDocImport.setNumericCell(aPos, fVal);
                    else
                        rDoc.SetValue(aPos, fVal);
                    rDoc.SetNumberFormat(aPos, nFormat);

                    return bMultiLine; // success
                }
            }
            // If we reach here it is type Text (e.g. timezone or trailing
            // characters). Handled below.
        }

        if ( nFound == 1 )
        {
            // try to break one number (without separators) into date fields

            sal_Int32 nDateStart = nStart[0];
            sal_Int32 nDateLen = nEnd[0] + 1 - nDateStart;

            if ( nDateLen >= 5 && nDateLen <= 8 &&
                    ScGlobal::getCharClass().isNumeric( rStr.copy( nDateStart, nDateLen ) ) )
            {
                // 6 digits: 2 each for day, month, year
                // 8 digits: 4 for year, 2 each for day and month
                // 5 or 7 digits: first field is shortened by 1

                bool bLongYear = ( nDateLen >= 7 );
                bool bShortFirst = ( nDateLen == 5 || nDateLen == 7 );

                // Same type as nStart[]/nEnd[] so that string offsets are
                // never narrowed on the way in or out.
                sal_Int32 nFieldStart = nDateStart;
                for (sal_uInt16 nPos=0; nPos<3; nPos++)
                {
                    sal_Int32 nFieldEnd = nFieldStart + 1;  // default: 2 digits
                    if ( bLongYear && nPos == nYP )
                        nFieldEnd += 2;                     // 2 extra digits for long year
                    if ( bShortFirst && nPos == 0 )
                        --nFieldEnd;                        // first field shortened?

                    nStart[nPos] = nFieldStart;
                    nEnd[nPos]   = nFieldEnd;
                    nFieldStart  = nFieldEnd + 1;
                }
                nFound = 3;
            }
        }

        if (!bIso && nFound >= 3)
        {
            using namespace ::com::sun::star;
            bool bSecondCal = false;
            sal_uInt16 nDay  = static_cast<sal_uInt16>(o3tl::toInt32(rStr.subView( nStart[nDP], nEnd[nDP]+1-nStart[nDP] )));
            sal_uInt16 nYear = static_cast<sal_uInt16>(o3tl::toInt32(rStr.subView( nStart[nYP], nEnd[nYP]+1-nStart[nYP] )));
            OUString aMStr = rStr.copy( nStart[nMP], nEnd[nMP]+1-nStart[nMP] );
            sal_Int16 nMonth = static_cast<sal_Int16>(aMStr.toInt32());
            if (!nMonth)
            {
                // Month part is not a number: try to match it as a month name.
                static constexpr OUString aSepShortened = u"SEP"_ustr;
                uno::Sequence< i18n::CalendarItem2 > xMonths;
                sal_Int32 i, nMonthCount;
                //  first test all month names from local international
                xMonths = rCalendar.getMonths();
                nMonthCount = xMonths.getLength();
                for (i=0; i<nMonthCount && !nMonth; i++)
                {
                    if ( rTransliteration.isEqual( aMStr, xMonths[i].FullName ) ||
                         rTransliteration.isEqual( aMStr, xMonths[i].AbbrevName ) )
                        nMonth = sal::static_int_cast<sal_Int16>( i+1 );
                    else if ( i == 8 && rTransliteration.isEqual( u"SEPT"_ustr,
                                xMonths[i].AbbrevName ) &&
                            rTransliteration.isEqual( aMStr, aSepShortened ) )
                    {   // correct English abbreviation is SEPT,
                        // but data mostly contains SEP only
                        nMonth = sal::static_int_cast<sal_Int16>( i+1 );
                    }
                }
                //  if none found, then test english month names
                if ( !nMonth && pSecondCalendar && pSecondTransliteration )
                {
                    xMonths = pSecondCalendar->getMonths();
                    nMonthCount = xMonths.getLength();
                    for (i=0; i<nMonthCount && !nMonth; i++)
                    {
                        if ( pSecondTransliteration->isEqual( aMStr, xMonths[i].FullName ) ||
                             pSecondTransliteration->isEqual( aMStr, xMonths[i].AbbrevName ) )
                        {
                            nMonth = sal::static_int_cast<sal_Int16>( i+1 );
                            bSecondCal = true;
                        }
                        else if ( i == 8 && pSecondTransliteration->isEqual(
                                    aMStr, aSepShortened ) )
                        {   // correct English abbreviation is SEPT,
                            // but data mostly contains SEP only
                            nMonth = sal::static_int_cast<sal_Int16>( i+1 );
                            bSecondCal = true;
                        }
                    }
                }
            }

            SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
            if ( nYear < 100 )
                nYear = pDocFormatter->ExpandTwoDigitYear( nYear );

            // Use the calendar whose month names matched.
            CalendarWrapper* pCalendar = (bSecondCal ? pSecondCalendar : &rCalendar);
            sal_Int16 nNumMonths = pCalendar->getNumberOfMonthsInYear();
            if ( nDay && nMonth && nDay<=31 && nMonth<=nNumMonths )
            {
                --nMonth;   // calendar month field is passed zero-based here
                pCalendar->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH, nDay );
                pCalendar->setValue( i18n::CalendarFieldIndex::MONTH, nMonth );
                pCalendar->setValue( i18n::CalendarFieldIndex::YEAR, nYear );
                sal_Int16 nHour, nMinute, nSecond;
                // #i14974# The imported value should have no fractional value, so set the
                // time fields to zero (ICU calendar instance defaults to current date/time)
                nHour = nMinute = nSecond = 0;
                if (nFound > 3)
                    nHour = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[3], nEnd[3]+1-nStart[3])));
                if (nFound > 4)
                    nMinute = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[4], nEnd[4]+1-nStart[4])));
                if (nFound > 5)
                    nSecond = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[5], nEnd[5]+1-nStart[5])));
                // do not use calendar's milliseconds, to avoid fractional part truncation
                double fFrac = 0.0;
                if (nFound > 6)
                {
                    // Parse the 7th part as ".<digits>" seconds and convert
                    // it to a fraction of a day.
                    sal_Unicode cDec = '.';
                    OUString aT = OUStringChar(cDec) + rStr.subView( nStart[6], nEnd[6]+1-nStart[6]);
                    rtl_math_ConversionStatus eStatus;
                    double fV = rtl::math::stringToDouble( aT, cDec, 0, &eStatus );
                    if (eStatus == rtl_math_ConversionStatus_Ok)
                        fFrac = fV / 86400.0;
                }
                sal_Int32 nPos;
                if (nFound > 3 && 1 <= nHour && nHour <= 12  // nHour 0 and >=13 can't be AM/PM
                        && (nPos = nEnd[nFound-1] + 1) < nLen)
                {
                    // Dreaded AM/PM may be following.
                    while (nPos < nLen && rStr[nPos] == ' ')
                        ++nPos;
                    if (nPos < nLen)
                    {
                        sal_Int32 nStop = nPos;
                        while (nStop < nLen && rStr[nStop] != ' ')
                            ++nStop;
                        OUString aAmPm = rStr.copy( nPos, nStop - nPos);
                        // For AM only 12 needs to be treated, whereas for PM
                        // it must not. Check both, locale and second/English
                        // strings.
                        if (nHour == 12 &&
                                (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimeAM()) ||
                                 (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, u"AM"_ustr))))
                        {
                            nHour = 0;
                        }
                        else if (nHour < 12 &&
                                (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimePM()) ||
                                 (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, u"PM"_ustr))))
                        {
                            nHour += 12;
                        }
                    }
                }
                pCalendar->setValue( i18n::CalendarFieldIndex::HOUR, nHour );
                pCalendar->setValue( i18n::CalendarFieldIndex::MINUTE, nMinute );
                pCalendar->setValue( i18n::CalendarFieldIndex::SECOND, nSecond );
                pCalendar->setValue( i18n::CalendarFieldIndex::MILLISECOND, 0 );
                if ( pCalendar->isValid() )
                {
                    // Whole days diff.
                    double fDiff = DateTime::Sub( DateTime(pDocFormatter->GetNullDate()),
                            pCalendar->getEpochStart());
                    // #i14974# must use getLocalDateTime to get the same
                    // date values as set above
                    double fDays = pCalendar->getLocalDateTime() + fFrac;
                    fDays -= fDiff;

                    LanguageType eLatin, eCjk, eCtl;
                    rDoc.GetLanguage( eLatin, eCjk, eCtl );
                    LanguageType eDocLang = eLatin;     //! which language for date formats?

                    SvNumFormatType nType = (nFound > 3 ? SvNumFormatType::DATETIME : SvNumFormatType::DATE);
                    sal_uLong nFormat = pDocFormatter->GetStandardFormat( nType, eDocLang );
                    // maybe there is a special format including seconds or milliseconds
                    if (nFound > 5)
                        nFormat = pDocFormatter->GetStandardFormat( fDays, nFormat, nType, eDocLang);

                    ScAddress aPos(nCol,nRow,nTab);
                    if ( bUseDocImport )
                        rDocImport.setNumericCell(aPos, fDays);
                    else
                        rDoc.SetValue( aPos, fDays );
                    rDoc.SetNumberFormat(aPos, nFormat);

                    return bMultiLine;     // success
                }
            }
        }
    }

    // Standard or date not determined -> SetString / EditCell
    if( rStr.indexOf( '\n' ) == -1 )
    {
        if (!bDetectNumFormat && nColFormat == SC_COL_STANDARD)
        {
            // Import a strict ISO 8601 date(+time) string even without
            // "Detect special numbers" or "Date (YMD)".
            do
            {
                // Simple pre-check before calling more expensive parser.
                // ([+-])(Y)YYYY-MM-DD
                if (rStr.getLength() < 10)
                    break;
                const sal_Int32 n1 = rStr.indexOf('-', 1);
                if (n1 < 4)
                    break;
                const sal_Int32 n2 = rStr.indexOf('-', n1 + 1);
                if (n2 < 7 || n1 + 3 < n2)
                    break;

                css::util::DateTime aDateTime;
                if (!sax::Converter::parseDateTime( aDateTime, rStr))
                    break;

                sal_uInt32 nFormat = 0;
                double fVal = 0.0;
                SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
                if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal))
                {
                    if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE)
                    {
                        ScAddress aPos(nCol,nRow,nTab);
                        if (bUseDocImport)
                            rDocImport.setNumericCell(aPos, fVal);
                        else
                            rDoc.SetValue(aPos, fVal);
                        rDoc.SetNumberFormat(aPos, nFormat);

                        return bMultiLine; // success
                    }
                }
            }
            while(false);
        }

        ScSetStringParam aParam;
        aParam.mpNumFormatter = pFormatter;
        aParam.mbDetectNumberFormat = bDetectNumFormat;
        aParam.mbDetectScientificNumberFormat = bDetectSciNumFormat;
        aParam.meSetTextNumFormat = ScSetStringParam::SpecialNumberOnly;
        aParam.mbHandleApostrophe = false;
        aParam.mbCheckLinkFormula = true;
        if ( bUseDocImport )
            rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr, &aParam);
        else
            rDoc.SetString( nCol, nRow, nTab, rStr, &aParam );
    }
    else
    {
        // Content with a line feed is stored as an edit-engine text object.
        bMultiLine = true;
        ScFieldEditEngine& rEngine = rDoc.GetEditEngine();
        rEngine.SetTextCurrentDefaults(rStr);
        if ( bUseDocImport )
            rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
        else
            rDoc.SetEditText( ScAddress( nCol, nRow, nTab ), rEngine.CreateTextObject() );
    }
    return bMultiLine;
}
|
|
|
|
/**
 * Extract one fixed-width field from rLine.
 *
 * The field spans the index range [nStart, nNext); nNext is clamped to the
 * line length. Trailing blanks are dropped. If the remaining text starts and
 * ends with a double quote, the quotes are stripped and rbIsQuoted is set to
 * true, otherwise it is set to false.
 *
 * @param rLine          the complete input line.
 * @param nStart         index of the first character of the field.
 * @param nNext          index one past the last character of the field.
 * @param rbIsQuoted     out: whether the field was enclosed in double quotes.
 *                       Left untouched if the field is empty (nNext <= nStart).
 * @param rbOverflowCell set to true if the content had to be truncated to
 *                       nArbitraryCellLengthLimit characters; never reset here.
 * @return the field content, at most nArbitraryCellLengthLimit characters.
 */
static OUString lcl_GetFixed( const OUString& rLine, sal_Int32 nStart, sal_Int32 nNext,
                     bool& rbIsQuoted, bool& rbOverflowCell )
{
    sal_Int32 nLen = rLine.getLength();
    if (nNext > nLen)
        nNext = nLen;
    if ( nNext <= nStart )
        return OUString();

    const sal_Unicode* pStr = rLine.getStr();

    // nSpace: one past the last non-blank character of the field.
    sal_Int32 nSpace = nNext;
    while ( nSpace > nStart && pStr[nSpace-1] == ' ' )
        --nSpace;

    rbIsQuoted = (pStr[nStart] == '"' && pStr[nSpace-1] == '"');
    if (rbIsQuoted)
    {
        // Length of the text between the quotes; a lone '"' is both opening
        // and closing quote, which yields -1 and is clamped to empty.
        const sal_Int32 nContentLen = std::max< sal_Int32 >(0, nSpace-nStart-2);
        // Compare exactly what would be copied against the limit, so that the
        // quoted and the unquoted branch enforce the same maximum.
        bool bFits = (nContentLen <= nArbitraryCellLengthLimit);
        if (bFits)
            return rLine.copy(nStart+1, nContentLen);
        else
        {
            SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
            rbOverflowCell = true;
            return rLine.copy(nStart+1, nArbitraryCellLengthLimit);
        }
    }
    else
    {
        bool bFits = (nSpace - nStart <= nArbitraryCellLengthLimit);
        if (bFits)
            return rLine.copy(nStart, nSpace-nStart);
        else
        {
            SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
            rbOverflowCell = true;
            return rLine.copy(nStart, nArbitraryCellLengthLimit);
        }
    }
}
|
|
|
|
/**
 * Import delimited or fixed-width text from rStrm into aRange, honoring the
 * extended ASCII import options in pExtOptions. Without options this simply
 * delegates to Text2Doc().
 *
 * If bUndo is set, the data is read twice: a first pass only determines the
 * extent of the imported range (and may ask the user before overwriting
 * non-empty cells), then the stream is rewound and a second pass writes the
 * cells. Otherwise there is a single writing pass.
 *
 * Sets bOverflowRow / bOverflowCol / bOverflowCell when data did not fit, and
 * mbIncludeBOM when a byte-order-mark was consumed from the stream.
 *
 * @param rStrm stream positioned at the start of the text to import.
 * @return false if the user declined to replace existing content or
 *         StartPaste() failed; true otherwise (or Text2Doc()'s result).
 */
bool ScImportExport::ExtText2Doc( SvStream& rStrm )
{
    if (!pExtOptions)
        return Text2Doc( rStrm );

    sal_uInt64 const nOldPos = rStrm.Tell();
    sal_uInt64 const nRemaining = rStrm.remainingSize();
    std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh,
            ScResId( STR_LOAD_DOC ), nRemaining, true ));
    rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
    // tdf#82254 - check whether to include a byte-order-mark in the output
    if (nOldPos != rStrm.Tell())
        mbIncludeBOM = true;

    SCCOL nStartCol = aRange.aStart.Col();
    SCCOL nEndCol = aRange.aEnd.Col();
    SCROW nStartRow = aRange.aStart.Row();
    const SCTAB nTab = aRange.aStart.Tab();

    bool    bFixed              = pExtOptions->IsFixedLen();
    OUString aSeps              = pExtOptions->GetFieldSeps();  // Need non-const for ReadCsvLine(),
    const sal_Unicode* pSeps    = aSeps.getStr();               // but it will be const anyway (asserted below).
    bool    bMerge              = pExtOptions->IsMergeSeps();
    bool    bRemoveSpace        = pExtOptions->IsRemoveSpace();
    sal_uInt16  nInfoCount      = pExtOptions->GetInfoCount();
    const sal_Int32* pColStart  = pExtOptions->GetColStart();
    const sal_uInt8* pColFormat = pExtOptions->GetColFormat();
    tools::Long nSkipLines      = pExtOptions->GetStartRow();

    LanguageType eDocLang = pExtOptions->GetLanguage();
    SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eDocLang);
    bool bDetectNumFormat = pExtOptions->IsDetectSpecialNumber();
    bool bDetectSciNumFormat = pExtOptions->IsDetectScientificNumber();
    bool bEvaluateFormulas = pExtOptions->IsEvaluateFormulas();
    bool bSkipEmptyCells = pExtOptions->IsSkipEmptyCells();

    // For date recognition
    ::utl::TransliterationWrapper aTransliteration(
        comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE );
    aTransliteration.loadModuleIfNeeded( eDocLang );
    CalendarWrapper aCalendar( comphelper::getProcessComponentContext() );
    aCalendar.loadDefaultCalendar(
        LanguageTag::convertToLocale( eDocLang ) );
    // Second, US-English set; only needed if the import language differs.
    std::unique_ptr< ::utl::TransliterationWrapper > pEnglishTransliteration;
    std::unique_ptr< CalendarWrapper > pEnglishCalendar;
    if ( eDocLang != LANGUAGE_ENGLISH_US )
    {
        pEnglishTransliteration.reset(new ::utl::TransliterationWrapper (
            comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE ));
        // Load the English module into the English wrapper; aTransliteration
        // must keep the module of the import language loaded above.
        pEnglishTransliteration->loadModuleIfNeeded( LANGUAGE_ENGLISH_US );
        pEnglishCalendar.reset(new CalendarWrapper ( comphelper::getProcessComponentContext() ));
        pEnglishCalendar->loadDefaultCalendar(
            LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US ) );
    }

    OUString aLine;
    OUString aCell;
    sal_uInt16 i;
    SCROW nRow = nStartRow;
    sal_Unicode cDetectSep = 0xffff;    // No separator detection here.

    // Skip the lines before the configured start row.
    while(--nSkipLines>0)
    {
        aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); // content is ignored
        if ( rStrm.eof() )
            break;
    }

    // Determine range for Undo.
    // We don't need this during import of a file to a new sheet or document...
    bool bDetermineRange = bUndo;
    bool bColumnsAreDetermined = false;

    // Row heights don't need to be adjusted on the fly if EndPaste() is called
    // afterwards, which happens only if bDetermineRange. This variable also
    // survives the toggle of bDetermineRange down at the end of the do{} loop.
    bool bRangeIsDetermined = bDetermineRange;

    bool bQuotedAsText = pExtOptions && pExtOptions->IsQuotedAsText();

    // Position to rewind to for the second (writing) pass.
    sal_uInt64 nOriginalStreamPos = rStrm.Tell();

    // Sentinels: first > last means no multi-line row was seen.
    SCROW nFirstUpdateRowHeight = SCROW_MAX;
    SCROW nLastUpdateRowHeight = -1;

    ScDocumentImport aDocImport(rDoc);
    do
    {
        const SCCOL nLastCol = nEndCol; // tdf#129701 preserve value of nEndCol
        for( ;; )
        {
            aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep);
            if ( rStrm.eof() && aLine.isEmpty() )
                break;

            assert(pSeps == aSeps.getStr());

            if ( nRow > rDoc.MaxRow() )
            {
                bOverflowRow = true;    // display warning on import
                break;  // for
            }

            if (!bDetermineRange)
                EmbeddedNullTreatment( aLine);

            sal_Int32 nLineLen = aLine.getLength();
            SCCOL nCol = nStartCol;
            bool bMultiLine = false;
            if ( bFixed ) //  Fixed line length
            {
                if (bDetermineRange)
                {
                    if (!bColumnsAreDetermined)
                    {
                        // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it
                        // is only an overflow if there is really data following to
                        // be put behind the last column, which doesn't happen if
                        // info is SC_COL_SKIP.
                        for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i)
                        {
                            const sal_uInt8 nFmt = pColFormat[i];
                            if (nFmt != SC_COL_SKIP)        // otherwise don't increment nCol either
                            {
                                if (nCol > rDoc.MaxCol())
                                    bOverflowCol = true;    // display warning on import
                                ++nCol;
                            }
                        }
                        bColumnsAreDetermined = true;
                    }
                }
                else
                {
                    sal_Int32 nStartIdx = 0;
                    // Same maxcol+1 check reason as above.
                    for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i)
                    {
                        sal_Int32 nNextIdx = nStartIdx;
                        if (i + 1 < nInfoCount)
                            CountVisualWidth( aLine, nNextIdx, pColStart[i+1] - pColStart[i] );
                        else
                            nNextIdx = nLineLen;
                        sal_uInt8 nFmt = pColFormat[i];
                        if (nFmt != SC_COL_SKIP)        // otherwise don't increment nCol either
                        {
                            if (nCol > rDoc.MaxCol())
                                bOverflowCol = true;    // display warning on import
                            else
                            {
                                bool bIsQuoted = false;
                                aCell = lcl_GetFixed( aLine, nStartIdx, nNextIdx, bIsQuoted, bOverflowCell );
                                if (bIsQuoted && bQuotedAsText)
                                    nFmt = SC_COL_TEXT;

                                bMultiLine |= lcl_PutString(
                                    aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
                                    &aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells,
                                    aTransliteration, aCalendar,
                                    pEnglishTransliteration.get(), pEnglishCalendar.get());
                            }
                            ++nCol;
                        }
                        nStartIdx = nNextIdx;
                    }
                }
            }
            else // Search for the separator
            {
                SCCOL nSourceCol = 0;
                sal_uInt16 nInfoStart = 0;
                const sal_Unicode* p = aLine.getStr();
                // tdf#129701 if there is only one column, and user wants to treat empty cells,
                // we need to detect *p = null
                bool bIsLastColEmpty = !(*p) && !bSkipEmptyCells && !bDetermineRange;
                // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
                // overflow if there is really data following to be put behind
                // the last column, which doesn't happen if info is
                // SC_COL_SKIP.
                while ( (*p || bIsLastColEmpty) && nCol <= rDoc.MaxCol()+1)
                {
                    bool bIsQuoted = false;
                    p = ScImportExport::ScanNextFieldFromString( p, aCell,
                            cStr, pSeps, bMerge, bIsQuoted, bOverflowCell, bRemoveSpace );

                    // Look up the format configured for this source column.
                    sal_uInt8 nFmt = SC_COL_STANDARD;
                    for ( i=nInfoStart; i<nInfoCount; i++ )
                    {
                        if ( pColStart[i] == nSourceCol + 1 )   // pColStart is 1-based
                        {
                            nFmt = pColFormat[i];
                            nInfoStart = i + 1;     // ColInfos are in succession
                            break;  // for
                        }
                    }
                    if ( nFmt != SC_COL_SKIP )
                    {
                        if (nCol > rDoc.MaxCol())
                            bOverflowCol = true;    // display warning on import
                        else if (!bDetermineRange)
                        {
                            if (bIsQuoted && bQuotedAsText)
                                nFmt = SC_COL_TEXT;

                            bMultiLine |= lcl_PutString(
                                aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
                                &aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells,
                                aTransliteration, aCalendar,
                                pEnglishTransliteration.get(), pEnglishCalendar.get());
                        }
                        ++nCol;
                        if (bIsLastColEmpty)
                        {
                            bIsLastColEmpty = false; // toggle to stop
                        }
                        else
                        {
                            // tdf#129701 detect if there is a last empty column when we need it
                            bIsLastColEmpty = (nCol == nLastCol) && !(*p) && !bSkipEmptyCells && !bDetermineRange;
                        }

                    }
                    ++nSourceCol;
                }
            }
            if (nEndCol < nCol)
                nEndCol = nCol;     //! points to the next free or even rDoc.MaxCol()+2

            if (!bDetermineRange)
            {
                if (bMultiLine && !bRangeIsDetermined && pDocSh)
                {   // Adjust just once at the end for a whole range.
                    nFirstUpdateRowHeight = std::min( nFirstUpdateRowHeight, nRow );
                    nLastUpdateRowHeight = std::max( nLastUpdateRowHeight, nRow );
                }
                xProgress->SetStateOnPercent( rStrm.Tell() - nOldPos );
            }
            ++nRow;
        }
        // so far nRow/nEndCol pointed to the next free
        if (nRow > nStartRow)
            --nRow;
        if (nEndCol > nStartCol)
            nEndCol = ::std::min( static_cast<SCCOL>(nEndCol - 1), rDoc.MaxCol());

        if (bDetermineRange)
        {
            aRange.aEnd.SetCol( nEndCol );
            aRange.aEnd.SetRow( nRow );

            if ( !mbApi && nStartCol != nEndCol &&
                 !rDoc.IsBlockEmpty( nStartCol + 1, nStartRow, nEndCol, nRow, nTab ) )
            {
                ScReplaceWarnBox aBox(ScDocShell::GetActiveDialogParent());
                if (aBox.run() != RET_YES)
                {
                    return false;
                }
            }

            // Rewind for the writing pass.
            rStrm.Seek( nOriginalStreamPos );
            nRow = nStartRow;
            if (!StartPaste())
            {
                EndPaste(false);
                return false;
            }
        }

        bDetermineRange = !bDetermineRange;     // toggle
    } while (!bDetermineRange);

    if ( !mbOverwriting )
        aDocImport.finalize();

    xProgress.reset();    // make room for AdjustRowHeight progress

    // "<=" so that a single multi-line row (first == last) is adjusted too;
    // with no multi-line row the sentinels give first > last.
    if( nFirstUpdateRowHeight <= nLastUpdateRowHeight && pDocSh )
        pDocSh->AdjustRowHeight( nFirstUpdateRowHeight, nLastUpdateRowHeight, nTab);

    if (bRangeIsDetermined)
        EndPaste(false);

    if (mbImportBroadcast && !mbOverwriting)
    {
        rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged);
        // pDocSh is treated as optional elsewhere in this function.
        if (pDocSh)
            pDocSh->PostDataChanged();
    }
    return true;
}
|
|
|
|
/**
 * Strip all embedded NULL characters from rStr.
 *
 * Workaround for raw data containing NULL characters: as long as they cannot
 * be handled properly as cell content (too many places assume 0-terminated
 * strings) they are simply removed. Excel does the same. See fdo#57841 for
 * sample data.
 *
 * @param rStr string to clean up in place; untouched if it has no NULL.
 */
void ScImportExport::EmbeddedNullTreatment( OUString & rStr )
{
    const sal_Unicode cNul = 0;

    // Common case first: nothing embedded, so avoid any string reallocation.
    const sal_Int32 nFirstNul = rStr.indexOf(cNul);
    if (nFirstNul < 0)
        return;

    // Start replacing at the first hit; everything before it is clean.
    rStr = rStr.replaceAll(std::u16string_view(&cNul, 1), u"", nFirstNul);
}
|
|
|
|
/**
 * Scan one field from a 0-terminated line and advance past its delimiter.
 *
 * @param p              current read position within the line.
 * @param rField         out: the field content; cleared first.
 * @param cStr           text quote character, 0 if quoting is not used.
 * @param pSeps          0-terminated list of field separator characters.
 * @param bMergeSeps     if true, a run of separators following the field is
 *                       skipped as well.
 * @param rbIsQuoted     out: true if the field started with cStr.
 * @param rbOverflowCell set to true if content could not be appended in full;
 *                       never reset here.
 * @param bRemoveSpace   if true, blanks around unquoted data are trimmed.
 * @return position at which the next field starts (or the terminating 0).
 */
const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p,
        OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps, bool bMergeSeps, bool& rbIsQuoted,
        bool& rbOverflowCell, bool bRemoveSpace )
{
    const sal_Unicode cSpace = ' ';

    rbIsQuoted = false;
    rField.clear();

    if (cStr && !ScGlobal::UnicodeStrChr(pSeps, cSpace))
    {
        // Cope with broken generators that put leading blanks before a quoted
        // field, like "field1", "field2", "..."
        // NOTE: this is not in conformance with http://tools.ietf.org/html/rfc4180
        const sal_Unicode* pPastBlanks = p;
        for ( ; *pPastBlanks == cSpace; ++pPastBlanks)
            ;
        if (*pPastBlanks == cStr)
            p = pPastBlanks;
    }

    if (cStr && *p == cStr)
    {
        // Field enclosed in quotes.
        rbIsQuoted = true;
        p = lcl_ScanString( p, rField, pSeps, cStr, DoubledQuoteMode::ESCAPE, rbOverflowCell );
        const sal_Unicode* const pAfterQuote = p;
        for ( ; !lcl_isFieldEnd( *p, pSeps); ++p)
            ;
        // Append remaining unquoted and undelimited data (dirty, dirty) to
        // this field.
        if (p > pAfterQuote)
        {
            const sal_Unicode* pDataEnd = p;
            if (bRemoveSpace)
            {
                for ( ; pDataEnd > pAfterQuote && pDataEnd[-1] == cSpace; --pDataEnd)
                    ;
            }
            if (!lcl_appendLineData( rField, pAfterQuote, pDataEnd))
                rbOverflowCell = true;
        }
    }
    else
    {
        // Plain field: everything up to the next delimiter.
        const sal_Unicode* pDataBegin = p;
        for ( ; !lcl_isFieldEnd( *p, pSeps); ++p)
            ;
        const sal_Unicode* pDataEnd = p;    // [pDataBegin,pDataEnd) is cell data after trimming
        if (bRemoveSpace)
        {
            for ( ; pDataBegin < pDataEnd && *pDataBegin == cSpace; ++pDataBegin)
                ;
            for ( ; pDataEnd > pDataBegin && pDataEnd[-1] == cSpace; --pDataEnd)
                ;
        }
        if (!lcl_appendLineData( rField, pDataBegin, pDataEnd))
            rbOverflowCell = true;
    }

    // Step over the delimiter that ended the field, unless at end of line.
    if (*p)
        ++p;

    if (bMergeSeps)
    {
        // skip following delimiters
        for ( ; *p && ScGlobal::UnicodeStrChr( pSeps, *p); ++p)
            ;
    }
    return p;
}
|
|
|
|
namespace {
|
|
|
|
/**
|
|
* Check if a given string has any line break characters or separators.
|
|
*
|
|
* @param rStr string to inspect.
|
|
* @param cSep separator character.
|
|
*/
|
|
bool hasLineBreaksOrSeps( const OUString& rStr, sal_Unicode cSep )
|
|
{
|
|
const sal_Unicode* p = rStr.getStr();
|
|
for (sal_Int32 i = 0, n = rStr.getLength(); i < n; ++i, ++p)
|
|
{
|
|
sal_Unicode c = *p;
|
|
if (c == cSep)
|
|
// separator found.
|
|
return true;
|
|
|
|
switch (c)
|
|
{
|
|
case '\n':
|
|
case '\r':
|
|
// line break found.
|
|
return true;
|
|
default:
|
|
;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
}
|
|
|
|
/**
 * Write the cells of aRange (first sheet of the range only) to rStrm as
 * separated text: columns are joined with cSep, rows end with a line end.
 *
 * Unless the clipboard holds a multi-range or the range spans several sheets,
 * the range is first shrunk to its data area. Filtered rows are skipped
 * unless bIncludeFiltered is set. Writing stops early on a stream error or
 * once nSizeLimit (if non-zero) is exceeded.
 *
 * @param rStrm destination stream.
 * @return false if the range could not be shrunk to a data area or the
 *         stream reports an error; true otherwise.
 */
bool ScImportExport::Doc2Text( SvStream& rStrm )
{
    SCCOL nFirstCol = aRange.aStart.Col();
    SCROW nFirstRow = aRange.aStart.Row();
    SCTAB nFirstTab = aRange.aStart.Tab();
    SCCOL nLastCol = aRange.aEnd.Col();
    SCROW nLastRow = aRange.aEnd.Row();
    SCTAB nLastTab = aRange.aEnd.Tab();

    if (!rDoc.GetClipParam().isMultiRange() && nFirstTab == nLastTab
        && !rDoc.ShrinkToDataArea( nFirstTab, nFirstCol, nFirstRow, nLastCol, nLastRow ))
    {
        return false;
    }

    OUString aText;

    const bool bConvertLF = (GetSystemLineEnd() != LINEEND_LF);

    // Apply the newline and separator conversions from the export options to
    // aText; used for formula results and for text cells alike.
    const auto convertForExport = [this, &aText, bConvertLF]()
    {
        if (aText.indexOf( '\n' ) != -1)
        {
            if (mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace)
                aText = aText.replaceAll( "\n", " " );
            else if (mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF)
                aText = convertLineEnd(aText, GetSystemLineEnd());
        }

        if (mExportTextOptions.mcSeparatorConvertTo && cSep)
            aText = aText.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
    };

    // Emit aText, enclosed in cStr quotes if requested.
    const auto writeText = [this, &rStrm, &aText]( bool bQuoted )
    {
        if (bQuoted)
            lcl_WriteString( rStrm, aText, cStr, cStr );
        else
            rStrm.WriteUnicodeOrByteText(aText);
    };

    // We need to cache sc::ColumnBlockPosition per each column, tab is always nFirstTab.
    std::vector< sc::ColumnBlockPosition > aBlockPositions( nLastCol - nFirstCol + 1 );
    for (SCCOL nInitCol = nFirstCol; nInitCol <= nLastCol; ++nInitCol)
        rDoc.InitColumnBlockPosition( aBlockPositions[ nInitCol - nFirstCol ], nFirstTab, nInitCol );

    for (SCROW nCurRow = nFirstRow; nCurRow <= nLastRow; nCurRow++)
    {
        if (!bIncludeFiltered && rDoc.RowFiltered( nCurRow, nFirstTab ))
            continue;

        for (SCCOL nCurCol = nFirstCol; nCurCol <= nLastCol; nCurCol++)
        {
            ScAddress aPos(nCurCol, nCurRow, nFirstTab);
            sal_uInt32 nNumFmt = rDoc.GetNumberFormat(ScRange(aPos));
            ScInterpreterContext& rContext = rDoc.GetNonThreadedContext();

            ScRefCellValue aCell(rDoc, aPos, aBlockPositions[ nCurCol - nFirstCol ]);
            switch (aCell.getType())
            {
                case CELLTYPE_FORMULA:
                {
                    if (bFormulas)
                    {
                        // Export the formula expression itself.
                        aText = aCell.getFormula()->GetFormula();
                        writeText( aText.indexOf( cSep ) != -1 );
                    }
                    else
                    {
                        // Export the formatted formula result.
                        const Color* pColor;
                        aText = ScCellFormat::GetString(aCell, nNumFmt, &pColor, &rContext, rDoc);
                        convertForExport();
                        // Formula results are quoted only for separators.
                        writeText( mExportTextOptions.mbAddQuotes && ( aText.indexOf( cSep ) != -1 ) );
                    }
                }
                break;
                case CELLTYPE_VALUE:
                {
                    const Color* pColor;
                    aText = ScCellFormat::GetString(aCell, nNumFmt, &pColor, &rContext, rDoc);
                    rStrm.WriteUnicodeOrByteText(aText);
                }
                break;
                case CELLTYPE_NONE:
                break;
                default:
                {
                    const Color* pColor;
                    aText = ScCellFormat::GetString(aCell, nNumFmt, &pColor, &rContext, rDoc);
                    convertForExport();
                    // Text is quoted for separators and for line breaks.
                    writeText( mExportTextOptions.mbAddQuotes && hasLineBreaksOrSeps(aText, cSep) );
                }
            }
            if (nCurCol < nLastCol)
                rStrm.WriteUnicodeOrByteText(rtl::OUStringChar(cSep));
        }
        // Do not append a line feed for one single cell.
        // NOTE: this Doc2Text() is only called for clipboard via
        // ScImportExport::ExportStream().
        if (nFirstRow != nLastRow || nFirstCol != nLastCol)
            endlub(rStrm);
        if (rStrm.GetError() != ERRCODE_NONE
            || (nSizeLimit && rStrm.Tell() > nSizeLimit))
        {
            break;
        }
    }

    return rStrm.GetError() == ERRCODE_NONE;
}
|
|
|
|
/** Imports SYLK records from rStrm into the document area starting at
    aRange.aStart.

    The stream is scanned in up to two passes, each starting at the stream
    position found on entry:
    - If bSingle is set, the first pass only determines the extent of the
      data (from ;K, ;M and F records), then aRange.aEnd is adjusted and
      StartPaste() is called before the second pass.
    - The data pass writes text/value cells (;K), formulas (;E / ;M, only if
      the ID record identified a version <= SylkVersion::OWN) and number
      formats (P records referenced by F;P<n>).

    Coordinates that cannot be represented are clamped, flagged via
    bOverflowCol / bOverflowRow and the affected record content is skipped.

    @param rStrm  stream to read from; decoded with its stream character set.
    @return false if a C or F record tag is not followed by ';' or if
            StartPaste() failed, true otherwise.
 */
bool ScImportExport::Sylk2Doc( SvStream& rStrm )
{
    bool bOk = true;
    bool bMyDoc = false;
    SylkVersion eVersion = SylkVersion::OTHER;

    // US-English separators for StringToDouble
    sal_Unicode const cDecSep = '.';
    sal_Unicode const cGrpSep = ',';

    const SCCOL nStartCol = aRange.aStart.Col();
    const SCROW nStartRow = aRange.aStart.Row();
    SCCOL nEndCol = aRange.aEnd.Col();
    SCROW nEndRow = aRange.aEnd.Row();
    const sal_uInt64 nOldPos = rStrm.Tell();
    bool bData = !bSingle;
    ::std::vector< sal_uInt32 > aFormats;

    // Reads the 1-based column number at pNum into rCol (offset by the start
    // column). On overflow or out-of-sheet values rCol is clamped, the
    // overflow is remembered and true ("invalid") is returned.
    const auto readColumn = [&]( const sal_Unicode* pNum, SCCOL& rCol, const char* pWarnPrefix ) -> bool
    {
        const bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(pNum)), nStartCol - 1, rCol);
        if (!bFail && 0 <= rCol && rCol <= rDoc.MaxCol())
            return false;
        SAL_WARN("sc.ui", pWarnPrefix << rCol);
        rCol = std::clamp<SCCOL>(rCol, 0, rDoc.MaxCol());
        bOverflowCol = true;
        return true;
    };

    // Same as readColumn, for rows limited by nMaxImportRow.
    const auto readRow = [&]( const sal_Unicode* pNum, SCROW& rRow, const char* pWarnPrefix ) -> bool
    {
        const bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(pNum)), nStartRow - 1, rRow);
        if (!bFail && 0 <= rRow && rRow <= nMaxImportRow)
            return false;
        SAL_WARN("sc.ui", pWarnPrefix << rRow);
        rRow = std::clamp<SCROW>(rRow, 0, nMaxImportRow);
        bOverflowRow = true;
        return true;
    };

    // Advances rpPos behind the next ';' or to the terminating NUL.
    const auto skipToNextField = []( const sal_Unicode*& rpPos )
    {
        while (*rpPos && *rpPos != ';')
            ++rpPos;
        if (*rpPos)
            ++rpPos;
    };

    // Enlarges the detected data extent so that it contains the given cell.
    const auto extendRange = [&]( SCCOL nToCol, SCROW nToRow )
    {
        nEndRow = std::max(nEndRow, nToRow);
        nEndCol = std::max(nEndCol, nToCol);
    };

    if (!bSingle)
        bOk = StartPaste();

    while (bOk)
    {
        OUString aLine;
        OUString aText;
        OStringBuffer aByteLine;
        // Current cell and matrix reference position persist across records
        // of one pass, as records may omit ;X or ;Y.
        SCCOL nCol = nStartCol;
        SCROW nRow = nStartRow;
        SCCOL nRefCol = nCol;
        SCROW nRefRow = nRow;
        rStrm.Seek( nOldPos );
        for (;;)
        {
            //! allow unicode
            (void)rStrm.ReadLine( aByteLine );
            aLine = OStringToOUString(aByteLine, rStrm.GetStreamCharSet());
            if (rStrm.eof())
                break;
            // Validity flags are per record.
            bool bInvalidCol = false;
            bool bInvalidRow = false;
            const sal_Unicode* p = aLine.getStr();
            const sal_Unicode cTag = *p++;
            if (cTag == 'C')        // Content
            {
                if (*p++ != ';')
                    return false;

                bool bInvalidRefCol = false;
                bool bInvalidRefRow = false;
                while (*p)
                {
                    sal_Unicode ch = *p++;
                    ch = ScGlobal::ToUpperAlpha( ch );
                    switch (ch)
                    {
                        case 'X':
                            bInvalidCol = readColumn( p, nCol, "ScImportExport::Sylk2Doc - ;X invalid nCol=" );
                            break;
                        case 'Y':
                            bInvalidRow = readRow( p, nRow, "ScImportExport::Sylk2Doc - ;Y invalid nRow=" );
                            break;
                        case 'C':
                            bInvalidRefCol = readColumn( p, nRefCol, "ScImportExport::Sylk2Doc - ;C invalid nRefCol=" );
                            break;
                        case 'R':
                            bInvalidRefRow = readRow( p, nRefRow, "ScImportExport::Sylk2Doc - ;R invalid nRefRow=" );
                            break;
                        case 'K':
                        {
                            // With a predefined target range, ignore cells
                            // outside of it or with invalid coordinates.
                            const bool bOutside =
                                nCol < nStartCol || nCol > nEndCol
                                || nRow < nStartRow || nRow > nEndRow
                                || nCol > rDoc.MaxCol() || nRow > nMaxImportRow
                                || bInvalidCol || bInvalidRow;
                            if (!bSingle && bOutside)
                                break;
                            if (!bData)
                            {
                                // Extent detection pass only.
                                extendRange( nCol, nRow );
                                break;
                            }
                            const bool bText = (*p == '"');
                            if (bText)
                            {
                                aText.clear();
                                p = lcl_ScanSylkString( p, aText, eVersion);
                            }
                            const sal_Unicode* q = p;
                            while (*q && *q != ';')
                                q++;
                            // A following ;I field means the value is to be ignored.
                            const bool bIgnoreValue = (*q == ';' && *(q+1) == 'I');
                            if (!bIgnoreValue && !bInvalidCol && !bInvalidRow)
                            {
                                if (bText)
                                {
                                    rDoc.EnsureTable(aRange.aStart.Tab());
                                    rDoc.SetTextCell(
                                        ScAddress(nCol, nRow, aRange.aStart.Tab()), aText);
                                }
                                else
                                {
                                    const double fVal = rtl_math_uStringToDouble( p,
                                            aLine.getStr() + aLine.getLength(),
                                            cDecSep, cGrpSep, nullptr, nullptr );
                                    rDoc.SetValue( nCol, nRow, aRange.aStart.Tab(), fVal );
                                }
                            }
                        }
                        break;
                        case 'E':
                        case 'M':
                        {
                            const bool bMatrix = (ch == 'M');
                            if (bMatrix)
                            {
                                // The matrix end must not lie before its origin.
                                nRefCol = std::max(nRefCol, nCol);
                                nRefRow = std::max(nRefRow, nRow);
                                if (!bData)
                                    extendRange( nRefCol, nRefRow );
                            }
                            if (!bMyDoc || !bData)
                                break;
                            aText = "=";
                            p = lcl_ScanSylkFormula( p, aText, eVersion);

                            if (bInvalidCol || bInvalidRow || (bMatrix && (bInvalidRefCol || bInvalidRefRow)))
                                break;

                            ScAddress aPos( nCol, nRow, aRange.aStart.Tab() );
                            /* FIXME: do we want GRAM_ODFF_A1 instead? At the
                             * end it probably should be GRAM_ODFF_R1C1, since
                             * R1C1 is what Excel writes in SYLK, or even
                             * better GRAM_ENGLISH_XL_R1C1. */
                            const formula::FormulaGrammar::Grammar eGrammar = formula::FormulaGrammar::GRAM_PODF_A1;
                            ScCompiler aComp(rDoc, aPos, eGrammar);
                            std::unique_ptr<ScTokenArray> xCode(aComp.CompileString(aText)); // ctor/InsertMatrixFormula did copy TokenArray
                            rDoc.CheckLinkFormulaNeedingCheck(*xCode);
                            if (bMatrix)
                            {
                                ScMarkData aMark(rDoc.GetSheetLimits());
                                aMark.SelectTable( aPos.Tab(), true );
                                rDoc.InsertMatrixFormula( nCol, nRow, nRefCol,
                                    nRefRow, aMark, OUString(), xCode.get() );
                            }
                            else
                            {
                                ScFormulaCell* pFCell = new ScFormulaCell(
                                    rDoc, aPos, *xCode, eGrammar, ScMatrixMode::NONE);
                                rDoc.SetFormulaCell(aPos, pFCell);
                            }
                        }
                        break;
                    }
                    skipToNextField( p );
                }
            }
            else if (cTag == 'F')   // Format
            {
                if (*p++ != ';')
                    return false;
                sal_Int32 nFormat = -1;
                while (*p)
                {
                    sal_Unicode ch = *p++;
                    ch = ScGlobal::ToUpperAlpha( ch );
                    switch (ch)
                    {
                        case 'X':
                            bInvalidCol = readColumn( p, nCol, "ScImportExport::Sylk2Doc - ;X invalid nCol=" );
                            break;
                        case 'Y':
                            bInvalidRow = readRow( p, nRow, "ScImportExport::Sylk2Doc - ;Y invalid nRow=" );
                            break;
                        case 'P' :
                            if (bData)
                            {
                                // F;P<n> sets format code of P;P<code> at
                                // current position, or at ;X;Y if specified.
                                // Note that ;X;Y may appear after ;P
                                const sal_Unicode* const pNumStart = p;
                                while (*p && *p != ';')
                                    p++;
                                const OUString aNumber(pNumStart, p - pNumStart);
                                nFormat = aNumber.toInt32();
                            }
                            break;
                    }
                    skipToNextField( p );
                }
                if (!bData)
                    extendRange( nCol, nRow );
                const bool bKnownFormat = 0 <= nFormat && o3tl::make_unsigned(nFormat) < aFormats.size();
                if (bKnownFormat && !bInvalidCol && !bInvalidRow)
                {
                    const sal_uInt32 nKey = aFormats[nFormat];
                    rDoc.ApplyAttr( nCol, nRow, aRange.aStart.Tab(),
                            SfxUInt32Item( ATTR_VALUE_FORMAT, nKey ) );
                }
            }
            else if (cTag == 'P')
            {
                if (bData && *p == ';' && *(p+1) == 'P')
                {
                    OUString aCode( p+2 );

                    sal_uInt32 nKey;
                    sal_Int32 nCheckPos;

                    if (aCode.getLength() > 2048 && comphelper::IsFuzzing())
                    {
                        // consider an excessive length as a failure when fuzzing
                        nCheckPos = 1;
                    }
                    else
                    {
                        // unescape doubled semicolons
                        aCode = aCode.replaceAll(";;", ";");
                        // get rid of Xcl escape characters
                        aCode = aCode.replaceAll("\x1b", "");
                        SvNumFormatType nType;
                        rDoc.GetFormatTable()->PutandConvertEntry( aCode, nCheckPos, nType, nKey,
                                LANGUAGE_ENGLISH_US, ScGlobal::eLnge, false);
                    }

                    // A format code that could not be used maps to key 0.
                    if (nCheckPos)
                        nKey = 0;

                    aFormats.push_back( nKey );
                }
            }
            else if (cTag == 'I' && *p == 'D' && aLine.getLength() > 4)
            {
                // Text following the first four characters of the ID record
                // names the generating program.
                const OUString aProgram = aLine.copy(4);
                if (aProgram == "CALCOOO32")
                    eVersion = SylkVersion::OOO32;
                else if (aProgram == "SCALC3")
                    eVersion = SylkVersion::SCALC3;
                bMyDoc = (eVersion <= SylkVersion::OWN);
            }
            else if (cTag == 'E')   // End
                break;
        }

        if (bData)
            break;

        // Extent is known now, prepare pasting and run the data pass.
        aRange.aEnd.SetCol( nEndCol );
        aRange.aEnd.SetRow( nEndRow );
        bOk = StartPaste();
        bData = true;
    }

    EndPaste();
    return bOk;
}
|
|
|
|
/** Exports the cells of aRange on table aRange.aStart.Tab() as SYLK to rStrm.

    Output consists of the "ID;PCALCOOO32" header, one "C;X<col>;Y<row>;K..."
    record per value, string, edit or formula cell (coordinates are 1-based
    relative to the range start) and a terminating "E" record. If bFormulas
    is set, formula cells additionally get an ;E (plain formula), ;R;C;M
    (matrix origin) or ;I;R;C (matrix reference) field.

    @param rStrm  stream to write to.
    @return true if the stream reports no error after writing.
 */
bool ScImportExport::Doc2Sylk( SvStream& rStrm )
{
    const SCCOL nStartCol = aRange.aStart.Col();
    const SCROW nStartRow = aRange.aStart.Row();
    const SCCOL nEndCol = aRange.aEnd.Col();
    const SCROW nEndRow = aRange.aEnd.Row();
    rStrm.WriteUnicodeOrByteText(u"ID;PCALCOOO32");
    endlub(rStrm);

    for (SCROW nRow = nStartRow; nRow <= nEndRow; nRow++)
    {
        for (SCCOL nCol = nStartCol; nCol <= nEndCol; nCol++)
        {
            // 1-based coordinates relative to the exported range.
            const SCROW r = nRow - nStartRow + 1;
            const SCCOL c = nCol - nStartCol + 1;
            ScRefCellValue aCell(rDoc, ScAddress(nCol, nRow, aRange.aStart.Tab()));

            bool bForm = false;     // whether to append the formula expression
            bool bHasValue = false; // numeric (true) or string (false) content
            switch (aCell.getType())
            {
                case CELLTYPE_FORMULA:
                    bForm = bFormulas;
                    bHasValue = rDoc.HasValueData( nCol, nRow, aRange.aStart.Tab());
                    break;
                case CELLTYPE_VALUE:
                    bHasValue = true;
                    break;
                case CELLTYPE_STRING:
                case CELLTYPE_EDIT:
                    break;
                default:
                    // Other cell types produce no record at all.
                    continue;
            }

            const OUString aRecordStart = "C;X"
                + OUString::number( c )
                + ";Y"
                + OUString::number( r )
                + ";K";

            if (bHasValue)
            {
                const double nVal = rDoc.GetValue( nCol, nRow, aRange.aStart.Tab() );
                const OUString aValStr = ::rtl::math::doubleToUString( nVal,
                        rtl_math_StringFormat_Automatic,
                        rtl_math_DecimalPlaces_Max, '.', true );
                const OUString aBufStr = aRecordStart + aValStr;
                rStrm.WriteUnicodeOrByteText(aBufStr);
            }
            else
            {
                OUString aCellStr = rDoc.GetString(nCol, nRow, aRange.aStart.Tab());
                aCellStr = aCellStr.replaceAll("\n", SYLK_LF);
                rStrm.WriteUnicodeOrByteText(aRecordStart);
                lcl_WriteString( rStrm, aCellStr, '"', ';' );
            }

            if (bForm)
            {
                const ScFormulaCell* pFCell = aCell.getFormula();
                const ScMatrixMode eMatrixMode = pFCell->GetMatrixFlag();

                // Matrix reference cells carry no expression of their own.
                OUString aFormula;
                if (eMatrixMode != ScMatrixMode::Reference)
                {
                    aFormula = pFCell->GetFormula( formula::FormulaGrammar::GRAM_PODF_A1);
                    /* FIXME: do we want GRAM_ODFF_A1 instead? At
                     * the end it probably should be
                     * GRAM_ODFF_R1C1, since R1C1 is what Excel
                     * writes in SYLK, or even better
                     * GRAM_ENGLISH_XL_R1C1. */
                }
                if ( eMatrixMode != ScMatrixMode::NONE &&
                        aFormula.startsWith("{") &&
                        aFormula.endsWith("}") )
                {   // cut off matrix {} characters
                    aFormula = aFormula.copy(1, aFormula.getLength()-2);
                }
                // Strip the leading '='. The string may be empty here (always
                // for matrix references), so it must not be indexed directly.
                if ( aFormula.startsWith("=") )
                    aFormula = aFormula.copy(1);

                OUString aPrefix;
                switch ( eMatrixMode )
                {
                    case ScMatrixMode::Formula :
                    {   // diff expression with 'M' M$-extension
                        SCCOL nC;
                        SCROW nR;
                        pFCell->GetMatColsRows( nC, nR );
                        // Turn the matrix dimensions into its relative end position.
                        nC += c - 1;
                        nR += r - 1;
                        aPrefix = ";R"
                            + OUString::number( nR )
                            + ";C"
                            + OUString::number( nC )
                            + ";M";
                    }
                    break;
                    case ScMatrixMode::Reference :
                    {   // diff expression with 'I' M$-extension
                        ScAddress aPos;
                        (void)pFCell->GetMatrixOrigin( rDoc, aPos );
                        aPrefix = ";I;R"
                            + OUString::number( aPos.Row() - nStartRow + 1 )
                            + ";C"
                            + OUString::number( aPos.Col() - nStartCol + 1 );
                    }
                    break;
                    default:
                        // formula Expression
                        aPrefix = ";E";
                }
                rStrm.WriteUnicodeOrByteText(aPrefix);
                if ( !aFormula.isEmpty() )
                    lcl_WriteString( rStrm, aFormula, 0, ';' );
            }
            endlub(rStrm);
        }
    }
    rStrm.WriteUnicodeOrByteText(u"E");
    endlub(rStrm);
    return rStrm.GetError() == ERRCODE_NONE;
}
|
|
|
|
/** Exports aRange as HTML.

    If the environment variable SC_DEBUG_HTML_COPY_TO is set, the output is
    written to the file it names instead of rStream.

    @param rStream   regular target stream.
    @param rBaseURL  base URL handed to the HTML export filter.
    @return true if the stream actually written to reports no error.
 */
bool ScImportExport::Doc2HTML( SvStream& rStream, const OUString& rBaseURL )
{
    std::optional<SvFileStream> oDebugStream;
    SvStream* pTarget = &rStream;
    if (const char* pDebugPath = getenv("SC_DEBUG_HTML_COPY_TO"))
    {
        OUString aFileURL;
        osl::FileBase::getFileURLFromSystemPath(OUString::fromUtf8(pDebugPath), aFileURL);
        oDebugStream.emplace(aFileURL, StreamMode::WRITE);
        pTarget = &*oDebugStream;
    }

    // rtl_TextEncoding is ignored in ScExportHTML, read from Load/Save HTML options
    ScFormatFilter::Get().ScExportHTML( *pTarget, rBaseURL, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW, bAll,
        aStreamPath, aNonConvertibleChars, maFilterOptions );
    return pTarget->GetError() == ERRCODE_NONE;
}
|
|
|
|
/** Exports aRange as RTF to rStrm.

    @return true if rStrm reports no error after the export.
 */
bool ScImportExport::Doc2RTF( SvStream& rStrm )
{
    ScFormatFilterPlugin& rFilter = ScFormatFilter::Get();
    // The text encoding argument is ignored by ScExportRTF.
    rFilter.ScExportRTF( rStrm, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW );
    const bool bSuccess = (rStrm.GetError() == ERRCODE_NONE);
    return bSuccess;
}
|
|
|
|
/** Exports aRange as DIF to rStrm.

    @return true if rStrm reports no error after the export, in line with
            the other Doc2* exporters of this class.
 */
bool ScImportExport::Doc2Dif( SvStream& rStrm )
{
    // for DIF in the clipboard, IBM_850 is always used
    ScFormatFilter::Get().ScExportDif( rStrm, &rDoc, aRange, RTL_TEXTENCODING_IBM_850 );
    // Do not report success if writing to the stream failed.
    return rStrm.GetError() == ERRCODE_NONE;
}
|
|
|
|
/** Imports DIF data from rStrm at aRange.aStart.

    The data is first read into a temporary document, then aRange.aEnd is set
    to the imported cell area (never before aRange.aStart) and the area is
    copied into the target document between StartPaste() and EndPaste().

    @return the result of StartPaste().
 */
bool ScImportExport::Dif2Doc( SvStream& rStrm )
{
    const SCTAB nTab = aRange.aStart.Tab();
    ScDocumentUniquePtr pImportDoc( new ScDocument( SCDOCMODE_UNDO ) );
    pImportDoc->InitUndo( rDoc, nTab, nTab );

    // for DIF in the clipboard, IBM_850 is always used
    ScFormatFilter::Get().ScImportDif( rStrm, pImportDoc.get(), aRange.aStart, RTL_TEXTENCODING_IBM_850 );

    SCCOL nLastCol;
    SCROW nLastRow;
    pImportDoc->GetCellArea( nTab, nLastCol, nLastRow );
    // if there are no cells in the imported content, the reported area may
    // end before the start position
    nLastCol = std::max<SCCOL>( nLastCol, aRange.aStart.Col() );
    nLastRow = std::max<SCROW>( nLastRow, aRange.aStart.Row() );
    aRange.aEnd = ScAddress( nLastCol, nLastRow, nTab );

    if (!StartPaste())
        return false;

    // Replace everything but styles in the target area.
    InsertDeleteFlags nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
    rDoc.DeleteAreaTab( aRange, nFlags );
    pImportDoc->CopyToDocument(aRange, nFlags, false, rDoc);
    EndPaste();
    return true;
}
|
|
|
|
/** Imports RTF data from rStrm.

    aRange is replaced by the range reported by the importer after reading;
    the content is then written to the document between StartPaste() and
    EndPaste().

    @param rStrm     stream to read from.
    @param rBaseURL  base URL handed to the importer.
    @return false if no importer could be created or StartPaste() failed.
 */
bool ScImportExport::RTF2Doc( SvStream& rStrm, const OUString& rBaseURL )
{
    std::unique_ptr<ScEEAbsImport> pImport = ScFormatFilter::Get().CreateRTFImport( &rDoc, aRange );
    if (!pImport)
        return false;

    pImport->Read( rStrm, rBaseURL );
    aRange = pImport->GetRange();

    if (!StartPaste())
        return false;

    // Replace everything but styles in the target area.
    InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
    rDoc.DeleteAreaTab( aRange, nFlags );
    pImport->WriteToDocument();
    EndPaste();
    return true;
}
|
|
|
|
/** Imports HTML data from rStrm.

    If the environment variable SC_DEBUG_HTML_PASTE_FROM is set, the file it
    names is read instead of rStrm. aRange is replaced by the range reported
    by the importer; the content is written to the document between
    StartPaste() and EndPaste(), honoring pExtOptions if available.

    @param rStrm     stream to read from.
    @param rBaseURL  base URL handed to the importer.
    @return false if no importer could be created or StartPaste() failed.
 */
bool ScImportExport::HTML2Doc( SvStream& rStrm, const OUString& rBaseURL )
{
    std::unique_ptr<ScEEAbsImport> pImport = ScFormatFilter::Get().CreateHTMLImport( &rDoc, rBaseURL, aRange);
    if (!pImport)
        return false;

    // Debugging aid: read from a file instead of the real clipboard during paste.
    if (const char* pDebugPath = getenv("SC_DEBUG_HTML_PASTE_FROM"))
    {
        OUString aFileURL;
        osl::FileBase::getFileURLFromSystemPath(OUString::fromUtf8(pDebugPath), aFileURL);
        SvFileStream aDebugStream(aFileURL, StreamMode::READ);
        pImport->Read( aDebugStream, rBaseURL );
    }
    else
    {
        pImport->Read( rStrm, rBaseURL );
    }

    aRange = pImport->GetRange();

    if (!StartPaste())
        return false;

    // ScHTMLImport may call ScDocument::InitDrawLayer, resulting in
    // a Draw Layer but no Draw View -> create Draw Layer and View here
    if (pDocSh)
        pDocSh->MakeDrawLayer();

    // Replace everything but styles in the target area.
    InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
    rDoc.DeleteAreaTab( aRange, nFlags );

    if (!pExtOptions)
    {
        // Regular import, with no options.
        pImport->WriteToDocument();
    }
    else
    {
        // Pick up import options if available.
        const LanguageType eLang = pExtOptions->GetLanguage();
        SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eLang);
        const bool bSpecialNumber = pExtOptions->IsDetectSpecialNumber();
        const bool bScientificNumber = pExtOptions->IsDetectScientificNumber();
        pImport->WriteToDocument(false, 1.0, &aNumFormatter, bSpecialNumber, bScientificNumber);
    }

    EndPaste();
    return true;
}
|
|
|
|
#ifndef DISABLE_DYNLOADING
|
|
|
|
extern "C" { static void thisModule() {} }
|
|
|
|
#else
|
|
|
|
extern "C" {
|
|
ScFormatFilterPlugin* ScFilterCreate();
|
|
}
|
|
|
|
#endif
|
|
|
|
typedef ScFormatFilterPlugin * (*FilterFn)();
|
|
ScFormatFilterPlugin &ScFormatFilter::Get()
|
|
{
|
|
static ScFormatFilterPlugin *plugin = []()
|
|
{
|
|
#ifndef DISABLE_DYNLOADING
|
|
OUString sFilterLib(SVLIBRARY("scfilt"));
|
|
static ::osl::Module aModule;
|
|
bool bLoaded = aModule.is();
|
|
if (!bLoaded)
|
|
bLoaded = aModule.loadRelative(&thisModule, sFilterLib);
|
|
if (!bLoaded)
|
|
bLoaded = aModule.load(sFilterLib);
|
|
if (bLoaded)
|
|
{
|
|
oslGenericFunction fn = aModule.getFunctionSymbol( "ScFilterCreate" );
|
|
if (fn != nullptr)
|
|
return reinterpret_cast<FilterFn>(fn)();
|
|
}
|
|
assert(false);
|
|
return static_cast<ScFormatFilterPlugin*>(nullptr);
|
|
#else
|
|
return ScFilterCreate();
|
|
#endif
|
|
}();
|
|
|
|
return *plugin;
|
|
}
|
|
|
|
// Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated
|
|
// array.
|
|
static const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
|
|
sal_Unicode c )
|
|
{
|
|
while (*pStr)
|
|
{
|
|
if (*pStr == c)
|
|
return pStr;
|
|
++pStr;
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
/** Creates a read-only memory stream over the UTF-16 buffer of rStr.

    The stream character set is set to RTL_TEXTENCODING_UNICODE and the
    endianness to that selected by OSL_BIGENDIAN. The buffer of rStr is
    referenced, not copied by this constructor.
 */
ScImportStringStream::ScImportStringStream( const OUString& rStr )
    : SvMemoryStream( const_cast<sal_Unicode *>(rStr.getStr()),
            rStr.getLength() * sizeof(sal_Unicode), StreamMode::READ)
{
#ifdef OSL_BIGENDIAN
    constexpr SvStreamEndian eNativeEndian = SvStreamEndian::BIG;
#else
    constexpr SvStreamEndian eNativeEndian = SvStreamEndian::LITTLE;
#endif
    SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
    SetEndian( eNativeEndian );
}
|
|
|
|
/** Reads one logical CSV line from rStream.

    Without bEmbeddedLineBreak this is a single physical line. Otherwise
    further physical lines are appended (joined by "\n") as long as a quoted
    field is still open, i.e. the number of counted quotes is odd, until the
    stream ends, nArbitraryLineLengthLimit is reached or nMaxSourceLines
    (if > 0) lines were consumed. If a quote opened at field start is never
    closed, only the content up to the physical line containing that opening
    quote is returned and the stream is repositioned accordingly.

    @param rStream             stream to read from, using its character set.
    @param bEmbeddedLineBreak  whether quoted fields may span lines.
    @param rFieldSeparators    field separator characters; a detected
                               separator is appended.
    @param cFieldQuote         the field quote character.
    @param rcDetectSep         if 0 on entry (and bEmbeddedLineBreak is set)
                               separator detection is allowed: once
                               lcl_isEscapedOrFieldEndQuote() sets it, the
                               line is re-read once from the start position
                               with the extended separators.
    @param nMaxSourceLines     maximum number of continuation rounds, 0 for
                               no limit.
    @return the logical line read.
 */
OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak,
        OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep, sal_uInt32 nMaxSourceLines )
{
    enum class RetryState
    {
        FORBID,
        ALLOW,
        RETRY,
        RETRIED
    };
    RetryState eRetryState = (bEmbeddedLineBreak && rcDetectSep == 0)
        ? RetryState::ALLOW : RetryState::FORBID;

    // Position to return to if the line has to be scanned again.
    const sal_uInt64 nStreamPos = (eRetryState == RetryState::ALLOW) ? rStream.Tell() : 0;

Label_RetryWithNewSep:

    if (eRetryState == RetryState::RETRY)
    {
        eRetryState = RetryState::RETRIED;
        rStream.Seek( nStreamPos);
    }

    OUString aStr;
    rStream.ReadUniOrByteStringLine(aStr, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);

    if (!bEmbeddedLineBreak)
        return aStr;

    // Fallback result if an opening quote turns out to be unmatched.
    sal_Int32 nFirstLineLength = aStr.getLength();
    sal_uInt64 nFirstLineStreamPos = rStream.Tell();
    sal_uInt32 nLine = 0;

    const sal_Unicode* pSeps = rFieldSeparators.getStr();

    QuoteType eQuoteState = FIELDEND_QUOTE;
    bool bFieldStart = true;

    sal_Int32 nLastOffset = 0;
    sal_Int32 nQuotes = 0;
    while (!rStream.eof() && aStr.getLength() < nArbitraryLineLengthLimit)
    {
        const sal_Unicode* const pStop = aStr.getStr() + aStr.getLength();
        for (const sal_Unicode* p = aStr.getStr() + nLastOffset; p < pStop; ++p)
        {
            // Skip embedded null-characters. They don't change
            // anything and are handled at a higher level.
            if (!*p)
                continue;

            const bool bIsQuote = (*p == cFieldQuote);
            if (bIsQuote && bFieldStart)
            {
                // Opening quote of a field; remember the current line as
                // fallback in case the quote is never closed.
                ++nQuotes;
                bFieldStart = false;
                eQuoteState = FIELDSTART_QUOTE;
                nFirstLineLength = aStr.getLength();
                nFirstLineStreamPos = rStream.Tell();
            }
            else if (bIsQuote && nQuotes)
            {
                // Quote after at least one counted quote. Within unquoted
                // content (FIELDEND_QUOTE state) it is not evaluated.
                if (eQuoteState != FIELDEND_QUOTE)
                {
                    eQuoteState = lcl_isEscapedOrFieldEndQuote( nQuotes, p, pSeps, cFieldQuote, rcDetectSep);

                    if (eRetryState == RetryState::ALLOW && rcDetectSep)
                    {
                        // A separator was detected, start over with it.
                        eRetryState = RetryState::RETRY;
                        rFieldSeparators += OUStringChar(rcDetectSep);
                        pSeps = rFieldSeparators.getStr();
                        goto Label_RetryWithNewSep;
                    }

                    // DONTKNOW_QUOTE is an embedded unescaped quote we
                    // don't count for pairing.
                    if (eQuoteState != DONTKNOW_QUOTE)
                        ++nQuotes;
                }
            }
            else if (eQuoteState == FIELDEND_QUOTE)
            {
                // Outside of quoted content a separator starts a new field.
                // While still at field start a blank keeps that state, so
                // leading blanks before a quote are skipped; a quote
                // character inside field content does not start a quote.
                const bool bSeparator = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                bFieldStart = bSeparator || (bFieldStart && *p == ' ');
            }
        }

        // An even number of quotes means all quoted fields are closed. There
        // still is a (theoretical?) problem if due to
        // nArbitraryLineLengthLimit or nMaxSourceLines a string is split
        // right between a doubled quote pair.
        // A DONTKNOW_QUOTE state is a single unescaped quote somewhere in a
        // quote started field, most likely not meant to have embedded
        // linefeeds either.
        if ((nQuotes & 1) == 0 || eQuoteState == DONTKNOW_QUOTE)
            break;

        // Counted even without a limit so it can be observed in a debugger.
        ++nLine;
        if (nMaxSourceLines > 0 && nLine >= nMaxSourceLines)
            break;

        // Quoted field still open, continue with the next physical line.
        nLastOffset = aStr.getLength();
        OUString aNext;
        rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);
        if (!rStream.eof())
            aStr += "\n" + aNext;
    }

    if (nQuotes & 1)
    {
        // No closing quote at all. A single quote at field start => no
        // embedded linefeeds for that field, take only first logical line.
        aStr = aStr.copy( 0, nFirstLineLength);
        rStream.Seek( nFirstLineStreamPos);
    }
    return aStr;
}
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|