/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
|
|
|
|
#pragma once
|
|
|
|
#include <o3tl/deleter.hxx>
|
|
#include <sot/formats.hxx>
|
|
#include <address.hxx>
|
|
#include <tools/stream.hxx>
|
|
|
|
#include <com/sun/star/uno/Any.hxx>
|
|
|
|
class ScDocShell;
|
|
class ScDocument;
|
|
class ScAsciiOptions;
|
|
|
|
/**
|
|
* These options control how multi-line cells are converted during export in
|
|
* certain lossy formats (such as csv).
|
|
*/
|
|
struct ScExportTextOptions
|
|
{
|
|
enum NewlineConversion { ToSystem, ToSpace, None };
|
|
ScExportTextOptions( NewlineConversion eNewlineConversion = ToSystem, sal_Unicode cSeparatorConvertTo = 0, bool bAddQuotes = false ) :
|
|
meNewlineConversion( eNewlineConversion ), mcSeparatorConvertTo( cSeparatorConvertTo ), mbAddQuotes( bAddQuotes ) {}
|
|
|
|
NewlineConversion meNewlineConversion;
|
|
sal_Unicode mcSeparatorConvertTo; // Convert separator to this character
|
|
bool mbAddQuotes;
|
|
};
|
|
|
|
class SAL_DLLPUBLIC_RTTI ScImportExport
|
|
{
|
|
ScDocShell* pDocSh;
|
|
ScDocument& rDoc;
|
|
std::unique_ptr<ScDocument, o3tl::default_delete<ScDocument>> pUndoDoc;
|
|
ScRange aRange;
|
|
OUString aStreamPath;
|
|
OUString aNonConvertibleChars;
|
|
OUString maFilterOptions;
|
|
sal_uInt32 nSizeLimit;
|
|
SCROW nMaxImportRow;
|
|
sal_Unicode cSep; // Separator
|
|
sal_Unicode cStr; // String Delimiter
|
|
bool bFormulas; // Formula in Text?
|
|
bool bIncludeFiltered; // include filtered rows? (default true)
|
|
bool bAll; // no selection
|
|
bool bSingle; // Single selection
|
|
bool bUndo; // with Undo?
|
|
bool bOverflowRow; // too many rows
|
|
bool bOverflowCol; // too many columns
|
|
bool bOverflowCell; // too much data for a cell
|
|
bool mbApi;
|
|
bool mbImportBroadcast; // whether or not to broadcast after data import.
|
|
bool mbOverwriting; // Whether we could be overwriting existing values (paste).
|
|
// In this case we cannot use the insert optimization, but we
|
|
// do not need to broadcast after the import.
|
|
bool mbIncludeBOM; // Whether to include a byte-order-mark in the output.
|
|
ScExportTextOptions mExportTextOptions;
|
|
|
|
std::unique_ptr<ScAsciiOptions> pExtOptions; // extended options
|
|
|
|
bool StartPaste(); // Protect check, set up Undo
|
|
void EndPaste(bool bAutoRowHeight = true); // Undo/Redo actions, Repaint
|
|
bool Doc2Text( SvStream& );
|
|
bool Text2Doc( SvStream& );
|
|
bool Doc2Sylk( SvStream& );
|
|
bool Sylk2Doc( SvStream& );
|
|
bool Doc2HTML( SvStream&, const OUString& );
|
|
bool Doc2RTF( SvStream& );
|
|
bool Doc2Dif( SvStream& );
|
|
bool Dif2Doc( SvStream& );
|
|
bool ExtText2Doc( SvStream& ); // with pExtOptions
|
|
bool RTF2Doc( SvStream&, const OUString& rBaseURL );
|
|
bool HTML2Doc( SvStream&, const OUString& rBaseURL );
|
|
|
|
public:
|
|
ScImportExport( ScDocument& ); // the whole document
|
|
ScImportExport( ScDocument&, const OUString& ); // Range/cell input
|
|
SC_DLLPUBLIC ScImportExport( ScDocument&, const ScAddress& );
|
|
SC_DLLPUBLIC ScImportExport( ScDocument&, const ScRange& );
|
|
SC_DLLPUBLIC ~ScImportExport() COVERITY_NOEXCEPT_FALSE;
|
|
|
|
void SetExtOptions( const ScAsciiOptions& rOpt );
|
|
void SetFilterOptions( const OUString& rFilterOptions );
|
|
bool IsRef() const { return !bAll; }
|
|
|
|
const ScRange& GetRange() const { return aRange; }
|
|
|
|
SC_DLLPUBLIC static void EmbeddedNullTreatment( OUString & rStr );
|
|
|
|
static bool IsFormatSupported( SotClipboardFormatId nFormat );
|
|
static const sal_Unicode* ScanNextFieldFromString( const sal_Unicode* p,
|
|
OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps,
|
|
bool bMergeSeps, bool& rbIsQuoted, bool& rbOverflowCell, bool bRemoveSpace );
|
|
|
|
/** ScImportExport::CountVisualWidth
|
|
Count the width of string visually ( in multiple of western characters), considering CJK
|
|
ideographs and CJK symbols (U+3000-U+303F) as twice the width of western characters.
|
|
@param rStr the string.
|
|
@param nIdx the starting index, index is incremented for each counted character.
|
|
@param nMaxWidth the maximum width to count.
|
|
@return the sum of the width of counted characters.
|
|
**/
|
|
static sal_Int32 CountVisualWidth(std::u16string_view rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth);
|
|
|
|
/** ScImportExport::CountVisualWidth
|
|
@return the sum of the visual width of the whole string.
|
|
**/
|
|
static sal_Int32 CountVisualWidth(std::u16string_view rStr);
|
|
|
|
//! only if stream is only used in own (!) memory
|
|
static void SetNoEndianSwap( SvStream& rStrm );
|
|
|
|
void SetSeparator( sal_Unicode c ) { cSep = c; }
|
|
void SetDelimiter( sal_Unicode c ) { cStr = c; }
|
|
void SetFormulas( bool b ) { bFormulas = b; }
|
|
void SetIncludeFiltered( bool b ) { bIncludeFiltered = b; }
|
|
|
|
void SetStreamPath( const OUString& rPath ) { aStreamPath = rPath; }
|
|
|
|
bool ImportString( const OUString&, SotClipboardFormatId );
|
|
bool ExportString( OUString&, SotClipboardFormatId );
|
|
bool ExportByteString( OString&, rtl_TextEncoding, SotClipboardFormatId );
|
|
|
|
SC_DLLPUBLIC bool ImportStream( SvStream&, const OUString& rBaseURL, SotClipboardFormatId );
|
|
SC_DLLPUBLIC bool ExportStream( SvStream&, const OUString& rBaseURL, SotClipboardFormatId );
|
|
|
|
bool ExportData( std::u16string_view rMimeType,
|
|
css::uno::Any & rValue );
|
|
|
|
// after import
|
|
bool IsOverflowRow() const { return bOverflowRow; }
|
|
bool IsOverflowCol() const { return bOverflowCol; }
|
|
bool IsOverflowCell() const { return bOverflowCell; }
|
|
bool IsOverflow() const { return bOverflowRow || bOverflowCol || bOverflowCell; }
|
|
|
|
const OUString& GetNonConvertibleChars() const { return aNonConvertibleChars; }
|
|
|
|
void SetApi( bool bApi ) { mbApi = bApi; }
|
|
void SetImportBroadcast( bool b ) { mbImportBroadcast = b; }
|
|
void SetOverwriting( const bool bOverwriting ) { mbOverwriting = bOverwriting; }
|
|
void SetExportTextOptions( const ScExportTextOptions& options ) { mExportTextOptions = options; }
|
|
|
|
bool GetIncludeBOM() const { return mbIncludeBOM; }
|
|
};
|
|
|
|
// Helper class for importing clipboard strings as streams.
|
|
class ScImportStringStream : public SvMemoryStream
|
|
{
|
|
public:
|
|
ScImportStringStream(const OUString& rStr);
|
|
};
|
|
|
|
/** Read a CSV (comma separated values) data line using
|
|
ReadUniOrByteStringLine().
|
|
|
|
@param bEmbeddedLineBreak
|
|
If TRUE and a line-break occurs inside a field of data,
|
|
a line feed LF '\n' and the next line are appended. Repeats
|
|
until a line-break is not in a field. A field is determined
|
|
by delimiting rFieldSeparators and optionally surrounded by
|
|
a pair of cFieldQuote characters. For a line-break to be
|
|
within a field, the field content MUST be surrounded by
|
|
cFieldQuote characters, and the opening cFieldQuote MUST be
|
|
at the very start of a line or follow right behind a field
|
|
separator with no extra characters in between, with the
|
|
exception of blanks contradictory to RFC 4180. Anything,
|
|
including field separators and escaped quotes (by doubling
|
|
them) may appear in a quoted field.
|
|
|
|
If bEmbeddedLineBreak==FALSE, nothing is parsed and the
|
|
string returned is simply one ReadUniOrByteStringLine().
|
|
|
|
@param rFieldSeparators
|
|
A list of characters that each may act as a field separator.
|
|
If rcDetectSep was 0 and a separator is detected then it is appended to
|
|
rFieldSeparators.
|
|
|
|
@param cFieldQuote
|
|
The quote character used.
|
|
|
|
@param rcDetectSep
|
|
If 0 then attempt to detect a possible separator if
|
|
rFieldSeparators doesn't include it already. This can be necessary because
|
|
of the "accept broken misquoted CSV fields" feature that tries to ignore
|
|
trailing blanks after a quoted field and if no separator follows continues
|
|
to add content to the field assuming the single double quote was in error.
|
|
It is also necessary if the only possible separator was not selected and
|
|
not included in rFieldSeparators and a line starts with a quoted field, in
|
|
which case appending lines is tried until end of file.
|
|
If a separator is detected it is added to rFieldSeparators and the
|
|
line is reread with the new separators
|
|
|
|
@param nMaxSourceLines
|
|
Maximum source lines to read and combine into one logical line for embedded
|
|
new line purpose. Should be limited for the preview dialog because only
|
|
non-matching separators selected otherwise would lead to trying to
|
|
concatenate lines until file end.
|
|
If 0 no limit other than the internal arbitrary resulting line length
|
|
limit.
|
|
|
|
check Stream::good() to detect IO problems during read
|
|
|
|
@ATTENTION
|
|
Note that the string returned may be truncated even inside
|
|
a quoted field if some (arbitrary) maximum length was reached.
|
|
There currently is no way to exactly determine the conditions,
|
|
whether this was at a line end, or whether open quotes
|
|
would have closed the field before the line end, as even a
|
|
ReadUniOrByteStringLine() may return prematurely but the
|
|
stream was positioned ahead until the real end of line.
|
|
Additionally, due to character encoding conversions, string
|
|
length and bytes read don't necessarily match, and
|
|
resyncing to a previous position matching the string's
|
|
length isn't always possible. As a result, a logical line
|
|
with embedded line breaks and more than the maximum length
|
|
characters will be spoiled, and a subsequent ReadCsvLine()
|
|
may start under false preconditions.
|
|
|
|
*/
|
|
SC_DLLPUBLIC OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak,
|
|
OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep,
|
|
sal_uInt32 nMaxSourceLines = 0 );
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|