summaryrefslogtreecommitdiffstats
path: root/io/source/TextInputStream/TextInputStream.cxx
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 05:54:39 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 05:54:39 +0000
commit 267c6f2ac71f92999e969232431ba04678e7437e (patch)
tree 358c9467650e1d0a1d7227a21dac2e3d08b622b2 /io/source/TextInputStream/TextInputStream.cxx
parent Initial commit. (diff)
download libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz
libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip
Adding upstream version 4:24.2.0. (tag: upstream/4%24.2.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'io/source/TextInputStream/TextInputStream.cxx')
-rw-r--r-- io/source/TextInputStream/TextInputStream.cxx 417
1 files changed, 417 insertions, 0 deletions
diff --git a/io/source/TextInputStream/TextInputStream.cxx b/io/source/TextInputStream/TextInputStream.cxx
new file mode 100644
index 0000000000..1ce12a6e79
--- /dev/null
+++ b/io/source/TextInputStream/TextInputStream.cxx
@@ -0,0 +1,417 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <string.h>
+
+#include <comphelper/sequence.hxx>
+#include <cppuhelper/implbase.hxx>
+#include <cppuhelper/supportsservice.hxx>
+
+#include <rtl/textenc.h>
+#include <rtl/tencinfo.h>
+
+#include <com/sun/star/io/BufferSizeExceededException.hpp>
+#include <com/sun/star/io/IOException.hpp>
+#include <com/sun/star/io/NotConnectedException.hpp>
+#include <com/sun/star/io/XTextInputStream2.hpp>
+#include <com/sun/star/lang/XServiceInfo.hpp>
+
+#include <vector>
+
+namespace com::sun::star::uno { class XComponentContext; }
+
+using namespace ::osl;
+using namespace ::cppu;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::io;
+
+
+// Implementation XTextInputStream
+
+#define INITIAL_UNICODE_BUFFER_CAPACITY 0x100 // initial element count of mvBuffer, the converted-character buffer
+#define READ_BYTE_COUNT 0x100 // bytes requested per read in implReadNext(); also the initial mSeqSource length and the minimum free space kept in mvBuffer
+
+namespace {
+
+class OTextInputStream : public WeakImplHelper< XTextInputStream2, XServiceInfo > // converts bytes read from a wrapped XInputStream into text
+{
+ Reference< XInputStream > mxStream; // source stream; assigned by setInputStream(), checked by checkNull()
+
+ // Encoding: converter state created by setEncoding() and released in the destructor
+ bool mbEncodingInitialized; // set to true by setEncoding() once the encoding name was recognized
+ rtl_TextToUnicodeConverter mConvText2Unicode;
+ rtl_TextToUnicodeContext mContextText2Unicode;
+ Sequence<sal_Int8> mSeqSource; // raw bytes handed to the converter by implReadNext()
+
+ // Internal buffer for characters that are already converted successfully
+ std::vector<sal_Unicode> mvBuffer; // doubled in size by implReadNext() when space runs short
+ sal_Int32 mnCharsInBuffer; // count of valid characters at the front of mvBuffer
+ bool mbReachedEOF; // set by implReadNext() when a read from mxStream returned 0 bytes
+
+ /// @throws IOException
+ /// @throws RuntimeException
+ OUString implReadString( const Sequence< sal_Unicode >& Delimiters, // shared worker behind readLine() and readString()
+ bool bRemoveDelimiter, bool bFindLineEnd ); // bFindLineEnd selects CR/LF search instead of the Delimiters lookup
+ /// @throws IOException
+ /// @throws RuntimeException
+ sal_Int32 implReadNext(); // reads and converts the next chunk; returns the number of characters appended to mvBuffer
+ /// @throws RuntimeException
+ void checkNull(); // throws if mxStream has not been set
+
+public:
+ OTextInputStream();
+ virtual ~OTextInputStream() override;
+
+ // Methods XTextInputStream
+ virtual OUString SAL_CALL readLine( ) override;
+ virtual OUString SAL_CALL readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter ) override;
+ virtual sal_Bool SAL_CALL isEOF( ) override;
+ virtual void SAL_CALL setEncoding( const OUString& Encoding ) override;
+
+ // Methods XInputStream (each one forwards to mxStream after checkNull())
+ virtual sal_Int32 SAL_CALL readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead ) override;
+ virtual sal_Int32 SAL_CALL readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead ) override;
+ virtual void SAL_CALL skipBytes( sal_Int32 nBytesToSkip ) override;
+ virtual sal_Int32 SAL_CALL available( ) override;
+ virtual void SAL_CALL closeInput( ) override;
+
+ // Methods XActiveDataSink (get/set the wrapped source stream)
+ virtual void SAL_CALL setInputStream( const Reference< XInputStream >& aStream ) override;
+ virtual Reference< XInputStream > SAL_CALL getInputStream() override;
+
+ // Methods XServiceInfo
+ virtual OUString SAL_CALL getImplementationName() override;
+ virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override;
+ virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override;
+};
+
+}
+
+// Creates a stream object that has neither a source stream nor an encoding:
+// the converter handles start out null, the byte and character buffers are
+// pre-sized, and nothing is buffered yet.
+OTextInputStream::OTextInputStream() :
+    mbEncodingInitialized( false ),
+    mConvText2Unicode( nullptr ),
+    mContextText2Unicode( nullptr ),
+    mSeqSource( READ_BYTE_COUNT ),
+    mvBuffer( INITIAL_UNICODE_BUFFER_CAPACITY, 0 ),
+    mnCharsInBuffer( 0 ),
+    mbReachedEOF( false )
+{
+}
+
+// Releases the text converter and its context, provided setEncoding() has
+// created them.
+OTextInputStream::~OTextInputStream()
+{
+    if( !mbEncodingInitialized )
+        return;
+
+    // The context was created from the converter, so it is destroyed first.
+    rtl_destroyTextToUnicodeContext( mConvText2Unicode, mContextText2Unicode );
+    rtl_destroyTextToUnicodeConverter( mConvText2Unicode );
+}
+
+// Guard used by the public entry points: rejects calls made before a source
+// stream has been supplied via setInputStream().
+//
+// @throws RuntimeException if no input stream is set
+
+void OTextInputStream::checkNull()
+{
+    const bool bNoStream = ( mxStream==nullptr );
+    if( bNoStream )
+        throw RuntimeException("Uninitialized object");
+}
+
+// XTextInputStream
+
+// Reads the next line of text. No delimiter characters are passed on; the
+// line-end search is selected through the last argument of implReadString().
+//
+// @throws RuntimeException if no input stream is set
+OUString OTextInputStream::readLine( )
+{
+    checkNull();
+
+    // Intentionally empty; one instance is shared by all calls.
+    static Sequence< sal_Unicode > aNoDelimiters;
+    const bool bRemoveDelimiter = true;
+    const bool bFindLineEnd = true;
+    return implReadString( aNoDelimiters, bRemoveDelimiter, bFindLineEnd );
+}
+
+// Reads text until one of the characters in Delimiters is found;
+// bRemoveDelimiter decides whether that character is kept in the result.
+//
+// @throws RuntimeException if no input stream is set
+OUString OTextInputStream::readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter )
+{
+    checkNull();
+
+    const bool bStripDelimiter = bRemoveDelimiter;
+    return implReadString( Delimiters, bStripDelimiter, /*bFindLineEnd*/ false );
+}
+
+// Reports end of input: true only when the source stream has been exhausted
+// and no converted characters are left in the internal buffer.
+//
+// @throws RuntimeException if no input stream is set
+sal_Bool OTextInputStream::isEOF()
+{
+    checkNull();
+    return mbReachedEOF && mnCharsInBuffer == 0;
+}
+
+
+// Extracts the next string from the converted-character buffer, pulling more
+// data from the source stream through implReadNext() whenever the buffer runs
+// dry. Falls back to the "utf8" encoding if none has been set.
+//
+// bFindLineEnd == true:  reads up to a line end (CR, LF, CR/LF or LF/CR); the
+//                        line end is consumed but never part of the result,
+//                        also when it is the very last thing in the stream.
+// bFindLineEnd == false: reads up to the first character contained in
+//                        Delimiters; the delimiter is consumed and is part of
+//                        the result unless bRemoveDelimiter is set.
+//
+// If no terminator is found before the input ends, everything consumed so far
+// is returned, so no buffered text is ever dropped.
+//
+// @return the extracted text; empty if no encoding could be initialized
+// @throws IOException
+// @throws RuntimeException
+OUString OTextInputStream::implReadString( const Sequence< sal_Unicode >& Delimiters,
+                                           bool bRemoveDelimiter, bool bFindLineEnd )
+{
+    OUString aRetStr;
+    if( !mbEncodingInitialized )
+    {
+        setEncoding( "utf8" );
+    }
+    if( !mbEncodingInitialized )
+        return aRetStr;
+
+    // Only for bFindLineEnd
+    const sal_Unicode cLineEndChar1 = 0x0D;
+    const sal_Unicode cLineEndChar2 = 0x0A;
+
+    sal_Int32 nBufferReadPos = 0;
+    // Length of the string to return; negative as long as no terminator
+    // (line end or delimiter) has been seen.
+    sal_Int32 nCopyLen = -1;
+    // The first character of a line end once one was seen, 0 before that.
+    sal_Unicode cFirstLineEndChar = 0;
+    while( true )
+    {
+        // Still characters available?
+        if( nBufferReadPos == mnCharsInBuffer )
+        {
+            // Stop at EOF or when reading delivers no new characters
+            if( mbReachedEOF || !implReadNext() )
+                break;
+        }
+
+        // Now there are characters available
+        // (otherwise the loop has been left before)
+        sal_Unicode c = mvBuffer[ nBufferReadPos++ ];
+
+        if( bFindLineEnd )
+        {
+            if( cFirstLineEndChar != 0 )
+            {
+                // c follows a line end char. It is swallowed only if it is
+                // the other half of a two-char line end; the same char again
+                // starts a new (empty) line and anything else starts the
+                // next line, so both are pushed back.
+                const bool bIsLineEndChar = ( c == cLineEndChar1 || c == cLineEndChar2 );
+                if( !bIsLineEndChar || c == cFirstLineEndChar )
+                    nBufferReadPos--;
+                break;
+            }
+            if( c == cLineEndChar1 || c == cLineEndChar2 )
+            {
+                // Remember the line length right away: if the stream ends
+                // directly after this char, the line end must still be
+                // stripped from the result.
+                cFirstLineEndChar = c;
+                nCopyLen = nBufferReadPos - 1;
+            }
+        }
+        else if( comphelper::findValue(Delimiters, c) != -1 )
+        {
+            nCopyLen = nBufferReadPos;
+            if( bRemoveDelimiter )
+                nCopyLen--;
+            break;
+        }
+    }
+
+    // No terminator found? Return everything that was consumed
+    if( nCopyLen < 0 )
+        nCopyLen = nBufferReadPos;
+
+    // Create string
+    if( nCopyLen )
+        aRetStr = OUString( mvBuffer.data(), nCopyLen );
+
+    // Move the unread rest of the buffer to the front
+    memmove( mvBuffer.data(), mvBuffer.data() + nBufferReadPos,
+             (mnCharsInBuffer - nBufferReadPos) * sizeof( sal_Unicode ) );
+    mnCharsInBuffer -= nBufferReadPos;
+
+    return aRetStr;
+}
+
+
+// Reads the next chunk of bytes from the source stream, converts it with the
+// current converter and appends the characters to mvBuffer.
+//
+// mbReachedEOF is set as soon as the stream delivers no bytes. If the chunk
+// ends inside a multi-byte character, single bytes are fetched until the
+// converter is satisfied or the stream ends. Bytes the converter cannot map
+// are replaced by its default character (see the conversion flags).
+//
+// @return number of characters appended to mvBuffer
+// @throws IOException  also raised in place of NotConnectedException and
+//                      BufferSizeExceededException coming from the stream
+// @throws RuntimeException
+sal_Int32 OTextInputStream::implReadNext()
+{
+    sal_Int32 nFreeBufferSize = mvBuffer.size() - mnCharsInBuffer;
+    if( nFreeBufferSize < READ_BYTE_COUNT )
+        mvBuffer.resize(mvBuffer.size() * 2);
+    nFreeBufferSize = mvBuffer.size() - mnCharsInBuffer;
+
+    try
+    {
+        sal_Int32 nRead = mxStream->readSomeBytes( mSeqSource, READ_BYTE_COUNT );
+        sal_Int32 nTotalRead = nRead;
+        if( nRead == 0 )
+            mbReachedEOF = true;
+
+        // Try to convert
+        sal_uInt32 uiInfo = 0;
+        sal_Size nSrcCvtBytes = 0;
+        sal_Size nTargetCount = 0;
+        sal_Size nSourceCount = 0;
+        while( true )
+        {
+            // Fetched anew in every round: mSeqSource may have been reallocated
+            const sal_Int8 *pbSource = mSeqSource.getConstArray();
+
+            // All invalid characters are transformed to the unicode undefined char
+            nTargetCount += rtl_convertTextToUnicode(
+                                mConvText2Unicode,
+                                mContextText2Unicode,
+                                reinterpret_cast<const char*>(&( pbSource[nSourceCount] )),
+                                nTotalRead - nSourceCount,
+                                mvBuffer.data() + mnCharsInBuffer + nTargetCount,
+                                nFreeBufferSize - nTargetCount,
+                                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT   |
+                                RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
+                                RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
+                                &uiInfo,
+                                &nSrcCvtBytes );
+            nSourceCount += nSrcCvtBytes;
+
+            bool bCont = false;
+            if( uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL )
+            {
+                mvBuffer.resize(mvBuffer.size() * 2);
+                // The retry must see the enlarged capacity; otherwise it would
+                // be offered the same exhausted space again and never finish.
+                nFreeBufferSize = mvBuffer.size() - mnCharsInBuffer;
+                bCont = true;
+            }
+
+            if( uiInfo & RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL )
+            {
+                // A multi-byte character is cut off: read the next byte.
+                // The helper sequence is a local so that it is never shared
+                // between instances or threads.
+                Sequence< sal_Int8 > aOneByteSeq( 1 );
+                nRead = mxStream->readSomeBytes( aOneByteSeq, 1 );
+                if( nRead == 0 )
+                {
+                    mbReachedEOF = true;
+                    break;
+                }
+
+                // Store the byte directly behind the bytes read so far. The
+                // position is derived from the byte count, not from the
+                // sequence length, because the stream is not guaranteed to
+                // have shrunk mSeqSource to the number of bytes it delivered.
+                nTotalRead++;
+                if( nTotalRead > mSeqSource.getLength() )
+                {
+                    mSeqSource.realloc( nTotalRead );
+                }
+                mSeqSource.getArray()[ nTotalRead - 1 ] = aOneByteSeq.getConstArray()[ 0 ];
+                bCont = true;
+            }
+
+            if( bCont )
+                continue;
+            break;
+        }
+
+        mnCharsInBuffer += nTargetCount;
+        return nTargetCount;
+    }
+    catch( const NotConnectedException& )
+    {
+        throw IOException("Not connected");
+    }
+    catch( const BufferSizeExceededException& )
+    {
+        throw IOException("Buffer size exceeded");
+    }
+}
+
+// Selects the text encoding used to convert the source bytes.
+//
+// Encoding is interpreted as a MIME charset name. An unknown name is ignored
+// and leaves the current state (including "no encoding set") untouched.
+// Calling this again with a known name replaces the previous converter; the
+// old converter and context are destroyed first so they are not leaked.
+void OTextInputStream::setEncoding( const OUString& Encoding )
+{
+    OString aOEncodingStr = OUStringToOString( Encoding, RTL_TEXTENCODING_ASCII_US );
+    rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( aOEncodingStr.getStr() );
+    if( RTL_TEXTENCODING_DONTKNOW == encoding )
+        return;
+
+    if( mbEncodingInitialized )
+    {
+        // Release the handles of the encoding that is being replaced
+        rtl_destroyTextToUnicodeContext( mConvText2Unicode, mContextText2Unicode );
+        rtl_destroyTextToUnicodeConverter( mConvText2Unicode );
+    }
+
+    mbEncodingInitialized = true;
+    mConvText2Unicode = rtl_createTextToUnicodeConverter( encoding );
+    mContextText2Unicode = rtl_createTextToUnicodeContext( mConvText2Unicode );
+}
+
+
+// XInputStream
+
+// Passes the raw-byte read straight through to the wrapped stream; the text
+// conversion buffer of this object is not involved.
+//
+// @return whatever the wrapped stream's readBytes() returns
+// @throws RuntimeException if no input stream is set
+sal_Int32 OTextInputStream::readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead )
+{
+    checkNull();
+
+    const sal_Int32 nBytesRead = mxStream->readBytes( aData, nBytesToRead );
+    return nBytesRead;
+}
+
+// Passes the raw-byte read straight through to the wrapped stream; the text
+// conversion buffer of this object is not involved.
+//
+// @return whatever the wrapped stream's readSomeBytes() returns
+// @throws RuntimeException if no input stream is set
+sal_Int32 OTextInputStream::readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead )
+{
+    checkNull();
+
+    const sal_Int32 nBytesRead = mxStream->readSomeBytes( aData, nMaxBytesToRead );
+    return nBytesRead;
+}
+
+void OTextInputStream::skipBytes( sal_Int32 nBytesToSkip ) // XInputStream: skips raw bytes on the wrapped stream
+{
+ checkNull(); // throws RuntimeException when no stream has been set
+ mxStream->skipBytes( nBytesToSkip ); // pure delegation; characters already held in mvBuffer are not touched
+}
+
+// Reports the value of the wrapped stream's available(); characters that
+// are already converted and buffered in this object are not counted.
+//
+// @throws RuntimeException if no input stream is set
+sal_Int32 OTextInputStream::available(  )
+{
+    checkNull();
+
+    const sal_Int32 nAvailable = mxStream->available();
+    return nAvailable;
+}
+
+void OTextInputStream::closeInput( ) // XInputStream: closes the wrapped stream
+{
+ checkNull(); // throws RuntimeException when no stream has been set
+ mxStream->closeInput(); // pure delegation; mxStream itself and the buffered state are kept as they are
+}
+
+
+// XActiveDataSink
+
+void OTextInputStream::setInputStream( const Reference< XInputStream >& aStream ) // XActiveDataSink: sets the stream that all reads are served from
+{
+ mxStream = aStream; // NOTE(review): mnCharsInBuffer and mbReachedEOF are not reset here - confirm callers never swap streams after reading
+}
+
+Reference< XInputStream > OTextInputStream::getInputStream() // XActiveDataSink: returns the currently set source stream
+{
+ return mxStream; // no checkNull() here, so this may be a null reference if no stream was set
+}
+
+OUString OTextInputStream::getImplementationName() // XServiceInfo: name of this implementation
+{
+ return "com.sun.star.comp.io.TextInputStream"; // fixed string, identical for every instance
+}
+
+sal_Bool OTextInputStream::supportsService(const OUString& ServiceName) // XServiceInfo: service-name query
+{
+ return cppu::supportsService(this, ServiceName); // delegates to the cppu helper, passing this object and the requested name
+}
+
+Sequence< OUString > OTextInputStream::getSupportedServiceNames() // XServiceInfo: list of supported service names
+{
+ return { "com.sun.star.io.TextInputStream" }; // exactly one service name
+}
+
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+io_OTextInputStream_get_implementation( // exported C-linkage factory for OTextInputStream
+ css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) // both parameters are unnamed and unused
+{
+ return cppu::acquire(new OTextInputStream()); // returns a freshly created instance passed through cppu::acquire
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */