diff options
Diffstat (limited to '')
-rw-r--r-- | xpcom/io/nsLinebreakConverter.cpp | 452 |
1 files changed, 452 insertions, 0 deletions
diff --git a/xpcom/io/nsLinebreakConverter.cpp b/xpcom/io/nsLinebreakConverter.cpp new file mode 100644 index 0000000000..019fc0e4ae --- /dev/null +++ b/xpcom/io/nsLinebreakConverter.cpp @@ -0,0 +1,452 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsLinebreakConverter.h" + +#include "nsCRT.h" + +/*---------------------------------------------------------------------------- + GetLinebreakString + + Could make this inline +----------------------------------------------------------------------------*/ +static const char* GetLinebreakString( + nsLinebreakConverter::ELinebreakType aBreakType) { + static const char* const sLinebreaks[] = {"", // any + NS_LINEBREAK, // platform + LFSTR, // content + CRLF, // net + CRSTR, // Mac + LFSTR, // Unix + CRLF, // Windows + " ", // space + nullptr}; + + return sLinebreaks[aBreakType]; +} + +/*---------------------------------------------------------------------------- + AppendLinebreak + + Wee inline method to append a line break. Modifies ioDest. +----------------------------------------------------------------------------*/ +template <class T> +void AppendLinebreak(T*& aIoDest, const char* aLineBreakStr) { + *aIoDest++ = *aLineBreakStr; + + if (aLineBreakStr[1]) { + *aIoDest++ = aLineBreakStr[1]; + } +} + +/*---------------------------------------------------------------------------- + CountChars + + Counts occurrences of breakStr in aSrc +----------------------------------------------------------------------------*/ +template <class T> +int32_t CountLinebreaks(const T* aSrc, int32_t aInLen, const char* aBreakStr) { + const T* src = aSrc; + const T* srcEnd = aSrc + aInLen; + int32_t theCount = 0; + + while (src < srcEnd) { + if (*src == *aBreakStr) { + src++; + + if (aBreakStr[1]) { + if (src < srcEnd && *src == aBreakStr[1]) { + src++; + theCount++; + } + } else { + theCount++; + } + } else { + src++; + } + } + + return theCount; +} + +/*---------------------------------------------------------------------------- + ConvertBreaks + + ioLen *includes* a terminating null, if any +----------------------------------------------------------------------------*/ +template <class T> +static T* ConvertBreaks(const T* aInSrc, int32_t& aIoLen, const char* aSrcBreak, + const char* aDestBreak) { + NS_ASSERTION(aInSrc && aSrcBreak && aDestBreak, "Got a null string"); + + T* resultString = nullptr; + + // handle the no conversion case + if (nsCRT::strcmp(aSrcBreak, aDestBreak) == 0) { + resultString = (T*)malloc(sizeof(T) * aIoLen); + if (!resultString) { + return nullptr; + } + memcpy(resultString, aInSrc, + sizeof(T) * aIoLen); // includes the null, if any + return resultString; + } + + int32_t srcBreakLen = strlen(aSrcBreak); + int32_t destBreakLen = strlen(aDestBreak); + + // handle the easy case, where the string length does not change, and the + // breaks are only 1 char long, i.e. CR <-> LF + if (srcBreakLen == destBreakLen && srcBreakLen == 1) { + resultString = (T*)malloc(sizeof(T) * aIoLen); + if (!resultString) { + return nullptr; + } + + const T* src = aInSrc; + const T* srcEnd = aInSrc + aIoLen; // includes null, if any + T* dst = resultString; + + char srcBreakChar = *aSrcBreak; // we know it's one char long already + char dstBreakChar = *aDestBreak; + + while (src < srcEnd) { + if (*src == srcBreakChar) { + *dst++ = dstBreakChar; + src++; + } else { + *dst++ = *src++; + } + } + + // aIoLen does not change + } else { + // src and dest termination is different length. Do it a slower way. + + // count linebreaks in src. Assumes that chars in 2-char linebreaks are + // unique. + int32_t numLinebreaks = CountLinebreaks(aInSrc, aIoLen, aSrcBreak); + + int32_t newBufLen = + aIoLen - (numLinebreaks * srcBreakLen) + (numLinebreaks * destBreakLen); + resultString = (T*)malloc(sizeof(T) * newBufLen); + if (!resultString) { + return nullptr; + } + + const T* src = aInSrc; + const T* srcEnd = aInSrc + aIoLen; // includes null, if any + T* dst = resultString; + + while (src < srcEnd) { + if (*src == *aSrcBreak) { + *dst++ = *aDestBreak; + if (aDestBreak[1]) { + *dst++ = aDestBreak[1]; + } + + src++; + if (src < srcEnd && aSrcBreak[1] && *src == aSrcBreak[1]) { + src++; + } + } else { + *dst++ = *src++; + } + } + + aIoLen = newBufLen; + } + + return resultString; +} + +/*---------------------------------------------------------------------------- + ConvertBreaksInSitu + + Convert breaks in situ. Can only do this if the linebreak length + does not change. +----------------------------------------------------------------------------*/ +template <class T> +static void ConvertBreaksInSitu(T* aInSrc, int32_t aInLen, char aSrcBreak, + char aDestBreak) { + T* src = aInSrc; + T* srcEnd = aInSrc + aInLen; + + while (src < srcEnd) { + if (*src == aSrcBreak) { + *src = aDestBreak; + } + + src++; + } +} + +/*---------------------------------------------------------------------------- + ConvertUnknownBreaks + + Convert unknown line breaks to the specified break. + + This will convert CRLF pairs to one break, and single CR or LF to a break. +----------------------------------------------------------------------------*/ +template <class T> +static T* ConvertUnknownBreaks(const T* aInSrc, int32_t& aIoLen, + const char* aDestBreak) { + const T* src = aInSrc; + const T* srcEnd = aInSrc + aIoLen; // includes null, if any + + int32_t destBreakLen = strlen(aDestBreak); + int32_t finalLen = 0; + + while (src < srcEnd) { + if (*src == nsCRT::CR) { + if (src + 1 < srcEnd && src[1] == nsCRT::LF) { + // CRLF + finalLen += destBreakLen; + src++; + } else { + // Lone CR + finalLen += destBreakLen; + } + } else if (*src == nsCRT::LF) { + // Lone LF + finalLen += destBreakLen; + } else { + finalLen++; + } + src++; + } + + T* resultString = (T*)malloc(sizeof(T) * finalLen); + if (!resultString) { + return nullptr; + } + + src = aInSrc; + srcEnd = aInSrc + aIoLen; // includes null, if any + + T* dst = resultString; + + while (src < srcEnd) { + if (*src == nsCRT::CR) { + if (src + 1 < srcEnd && src[1] == nsCRT::LF) { + // CRLF + AppendLinebreak(dst, aDestBreak); + src++; + } else { + // Lone CR + AppendLinebreak(dst, aDestBreak); + } + } else if (*src == nsCRT::LF) { + // Lone LF + AppendLinebreak(dst, aDestBreak); + } else { + *dst++ = *src; + } + src++; + } + + aIoLen = finalLen; + return resultString; +} + +/*---------------------------------------------------------------------------- + ConvertLineBreaks + +----------------------------------------------------------------------------*/ +char* nsLinebreakConverter::ConvertLineBreaks(const char* aSrc, + ELinebreakType aSrcBreaks, + ELinebreakType aDestBreaks, + int32_t aSrcLen, + int32_t* aOutLen) { + NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace, + "Invalid parameter"); + if (!aSrc) { + return nullptr; + } + + int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(aSrc) + 1 : aSrcLen; + + char* resultString; + if (aSrcBreaks == eLinebreakAny) { + resultString = + ConvertUnknownBreaks(aSrc, sourceLen, GetLinebreakString(aDestBreaks)); + } else + resultString = + ConvertBreaks(aSrc, sourceLen, GetLinebreakString(aSrcBreaks), + GetLinebreakString(aDestBreaks)); + + if (aOutLen) { + *aOutLen = sourceLen; + } + return resultString; +} + +/*---------------------------------------------------------------------------- + ConvertLineBreaksInSitu + +----------------------------------------------------------------------------*/ +nsresult nsLinebreakConverter::ConvertLineBreaksInSitu( + char** aIoBuffer, ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, + int32_t aSrcLen, int32_t* aOutLen) { + NS_ASSERTION(aIoBuffer && *aIoBuffer, "Null pointer passed"); + if (!aIoBuffer || !*aIoBuffer) { + return NS_ERROR_NULL_POINTER; + } + + NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace, + "Invalid parameter"); + + int32_t sourceLen = + (aSrcLen == kIgnoreLen) ? strlen(*aIoBuffer) + 1 : aSrcLen; + + // can we convert in-place? + const char* srcBreaks = GetLinebreakString(aSrcBreaks); + const char* dstBreaks = GetLinebreakString(aDestBreaks); + + if (aSrcBreaks != eLinebreakAny && strlen(srcBreaks) == 1 && + strlen(dstBreaks) == 1) { + ConvertBreaksInSitu(*aIoBuffer, sourceLen, *srcBreaks, *dstBreaks); + if (aOutLen) { + *aOutLen = sourceLen; + } + } else { + char* destBuffer; + + if (aSrcBreaks == eLinebreakAny) { + destBuffer = ConvertUnknownBreaks(*aIoBuffer, sourceLen, dstBreaks); + } else { + destBuffer = ConvertBreaks(*aIoBuffer, sourceLen, srcBreaks, dstBreaks); + } + + if (!destBuffer) { + return NS_ERROR_OUT_OF_MEMORY; + } + *aIoBuffer = destBuffer; + if (aOutLen) { + *aOutLen = sourceLen; + } + } + + return NS_OK; +} + +/*---------------------------------------------------------------------------- + ConvertUnicharLineBreaks + +----------------------------------------------------------------------------*/ +char16_t* nsLinebreakConverter::ConvertUnicharLineBreaks( + const char16_t* aSrc, ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, + int32_t aSrcLen, int32_t* aOutLen) { + NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace, + "Invalid parameter"); + if (!aSrc) { + return nullptr; + } + + int32_t bufLen = (aSrcLen == kIgnoreLen) ? NS_strlen(aSrc) + 1 : aSrcLen; + + char16_t* resultString; + if (aSrcBreaks == eLinebreakAny) { + resultString = + ConvertUnknownBreaks(aSrc, bufLen, GetLinebreakString(aDestBreaks)); + } else + resultString = ConvertBreaks(aSrc, bufLen, GetLinebreakString(aSrcBreaks), + GetLinebreakString(aDestBreaks)); + + if (aOutLen) { + *aOutLen = bufLen; + } + return resultString; +} + +/*---------------------------------------------------------------------------- + ConvertStringLineBreaks + +----------------------------------------------------------------------------*/ +nsresult nsLinebreakConverter::ConvertUnicharLineBreaksInSitu( + char16_t** aIoBuffer, ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, + int32_t aSrcLen, int32_t* aOutLen) { + NS_ASSERTION(aIoBuffer && *aIoBuffer, "Null pointer passed"); + if (!aIoBuffer || !*aIoBuffer) { + return NS_ERROR_NULL_POINTER; + } + NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace, + "Invalid parameter"); + + int32_t sourceLen = + (aSrcLen == kIgnoreLen) ? NS_strlen(*aIoBuffer) + 1 : aSrcLen; + + // can we convert in-place? + const char* srcBreaks = GetLinebreakString(aSrcBreaks); + const char* dstBreaks = GetLinebreakString(aDestBreaks); + + if ((aSrcBreaks != eLinebreakAny) && (strlen(srcBreaks) == 1) && + (strlen(dstBreaks) == 1)) { + ConvertBreaksInSitu(*aIoBuffer, sourceLen, *srcBreaks, *dstBreaks); + if (aOutLen) { + *aOutLen = sourceLen; + } + } else { + char16_t* destBuffer; + + if (aSrcBreaks == eLinebreakAny) { + destBuffer = ConvertUnknownBreaks(*aIoBuffer, sourceLen, dstBreaks); + } else { + destBuffer = ConvertBreaks(*aIoBuffer, sourceLen, srcBreaks, dstBreaks); + } + + if (!destBuffer) { + return NS_ERROR_OUT_OF_MEMORY; + } + *aIoBuffer = destBuffer; + if (aOutLen) { + *aOutLen = sourceLen; + } + } + + return NS_OK; +} + +/*---------------------------------------------------------------------------- + ConvertStringLineBreaks + +----------------------------------------------------------------------------*/ +nsresult nsLinebreakConverter::ConvertStringLineBreaks( + nsString& aIoString, ELinebreakType aSrcBreaks, + ELinebreakType aDestBreaks) { + NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace, + "Invalid parameter"); + + // nothing to do + if (aIoString.IsEmpty()) { + return NS_OK; + } + + nsresult rv; + + // remember the old buffer in case + // we blow it away later + char16_t* stringBuf = aIoString.BeginWriting(mozilla::fallible); + if (!stringBuf) { + return NS_ERROR_OUT_OF_MEMORY; + } + + int32_t newLen; + + rv = ConvertUnicharLineBreaksInSitu(&stringBuf, aSrcBreaks, aDestBreaks, + aIoString.Length() + 1, &newLen); + if (NS_FAILED(rv)) { + return rv; + } + + const char16_t* currentBuf = aIoString.get(); + if (currentBuf != stringBuf) { + aIoString.Adopt(stringBuf, newLen - 1); + } + + return NS_OK; +} |