summaryrefslogtreecommitdiffstats
path: root/xpcom/io/nsLinebreakConverter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'xpcom/io/nsLinebreakConverter.cpp')
-rw-r--r--xpcom/io/nsLinebreakConverter.cpp452
1 files changed, 452 insertions, 0 deletions
diff --git a/xpcom/io/nsLinebreakConverter.cpp b/xpcom/io/nsLinebreakConverter.cpp
new file mode 100644
index 0000000000..019fc0e4ae
--- /dev/null
+++ b/xpcom/io/nsLinebreakConverter.cpp
@@ -0,0 +1,452 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsLinebreakConverter.h"
+
+#include "nsCRT.h"
+
+/*----------------------------------------------------------------------------
+ GetLinebreakString
+
+ Could make this inline
+----------------------------------------------------------------------------*/
+static const char* GetLinebreakString(
+ nsLinebreakConverter::ELinebreakType aBreakType) {
+ static const char* const sLinebreaks[] = {"", // any
+ NS_LINEBREAK, // platform
+ LFSTR, // content
+ CRLF, // net
+ CRSTR, // Mac
+ LFSTR, // Unix
+ CRLF, // Windows
+ " ", // space
+ nullptr};
+
+ return sLinebreaks[aBreakType];
+}
+
+/*----------------------------------------------------------------------------
+ AppendLinebreak
+
+ Wee inline method to append a line break. Modifies ioDest.
+----------------------------------------------------------------------------*/
+template <class T>
+void AppendLinebreak(T*& aIoDest, const char* aLineBreakStr) {
+ *aIoDest++ = *aLineBreakStr;
+
+ if (aLineBreakStr[1]) {
+ *aIoDest++ = aLineBreakStr[1];
+ }
+}
+
+/*----------------------------------------------------------------------------
+ CountChars
+
+ Counts occurrences of breakStr in aSrc
+----------------------------------------------------------------------------*/
+template <class T>
+int32_t CountLinebreaks(const T* aSrc, int32_t aInLen, const char* aBreakStr) {
+ const T* src = aSrc;
+ const T* srcEnd = aSrc + aInLen;
+ int32_t theCount = 0;
+
+ while (src < srcEnd) {
+ if (*src == *aBreakStr) {
+ src++;
+
+ if (aBreakStr[1]) {
+ if (src < srcEnd && *src == aBreakStr[1]) {
+ src++;
+ theCount++;
+ }
+ } else {
+ theCount++;
+ }
+ } else {
+ src++;
+ }
+ }
+
+ return theCount;
+}
+
+/*----------------------------------------------------------------------------
+ ConvertBreaks
+
+ ioLen *includes* a terminating null, if any
+----------------------------------------------------------------------------*/
+template <class T>
+static T* ConvertBreaks(const T* aInSrc, int32_t& aIoLen, const char* aSrcBreak,
+ const char* aDestBreak) {
+ NS_ASSERTION(aInSrc && aSrcBreak && aDestBreak, "Got a null string");
+
+ T* resultString = nullptr;
+
+ // handle the no conversion case
+ if (nsCRT::strcmp(aSrcBreak, aDestBreak) == 0) {
+ resultString = (T*)malloc(sizeof(T) * aIoLen);
+ if (!resultString) {
+ return nullptr;
+ }
+ memcpy(resultString, aInSrc,
+ sizeof(T) * aIoLen); // includes the null, if any
+ return resultString;
+ }
+
+ int32_t srcBreakLen = strlen(aSrcBreak);
+ int32_t destBreakLen = strlen(aDestBreak);
+
+ // handle the easy case, where the string length does not change, and the
+ // breaks are only 1 char long, i.e. CR <-> LF
+ if (srcBreakLen == destBreakLen && srcBreakLen == 1) {
+ resultString = (T*)malloc(sizeof(T) * aIoLen);
+ if (!resultString) {
+ return nullptr;
+ }
+
+ const T* src = aInSrc;
+ const T* srcEnd = aInSrc + aIoLen; // includes null, if any
+ T* dst = resultString;
+
+ char srcBreakChar = *aSrcBreak; // we know it's one char long already
+ char dstBreakChar = *aDestBreak;
+
+ while (src < srcEnd) {
+ if (*src == srcBreakChar) {
+ *dst++ = dstBreakChar;
+ src++;
+ } else {
+ *dst++ = *src++;
+ }
+ }
+
+ // aIoLen does not change
+ } else {
+ // src and dest termination is different length. Do it a slower way.
+
+ // count linebreaks in src. Assumes that chars in 2-char linebreaks are
+ // unique.
+ int32_t numLinebreaks = CountLinebreaks(aInSrc, aIoLen, aSrcBreak);
+
+ int32_t newBufLen =
+ aIoLen - (numLinebreaks * srcBreakLen) + (numLinebreaks * destBreakLen);
+ resultString = (T*)malloc(sizeof(T) * newBufLen);
+ if (!resultString) {
+ return nullptr;
+ }
+
+ const T* src = aInSrc;
+ const T* srcEnd = aInSrc + aIoLen; // includes null, if any
+ T* dst = resultString;
+
+ while (src < srcEnd) {
+ if (*src == *aSrcBreak) {
+ *dst++ = *aDestBreak;
+ if (aDestBreak[1]) {
+ *dst++ = aDestBreak[1];
+ }
+
+ src++;
+ if (src < srcEnd && aSrcBreak[1] && *src == aSrcBreak[1]) {
+ src++;
+ }
+ } else {
+ *dst++ = *src++;
+ }
+ }
+
+ aIoLen = newBufLen;
+ }
+
+ return resultString;
+}
+
+/*----------------------------------------------------------------------------
+ ConvertBreaksInSitu
+
+ Convert breaks in situ. Can only do this if the linebreak length
+ does not change.
+----------------------------------------------------------------------------*/
+template <class T>
+static void ConvertBreaksInSitu(T* aInSrc, int32_t aInLen, char aSrcBreak,
+ char aDestBreak) {
+ T* src = aInSrc;
+ T* srcEnd = aInSrc + aInLen;
+
+ while (src < srcEnd) {
+ if (*src == aSrcBreak) {
+ *src = aDestBreak;
+ }
+
+ src++;
+ }
+}
+
+/*----------------------------------------------------------------------------
+ ConvertUnknownBreaks
+
+ Convert unknown line breaks to the specified break.
+
+ This will convert CRLF pairs to one break, and single CR or LF to a break.
+----------------------------------------------------------------------------*/
+template <class T>
+static T* ConvertUnknownBreaks(const T* aInSrc, int32_t& aIoLen,
+ const char* aDestBreak) {
+ const T* src = aInSrc;
+ const T* srcEnd = aInSrc + aIoLen; // includes null, if any
+
+ int32_t destBreakLen = strlen(aDestBreak);
+ int32_t finalLen = 0;
+
+ while (src < srcEnd) {
+ if (*src == nsCRT::CR) {
+ if (src + 1 < srcEnd && src[1] == nsCRT::LF) {
+ // CRLF
+ finalLen += destBreakLen;
+ src++;
+ } else {
+ // Lone CR
+ finalLen += destBreakLen;
+ }
+ } else if (*src == nsCRT::LF) {
+ // Lone LF
+ finalLen += destBreakLen;
+ } else {
+ finalLen++;
+ }
+ src++;
+ }
+
+ T* resultString = (T*)malloc(sizeof(T) * finalLen);
+ if (!resultString) {
+ return nullptr;
+ }
+
+ src = aInSrc;
+ srcEnd = aInSrc + aIoLen; // includes null, if any
+
+ T* dst = resultString;
+
+ while (src < srcEnd) {
+ if (*src == nsCRT::CR) {
+ if (src + 1 < srcEnd && src[1] == nsCRT::LF) {
+ // CRLF
+ AppendLinebreak(dst, aDestBreak);
+ src++;
+ } else {
+ // Lone CR
+ AppendLinebreak(dst, aDestBreak);
+ }
+ } else if (*src == nsCRT::LF) {
+ // Lone LF
+ AppendLinebreak(dst, aDestBreak);
+ } else {
+ *dst++ = *src;
+ }
+ src++;
+ }
+
+ aIoLen = finalLen;
+ return resultString;
+}
+
+/*----------------------------------------------------------------------------
+ ConvertLineBreaks
+
+----------------------------------------------------------------------------*/
+char* nsLinebreakConverter::ConvertLineBreaks(const char* aSrc,
+ ELinebreakType aSrcBreaks,
+ ELinebreakType aDestBreaks,
+ int32_t aSrcLen,
+ int32_t* aOutLen) {
+ NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace,
+ "Invalid parameter");
+ if (!aSrc) {
+ return nullptr;
+ }
+
+ int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(aSrc) + 1 : aSrcLen;
+
+ char* resultString;
+ if (aSrcBreaks == eLinebreakAny) {
+ resultString =
+ ConvertUnknownBreaks(aSrc, sourceLen, GetLinebreakString(aDestBreaks));
+ } else
+ resultString =
+ ConvertBreaks(aSrc, sourceLen, GetLinebreakString(aSrcBreaks),
+ GetLinebreakString(aDestBreaks));
+
+ if (aOutLen) {
+ *aOutLen = sourceLen;
+ }
+ return resultString;
+}
+
+/*----------------------------------------------------------------------------
+ ConvertLineBreaksInSitu
+
+----------------------------------------------------------------------------*/
+nsresult nsLinebreakConverter::ConvertLineBreaksInSitu(
+ char** aIoBuffer, ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks,
+ int32_t aSrcLen, int32_t* aOutLen) {
+ NS_ASSERTION(aIoBuffer && *aIoBuffer, "Null pointer passed");
+ if (!aIoBuffer || !*aIoBuffer) {
+ return NS_ERROR_NULL_POINTER;
+ }
+
+ NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace,
+ "Invalid parameter");
+
+ int32_t sourceLen =
+ (aSrcLen == kIgnoreLen) ? strlen(*aIoBuffer) + 1 : aSrcLen;
+
+ // can we convert in-place?
+ const char* srcBreaks = GetLinebreakString(aSrcBreaks);
+ const char* dstBreaks = GetLinebreakString(aDestBreaks);
+
+ if (aSrcBreaks != eLinebreakAny && strlen(srcBreaks) == 1 &&
+ strlen(dstBreaks) == 1) {
+ ConvertBreaksInSitu(*aIoBuffer, sourceLen, *srcBreaks, *dstBreaks);
+ if (aOutLen) {
+ *aOutLen = sourceLen;
+ }
+ } else {
+ char* destBuffer;
+
+ if (aSrcBreaks == eLinebreakAny) {
+ destBuffer = ConvertUnknownBreaks(*aIoBuffer, sourceLen, dstBreaks);
+ } else {
+ destBuffer = ConvertBreaks(*aIoBuffer, sourceLen, srcBreaks, dstBreaks);
+ }
+
+ if (!destBuffer) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ *aIoBuffer = destBuffer;
+ if (aOutLen) {
+ *aOutLen = sourceLen;
+ }
+ }
+
+ return NS_OK;
+}
+
+/*----------------------------------------------------------------------------
+ ConvertUnicharLineBreaks
+
+----------------------------------------------------------------------------*/
+char16_t* nsLinebreakConverter::ConvertUnicharLineBreaks(
+ const char16_t* aSrc, ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks,
+ int32_t aSrcLen, int32_t* aOutLen) {
+ NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace,
+ "Invalid parameter");
+ if (!aSrc) {
+ return nullptr;
+ }
+
+ int32_t bufLen = (aSrcLen == kIgnoreLen) ? NS_strlen(aSrc) + 1 : aSrcLen;
+
+ char16_t* resultString;
+ if (aSrcBreaks == eLinebreakAny) {
+ resultString =
+ ConvertUnknownBreaks(aSrc, bufLen, GetLinebreakString(aDestBreaks));
+ } else
+ resultString = ConvertBreaks(aSrc, bufLen, GetLinebreakString(aSrcBreaks),
+ GetLinebreakString(aDestBreaks));
+
+ if (aOutLen) {
+ *aOutLen = bufLen;
+ }
+ return resultString;
+}
+
+/*----------------------------------------------------------------------------
+ ConvertStringLineBreaks
+
+----------------------------------------------------------------------------*/
+nsresult nsLinebreakConverter::ConvertUnicharLineBreaksInSitu(
+ char16_t** aIoBuffer, ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks,
+ int32_t aSrcLen, int32_t* aOutLen) {
+ NS_ASSERTION(aIoBuffer && *aIoBuffer, "Null pointer passed");
+ if (!aIoBuffer || !*aIoBuffer) {
+ return NS_ERROR_NULL_POINTER;
+ }
+ NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace,
+ "Invalid parameter");
+
+ int32_t sourceLen =
+ (aSrcLen == kIgnoreLen) ? NS_strlen(*aIoBuffer) + 1 : aSrcLen;
+
+ // can we convert in-place?
+ const char* srcBreaks = GetLinebreakString(aSrcBreaks);
+ const char* dstBreaks = GetLinebreakString(aDestBreaks);
+
+ if ((aSrcBreaks != eLinebreakAny) && (strlen(srcBreaks) == 1) &&
+ (strlen(dstBreaks) == 1)) {
+ ConvertBreaksInSitu(*aIoBuffer, sourceLen, *srcBreaks, *dstBreaks);
+ if (aOutLen) {
+ *aOutLen = sourceLen;
+ }
+ } else {
+ char16_t* destBuffer;
+
+ if (aSrcBreaks == eLinebreakAny) {
+ destBuffer = ConvertUnknownBreaks(*aIoBuffer, sourceLen, dstBreaks);
+ } else {
+ destBuffer = ConvertBreaks(*aIoBuffer, sourceLen, srcBreaks, dstBreaks);
+ }
+
+ if (!destBuffer) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ *aIoBuffer = destBuffer;
+ if (aOutLen) {
+ *aOutLen = sourceLen;
+ }
+ }
+
+ return NS_OK;
+}
+
+/*----------------------------------------------------------------------------
+ ConvertStringLineBreaks
+
+----------------------------------------------------------------------------*/
+nsresult nsLinebreakConverter::ConvertStringLineBreaks(
+ nsString& aIoString, ELinebreakType aSrcBreaks,
+ ELinebreakType aDestBreaks) {
+ NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace,
+ "Invalid parameter");
+
+ // nothing to do
+ if (aIoString.IsEmpty()) {
+ return NS_OK;
+ }
+
+ nsresult rv;
+
+ // remember the old buffer in case
+ // we blow it away later
+ char16_t* stringBuf = aIoString.BeginWriting(mozilla::fallible);
+ if (!stringBuf) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ int32_t newLen;
+
+ rv = ConvertUnicharLineBreaksInSitu(&stringBuf, aSrcBreaks, aDestBreaks,
+ aIoString.Length() + 1, &newLen);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ const char16_t* currentBuf = aIoString.get();
+ if (currentBuf != stringBuf) {
+ aIoString.Adopt(stringBuf, newLen - 1);
+ }
+
+ return NS_OK;
+}