/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef intl_components_ICUUtils_h
#define intl_components_ICUUtils_h

#include "unicode/uenum.h"
#include "unicode/utypes.h"
#include "mozilla/Buffer.h"
#include "mozilla/DebugOnly.h"
#include "mozilla/Maybe.h"
#include "mozilla/Result.h"
#include "mozilla/Span.h"
#include "mozilla/Utf8.h"
#include "mozilla/Vector.h"
#include "mozilla/intl/ICUError.h"

// When building standalone js shell, it will include headers from
// intl/components if JS_HAS_INTL_API is true (the default value), but js shell
// won't include headers from XPCOM, so don't include nsTArray.h when building
// standalone js shell.
#ifndef JS_STANDALONE
#  include "nsTArray.h"
#endif

#include <cstring>
#include <iterator>
#include <stddef.h>
#include <stdint.h>
#include <string>
#include <string_view>

struct UFormattedValue;
namespace mozilla::intl {

/**
 * Debug-check that the characters described by |aSpan| are directly followed
 * by a NUL terminator and contain no embedded NUL, then return the span's data
 * pointer so that it can be used as a C string.
 */
template <typename CharType>
static inline CharType* AssertNullTerminatedString(Span<CharType> aSpan) {
  CharType* const chars = aSpan.data();

  // The terminator is expected right after the last element; it is not counted
  // in the span's size, hence the read at index |size()|.
  MOZ_ASSERT(chars[aSpan.size()] == '\0');

  // The first NUL found must be that terminator, i.e. there is no earlier one.
  MOZ_ASSERT(std::char_traits<std::remove_const_t<CharType>>::length(chars) ==
             aSpan.size());

  return chars;
}

/**
 * std::string_view flavour of AssertNullTerminatedString: debug-check that the
 * viewed characters are directly followed by a NUL terminator and contain no
 * embedded NUL, then return the view's data pointer.
 */
static inline const char* AssertNullTerminatedString(std::string_view aView) {
  const char* const chars = aView.data();

  // The terminator is expected right after the last viewed character; it is
  // not counted in the view's size.
  MOZ_ASSERT(chars[aView.size()] == '\0');

  // The first NUL found must be that terminator, i.e. there is no earlier one.
  MOZ_ASSERT(std::strlen(chars) == aView.size());

  return chars;
}

/**
 * Translate the "und" locale into the empty string, which is how ICU spells
 * its root locale. Every other input is returned unchanged.
 *
 * |aLocale| must be a valid, NUL-terminated C string.
 */
static inline const char* IcuLocale(const char* aLocale) {
  // Only an exact match of "und" is mapped.
  const bool isUndetermined = std::strcmp(aLocale, "und") == 0;
  return isUndetermined ? "" : aLocale;
}

/**
 * Span flavour of IcuLocale: debug-check that the locale is NUL-terminated
 * right after the span's last character, then map "und" to the empty string
 * that ICU uses for its root locale.
 */
static inline const char* IcuLocale(Span<const char> aLocale) {
  const char* terminated = AssertNullTerminatedString(aLocale);
  return IcuLocale(terminated);
}

/**
 * Ensure a locale in the buffer is null-terminated, and map the "und" locale to
 * an empty string, which ICU uses internally.
 *
 * The buffer's last element is left out of the span that is passed on, so it
 * is expected to be the NUL terminator itself (the Span overload asserts that
 * a NUL sits one past the span's end).
 */
static inline const char* IcuLocale(const Buffer<char>& aLocale) {
  // NOTE(review): assumes aLocale.Length() >= 1; `Length() - 1` would wrap for
  // an empty buffer — confirm that callers never pass one.
  return IcuLocale(Span(aLocale.begin(), aLocale.Length() - 1));
}

// Result type for ICU wrappers that produce no value of their own: Ok on
// success, or an ICUError describing the failure.
using ICUResult = Result<Ok, ICUError>;

/**
 * Convert a UErrorCode to ICUError. This will correctly apply the OutOfMemory
 * case.
 */
ICUError ToICUError(UErrorCode status);

/**
 * Convert a UErrorCode to ICUResult. This will correctly apply the OutOfMemory
 * case.
 */
ICUResult ToICUResult(UErrorCode status);

/**
 * Decide whether an ICU status counts as success for APIs that hand back a
 * pointer/length pair (mozilla::Span).
 *
 * ICU may report that the produced string is not NUL-terminated. Because the
 * length is carried explicitly, that report is accepted as success here.
 */
static inline bool ICUSuccessForStringSpan(UErrorCode status) {
  if (status == U_STRING_NOT_TERMINATED_WARNING) {
    return true;
  }
  return U_SUCCESS(status);
}

/**
 * A thin wrapper around a raw ICU4C pointer that makes the unified
 * mozilla::intl methods mirror the const-ness of the ICU4C calls they wrap.
 * const ICU4C APIs take a const pointer, while mutable ones take a non-const
 * pointer.
 *
 * For const ICU4C calls use:
 *   ICUPointer::GetConst().
 *
 * For non-const ICU4C calls use:
 *   ICUPointer::GetMut().
 *
 * GetMut() is only callable on a non-const ICUPointer, so the `const`
 * specifier of the ICU4C API call propagates to the unified method and is
 * enforced by the compiler. This helps ensure a consistent and correct
 * implementation.
 *
 * The wrapper stores the pointer only; it declares no destructor and never
 * frees what it points to.
 */
template <typename T>
class ICUPointer {
 public:
  explicit ICUPointer(T* aPointer) : mPointer(aPointer) {}

  // Move-only: declaring the move operations suppresses the implicit copy
  // operations.
  ICUPointer(ICUPointer&& other) noexcept = default;
  ICUPointer& operator=(ICUPointer&& other) noexcept = default;

  // Rebind the wrapper to a different raw pointer.
  ICUPointer& operator=(T* aPointer) noexcept {
    mPointer = aPointer;
    return *this;
  }

  // Read-only access, for const ICU4C calls.
  const T* GetConst() const { return mPointer; }

  // Mutable access, for non-const ICU4C calls.
  T* GetMut() { return mPointer; }

  // True when a non-null pointer is held.
  explicit operator bool() const { return mPointer != nullptr; }

 private:
  T* mPointer;
};

/**
 * Run an ICU C-API string function and store its output in |buffer|.
 *
 * ICU string functions take a destination pointer, a capacity and a status
 * out-parameter, and return the length of the result. |strFn| is a callable
 * wrapping such a call with the signature (data, capacity, UErrorCode*) ->
 * int32_t.
 *
 * The callable is first invoked with the buffer's current storage, which is
 * often enough when the buffer has inline (stack) capacity. If ICU reports
 * U_BUFFER_OVERFLOW_ERROR, the buffer is grown to the length ICU returned and
 * the callable is invoked a second time to actually copy the result.
 *
 * Returns Ok after telling the buffer how many characters were written,
 * ICUError::OutOfMemory if growing the buffer fails, or the converted ICU
 * error otherwise.
 */
template <typename ICUStringFunction, typename Buffer>
static ICUResult FillBufferWithICUCall(Buffer& buffer,
                                       const ICUStringFunction& strFn) {
  using BufferChar = typename Buffer::CharType;
  static_assert(std::is_same_v<BufferChar, char16_t> ||
                std::is_same_v<BufferChar, char> ||
                std::is_same_v<BufferChar, uint8_t>);

  // First attempt, using whatever storage the buffer already has.
  UErrorCode status = U_ZERO_ERROR;
  int32_t length = strFn(buffer.data(), buffer.capacity(), &status);

  if (status == U_BUFFER_OVERFLOW_ERROR) {
    // ICU reported the length it needs; grow to it and retry.
    MOZ_ASSERT(length >= 0);

    if (!buffer.reserve(length)) {
      return Err(ICUError::OutOfMemory);
    }

    status = U_ZERO_ERROR;
    mozilla::DebugOnly<int32_t> retryLength =
        strFn(buffer.data(), length, &status);
    MOZ_ASSERT(length == retryLength);
  }

  if (ICUSuccessForStringSpan(status)) {
    // Let the buffer know how much of its storage now holds valid data.
    buffer.written(length);
    return Ok{};
  }

  return Err(ToICUError(status));
}

/**
 * Adaptor for mozilla::Vector to implement the Buffer interface.
 */
template <typename T, size_t N>
class VectorToBufferAdaptor {
  mozilla::Vector<T, N>& vector;

 public:
  using CharType = T;

  explicit VectorToBufferAdaptor(mozilla::Vector<T, N>& vector)
      : vector(vector) {}

  T* data() { return vector.begin(); }

  size_t capacity() const { return vector.capacity(); }

  bool reserve(size_t length) { return vector.reserve(length); }

  void written(size_t length) {
    mozilla::DebugOnly<bool> result = vector.resizeUninitialized(length);
    MOZ_ASSERT(result);
  }
};

/**
 * FillBufferWithICUCall overload for callers holding a mozilla::Vector instead
 * of a Buffer: the vector is wrapped in a VectorToBufferAdaptor and the work is
 * forwarded to the generic implementation.
 */
template <typename ICUStringFunction, size_t InlineSize, typename CharType>
static ICUResult FillBufferWithICUCall(Vector<CharType, InlineSize>& vector,
                                       const ICUStringFunction& strFn) {
  VectorToBufferAdaptor<CharType, InlineSize> adaptor(vector);
  return FillBufferWithICUCall(adaptor, strFn);
}

#ifndef JS_STANDALONE
/**
 * mozilla::intl APIs write into resizable buffers. This adapter exposes an
 * nsTArray through that buffer interface. It only keeps a reference to the
 * array.
 */
template <typename T>
class nsTArrayToBufferAdapter {
 public:
  using CharType = T;

  explicit nsTArrayToBufferAdapter(nsTArray<CharType>& aArray)
      : mArray(aArray) {}

  // Copying is disabled. Move support could be added later if it is needed.
  nsTArrayToBufferAdapter(const nsTArrayToBufferAdapter&) = delete;
  nsTArrayToBufferAdapter& operator=(const nsTArrayToBufferAdapter&) = delete;

  /**
   * Pointer to the array's element storage.
   */
  CharType* data() { return mArray.Elements(); }

  /**
   * Number of elements currently recorded as written.
   */
  size_t length() const { return mArray.Length(); }

  /**
   * Total number of elements the array can hold without growing.
   */
  size_t capacity() const { return mArray.Capacity(); }

  /**
   * Make sure there is room for |size| elements. The fallible variant of
   * SetCapacity is used, so failure is reported through the return value.
   */
  [[nodiscard]] bool reserve(size_t size) {
    return mArray.SetCapacity(size, fallible);
  }

  /**
   * Record that |amount| elements have been written into the storage.
   */
  void written(size_t amount) {
    MOZ_ASSERT(amount <= mArray.Capacity());
    // The data was written through the raw pointer across the FFI boundary, so
    // the array's own length has to be brought in line with it afterwards.
    mArray.SetLengthAndRetainStorage(amount);
  }

 private:
  nsTArray<CharType>& mArray;
};

/**
 * Buffer adapter for AutoTArray<T, N>. It adds nothing of its own: the whole
 * interface, including the constructors, is inherited from
 * nsTArrayToBufferAdapter<T>. N does not appear in the class body.
 */
template <typename T, size_t N>
class AutoTArrayToBufferAdapter : public nsTArrayToBufferAdapter<T> {
  // Inherit the base class constructors.
  using nsTArrayToBufferAdapter<T>::nsTArrayToBufferAdapter;
};

/**
 * FillBufferWithICUCall overload for nsTArray: the array is wrapped in an
 * nsTArrayToBufferAdapter and the work is forwarded to the generic
 * implementation.
 */
template <typename ICUStringFunction, typename CharType>
static ICUResult FillBufferWithICUCall(nsTArray<CharType>& array,
                                       const ICUStringFunction& strFn) {
  nsTArrayToBufferAdapter<CharType> adapter(array);
  return FillBufferWithICUCall(adapter, strFn);
}

/**
 * FillBufferWithICUCall overload for AutoTArray: the array is wrapped in an
 * AutoTArrayToBufferAdapter and the work is forwarded to the generic
 * implementation.
 */
template <typename ICUStringFunction, typename CharType, size_t N>
static ICUResult FillBufferWithICUCall(AutoTArray<CharType, N>& array,
                                       const ICUStringFunction& strFn) {
  AutoTArrayToBufferAdapter<CharType, N> adapter(array);
  return FillBufferWithICUCall(adapter, strFn);
}
#endif

/**
 * Copy a UTF-16 span into |targetBuffer|, which may hold either UTF-8
 * (char / unsigned char) or UTF-16 (char16_t) code units. ICU4C mostly works
 * in UTF-16, while consumers may need either encoding.
 *
 * Returns false if the required size cannot be computed without overflow or
 * if the buffer cannot be grown; returns true once the buffer has been told
 * how many code units were written.
 */
template <typename Buffer>
[[nodiscard]] bool FillBuffer(Span<const char16_t> utf16Span,
                              Buffer& targetBuffer) {
  using TargetChar = typename Buffer::CharType;
  constexpr bool isUtf8Target = std::is_same_v<TargetChar, char> ||
                                std::is_same_v<TargetChar, unsigned char>;
  constexpr bool isUtf16Target = std::is_same_v<TargetChar, char16_t>;
  static_assert(isUtf8Target || isUtf16Target);

  const size_t sourceLength = utf16Span.Length();

  if constexpr (isUtf8Target) {
    // Three target units are reserved per source unit; reject lengths for
    // which that multiplication would overflow size_t.
    if (sourceLength & mozilla::tl::MulOverflowMask<3>::value) {
      return false;
    }
    if (!targetBuffer.reserve(3 * sourceLength)) {
      return false;
    }

    // The converter writes `char`, so view the storage as such even when the
    // buffer holds `unsigned char`.
    char* utf8Data = reinterpret_cast<char*>(targetBuffer.data());
    const size_t unitsWritten = ConvertUtf16toUtf8(
        utf16Span, Span(utf8Data, targetBuffer.capacity()));

    targetBuffer.written(unitsWritten);
  } else {
    // Same encoding on both sides: a plain element-wise copy.
    if (!targetBuffer.reserve(sourceLength)) {
      return false;
    }
    for (size_t index = 0; index < sourceLength; index++) {
      targetBuffer.data()[index] = utf16Span[index];
    }
    targetBuffer.written(sourceLength);
  }

  return true;
}

/**
 * Copy a UTF-8 span into |targetBuffer|, which may hold either UTF-8
 * (char / unsigned char) or UTF-16 (char16_t) code units. ICU4C mostly works
 * in UTF-16, while consumers may need either encoding.
 *
 * Returns false if the buffer cannot be grown; returns true once the buffer
 * has been told how many code units were written.
 */
template <typename Buffer>
[[nodiscard]] bool FillBuffer(Span<const char> utf8Span, Buffer& targetBuffer) {
  using TargetChar = typename Buffer::CharType;
  constexpr bool isUtf8Target = std::is_same_v<TargetChar, char> ||
                                std::is_same_v<TargetChar, unsigned char>;
  constexpr bool isUtf16Target = std::is_same_v<TargetChar, char16_t>;
  static_assert(isUtf8Target || isUtf16Target);

  if constexpr (isUtf8Target) {
    // Same encoding on both sides: a plain element-wise copy.
    const size_t byteCount = utf8Span.Length();
    if (!targetBuffer.reserve(byteCount)) {
      return false;
    }
    for (size_t index = 0; index < byteCount; index++) {
      // The cast covers buffers that store `unsigned char` rather than `char`.
      targetBuffer.data()[index] = static_cast<TargetChar>(utf8Span[index]);
    }
    targetBuffer.written(byteCount);
  } else {
    // One unit more than the source length is reserved before converting.
    if (!targetBuffer.reserve(utf8Span.Length() + 1)) {
      return false;
    }

    const size_t unitsWritten = ConvertUtf8toUtf16(
        utf8Span, Span(targetBuffer.data(), targetBuffer.capacity()));

    targetBuffer.written(unitsWritten);
  }

  return true;
}

/**
 * Convert a UTF-8 span into a UTF-16 mozilla::Vector (typically one with
 * inline storage) that can then be handed to ICU calls. This lets callers pass
 * UTF-8 strings to the API.
 *
 * A NUL terminator is written directly after the converted text. It is not
 * included in the vector's resulting length.
 *
 * Returns false if reserving storage or resizing the vector fails.
 */
template <size_t StackSize>
[[nodiscard]] static bool FillUTF16Vector(
    Span<const char> utf8Span,
    mozilla::Vector<char16_t, StackSize>& utf16TargetVec) {
  // ConvertUtf8toUtf16 requires the destination to be at least one unit longer
  // than the source. That extra unit is where the terminator goes.
  const size_t requiredCapacity = utf8Span.Length() + 1;
  if (!utf16TargetVec.reserve(requiredCapacity)) {
    return false;
  }

  // The conversion writes straight into the vector's storage and leaves the
  // vector's length untouched.
  char16_t* storage = utf16TargetVec.begin();
  size_t unitsWritten =
      ConvertUtf8toUtf16(utf8Span, Span(storage, utf16TargetVec.capacity()));

  // There must still be a free slot for the terminator.
  MOZ_ASSERT(unitsWritten < utf16TargetVec.capacity());
  storage[unitsWritten] = '\0';

  // Publish the length to the vector, excluding the terminator.
  return utf16TargetVec.resizeUninitialized(unitsWritten);
}

/**
 * An iterable class that wraps calls to the ICU UEnumeration C API.
 *
 * The Enumeration owns the UEnumeration it was constructed with and closes it
 * in its destructor. It can be moved but not copied.
 *
 * Template parameters:
 *  - CharType: `char` (uses uenum_next) or `char16_t` (uses uenum_unext).
 *  - T: the type produced when an iterator is dereferenced.
 *  - Mapper: converts the raw (string, length) pair from ICU into a T.
 *
 * All iterators obtained from one Enumeration advance the same underlying
 * UEnumeration, so the sequence can only be walked in a single pass.
 *
 * Usage:
 *
 *  // Make sure the range expression is non-temporary, otherwise there is a
 *  // risk of undefined behavior:
 *  auto result = Calendar::GetBcp47KeywordValuesForLocale("en-US");
 *
 *  for (auto name : result.unwrap()) {
 *    MOZ_ASSERT(name.unwrap(), "An iterable value exists.");
 *  }
 */
template <typename CharType, typename T, T(Mapper)(const CharType*, int32_t)>
class Enumeration {
 public:
  class Iterator;
  friend class Iterator;

  // Transfer ownership of the UEnumeration in the move constructor.
  Enumeration(Enumeration&& other) noexcept
      : mUEnumeration(other.mUEnumeration) {
    other.mUEnumeration = nullptr;
  }

  // Transfer ownership of the UEnumeration in the move assignment operator.
  // Any UEnumeration previously held by this object is closed first.
  Enumeration& operator=(Enumeration&& other) noexcept {
    if (this == &other) {
      return *this;
    }
    if (mUEnumeration) {
      uenum_close(mUEnumeration);
    }
    mUEnumeration = other.mUEnumeration;
    other.mUEnumeration = nullptr;
    return *this;
  }

  class Iterator {
    Enumeration& mEnumeration;
    // `Nothing` signifies that no enumeration has been loaded through ICU yet.
    // It is also the state of an exhausted iterator, which is what makes an
    // exhausted iterator compare equal to end().
    Maybe<int32_t> mIteration = Nothing{};
    const CharType* mNext = nullptr;
    int32_t mNextLength = 0;

   public:
    // All five member types are provided so that std::iterator_traits is
    // usable with this iterator; if any of them is missing the traits class
    // has no members at all.
    using value_type = const CharType*;
    using reference = T;
    using difference_type = ptrdiff_t;
    // There is no operator->, so there is no meaningful pointer type.
    using pointer = void;
    using iterator_category = std::input_iterator_tag;

    // A begin iterator immediately fetches the first element; an end iterator
    // stays in the `Nothing` state.
    explicit Iterator(Enumeration& aEnumeration, bool aIsBegin)
        : mEnumeration(aEnumeration) {
      if (aIsBegin) {
        AdvanceUEnum();
      }
    }

    Iterator& operator++() {
      AdvanceUEnum();
      return *this;
    }

    Iterator operator++(int) {
      Iterator retval = *this;
      ++(*this);
      return retval;
    }

    // Iterators are compared by position only.
    bool operator==(const Iterator& other) const {
      return mIteration == other.mIteration;
    }

    bool operator!=(const Iterator& other) const { return !(*this == other); }

    T operator*() const {
      // Map the iterated value to something new.
      return Mapper(mNext, mNextLength);
    }

   private:
    // Fetch the next element from ICU and update the position. An ICU failure
    // is treated like the end of the enumeration.
    void AdvanceUEnum() {
      if (mIteration.isNothing()) {
        mIteration = Some(-1);
      }
      UErrorCode status = U_ZERO_ERROR;
      if constexpr (std::is_same_v<CharType, char16_t>) {
        mNext = uenum_unext(mEnumeration.mUEnumeration, &mNextLength, &status);
      } else {
        static_assert(std::is_same_v<CharType, char>,
                      "Only char16_t and char are supported by "
                      "mozilla::intl::Enumeration.");
        mNext = uenum_next(mEnumeration.mUEnumeration, &mNextLength, &status);
      }
      if (U_FAILURE(status)) {
        mNext = nullptr;
      }

      if (mNext) {
        (*mIteration)++;
      } else {
        // The iterator is complete.
        mIteration = Nothing{};
      }
    }
  };

  Iterator begin() { return Iterator(*this, true); }
  Iterator end() { return Iterator(*this, false); }

  // Takes ownership of |aUEnumeration|.
  explicit Enumeration(UEnumeration* aUEnumeration)
      : mUEnumeration(aUEnumeration) {}

  ~Enumeration() {
    if (mUEnumeration) {
      // Only close when the object is being destructed, not moved.
      uenum_close(mUEnumeration);
    }
  }

 private:
  UEnumeration* mUEnumeration = nullptr;
};

/**
 * Mapper for Enumeration that exposes the raw iterated string as a Span.
 * A null |string| yields an InternalError; otherwise the result is a span over
 * |length| characters starting at |string|.
 */
template <typename CharType>
Result<Span<const CharType>, InternalError> SpanMapper(const CharType* string,
                                                       int32_t length) {
  if (!string) {
    // Nothing to wrap.
    return Err(InternalError{});
  }

  MOZ_ASSERT(length >= 0);
  const size_t charCount = static_cast<size_t>(length);
  return Span<const CharType>(string, charCount);
}

// The value produced when dereferencing a SpanEnumeration iterator: a span
// over the iterated string, or an InternalError. This is the same type that
// SpanMapper returns.
template <typename CharType>
using SpanResult = Result<Span<const CharType>, InternalError>;

// An Enumeration whose iterators map each iterated string through SpanMapper.
template <typename CharType>
using SpanEnumeration = Enumeration<CharType, SpanResult<CharType>, SpanMapper>;

/**
 * An iterable class that wraps calls to ICU's available locales API.
 */
template <int32_t(CountAvailable)(), const char*(GetAvailable)(int32_t)>
class AvailableLocalesEnumeration final {
  // The overall count of available locales.
  int32_t mLocalesCount = 0;

 public:
  AvailableLocalesEnumeration() { mLocalesCount = CountAvailable(); }

  class Iterator {
   public:
    // std::iterator traits.
    using iterator_category = std::input_iterator_tag;
    using value_type = const char*;
    using difference_type = ptrdiff_t;
    using pointer = value_type*;
    using reference = value_type&;

   private:
    // The current position in the list of available locales.
    int32_t mLocalesPos = 0;

   public:
    explicit Iterator(int32_t aLocalesPos) : mLocalesPos(aLocalesPos) {}

    Iterator& operator++() {
      mLocalesPos++;
      return *this;
    }

    Iterator operator++(int) {
      Iterator result = *this;
      ++(*this);
      return result;
    }

    bool operator==(const Iterator& aOther) const {
      return mLocalesPos == aOther.mLocalesPos;
    }

    bool operator!=(const Iterator& aOther) const { return !(*this == aOther); }

    value_type operator*() const { return GetAvailable(mLocalesPos); }
  };

  // std::iterator begin() and end() methods.

  /**
   * Return an iterator pointing to the first available locale.
   */
  Iterator begin() const { return Iterator(0); }

  /**
   * Return an iterator pointing to one past the last available locale.
   */
  Iterator end() const { return Iterator(mLocalesCount); }
};

/**
 * A helper class to wrap calling ICU function in cpp file so we don't have to
 * include the ICU header here.
 */
class FormattedResult {
 protected:
  // Declared only; the definition is not in this header. Protected, so it is
  // reachable from derived classes.
  static Result<Span<const char16_t>, ICUError> ToSpanImpl(
      const UFormattedValue* value);
};

/**
 * A RAII class that owns the native ICU object holding a format result.
 *
 * Callers create an AutoFormattedResult on the stack, instantiated with:
 * 1. The native ICU type.
 * 2. An ICU function which opens the result.
 * 3. An ICU function which returns the result as a UFormattedValue.
 * 4. An ICU function which closes the result.
 *
 * After construction, call IsValid() to check that the native object was
 * created, then pass this object to the format interfaces. The format result
 * is stored in this object and the formatted string can be read with ToSpan().
 *
 * GetFormatted() and Value() are private because they expose native ICU types.
 * A class that needs them has to be registered as a friend of
 * AutoFormattedResult.
 *
 * The native ICU object, and with it the formatted value, is released when
 * this object is destroyed. The class can be neither copied nor moved.
 */
template <typename T, T*(Open)(UErrorCode*),
          const UFormattedValue*(GetValue)(const T*, UErrorCode*),
          void(Close)(T*)>
class MOZ_RAII AutoFormattedResult : FormattedResult {
 public:
  AutoFormattedResult() {
    // Keep the native object only if opening it succeeded.
    T* opened = Open(&mError);
    mFormatted = U_FAILURE(mError) ? nullptr : opened;
  }

  ~AutoFormattedResult() {
    if (mFormatted) {
      Close(mFormatted);
    }
  }

  AutoFormattedResult(const AutoFormattedResult& other) = delete;
  AutoFormattedResult& operator=(const AutoFormattedResult& other) = delete;

  AutoFormattedResult(AutoFormattedResult&& other) = delete;
  AutoFormattedResult& operator=(AutoFormattedResult&& other) = delete;

  /**
   * Check if the native ICU object was created successfully.
   */
  bool IsValid() const { return mFormatted != nullptr; }

  /**
   * The error recorded while opening the native object. Meaningful when
   * IsValid() returns false.
   */
  ICUError GetError() const { return ToICUError(mError); }

  /**
   * Get the formatted result as a UTF-16 span. Fails with the open error if
   * the object is not valid, and with InternalError if the formatted value
   * cannot be obtained.
   */
  Result<Span<const char16_t>, ICUError> ToSpan() const {
    if (!IsValid()) {
      return Err(GetError());
    }

    if (const UFormattedValue* value = Value()) {
      return ToSpanImpl(value);
    }

    return Err(ICUError::InternalError);
  }

 private:
  friend class DateIntervalFormat;
  friend class ListFormat;

  // The native ICU object, or nullptr if it could not be opened.
  T* GetFormatted() const { return mFormatted; }

  // The result viewed as a UFormattedValue, or nullptr if this object is not
  // valid or ICU reports a failure.
  const UFormattedValue* Value() const {
    if (!IsValid()) {
      return nullptr;
    }

    UErrorCode status = U_ZERO_ERROR;
    const UFormattedValue* value = GetValue(mFormatted, &status);
    return U_SUCCESS(status) ? value : nullptr;
  }

  T* mFormatted = nullptr;
  UErrorCode mError = U_ZERO_ERROR;
};
}  // namespace mozilla::intl

#endif /* intl_components_ICUUtils_h */