/*
 *  Copyright (C) 2005-2018 Team Kodi
 *  This file is part of Kodi - https://kodi.tv
 *
 *  SPDX-License-Identifier: GPL-2.0-or-later
 *  See LICENSES/README.md for more information.
 */

#pragma once

//! @todo - move to std::regex (after switching to gcc 4.9 or higher) and get rid of CRegExp

#include <string>
#include <vector>

/* make sure stdlib.h is included before including pcre.h inside the
   namespace; this works around stdlib.h definitions also living in
   the PCRE namespace */
#include <stdlib.h>

// The PCRE C header is included inside a dedicated namespace, so the rest of
// this header refers to its types as PCRE::pcre, PCRE::pcre_extra and
// PCRE::pcre_jit_stack.
namespace PCRE {
// The JIT stack type is declared up-front so that PCRE::pcre_jit_stack can be
// named (CRegExp only stores a pointer to it) even when pcre.h itself does not
// provide it.
struct real_pcre_jit_stack; // forward declaration for PCRE without JIT
typedef struct real_pcre_jit_stack pcre_jit_stack;
#include <pcre.h>
}

/**
 * Regular expression object built on top of the PCRE C library.
 *
 * A pattern is compiled with RegComp() (or directly by the four-argument
 * constructor), searched for with RegFind(), and the result of the search is
 * then queried through the Get*() accessors.
 */
class CRegExp
{
public:
  /// Amount of extra analysis applied to a pattern when it is compiled.
  enum studyMode
  {
    NoStudy          = 0, // expression is not studied
    StudyRegExp      = 1, // expression is studied (compilation is slower, find is faster)
    StudyWithJitComp      // expression is studied and, if possible, JIT-compiled (heavyweight optimization)
  };

  /// UTF-8 handling applied to the pattern and to the strings it is matched against.
  enum utf8Mode
  {
    autoUtf8  = -1, // inspect the regexp for UTF-8 multi-byte chars, Unicode codes > 0xFF
                    // or explicit Unicode properties (\p, \P and \X); UTF-8 mode is enabled if any is found
    asciiOnly =  0, // regexp and strings are treated as single-byte encoded strings
    forceUtf8 =  1  // UTF-8 mode is always enabled (with Unicode properties)
  };

  /// Used to size the internal offset vector, see OVECCOUNT.
  static const int m_MaxNumOfBackrefrences = 20;

  /**
   * Create a CRegExp object without a pattern
   * @param caseless (optional) true for case insensitive matching,
   *                            false for case sensitive matching
   * @param utf8 (optional) Control UTF-8 processing
   */
  CRegExp(bool caseless = false, utf8Mode utf8 = asciiOnly);

  /**
   * Create a CRegExp object and compile a regular expression in a single step
   * @warning Intended only for hardcoded expressions that are known to compile
   *          without errors
   * @param caseless    true for case insensitive matching,
   *                    false for case sensitive matching
   * @param utf8        Control UTF-8 processing
   * @param re          The regular expression
   * @param study (optional) Controls study of the expression, useful when the
   *                         expression is going to be used several times
   */
  CRegExp(bool caseless, utf8Mode utf8, const char *re, studyMode study = NoStudy);

  CRegExp(const CRegExp& re);
  ~CRegExp();

  /**
   * Compile (prepare) a regular expression given as a C string
   * @param re          The regular expression
   * @param study (optional) Controls study of the expression, useful when the
   *                         expression is going to be used several times
   * @return true on success, false on any error
   */
  bool RegComp(const char *re, studyMode study = NoStudy);

  /**
   * Compile (prepare) a regular expression given as a std::string.
   * Forwards to the C string overload.
   * @param re          The regular expression
   * @param study (optional) Controls study of the expression, useful when the
   *                         expression is going to be used several times
   * @return true on success, false on any error
   */
  bool RegComp(const std::string& re, studyMode study = NoStudy)
  {
    return RegComp(re.c_str(), study);
  }

  /**
   * Find the first match of the regular expression in a C string
   * @param str         The string to match against the regular expression
   * @param startoffset (optional) The string offset at which matching starts
   * @param maxNumberOfCharsToTest (optional) The maximum number of characters to test (match)
   *                                          in the string. With -1 the string is checked up
   *                                          to its end.
   * @return starting position of the match in the string, a negative value on error or
   *         when nothing matched
   */
  int RegFind(const char* str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1);

  /**
   * Find the first match of the regular expression in a std::string.
   * The string's length is handed to the search routine together with its data.
   * @param str         The string to match against the regular expression
   * @param startoffset (optional) The string offset at which matching starts
   * @param maxNumberOfCharsToTest (optional) The maximum number of characters to test (match)
   *                                          in the string. With -1 the string is checked up
   *                                          to its end.
   * @return starting position of the match in the string, a negative value on error or
   *         when nothing matched
   */
  int RegFind(const std::string& str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1)
  {
    return PrivateRegFind(str.length(), str.c_str(), startoffset, maxNumberOfCharsToTest);
  }

  std::string GetReplaceString(const std::string& sReplaceExp) const;

  /**
   * Length of the last match
   * @return difference between the first two offset vector entries, or 0 when
   *         there is no compiled expression or nothing has been matched
   */
  int GetFindLen() const
  {
    const bool haveMatch = m_re && m_bMatched;
    return haveMatch ? (m_iOvector[1] - m_iOvector[0]) : 0;
  }

  // The stored match count is the number of sub-patterns + 1 (as returned by PCRE)
  int GetSubCount() const
  {
    return m_iMatchCount - 1;
  }

  // The accessors below are defined out of line. Judging by their names they
  // describe the sub-patterns of the most recent match, addressed either by
  // number or by name - confirm details against the implementation.
  int GetSubStart(int iSub) const;
  int GetSubStart(const std::string& subName) const;
  int GetSubLength(int iSub) const;
  int GetSubLength(const std::string& subName) const;
  int GetCaptureTotal() const;
  std::string GetMatch(int iSub = 0) const;
  std::string GetMatch(const std::string& subName) const;

  /// @return the pattern string stored in this object
  const std::string& GetPattern() const
  {
    return m_pattern;
  }

  bool GetNamedSubPattern(const char* strName, std::string& strMatch) const;
  int GetNamedSubPatternNumber(const char* strName) const;
  void DumpOvector(int iLog);

  /**
   * Check if the RegExp object is ready for matching
   * @return true if the RegExp object is ready for matching (the stored pattern
   *         is not empty), false otherwise
   */
  bool IsCompiled() const
  {
    return !m_pattern.empty();
  }

  CRegExp& operator= (const CRegExp& re);

  // Capability queries, defined out of line
  static bool IsUtf8Supported(void);
  static bool AreUnicodePropertiesSupported(void);
  static bool LogCheckUtf8Support(void);
  static bool IsJitSupported(void);

private:
  // Search routine that RegFind(const std::string&, ...) forwards to; receives
  // the subject length explicitly
  int PrivateRegFind(size_t bufferLen, const char *str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1);
  void InitValues(bool caseless = false, CRegExp::utf8Mode utf8 = asciiOnly);

  // Pattern inspection helpers, defined out of line (presumably used for the
  // autoUtf8 mode - confirm against the implementation)
  static bool requireUtf8(const std::string& regexp);
  static int readCharXCode(const std::string& regexp, size_t& pos);
  static bool isCharClassWithUnicode(const std::string& regexp, size_t& pos);

  void Cleanup();
  inline bool IsValidSubNumber(int iSub) const;

  PCRE::pcre* m_re;
  PCRE::pcre_extra* m_sd;
  // Three ints for each of the (m_MaxNumOfBackrefrences + 1) slots
  static const int OVECCOUNT = (m_MaxNumOfBackrefrences + 1) * 3;
  unsigned int m_offset;
  int m_iOvector[OVECCOUNT]; // entries 0 and 1 delimit the last match, see GetFindLen()
  utf8Mode m_utf8Mode;
  int m_iMatchCount; // number of sub-patterns + 1, see GetSubCount()
  int m_iOptions;
  bool m_jitCompiled;
  bool m_bMatched;
  PCRE::pcre_jit_stack* m_jitStack;
  std::string m_subject;
  std::string m_pattern; // non-empty when the object is ready for matching, see IsCompiled()
  static int m_Utf8Supported;
  static int m_UcpSupported;
  static int m_JitSupported;
};

// Convenience alias for a list of regular expression objects
using VECCREGEXP = std::vector<CRegExp>;