1 files changed, 165 insertions, 0 deletions
diff --git a/xbmc/utils/RegExp.h b/xbmc/utils/RegExp.h
new file mode 100644
index 0000000..53f6019
--- /dev/null
+++ b/xbmc/utils/RegExp.h
@@ -0,0 +1,165 @@
+/*
+ *  Copyright (C) 2005-2018 Team Kodi
+ *  This file is part of Kodi - https://kodi.tv
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ *  See LICENSES/README.md for more information.
+ */
+
+#pragma once
+
+//! @todo - move to std::regex (after switching to gcc 4.9 or higher) and get rid of CRegExp
+
+#include <string>
+#include <vector>
+
+/* make sure stdlib.h is included before including pcre.h inside the
+   namespace; this works around stdlib.h definitions also living in
+   the PCRE namespace */
+#include <stdlib.h>
+
+namespace PCRE {
+struct real_pcre_jit_stack; // forward declaration for PCRE without JIT
+typedef struct real_pcre_jit_stack pcre_jit_stack;
+#include <pcre.h>
+}
+
+class CRegExp
+{
+public:
+  enum studyMode
+  {
+    NoStudy          = 0, // do not study expression
+    StudyRegExp      = 1, // study expression (slower compilation, faster find)
+    StudyWithJitComp      // study expression and JIT-compile it, if possible (heavyweight optimization)
+  };
+  enum utf8Mode
+  {
+    autoUtf8  = -1, // analyze regexp for UTF-8 multi-byte chars, for Unicode codes > 0xFF
+                    // or explicit Unicode properties (\p, \P and \X), enable UTF-8 mode if any of them are found
+    asciiOnly =  0, // process regexp and strings as single-byte encoded strings
+    forceUtf8 =  1  // enable UTF-8 mode (with Unicode properties)
+  };
+
+  static const int m_MaxNumOfBackrefrences = 20;
+  /**
+   * @param caseless (optional) Matching will be case insensitive if set to true
+   *                            or case sensitive if set to false
+   * @param utf8 (optional) Control UTF-8 processing
+   */
+  CRegExp(bool caseless = false, utf8Mode utf8 = asciiOnly);
+  /**
+   * Create new CRegExp object and compile regexp expression in one step
+   * @warning Use only with hardcoded regexp when you're sure that regexp is compiled without errors
+   * @param caseless    Matching will be case insensitive if set to true
+   *                    or case sensitive if set to false
+   * @param utf8        Control UTF-8 processing
+   * @param re          The regular expression
+   * @param study (optional) Controls study of expression, useful if expression will be used
+   *                         several times
+   */
+  CRegExp(bool caseless, utf8Mode utf8, const char *re, studyMode study = NoStudy);
+
+  CRegExp(const CRegExp& re);
+  ~CRegExp();
+
+  /**
+   * Compile (prepare) regular expression
+   * @param re          The regular expression
+   * @param study (optional) Controls study of expression, useful if expression will be used
+   *                         several times
+   * @return true on success, false on any error
+   */
+  bool RegComp(const char *re, studyMode study = NoStudy);
+
+  /**
+   * Compile (prepare) regular expression
+   * @param re          The regular expression
+   * @param study (optional) Controls study of expression, useful if expression will be used
+   *                         several times
+   * @return true on success, false on any error
+   */
+  bool RegComp(const std::string& re, studyMode study = NoStudy)
+  { return RegComp(re.c_str(), study); }
+
+  /**
+   * Find first match of regular expression in given string
+   * @param str         The string to match against regular expression
+   * @param startoffset (optional) The string offset to start matching
+   * @param maxNumberOfCharsToTest (optional) The maximum number of characters to test (match) in
+   *                                          string. If set to -1 string checked up to the end.
+   * @return staring position of match in string, negative value in case of error or no match
+   */
+  int RegFind(const char* str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1);
+  /**
+   * Find first match of regular expression in given string
+   * @param str         The string to match against regular expression
+   * @param startoffset (optional) The string offset to start matching
+   * @param maxNumberOfCharsToTest (optional) The maximum number of characters to test (match) in
+   *                                          string. If set to -1 string checked up to the end.
+   * @return staring position of match in string, negative value in case of error or no match
+   */
+  int RegFind(const std::string& str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1)
+  { return PrivateRegFind(str.length(), str.c_str(), startoffset, maxNumberOfCharsToTest); }
+  std::string GetReplaceString(const std::string& sReplaceExp) const;
+  int GetFindLen() const
+  {
+    if (!m_re || !m_bMatched)
+      return 0;
+
+    return (m_iOvector[1] - m_iOvector[0]);
+  };
+  int GetSubCount() const { return m_iMatchCount - 1; } // PCRE returns the number of sub-patterns + 1
+  int GetSubStart(int iSub) const;
+  int GetSubStart(const std::string& subName) const;
+  int GetSubLength(int iSub) const;
+  int GetSubLength(const std::string& subName) const;
+  int GetCaptureTotal() const;
+  std::string GetMatch(int iSub = 0) const;
+  std::string GetMatch(const std::string& subName) const;
+  const std::string& GetPattern() const { return m_pattern; }
+  bool GetNamedSubPattern(const char* strName, std::string& strMatch) const;
+  int GetNamedSubPatternNumber(const char* strName) const;
+  void DumpOvector(int iLog);
+  /**
+   * Check is RegExp object is ready for matching
+   * @return true if RegExp object is ready for matching, false otherwise
+   */
+  inline bool IsCompiled(void) const
+  { return !m_pattern.empty(); }
+  CRegExp& operator= (const CRegExp& re);
+  static bool IsUtf8Supported(void);
+  static bool AreUnicodePropertiesSupported(void);
+  static bool LogCheckUtf8Support(void);
+  static bool IsJitSupported(void);
+
+private:
+  int PrivateRegFind(size_t bufferLen, const char *str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1);
+  void InitValues(bool caseless = false, CRegExp::utf8Mode utf8 = asciiOnly);
+  static bool requireUtf8(const std::string& regexp);
+  static int readCharXCode(const std::string& regexp, size_t& pos);
+  static bool isCharClassWithUnicode(const std::string& regexp, size_t& pos);
+
+  void Cleanup();
+  inline bool IsValidSubNumber(int iSub) const;
+
+  PCRE::pcre* m_re;
+  PCRE::pcre_extra* m_sd;
+  static const int OVECCOUNT=(m_MaxNumOfBackrefrences + 1) * 3;
+  unsigned int m_offset;
+  int         m_iOvector[OVECCOUNT];
+  utf8Mode    m_utf8Mode;
+  int         m_iMatchCount;
+  int         m_iOptions;
+  bool        m_jitCompiled;
+  bool        m_bMatched;
+  PCRE::pcre_jit_stack* m_jitStack;
+  std::string m_subject;
+  std::string m_pattern;
+  static int  m_Utf8Supported;
+  static int  m_UcpSupported;
+  static int  m_JitSupported;
+};
+
+typedef std::vector<CRegExp> VECCREGEXP;
+