Adding upstream version 86.0.1.upstream/86.0.1 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 14:29:10 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 14:29:10 +0000
commit: 2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
tree: b80bf8bf13c3766139fbacc530efd0dd9d54394c /netwerk/streamconv/converters/mozTXTToHTMLConv.h
parent: Initial commit. (diff)
download: firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz
firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip
1 files changed, 284 insertions, 0 deletions
diff --git a/netwerk/streamconv/converters/mozTXTToHTMLConv.h b/netwerk/streamconv/converters/mozTXTToHTMLConv.h
new file mode 100644
index 0000000000..23c3dac30f
--- /dev/null
+++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.h
@@ -0,0 +1,284 @@
+/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+  Description: Currently only functions to enhance plain text with HTML tags.
+  See mozITXTToHTMLConv. Stream conversion is defunct.
+*/
+
+#ifndef _mozTXTToHTMLConv_h__
+#define _mozTXTToHTMLConv_h__
+
+#include "mozITXTToHTMLConv.h"
+#include "nsString.h"
+#include "nsCOMPtr.h"
+
+class nsIIOService;
+
+class mozTXTToHTMLConv : public mozITXTToHTMLConv {
+  virtual ~mozTXTToHTMLConv() = default;
+
+  //////////////////////////////////////////////////////////
+ public:
+  //////////////////////////////////////////////////////////
+
+  mozTXTToHTMLConv() = default;
+  NS_DECL_ISUPPORTS
+
+  NS_DECL_MOZITXTTOHTMLCONV
+  NS_DECL_NSIREQUESTOBSERVER
+  NS_DECL_NSISTREAMLISTENER
+  NS_DECL_NSISTREAMCONVERTER
+
+  /**
+    see mozITXTToHTMLConv::CiteLevelTXT
+   */
+  int32_t CiteLevelTXT(const char16_t* line, uint32_t& logLineStart);
+
+  //////////////////////////////////////////////////////////
+ protected:
+  //////////////////////////////////////////////////////////
+  nsCOMPtr<nsIIOService>
+      mIOService;  // for performance reasons, cache the netwerk service...
+                   /**
+                     Completes<ul>
+                     <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
+                     <li>Case 2: http:   "www.mozilla.org"     -> "http://www.mozilla.org"
+                     <li>Case 3: ftp:    "ftp.mozilla.org"     -> "ftp://www.mozilla.org"
+                     </ul>
+                     It does no check, if the resulting URL is valid.
+                     @param text (in): abbreviated URL
+                     @param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
+                     @return Completed URL at success and empty string at failure
+                    */
+  void CompleteAbbreviatedURL(const char16_t* aInString, int32_t aInLength,
+                              const uint32_t pos, nsString& aOutString);
+
+  //////////////////////////////////////////////////////////
+ private:
+  //////////////////////////////////////////////////////////
+
+  enum LIMTYPE {
+    LT_IGNORE,     // limitation not checked
+    LT_DELIMITER,  // not alphanumeric and not rep[0]. End of text is also ok.
+    LT_ALPHA,      // alpha char
+    LT_DIGIT
+  };
+
+  /**
+    @param text (in): the string to search through.<p>
+           If before = IGNORE,<br>
+             rep is compared starting at 1. char of text (text[0]),<br>
+             else starting at 2. char of text (text[1]).
+           Chars after "after"-delimiter are ignored.
+    @param rep (in): the string to look for
+    @param aRepLen (in): the number of bytes in the string to look for
+    @param before (in): limitation before rep
+    @param after (in): limitation after rep
+    @return true, if rep is found and limitation spec is met or rep is empty
+  */
+  bool ItMatchesDelimited(const char16_t* aInString, int32_t aInLength,
+                          const char16_t* rep, int32_t aRepLen, LIMTYPE before,
+                          LIMTYPE after);
+
+  /**
+    @param see ItMatchesDelimited
+    @return Number of ItMatchesDelimited in text
+  */
+  uint32_t NumberOfMatches(const char16_t* aInString, int32_t aInStringLength,
+                           const char16_t* rep, int32_t aRepLen, LIMTYPE before,
+                           LIMTYPE after);
+
+  /**
+    Currently only changes "<", ">" and "&". All others stay as they are.<p>
+    "Char" in function name to avoid side effects with nsString(ch)
+    constructors.
+    @param ch (in)
+    @param aStringToAppendto (out) - the string to append the escaped
+                                     string to.
+    @param inAttribute (in) - will escape quotes, too (which is
+                              only needed for attribute values)
+  */
+  void EscapeChar(const char16_t ch, nsAString& aStringToAppendto,
+                  bool inAttribute);
+
+  /**
+    See EscapeChar. Escapes the string in place.
+  */
+  void EscapeStr(nsString& aInString, bool inAttribute);
+
+  /**
+    Currently only reverts "<", ">" and "&". All others stay as they are.<p>
+    @param aInString (in) HTML string
+    @param aStartPos (in) start index into the buffer
+    @param aLength (in) length of the buffer
+    @param aOutString (out) unescaped buffer
+  */
+  void UnescapeStr(const char16_t* aInString, int32_t aStartPos,
+                   int32_t aLength, nsString& aOutString);
+
+  /**
+    <em>Note</em>: I use different strategies to pass context between the
+    functions (full text and pos vs. cutted text and col0, glphyTextLen vs.
+    replaceBefore/-After). It makes some sense, but is hard to understand
+    (maintain) :-(.
+  */
+
+  /**
+    <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
+    in text should be replaced by outputHTML.</p>
+    <p><em>Note:</em> This function should be able to process a URL on multiple
+    lines, but currently, ScanForURLs is called for every line, so it can't.</p>
+    @param text (in): includes possibly a URL
+    @param pos (in): position in text, where either ":", "." or "@" are found
+    @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
+                (not-linkified) text, i.e. usually the "whattodo" parameter.
+                (Needed to calculate replaceBefore.) NOT what will be done with
+                the content of the link.
+    @param outputHTML (out): URL with HTML-a tag
+    @param replaceBefore (out): Number of chars of URL before pos
+    @param replaceAfter (out): Number of chars of URL after pos
+    @return URL found
+  */
+  bool FindURL(const char16_t* aInString, int32_t aInLength, const uint32_t pos,
+               const uint32_t whathasbeendone, nsString& outputHTML,
+               int32_t& replaceBefore, int32_t& replaceAfter);
+
+  enum modetype {
+    unknown,
+    RFC1738,    /* Check, if RFC1738, APPENDIX compliant,
+                   like "<URL:http://www.mozilla.org>". */
+    RFC2396E,   /* RFC2396, APPENDIX E allows anglebrackets (like
+                   "<http://www.mozilla.org>") (without "URL:") or
+                   quotation marks(like ""http://www.mozilla.org"").
+                   Also allow email addresses without scheme,
+                   e.g. "<mozilla@bucksch.org>" */
+    freetext,   /* assume heading scheme
+                   with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
+                   (see RFC2396, Section 3.1).
+                   Certain characters (see code) or any whitespace
+                   (including linebreaks) end the URL.
+                   Other certain (punctation) characters (see code)
+                   at the end are stripped off. */
+    abbreviated /* Similar to freetext, but without scheme, e.g.
+                   "www.mozilla.org", "ftp.mozilla.org" and
+                   "mozilla@bucksch.org". */
+                /* RFC1738 and RFC2396E type URLs may use multiple lines,
+                   whitespace is stripped. Special characters like ")" stay intact.*/
+  };
+
+  /**
+   * @param text (in), pos (in): see FindURL
+   * @param check (in): Start must be conform with this mode
+   * @param start (out): Position in text, where URL (including brackets or
+   *             similar) starts
+   * @return |check|-conform start has been found
+   */
+  bool FindURLStart(const char16_t* aInString, int32_t aInLength,
+                    const uint32_t pos, const modetype check, uint32_t& start);
+
+  /**
+   * @param text (in), pos (in): see FindURL
+   * @param check (in): End must be conform with this mode
+   * @param start (in): see FindURLStart
+   * @param end (out): Similar to |start| param of FindURLStart
+   * @return |check|-conform end has been found
+   */
+  bool FindURLEnd(const char16_t* aInString, int32_t aInStringLength,
+                  const uint32_t pos, const modetype check,
+                  const uint32_t start, uint32_t& end);
+
+  /**
+   * @param text (in), pos (in), whathasbeendone (in): see FindURL
+   * @param check (in): Current mode
+   * @param start (in), end (in): see FindURLEnd
+   * @param txtURL (out): Guessed (raw) URL.
+   *             Without whitespace, but not completed.
+   * @param desc (out): Link as shown to the user, but already escaped.
+   *             Should be placed between the <a> and </a> tags.
+   * @param replaceBefore(out), replaceAfter (out): see FindURL
+   */
+  void CalculateURLBoundaries(const char16_t* aInString,
+                              int32_t aInStringLength, const uint32_t pos,
+                              const uint32_t whathasbeendone,
+                              const modetype check, const uint32_t start,
+                              const uint32_t end, nsString& txtURL,
+                              nsString& desc, int32_t& replaceBefore,
+                              int32_t& replaceAfter);
+
+  /**
+   * @param txtURL (in), desc (in): see CalculateURLBoundaries
+   * @param outputHTML (out): see FindURL
+   * @return A valid URL could be found (and creation of HTML successful)
+   */
+  bool CheckURLAndCreateHTML(const nsString& txtURL, const nsString& desc,
+                             const modetype mode, nsString& outputHTML);
+
+  /**
+    @param text (in): line of text possibly with tagTXT.<p>
+                if col0 is true,
+                  starting with tagTXT<br>
+                else
+                  starting one char before tagTXT
+    @param col0 (in): tagTXT is on the beginning of the line (or paragraph).
+                open must be 0 then.
+    @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
+    @param aTagTxtLen (in): length of tagTXT.
+    @param tagHTML (in): HTML-Tag to replace tagTXT with,
+                without "<" and ">", e.g. "strong"
+    @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
+                e.g. "class=txt_star"
+    @param aOutString: string to APPEND the converted html into
+    @param open (in/out): Number of currently open tags of type tagHTML
+    @return Conversion succeeded
+  */
+  bool StructPhraseHit(const char16_t* aInString, int32_t aInStringLength,
+                       bool col0, const char16_t* tagTXT, int32_t aTagTxtLen,
+                       const char* tagHTML, const char* attributeHTML,
+                       nsAString& aOutputString, uint32_t& openTags);
+
+  /**
+    @param text (in), col0 (in): see GlyphHit
+    @param tagTXT (in): Smily, see also StructPhraseHit
+    @param imageName (in): the basename of the file that contains the image for
+                           this smilie
+    @param outputHTML (out): new string containing the html for the smily
+    @param glyphTextLen (out): see GlyphHit
+  */
+  bool SmilyHit(const char16_t* aInString, int32_t aLength, bool col0,
+                const char* tagTXT, const char* imageName, nsString& outputHTML,
+                int32_t& glyphTextLen);
+
+  /**
+    Checks, if we can replace some chars at the start of line with prettier HTML
+    code.<p>
+    If success is reported, replace the first glyphTextLen chars with outputHTML
+
+    @param text (in): line of text possibly with Glyph.<p>
+                If col0 is true,
+                  starting with Glyph <br><!-- (br not part of text) -->
+                else
+                  starting one char before Glyph
+    @param col0 (in): text starts at the beginning of the line (or paragraph)
+    @param aOutString (out): APPENDS html for the glyph to this string
+    @param glyphTextLen (out): Length of original text to replace
+    @return see StructPhraseHit
+  */
+  bool GlyphHit(const char16_t* aInString, int32_t aInLength, bool col0,
+                nsAString& aOutString, int32_t& glyphTextLen);
+
+  /**
+    Check if a given url should be linkified.
+    @param aURL (in): url to be checked on.
+  */
+  bool ShouldLinkify(const nsCString& aURL);
+};
+
+// It's said, that Win32 and Mac don't like static const members
+const int32_t mozTXTToHTMLConv_lastMode = 4;
+// Needed (only) by mozTXTToHTMLConv::FindURL
+const int32_t mozTXTToHTMLConv_numberOfModes = 4;  // dito; unknown not counted
+
+#endif
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-28 14:29:10 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-28 14:29:10 +0000
commit	2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
tree	b80bf8bf13c3766139fbacc530efd0dd9d54394c /netwerk/streamconv/converters/mozTXTToHTMLConv.h
parent	Initial commit. (diff)
download	firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip