summaryrefslogtreecommitdiffstats
path: root/third_party/python/python_slugify
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/python/python_slugify
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/python/python_slugify')
-rw-r--r--third_party/python/python_slugify/python_slugify-8.0.1.dist-info/LICENSE21
-rw-r--r--third_party/python/python_slugify/python_slugify-8.0.1.dist-info/METADATA247
-rw-r--r--third_party/python/python_slugify/python_slugify-8.0.1.dist-info/RECORD11
-rw-r--r--third_party/python/python_slugify/python_slugify-8.0.1.dist-info/WHEEL6
-rw-r--r--third_party/python/python_slugify/python_slugify-8.0.1.dist-info/entry_points.txt3
-rw-r--r--third_party/python/python_slugify/python_slugify-8.0.1.dist-info/top_level.txt1
-rw-r--r--third_party/python/python_slugify/slugify/__init__.py10
-rw-r--r--third_party/python/python_slugify/slugify/__main__.py96
-rw-r--r--third_party/python/python_slugify/slugify/__version__.py8
-rw-r--r--third_party/python/python_slugify/slugify/slugify.py177
-rw-r--r--third_party/python/python_slugify/slugify/special.py47
11 files changed, 627 insertions, 0 deletions
diff --git a/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/LICENSE b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/LICENSE
new file mode 100644
index 0000000000..82af695f59
--- /dev/null
+++ b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/LICENSE
@@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) Val Neekman @ Neekware Inc. http://neekware.com
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/METADATA b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/METADATA
new file mode 100644
index 0000000000..e6948ae2f6
--- /dev/null
+++ b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/METADATA
@@ -0,0 +1,247 @@
+Metadata-Version: 2.1
+Name: python-slugify
+Version: 8.0.1
+Summary: A Python slugify application that also handles Unicode
+Home-page: https://github.com/un33k/python-slugify
+Author: Val Neekman
+Author-email: info@neekware.com
+License: MIT
+Platform: UNKNOWN
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: Natural Language :: English
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.7
+Description-Content-Type: text/markdown
+Requires-Dist: text-unidecode (>=1.3)
+Provides-Extra: unidecode
+Requires-Dist: Unidecode (>=1.1.1) ; extra == 'unidecode'
+
+# Python Slugify
+
+**A Python slugify application that handles unicode**.
+
+[![status-image]][status-link]
+[![version-image]][version-link]
+[![coverage-image]][coverage-link]
+
+# Overview
+
+**Best attempt** to create slugs from unicode strings while keeping it **DRY**.
+
+# Notice
+
+By default, this module installs and uses [text-unidecode](https://github.com/kmike/text-unidecode) _(GPL & Perl Artistic)_ for its decoding needs.
+
+However, there is an alternative decoding package called [Unidecode](https://github.com/avian2/unidecode) _(GPL)_. It can be installed as `python-slugify[unidecode]` for those who prefer it. `Unidecode` is believed to be more advanced.
+
+### `Official` Support Matrix
+
+| Python | Slugify |
+| -------------- | ------------------ |
+| `>= 2.7 < 3.6` | `< 5.0.0` |
+| `>= 3.6 < 3.7` | `>= 5.0.0 < 7.0.0` |
+| `>= 3.7` | `>= 7.0.0` |
+
+# How to install
+
+ easy_install python-slugify |OR| easy_install python-slugify[unidecode]
+ -- OR --
+ pip install python-slugify |OR| pip install python-slugify[unidecode]
+
+# Options
+
+```python
+def slugify(
+ text,
+ entities=True,
+ decimal=True,
+ hexadecimal=True,
+ max_length=0,
+ word_boundary=False,
+ separator='-',
+ save_order=False,
+ stopwords=(),
+ regex_pattern=None,
+ lowercase=True,
+ replacements=(),
+ allow_unicode=False
+ ):
+ """
+ Make a slug from the given text.
+ :param text (str): initial text
+ :param entities (bool): converts html entities to unicode (foo &amp; bar -> foo-bar)
+ :param decimal (bool): converts html decimal to unicode (&#381; -> Ž -> z)
+ :param hexadecimal (bool): converts html hexadecimal to unicode (&#x17D; -> Ž -> z)
+ :param max_length (int): output string length
+ :param word_boundary (bool): truncates to end of full words (length may be shorter than max_length)
+ :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order
+ :param separator (str): separator between words
+ :param stopwords (iterable): words to discount
+ :param regex_pattern (str): regex pattern for disallowed characters
+ :param lowercase (bool): activate case sensitivity by setting it to False
+ :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
+ :param allow_unicode (bool): allow unicode characters
+ :return (str): slugify text
+ """
+```
+
+# How to use
+
+```python
+from slugify import slugify
+
+txt = "This is a test ---"
+r = slugify(txt)
+self.assertEqual(r, "this-is-a-test")
+
+txt = '影師嗎'
+r = slugify(txt)
+self.assertEqual(r, "ying-shi-ma")
+
+txt = '影師嗎'
+r = slugify(txt, allow_unicode=True)
+self.assertEqual(r, "影師嗎")
+
+txt = 'C\'est déjà l\'été.'
+r = slugify(txt)
+self.assertEqual(r, "c-est-deja-l-ete")
+
+txt = 'Nín hǎo. Wǒ shì zhōng guó rén'
+r = slugify(txt)
+self.assertEqual(r, "nin-hao-wo-shi-zhong-guo-ren")
+
+txt = 'Компьютер'
+r = slugify(txt)
+self.assertEqual(r, "kompiuter")
+
+txt = 'jaja---lol-méméméoo--a'
+r = slugify(txt, max_length=9)
+self.assertEqual(r, "jaja-lol")
+
+txt = 'jaja---lol-méméméoo--a'
+r = slugify(txt, max_length=15, word_boundary=True)
+self.assertEqual(r, "jaja-lol-a")
+
+txt = 'jaja---lol-méméméoo--a'
+r = slugify(txt, max_length=20, word_boundary=True, separator=".")
+self.assertEqual(r, "jaja.lol.mememeoo.a")
+
+txt = 'one two three four five'
+r = slugify(txt, max_length=13, word_boundary=True, save_order=True)
+self.assertEqual(r, "one-two-three")
+
+txt = 'the quick brown fox jumps over the lazy dog'
+r = slugify(txt, stopwords=['the'])
+self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog')
+
+txt = 'the quick brown fox jumps over the lazy dog in a hurry'
+r = slugify(txt, stopwords=['the', 'in', 'a', 'hurry'])
+self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog')
+
+txt = 'thIs Has a stopword Stopword'
+r = slugify(txt, stopwords=['Stopword'], lowercase=False)
+self.assertEqual(r, 'thIs-Has-a-stopword')
+
+txt = "___This is a test___"
+regex_pattern = r'[^-a-z0-9_]+'
+r = slugify(txt, regex_pattern=regex_pattern)
+self.assertEqual(r, "___this-is-a-test___")
+
+txt = "___This is a test___"
+regex_pattern = r'[^-a-z0-9_]+'
+r = slugify(txt, separator='_', regex_pattern=regex_pattern)
+self.assertNotEqual(r, "_this_is_a_test_")
+
+txt = '10 | 20 %'
+r = slugify(txt, replacements=[['|', 'or'], ['%', 'percent']])
+self.assertEqual(r, "10-or-20-percent")
+
+txt = 'ÜBER Über German Umlaut'
+r = slugify(txt, replacements=[['Ü', 'UE'], ['ü', 'ue']])
+self.assertEqual(r, "ueber-ueber-german-umlaut")
+
+txt = 'i love 🦄'
+r = slugify(txt, allow_unicode=True)
+self.assertEqual(r, "i-love")
+
+txt = 'i love 🦄'
+r = slugify(txt, allow_unicode=True, regex_pattern=r'[^🦄]+')
+self.assertEqual(r, "🦄")
+
+```
+
+For more examples, have a look at the [test.py](test.py) file.
+
+# Command Line Options
+
+With the package, a command line tool called `slugify` is also installed.
+
+It allows convenient command line access to all the features the `slugify` function supports. Call it with `-h` for help.
+
+The command can take its input directly on the command line or from STDIN (when the `--stdin` flag is passed):
+
+```
+$ echo "Taking input from STDIN" | slugify --stdin
+taking-input-from-stdin
+```
+
+```
+$ slugify taking input from the command line
+taking-input-from-the-command-line
+```
+
+Please note that when a multi-valued option such as `--stopwords` or `--replacements` is passed, you need to use `--` as separator before you start with the input:
+
+```
+$ slugify --stopwords the in a hurry -- the quick brown fox jumps over the lazy dog in a hurry
+quick-brown-fox-jumps-over-lazy-dog
+```
+
+# Running the tests
+
+To run the tests against the current environment:
+
+ python test.py
+
+# Contribution
+
+Please read the ([wiki](https://github.com/un33k/python-slugify/wiki/Python-Slugify-Wiki)) page prior to raising any PRs.
+
+# License
+
+Released under a ([MIT](LICENSE)) license.
+
+### Notes on GPL dependencies
+Though the dependencies may be GPL licensed, `python-slugify` itself is not considered a derivative work and will remain under the MIT license.
+If you wish to avoid installation of any GPL licensed packages, please note that the default dependency `text-unidecode` explicitly lets you choose to use the [Artistic License](https://opensource.org/license/artistic-perl-1-0-2/) instead. Use without concern.
+
+# Version
+
+X.Y.Z Version
+
+ `MAJOR` version -- when you make incompatible API changes,
+ `MINOR` version -- when you add functionality in a backwards-compatible manner, and
+ `PATCH` version -- when you make backwards-compatible bug fixes.
+
+[status-image]: https://github.com/un33k/python-slugify/actions/workflows/ci.yml/badge.svg
+[status-link]: https://github.com/un33k/python-slugify/actions/workflows/ci.yml
+[version-image]: https://img.shields.io/pypi/v/python-slugify.svg
+[version-link]: https://pypi.python.org/pypi/python-slugify
+[coverage-image]: https://coveralls.io/repos/un33k/python-slugify/badge.svg
+[coverage-link]: https://coveralls.io/r/un33k/python-slugify
+[download-image]: https://img.shields.io/pypi/dm/python-slugify.svg
+[download-link]: https://pypi.python.org/pypi/python-slugify
+
+# Sponsors
+
+[Neekware Inc.](http://neekware.com)
+
+
diff --git a/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/RECORD b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/RECORD
new file mode 100644
index 0000000000..6ec16d7e1c
--- /dev/null
+++ b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/RECORD
@@ -0,0 +1,11 @@
+slugify/__init__.py,sha256=Q-9bKCQv89uf3bJr_yHxMPhBWXN8YCzlxQwK_kdpefI,346
+slugify/__main__.py,sha256=3EVQris1UpnWMgvjeVLDvzRXGBqkNkdpzFPmez5syuU,3866
+slugify/__version__.py,sha256=EzSzGa2hG-1z11YrS38w8w2tmCoQqEHO46xcvQsiFgI,325
+slugify/slugify.py,sha256=v8rRfSR2I4QiRNoG0FpL0TabbKUelZmtYnQjHfwvp6I,5795
+slugify/special.py,sha256=uV3YMYay1HTaP3nvyzaiV4FqGazjj8HmDHM1fsPQ3oo,1167
+python_slugify-8.0.1.dist-info/LICENSE,sha256=MLpNxpqfTc4TLdcDk3x6k7Vz4lJGBNLV-SxQZlFMDU8,1103
+python_slugify-8.0.1.dist-info/METADATA,sha256=LVPaRoPcTNzPsamnpcpcxsOcyiCCoGApIPuT_memhFE,8176
+python_slugify-8.0.1.dist-info/WHEEL,sha256=Z-nyYpwrcSqxfdux5Mbn_DQ525iP7J2DG3JgGvOYyTQ,110
+python_slugify-8.0.1.dist-info/entry_points.txt,sha256=vd1gzjXoYZ16TfgZThH2nhVFwhAsWCecqUMGyHijAP8,51
+python_slugify-8.0.1.dist-info/top_level.txt,sha256=D7zuR7zxISqlCxArlOOOuLsWObz1_3jgosq5XhlSpew,8
+python_slugify-8.0.1.dist-info/RECORD,,
diff --git a/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/WHEEL b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/WHEEL
new file mode 100644
index 0000000000..01b8fc7d4a
--- /dev/null
+++ b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.36.2)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
diff --git a/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/entry_points.txt b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/entry_points.txt
new file mode 100644
index 0000000000..0ef496e6f9
--- /dev/null
+++ b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/entry_points.txt
@@ -0,0 +1,3 @@
+[console_scripts]
+slugify = slugify.__main__:main
+
diff --git a/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/top_level.txt b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/top_level.txt
new file mode 100644
index 0000000000..f4843f722b
--- /dev/null
+++ b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+slugify
diff --git a/third_party/python/python_slugify/slugify/__init__.py b/third_party/python/python_slugify/slugify/__init__.py
new file mode 100644
index 0000000000..6d3279fb1a
--- /dev/null
+++ b/third_party/python/python_slugify/slugify/__init__.py
@@ -0,0 +1,10 @@
+from .special import *
+from .slugify import *
+from .__version__ import __title__
+from .__version__ import __author__
+from .__version__ import __author_email__
+from .__version__ import __description__
+from .__version__ import __url__
+from .__version__ import __license__
+from .__version__ import __copyright__
+from .__version__ import __version__
diff --git a/third_party/python/python_slugify/slugify/__main__.py b/third_party/python/python_slugify/slugify/__main__.py
new file mode 100644
index 0000000000..7dd6b01a5e
--- /dev/null
+++ b/third_party/python/python_slugify/slugify/__main__.py
@@ -0,0 +1,96 @@
+from __future__ import print_function, absolute_import
+import argparse
+import sys
+
+from .slugify import slugify, DEFAULT_SEPARATOR
+
+
def parse_args(argv):
    """
    Parse the command line arguments of the slugify tool.
    :param argv (list): full argument vector; argv[0] is skipped as the program name
    :return (argparse.Namespace): parsed options; input_string is always a str and
        replacements, when given, is a list of [original, replaced] pairs
    """
    parser = argparse.ArgumentParser(description="Slug string")

    input_group = parser.add_argument_group(description="Input")
    input_group.add_argument("input_string", nargs='*',
                             help='Text to slugify')
    input_group.add_argument("--stdin", action='store_true',
                             help="Take the text from STDIN")

    parser.add_argument("--no-entities", action='store_false', dest='entities', default=True,
                        help="Do not convert HTML entities to unicode")
    parser.add_argument("--no-decimal", action='store_false', dest='decimal', default=True,
                        help="Do not convert HTML decimal to unicode")
    parser.add_argument("--no-hexadecimal", action='store_false', dest='hexadecimal', default=True,
                        help="Do not convert HTML hexadecimal to unicode")
    parser.add_argument("--max-length", type=int, default=0,
                        help="Output string length, 0 for no limit")
    # The help texts below name the flag exactly as it is spelled on the
    # command line (--max-length), not as the Python keyword (max_length).
    parser.add_argument("--word-boundary", action='store_true', default=False,
                        help="Truncate to complete word even if length ends up shorter than --max-length")
    parser.add_argument("--save-order", action='store_true', default=False,
                        help="When set and --max-length > 0 return whole words in the initial order")
    parser.add_argument("--separator", type=str, default=DEFAULT_SEPARATOR,
                        help="Separator between words. By default " + DEFAULT_SEPARATOR)
    parser.add_argument("--stopwords", nargs='+',
                        help="Words to discount")
    parser.add_argument("--regex-pattern",
                        help="Python regex pattern for disallowed characters")
    parser.add_argument("--no-lowercase", action='store_false', dest='lowercase', default=True,
                        help="Activate case sensitivity")
    parser.add_argument("--replacements", nargs='+',
                        help="""Additional replacement rules e.g. "|->or", "%%->percent".""")
    parser.add_argument("--allow-unicode", action='store_true', default=False,
                        help="Allow unicode characters")

    args = parser.parse_args(argv[1:])

    if args.input_string and args.stdin:
        parser.error("Input strings and --stdin cannot work together")

    if args.replacements:
        def split_check(repl):
            # parser.error() exits the program, so the return below is only
            # reached for well-formed rules.
            SEP = '->'
            if SEP not in repl:
                parser.error("Replacements must be of the form: ORIGINAL{SEP}REPLACED".format(SEP=SEP))
            return repl.split(SEP, 1)
        args.replacements = [split_check(repl) for repl in args.replacements]

    if args.input_string:
        # positional words are re-joined into a single text
        args.input_string = " ".join(args.input_string)
    elif args.stdin:
        args.input_string = sys.stdin.read()

    # neither positional text nor --stdin (or empty stdin): slugify an empty string
    if not args.input_string:
        args.input_string = ''

    return args
+
+
def slugify_params(args):
    """
    Build the keyword arguments for slugify() from parsed command line options.
    :param args (argparse.Namespace): options as returned by parse_args()
    :return (dict): keyword arguments accepted by slugify()
    """
    return dict(
        text=args.input_string,
        entities=args.entities,
        decimal=args.decimal,
        hexadecimal=args.hexadecimal,
        max_length=args.max_length,
        word_boundary=args.word_boundary,
        save_order=args.save_order,
        separator=args.separator,
        stopwords=args.stopwords,
        # --regex-pattern used to be parsed but never forwarded; getattr keeps
        # namespaces built without that attribute working (None = default pattern)
        regex_pattern=getattr(args, 'regex_pattern', None),
        lowercase=args.lowercase,
        replacements=args.replacements,
        allow_unicode=args.allow_unicode
    )
+
+
def main(argv=None):  # pragma: no cover
    """ Command line entry point: slugify the input and print the result """
    # fall back to the interpreter's own argument vector
    argv = sys.argv if argv is None else argv
    options = slugify_params(parse_args(argv))
    try:
        print(slugify(**options))
    except KeyboardInterrupt:
        sys.exit(-1)


if __name__ == '__main__':  # pragma: no cover
    main()
diff --git a/third_party/python/python_slugify/slugify/__version__.py b/third_party/python/python_slugify/slugify/__version__.py
new file mode 100644
index 0000000000..a558d9bce4
--- /dev/null
+++ b/third_party/python/python_slugify/slugify/__version__.py
@@ -0,0 +1,8 @@
+__title__ = 'python-slugify'
+__author__ = 'Val Neekman'
+__author_email__ = 'info@neekware.com'
+__description__ = 'A Python slugify application that also handles Unicode'
+__url__ = 'https://github.com/un33k/python-slugify'
+__license__ = 'MIT'
+__copyright__ = 'Copyright 2022 Val Neekman @ Neekware Inc.'
+__version__ = '8.0.1'
diff --git a/third_party/python/python_slugify/slugify/slugify.py b/third_party/python/python_slugify/slugify/slugify.py
new file mode 100644
index 0000000000..5354fa5e44
--- /dev/null
+++ b/third_party/python/python_slugify/slugify/slugify.py
@@ -0,0 +1,177 @@
+import re
+import sys
+import unicodedata
+from html.entities import name2codepoint
+
+try:
+ import unidecode
+except ImportError:
+ import text_unidecode as unidecode
+
+__all__ = ['slugify', 'smart_truncate']
+
+
# HTML character references: named (&amp;), decimal (&#381;) and hexadecimal (&#x17D;)
CHAR_ENTITY_PATTERN = re.compile('&(' + '|'.join(name2codepoint) + ');')
DECIMAL_PATTERN = re.compile(r'&#(\d+);')
HEX_PATTERN = re.compile(r'&#x([\da-fA-F]+);')

# runs of single quotes
QUOTE_PATTERN = re.compile(r'[\']+')

# characters that get replaced by the separator (ASCII mode / unicode mode)
DISALLOWED_CHARS_PATTERN = re.compile(r'[^-a-zA-Z0-9]+')
DISALLOWED_UNICODE_CHARS_PATTERN = re.compile(r'[\W_]+')

# two or more consecutive dashes
DUPLICATE_DASH_PATTERN = re.compile(r'-{2,}')

# a comma sitting between two digits (1,000)
NUMBERS_PATTERN = re.compile(r'(?<=\d),(?=\d)')

DEFAULT_SEPARATOR = '-'
+
+
def smart_truncate(string, max_length=0, word_boundary=False, separator=' ', save_order=False):
    """
    Truncate a string.
    :param string (str): string for modification
    :param max_length (int): output string length; 0, None or a negative value means no limit
    :param word_boundary (bool): if True only whole words are kept, so the result
        may be shorter than max_length
    :param save_order (bool): if True then word order of output string is like input string,
        i.e. truncation stops at the first word that does not fit; if False later,
        shorter words may still be appended
    :param separator (str): separator between words
    :return (str): the truncated string, stripped of leading/trailing separator characters
    """

    string = string.strip(separator)

    # no limit requested; a negative limit would otherwise slice from the end
    if not max_length or max_length < 0:
        return string

    if len(string) < max_length:
        return string

    if not word_boundary:
        return string[:max_length].strip(separator)

    # nothing to split on (an empty separator would make str.split raise)
    if not separator or separator not in string:
        return string[:max_length]

    kept = []
    used = 0  # length of the text kept so far, including its trailing separator
    for word in string.split(separator):
        if not word:
            continue
        next_len = used + len(word)
        if next_len < max_length:
            kept.append(word)
            used = next_len + len(separator)
        elif next_len == max_length:
            kept.append(word)
            break
        elif save_order:
            break

    truncated = separator.join(kept)
    if not truncated:  # pragma: no cover
        # not even the first word fits: fall back to a plain cut
        truncated = string[:max_length]
    return truncated.strip(separator)
+
+
def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,
            separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None, lowercase=True,
            replacements=(), allow_unicode=False):
    """
    Make a slug from the given text.
    :param text (str): initial text; bytes are decoded as UTF-8, ignoring undecodable bytes
    :param entities (bool): converts html entities to unicode
    :param decimal (bool): converts html decimal to unicode
    :param hexadecimal (bool): converts html hexadecimal to unicode
    :param max_length (int): output string length
    :param word_boundary (bool): truncates to complete word even if length ends up shorter than max_length
    :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order
    :param separator (str): separator between words
    :param stopwords (iterable): words to discount
    :param regex_pattern (str): regex pattern for disallowed characters
    :param lowercase (bool): activate case sensitivity by setting it to False
    :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
    :param allow_unicode (bool): allow unicode characters
    :return (str): the slug
    """

    # ensure text is unicode -- done first so that the replacements below
    # operate on str even when the caller passed bytes
    if not isinstance(text, str):
        text = str(text, 'utf-8', 'ignore')

    # user-specific replacements
    if replacements:
        for old, new in replacements:
            text = text.replace(old, new)

    # replace quotes with dashes - pre-process
    text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)

    # decode unicode
    if not allow_unicode:
        text = unidecode.unidecode(text)

    # ensure text is still in unicode
    if not isinstance(text, str):
        text = str(text, 'utf-8', 'ignore')

    # character entity reference
    if entities:
        text = CHAR_ENTITY_PATTERN.sub(lambda m: chr(name2codepoint[m.group(1)]), text)

    # decimal character reference
    if decimal:
        try:
            text = DECIMAL_PATTERN.sub(lambda m: chr(int(m.group(1))), text)
        except (ValueError, OverflowError):
            # a reference outside the valid code point range: best effort,
            # keep the text as it was
            pass

    # hexadecimal character reference
    if hexadecimal:
        try:
            text = HEX_PATTERN.sub(lambda m: chr(int(m.group(1), 16)), text)
        except (ValueError, OverflowError):
            # same best-effort policy as for decimal references
            pass

    # translate
    text = unicodedata.normalize('NFKC' if allow_unicode else 'NFKD', text)

    # make the text lowercase (optional)
    if lowercase:
        text = text.lower()

    # remove generated quotes -- post-process
    text = QUOTE_PATTERN.sub('', text)

    # cleanup numbers
    text = NUMBERS_PATTERN.sub('', text)

    # replace all other unwanted characters
    if allow_unicode:
        pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
    else:
        pattern = regex_pattern or DISALLOWED_CHARS_PATTERN

    text = re.sub(pattern, DEFAULT_SEPARATOR, text)

    # remove redundant
    text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)

    # remove stopwords
    if stopwords:
        if lowercase:
            # text is lowercase at this point, so compare against lowercased
            # stopwords; a set gives constant-time membership tests
            stopwords = {s.lower() for s in stopwords}
        words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords]
        text = DEFAULT_SEPARATOR.join(words)

    # finalize user-specific replacements
    if replacements:
        for old, new in replacements:
            text = text.replace(old, new)

    # smart truncate if requested
    if max_length > 0:
        text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order)

    # the whole pipeline works with the default separator; swap it only at the end
    if separator != DEFAULT_SEPARATOR:
        text = text.replace(DEFAULT_SEPARATOR, separator)

    return text
diff --git a/third_party/python/python_slugify/slugify/special.py b/third_party/python/python_slugify/slugify/special.py
new file mode 100644
index 0000000000..54eb85c70e
--- /dev/null
+++ b/third_party/python/python_slugify/slugify/special.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+
+
def add_uppercase_char(char_list):
    """
    Given a replacement char list, this adds uppercase chars to the list.
    :param char_list (list): (char, translation) pairs; modified in place
    :return (list): the same list object, with each missing uppercase pair
        (char.upper(), translation.capitalize()) inserted at the front
    """

    # Iterate over a snapshot: inserting into the list while iterating it
    # shifts the remaining items and makes the loop revisit entries.
    for char, xlate in list(char_list):
        upper_pair = char.upper(), xlate.capitalize()
        # skip chars that have no distinct uppercase form and pairs already present
        if char != upper_pair[0] and upper_pair not in char_list:
            char_list.insert(0, upper_pair)
    return char_list
+
+
+# Language specific pre translations
+# Source awesome-slugify
+
# Each table lists (char, replacement) pairs; the trailing comment on a row
# is the package default noted by the original table.

_CYRILLIC = [
    ('ё', 'e'),  # io / yo
    ('я', 'ya'),  # ia
    ('х', 'h'),  # kh
    ('у', 'y'),  # u
    ('щ', 'sch'),  # sch
    ('ю', 'u'),  # iu / yu
]
# add_uppercase_char extends the table in place and returns it
CYRILLIC = add_uppercase_char(_CYRILLIC)

_GERMAN = [
    ('ä', 'ae'),  # a
    ('ö', 'oe'),  # o
    ('ü', 'ue'),  # u
]
GERMAN = add_uppercase_char(_GERMAN)

_GREEK = [
    ('χ', 'ch'),  # kh
    ('Ξ', 'X'),  # Ks
    ('ϒ', 'Y'),  # U
    ('υ', 'y'),  # u
    ('ύ', 'y'),
    ('ϋ', 'y'),
    ('ΰ', 'y'),
]
GREEK = add_uppercase_char(_GREEK)

# Pre translations: all language tables combined, Cyrillic first
PRE_TRANSLATIONS = [*CYRILLIC, *GERMAN, *GREEK]