diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/python/python_slugify | |
parent | Initial commit. (diff) | |
download | firefox-e51783d008170d9ab27d25da98ca3a38b0a41b67.tar.xz firefox-e51783d008170d9ab27d25da98ca3a38b0a41b67.zip |
Adding upstream version 124.0.1. (ref: upstream/124.0.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/python/python_slugify')
11 files changed, 627 insertions, 0 deletions
diff --git a/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/LICENSE b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/LICENSE new file mode 100644 index 0000000000..82af695f59 --- /dev/null +++ b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) Val Neekman @ Neekware Inc. http://neekware.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/METADATA b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/METADATA new file mode 100644 index 0000000000..e6948ae2f6 --- /dev/null +++ b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/METADATA @@ -0,0 +1,247 @@ +Metadata-Version: 2.1 +Name: python-slugify +Version: 8.0.1 +Summary: A Python slugify application that also handles Unicode +Home-page: https://github.com/un33k/python-slugify +Author: Val Neekman +Author-email: info@neekware.com +License: MIT +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Natural Language :: English +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Requires-Python: >=3.7 +Description-Content-Type: text/markdown +Requires-Dist: text-unidecode (>=1.3) +Provides-Extra: unidecode +Requires-Dist: Unidecode (>=1.1.1) ; extra == 'unidecode' + +# Python Slugify + +**A Python slugify application that handles unicode**. + +[![status-image]][status-link] +[![version-image]][version-link] +[![coverage-image]][coverage-link] + +# Overview + +**Best attempt** to create slugs from unicode strings while keeping it **DRY**. + +# Notice + +This module, by default installs and uses [text-unidecode](https://github.com/kmike/text-unidecode) _(GPL & Perl Artistic)_ for its decoding needs. + +However, there is an alternative decoding package called [Unidecode](https://github.com/avian2/unidecode) _(GPL)_. It can be installed as `python-slugify[unidecode]` for those who prefer it. 
`Unidecode` is believed to be more advanced. + +### `Official` Support Matrix + +| Python | Slugify | +| -------------- | ------------------ | +| `>= 2.7 < 3.6` | `< 5.0.0` | +| `>= 3.6 < 3.7` | `>= 5.0.0 < 7.0.0` | +| `>= 3.7` | `>= 7.0.0` | + +# How to install + + easy_install python-slugify |OR| easy_install python-slugify[unidecode] + -- OR -- + pip install python-slugify |OR| pip install python-slugify[unidecode] + +# Options + +```python +def slugify( + text, + entities=True, + decimal=True, + hexadecimal=True, + max_length=0, + word_boundary=False, + separator='-', + save_order=False, + stopwords=(), + regex_pattern=None, + lowercase=True, + replacements=(), + allow_unicode=False + ): + """ + Make a slug from the given text. + :param text (str): initial text + :param entities (bool): converts html entities to unicode (foo &amp; bar -> foo-bar) + :param decimal (bool): converts html decimal to unicode (&#381; -> Ž -> z) + :param hexadecimal (bool): converts html hexadecimal to unicode (&#x17D; -> Ž -> z) + :param max_length (int): output string length + :param word_boundary (bool): truncates to end of full words (length may be shorter than max_length) + :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order + :param separator (str): separator between words + :param stopwords (iterable): words to discount + :param regex_pattern (str): regex pattern for disallowed characters + :param lowercase (bool): activate case sensitivity by setting it to False + :param replacements (iterable): list of replacement rules e.g. 
[['|', 'or'], ['%', 'percent']] + :param allow_unicode (bool): allow unicode characters + :return (str): slugify text + """ +``` + +# How to use + +```python +from slugify import slugify + +txt = "This is a test ---" +r = slugify(txt) +self.assertEqual(r, "this-is-a-test") + +txt = '影師嗎' +r = slugify(txt) +self.assertEqual(r, "ying-shi-ma") + +txt = '影師嗎' +r = slugify(txt, allow_unicode=True) +self.assertEqual(r, "影師嗎") + +txt = 'C\'est déjà l\'été.' +r = slugify(txt) +self.assertEqual(r, "c-est-deja-l-ete") + +txt = 'Nín hǎo. Wǒ shì zhōng guó rén' +r = slugify(txt) +self.assertEqual(r, "nin-hao-wo-shi-zhong-guo-ren") + +txt = 'Компьютер' +r = slugify(txt) +self.assertEqual(r, "kompiuter") + +txt = 'jaja---lol-méméméoo--a' +r = slugify(txt, max_length=9) +self.assertEqual(r, "jaja-lol") + +txt = 'jaja---lol-méméméoo--a' +r = slugify(txt, max_length=15, word_boundary=True) +self.assertEqual(r, "jaja-lol-a") + +txt = 'jaja---lol-méméméoo--a' +r = slugify(txt, max_length=20, word_boundary=True, separator=".") +self.assertEqual(r, "jaja.lol.mememeoo.a") + +txt = 'one two three four five' +r = slugify(txt, max_length=13, word_boundary=True, save_order=True) +self.assertEqual(r, "one-two-three") + +txt = 'the quick brown fox jumps over the lazy dog' +r = slugify(txt, stopwords=['the']) +self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog') + +txt = 'the quick brown fox jumps over the lazy dog in a hurry' +r = slugify(txt, stopwords=['the', 'in', 'a', 'hurry']) +self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog') + +txt = 'thIs Has a stopword Stopword' +r = slugify(txt, stopwords=['Stopword'], lowercase=False) +self.assertEqual(r, 'thIs-Has-a-stopword') + +txt = "___This is a test___" +regex_pattern = r'[^-a-z0-9_]+' +r = slugify(txt, regex_pattern=regex_pattern) +self.assertEqual(r, "___this-is-a-test___") + +txt = "___This is a test___" +regex_pattern = r'[^-a-z0-9_]+' +r = slugify(txt, separator='_', regex_pattern=regex_pattern) +self.assertNotEqual(r, 
"_this_is_a_test_") + +txt = '10 | 20 %' +r = slugify(txt, replacements=[['|', 'or'], ['%', 'percent']]) +self.assertEqual(r, "10-or-20-percent") + +txt = 'ÜBER Über German Umlaut' +r = slugify(txt, replacements=[['Ü', 'UE'], ['ü', 'ue']]) +self.assertEqual(r, "ueber-ueber-german-umlaut") + +txt = 'i love 🦄' +r = slugify(txt, allow_unicode=True) +self.assertEqual(r, "i-love") + +txt = 'i love 🦄' +r = slugify(txt, allow_unicode=True, regex_pattern=r'[^🦄]+') +self.assertEqual(r, "🦄") + +``` + +For more examples, have a look at the [test.py](test.py) file. + +# Command Line Options + +With the package, a command line tool called `slugify` is also installed. + +It allows convenient command line access to all the features the `slugify` function supports. Call it with `-h` for help. + +The command can take its input directly on the command line or from STDIN (when the `--stdin` flag is passed): + +``` +$ echo "Taking input from STDIN" | slugify --stdin +taking-input-from-stdin +``` + +``` +$ slugify taking input from the command line +taking-input-from-the-command-line +``` + +Please note that when a multi-valued option such as `--stopwords` or `--replacements` is passed, you need to use `--` as separator before you start with the input: + +``` +$ slugify --stopwords the in a hurry -- the quick brown fox jumps over the lazy dog in a hurry +quick-brown-fox-jumps-over-lazy-dog +``` + +# Running the tests + +To run the tests against the current environment: + + python test.py + +# Contribution + +Please read the ([wiki](https://github.com/un33k/python-slugify/wiki/Python-Slugify-Wiki)) page prior to raising any PRs. + +# License + +Released under a ([MIT](LICENSE)) license. + +### Notes on GPL dependencies +Though the dependencies may be GPL licensed, `python-slugify` itself is not considered a derivative work and will remain under the MIT license. 
+If you wish to avoid installation of any GPL licensed packages, please note that the default dependency `text-unidecode` explicitly lets you choose to use the [Artistic License](https://opensource.org/license/artistic-perl-1-0-2/) instead. Use without concern. + +# Version + +X.Y.Z Version + + `MAJOR` version -- when you make incompatible API changes, + `MINOR` version -- when you add functionality in a backwards-compatible manner, and + `PATCH` version -- when you make backwards-compatible bug fixes. + +[status-image]: https://github.com/un33k/python-slugify/actions/workflows/ci.yml/badge.svg +[status-link]: https://github.com/un33k/python-slugify/actions/workflows/ci.yml +[version-image]: https://img.shields.io/pypi/v/python-slugify.svg +[version-link]: https://pypi.python.org/pypi/python-slugify +[coverage-image]: https://coveralls.io/repos/un33k/python-slugify/badge.svg +[coverage-link]: https://coveralls.io/r/un33k/python-slugify +[download-image]: https://img.shields.io/pypi/dm/python-slugify.svg +[download-link]: https://pypi.python.org/pypi/python-slugify + +# Sponsors + +[Neekware Inc.](http://neekware.com) + + diff --git a/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/RECORD b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/RECORD new file mode 100644 index 0000000000..6ec16d7e1c --- /dev/null +++ b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/RECORD @@ -0,0 +1,11 @@ +slugify/__init__.py,sha256=Q-9bKCQv89uf3bJr_yHxMPhBWXN8YCzlxQwK_kdpefI,346 +slugify/__main__.py,sha256=3EVQris1UpnWMgvjeVLDvzRXGBqkNkdpzFPmez5syuU,3866 +slugify/__version__.py,sha256=EzSzGa2hG-1z11YrS38w8w2tmCoQqEHO46xcvQsiFgI,325 +slugify/slugify.py,sha256=v8rRfSR2I4QiRNoG0FpL0TabbKUelZmtYnQjHfwvp6I,5795 +slugify/special.py,sha256=uV3YMYay1HTaP3nvyzaiV4FqGazjj8HmDHM1fsPQ3oo,1167 +python_slugify-8.0.1.dist-info/LICENSE,sha256=MLpNxpqfTc4TLdcDk3x6k7Vz4lJGBNLV-SxQZlFMDU8,1103 
+python_slugify-8.0.1.dist-info/METADATA,sha256=LVPaRoPcTNzPsamnpcpcxsOcyiCCoGApIPuT_memhFE,8176 +python_slugify-8.0.1.dist-info/WHEEL,sha256=Z-nyYpwrcSqxfdux5Mbn_DQ525iP7J2DG3JgGvOYyTQ,110 +python_slugify-8.0.1.dist-info/entry_points.txt,sha256=vd1gzjXoYZ16TfgZThH2nhVFwhAsWCecqUMGyHijAP8,51 +python_slugify-8.0.1.dist-info/top_level.txt,sha256=D7zuR7zxISqlCxArlOOOuLsWObz1_3jgosq5XhlSpew,8 +python_slugify-8.0.1.dist-info/RECORD,, diff --git a/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/WHEEL b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/WHEEL new file mode 100644 index 0000000000..01b8fc7d4a --- /dev/null +++ b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.36.2) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/entry_points.txt b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/entry_points.txt new file mode 100644 index 0000000000..0ef496e6f9 --- /dev/null +++ b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +slugify = slugify.__main__:main + diff --git a/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/top_level.txt b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/top_level.txt new file mode 100644 index 0000000000..f4843f722b --- /dev/null +++ b/third_party/python/python_slugify/python_slugify-8.0.1.dist-info/top_level.txt @@ -0,0 +1 @@ +slugify diff --git a/third_party/python/python_slugify/slugify/__init__.py b/third_party/python/python_slugify/slugify/__init__.py new file mode 100644 index 0000000000..6d3279fb1a --- /dev/null +++ b/third_party/python/python_slugify/slugify/__init__.py @@ -0,0 +1,10 @@ +from .special import * +from .slugify import * +from .__version__ import __title__ +from .__version__ import __author__ +from 
.__version__ import __author_email__ +from .__version__ import __description__ +from .__version__ import __url__ +from .__version__ import __license__ +from .__version__ import __copyright__ +from .__version__ import __version__ diff --git a/third_party/python/python_slugify/slugify/__main__.py b/third_party/python/python_slugify/slugify/__main__.py new file mode 100644 index 0000000000..7dd6b01a5e --- /dev/null +++ b/third_party/python/python_slugify/slugify/__main__.py @@ -0,0 +1,96 @@ +from __future__ import print_function, absolute_import +import argparse +import sys + +from .slugify import slugify, DEFAULT_SEPARATOR + + +def parse_args(argv): + parser = argparse.ArgumentParser(description="Slug string") + + input_group = parser.add_argument_group(description="Input") + input_group.add_argument("input_string", nargs='*', + help='Text to slugify') + input_group.add_argument("--stdin", action='store_true', + help="Take the text from STDIN") + + parser.add_argument("--no-entities", action='store_false', dest='entities', default=True, + help="Do not convert HTML entities to unicode") + parser.add_argument("--no-decimal", action='store_false', dest='decimal', default=True, + help="Do not convert HTML decimal to unicode") + parser.add_argument("--no-hexadecimal", action='store_false', dest='hexadecimal', default=True, + help="Do not convert HTML hexadecimal to unicode") + parser.add_argument("--max-length", type=int, default=0, + help="Output string length, 0 for no limit") + parser.add_argument("--word-boundary", action='store_true', default=False, + help="Truncate to complete word even if length ends up shorter than --max_length") + parser.add_argument("--save-order", action='store_true', default=False, + help="When set and --max_length > 0 return whole words in the initial order") + parser.add_argument("--separator", type=str, default=DEFAULT_SEPARATOR, + help="Separator between words. 
By default " + DEFAULT_SEPARATOR) + parser.add_argument("--stopwords", nargs='+', + help="Words to discount") + parser.add_argument("--regex-pattern", + help="Python regex pattern for disallowed characters") + parser.add_argument("--no-lowercase", action='store_false', dest='lowercase', default=True, + help="Activate case sensitivity") + parser.add_argument("--replacements", nargs='+', + help="""Additional replacement rules e.g. "|->or", "%%->percent".""") + parser.add_argument("--allow-unicode", action='store_true', default=False, + help="Allow unicode characters") + + args = parser.parse_args(argv[1:]) + + if args.input_string and args.stdin: + parser.error("Input strings and --stdin cannot work together") + + if args.replacements: + def split_check(repl): + SEP = '->' + if SEP not in repl: + parser.error("Replacements must be of the form: ORIGINAL{SEP}REPLACED".format(SEP=SEP)) + return repl.split(SEP, 1) + args.replacements = [split_check(repl) for repl in args.replacements] + + if args.input_string: + args.input_string = " ".join(args.input_string) + elif args.stdin: + args.input_string = sys.stdin.read() + + if not args.input_string: + args.input_string = '' + + return args + + +def slugify_params(args): + return dict( + text=args.input_string, + entities=args.entities, + decimal=args.decimal, + hexadecimal=args.hexadecimal, + max_length=args.max_length, + word_boundary=args.word_boundary, + save_order=args.save_order, + separator=args.separator, + stopwords=args.stopwords, + lowercase=args.lowercase, + replacements=args.replacements, + allow_unicode=args.allow_unicode + ) + + +def main(argv=None): # pragma: no cover + """ Run this program """ + if argv is None: + argv = sys.argv + args = parse_args(argv) + params = slugify_params(args) + try: + print(slugify(**params)) + except KeyboardInterrupt: + sys.exit(-1) + + +if __name__ == '__main__': # pragma: no cover + main() diff --git a/third_party/python/python_slugify/slugify/__version__.py 
b/third_party/python/python_slugify/slugify/__version__.py new file mode 100644 index 0000000000..a558d9bce4 --- /dev/null +++ b/third_party/python/python_slugify/slugify/__version__.py @@ -0,0 +1,8 @@ +__title__ = 'python-slugify' +__author__ = 'Val Neekman' +__author_email__ = 'info@neekware.com' +__description__ = 'A Python slugify application that also handles Unicode' +__url__ = 'https://github.com/un33k/python-slugify' +__license__ = 'MIT' +__copyright__ = 'Copyright 2022 Val Neekman @ Neekware Inc.' +__version__ = '8.0.1' diff --git a/third_party/python/python_slugify/slugify/slugify.py b/third_party/python/python_slugify/slugify/slugify.py new file mode 100644 index 0000000000..5354fa5e44 --- /dev/null +++ b/third_party/python/python_slugify/slugify/slugify.py @@ -0,0 +1,177 @@ +import re +import sys +import unicodedata +from html.entities import name2codepoint + +try: + import unidecode +except ImportError: + import text_unidecode as unidecode + +__all__ = ['slugify', 'smart_truncate'] + + +CHAR_ENTITY_PATTERN = re.compile(r'&(%s);' % '|'.join(name2codepoint)) +DECIMAL_PATTERN = re.compile(r'&#(\d+);') +HEX_PATTERN = re.compile(r'&#x([\da-fA-F]+);') +QUOTE_PATTERN = re.compile(r'[\']+') +DISALLOWED_CHARS_PATTERN = re.compile(r'[^-a-zA-Z0-9]+') +DISALLOWED_UNICODE_CHARS_PATTERN = re.compile(r'[\W_]+') +DUPLICATE_DASH_PATTERN = re.compile(r'-{2,}') +NUMBERS_PATTERN = re.compile(r'(?<=\d),(?=\d)') +DEFAULT_SEPARATOR = '-' + + +def smart_truncate(string, max_length=0, word_boundary=False, separator=' ', save_order=False): + """ + Truncate a string. 
+ :param string (str): string for modification + :param max_length (int): output string length + :param word_boundary (bool): + :param save_order (bool): if True then word order of output string is like input string + :param separator (str): separator between words + :return: + """ + + string = string.strip(separator) + + if not max_length: + return string + + if len(string) < max_length: + return string + + if not word_boundary: + return string[:max_length].strip(separator) + + if separator not in string: + return string[:max_length] + + truncated = '' + for word in string.split(separator): + if word: + next_len = len(truncated) + len(word) + if next_len < max_length: + truncated += '{}{}'.format(word, separator) + elif next_len == max_length: + truncated += '{}'.format(word) + break + else: + if save_order: + break + if not truncated: # pragma: no cover + truncated = string[:max_length] + return truncated.strip(separator) + + +def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False, + separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None, lowercase=True, + replacements=(), allow_unicode=False): + """ + Make a slug from the given text. 
+ :param text (str): initial text + :param entities (bool): converts html entities to unicode + :param decimal (bool): converts html decimal to unicode + :param hexadecimal (bool): converts html hexadecimal to unicode + :param max_length (int): output string length + :param word_boundary (bool): truncates to complete word even if length ends up shorter than max_length + :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order + :param separator (str): separator between words + :param stopwords (iterable): words to discount + :param regex_pattern (str): regex pattern for disallowed characters + :param lowercase (bool): activate case sensitivity by setting it to False + :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']] + :param allow_unicode (bool): allow unicode characters + :return (str): + """ + + # user-specific replacements + if replacements: + for old, new in replacements: + text = text.replace(old, new) + + # ensure text is unicode + if not isinstance(text, str): + text = str(text, 'utf-8', 'ignore') + + # replace quotes with dashes - pre-process + text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text) + + # decode unicode + if not allow_unicode: + text = unidecode.unidecode(text) + + # ensure text is still in unicode + if not isinstance(text, str): + text = str(text, 'utf-8', 'ignore') + + # character entity reference + if entities: + text = CHAR_ENTITY_PATTERN.sub(lambda m: chr(name2codepoint[m.group(1)]), text) + + # decimal character reference + if decimal: + try: + text = DECIMAL_PATTERN.sub(lambda m: chr(int(m.group(1))), text) + except Exception: + pass + + # hexadecimal character reference + if hexadecimal: + try: + text = HEX_PATTERN.sub(lambda m: chr(int(m.group(1), 16)), text) + except Exception: + pass + + # translate + if allow_unicode: + text = unicodedata.normalize('NFKC', text) + else: + text = unicodedata.normalize('NFKD', text) + + if sys.version_info < 
(3,): + text = text.encode('ascii', 'ignore') + + # make the text lowercase (optional) + if lowercase: + text = text.lower() + + # remove generated quotes -- post-process + text = QUOTE_PATTERN.sub('', text) + + # cleanup numbers + text = NUMBERS_PATTERN.sub('', text) + + # replace all other unwanted characters + if allow_unicode: + pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN + else: + pattern = regex_pattern or DISALLOWED_CHARS_PATTERN + + text = re.sub(pattern, DEFAULT_SEPARATOR, text) + + # remove redundant + text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR) + + # remove stopwords + if stopwords: + if lowercase: + stopwords_lower = [s.lower() for s in stopwords] + words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower] + else: + words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords] + text = DEFAULT_SEPARATOR.join(words) + + # finalize user-specific replacements + if replacements: + for old, new in replacements: + text = text.replace(old, new) + + # smart truncate if requested + if max_length > 0: + text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order) + + if separator != DEFAULT_SEPARATOR: + text = text.replace(DEFAULT_SEPARATOR, separator) + + return text diff --git a/third_party/python/python_slugify/slugify/special.py b/third_party/python/python_slugify/slugify/special.py new file mode 100644 index 0000000000..54eb85c70e --- /dev/null +++ b/third_party/python/python_slugify/slugify/special.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- + + +def add_uppercase_char(char_list): + """ Given a replacement char list, this adds uppercase chars to the list """ + + for item in char_list: + char, xlate = item + upper_dict = char.upper(), xlate.capitalize() + if upper_dict not in char_list and char != upper_dict[0]: + char_list.insert(0, upper_dict) + return char_list + + +# Language specific pre translations +# Source awesome-slugify + +_CYRILLIC = 
[ # package defaults: + (u'ё', u'e'), # io / yo + (u'я', u'ya'), # ia + (u'х', u'h'), # kh + (u'у', u'y'), # u + (u'щ', u'sch'), # sch + (u'ю', u'u'), # iu / yu +] +CYRILLIC = add_uppercase_char(_CYRILLIC) + +_GERMAN = [ # package defaults: + (u'ä', u'ae'), # a + (u'ö', u'oe'), # o + (u'ü', u'ue'), # u +] +GERMAN = add_uppercase_char(_GERMAN) + +_GREEK = [ # package defaults: + (u'χ', u'ch'), # kh + (u'Ξ', u'X'), # Ks + (u'ϒ', u'Y'), # U + (u'υ', u'y'), # u + (u'ύ', u'y'), + (u'ϋ', u'y'), + (u'ΰ', u'y'), +] +GREEK = add_uppercase_char(_GREEK) + +# Pre translations +PRE_TRANSLATIONS = CYRILLIC + GERMAN + GREEK |