summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2021-09-23 11:19:40 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2021-09-23 11:19:40 +0000
commit5ce97f566d2f8ce938c252e7eb7422451ae5eec2 (patch)
treee0648504a430d68fb9fa2e705a0c0407f909becf
parentInitial commit. (diff)
downloadjaraco.text-5ce97f566d2f8ce938c252e7eb7422451ae5eec2.tar.xz
jaraco.text-5ce97f566d2f8ce938c252e7eb7422451ae5eec2.zip
Adding upstream version 3.5.0.upstream/3.5.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r--.coveragerc5
-rw-r--r--.flake89
-rw-r--r--.github/workflows/automerge.yml27
-rw-r--r--.github/workflows/main.yml42
-rw-r--r--.pre-commit-config.yaml10
-rw-r--r--.readthedocs.yml6
-rw-r--r--CHANGES.rst122
-rw-r--r--LICENSE19
-rw-r--r--README.rst18
-rw-r--r--docs/conf.py26
-rw-r--r--docs/history.rst8
-rw-r--r--docs/index.rst22
-rw-r--r--jaraco/text/Lorem ipsum.txt2
-rw-r--r--jaraco/text/__init__.py529
-rw-r--r--mypy.ini2
-rw-r--r--pyproject.toml20
-rw-r--r--pytest.ini9
-rw-r--r--setup.cfg53
-rw-r--r--setup.py6
-rw-r--r--skeleton.md166
-rw-r--r--tox.ini40
21 files changed, 1141 insertions, 0 deletions
diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..4582306
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,5 @@
+[run]
+omit = .tox/*
+
+[report]
+show_missing = True
diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..48b2e24
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,9 @@
+[flake8]
+max-line-length = 88
+
+# jaraco/skeleton#34
+max-complexity = 10
+
+extend-ignore =
+ # Black creates whitespace before colon
+ E203
diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml
new file mode 100644
index 0000000..4f70acf
--- /dev/null
+++ b/.github/workflows/automerge.yml
@@ -0,0 +1,27 @@
+name: automerge
+on:
+ pull_request:
+ types:
+ - labeled
+ - unlabeled
+ - synchronize
+ - opened
+ - edited
+ - ready_for_review
+ - reopened
+ - unlocked
+ pull_request_review:
+ types:
+ - submitted
+ check_suite:
+ types:
+ - completed
+ status: {}
+jobs:
+ automerge:
+ runs-on: ubuntu-latest
+ steps:
+ - name: automerge
+ uses: "pascalgn/automerge-action@v0.12.0"
+ env:
+ GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 0000000..6a8ff00
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,42 @@
+name: tests
+
+on: [push, pull_request]
+
+jobs:
+ test:
+ strategy:
+ matrix:
+ python: [3.6, 3.8, 3.9]
+ platform: [ubuntu-latest, macos-latest, windows-latest]
+ runs-on: ${{ matrix.platform }}
+ steps:
+ - uses: actions/checkout@v2
+ - name: Setup Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python }}
+ - name: Install tox
+ run: |
+ python -m pip install tox
+ - name: Run tests
+ run: tox
+
+ release:
+ needs: test
+ if: github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: Setup Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.9
+ - name: Install tox
+ run: |
+ python -m pip install tox
+ - name: Release
+ run: tox -e release
+ env:
+ TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..c15ab0c
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,10 @@
+repos:
+- repo: https://github.com/psf/black
+ rev: 20.8b1
+ hooks:
+ - id: black
+
+- repo: https://github.com/asottile/blacken-docs
+ rev: v1.9.1
+ hooks:
+ - id: blacken-docs
diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 0000000..cc69854
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,6 @@
+version: 2
+python:
+ install:
+ - path: .
+ extra_requirements:
+ - docs
diff --git a/CHANGES.rst b/CHANGES.rst
new file mode 100644
index 0000000..3bde24e
--- /dev/null
+++ b/CHANGES.rst
@@ -0,0 +1,122 @@
+v3.5.0
+======
+
+Rely on PEP 420 for namespace package.
+
+v3.4.0
+======
+
+Added ``WordSet.trim*`` methods.
+
+v3.3.0
+======
+
+Require Python 3.6 or later.
+
+v3.2.0
+======
+
+Added normalize_newlines function.
+
+3.1
+===
+
+Added ``wrap`` and ``unwrap`` functions and ``lorem_ipsum``
+attribute containing the Lorem Ipsum sample text.
+
+3.0.1
+=====
+
+Declare missing dependency on six.
+
+3.0
+===
+
+Removed ``local_format``, ``global_format``, and
+``namespace_format``. Instead, developers should
+use `f-strings
+<https://docs.python.org/3.6/reference/lexical_analysis.html#f-strings>`_
+on Python 3.6 and later or `future-fstrings
+<https://pypi.org/project/future-fstrings>`_ for compatibility
+with older Pythons. This change eliminates the dependency on
+jaraco.collections and thus for now removes the circular dependency
+as reported in #3.
+
+2.0
+===
+
+Switch to `pkgutil namespace technique
+<https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages>`_
+for the ``jaraco`` namespace.
+
+1.10.1
+======
+
+Packaging refresh. Docs now published in RTD.
+
+1.10
+====
+
+FoldedCase now supports string containment in an
+unfortunately asymmetric way.
+
+1.9.2
+=====
+
+Fix bug where ``FoldedCase.__ne__`` was case-sensitive.
+
+1.9.1
+=====
+
+Refresh packaging.
+
+1.9
+===
+
+Synchronize with skeleton.
+
+Update docs and expand tests on FoldedCase.
+
+Use method_cache for ``FoldedCase.lower``.
+
+1.8
+===
+
+Add remove_prefix and remove_suffix helpers.
+
+1.7
+===
+
+In Stripper, always strip the prefix, even if it's empty.
+
+1.6.2
+=====
+
+Issue #1: Fix WordSet on Python 2.
+
+1.6
+===
+
+Drop dependency on jaraco.context (and its dependencies).
+
+1.5
+===
+
+Move hosting to github.
+
+Add missing namespace package declaration in distribution.
+
+1.4
+===
+
+Add Stripper class.
+
+1.3
+===
+
+Add SeparatedValues class.
+
+1.0
+===
+
+Initial implementation adopted from jaraco.util.string 10.8.
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..353924b
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,19 @@
+Copyright Jason R. Coombs
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE.
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..5055a1d
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,18 @@
+.. image:: https://img.shields.io/pypi/v/jaraco.text.svg
+ :target: `PyPI link`_
+
+.. image:: https://img.shields.io/pypi/pyversions/jaraco.text.svg
+ :target: `PyPI link`_
+
+.. _PyPI link: https://pypi.org/project/jaraco.text
+
+.. image:: https://github.com/jaraco/jaraco.text/workflows/tests/badge.svg
+ :target: https://github.com/jaraco/jaraco.text/actions?query=workflow%3A%22tests%22
+ :alt: tests
+
+.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
+ :target: https://github.com/psf/black
+ :alt: Code style: Black
+
+.. image:: https://readthedocs.org/projects/jaracotext/badge/?version=latest
+ :target: https://jaracotext.readthedocs.io/en/latest/?badge=latest
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..433d185
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+extensions = ['sphinx.ext.autodoc', 'jaraco.packaging.sphinx', 'rst.linker']
+
+master_doc = "index"
+
+link_files = {
+ '../CHANGES.rst': dict(
+ using=dict(GH='https://github.com'),
+ replace=[
+ dict(
+ pattern=r'(Issue #|\B#)(?P<issue>\d+)',
+ url='{package_url}/issues/{issue}',
+ ),
+ dict(
+ pattern=r'(?m:^((?P<scm_version>v?\d+(\.\d+){1,2}))\n[-=]+\n)',
+ with_scm='{text}\n{rev[timestamp]:%d %b %Y}\n',
+ ),
+ dict(
+ pattern=r'PEP[- ](?P<pep_number>\d+)',
+ url='https://www.python.org/dev/peps/pep-{pep_number:0>4}/',
+ ),
+ ],
+ )
+}
diff --git a/docs/history.rst b/docs/history.rst
new file mode 100644
index 0000000..8e21750
--- /dev/null
+++ b/docs/history.rst
@@ -0,0 +1,8 @@
+:tocdepth: 2
+
+.. _changes:
+
+History
+*******
+
+.. include:: ../CHANGES (links).rst
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..5c77b41
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,22 @@
+Welcome to |project| documentation!
+===================================
+
+.. toctree::
+ :maxdepth: 1
+
+ history
+
+
+.. automodule:: jaraco.text
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/jaraco/text/Lorem ipsum.txt b/jaraco/text/Lorem ipsum.txt
new file mode 100644
index 0000000..986f944
--- /dev/null
+++ b/jaraco/text/Lorem ipsum.txt
@@ -0,0 +1,2 @@
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus magna felis sollicitudin mauris. Integer in mauris eu nibh euismod gravida. Duis ac tellus et risus vulputate vehicula. Donec lobortis risus a elit. Etiam tempor. Ut ullamcorper, ligula eu tempor congue, eros est euismod turpis, id tincidunt sapien risus a quam. Maecenas fermentum consequat mi. Donec fermentum. Pellentesque malesuada nulla a mi. Duis sapien sem, aliquet nec, commodo eget, consequat quis, neque. Aliquam faucibus, elit ut dictum aliquet, felis nisl adipiscing sapien, sed malesuada diam lacus eget erat. Cras mollis scelerisque nunc. Nullam arcu. Aliquam consequat. Curabitur augue lorem, dapibus quis, laoreet et, pretium ac, nisi. Aenean magna nisl, mollis quis, molestie eu, feugiat in, orci. In hac habitasse platea dictumst.
diff --git a/jaraco/text/__init__.py b/jaraco/text/__init__.py
new file mode 100644
index 0000000..c37899c
--- /dev/null
+++ b/jaraco/text/__init__.py
@@ -0,0 +1,529 @@
+import re
+import itertools
+import textwrap
+import functools
+
+try:
+ from importlib import resources # type: ignore
+except ImportError: # pragma: nocover
+ import importlib_resources as resources # type: ignore
+
+from jaraco.functools import compose, method_cache
+
+
def substitution(old, new):
    """
    Build a callable that replaces every occurrence of ``old``
    with ``new`` in the string it is given.
    """

    def replace(s):
        return s.replace(old, new)

    return replace
+
+
def multi_substitution(*substitutions):
    """
    Given any number of ``(old, new)`` pairs, build a single function
    that applies the corresponding replacements in the order given.

    >>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo')
    'baz'
    """
    replacers = tuple(substitution(*pair) for pair in substitutions)
    # compose() runs its right-most argument first; hand the replacers
    # over back-to-front so they execute in the order they were supplied.
    return compose(*replacers[::-1])
+
+
class FoldedCase(str):
    """
    A string subclass whose comparisons, hashing and containment
    checks ignore case; in every other respect it acts like str.

    >>> s = FoldedCase('hello world')

    >>> s == 'Hello World'
    True

    >>> 'Hello World' == s
    True

    >>> s != 'Hello World'
    False

    >>> s.index('O')
    4

    >>> s.split('O')
    ['hell', ' w', 'rld']

    >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
    ['alpha', 'Beta', 'GAMMA']

    Membership in a sequence works in either direction.

    >>> "Hello World" in [s]
    True
    >>> s in ["Hello World"]
    True

    Membership in a set works too, provided both the candidate and
    the elements are folded.

    >>> FoldedCase("Hello World") in {s}
    True
    >>> s in {FoldedCase("Hello World")}
    True

    Substring tests work when the FoldedCase object is the container
    (the right-hand operand).

    >>> "hello" in FoldedCase("Hello World")
    True

    They do not when the FoldedCase object is the left-hand operand:

    >>> FoldedCase('hello') in 'Hello World'
    False

    For that direction, call in_:

    >>> FoldedCase('hello').in_('Hello World')
    True

    >>> FoldedCase('hello') > FoldedCase('Hello')
    False
    """

    def __lt__(self, other):
        mine = self.lower()
        return mine < other.lower()

    def __gt__(self, other):
        mine = self.lower()
        return mine > other.lower()

    def __eq__(self, other):
        mine = self.lower()
        return mine == other.lower()

    def __ne__(self, other):
        mine = self.lower()
        return mine != other.lower()

    def __hash__(self):
        # hash the folded form so values that compare equal hash alike
        return hash(self.lower())

    def __contains__(self, other):
        haystack = super().lower()
        return haystack.__contains__(other.lower())

    def in_(self, other):
        "Does self appear in other?"
        container = FoldedCase(other)
        return self in container

    # lower() is consulted by nearly every method above, so its
    # result is cached.
    @method_cache
    def lower(self):
        return super().lower()

    def index(self, sub):
        folded = self.lower()
        return folded.index(sub.lower())

    def split(self, splitter=' ', maxsplit=0):
        # split on the literal splitter text, ignoring case
        return re.split(re.escape(splitter), self, maxsplit=maxsplit, flags=re.I)
+
+
def is_decodable(value):
    r"""
    Report whether ``value`` can be decoded using the default
    encoding.

    >>> is_decodable(b'\xff')
    False
    >>> is_decodable(b'\x32')
    True
    """
    # TODO: jaraco.context.ExceptionTrap would express this more
    # concisely and directly, but it drags in a long dependency tree,
    # so for now the outcome is spelled out with boolean literals.
    try:
        value.decode()
    except UnicodeDecodeError:
        decodable = False
    else:
        decodable = True
    return decodable
+
+
def is_binary(value):
    r"""
    Report whether ``value`` looks like binary data: it must be a
    byte string, and that byte string must not be decodable.

    >>> is_binary(b'\xff')
    True
    >>> is_binary('\xff')
    False
    """
    if not isinstance(value, bytes):
        return False
    return not is_decodable(value)
+
+
def trim(s):
    r"""
    Strip the shared leading indentation from a docstring-like block
    of text, then drop the surrounding whitespace.

    >>> trim("\n\tfoo = bar\n\t\tbar = baz\n")
    'foo = bar\n\tbar = baz'
    """
    dedented = textwrap.dedent(s)
    return dedented.strip()
+
+
def wrap(s):
    """
    Re-flow text to the default textwrap width. Each existing line is
    treated as one paragraph, and the wrapped paragraphs are joined
    by a blank line.

    >>> print(wrap(lorem_ipsum))
    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
    eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad
    minim veniam, quis nostrud exercitation ullamco laboris nisi ut
    aliquip ex ea commodo consequat. Duis aute irure dolor in
    reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
    pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
    culpa qui officia deserunt mollit anim id est laborum.
    <BLANKLINE>
    Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam
    varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus
    magna felis sollicitudin mauris. Integer in mauris eu nibh euismod
    gravida. Duis ac tellus et risus vulputate vehicula. Donec lobortis
    risus a elit. Etiam tempor. Ut ullamcorper, ligula eu tempor congue,
    eros est euismod turpis, id tincidunt sapien risus a quam. Maecenas
    fermentum consequat mi. Donec fermentum. Pellentesque malesuada nulla
    a mi. Duis sapien sem, aliquet nec, commodo eget, consequat quis,
    neque. Aliquam faucibus, elit ut dictum aliquet, felis nisl adipiscing
    sapien, sed malesuada diam lacus eget erat. Cras mollis scelerisque
    nunc. Nullam arcu. Aliquam consequat. Curabitur augue lorem, dapibus
    quis, laoreet et, pretium ac, nisi. Aenean magna nisl, mollis quis,
    molestie eu, feugiat in, orci. In hac habitasse platea dictumst.
    """
    filled = []
    for paragraph in s.splitlines():
        filled.append('\n'.join(textwrap.wrap(paragraph)))
    return '\n\n'.join(filled)
+
+
def unwrap(s):
    r"""
    Undo wrapping: runs of blank lines delimit paragraphs, and each
    paragraph is collapsed onto a single line.

    >>> wrapped = wrap(lorem_ipsum)
    >>> wrapped.count('\n')
    20
    >>> unwrapped = unwrap(wrapped)
    >>> unwrapped.count('\n')
    1
    >>> print(unwrapped)
    Lorem ipsum dolor sit amet, consectetur adipiscing ...
    Curabitur pretium tincidunt lacus. Nulla gravida orci ...

    """
    joined = []
    for paragraph in re.split(r'\n\n+', s):
        joined.append(paragraph.replace('\n', ' '))
    return '\n'.join(joined)
+
+
# Sample text, read at import time from the 'Lorem ipsum.txt' resource
# of this package.
lorem_ipsum = resources.read_text(__name__, 'Lorem ipsum.txt')  # type: ignore
+
+
class Splitter:
    """
    Callable that remembers a set of ``str.split`` arguments and
    applies them to every string it is called with.

    >>> s = Splitter(',')
    >>> s('hello, world, this is your, master calling')
    ['hello', ' world', ' this is your', ' master calling']
    """

    def __init__(self, *args):
        # positional arguments forwarded verbatim to str.split
        self.args = args

    def __call__(self, s):
        split_args = self.args
        return s.split(*split_args)
+
+
def indent(string, prefix='    '):
    """
    Prepend ``prefix`` (four spaces unless overridden) to ``string``.

    >>> indent('foo')
    '    foo'
    """
    indented = prefix + string
    return indented
+
+
class WordSet(tuple):
    """
    The sequence of words that make up an identifier, however that
    identifier was written (camel case, underscore-separated, etc.).

    >>> WordSet.parse("camelCase")
    ('camel', 'Case')

    >>> WordSet.parse("under_sep")
    ('under', 'sep')

    Acronyms are kept intact

    >>> WordSet.parse("firstSNL")
    ('first', 'SNL')

    >>> WordSet.parse("you_and_I")
    ('you', 'and', 'I')

    >>> WordSet.parse("A simple test")
    ('A', 'simple', 'test')

    A run of capitals does not swallow the capital that starts the
    following word.

    >>> WordSet.parse("myABCClass")
    ('my', 'ABC', 'Class')

    Because the result is a WordSet, it can be rendered in other forms.

    >>> WordSet.parse("myABCClass").underscore_separated()
    'my_ABC_Class'

    >>> WordSet.parse('a-command').camel_case()
    'ACommand'

    >>> WordSet.parse('someIdentifier').lowered().space_separated()
    'some identifier'

    Slicing a WordSet yields another WordSet.

    >>> WordSet.parse('taken-out-of-context')[1:].underscore_separated()
    'out_of_context'

    >>> WordSet.from_class_name(WordSet()).lowered().space_separated()
    'word set'

    >>> example = WordSet.parse('figured it out')
    >>> example.headless_camel_case()
    'figuredItOut'
    >>> example.dash_separated()
    'figured-it-out'

    """

    # a word is an optional capital followed by lowercase letters, or
    # a run of capitals not followed by a lowercase letter
    _pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))')

    def capitalized(self):
        return WordSet(each.capitalize() for each in self)

    def lowered(self):
        return WordSet(each.lower() for each in self)

    def camel_case(self):
        capitalized = self.capitalized()
        return ''.join(capitalized)

    def headless_camel_case(self):
        remaining = iter(self)
        head = next(remaining).lower()
        tail = WordSet(remaining).camel_case()
        return head + tail

    def underscore_separated(self):
        return '_'.join(self)

    def dash_separated(self):
        return '-'.join(self)

    def space_separated(self):
        return ' '.join(self)

    def trim_right(self, item):
        """
        Drop ``item`` from the end of the set when it is the last word.

        >>> WordSet.parse('foo bar').trim_right('foo')
        ('foo', 'bar')
        >>> WordSet.parse('foo bar').trim_right('bar')
        ('foo',)
        >>> WordSet.parse('').trim_right('bar')
        ()
        """
        if self and self[-1] == item:
            return self[:-1]
        return self

    def trim_left(self, item):
        """
        Drop ``item`` from the start of the set when it is the first word.

        >>> WordSet.parse('foo bar').trim_left('foo')
        ('bar',)
        >>> WordSet.parse('foo bar').trim_left('bar')
        ('foo', 'bar')
        >>> WordSet.parse('').trim_left('bar')
        ()
        """
        if self and self[0] == item:
            return self[1:]
        return self

    def trim(self, item):
        """
        Apply trim_left and then trim_right for ``item``.

        >>> WordSet.parse('foo bar').trim('foo')
        ('bar',)
        """
        left_trimmed = self.trim_left(item)
        return left_trimmed.trim_right(item)

    def __getitem__(self, item):
        value = super().__getitem__(item)
        # a slice comes back as a plain tuple; re-wrap it
        return WordSet(value) if isinstance(item, slice) else value

    # for compatibility with Python 2
    def __getslice__(self, i, j):  # pragma: nocover
        return self.__getitem__(slice(i, j))

    @classmethod
    def parse(cls, identifier):
        found = cls._pattern.finditer(identifier)
        return WordSet([each.group(0) for each in found])

    @classmethod
    def from_class_name(cls, subject):
        class_name = subject.__class__.__name__
        return cls.parse(class_name)


# for backward compatibility
words = WordSet.parse
+
+
def simple_html_strip(s):
    r"""
    Return ``s`` with HTML comments and tags removed, keeping only
    the text found between them.

    >>> str(simple_html_strip(''))
    ''

    >>> print(simple_html_strip('A <bold>stormy</bold> day in paradise'))
    A stormy day in paradise

    >>> print(simple_html_strip('Somebody <!-- do not --> tell the truth.'))
    Somebody  tell the truth.

    >>> print(simple_html_strip('What about<br/>\nmultiple lines?'))
    What about
    multiple lines?
    """
    # group 1: comment, group 2: tag, group 3: plain text to keep
    pattern = re.compile('(<!--.*?-->)|(<[^>]*>)|([^<]+)', re.DOTALL)
    kept = []
    for found in pattern.finditer(s):
        text = found.group(3)
        if text:
            kept.append(text)
    return ''.join(kept)
+
+
class SeparatedValues(str):
    """
    A string holding several values joined by ``separator``.
    Iterating over it yields the individual values.

    >>> list(SeparatedValues('a,b,c'))
    ['a', 'b', 'c']

    Surrounding whitespace is removed from each value, and values
    that end up empty are dropped.

    >>> list(SeparatedValues(' a, b , c, '))
    ['a', 'b', 'c']
    """

    separator = ','

    def __iter__(self):
        pieces = self.split(self.separator)
        stripped = map(str.strip, pieces)
        return filter(None, stripped)
+
+
class Stripper:
    r"""
    Given a series of lines, find the common prefix and strip it from them.

    >>> lines = [
    ...     'abcdefg\n',
    ...     'abc\n',
    ...     'abcde\n',
    ... ]
    >>> res = Stripper.strip_prefix(lines)
    >>> res.prefix
    'abc'
    >>> list(res.lines)
    ['defg\n', '\n', 'de\n']

    If no prefix is common, nothing should be stripped.

    >>> lines = [
    ...     'abcd\n',
    ...     '1234\n',
    ... ]
    >>> res = Stripper.strip_prefix(lines)
    >>> res.prefix
    ''
    >>> list(res.lines)
    ['abcd\n', '1234\n']
    """

    def __init__(self, prefix, lines):
        """
        Store ``prefix`` and expose ``lines`` as a lazy iterator of
        the supplied lines with that prefix removed.
        """
        self.prefix = prefix
        # map() is lazy: each line is stripped only when consumed
        self.lines = map(self, lines)

    @classmethod
    def strip_prefix(cls, lines):
        """
        Compute the prefix shared by every line in ``lines`` and
        return a Stripper built from that prefix and those lines.
        """
        # tee so the lines can be read once to find the prefix and
        # again to produce the stripped output
        prefix_lines, lines = itertools.tee(lines)
        prefix = functools.reduce(cls.common_prefix, prefix_lines)
        return cls(prefix, lines)

    def __call__(self, line):
        """
        Return ``line`` without the leading prefix. A line that does
        not start with the prefix is returned unchanged.
        """
        prefix = self.prefix
        # Only a leading match is a prefix. Slicing (instead of
        # partition) avoids discarding a line that lacks the prefix
        # and avoids cutting at an occurrence in the middle of a line.
        if prefix and line.startswith(prefix):
            return line[len(prefix):]
        return line

    @staticmethod
    def common_prefix(s1, s2):
        """
        Return the common prefix of two lines.
        """
        # start from the longest candidate and shrink until both
        # strings agree; the empty prefix always matches
        index = min(len(s1), len(s2))
        while s1[:index] != s2[:index]:
            index -= 1
        return s1[:index]
+
+
def remove_prefix(text, prefix):
    """
    Remove the prefix from the text if it exists.

    Only a match at the very start of ``text`` counts as a prefix;
    occurrences elsewhere are left alone, and an empty ``prefix``
    returns ``text`` unchanged.

    >>> remove_prefix('underwhelming performance', 'underwhelming ')
    'performance'

    >>> remove_prefix('something special', 'sample')
    'something special'

    >>> remove_prefix('ababc', 'ab')
    'abc'

    >>> remove_prefix('a-b-c', 'b-')
    'a-b-c'
    """
    if prefix and text.startswith(prefix):
        return text[len(prefix):]
    return text
+
+
def remove_suffix(text, suffix):
    """
    Remove the suffix from the text if it exists.

    Only a match at the very end of ``text`` counts as a suffix;
    occurrences elsewhere are left alone, and an empty ``suffix``
    returns ``text`` unchanged.

    >>> remove_suffix('name.git', '.git')
    'name'

    >>> remove_suffix('something special', 'sample')
    'something special'

    >>> remove_suffix('name.git.git', '.git')
    'name.git'

    >>> remove_suffix('name.git.txt', '.git')
    'name.git.txt'
    """
    # guard against an empty suffix: text[:-0] would be the empty string
    if suffix and text.endswith(suffix):
        return text[: -len(suffix)]
    return text
+
+
def normalize_newlines(text):
    r"""
    Convert every alternate line ending in ``text`` to the canonical
    newline, ``\n``.

    >>> normalize_newlines('Lorem Ipsum\u2029')
    'Lorem Ipsum\n'
    >>> normalize_newlines('Lorem Ipsum\r\n')
    'Lorem Ipsum\n'
    >>> normalize_newlines('Lorem Ipsum\x85')
    'Lorem Ipsum\n'
    """
    # '\r\n' is listed first so the pair is replaced as a single unit
    newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029']
    matcher = re.compile('|'.join(newlines))
    return matcher.sub('\n', text)
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 0000000..976ba02
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,2 @@
+[mypy]
+ignore_missing_imports = True
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..b6ebc0b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,20 @@
+[build-system]
+requires = ["setuptools>=42", "wheel", "setuptools_scm[toml]>=3.4.1"]
+build-backend = "setuptools.build_meta"
+
+[tool.black]
+skip-string-normalization = true
+
+[tool.setuptools_scm]
+
+[pytest.enabler.black]
+addopts = "--black"
+
+[pytest.enabler.mypy]
+addopts = "--mypy"
+
+[pytest.enabler.flake8]
+addopts = "--flake8"
+
+[pytest.enabler.cov]
+addopts = "--cov"
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..d7f0b11
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,9 @@
+[pytest]
+norecursedirs=dist build .tox .eggs
+addopts=--doctest-modules
+doctest_optionflags=ALLOW_UNICODE ELLIPSIS
+# workaround for warning pytest-dev/pytest#6178
+junit_family=xunit2
+filterwarnings=
+ # https://github.com/pytest-dev/pytest/issues/6928
+ ignore:direct construction of .*Item has been deprecated:DeprecationWarning
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..355028b
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,53 @@
+[metadata]
+license_files =
+ LICENSE
+name = jaraco.text
+author = Jason R. Coombs
+author_email = jaraco@jaraco.com
+description = Module for text manipulation
+long_description = file:README.rst
+url = https://github.com/jaraco/jaraco.text
+classifiers =
+ Development Status :: 5 - Production/Stable
+ Intended Audience :: Developers
+ License :: OSI Approved :: MIT License
+ Programming Language :: Python :: 3
+ Programming Language :: Python :: 3 :: Only
+
+[options]
+packages = find_namespace:
+include_package_data = true
+python_requires = >=3.6
+install_requires =
+ jaraco.functools
+ importlib_resources; python_version < "3.7"
+setup_requires = setuptools_scm[toml] >= 3.4.1
+
+[options.packages.find]
+exclude =
+ build*
+ docs*
+ tests*
+
+[options.extras_require]
+testing =
+ # upstream
+ pytest >= 3.5, !=3.7.3
+ pytest-checkdocs >= 1.2.3
+ pytest-flake8
+ pytest-black >= 0.3.7; python_implementation != "PyPy"
+ pytest-cov
+ pytest-mypy; python_implementation != "PyPy"
+ pytest-enabler
+
+ # local
+
+docs =
+ # upstream
+ sphinx
+ jaraco.packaging >= 8.2
+ rst.linker >= 1.9
+
+ # local
+
+[options.entry_points]
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..bac24a4
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+import setuptools
+
+if __name__ == "__main__":
+ setuptools.setup()
diff --git a/skeleton.md b/skeleton.md
new file mode 100644
index 0000000..0938f89
--- /dev/null
+++ b/skeleton.md
@@ -0,0 +1,166 @@
+# Overview
+
+This project is merged with [skeleton](https://github.com/jaraco/skeleton). What is skeleton? It's the scaffolding of a Python project jaraco [introduced in his blog](https://blog.jaraco.com/a-project-skeleton-for-python-projects/). It seeks to provide a means to re-use techniques and inherit advances when managing projects for distribution.
+
+## An SCM-Managed Approach
+
+While maintaining dozens of projects in PyPI, jaraco derives best practices for project distribution and publishes them in the [skeleton repo](https://github.com/jaraco/skeleton), a Git repo capturing the evolution and culmination of these best practices.
+
+It's intended to be used by a new or existing project to adopt these practices and their honed, proven techniques. Adopters are encouraged to use the project directly and maintain a small deviation from the technique, make their own fork for more substantial changes unique to their environment or preferences, or simply adopt the skeleton once and abandon it thereafter.
+
+The primary advantage to using an SCM for maintaining these techniques is that those tools help facilitate the merge between the template and its adopting projects.
+
+Another advantage to using an SCM-managed approach is that tools like GitHub recognize that a change in the skeleton is the _same change_ across all projects that merge with that skeleton. Without the ancestry, with a traditional copy/paste approach, a [commit like this](https://github.com/jaraco/skeleton/commit/12eed1326e1bc26ce256e7b3f8cd8d3a5beab2d5) would produce notifications in the upstream project issue for each and every application, but because it's centralized, GitHub provides just the one notification when the change is added to the skeleton.
+
+# Usage
+
+## new projects
+
+To use skeleton for a new project, simply pull the skeleton into a new project:
+
+```
+$ git init my-new-project
+$ cd my-new-project
+$ git pull gh://jaraco/skeleton
+```
+
+Now customize the project to suit your individual project needs.
+
+## existing projects
+
+If you have an existing project, you can still incorporate the skeleton by merging it into the codebase.
+
+```
+$ git merge skeleton --allow-unrelated-histories
+```
+
+The `--allow-unrelated-histories` is necessary because the history from the skeleton was previously unrelated to the existing codebase. Resolve any merge conflicts and commit to the master, and now the project is based on the shared skeleton.
+
+## Updating
+
+Whenever a change is needed or desired for the general technique for packaging, it can be made in the skeleton project and then merged into each of the derived projects as needed, recommended before each release. As a result, features and best practices for packaging are centrally maintained and readily trickle into a whole suite of packages. This technique lowers the amount of tedious work necessary to create or maintain a project, and coupled with other techniques like continuous integration and deployment, lowers the cost of creating and maintaining refined Python projects to just a few, familiar Git operations.
+
+For example, here's a session of the [path project](https://pypi.org/project/path) pulling non-conflicting changes from the skeleton:
+
+<img src="https://raw.githubusercontent.com/jaraco/skeleton/gh-pages/docs/refresh.svg">
+
+Thereafter, the target project can make whatever customizations it deems relevant to the scaffolding. The project may even at some point decide that the divergence is too great to merit renewed merging with the original skeleton. This approach applies maximal guidance while creating minimal constraints.
+
+## Periodic Collapse
+
+In late 2020, this project [introduced](https://github.com/jaraco/skeleton/issues/27) the idea of a periodic but infrequent (O(years)) collapse of commits to limit the number of commits a new consumer will need to accept to adopt the skeleton.
+
+The full history of commits is collapsed into a single commit and that commit becomes the new mainline head.
+
+When one of these collapse operations happens, any project that previously pulled from the skeleton will no longer have a related history with that new main branch. For those projects, the skeleton provides a "handoff" branch that reconciles the two branches. Any project that has previously merged with the skeleton but now gets an error "fatal: refusing to merge unrelated histories" should instead use the handoff branch once to incorporate the new main branch.
+
+```
+$ git pull https://github.com/jaraco/skeleton 2020-handoff
+```
+
+This handoff needs to be pulled just once and thereafter the project can pull from the main head.
+
+The archive and handoff branches from prior collapses are indicated here:
+
+| refresh | archive | handoff |
+|---------|-----------------|--------------|
+| 2020-12 | archive/2020-12 | 2020-handoff |
+
+# Features
+
+The features/techniques employed by the skeleton include:
+
+- PEP 517/518-based build relying on Setuptools as the build tool
+- Setuptools declarative configuration using setup.cfg
+- tox for running tests
+- A README.rst as reStructuredText with some popular badges, but with Read the Docs and AppVeyor badges commented out
+- A CHANGES.rst file intended for publishing release notes about the project
+- Use of [Black](https://black.readthedocs.io/en/stable/) for code formatting (disabled on unsupported Python 3.5 and earlier)
+- Integrated type checking through [mypy](https://github.com/python/mypy/).
+
+## Packaging Conventions
+
+A pyproject.toml is included to enable PEP 517 and PEP 518 compatibility and declares the requirements necessary to build the project on Setuptools (a minimum version compatible with setup.cfg declarative config).
+
+The setup.cfg file implements the following features:
+
+- Assumes universal wheel for release
+- Advertises the project's LICENSE file (MIT by default)
+- Reads the README.rst file into the long description
+- Some common Trove classifiers
+- Includes all packages discovered in the repo
+- Data files in the package are also included (not just Python files)
+- Declares the required Python versions
+- Declares install requirements (empty by default)
+- Declares setup requirements for legacy environments
+- Supplies two 'extras':
+ - testing: requirements for running tests
+ - docs: requirements for building docs
+ - these extras split the declaration into "upstream" (requirements as declared by the skeleton) and "local" (those specific to the local project); these markers help avoid merge conflicts
+- Placeholder for defining entry points
+
+Additionally, the project relies on [setuptools_scm](https://pypi.org/project/setuptools_scm) (declared under `setup_requires` in setup.cfg; setup.py itself only calls `setuptools.setup()`) to do two things:
+
+- derive the project version from SCM tags
+- ensure that all files committed to the repo are automatically included in releases
+
+## Running Tests
+
+The skeleton assumes the developer has [tox](https://pypi.org/project/tox) installed. The developer is expected to run `tox` to run tests on the current Python version using [pytest](https://pypi.org/project/pytest).
+
+Other environments (invoked with `tox -e {name}`) supplied include:
+
+ - a `docs` environment to build the documentation
+ - a `release` environment to publish the package to PyPI
+
+A pytest.ini is included to define common options around running tests. In particular:
+
+- rely on default test discovery in the current directory
+- avoid recursing into common directories not containing tests
+- run doctests on modules and invoke Flake8 tests
+- in doctests, allow Unicode literals and regular literals to match, allowing for doctests to run on Python 2 and 3. Also enable ELLIPSIS, a default that would be undone by supplying the prior option.
+- filters out known warnings caused by libraries/functionality included by the skeleton
+
+Relies on a .flake8 file to correct some default behaviors:
+
+- disable mutually incompatible rules W503 and W504
+- support for Black format
+
+## Continuous Integration
+
+The project is pre-configured to run Continuous Integration tests.
+
+### GitHub Actions
+
+[GitHub Actions](https://docs.github.com/en/free-pro-team@latest/actions) are the preferred provider as they provide free, fast, multi-platform services with straightforward configuration. Configured in `.github/workflows`.
+
+Features include:
+- test against multiple Python versions
+- run on late (and updated) platform versions
+- automated releases of tagged commits
+- [automatic merging of PRs](https://github.com/marketplace/actions/merge-pull-requests) (requires [protecting branches with required status checks](https://docs.github.com/en/free-pro-team@latest/github/administering-a-repository/enabling-required-status-checks), [not possible through API](https://github.community/t/set-all-status-checks-to-be-required-as-branch-protection-using-the-github-api/119493))
+
+
+### Continuous Deployments
+
+In addition to running tests, an additional publish stage is configured to automatically release tagged commits to PyPI using [API tokens](https://pypi.org/help/#apitoken). The release process expects an authorized token to be configured in each GitHub project (or org) as the `PYPI_TOKEN` [secret](https://docs.github.com/en/free-pro-team@latest/actions/reference/encrypted-secrets). Example:
+
+```
+pip-run -q jaraco.develop -- -m jaraco.develop.add-github-secrets
+```
+
+## Building Documentation
+
+Documentation is automatically built by [Read the Docs](https://readthedocs.org) when the project is registered with it, by way of the .readthedocs.yml file. To test the docs build manually, a tox env may be invoked as `tox -e docs`. Both techniques rely on the dependencies declared in `setup.cfg/options.extras_require.docs`.
+
+In addition to building the Sphinx docs scaffolded in `docs/`, the docs build a `history.html` file that first injects release dates and hyperlinks into the CHANGES.rst before incorporating it as history in the docs.
+
+## Cutting releases
+
+By default, tagged commits are released through the continuous integration deploy stage.
+
+Releases may also be cut manually by invoking the tox environment `release` with the PyPI token set as the TWINE_PASSWORD:
+
+```
+TWINE_PASSWORD={token} tox -e release
+```
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..249f97c
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,40 @@
+[tox]
+envlist = python
+minversion = 3.2
+# https://github.com/jaraco/skeleton/issues/6
+tox_pip_extensions_ext_venv_update = true
+toxworkdir={env:TOX_WORK_DIR:.tox}
+
+
+[testenv]
+deps =
+commands =
+ pytest {posargs}
+usedevelop = True
+extras = testing
+
+[testenv:docs]
+extras =
+ docs
+ testing
+changedir = docs
+commands =
+ python -m sphinx . {toxinidir}/build/html
+
+[testenv:release]
+skip_install = True
+deps =
+ build
+ twine[keyring]>=1.13
+ path
+ jaraco.develop>=7.1
+passenv =
+ TWINE_PASSWORD
+ GITHUB_TOKEN
+setenv =
+ TWINE_USERNAME = {env:TWINE_USERNAME:__token__}
+commands =
+ python -c "import path; path.Path('dist').rmtree_p()"
+ python -m build
+ python -m twine upload dist/*
+ python -m jaraco.develop.create-github-release