summaryrefslogtreecommitdiffstats
path: root/src/aristaproto/casing.py
blob: f7d0832b87e9f55ac38d65d822ba8c6cbbc60682 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import keyword
import re


# Any run (possibly empty) of characters that are not ASCII letters or digits.
# These act as word separators and are discarded when a value is re-cased.
# language=PythonRegExp
SYMBOLS = r"[^a-zA-Z0-9]*"

# A word: optional uppercase letters, then optional lowercase letters, then
# optional digits. Every part is optional, so this can match the empty string.
# language=PythonRegExp
WORD = r"[A-Z]*[a-z]*[0-9]*"

# An all-uppercase word: one or more uppercase letters where the last one is
# not directly followed by a lowercase letter, then optional digits.
# language=PythonRegExp
WORD_UPPER = r"[A-Z]+(?![a-z])[0-9]*"


def safe_snake_case(value: str) -> str:
    """
    Convert a value to snake_case and make the result safe to use as a name.

    The value is first passed through :func:`snake_case` (strict mode) and the
    result is then passed through :func:`sanitize_name`.

    Parameters
    -----------
    value: :class:`str`
        The value to convert.

    Returns
    --------
    :class:`str`
        The sanitized snake_case value.
    """
    return sanitize_name(snake_case(value))


def snake_case(value: str, strict: bool = True) -> str:
    """
    Lowercase every word, join words with underscores and drop symbols.

    Parameters
    -----------
    value: :class:`str`
        The value to convert.
    strict: :class:`bool`
        When ``True`` words are always separated by exactly one underscore.
        When ``False`` the number of underscores follows the number of symbol
        characters that preceded each word in the input.

    Returns
    --------
    :class:`str`
        The value in snake_case.
    """
    # Group 1 only participates when the match begins at the start of the
    # string; group 2 is the run of symbols; group 3 is the word itself.
    pattern = f"(^)?({SYMBOLS})({WORD_UPPER}|{WORD})"

    def underscores_before(symbols: str, word: str, at_start: bool) -> int:
        """Return how many underscores to emit in front of ``word``."""
        if strict:
            # Exactly one separator, except before a word at the very start.
            return 0 if at_start else 1
        if at_start:
            # Leading symbols are kept one-for-one.
            return len(symbols)
        if word.isupper() or word.islower():
            # Single-case word: keep every delimiter, but emit at least one.
            return max(1, len(symbols))
        # Any other word (e.g. leading capital): one extra underscore.
        return len(symbols) + 1

    def replace(match: "re.Match[str]") -> str:
        start_anchor, symbols, word = match.groups()
        if not word:
            # Symbols that are not followed by a word are removed entirely.
            return ""
        prefix = "_" * underscores_before(symbols, word, start_anchor is not None)
        return prefix + word.lower()

    return re.sub(pattern, replace, value)


def pascal_case(value: str, strict: bool = True) -> str:
    """
    Capitalize every word and drop the symbols between them.

    Parameters
    -----------
    value: :class:`str`
        The value to convert.
    strict: :class:`bool`
        When ``True`` all symbols are removed. When ``False`` symbols are
        replaced by underscores (one fewer in front of an all-lowercase word).

    Returns
    --------
    :class:`str`
        The value in PascalCase.
    """
    # Group 1 is the run of symbols, group 2 is the word that follows it.
    pattern = f"({SYMBOLS})({WORD_UPPER}|{WORD})"

    def replace(match: "re.Match[str]") -> str:
        symbols, word = match.group(1), match.group(2)
        capitalized = word.capitalize()
        if strict:
            # Strict output carries no delimiters at all.
            return capitalized

        kept = len(symbols)
        if kept and word.islower():
            # An all-lowercase word gives up one of its leading delimiters.
            kept -= 1
        return "_" * kept + capitalized

    return re.sub(pattern, replace, value)


def camel_case(value: str, strict: bool = True) -> str:
    """
    Convert a value to camelCase.

    The value is converted with :func:`pascal_case` and the first character of
    that result is then lowercased.

    Parameters
    -----------
    value: :class:`str`
        The value to convert.
    strict: :class:`bool`
        Forwarded to :func:`pascal_case`; whether or not to output only
        alphanumeric characters.

    Returns
    --------
    :class:`str`
        The value in camelCase.
    """
    pascal = pascal_case(value, strict=strict)
    return lowercase_first(pascal)


def lowercase_first(value: str) -> str:
    """
    Return the value with its first character lowercased.

    Parameters
    ----------
    value: :class:`str`
        The value whose first character should be lowercased. An empty string
        is returned unchanged.

    Returns
    -------
    :class:`str`
        The value with a lowercased first character; the rest is untouched.
    """
    # Slicing (rather than indexing) keeps this safe for the empty string.
    head, tail = value[:1], value[1:]
    return head.lower() + tail


def sanitize_name(value: str) -> str:
    """
    Adjust a value so that it does not clash with a Python keyword.

    Parameters
    ----------
    value: :class:`str`
        The candidate name.

    Returns
    -------
    :class:`str`
        ``value`` with a trailing underscore if it is a Python keyword,
        ``value`` with a leading underscore if it is not a valid identifier,
        otherwise ``value`` unchanged.
    """
    # https://www.python.org/dev/peps/pep-0008/#descriptive-naming-styles
    if keyword.iskeyword(value):
        return value + "_"
    return value if value.isidentifier() else "_" + value