summaryrefslogtreecommitdiffstats
path: root/sphinx/versioning.py
blob: 0da3b1e280504c4e830c926d1b1ca8b3028e1e4a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
"""Implements the low-level algorithms Sphinx uses for versioning doctrees."""
from __future__ import annotations

import pickle
from itertools import product, zip_longest
from operator import itemgetter
from os import path
from typing import TYPE_CHECKING, Any
from uuid import uuid4

from sphinx.transforms import SphinxTransform

if TYPE_CHECKING:
    from collections.abc import Iterator

    from docutils.nodes import Node

    from sphinx.application import Sphinx

try:
    import Levenshtein
    IS_SPEEDUP = True
except ImportError:
    IS_SPEEDUP = False

# anything below that ratio is considered equal/changed
VERSIONING_RATIO = 65


def add_uids(doctree: Node, condition: Any) -> Iterator[Node]:
    """Add a unique id to every node in the `doctree` which matches the
    condition and yield the nodes.

    :param doctree:
        A :class:`docutils.nodes.document` instance.

    :param condition:
        A callable which returns either ``True`` or ``False`` for a given node.
    """
    for node in doctree.findall(condition):
        node.uid = uuid4().hex
        yield node


def merge_doctrees(old: Node, new: Node, condition: Any) -> Iterator[Node]:
    """Merge the `old` doctree with the `new` one while looking at nodes
    matching the `condition`.

    Each node which replaces another one or has been added to the `new` doctree
    will be yielded.

    :param condition:
        A callable which returns either ``True`` or ``False`` for a given node.
    """
    old_iter = old.findall(condition)
    new_iter = new.findall(condition)
    old_nodes = []
    new_nodes = []
    ratios = {}
    seen = set()
    # compare the nodes each doctree in order
    for old_node, new_node in zip_longest(old_iter, new_iter):
        if old_node is None:
            new_nodes.append(new_node)
            continue
        if not getattr(old_node, 'uid', None):
            # maybe config.gettext_uuid has been changed.
            old_node.uid = uuid4().hex
        if new_node is None:
            old_nodes.append(old_node)
            continue
        ratio = get_ratio(old_node.rawsource, new_node.rawsource)
        if ratio == 0:
            new_node.uid = old_node.uid
            seen.add(new_node)
        else:
            ratios[old_node, new_node] = ratio
            old_nodes.append(old_node)
            new_nodes.append(new_node)
    # calculate the ratios for each unequal pair of nodes, should we stumble
    # on a pair which is equal we set the uid and add it to the seen ones
    for old_node, new_node in product(old_nodes, new_nodes):
        if new_node in seen or (old_node, new_node) in ratios:
            continue
        ratio = get_ratio(old_node.rawsource, new_node.rawsource)
        if ratio == 0:
            new_node.uid = old_node.uid
            seen.add(new_node)
        else:
            ratios[old_node, new_node] = ratio
    # choose the old node with the best ratio for each new node and set the uid
    # as long as the ratio is under a certain value, in which case we consider
    # them not changed but different
    ratios = sorted(ratios.items(), key=itemgetter(1))  # type: ignore[assignment]
    for (old_node, new_node), ratio in ratios:
        if new_node in seen:
            continue
        else:
            seen.add(new_node)
        if ratio < VERSIONING_RATIO:
            new_node.uid = old_node.uid
        else:
            new_node.uid = uuid4().hex
            yield new_node
    # create new uuids for any new node we left out earlier, this happens
    # if one or more nodes are simply added.
    for new_node in set(new_nodes) - seen:
        new_node.uid = uuid4().hex
        yield new_node


def get_ratio(old: str, new: str) -> float:
    """Return a "similarity ratio" (in percent) representing the similarity
    between the two strings where 0 is equal and anything above less than equal.
    """
    if not all([old, new]):
        return VERSIONING_RATIO

    if IS_SPEEDUP:
        return Levenshtein.distance(old, new) / (len(old) / 100.0)
    else:
        return levenshtein_distance(old, new) / (len(old) / 100.0)


def levenshtein_distance(a: str, b: str) -> int:
    """Return the Levenshtein edit distance between two strings *a* and *b*."""
    if a == b:
        return 0
    if len(a) < len(b):
        a, b = b, a
    if not a:
        return len(b)
    previous_row = list(range(len(b) + 1))
    for i, column1 in enumerate(a):
        current_row = [i + 1]
        for j, column2 in enumerate(b):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (column1 != column2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    return previous_row[-1]


class UIDTransform(SphinxTransform):
    """Add UIDs to doctree for versioning."""
    default_priority = 880

    def apply(self, **kwargs: Any) -> None:
        env = self.env
        old_doctree = None
        if not env.versioning_condition:
            return

        if env.versioning_compare:
            # get old doctree
            try:
                filename = path.join(env.doctreedir, env.docname + '.doctree')
                with open(filename, 'rb') as f:
                    old_doctree = pickle.load(f)
            except OSError:
                pass

        # add uids for versioning
        if not env.versioning_compare or old_doctree is None:
            list(add_uids(self.document, env.versioning_condition))
        else:
            list(merge_doctrees(old_doctree, self.document, env.versioning_condition))


def setup(app: Sphinx) -> dict[str, Any]:
    app.add_transform(UIDTransform)

    return {
        'version': 'builtin',
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }