1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
|
"""Inventory utility functions for Sphinx."""
from __future__ import annotations
import os
import re
import zlib
from typing import IO, TYPE_CHECKING, Callable
from sphinx.util import logging
BUFSIZE = 16 * 1024
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from collections.abc import Iterator
from sphinx.builders import Builder
from sphinx.environment import BuildEnvironment
from sphinx.util.typing import Inventory, InventoryItem
class InventoryFileReader:
"""A file reader for an inventory file.
This reader supports mixture of texts and compressed texts.
"""
def __init__(self, stream: IO[bytes]) -> None:
self.stream = stream
self.buffer = b''
self.eof = False
def read_buffer(self) -> None:
chunk = self.stream.read(BUFSIZE)
if chunk == b'':
self.eof = True
self.buffer += chunk
def readline(self) -> str:
pos = self.buffer.find(b'\n')
if pos != -1:
line = self.buffer[:pos].decode()
self.buffer = self.buffer[pos + 1:]
elif self.eof:
line = self.buffer.decode()
self.buffer = b''
else:
self.read_buffer()
line = self.readline()
return line
def readlines(self) -> Iterator[str]:
while not self.eof:
line = self.readline()
if line:
yield line
def read_compressed_chunks(self) -> Iterator[bytes]:
decompressor = zlib.decompressobj()
while not self.eof:
self.read_buffer()
yield decompressor.decompress(self.buffer)
self.buffer = b''
yield decompressor.flush()
def read_compressed_lines(self) -> Iterator[str]:
buf = b''
for chunk in self.read_compressed_chunks():
buf += chunk
pos = buf.find(b'\n')
while pos != -1:
yield buf[:pos].decode()
buf = buf[pos + 1:]
pos = buf.find(b'\n')
class InventoryFile:
@classmethod
def load(
cls: type[InventoryFile],
stream: IO[bytes],
uri: str,
joinfunc: Callable[[str, str], str],
) -> Inventory:
reader = InventoryFileReader(stream)
line = reader.readline().rstrip()
if line == '# Sphinx inventory version 1':
return cls.load_v1(reader, uri, joinfunc)
elif line == '# Sphinx inventory version 2':
return cls.load_v2(reader, uri, joinfunc)
else:
raise ValueError('invalid inventory header: %s' % line)
@classmethod
def load_v1(
cls: type[InventoryFile],
stream: InventoryFileReader,
uri: str,
join: Callable[[str, str], str],
) -> Inventory:
invdata: Inventory = {}
projname = stream.readline().rstrip()[11:]
version = stream.readline().rstrip()[11:]
for line in stream.readlines():
name, type, location = line.rstrip().split(None, 2)
location = join(uri, location)
# version 1 did not add anchors to the location
if type == 'mod':
type = 'py:module'
location += '#module-' + name
else:
type = 'py:' + type
location += '#' + name
invdata.setdefault(type, {})[name] = (projname, version, location, '-')
return invdata
@classmethod
def load_v2(
cls: type[InventoryFile],
stream: InventoryFileReader,
uri: str,
join: Callable[[str, str], str],
) -> Inventory:
invdata: Inventory = {}
projname = stream.readline().rstrip()[11:]
version = stream.readline().rstrip()[11:]
line = stream.readline()
if 'zlib' not in line:
raise ValueError('invalid inventory header (not compressed): %s' % line)
for line in stream.read_compressed_lines():
# be careful to handle names with embedded spaces correctly
m = re.match(r'(.+?)\s+(\S+)\s+(-?\d+)\s+?(\S*)\s+(.*)',
line.rstrip(), flags=re.VERBOSE)
if not m:
continue
name, type, prio, location, dispname = m.groups()
if ':' not in type:
# wrong type value. type should be in the form of "{domain}:{objtype}"
#
# Note: To avoid the regex DoS, this is implemented in python (refs: #8175)
continue
if type == 'py:module' and type in invdata and name in invdata[type]:
# due to a bug in 1.1 and below,
# two inventory entries are created
# for Python modules, and the first
# one is correct
continue
if location.endswith('$'):
location = location[:-1] + name
location = join(uri, location)
inv_item: InventoryItem = projname, version, location, dispname
invdata.setdefault(type, {})[name] = inv_item
return invdata
@classmethod
def dump(
cls: type[InventoryFile], filename: str, env: BuildEnvironment, builder: Builder,
) -> None:
def escape(string: str) -> str:
return re.sub("\\s+", " ", string)
with open(os.path.join(filename), 'wb') as f:
# header
f.write(('# Sphinx inventory version 2\n'
'# Project: %s\n'
'# Version: %s\n'
'# The remainder of this file is compressed using zlib.\n' %
(escape(env.config.project),
escape(env.config.version))).encode())
# body
compressor = zlib.compressobj(9)
for domainname, domain in sorted(env.domains.items()):
for name, dispname, typ, docname, anchor, prio in \
sorted(domain.get_objects()):
if anchor.endswith(name):
# this can shorten the inventory by as much as 25%
anchor = anchor[:-len(name)] + '$'
uri = builder.get_target_uri(docname)
if anchor:
uri += '#' + anchor
if dispname == name:
dispname = '-'
entry = ('%s %s:%s %s %s %s\n' %
(name, domainname, typ, prio, uri, dispname))
f.write(compressor.compress(entry.encode()))
f.write(compressor.flush())
|