diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-05 16:20:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-05 16:20:58 +0000 |
commit | ffcb4b87846b4e4a2d9eee8df4b7ec40365878b8 (patch) | |
tree | 3c64877dd20ad1141111c77b3463e95686002b39 /sphinx/search | |
parent | Adding debian version 7.2.6-8. (diff) | |
download | sphinx-ffcb4b87846b4e4a2d9eee8df4b7ec40365878b8.tar.xz sphinx-ffcb4b87846b4e4a2d9eee8df4b7ec40365878b8.zip |
Merging upstream version 7.3.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sphinx/search')
-rw-r--r-- | sphinx/search/__init__.py | 35 | ||||
-rw-r--r-- | sphinx/search/da.py | 4 | ||||
-rw-r--r-- | sphinx/search/de.py | 4 | ||||
-rw-r--r-- | sphinx/search/en.py | 2 | ||||
-rw-r--r-- | sphinx/search/es.py | 4 | ||||
-rw-r--r-- | sphinx/search/fi.py | 4 | ||||
-rw-r--r-- | sphinx/search/fr.py | 4 | ||||
-rw-r--r-- | sphinx/search/hu.py | 4 | ||||
-rw-r--r-- | sphinx/search/it.py | 4 | ||||
-rw-r--r-- | sphinx/search/ja.py | 35 | ||||
-rw-r--r-- | sphinx/search/nl.py | 4 | ||||
-rw-r--r-- | sphinx/search/no.py | 4 | ||||
-rw-r--r-- | sphinx/search/pt.py | 4 | ||||
-rw-r--r-- | sphinx/search/ro.py | 2 | ||||
-rw-r--r-- | sphinx/search/ru.py | 4 | ||||
-rw-r--r-- | sphinx/search/sv.py | 4 | ||||
-rw-r--r-- | sphinx/search/tr.py | 2 | ||||
-rw-r--r-- | sphinx/search/zh.py | 5 |
18 files changed, 59 insertions, 70 deletions
diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index 21758d3..2638f92 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -70,11 +70,11 @@ var Stemmer = function() { _word_re = re.compile(r'\w+') - def __init__(self, options: dict) -> None: + def __init__(self, options: dict[str, str]) -> None: self.options = options self.init(options) - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: """ Initialize the class with the options the user has given. """ @@ -121,7 +121,7 @@ def parse_stop_word(source: str) -> set[str]: """ Parse snowball style word list like this: - * http://snowball.tartarus.org/algorithms/finnish/stop.txt + * https://snowball.tartarus.org/algorithms/finnish/stop.txt """ result: set[str] = set() for line in source.splitlines(): @@ -162,7 +162,7 @@ class _JavaScriptIndex: SUFFIX = ')' def dumps(self, data: Any) -> str: - return self.PREFIX + json.dumps(data) + self.SUFFIX + return self.PREFIX + json.dumps(data, sort_keys=True) + self.SUFFIX def loads(self, s: str) -> Any: data = s[len(self.PREFIX):-len(self.SUFFIX)] @@ -171,10 +171,10 @@ class _JavaScriptIndex: raise ValueError('invalid data') return json.loads(data) - def dump(self, data: Any, f: IO) -> None: + def dump(self, data: Any, f: IO[str]) -> None: f.write(self.dumps(data)) - def load(self, f: IO) -> Any: + def load(self, f: IO[str]) -> Any: return self.loads(f.read()) @@ -182,7 +182,7 @@ js_index = _JavaScriptIndex() def _is_meta_keywords( - node: nodes.meta, # type: ignore[name-defined] + node: nodes.meta, lang: str | None, ) -> bool: if node.get('name') == 'keywords': @@ -234,7 +234,7 @@ class WordCollector(nodes.NodeVisitor): ids = node.parent['ids'] self.found_titles.append((title, ids[0] if ids else None)) self.found_title_words.extend(self.lang.split(title)) - elif isinstance(node, Element) and _is_meta_keywords(node, self.lang.lang): + elif isinstance(node, Element) and _is_meta_keywords(node, self.lang.lang): # type: ignore[arg-type] keywords = node['content'] keywords = [keyword.strip() for keyword in keywords.split(',')] self.found_words.extend(keywords) @@ -250,7 +250,7 @@ class IndexBuilder: 'pickle': pickle } - def __init__(self, env: BuildEnvironment, lang: str, options: dict, scoring: str) -> None: + def __init__(self, env: BuildEnvironment, lang: str, options: dict[str, str], scoring: str) -> None: self.env = env # docname -> title self._titles: dict[str, str] = env._search_index_titles @@ -368,8 +368,8 @@ class IndexBuilder: plist.append((fn2index[docname], typeindex, prio, shortanchor, name)) return rv - def get_terms(self, fn2index: dict) -> tuple[dict[str, list[str]], dict[str, list[str]]]: - rvs: tuple[dict[str, list[str]], dict[str, list[str]]] = ({}, {}) + def get_terms(self, fn2index: dict[str, int]) -> tuple[dict[str, list[int] | int], dict[str, list[int] | int]]: + rvs: tuple[dict[str, list[int] | int], dict[str, list[int] | int]] = ({}, {}) for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)): for k, v in mapping.items(): if len(v) == 1: @@ -377,7 +377,7 @@ class IndexBuilder: if fn in fn2index: rv[k] = fn2index[fn] else: - rv[k] = sorted([fn2index[fn] for fn in v if fn in fn2index]) + rv[k] = sorted(fn2index[fn] for fn in v if fn in fn2index) return rvs def freeze(self) -> dict[str, Any]: @@ -392,14 +392,14 @@ class IndexBuilder: objnames = self._objnames alltitles: dict[str, list[tuple[int, str]]] = {} - for docname, titlelist in self._all_titles.items(): + for docname, titlelist in sorted(self._all_titles.items()): for title, titleid in titlelist: alltitles.setdefault(title, []).append((fn2index[docname], titleid)) - index_entries: dict[str, list[tuple[int, str]]] = {} + index_entries: dict[str, list[tuple[int, str, bool]]] = {} for docname, entries in self._index_entries.items(): for entry, entry_id, main_entry in entries: - index_entries.setdefault(entry.lower(), []).append((fn2index[docname], entry_id)) + index_entries.setdefault(entry.lower(), []).append((fn2index[docname], entry_id, main_entry == "main")) return dict(docnames=docnames, filenames=filenames, titles=titles, terms=terms, objects=objects, objtypes=objtypes, objnames=objnames, @@ -438,7 +438,7 @@ class IndexBuilder: _stem = self.lang.stem # memoise self.lang.stem - @functools.lru_cache(maxsize=None) + @functools.cache def stem(word_to_stem: str) -> str: return _stem(word_to_stem).lower() @@ -495,7 +495,7 @@ class IndexBuilder: nodetext = re.sub(r'<[^<]+?>', '', nodetext) word_store.words.extend(split(nodetext)) return - elif (isinstance(node, nodes.meta) # type: ignore[attr-defined] + elif (isinstance(node, nodes.meta) and _is_meta_keywords(node, language)): keywords = [keyword.strip() for keyword in node['content'].split(',')] word_store.words.extend(keywords) @@ -508,7 +508,6 @@ class IndexBuilder: word_store.title_words.extend(split(title)) for child in node.children: _visit_nodes(child) - return word_store = WordStore() split = self.lang.split diff --git a/sphinx/search/da.py b/sphinx/search/da.py index 9b5b9f5..47c5744 100644 --- a/sphinx/search/da.py +++ b/sphinx/search/da.py @@ -9,7 +9,7 @@ import snowballstemmer from sphinx.search import SearchLanguage, parse_stop_word danish_stopwords = parse_stop_word(''' -| source: http://snowball.tartarus.org/algorithms/danish/stop.txt +| source: https://snowball.tartarus.org/algorithms/danish/stop.txt og | and i | in jeg | I @@ -113,7 +113,7 @@ class SearchDanish(SearchLanguage): js_stemmer_rawcode = 'danish-stemmer.js' stopwords = danish_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('danish') def stem(self, word: str) -> str: diff --git a/sphinx/search/de.py b/sphinx/search/de.py index 1c253fd..dae52c9 100644 --- a/sphinx/search/de.py +++ b/sphinx/search/de.py @@ -9,7 +9,7 @@ import snowballstemmer from sphinx.search import SearchLanguage, parse_stop_word german_stopwords = parse_stop_word(''' -|source: http://snowball.tartarus.org/algorithms/german/stop.txt +|source: https://snowball.tartarus.org/algorithms/german/stop.txt aber | but alle | all @@ -296,7 +296,7 @@ class SearchGerman(SearchLanguage): js_stemmer_rawcode = 'german-stemmer.js' stopwords = german_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('german') def stem(self, word: str) -> str: diff --git a/sphinx/search/en.py b/sphinx/search/en.py index caa6f66..a1f06bd 100644 --- a/sphinx/search/en.py +++ b/sphinx/search/en.py @@ -213,7 +213,7 @@ class SearchEnglish(SearchLanguage): js_stemmer_code = js_porter_stemmer stopwords = english_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('porter') def stem(self, word: str) -> str: diff --git a/sphinx/search/es.py b/sphinx/search/es.py index c5d9a5c..247095b 100644 --- a/sphinx/search/es.py +++ b/sphinx/search/es.py @@ -9,7 +9,7 @@ import snowballstemmer from sphinx.search import SearchLanguage, parse_stop_word spanish_stopwords = parse_stop_word(''' -|source: http://snowball.tartarus.org/algorithms/spanish/stop.txt +|source: https://snowball.tartarus.org/algorithms/spanish/stop.txt de | from, of la | the, her que | who, that @@ -356,7 +356,7 @@ class SearchSpanish(SearchLanguage): js_stemmer_rawcode = 'spanish-stemmer.js' stopwords = spanish_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('spanish') def stem(self, word: str) -> str: diff --git a/sphinx/search/fi.py b/sphinx/search/fi.py index 70114f8..5eca6e3 100644 --- a/sphinx/search/fi.py +++ b/sphinx/search/fi.py @@ -9,7 +9,7 @@ import snowballstemmer from sphinx.search import SearchLanguage, parse_stop_word finnish_stopwords = parse_stop_word(''' -| source: http://snowball.tartarus.org/algorithms/finnish/stop.txt +| source: https://snowball.tartarus.org/algorithms/finnish/stop.txt | forms of BE olla @@ -106,7 +106,7 @@ class SearchFinnish(SearchLanguage): js_stemmer_rawcode = 'finnish-stemmer.js' stopwords = finnish_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('finnish') def stem(self, word: str) -> str: diff --git a/sphinx/search/fr.py b/sphinx/search/fr.py index 01319dd..4d41cf4 100644 --- a/sphinx/search/fr.py +++ b/sphinx/search/fr.py @@ -9,7 +9,7 @@ import snowballstemmer from sphinx.search import SearchLanguage, parse_stop_word french_stopwords = parse_stop_word(''' -| source: http://snowball.tartarus.org/algorithms/french/stop.txt +| source: https://snowball.tartarus.org/algorithms/french/stop.txt au | a + le aux | a + les avec | with @@ -192,7 +192,7 @@ class SearchFrench(SearchLanguage): js_stemmer_rawcode = 'french-stemmer.js' stopwords = french_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('french') def stem(self, word: str) -> str: diff --git a/sphinx/search/hu.py b/sphinx/search/hu.py index eed08db..ccd6ebe 100644 --- a/sphinx/search/hu.py +++ b/sphinx/search/hu.py @@ -9,7 +9,7 @@ import snowballstemmer from sphinx.search import SearchLanguage, parse_stop_word hungarian_stopwords = parse_stop_word(''' -| source: http://snowball.tartarus.org/algorithms/hungarian/stop.txt +| source: https://snowball.tartarus.org/algorithms/hungarian/stop.txt | prepared by Anna Tordai a ahogy @@ -219,7 +219,7 @@ class SearchHungarian(SearchLanguage): js_stemmer_rawcode = 'hungarian-stemmer.js' stopwords = hungarian_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('hungarian') def stem(self, word: str) -> str: diff --git a/sphinx/search/it.py b/sphinx/search/it.py index 7bf712b..8436dfa 100644 --- a/sphinx/search/it.py +++ b/sphinx/search/it.py @@ -9,7 +9,7 @@ import snowballstemmer from sphinx.search import SearchLanguage, parse_stop_word italian_stopwords = parse_stop_word(''' -| source: http://snowball.tartarus.org/algorithms/italian/stop.txt +| source: https://snowball.tartarus.org/algorithms/italian/stop.txt ad | a (to) before vowel al | a + il allo | a + lo @@ -309,7 +309,7 @@ class SearchItalian(SearchLanguage): js_stemmer_rawcode = 'italian-stemmer.js' stopwords = italian_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('italian') def stem(self, word: str) -> str: diff --git a/sphinx/search/ja.py b/sphinx/search/ja.py index de221ce..5669155 100644 --- a/sphinx/search/ja.py +++ b/sphinx/search/ja.py @@ -1,28 +1,28 @@ """Japanese search language: includes routine to split words.""" # Python Version of TinySegmenter -# (http://chasen.org/~taku/software/TinySegmenter/) +# (https://chasen.org/~taku/software/TinySegmenter/) # TinySegmenter is super compact Japanese tokenizer. # # TinySegmenter was originally developed by Taku Kudo <taku(at)chasen.org>. # Python Version was developed by xnights <programming.magic(at)gmail.com>. -# For details, see http://programming-magic.com/?id=170 +# For details, see https://programming-magic.com/?id=170 from __future__ import annotations import os import re import sys -from typing import TYPE_CHECKING, Any, Dict, List +from typing import Any try: - import MeCab + import MeCab # type: ignore[import-not-found] native_module = True except ImportError: native_module = False try: - import janome.tokenizer + import janome.tokenizer # type: ignore[import-not-found] janome_module = True except ImportError: janome_module = False @@ -33,7 +33,7 @@ from sphinx.util import import_object class BaseSplitter: - def __init__(self, options: dict) -> None: + def __init__(self, options: dict[str, str]) -> None: self.options = options def split(self, input: str) -> list[str]: @@ -46,7 +46,7 @@ class BaseSplitter: class MecabSplitter(BaseSplitter): - def __init__(self, options: dict) -> None: + def __init__(self, options: dict[str, str]) -> None: super().__init__(options) self.ctypes_libmecab: Any = None self.ctypes_mecab: Any = None @@ -64,14 +64,14 @@ class MecabSplitter(BaseSplitter): self.ctypes_mecab, input.encode(self.dict_encode)) return result.split(' ') - def init_native(self, options: dict) -> None: + def init_native(self, options: dict[str, str]) -> None: param = '-Owakati' dict = options.get('dict') if dict: param += ' -d %s' % dict self.native = MeCab.Tagger(param) - def init_ctypes(self, options: dict) -> None: + def init_ctypes(self, options: dict[str, str]) -> None: import ctypes.util lib = options.get('lib') @@ -113,7 +113,7 @@ class MecabSplitter(BaseSplitter): class JanomeSplitter(BaseSplitter): - def __init__(self, options: dict) -> None: + def __init__(self, options: dict[str, str]) -> None: super().__init__(options) self.user_dict = options.get('user_dic') self.user_dict_enc = options.get('user_dic_enc', 'utf8') @@ -418,17 +418,8 @@ class DefaultSplitter(BaseSplitter): return [] result = [] - seg = ['B3', 'B2', 'B1'] - ctype = ['O', 'O', 'O'] - for t in input: - seg.append(t) - ctype.append(self.ctype_(t)) - seg.append('E1') - seg.append('E2') - seg.append('E3') - ctype.append('O') - ctype.append('O') - ctype.append('O') + seg = ['B3', 'B2', 'B1', *input, 'E1', 'E2', 'E3'] + ctype = ['O', 'O', 'O', *map(self.ctype_, input), 'O', 'O', 'O'] word = seg[3] p1 = 'U' p2 = 'U' @@ -513,7 +504,7 @@ class SearchJapanese(SearchLanguage): lang = 'ja' language_name = 'Japanese' - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: dotted_path = options.get('type', 'sphinx.search.ja.DefaultSplitter') try: self.splitter = import_object(dotted_path)(options) diff --git a/sphinx/search/nl.py b/sphinx/search/nl.py index a610b12..cb5e8c4 100644 --- a/sphinx/search/nl.py +++ b/sphinx/search/nl.py @@ -9,7 +9,7 @@ import snowballstemmer from sphinx.search import SearchLanguage, parse_stop_word dutch_stopwords = parse_stop_word(''' -| source: http://snowball.tartarus.org/algorithms/dutch/stop.txt +| source: https://snowball.tartarus.org/algorithms/dutch/stop.txt de | the en | and van | of, from @@ -120,7 +120,7 @@ class SearchDutch(SearchLanguage): js_stemmer_rawcode = 'dutch-stemmer.js' stopwords = dutch_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('dutch') def stem(self, word: str) -> str: diff --git a/sphinx/search/no.py b/sphinx/search/no.py index a69380b..aa7c104 100644 --- a/sphinx/search/no.py +++ b/sphinx/search/no.py @@ -9,7 +9,7 @@ import snowballstemmer from sphinx.search import SearchLanguage, parse_stop_word norwegian_stopwords = parse_stop_word(''' -| source: http://snowball.tartarus.org/algorithms/norwegian/stop.txt +| source: https://snowball.tartarus.org/algorithms/norwegian/stop.txt og | and i | in jeg | I @@ -195,7 +195,7 @@ class SearchNorwegian(SearchLanguage): js_stemmer_rawcode = 'norwegian-stemmer.js' stopwords = norwegian_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('norwegian') def stem(self, word: str) -> str: diff --git a/sphinx/search/pt.py b/sphinx/search/pt.py index 908a417..0cf9610 100644 --- a/sphinx/search/pt.py +++ b/sphinx/search/pt.py @@ -9,7 +9,7 @@ import snowballstemmer from sphinx.search import SearchLanguage, parse_stop_word portuguese_stopwords = parse_stop_word(''' -| source: http://snowball.tartarus.org/algorithms/portuguese/stop.txt +| source: https://snowball.tartarus.org/algorithms/portuguese/stop.txt de | of, from a | the; to, at; her o | the; him @@ -254,7 +254,7 @@ class SearchPortuguese(SearchLanguage): js_stemmer_rawcode = 'portuguese-stemmer.js' stopwords = portuguese_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('portuguese') def stem(self, word: str) -> str: diff --git a/sphinx/search/ro.py b/sphinx/search/ro.py index b6c9d67..f15b7a6 100644 --- a/sphinx/search/ro.py +++ b/sphinx/search/ro.py @@ -15,7 +15,7 @@ class SearchRomanian(SearchLanguage): js_stemmer_rawcode = 'romanian-stemmer.js' stopwords: set[str] = set() - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('romanian') def stem(self, word: str) -> str: diff --git a/sphinx/search/ru.py b/sphinx/search/ru.py index b8412c1..d6b817e 100644 --- a/sphinx/search/ru.py +++ b/sphinx/search/ru.py @@ -9,7 +9,7 @@ import snowballstemmer from sphinx.search import SearchLanguage, parse_stop_word russian_stopwords = parse_stop_word(''' -| source: http://snowball.tartarus.org/algorithms/russian/stop.txt +| source: https://snowball.tartarus.org/algorithms/russian/stop.txt и | and в | in/into во | alternative form @@ -244,7 +244,7 @@ class SearchRussian(SearchLanguage): js_stemmer_rawcode = 'russian-stemmer.js' stopwords = russian_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('russian') def stem(self, word: str) -> str: diff --git a/sphinx/search/sv.py b/sphinx/search/sv.py index 88cc560..b90e227 100644 --- a/sphinx/search/sv.py +++ b/sphinx/search/sv.py @@ -9,7 +9,7 @@ import snowballstemmer from sphinx.search import SearchLanguage, parse_stop_word swedish_stopwords = parse_stop_word(''' -| source: http://snowball.tartarus.org/algorithms/swedish/stop.txt +| source: https://snowball.tartarus.org/algorithms/swedish/stop.txt och | and det | it, this/that att | to (with infinitive) @@ -133,7 +133,7 @@ class SearchSwedish(SearchLanguage): js_stemmer_rawcode = 'swedish-stemmer.js' stopwords = swedish_stopwords - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('swedish') def stem(self, word: str) -> str: diff --git a/sphinx/search/tr.py b/sphinx/search/tr.py index f4a865c..fdfc18a 100644 --- a/sphinx/search/tr.py +++ b/sphinx/search/tr.py @@ -15,7 +15,7 @@ class SearchTurkish(SearchLanguage): js_stemmer_rawcode = 'turkish-stemmer.js' stopwords: set[str] = set() - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: self.stemmer = snowballstemmer.stemmer('turkish') def stem(self, word: str) -> str: diff --git a/sphinx/search/zh.py b/sphinx/search/zh.py index 2a3a6e7..e40c9a9 100644 --- a/sphinx/search/zh.py +++ b/sphinx/search/zh.py @@ -4,14 +4,13 @@ from __future__ import annotations import os import re -from typing import TYPE_CHECKING, Dict, List import snowballstemmer from sphinx.search import SearchLanguage try: - import jieba + import jieba # type: ignore[import-not-found] JIEBA = True except ImportError: JIEBA = False @@ -227,7 +226,7 @@ class SearchChinese(SearchLanguage): latin1_letters = re.compile(r'[a-zA-Z0-9_]+') latin_terms: list[str] = [] - def init(self, options: dict) -> None: + def init(self, options: dict[str, str]) -> None: if JIEBA: dict_path = options.get('dict') if dict_path and os.path.isfile(dict_path): |