Coverage for src/debputy/lsp/lsp_debian_copyright.py: 18%
173 statements
« prev ^ index » next — coverage.py v7.2.7, created at 2024-04-07 12:14 +0200
1import re
2from typing import (
3 Union,
4 Sequence,
5 Tuple,
6 Iterator,
7 Optional,
8 Iterable,
9 Mapping,
10 List,
11)
13from lsprotocol.types import (
14 DiagnosticSeverity,
15 Range,
16 Diagnostic,
17 Position,
18 CompletionItem,
19 CompletionList,
20 CompletionParams,
21 TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL,
22 DiagnosticRelatedInformation,
23 Location,
24 HoverParams,
25 Hover,
26 TEXT_DOCUMENT_CODE_ACTION,
27 SemanticTokens,
28 SemanticTokensParams,
29 FoldingRangeParams,
30 FoldingRange,
31)
33from debputy.linting.lint_util import LintState
34from debputy.lsp.lsp_debian_control_reference_data import (
35 _DEP5_HEADER_FIELDS,
36 _DEP5_FILES_FIELDS,
37 Deb822KnownField,
38 _DEP5_LICENSE_FIELDS,
39 Dep5FileMetadata,
40)
41from debputy.lsp.lsp_features import (
42 lint_diagnostics,
43 lsp_completer,
44 lsp_hover,
45 lsp_standard_handler,
46 lsp_folding_ranges,
47 lsp_semantic_tokens_full,
48)
49from debputy.lsp.lsp_generic_deb822 import (
50 deb822_completer,
51 deb822_hover,
52 deb822_folding_ranges,
53 deb822_semantic_tokens_full,
54)
55from debputy.lsp.quickfixes import (
56 propose_correct_text_quick_fix,
57)
58from debputy.lsp.spellchecking import default_spellchecker
59from debputy.lsp.text_util import (
60 normalize_dctrl_field_name,
61 LintCapablePositionCodec,
62 detect_possible_typo,
63 te_range_to_lsp,
64)
65from debputy.lsp.vendoring._deb822_repro import (
66 parse_deb822_file,
67 Deb822FileElement,
68 Deb822ParagraphElement,
69)
70from debputy.lsp.vendoring._deb822_repro.parsing import (
71 Deb822KeyValuePairElement,
72 LIST_SPACE_SEPARATED_INTERPRETATION,
73)
74from debputy.lsp.vendoring._deb822_repro.tokens import (
75 Deb822Token,
76)
78try:
79 from debputy.lsp.vendoring._deb822_repro.locatable import (
80 Position as TEPosition,
81 Range as TERange,
82 START_POSITION,
83 )
85 from pygls.server import LanguageServer
86 from pygls.workspace import TextDocument
87except ImportError:
88 pass
# Matches whitespace or ":" — characters that cannot occur in a deb822 field
# name.  NOTE(review): not referenced anywhere in this chunk of the module;
# presumably used elsewhere or left over — verify before removing.
_CONTAINS_SPACE_OR_COLON = re.compile(r"[\s:]")
# Language IDs under which editors announce a debian/copyright buffer; all
# handlers below are registered for every one of these.
_LANGUAGE_IDS = [
    "debian/copyright",
    # emacs's name
    "debian-copyright",
    # vim's name
    "debcopyright",
]
# Shared file-format metadata (field definitions for DEP-5 stanzas).
_DEP5_FILE_METADATA = Dep5FileMetadata()
# Register the generic LSP handlers for code actions and will-save-wait-until.
lsp_standard_handler(_LANGUAGE_IDS, TEXT_DOCUMENT_CODE_ACTION)
lsp_standard_handler(_LANGUAGE_IDS, TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)
@lsp_hover(_LANGUAGE_IDS)
def _debian_copyright_hover(
    ls: "LanguageServer",
    params: HoverParams,
) -> Optional[Hover]:
    """Provide hover documentation for debian/copyright.

    Thin wrapper delegating to the generic deb822 hover handler with the
    DEP-5 file metadata.
    """
    hover_result = deb822_hover(ls, params, _DEP5_FILE_METADATA)
    return hover_result
@lsp_completer(_LANGUAGE_IDS)
def _debian_copyright_completions(
    ls: "LanguageServer",
    params: CompletionParams,
) -> Optional[Union[CompletionList, Sequence[CompletionItem]]]:
    """Provide completion proposals for debian/copyright.

    Thin wrapper delegating to the generic deb822 completer with the
    DEP-5 file metadata.
    """
    completions = deb822_completer(ls, params, _DEP5_FILE_METADATA)
    return completions
@lsp_folding_ranges(_LANGUAGE_IDS)
def _debian_copyright_folding_ranges(
    ls: "LanguageServer",
    params: FoldingRangeParams,
) -> Optional[Sequence[FoldingRange]]:
    """Provide folding ranges (one per stanza) for debian/copyright.

    Thin wrapper delegating to the generic deb822 folding-range handler
    with the DEP-5 file metadata.
    """
    ranges = deb822_folding_ranges(ls, params, _DEP5_FILE_METADATA)
    return ranges
130def _deb822_token_iter(
131 tokens: Iterable[Deb822Token],
132) -> Iterator[Tuple[Deb822Token, int, int, int, int, int]]:
133 line_no = 0
134 line_offset = 0
136 for token in tokens:
137 start_line = line_no
138 start_line_offset = line_offset
140 newlines = token.text.count("\n")
141 line_no += newlines
142 text_len = len(token.text)
143 if newlines:
144 if token.text.endswith("\n"):
145 line_offset = 0
146 else:
147 # -2, one to remove the "\n" and one to get 0-offset
148 line_offset = text_len - token.text.rindex("\n") - 2
149 else:
150 line_offset += text_len
152 yield token, start_line, start_line_offset, line_no, line_offset
def _paragraph_representation_field(
    paragraph: Deb822ParagraphElement,
) -> Deb822KeyValuePairElement:
    """Return the stanza's first key-value pair.

    Used as the anchor for stanza-level diagnostics (e.g. "missing field").
    Raises ``StopIteration`` if the stanza has no fields at all.
    """
    kvpairs = paragraph.iter_parts_of_type(Deb822KeyValuePairElement)
    return next(iter(kvpairs))
def _diagnostics_for_paragraph(
    stanza: Deb822ParagraphElement,
    stanza_position: "TEPosition",
    known_fields: Mapping[str, Deb822KnownField],
    other_known_fields: Mapping[str, Deb822KnownField],
    is_files_or_license_paragraph: bool,
    doc_reference: str,
    position_codec: "LintCapablePositionCodec",
    lines: List[str],
    diagnostics: List[Diagnostic],
) -> None:
    """Append diagnostics for a single stanza of a DEP-5 debian/copyright file.

    The following checks are performed:

      * missing mandatory fields (anchored on the stanza's first field),
      * unknown field names that look like typos of known fields
        (with "correct the text" quick-fixes),
      * fields that are only valid in the other kind of stanza,
      * fields with an empty value,
      * per-field diagnostics supplied by the field definition, including
        optional spellchecking of the field value,
      * fields redundantly set to their default value,
      * duplicate definitions of the same field within the stanza.

    :param stanza: The stanza to check.
    :param stanza_position: Position of the stanza in the file.
    :param known_fields: Field definitions valid for this stanza type.
    :param other_known_fields: Field definitions for the *other* stanza type
      (used to flag misplaced fields).
    :param is_files_or_license_paragraph: True for a Files/License stanza,
      False for the Header stanza.
    :param doc_reference: Document URI (for related-information locations).
    :param position_codec: Codec for converting ranges into client units.
    :param lines: The raw lines of the document.
    :param diagnostics: Output list; diagnostics are appended in place.
    """
    representation_field = _paragraph_representation_field(stanza)
    representation_field_pos = representation_field.position_in_parent().relative_to(
        stanza_position
    )
    representation_field_range_server_units = te_range_to_lsp(
        TERange.from_position_and_size(
            representation_field_pos, representation_field.size()
        )
    )
    representation_field_range = position_codec.range_to_client_units(
        lines,
        representation_field_range_server_units,
    )
    # Missing mandatory fields are reported on the stanza's first field.
    for known_field in known_fields.values():
        missing_field_severity = known_field.missing_field_severity
        if missing_field_severity is None or known_field.name in stanza:
            continue
        diagnostics.append(
            Diagnostic(
                representation_field_range,
                f"Stanza is missing field {known_field.name}",
                severity=missing_field_severity,
                source="debputy",
            )
        )
    # Maps normalized lowercase field name ->
    #   (field name, normalized field name, range of first occurrence,
    #    list of ranges of duplicate occurrences)
    seen_fields = {}
    for kvpair in stanza.iter_parts_of_type(Deb822KeyValuePairElement):
        field_name_token = kvpair.field_token
        field_name = field_name_token.text
        field_name_lc = field_name.lower()
        normalized_field_name_lc = normalize_dctrl_field_name(field_name_lc)
        known_field = known_fields.get(normalized_field_name_lc)
        field_value = stanza[field_name]
        field_range_te = kvpair.range_in_parent().relative_to(stanza_position)
        field_position_te = field_range_te.start_pos
        field_range_server_units = te_range_to_lsp(field_range_te)
        field_range = position_codec.range_to_client_units(
            lines,
            field_range_server_units,
        )
        field_name_typo_detected = False
        existing_field_range = seen_fields.get(normalized_field_name_lc)
        if existing_field_range is not None:
            existing_field_range[3].append(field_range)
        else:
            normalized_field_name = normalize_dctrl_field_name(field_name)
            # Bug fix: key by the *normalized* name so the lookup above can
            # find it again.  Previously the key was `field_name_lc`, so
            # "Foo" and "XB-Foo" were never matched as duplicates even
            # though the duplicate diagnostic below explicitly states that
            # X[BCS]- variants are the same field.
            seen_fields[normalized_field_name_lc] = (
                field_name,
                normalized_field_name,
                field_range,
                [],
            )
        if known_field is None:
            # Unknown field; check whether it looks like a typo of a known one.
            candidates = detect_possible_typo(normalized_field_name_lc, known_fields)
            if candidates:
                known_field = known_fields[candidates[0]]
                token_range_server_units = te_range_to_lsp(
                    TERange.from_position_and_size(
                        field_position_te, kvpair.field_token.size()
                    )
                )
                # Narrow the range to just the field-name token for the typo
                # diagnostic (and for the diagnostics further below).
                field_range = position_codec.range_to_client_units(
                    lines,
                    token_range_server_units,
                )
                field_name_typo_detected = True
                diagnostics.append(
                    Diagnostic(
                        field_range,
                        f'The "{field_name}" looks like a typo of "{known_field.name}".',
                        severity=DiagnosticSeverity.Warning,
                        source="debputy",
                        data=[
                            propose_correct_text_quick_fix(known_fields[m].name)
                            for m in candidates
                        ],
                    )
                )
        if known_field is None:
            # Still unknown; flag it if it belongs in the other stanza kind.
            known_else_where = other_known_fields.get(normalized_field_name_lc)
            if known_else_where is not None:
                intended_usage = (
                    "Header" if is_files_or_license_paragraph else "Files/License"
                )
                diagnostics.append(
                    Diagnostic(
                        field_range,
                        f'The {field_name} is defined for use in the "{intended_usage}" stanza.'
                        f" Please move it to the right place or remove it",
                        severity=DiagnosticSeverity.Error,
                        source="debputy",
                    )
                )
            continue
        if field_value.strip() == "":
            diagnostics.append(
                Diagnostic(
                    field_range,
                    f"The {field_name} has no value. Either provide a value or remove it.",
                    severity=DiagnosticSeverity.Error,
                    source="debputy",
                )
            )
            continue
        # Field-specific diagnostics supplied by the field definition.
        diagnostics.extend(
            known_field.field_diagnostics(
                kvpair,
                stanza,
                stanza_position,
                position_codec,
                lines,
                field_name_typo_reported=field_name_typo_detected,
            )
        )
        if known_field.spellcheck_value:
            words = kvpair.interpret_as(LIST_SPACE_SEPARATED_INTERPRETATION)
            spell_checker = default_spellchecker()
            value_position = kvpair.value_element.position_in_parent().relative_to(
                field_position_te
            )
            for word_ref in words.iter_value_references():
                token = word_ref.value
                for word, pos, endpos in spell_checker.iter_words(token):
                    corrections = spell_checker.provide_corrections_for(word)
                    if not corrections:
                        continue
                    word_loc = word_ref.locatable
                    word_pos_te = word_loc.position_in_parent().relative_to(
                        value_position
                    )
                    if pos:
                        word_pos_te = TEPosition(0, pos).relative_to(word_pos_te)
                    # A TERange anchored at START_POSITION doubles as a "size"
                    # for from_position_and_size below.
                    word_range = TERange(
                        START_POSITION,
                        TEPosition(0, endpos - pos),
                    )
                    word_range_server_units = te_range_to_lsp(
                        TERange.from_position_and_size(word_pos_te, word_range)
                    )
                    word_range = position_codec.range_to_client_units(
                        lines,
                        word_range_server_units,
                    )
                    diagnostics.append(
                        Diagnostic(
                            word_range,
                            f'Spelling "{word}"',
                            severity=DiagnosticSeverity.Hint,
                            source="debputy",
                            data=[
                                propose_correct_text_quick_fix(c) for c in corrections
                            ],
                        )
                    )
        if known_field.warn_if_default and field_value == known_field.default_value:
            diagnostics.append(
                Diagnostic(
                    field_range,
                    f"The {field_name} is redundant as it is set to the default value and the field should only be"
                    " used in exceptional cases.",
                    severity=DiagnosticSeverity.Warning,
                    source="debputy",
                )
            )
    # Report duplicate field definitions, each linking back to the first
    # occurrence via related information.
    for (
        field_name,
        normalized_field_name,
        field_range,
        duplicates,
    ) in seen_fields.values():
        if not duplicates:
            continue
        related_information = [
            DiagnosticRelatedInformation(
                location=Location(doc_reference, field_range),
                message=f"First definition of {field_name}",
            )
        ]
        related_information.extend(
            DiagnosticRelatedInformation(
                location=Location(doc_reference, r),
                message=f"Duplicate of {field_name}",
            )
            for r in duplicates
        )
        for dup_range in duplicates:
            diagnostics.append(
                Diagnostic(
                    dup_range,
                    f"The {normalized_field_name} field name was used multiple times in this stanza."
                    f" Please ensure the field is only used once per stanza. Note that {normalized_field_name} and"
                    f" X[BCS]-{normalized_field_name} are considered the same field.",
                    severity=DiagnosticSeverity.Error,
                    source="debputy",
                    related_information=related_information,
                )
            )
def _scan_for_syntax_errors_and_token_level_diagnostics(
    deb822_file: Deb822FileElement,
    position_codec: LintCapablePositionCodec,
    lines: List[str],
    diagnostics: List[Diagnostic],
) -> int:
    """Scan the token stream for parse errors and comment spelling issues.

    Appends a Diagnostic for every error token and for every misspelled
    word inside comment tokens.  Returns the (0-based) line of the first
    error token, or ``len(lines) + 1`` when no error token was seen.
    """
    earliest_error_line = len(lines) + 1
    checker = default_spellchecker()
    located_tokens = _deb822_token_iter(deb822_file.iter_tokens())
    for token, first_line, first_col, last_line, last_col in located_tokens:
        if token.is_error:
            earliest_error_line = min(earliest_error_line, first_line)
            error_range = position_codec.range_to_client_units(
                lines,
                Range(
                    Position(first_line, first_col),
                    Position(last_line, last_col),
                ),
            )
            diagnostics.append(
                Diagnostic(
                    error_range,
                    "Syntax error",
                    severity=DiagnosticSeverity.Error,
                    source="debputy (python-debian parser)",
                )
            )
            continue
        if not token.is_comment:
            continue
        # Comment tokens start at column 0, so word offsets within the
        # token text are also column offsets within the line.
        for word, word_start, word_end in checker.iter_words(token.text):
            corrections = checker.provide_corrections_for(word)
            if not corrections:
                continue
            word_range = position_codec.range_to_client_units(
                lines,
                Range(
                    Position(first_line, word_start),
                    Position(first_line, word_end),
                ),
            )
            diagnostics.append(
                Diagnostic(
                    word_range,
                    f'Spelling "{word}"',
                    severity=DiagnosticSeverity.Hint,
                    source="debputy",
                    data=[propose_correct_text_quick_fix(c) for c in corrections],
                )
            )
    return earliest_error_line
@lint_diagnostics(_LANGUAGE_IDS)
def _lint_debian_copyright(
    lint_state: LintState,
) -> Optional[List[Diagnostic]]:
    """Lint debian/copyright, returning diagnostics for DEP-5 files only.

    Returns ``None`` when the file is not machine-readable (no ``Format``
    field in the first stanza); such files are not linted.  Stanzas at or
    after the first syntax error are skipped.
    """
    lines = lint_state.lines
    position_codec = lint_state.position_codec
    doc_reference = lint_state.doc_uri
    diagnostics: List[Diagnostic] = []
    deb822_file = parse_deb822_file(
        lines,
        accept_files_with_duplicated_fields=True,
        accept_files_with_error_tokens=True,
    )
    first_error = _scan_for_syntax_errors_and_token_level_diagnostics(
        deb822_file,
        position_codec,
        lines,
        diagnostics,
    )
    is_dep5 = False
    for stanza_no, stanza in enumerate(list(deb822_file), start=1):
        stanza_pos = stanza.position_in_file()
        if stanza_pos.line_position >= first_error:
            break
        in_header = stanza_no == 1
        if in_header:
            if "Format" not in stanza:
                # Not a machine-readable copyright file; stop entirely.
                break
            is_dep5 = True
            known_fields = _DEP5_HEADER_FIELDS
            other_known_fields = _DEP5_FILES_FIELDS
        else:
            known_fields = (
                _DEP5_FILES_FIELDS if "Files" in stanza else _DEP5_LICENSE_FIELDS
            )
            other_known_fields = _DEP5_HEADER_FIELDS
        _diagnostics_for_paragraph(
            stanza,
            stanza_pos,
            known_fields,
            other_known_fields,
            not in_header,
            doc_reference,
            position_codec,
            lines,
            diagnostics,
        )
    return diagnostics if is_dep5 else None
@lsp_semantic_tokens_full(_LANGUAGE_IDS)
def _semantic_tokens_full(
    ls: "LanguageServer",
    request: SemanticTokensParams,
) -> Optional[SemanticTokens]:
    """Provide full-document semantic tokens for debian/copyright.

    Thin wrapper delegating to the generic deb822 semantic-token handler
    with the DEP-5 file metadata.
    """
    tokens = deb822_semantic_tokens_full(ls, request, _DEP5_FILE_METADATA)
    return tokens