diff options
Diffstat (limited to 'src/debputy/lsp/lsp_debian_control.py')
-rw-r--r-- | src/debputy/lsp/lsp_debian_control.py | 797 |
1 files changed, 797 insertions, 0 deletions
"""Language server support for ``debian/control`` files.

Module path: ``src/debputy/lsp/lsp_debian_control.py``.

The module wires the debian/control specific handlers (diagnostics, folding
ranges, completion, hover, quick fixes, on-save whitespace trimming and
semantic tokens) into a pygls ``LanguageServer`` and implements the linting
logic that produces the diagnostics.
"""
from typing import (
    Union,
    Sequence,
    Tuple,
    Iterator,
    Optional,
    Iterable,
    Mapping,
    List,
)

from debputy.lsp.vendoring._deb822_repro import (
    parse_deb822_file,
    Deb822FileElement,
    Deb822ParagraphElement,
)
from debputy.lsp.vendoring._deb822_repro.parsing import (
    Deb822KeyValuePairElement,
    LIST_SPACE_SEPARATED_INTERPRETATION,
)
from debputy.lsp.vendoring._deb822_repro.tokens import (
    Deb822Token,
    tokenize_deb822_file,
    Deb822FieldNameToken,
)
from lsprotocol.types import (
    DiagnosticSeverity,
    Range,
    Diagnostic,
    Position,
    DidOpenTextDocumentParams,
    DidChangeTextDocumentParams,
    FoldingRangeKind,
    FoldingRange,
    FoldingRangeParams,
    CompletionItem,
    CompletionList,
    CompletionParams,
    TEXT_DOCUMENT_DID_OPEN,
    TEXT_DOCUMENT_DID_CHANGE,
    TEXT_DOCUMENT_FOLDING_RANGE,
    TEXT_DOCUMENT_COMPLETION,
    TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL,
    DiagnosticRelatedInformation,
    Location,
    TEXT_DOCUMENT_HOVER,
    HoverParams,
    Hover,
    TEXT_DOCUMENT_CODE_ACTION,
    DiagnosticTag,
    SemanticTokensLegend,
    TEXT_DOCUMENT_SEMANTIC_TOKENS_FULL,
    SemanticTokens,
    SemanticTokensParams,
)

from debputy.lsp.lsp_debian_control_reference_data import (
    DctrlKnownField,
    BINARY_FIELDS,
    SOURCE_FIELDS,
    FieldValueClass,
    DctrlFileMetadata,
)
from debputy.lsp.lsp_features import (
    lint_diagnostics,
    lsp_completer,
    lsp_hover,
    lsp_standard_handler,
)
from debputy.lsp.lsp_generic_deb822 import deb822_completer, deb822_hover
from debputy.lsp.quickfixes import (
    propose_remove_line_quick_fix,
    range_compatible_with_remove_line_fix,
    propose_correct_text_quick_fix,
    provide_standard_quickfixes_from_diagnostics,
)
from debputy.lsp.spellchecking import default_spellchecker
from debputy.lsp.text_util import (
    on_save_trim_end_of_line_whitespace,
    normalize_dctrl_field_name,
    LintCapablePositionCodec,
    detect_possible_typo,
    te_range_to_lsp,
)
from debputy.util import _info, _error

# These imports are optional at module load time; the names are only used in
# quoted annotations and inside function bodies.  `register_dctrl_lsp` performs
# its own import check and reports the problem to the user.
try:
    from debputy.lsp.vendoring._deb822_repro.locatable import (
        Position as TEPosition,
        Range as TERange,
        START_POSITION,
    )

    from pygls.server import LanguageServer
    from pygls.workspace import TextDocument
except ImportError:
    pass


_LANGUAGE_IDS = [
    "debian/control",
    # emacs's name
    "debian-control",
    # vim's name
    "debcontrol",
]


SEMANTIC_TOKENS_LEGEND = SemanticTokensLegend(
    token_types=["keyword"],
    token_modifiers=[],
)
_DCTRL_FILE_METADATA = DctrlFileMetadata()


def register_dctrl_lsp(ls: "LanguageServer") -> None:
    """Register all debian/control handlers on the language server *ls*.

    The import of ``Locatable`` is only a feature probe; the name itself is
    not used afterwards.
    """
    try:
        from debputy.lsp.vendoring._deb822_repro.locatable import Locatable
    except ImportError:
        # NOTE(review): presumably `_error` does not return (aborts) - confirm.
        # Otherwise the registrations below still run without the dependency.
        _error(
            'Sorry; this feature requires a newer version of python-debian (with "Locatable").'
        )

    ls.feature(TEXT_DOCUMENT_DID_OPEN)(_diagnostics_debian_control)
    ls.feature(TEXT_DOCUMENT_DID_CHANGE)(_diagnostics_debian_control)
    ls.feature(TEXT_DOCUMENT_FOLDING_RANGE)(_detect_folding_ranges_debian_control)
    ls.feature(TEXT_DOCUMENT_COMPLETION)(_debian_control_completions)
    ls.feature(TEXT_DOCUMENT_CODE_ACTION)(provide_standard_quickfixes_from_diagnostics)
    ls.feature(TEXT_DOCUMENT_HOVER)(_debian_control_hover)
    ls.feature(TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)(on_save_trim_end_of_line_whitespace)
    ls.feature(TEXT_DOCUMENT_SEMANTIC_TOKENS_FULL, SEMANTIC_TOKENS_LEGEND)(
        _handle_semantic_tokens_full
    )


lsp_standard_handler(_LANGUAGE_IDS, TEXT_DOCUMENT_CODE_ACTION)
lsp_standard_handler(_LANGUAGE_IDS, TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)


@lsp_hover(_LANGUAGE_IDS)
def _debian_control_hover(
    ls: "LanguageServer",
    params: HoverParams,
) -> Optional[Hover]:
    """Answer a hover request by delegating to the generic deb822 hover."""
    return deb822_hover(ls, params, _DCTRL_FILE_METADATA)


@lsp_completer(_LANGUAGE_IDS)
def _debian_control_completions(
    ls: "LanguageServer",
    params: CompletionParams,
) -> Optional[Union[CompletionList, Sequence[CompletionItem]]]:
    """Answer a completion request by delegating to the generic deb822 completer."""
    return deb822_completer(ls, params, _DCTRL_FILE_METADATA)


def _detect_folding_ranges_debian_control(
    ls: "LanguageServer",
    params: FoldingRangeParams,
) -> Optional[Sequence[FoldingRange]]:
    """Return one comment folding range per run of consecutive comment tokens.

    Each range spans from the first line of the run to the last line occupied
    by a comment token of that run.  A run that reaches the end of the
    document is reported as well.
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    comment_start = -1
    comment_end = -1
    folding_ranges = []

    def _close_comment_run() -> None:
        # Emit the range for the run that is currently open.
        folding_range = FoldingRange(
            comment_start,
            comment_end,
            kind=FoldingRangeKind.Comment,
        )
        folding_ranges.append(folding_range)
        _info(f"Detected folding range: {folding_range}")

    for (
        token,
        start_line,
        _start_offset,
        end_line,
        end_offset,
    ) in _deb822_token_iter(tokenize_deb822_file(doc.lines)):
        if token.is_comment:
            if comment_start < 0:
                comment_start = start_line
                _info(f"Detected new comment: {start_line}")
            # A token ending in "\n" reports the following line (offset 0) as
            # its end position; the comment itself stops one line earlier.
            comment_end = end_line if end_offset else max(start_line, end_line - 1)
        elif comment_start > -1:
            # Build the range *before* resetting the start marker.
            _close_comment_run()
            comment_start = -1

    if comment_start > -1:
        # The document ended while a comment run was still open.
        _close_comment_run()

    return folding_ranges


def _deb822_token_iter(
    tokens: Iterable[Deb822Token],
) -> Iterator[Tuple[Deb822Token, int, int, int, int]]:
    """Yield each token together with its start and end position.

    The yielded tuple is ``(token, start_line, start_offset, end_line,
    end_offset)``.  Lines and offsets are 0-based and computed purely from the
    token texts; the end position is exclusive, so it equals the start
    position of the next token.
    """
    line_no = 0
    line_offset = 0

    for token in tokens:
        start_line = line_no
        start_line_offset = line_offset

        newlines = token.text.count("\n")
        line_no += newlines
        text_len = len(token.text)
        if newlines:
            if token.text.endswith("\n"):
                line_offset = 0
            else:
                # Number of characters following the last "\n" (-1 removes the
                # "\n" itself).  This is the exclusive end column, consistent
                # with the `+= text_len` branch below.
                line_offset = text_len - token.text.rindex("\n") - 1
        else:
            line_offset += text_len

        yield token, start_line, start_line_offset, line_no, line_offset


def _paragraph_representation_field(
    paragraph: Deb822ParagraphElement,
) -> Deb822KeyValuePairElement:
    """Return the first field of *paragraph*.

    The field is used as the anchor for diagnostics that concern the stanza
    as a whole.  Raises ``StopIteration`` if the paragraph has no field.
    """
    return next(iter(paragraph.iter_parts_of_type(Deb822KeyValuePairElement)))


def _extract_first_value_and_position(
    kvpair: Deb822KeyValuePairElement,
    stanza_pos: "TEPosition",
    position_codec: "LintCapablePositionCodec",
    lines: List[str],
) -> Tuple[Optional[str], Optional[Range]]:
    """Return the first space separated value of *kvpair* and its range.

    The range is expressed in client units.  ``(None, None)`` is returned
    when the field has no value at all.
    """
    kvpair_pos = kvpair.position_in_parent().relative_to(stanza_pos)
    value_element_pos = kvpair.value_element.position_in_parent().relative_to(
        kvpair_pos
    )
    for value_ref in kvpair.interpret_as(
        LIST_SPACE_SEPARATED_INTERPRETATION
    ).iter_value_references():
        v = value_ref.value
        section_value_loc = value_ref.locatable
        value_range_te = section_value_loc.range_in_parent().relative_to(
            value_element_pos
        )
        section_range_server_units = te_range_to_lsp(value_range_te)
        section_range = position_codec.range_to_client_units(
            lines, section_range_server_units
        )
        # Only the first value is of interest.
        return v, section_range
    return None, None


def _binary_package_checks(
    stanza: Deb822ParagraphElement,
    stanza_position: "TEPosition",
    source_stanza: Deb822ParagraphElement,
    representation_field_range: Range,
    position_codec: "LintCapablePositionCodec",
    lines: List[str],
    diagnostics: List[Diagnostic],
) -> None:
    """Run the checks that only apply to binary package stanzas.

    Covers ``Multi-Arch: same`` on ``Architecture: all`` packages and the
    ``Package-Type``/``Section`` expectations for udebs.  Findings are
    appended to *diagnostics*; *representation_field_range* is used whenever
    the offending field (or its value) is absent from the stanza.
    """
    ma_kvpair = stanza.get_kvpair_element("Multi-Arch", use_get=True)
    arch = stanza.get("Architecture", "any")
    if arch == "all" and ma_kvpair is not None:
        ma_value, ma_value_range = _extract_first_value_and_position(
            ma_kvpair,
            stanza_position,
            position_codec,
            lines,
        )
        if ma_value == "same":
            diagnostics.append(
                Diagnostic(
                    ma_value_range,
                    "Multi-Arch: same is not valid for Architecture: all packages. Maybe you want foreign?",
                    severity=DiagnosticSeverity.Error,
                    source="debputy",
                )
            )

    package_name = stanza.get("Package", "")
    source_section = source_stanza.get("Section")
    section_kvpair = stanza.get_kvpair_element("Section", use_get=True)
    section: Optional[str] = None
    section_range: Optional[Range] = None
    if section_kvpair is not None:
        section, section_range = _extract_first_value_and_position(
            section_kvpair,
            stanza_position,
            position_codec,
            lines,
        )
    if section_range is None:
        # Either there is no Section field or it has no value to point at.
        section_range = representation_field_range
    effective_section = section or source_section or "unknown"
    package_type = stanza.get("Package-Type", "")
    component_prefix = ""
    if "/" in effective_section:
        component_prefix, effective_section = effective_section.split("/", maxsplit=1)
        component_prefix += "/"

    if package_name.endswith("-udeb") or package_type == "udeb":
        if package_type != "udeb":
            package_type_kvpair = stanza.get_kvpair_element(
                "Package-Type", use_get=True
            )
            package_type_range = None
            if package_type_kvpair is not None:
                _, package_type_range = _extract_first_value_and_position(
                    package_type_kvpair,
                    stanza_position,
                    position_codec,
                    lines,
                )
            if package_type_range is None:
                package_type_range = representation_field_range
            diagnostics.append(
                Diagnostic(
                    package_type_range,
                    'The Package-Type should be "udeb" given the package name',
                    severity=DiagnosticSeverity.Warning,
                    source="debputy",
                )
            )
        if effective_section != "debian-installer":
            quickfix_data = None
            if section is not None:
                # A text replacement is only offered when the stanza has its
                # own Section value to replace.
                quickfix_data = [
                    propose_correct_text_quick_fix(
                        f"{component_prefix}debian-installer"
                    )
                ]
            diagnostics.append(
                Diagnostic(
                    section_range,
                    f'The Section should be "{component_prefix}debian-installer" for udebs',
                    severity=DiagnosticSeverity.Warning,
                    source="debputy",
                    data=quickfix_data,
                )
            )


def _diagnostics_for_paragraph(
    stanza: Deb822ParagraphElement,
    stanza_position: "TEPosition",
    source_stanza: Deb822ParagraphElement,
    known_fields: Mapping[str, DctrlKnownField],
    other_known_fields: Mapping[str, DctrlKnownField],
    is_binary_paragraph: bool,
    doc_reference: str,
    position_codec: "LintCapablePositionCodec",
    lines: List[str],
    diagnostics: List[Diagnostic],
) -> None:
    """Append all stanza level diagnostics for *stanza* to *diagnostics*.

    *known_fields* describes the fields valid in this kind of stanza and
    *other_known_fields* those valid in the other kind (used to detect
    misplaced fields).  *doc_reference* is used as the document URI in the
    related-information locations of duplicate-field diagnostics.
    """
    representation_field = _paragraph_representation_field(stanza)
    representation_field_pos = representation_field.position_in_parent().relative_to(
        stanza_position
    )
    representation_field_range_server_units = te_range_to_lsp(
        TERange.from_position_and_size(
            representation_field_pos, representation_field.size()
        )
    )
    representation_field_range = position_codec.range_to_client_units(
        lines,
        representation_field_range_server_units,
    )
    for known_field in known_fields.values():
        missing_field_severity = known_field.missing_field_severity
        if missing_field_severity is None or known_field.name in stanza:
            continue

        if known_field.inherits_from_source and known_field.name in source_stanza:
            continue

        diagnostics.append(
            Diagnostic(
                representation_field_range,
                f"Stanza is missing field {known_field.name}",
                severity=missing_field_severity,
                source="debputy",
            )
        )

    if is_binary_paragraph:
        _binary_package_checks(
            stanza,
            stanza_position,
            source_stanza,
            representation_field_range,
            position_codec,
            lines,
            diagnostics,
        )

    # normalized lower-case field name ->
    #   (first spelling, normalized name, range of first use, duplicate ranges)
    seen_fields = {}

    for kvpair in stanza.iter_parts_of_type(Deb822KeyValuePairElement):
        field_name_token = kvpair.field_token
        field_name = field_name_token.text
        field_name_lc = field_name.lower()
        normalized_field_name_lc = normalize_dctrl_field_name(field_name_lc)
        known_field = known_fields.get(normalized_field_name_lc)
        field_value = stanza[field_name]
        field_range_te = kvpair.range_in_parent().relative_to(stanza_position)
        field_position_te = field_range_te.start_pos
        field_range_server_units = te_range_to_lsp(field_range_te)
        field_range = position_codec.range_to_client_units(
            lines,
            field_range_server_units,
        )
        field_name_typo_detected = False
        existing_field_range = seen_fields.get(normalized_field_name_lc)
        if existing_field_range is not None:
            existing_field_range[3].append(field_range)
        else:
            normalized_field_name = normalize_dctrl_field_name(field_name)
            # Store under the same (normalized) key that is used for the
            # lookup above, so that e.g. "Foo" and "XB-Foo" collide.
            seen_fields[normalized_field_name_lc] = (
                field_name,
                normalized_field_name,
                field_range,
                [],
            )

        if known_field is None:
            candidates = detect_possible_typo(normalized_field_name_lc, known_fields)
            if candidates:
                known_field = known_fields[candidates[0]]
                token_range_server_units = te_range_to_lsp(
                    TERange.from_position_and_size(
                        field_position_te, kvpair.field_token.size()
                    )
                )
                # From here on, diagnostics for this field point at the field
                # name token only.
                field_range = position_codec.range_to_client_units(
                    lines,
                    token_range_server_units,
                )
                field_name_typo_detected = True
                diagnostics.append(
                    Diagnostic(
                        field_range,
                        f'The "{field_name}" looks like a typo of "{known_field.name}".',
                        severity=DiagnosticSeverity.Warning,
                        source="debputy",
                        data=[
                            propose_correct_text_quick_fix(known_fields[m].name)
                            for m in candidates
                        ],
                    )
                )
        if known_field is None:
            known_else_where = other_known_fields.get(normalized_field_name_lc)
            if known_else_where is not None:
                intended_usage = "Source" if is_binary_paragraph else "Package"
                diagnostics.append(
                    Diagnostic(
                        field_range,
                        f'The {field_name} is defined for use in the "{intended_usage}" stanza.'
                        f" Please move it to the right place or remove it",
                        severity=DiagnosticSeverity.Error,
                        source="debputy",
                    )
                )
            # Nothing more can be checked for a field we know nothing about.
            continue

        if field_value.strip() == "":
            diagnostics.append(
                Diagnostic(
                    field_range,
                    f"The {field_name} has no value. Either provide a value or remove it.",
                    severity=DiagnosticSeverity.Error,
                    source="debputy",
                )
            )
            continue
        diagnostics.extend(
            known_field.field_diagnostics(
                kvpair,
                stanza_position,
                position_codec,
                lines,
                field_name_typo_reported=field_name_typo_detected,
            )
        )
        if known_field.spellcheck_value:
            words = kvpair.interpret_as(LIST_SPACE_SEPARATED_INTERPRETATION)
            spell_checker = default_spellchecker()
            value_position = kvpair.value_element.position_in_parent().relative_to(
                field_position_te
            )
            for word_ref in words.iter_value_references():
                token = word_ref.value
                for word, pos, endpos in spell_checker.iter_words(token):
                    corrections = spell_checker.provide_corrections_for(word)
                    if not corrections:
                        continue
                    word_loc = word_ref.locatable
                    word_pos_te = word_loc.position_in_parent().relative_to(
                        value_position
                    )
                    if pos:
                        # The misspelled word starts inside the value token.
                        word_pos_te = TEPosition(0, pos).relative_to(word_pos_te)
                    word_size = TERange(
                        START_POSITION,
                        TEPosition(0, endpos - pos),
                    )
                    word_range_server_units = te_range_to_lsp(
                        TERange.from_position_and_size(word_pos_te, word_size)
                    )
                    word_range = position_codec.range_to_client_units(
                        lines,
                        word_range_server_units,
                    )
                    diagnostics.append(
                        Diagnostic(
                            word_range,
                            f'Spelling "{word}"',
                            severity=DiagnosticSeverity.Hint,
                            source="debputy",
                            data=[
                                propose_correct_text_quick_fix(c) for c in corrections
                            ],
                        )
                    )
        source_value = source_stanza.get(field_name)
        if known_field.warn_if_default and field_value == known_field.default_value:
            diagnostics.append(
                Diagnostic(
                    field_range,
                    f"The {field_name} is redundant as it is set to the default value and the field should only be"
                    " used in exceptional cases.",
                    severity=DiagnosticSeverity.Warning,
                    source="debputy",
                )
            )

        if known_field.inherits_from_source and field_value == source_value:
            if range_compatible_with_remove_line_fix(field_range):
                fix_data = propose_remove_line_quick_fix()
            else:
                fix_data = None
            # NOTE(review): other diagnostics pass a *list* of quick fixes as
            # `data`; confirm the code action handler accepts a bare one.
            diagnostics.append(
                Diagnostic(
                    field_range,
                    f"The field {field_name} duplicates the value from the Source stanza.",
                    severity=DiagnosticSeverity.Information,
                    source="debputy",
                    data=fix_data,
                )
            )
    for (
        field_name,
        normalized_field_name,
        field_range,
        duplicates,
    ) in seen_fields.values():
        if not duplicates:
            continue
        related_information = [
            DiagnosticRelatedInformation(
                location=Location(doc_reference, field_range),
                message=f"First definition of {field_name}",
            )
        ]
        related_information.extend(
            DiagnosticRelatedInformation(
                location=Location(doc_reference, r),
                message=f"Duplicate of {field_name}",
            )
            for r in duplicates
        )
        for dup_range in duplicates:
            diagnostics.append(
                Diagnostic(
                    dup_range,
                    f"The {normalized_field_name} field name was used multiple times in this stanza."
                    f" Please ensure the field is only used once per stanza. Note that {normalized_field_name} and"
                    f" X[BCS]-{normalized_field_name} are considered the same field.",
                    severity=DiagnosticSeverity.Error,
                    source="debputy",
                    related_information=related_information,
                )
            )


def _diagnostics_for_field_name(
    token: Deb822FieldNameToken,
    token_position: "TEPosition",
    known_field: DctrlKnownField,
    typo_detected: bool,
    position_codec: "LintCapablePositionCodec",
    lines: List[str],
    diagnostics: List[Diagnostic],
) -> None:
    """Append diagnostics about the spelling/status of a field *name*.

    Reports deprecated field names and, unless a typo was already reported
    (*typo_detected*), a spelling that differs from the canonical
    ``known_field.name``.  All diagnostics point at the field name token.
    """
    field_name = token.text
    # Defeat the case-insensitivity from python-debian
    field_name_cased = str(field_name)
    token_range_server_units = te_range_to_lsp(
        TERange.from_position_and_size(token_position, token.size())
    )
    token_range = position_codec.range_to_client_units(
        lines,
        token_range_server_units,
    )
    if known_field.deprecated_with_no_replacement:
        diagnostics.append(
            Diagnostic(
                token_range,
                f"{field_name_cased} is deprecated and no longer used",
                severity=DiagnosticSeverity.Warning,
                source="debputy",
                tags=[DiagnosticTag.Deprecated],
                data=propose_remove_line_quick_fix(),
            )
        )
    elif known_field.replaced_by is not None:
        diagnostics.append(
            Diagnostic(
                token_range,
                f"{field_name_cased} is a deprecated name for {known_field.replaced_by}",
                severity=DiagnosticSeverity.Warning,
                source="debputy",
                tags=[DiagnosticTag.Deprecated],
                data=propose_correct_text_quick_fix(known_field.replaced_by),
            )
        )

    if not typo_detected and field_name_cased != known_field.name:
        diagnostics.append(
            Diagnostic(
                token_range,
                f"Non-canonical spelling of {known_field.name}",
                severity=DiagnosticSeverity.Information,
                source="debputy",
                data=propose_correct_text_quick_fix(known_field.name),
            )
        )


def _scan_for_syntax_errors_and_token_level_diagnostics(
    deb822_file: Deb822FileElement,
    position_codec: LintCapablePositionCodec,
    lines: List[str],
    diagnostics: List[Diagnostic],
) -> int:
    """Report syntax errors and spell check comments, token by token.

    Returns the line of the first error token, or ``len(lines) + 1`` when the
    file has no error tokens.
    """
    first_error = len(lines) + 1
    spell_checker = default_spellchecker()
    for (
        token,
        start_line,
        start_offset,
        end_line,
        end_offset,
    ) in _deb822_token_iter(deb822_file.iter_tokens()):
        if token.is_error:
            first_error = min(first_error, start_line)
            start_pos = Position(
                start_line,
                start_offset,
            )
            end_pos = Position(
                end_line,
                end_offset,
            )
            token_range = position_codec.range_to_client_units(
                lines, Range(start_pos, end_pos)
            )
            diagnostics.append(
                Diagnostic(
                    token_range,
                    "Syntax error",
                    severity=DiagnosticSeverity.Error,
                    source="debputy (python-debian parser)",
                )
            )
        elif token.is_comment:
            for word, word_start, word_end in spell_checker.iter_words(token.text):
                corrections = spell_checker.provide_corrections_for(word)
                if not corrections:
                    continue
                # The word offsets are relative to the token text and are used
                # as columns on the token's start line.
                start_pos = Position(
                    start_line,
                    word_start,
                )
                end_pos = Position(
                    start_line,
                    word_end,
                )
                word_range = position_codec.range_to_client_units(
                    lines, Range(start_pos, end_pos)
                )
                diagnostics.append(
                    Diagnostic(
                        word_range,
                        f'Spelling "{word}"',
                        severity=DiagnosticSeverity.Hint,
                        source="debputy",
                        data=[propose_correct_text_quick_fix(c) for c in corrections],
                    )
                )
    return first_error


def _diagnostics_debian_control(
    ls: "LanguageServer",
    params: Union[DidOpenTextDocumentParams, DidChangeTextDocumentParams],
) -> None:
    """Lint the document named in *params* and publish the diagnostics.

    Registered for both the did-open and the did-change notification.
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    _info(f"Opened document: {doc.path} ({doc.language_id})")
    lines = doc.lines
    position_codec: LintCapablePositionCodec = doc.position_codec

    diagnostics = _lint_debian_control(doc.uri, doc.path, lines, position_codec)
    ls.publish_diagnostics(
        doc.uri,
        diagnostics,
    )


@lint_diagnostics(_LANGUAGE_IDS)
def _lint_debian_control(
    doc_reference: str,
    _path: str,
    lines: List[str],
    position_codec: LintCapablePositionCodec,
) -> Optional[List[Diagnostic]]:
    """Produce the diagnostics for a debian/control file given as *lines*.

    The first paragraph is treated as the source stanza and every later one
    as a binary package stanza.  Stanza level checks stop at the first
    paragraph that starts at or after the first syntax error.
    """
    diagnostics = []
    deb822_file = parse_deb822_file(
        lines,
        accept_files_with_duplicated_fields=True,
        accept_files_with_error_tokens=True,
    )

    first_error = _scan_for_syntax_errors_and_token_level_diagnostics(
        deb822_file,
        position_codec,
        lines,
        diagnostics,
    )

    paragraphs = list(deb822_file)
    source_paragraph = paragraphs[0] if paragraphs else None

    for paragraph_no, paragraph in enumerate(paragraphs, start=1):
        paragraph_pos = paragraph.position_in_file()
        if paragraph_pos.line_position >= first_error:
            break
        is_binary_paragraph = paragraph_no != 1
        if is_binary_paragraph:
            known_fields = BINARY_FIELDS
            other_known_fields = SOURCE_FIELDS
        else:
            known_fields = SOURCE_FIELDS
            other_known_fields = BINARY_FIELDS
        _diagnostics_for_paragraph(
            paragraph,
            paragraph_pos,
            source_paragraph,
            known_fields,
            other_known_fields,
            is_binary_paragraph,
            doc_reference,
            position_codec,
            lines,
            diagnostics,
        )

    return diagnostics


def _handle_semantic_tokens_full(
    ls: "LanguageServer",
    request: SemanticTokensParams,
) -> Optional[SemanticTokens]:
    """Return one "keyword" semantic token per field name in the document.

    Every token is emitted as five integers: line delta, start delta (always
    0 here), length in client units, token type index and modifier bits.
    Returns ``None`` when the document has no fields.
    """
    doc = ls.workspace.get_text_document(request.text_document.uri)
    lines = doc.lines
    deb822_file = parse_deb822_file(
        lines,
        accept_files_with_duplicated_fields=True,
        accept_files_with_error_tokens=True,
    )
    tokens = []
    previous_line = 0
    # Index into SEMANTIC_TOKENS_LEGEND.token_types ("keyword").
    keyword_token = 0
    no_modifiers = 0

    for paragraph in deb822_file:
        paragraph_position = paragraph.position_in_file()
        for kvpair in paragraph.iter_parts_of_type(Deb822KeyValuePairElement):
            field_position_without_comments = kvpair.position_in_parent().relative_to(
                paragraph_position
            )
            field_size = doc.position_codec.client_num_units(kvpair.field_name)
            current_line = field_position_without_comments.line_position
            line_delta = current_line - previous_line
            previous_line = current_line
            tokens.append(line_delta)  # Line delta
            tokens.append(0)  # Token delta
            tokens.append(field_size)  # Token length
            tokens.append(keyword_token)
            tokens.append(no_modifiers)

    if not tokens:
        return None
    return SemanticTokens(tokens)