diff options
Diffstat (limited to 'sqlglot/diff.py')
-rw-r--r-- | sqlglot/diff.py | 17 |
1 files changed, 14 insertions, 3 deletions
diff --git a/sqlglot/diff.py b/sqlglot/diff.py
index 86665e0..c10d640 100644
--- a/sqlglot/diff.py
+++ b/sqlglot/diff.py
@@ -53,7 +53,8 @@ class Keep:
 
 
 if t.TYPE_CHECKING:
-    T = t.TypeVar("T")
+    from sqlglot._typing import T
+
     Edit = t.Union[Insert, Remove, Move, Update, Keep]
 
 
@@ -240,7 +241,7 @@ class ChangeDistiller:
         return matching_set
 
     def _compute_leaf_matching_set(self) -> t.Set[t.Tuple[int, int]]:
-        candidate_matchings: t.List[t.Tuple[float, int, exp.Expression, exp.Expression]] = []
+        candidate_matchings: t.List[t.Tuple[float, int, int, exp.Expression, exp.Expression]] = []
         source_leaves = list(_get_leaves(self._source))
         target_leaves = list(_get_leaves(self._target))
         for source_leaf in source_leaves:
@@ -252,6 +253,7 @@ class ChangeDistiller:
                             candidate_matchings,
                             (
                                 -similarity_score,
+                                -_parent_similarity_score(source_leaf, target_leaf),
                                 len(candidate_matchings),
                                 source_leaf,
                                 target_leaf,
@@ -261,7 +263,7 @@ class ChangeDistiller:
         # Pick best matchings based on the highest score
         matching_set = set()
         while candidate_matchings:
-            _, _, source_leaf, target_leaf = heappop(candidate_matchings)
+            _, _, _, source_leaf, target_leaf = heappop(candidate_matchings)
             if (
                 id(source_leaf) in self._unmatched_source_nodes
                 and id(target_leaf) in self._unmatched_target_nodes
@@ -327,6 +329,15 @@ def _is_same_type(source: exp.Expression, target: exp.Expression) -> bool:
     return False
 
 
+def _parent_similarity_score(
+    source: t.Optional[exp.Expression], target: t.Optional[exp.Expression]
+) -> int:
+    if source is None or target is None or type(source) is not type(target):
+        return 0
+
+    return 1 + _parent_similarity_score(source.parent, target.parent)
+
+
 def _expression_only_args(expression: exp.Expression) -> t.List[exp.Expression]:
     args: t.List[t.Union[exp.Expression, t.List]] = []
     if expression: