summaryrefslogtreecommitdiffstats
path: root/docs/sqlglot/diff.html
diff options
context:
space:
mode:
Diffstat (limited to 'docs/sqlglot/diff.html')
-rw-r--r--docs/sqlglot/diff.html1560
1 files changed, 1560 insertions, 0 deletions
diff --git a/docs/sqlglot/diff.html b/docs/sqlglot/diff.html
new file mode 100644
index 0000000..9c366b2
--- /dev/null
+++ b/docs/sqlglot/diff.html
@@ -0,0 +1,1560 @@
+<!doctype html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <meta name="generator" content="pdoc 12.3.1"/>
+ <title>sqlglot.diff API documentation</title>
+
+ <style>/*! * Bootstrap Reboot v5.0.0 (https://getbootstrap.com/) * Copyright 2011-2021 The Bootstrap Authors * Copyright 2011-2021 Twitter, Inc. * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) * Forked from Normalize.css, licensed MIT (https://github.com/necolas/normalize.css/blob/master/LICENSE.md) */*,::after,::before{box-sizing:border-box}@media (prefers-reduced-motion:no-preference){:root{scroll-behavior:smooth}}body{margin:0;font-family:system-ui,-apple-system,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans","Liberation Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";font-size:1rem;font-weight:400;line-height:1.5;color:#212529;background-color:#fff;-webkit-text-size-adjust:100%;-webkit-tap-highlight-color:transparent}hr{margin:1rem 0;color:inherit;background-color:currentColor;border:0;opacity:.25}hr:not([size]){height:1px}h1,h2,h3,h4,h5,h6{margin-top:0;margin-bottom:.5rem;font-weight:500;line-height:1.2}h1{font-size:calc(1.375rem + 1.5vw)}@media (min-width:1200px){h1{font-size:2.5rem}}h2{font-size:calc(1.325rem + .9vw)}@media (min-width:1200px){h2{font-size:2rem}}h3{font-size:calc(1.3rem + .6vw)}@media (min-width:1200px){h3{font-size:1.75rem}}h4{font-size:calc(1.275rem + .3vw)}@media (min-width:1200px){h4{font-size:1.5rem}}h5{font-size:1.25rem}h6{font-size:1rem}p{margin-top:0;margin-bottom:1rem}abbr[data-bs-original-title],abbr[title]{-webkit-text-decoration:underline dotted;text-decoration:underline dotted;cursor:help;-webkit-text-decoration-skip-ink:none;text-decoration-skip-ink:none}address{margin-bottom:1rem;font-style:normal;line-height:inherit}ol,ul{padding-left:2rem}dl,ol,ul{margin-top:0;margin-bottom:1rem}ol ol,ol ul,ul ol,ul ul{margin-bottom:0}dt{font-weight:700}dd{margin-bottom:.5rem;margin-left:0}blockquote{margin:0 0 1rem}b,strong{font-weight:bolder}small{font-size:.875em}mark{padding:.2em;background-color:#fcf8e3}sub,sup{position:relative;font-size:.75em;line-height:0;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}a{color:#0d6efd;text-decoration:underline}a:hover{color:#0a58ca}a:not([href]):not([class]),a:not([href]):not([class]):hover{color:inherit;text-decoration:none}code,kbd,pre,samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;font-size:1em;direction:ltr;unicode-bidi:bidi-override}pre{display:block;margin-top:0;margin-bottom:1rem;overflow:auto;font-size:.875em}pre code{font-size:inherit;color:inherit;word-break:normal}code{font-size:.875em;color:#d63384;word-wrap:break-word}a>code{color:inherit}kbd{padding:.2rem .4rem;font-size:.875em;color:#fff;background-color:#212529;border-radius:.2rem}kbd kbd{padding:0;font-size:1em;font-weight:700}figure{margin:0 0 1rem}img,svg{vertical-align:middle}table{caption-side:bottom;border-collapse:collapse}caption{padding-top:.5rem;padding-bottom:.5rem;color:#6c757d;text-align:left}th{text-align:inherit;text-align:-webkit-match-parent}tbody,td,tfoot,th,thead,tr{border-color:inherit;border-style:solid;border-width:0}label{display:inline-block}button{border-radius:0}button:focus:not(:focus-visible){outline:0}button,input,optgroup,select,textarea{margin:0;font-family:inherit;font-size:inherit;line-height:inherit}button,select{text-transform:none}[role=button]{cursor:pointer}select{word-wrap:normal}select:disabled{opacity:1}[list]::-webkit-calendar-picker-indicator{display:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}[type=button]:not(:disabled),[type=reset]:not(:disabled),[type=submit]:not(:disabled),button:not(:disabled){cursor:pointer}::-moz-focus-inner{padding:0;border-style:none}textarea{resize:vertical}fieldset{min-width:0;padding:0;margin:0;border:0}legend{float:left;width:100%;padding:0;margin-bottom:.5rem;font-size:calc(1.275rem + .3vw);line-height:inherit}@media (min-width:1200px){legend{font-size:1.5rem}}legend+*{clear:left}::-webkit-datetime-edit-day-field,::-webkit-datetime-edit-fields-wrapper,::-webkit-datetime-edit-hour-field,::-webkit-datetime-edit-minute,::-webkit-datetime-edit-month-field,::-webkit-datetime-edit-text,::-webkit-datetime-edit-year-field{padding:0}::-webkit-inner-spin-button{height:auto}[type=search]{outline-offset:-2px;-webkit-appearance:textfield}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-color-swatch-wrapper{padding:0}::file-selector-button{font:inherit}::-webkit-file-upload-button{font:inherit;-webkit-appearance:button}output{display:inline-block}iframe{border:0}summary{display:list-item;cursor:pointer}progress{vertical-align:baseline}[hidden]{display:none!important}</style>
+ <style>/*! syntax-highlighting.css */pre{line-height:125%;}span.linenos{color:inherit; background-color:transparent; padding-left:5px; padding-right:20px;}.pdoc-code .hll{background-color:#ffffcc}.pdoc-code{background:#f8f8f8;}.pdoc-code .c{color:#3D7B7B; font-style:italic}.pdoc-code .err{border:1px solid #FF0000}.pdoc-code .k{color:#008000; font-weight:bold}.pdoc-code .o{color:#666666}.pdoc-code .ch{color:#3D7B7B; font-style:italic}.pdoc-code .cm{color:#3D7B7B; font-style:italic}.pdoc-code .cp{color:#9C6500}.pdoc-code .cpf{color:#3D7B7B; font-style:italic}.pdoc-code .c1{color:#3D7B7B; font-style:italic}.pdoc-code .cs{color:#3D7B7B; font-style:italic}.pdoc-code .gd{color:#A00000}.pdoc-code .ge{font-style:italic}.pdoc-code .gr{color:#E40000}.pdoc-code .gh{color:#000080; font-weight:bold}.pdoc-code .gi{color:#008400}.pdoc-code .go{color:#717171}.pdoc-code .gp{color:#000080; font-weight:bold}.pdoc-code .gs{font-weight:bold}.pdoc-code .gu{color:#800080; font-weight:bold}.pdoc-code .gt{color:#0044DD}.pdoc-code .kc{color:#008000; font-weight:bold}.pdoc-code .kd{color:#008000; font-weight:bold}.pdoc-code .kn{color:#008000; font-weight:bold}.pdoc-code .kp{color:#008000}.pdoc-code .kr{color:#008000; font-weight:bold}.pdoc-code .kt{color:#B00040}.pdoc-code .m{color:#666666}.pdoc-code .s{color:#BA2121}.pdoc-code .na{color:#687822}.pdoc-code .nb{color:#008000}.pdoc-code .nc{color:#0000FF; font-weight:bold}.pdoc-code .no{color:#880000}.pdoc-code .nd{color:#AA22FF}.pdoc-code .ni{color:#717171; font-weight:bold}.pdoc-code .ne{color:#CB3F38; font-weight:bold}.pdoc-code .nf{color:#0000FF}.pdoc-code .nl{color:#767600}.pdoc-code .nn{color:#0000FF; font-weight:bold}.pdoc-code .nt{color:#008000; font-weight:bold}.pdoc-code .nv{color:#19177C}.pdoc-code .ow{color:#AA22FF; font-weight:bold}.pdoc-code .w{color:#bbbbbb}.pdoc-code .mb{color:#666666}.pdoc-code .mf{color:#666666}.pdoc-code .mh{color:#666666}.pdoc-code .mi{color:#666666}.pdoc-code .mo{color:#666666}.pdoc-code .sa{color:#BA2121}.pdoc-code .sb{color:#BA2121}.pdoc-code .sc{color:#BA2121}.pdoc-code .dl{color:#BA2121}.pdoc-code .sd{color:#BA2121; font-style:italic}.pdoc-code .s2{color:#BA2121}.pdoc-code .se{color:#AA5D1F; font-weight:bold}.pdoc-code .sh{color:#BA2121}.pdoc-code .si{color:#A45A77; font-weight:bold}.pdoc-code .sx{color:#008000}.pdoc-code .sr{color:#A45A77}.pdoc-code .s1{color:#BA2121}.pdoc-code .ss{color:#19177C}.pdoc-code .bp{color:#008000}.pdoc-code .fm{color:#0000FF}.pdoc-code .vc{color:#19177C}.pdoc-code .vg{color:#19177C}.pdoc-code .vi{color:#19177C}.pdoc-code .vm{color:#19177C}.pdoc-code .il{color:#666666}</style>
+ <style>/*! theme.css */:root{--pdoc-background:#fff;}.pdoc{--text:#212529;--muted:#6c757d;--link:#3660a5;--link-hover:#1659c5;--code:#f8f8f8;--active:#fff598;--accent:#eee;--accent2:#c1c1c1;--nav-hover:rgba(255, 255, 255, 0.5);--name:#0066BB;--def:#008800;--annotation:#007020;}</style>
+ <style>/*! layout.css */html, body{width:100%;height:100%;}html, main{scroll-behavior:smooth;}body{background-color:var(--pdoc-background);}@media (max-width:769px){#navtoggle{cursor:pointer;position:absolute;width:50px;height:40px;top:1rem;right:1rem;border-color:var(--text);color:var(--text);display:flex;opacity:0.8;}#navtoggle:hover{opacity:1;}#togglestate + div{display:none;}#togglestate:checked + div{display:inherit;}main, header{padding:2rem 3vw;}header + main{margin-top:-3rem;}.git-button{display:none !important;}nav input[type="search"]{max-width:77%;}nav input[type="search"]:first-child{margin-top:-6px;}nav input[type="search"]:valid ~ *{display:none !important;}}@media (min-width:770px){:root{--sidebar-width:clamp(12.5rem, 28vw, 22rem);}nav{position:fixed;overflow:auto;height:100vh;width:var(--sidebar-width);}main, header{padding:3rem 2rem 3rem calc(var(--sidebar-width) + 3rem);width:calc(54rem + var(--sidebar-width));max-width:100%;}header + main{margin-top:-4rem;}#navtoggle{display:none;}}#togglestate{position:absolute;height:0;opacity:0;}nav.pdoc{--pad:clamp(0.5rem, 2vw, 1.75rem);--indent:1.5rem;background-color:var(--accent);border-right:1px solid var(--accent2);box-shadow:0 0 20px rgba(50, 50, 50, .2) inset;padding:0 0 0 var(--pad);overflow-wrap:anywhere;scrollbar-width:thin; scrollbar-color:var(--accent2) transparent }nav.pdoc::-webkit-scrollbar{width:.4rem; }nav.pdoc::-webkit-scrollbar-thumb{background-color:var(--accent2); }nav.pdoc > div{padding:var(--pad) 0;}nav.pdoc .module-list-button{display:inline-flex;align-items:center;color:var(--text);border-color:var(--muted);margin-bottom:1rem;}nav.pdoc .module-list-button:hover{border-color:var(--text);}nav.pdoc input[type=search]{display:block;outline-offset:0;width:calc(100% - var(--pad));}nav.pdoc .logo{max-width:calc(100% - var(--pad));max-height:35vh;display:block;margin:0 auto 1rem;transform:translate(calc(-.5 * var(--pad)), 0);}nav.pdoc ul{list-style:none;padding-left:0;}nav.pdoc > div > ul{margin-left:calc(0px - var(--pad));}nav.pdoc li a{padding:.2rem 0 .2rem calc(var(--pad) + var(--indent));}nav.pdoc > div > ul > li > a{padding-left:var(--pad);}nav.pdoc li{transition:all 100ms;}nav.pdoc li:hover{background-color:var(--nav-hover);}nav.pdoc a, nav.pdoc a:hover{color:var(--text);}nav.pdoc a{display:block;}nav.pdoc > h2:first-of-type{margin-top:1.5rem;}nav.pdoc .class:before{content:"class ";color:var(--muted);}nav.pdoc .function:after{content:"()";color:var(--muted);}nav.pdoc footer:before{content:"";display:block;width:calc(100% - var(--pad));border-top:solid var(--accent2) 1px;margin-top:1.5rem;padding-top:.5rem;}nav.pdoc footer{font-size:small;}</style>
+ <style>/*! content.css */.pdoc{color:var(--text);box-sizing:border-box;line-height:1.5;background:none;}.pdoc .pdoc-button{display:inline-block;border:solid black 1px;border-radius:2px;font-size:.75rem;padding:calc(0.5em - 1px) 1em;transition:100ms all;}.pdoc .pdoc-alert{padding:1rem 1rem 1rem calc(1.5rem + 24px);border:1px solid transparent;border-radius:.25rem;background-repeat:no-repeat;background-position:1rem center;margin-bottom:1rem;}.pdoc .pdoc-alert > *:last-child{margin-bottom:0;}.pdoc .pdoc-alert-note {color:#084298;background-color:#cfe2ff;border-color:#b6d4fe;background-image:url("data:image/svg+xml,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2224%22%20height%3D%2224%22%20fill%3D%22%23084298%22%20viewBox%3D%220%200%2016%2016%22%3E%3Cpath%20d%3D%22M8%2016A8%208%200%201%200%208%200a8%208%200%200%200%200%2016zm.93-9.412-1%204.705c-.07.34.029.533.304.533.194%200%20.487-.07.686-.246l-.088.416c-.287.346-.92.598-1.465.598-.703%200-1.002-.422-.808-1.319l.738-3.468c.064-.293.006-.399-.287-.47l-.451-.081.082-.381%202.29-.287zM8%205.5a1%201%200%201%201%200-2%201%201%200%200%201%200%202z%22/%3E%3C/svg%3E");}.pdoc .pdoc-alert-warning{color:#664d03;background-color:#fff3cd;border-color:#ffecb5;background-image:url("data:image/svg+xml,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2224%22%20height%3D%2224%22%20fill%3D%22%23664d03%22%20viewBox%3D%220%200%2016%2016%22%3E%3Cpath%20d%3D%22M8.982%201.566a1.13%201.13%200%200%200-1.96%200L.165%2013.233c-.457.778.091%201.767.98%201.767h13.713c.889%200%201.438-.99.98-1.767L8.982%201.566zM8%205c.535%200%20.954.462.9.995l-.35%203.507a.552.552%200%200%201-1.1%200L7.1%205.995A.905.905%200%200%201%208%205zm.002%206a1%201%200%201%201%200%202%201%201%200%200%201%200-2z%22/%3E%3C/svg%3E");}.pdoc .pdoc-alert-danger{color:#842029;background-color:#f8d7da;border-color:#f5c2c7;background-image:url("data:image/svg+xml,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2224%22%20height%3D%2224%22%20fill%3D%22%23842029%22%20viewBox%3D%220%200%2016%2016%22%3E%3Cpath%20d%3D%22M5.52.359A.5.5%200%200%201%206%200h4a.5.5%200%200%201%20.474.658L8.694%206H12.5a.5.5%200%200%201%20.395.807l-7%209a.5.5%200%200%201-.873-.454L6.823%209.5H3.5a.5.5%200%200%201-.48-.641l2.5-8.5z%22/%3E%3C/svg%3E");}.pdoc .visually-hidden{position:absolute !important;width:1px !important;height:1px !important;padding:0 !important;margin:-1px !important;overflow:hidden !important;clip:rect(0, 0, 0, 0) !important;white-space:nowrap !important;border:0 !important;}.pdoc h1, .pdoc h2, .pdoc h3{font-weight:300;margin:.3em 0;padding:.2em 0;}.pdoc > section:not(.module-info) h1{font-size:1.5rem;font-weight:500;}.pdoc > section:not(.module-info) h2{font-size:1.4rem;font-weight:500;}.pdoc > section:not(.module-info) h3{font-size:1.3rem;font-weight:500;}.pdoc > section:not(.module-info) h4{font-size:1.2rem;}.pdoc > section:not(.module-info) h5{font-size:1.1rem;}.pdoc a{text-decoration:none;color:var(--link);}.pdoc a:hover{color:var(--link-hover);}.pdoc blockquote{margin-left:2rem;}.pdoc pre{border-top:1px solid var(--accent2);border-bottom:1px solid var(--accent2);margin-top:0;margin-bottom:1em;padding:.5rem 0 .5rem .5rem;overflow-x:auto;background-color:var(--code);}.pdoc code{color:var(--text);padding:.2em .4em;margin:0;font-size:85%;background-color:var(--code);border-radius:6px;}.pdoc a > code{color:inherit;}.pdoc pre > code{display:inline-block;font-size:inherit;background:none;border:none;padding:0;}.pdoc > section:not(.module-info){margin-bottom:1.5rem;}.pdoc .modulename{margin-top:0;font-weight:bold;}.pdoc .modulename a{color:var(--link);transition:100ms all;}.pdoc .git-button{float:right;border:solid var(--link) 1px;}.pdoc .git-button:hover{background-color:var(--link);color:var(--pdoc-background);}.view-source-toggle-state,.view-source-toggle-state ~ .pdoc-code{display:none;}.view-source-toggle-state:checked ~ .pdoc-code{display:block;}.view-source-button{display:inline-block;float:right;font-size:.75rem;line-height:1.5rem;color:var(--muted);padding:0 .4rem 0 1.3rem;cursor:pointer;text-indent:-2px;}.view-source-button > span{visibility:hidden;}.module-info .view-source-button{float:none;display:flex;justify-content:flex-end;margin:-1.2rem .4rem -.2rem 0;}.view-source-button::before{position:absolute;content:"View Source";display:list-item;list-style-type:disclosure-closed;}.view-source-toggle-state:checked ~ .attr .view-source-button::before,.view-source-toggle-state:checked ~ .view-source-button::before{list-style-type:disclosure-open;}.pdoc .docstring{margin-bottom:1.5rem;}.pdoc section:not(.module-info) .docstring{margin-left:clamp(0rem, 5vw - 2rem, 1rem);}.pdoc .docstring .pdoc-code{margin-left:1em;margin-right:1em;}.pdoc h1:target,.pdoc h2:target,.pdoc h3:target,.pdoc h4:target,.pdoc h5:target,.pdoc h6:target,.pdoc .pdoc-code > pre > span:target{background-color:var(--active);box-shadow:-1rem 0 0 0 var(--active);}.pdoc .pdoc-code > pre > span:target{display:block;}.pdoc div:target > .attr,.pdoc section:target > .attr,.pdoc dd:target > a{background-color:var(--active);}.pdoc *{scroll-margin:2rem;}.pdoc .pdoc-code .linenos{user-select:none;}.pdoc .attr:hover{filter:contrast(0.95);}.pdoc section, .pdoc .classattr{position:relative;}.pdoc .headerlink{--width:clamp(1rem, 3vw, 2rem);position:absolute;top:0;left:calc(0rem - var(--width));transition:all 100ms ease-in-out;opacity:0;}.pdoc .headerlink::before{content:"#";display:block;text-align:center;width:var(--width);height:2.3rem;line-height:2.3rem;font-size:1.5rem;}.pdoc .attr:hover ~ .headerlink,.pdoc *:target > .headerlink,.pdoc .headerlink:hover{opacity:1;}.pdoc .attr{display:block;margin:.5rem 0 .5rem;padding:.4rem .4rem .4rem 1rem;background-color:var(--accent);overflow-x:auto;}.pdoc .classattr{margin-left:2rem;}.pdoc .name{color:var(--name);font-weight:bold;}.pdoc .def{color:var(--def);font-weight:bold;}.pdoc .signature{background-color:transparent;}.pdoc .param, .pdoc .return-annotation{white-space:pre;}.pdoc .signature.multiline .param{display:block;}.pdoc .signature.condensed .param{display:inline-block;}.pdoc .annotation{color:var(--annotation);}.pdoc .inherited{margin-left:2rem;}.pdoc .inherited dt{font-weight:700;}.pdoc .inherited dt, .pdoc .inherited dd{display:inline;margin-left:0;margin-bottom:.5rem;}.pdoc .inherited dd:not(:last-child):after{content:", ";}.pdoc .inherited .class:before{content:"class ";}.pdoc .inherited .function a:after{content:"()";}.pdoc .search-result .docstring{overflow:auto;max-height:25vh;}.pdoc .search-result.focused > .attr{background-color:var(--active);}.pdoc .attribution{margin-top:2rem;display:block;opacity:0.5;transition:all 200ms;filter:grayscale(100%);}.pdoc .attribution:hover{opacity:1;filter:grayscale(0%);}.pdoc .attribution img{margin-left:5px;height:35px;vertical-align:middle;width:70px;transition:all 200ms;}.pdoc table{display:block;width:max-content;max-width:100%;overflow:auto;margin-bottom:1rem;}.pdoc table th{font-weight:600;}.pdoc table th, .pdoc table td{padding:6px 13px;border:1px solid var(--accent2);}</style>
+ <style>/*! custom.css */</style></head>
+<body>
+ <nav class="pdoc">
+ <label id="navtoggle" for="togglestate" class="pdoc-button"><svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 30 30'><path stroke-linecap='round' stroke="currentColor" stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/></svg></label>
+ <input id="togglestate" type="checkbox" aria-hidden="true" tabindex="-1">
+ <div> <a class="pdoc-button module-list-button" href="../sqlglot.html">
+<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-box-arrow-in-left" viewBox="0 0 16 16">
+ <path fill-rule="evenodd" d="M10 3.5a.5.5 0 0 0-.5-.5h-8a.5.5 0 0 0-.5.5v9a.5.5 0 0 0 .5.5h8a.5.5 0 0 0 .5-.5v-2a.5.5 0 0 1 1 0v2A1.5 1.5 0 0 1 9.5 14h-8A1.5 1.5 0 0 1 0 12.5v-9A1.5 1.5 0 0 1 1.5 2h8A1.5 1.5 0 0 1 11 3.5v2a.5.5 0 0 1-1 0v-2z"/>
+ <path fill-rule="evenodd" d="M4.146 8.354a.5.5 0 0 1 0-.708l3-3a.5.5 0 1 1 .708.708L5.707 7.5H14.5a.5.5 0 0 1 0 1H5.707l2.147 2.146a.5.5 0 0 1-.708.708l-3-3z"/>
+</svg> &nbsp;sqlglot</a>
+
+
+ <input type="search" placeholder="Search..." role="searchbox" aria-label="search"
+ pattern=".+" required>
+
+ <h2>Contents</h2>
+ <ul>
+ <li><a href="#semantic-diff-for-sql">Semantic Diff for SQL</a>
+ <ul>
+ <li><a href="#motivation">Motivation</a></li>
+ <li><a href="#the-search-for-a-solution">The Search for a Solution</a></li>
+ <li><a href="#change-distiller">Change Distiller</a></li>
+ <li><a href="#alternative-solutions">Alternative Solutions</a></li>
+ <li><a href="#conclusion">Conclusion</a></li>
+ <li><a href="#references">References</a></li>
+ </ul></li>
+</ul>
+
+
+
+ <h2>API Documentation</h2>
+ <ul class="memberlist">
+ <li>
+ <a class="class" href="#Insert">Insert</a>
+ <ul class="memberlist">
+ <li>
+ <a class="function" href="#Insert.__init__">Insert</a>
+ </li>
+ </ul>
+
+ </li>
+ <li>
+ <a class="class" href="#Remove">Remove</a>
+ <ul class="memberlist">
+ <li>
+ <a class="function" href="#Remove.__init__">Remove</a>
+ </li>
+ </ul>
+
+ </li>
+ <li>
+ <a class="class" href="#Move">Move</a>
+ <ul class="memberlist">
+ <li>
+ <a class="function" href="#Move.__init__">Move</a>
+ </li>
+ </ul>
+
+ </li>
+ <li>
+ <a class="class" href="#Update">Update</a>
+ <ul class="memberlist">
+ <li>
+ <a class="function" href="#Update.__init__">Update</a>
+ </li>
+ </ul>
+
+ </li>
+ <li>
+ <a class="class" href="#Keep">Keep</a>
+ <ul class="memberlist">
+ <li>
+ <a class="function" href="#Keep.__init__">Keep</a>
+ </li>
+ </ul>
+
+ </li>
+ <li>
+ <a class="function" href="#diff">diff</a>
+ </li>
+ <li>
+ <a class="class" href="#ChangeDistiller">ChangeDistiller</a>
+ <ul class="memberlist">
+ <li>
+ <a class="function" href="#ChangeDistiller.__init__">ChangeDistiller</a>
+ </li>
+ <li>
+ <a class="function" href="#ChangeDistiller.diff">diff</a>
+ </li>
+ </ul>
+
+ </li>
+ </ul>
+
+
+ <footer>Copyright (c) 2023 Toby Mao</footer>
+
+ <a class="attribution" title="pdoc: Python API documentation generator" href="https://pdoc.dev" target="_blank">
+ built with <span class="visually-hidden">pdoc</span><img
+ alt="pdoc logo"
+ src="data:image/svg+xml,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20role%3D%22img%22%20aria-label%3D%22pdoc%20logo%22%20width%3D%22300%22%20height%3D%22150%22%20viewBox%3D%22-1%200%2060%2030%22%3E%3Ctitle%3Epdoc%3C/title%3E%3Cpath%20d%3D%22M29.621%2021.293c-.011-.273-.214-.475-.511-.481a.5.5%200%200%200-.489.503l-.044%201.393c-.097.551-.695%201.215-1.566%201.704-.577.428-1.306.486-2.193.182-1.426-.617-2.467-1.654-3.304-2.487l-.173-.172a3.43%203.43%200%200%200-.365-.306.49.49%200%200%200-.286-.196c-1.718-1.06-4.931-1.47-7.353.191l-.219.15c-1.707%201.187-3.413%202.131-4.328%201.03-.02-.027-.49-.685-.141-1.763.233-.721.546-2.408.772-4.076.042-.09.067-.187.046-.288.166-1.347.277-2.625.241-3.351%201.378-1.008%202.271-2.586%202.271-4.362%200-.976-.272-1.935-.788-2.774-.057-.094-.122-.18-.184-.268.033-.167.052-.339.052-.516%200-1.477-1.202-2.679-2.679-2.679-.791%200-1.496.352-1.987.9a6.3%206.3%200%200%200-1.001.029c-.492-.564-1.207-.929-2.012-.929-1.477%200-2.679%201.202-2.679%202.679A2.65%202.65%200%200%200%20.97%206.554c-.383.747-.595%201.572-.595%202.41%200%202.311%201.507%204.29%203.635%205.107-.037.699-.147%202.27-.423%203.294l-.137.461c-.622%202.042-2.515%208.257%201.727%2010.643%201.614.908%203.06%201.248%204.317%201.248%202.665%200%204.492-1.524%205.322-2.401%201.476-1.559%202.886-1.854%206.491.82%201.877%201.393%203.514%201.753%204.861%201.068%202.223-1.713%202.811-3.867%203.399-6.374.077-.846.056-1.469.054-1.537zm-4.835%204.313c-.054.305-.156.586-.242.629-.034-.007-.131-.022-.307-.157-.145-.111-.314-.478-.456-.908.221.121.432.25.675.355.115.039.219.051.33.081zm-2.251-1.238c-.05.33-.158.648-.252.694-.022.001-.125-.018-.307-.157-.217-.166-.488-.906-.639-1.573.358.344.754.693%201.198%201.036zm-3.887-2.337c-.006-.116-.018-.231-.041-.342.635.145%201.189.368%201.599.625.097.231.166.481.174.642-.03.049-.055.101-.067.158-.046.013-.128.026-.298.004-.278-.037-.901-.57-1.367-1.087zm-1.127-.497c.116.306.176.625.12.71-.019.014-.117.045-.345.016-.206-.027-.604-.332-.986-.695.41-.051.816-.056%201.211-.031zm-4.535%201.535c.209.22.379.47.358.598-.006.041-.088.138-.351.234-.144.055-.539-.063-.979-.259a11.66%2011.66%200%200%200%20.972-.573zm.983-.664c.359-.237.738-.418%201.126-.554.25.237.479.548.457.694-.006.042-.087.138-.351.235-.174.064-.694-.105-1.232-.375zm-3.381%201.794c-.022.145-.061.29-.149.401-.133.166-.358.248-.69.251h-.002c-.133%200-.306-.26-.45-.621.417.091.854.07%201.291-.031zm-2.066-8.077a4.78%204.78%200%200%201-.775-.584c.172-.115.505-.254.88-.378l-.105.962zm-.331%202.302a10.32%2010.32%200%200%201-.828-.502c.202-.143.576-.328.984-.49l-.156.992zm-.45%202.157l-.701-.403c.214-.115.536-.249.891-.376a11.57%2011.57%200%200%201-.19.779zm-.181%201.716c.064.398.194.702.298.893-.194-.051-.435-.162-.736-.398.061-.119.224-.3.438-.495zM8.87%204.141c0%20.152-.123.276-.276.276s-.275-.124-.275-.276.123-.276.276-.276.275.124.275.276zm-.735-.389a1.15%201.15%200%200%200-.314.783%201.16%201.16%200%200%200%201.162%201.162c.457%200%20.842-.27%201.032-.653.026.117.042.238.042.362a1.68%201.68%200%200%201-1.679%201.679%201.68%201.68%200%200%201-1.679-1.679c0-.843.626-1.535%201.436-1.654zM5.059%205.406A1.68%201.68%200%200%201%203.38%207.085a1.68%201.68%200%200%201-1.679-1.679c0-.037.009-.072.011-.109.21.3.541.508.935.508a1.16%201.16%200%200%200%201.162-1.162%201.14%201.14%200%200%200-.474-.912c.015%200%20.03-.005.045-.005.926.001%201.679.754%201.679%201.68zM3.198%204.141c0%20.152-.123.276-.276.276s-.275-.124-.275-.276.123-.276.276-.276.275.124.275.276zM1.375%208.964c0-.52.103-1.035.288-1.52.466.394%201.06.64%201.717.64%201.144%200%202.116-.725%202.499-1.738.383%201.012%201.355%201.738%202.499%201.738.867%200%201.631-.421%202.121-1.062.307.605.478%201.267.478%201.942%200%202.486-2.153%204.51-4.801%204.51s-4.801-2.023-4.801-4.51zm24.342%2019.349c-.985.498-2.267.168-3.813-.979-3.073-2.281-5.453-3.199-7.813-.705-1.315%201.391-4.163%203.365-8.423.97-3.174-1.786-2.239-6.266-1.261-9.479l.146-.492c.276-1.02.395-2.457.444-3.268a6.11%206.11%200%200%200%201.18.115%206.01%206.01%200%200%200%202.536-.562l-.006.175c-.802.215-1.848.612-2.021%201.25-.079.295.021.601.274.837.219.203.415.364.598.501-.667.304-1.243.698-1.311%201.179-.02.144-.022.507.393.787.213.144.395.26.564.365-1.285.521-1.361.96-1.381%201.126-.018.142-.011.496.427.746l.854.489c-.473.389-.971.914-.999%201.429-.018.278.095.532.316.713.675.556%201.231.721%201.653.721.059%200%20.104-.014.158-.02.207.707.641%201.64%201.513%201.64h.013c.8-.008%201.236-.345%201.462-.626.173-.216.268-.457.325-.692.424.195.93.374%201.372.374.151%200%20.294-.021.423-.068.732-.27.944-.704.993-1.021.009-.061.003-.119.002-.179.266.086.538.147.789.147.15%200%20.294-.021.423-.069.542-.2.797-.489.914-.754.237.147.478.258.704.288.106.014.205.021.296.021.356%200%20.595-.101.767-.229.438.435%201.094.992%201.656%201.067.106.014.205.021.296.021a1.56%201.56%200%200%200%20.323-.035c.17.575.453%201.289.866%201.605.358.273.665.362.914.362a.99.99%200%200%200%20.421-.093%201.03%201.03%200%200%200%20.245-.164c.168.428.39.846.68%201.068.358.273.665.362.913.362a.99.99%200%200%200%20.421-.093c.317-.148.512-.448.639-.762.251.157.495.257.726.257.127%200%20.25-.024.37-.071.427-.17.706-.617.841-1.314.022-.015.047-.022.068-.038.067-.051.133-.104.196-.159-.443%201.486-1.107%202.761-2.086%203.257zM8.66%209.925a.5.5%200%201%200-1%200c0%20.653-.818%201.205-1.787%201.205s-1.787-.552-1.787-1.205a.5.5%200%201%200-1%200c0%201.216%201.25%202.205%202.787%202.205s2.787-.989%202.787-2.205zm4.4%2015.965l-.208.097c-2.661%201.258-4.708%201.436-6.086.527-1.542-1.017-1.88-3.19-1.844-4.198a.4.4%200%200%200-.385-.414c-.242-.029-.406.164-.414.385-.046%201.249.367%203.686%202.202%204.896.708.467%201.547.7%202.51.7%201.248%200%202.706-.392%204.362-1.174l.185-.086a.4.4%200%200%200%20.205-.527c-.089-.204-.326-.291-.527-.206zM9.547%202.292c.093.077.205.114.317.114a.5.5%200%200%200%20.318-.886L8.817.397a.5.5%200%200%200-.703.068.5.5%200%200%200%20.069.703l1.364%201.124zm-7.661-.065c.086%200%20.173-.022.253-.068l1.523-.893a.5.5%200%200%200-.506-.863l-1.523.892a.5.5%200%200%200-.179.685c.094.158.261.247.432.247z%22%20transform%3D%22matrix%28-1%200%200%201%2058%200%29%22%20fill%3D%22%233bb300%22/%3E%3Cpath%20d%3D%22M.3%2021.86V10.18q0-.46.02-.68.04-.22.18-.5.28-.54%201.34-.54%201.06%200%201.42.28.38.26.44.78.76-1.04%202.38-1.04%201.64%200%203.1%201.54%201.46%201.54%201.46%203.58%200%202.04-1.46%203.58-1.44%201.54-3.08%201.54-1.64%200-2.38-.92v4.04q0%20.46-.04.68-.02.22-.18.5-.14.3-.5.42-.36.12-.98.12-.62%200-1-.12-.36-.12-.52-.4-.14-.28-.18-.5-.02-.22-.02-.68zm3.96-9.42q-.46.54-.46%201.18%200%20.64.46%201.18.48.52%201.2.52.74%200%201.24-.52.52-.52.52-1.18%200-.66-.48-1.18-.48-.54-1.26-.54-.76%200-1.22.54zm14.741-8.36q.16-.3.54-.42.38-.12%201-.12.64%200%201.02.12.38.12.52.42.16.3.18.54.04.22.04.68v11.94q0%20.46-.04.7-.02.22-.18.5-.3.54-1.7.54-1.38%200-1.54-.98-.84.96-2.34.96-1.8%200-3.28-1.56-1.48-1.58-1.48-3.66%200-2.1%201.48-3.68%201.5-1.58%203.28-1.58%201.48%200%202.3%201v-4.2q0-.46.02-.68.04-.24.18-.52zm-3.24%2010.86q.52.54%201.26.54.74%200%201.22-.54.5-.54.5-1.18%200-.66-.48-1.22-.46-.56-1.26-.56-.8%200-1.28.56-.48.54-.48%201.2%200%20.66.52%201.2zm7.833-1.2q0-2.4%201.68-3.96%201.68-1.56%203.84-1.56%202.16%200%203.82%201.56%201.66%201.54%201.66%203.94%200%201.66-.86%202.96-.86%201.28-2.1%201.9-1.22.6-2.54.6-1.32%200-2.56-.64-1.24-.66-2.1-1.92-.84-1.28-.84-2.88zm4.18%201.44q.64.48%201.3.48.66%200%201.32-.5.66-.5.66-1.48%200-.98-.62-1.46-.62-.48-1.34-.48-.72%200-1.34.5-.62.5-.62%201.48%200%20.96.64%201.46zm11.412-1.44q0%20.84.56%201.32.56.46%201.18.46.64%200%201.18-.36.56-.38.9-.38.6%200%201.46%201.06.46.58.46%201.04%200%20.76-1.1%201.42-1.14.8-2.8.8-1.86%200-3.58-1.34-.82-.64-1.34-1.7-.52-1.08-.52-2.36%200-1.3.52-2.34.52-1.06%201.34-1.7%201.66-1.32%203.54-1.32.76%200%201.48.22.72.2%201.06.4l.32.2q.36.24.56.38.52.4.52.92%200%20.5-.42%201.14-.72%201.1-1.38%201.1-.38%200-1.08-.44-.36-.34-1.04-.34-.66%200-1.24.48-.58.48-.58%201.34z%22%20fill%3D%22green%22/%3E%3C/svg%3E"/>
+ </a>
+</div>
+ </nav>
+ <main class="pdoc">
+ <section class="module-info">
+ <a class="pdoc-button git-button" href="https://github.com/tobymao/sqlglot/tree/main/sqlglot/diff.py">Edit on GitHub</a>
+
+ <div class="docstring"><h1 id="semantic-diff-for-sql">Semantic Diff for SQL</h1>
+
+<p><em>by <a href="https://github.com/izeigerman">Iaroslav Zeigerman</a></em></p>
+
+<h2 id="motivation">Motivation</h2>
+
+<p>Software is constantly changing and evolving, and identifying what has changed and reviewing those changes is an integral part of the development process. SQL code is no exception to this.</p>
+
+<p>Text-based diff tools such as <code>git diff</code>, when applied to a code base, have certain limitations. First, they can only detect insertions and deletions, not movements or updates of individual pieces of code. Second, such tools can only detect changes between lines of text, which is too coarse for something as granular and detailed as source code. Additionally, the outcome of such a diff is dependent on the underlying code formatting, and yields different results if the formatting should change.</p>
+
+<p>Consider the following diff generated by Git:</p>
+
+<p><img src="sql_diff_images/git_diff_output.png" alt="Git diff output" /></p>
+
+<p>Semantically the query hasn’t changed. The two arguments <code>b</code> and <code>c</code> have been swapped (moved), posing no impact on the output of the query. Yet Git replaced the whole affected expression alongside a bulk of unrelated elements.</p>
+
+<p>The alternative to text-based diffing is to compare Abstract Syntax Trees (AST) instead. The main advantage of ASTs are that they are a direct product of code parsing, which represents the underlying code structure at any desired level of granularity. Comparing ASTs may yield extremely precise diffs; changes such as code movements and updates can also be detected. Even more importantly, this approach facilitates additional use cases beyond eyeballing two versions of source code side by side.</p>
+
+<p>The use cases I had in mind for SQL when I decided to embark on this journey of semantic diffing were the following:</p>
+
+<ul>
+<li><strong>Query similarity score.</strong> Identifying which parts the two queries have in common to automatically suggest opportunities for consolidation, creation of intermediate/staging tables, and so on.</li>
+<li><strong>Differentiating between cosmetic / structural changes and functional ones.</strong> For example when a nested query is refactored into a common table expression (CTE), this kind of change doesn’t have any functional impact on either a query or its outcome.</li>
+<li><strong>Automatic suggestions about the need to retroactively backfill data.</strong> This is especially important for pipelines that populate very large tables for which restatement is a runtime-intensive procedure. The ability to discern between simple code movements and actual modifications can help assess the impact of a change and make suggestions accordingly.</li>
+</ul>
+
+<p>The implementation discussed in this post is now a part of the <a href="https://github.com/tobymao/sqlglot/">SQLGlot</a> library. You can find a complete source code in the <a href="https://github.com/tobymao/sqlglot/blob/main/sqlglot/diff.py">diff.py</a> module. The choice of SQLglot was an obvious one due to its simple but powerful API, lack of external dependencies and, more importantly, extensive list of supported SQL dialects.</p>
+
+<h2 id="the-search-for-a-solution">The Search for a Solution</h2>
+
+<p>When it comes to any diffing tool (not just a semantic one), the primary challenge is to match as many elements of compared entities as possible. Once such a set of matching elements is available, deriving a sequence of changes becomes an easy task.</p>
+
+<p>If our elements have unique identifiers associated with them (for example, an element’s ID in DOM), the matching problem is trivial. However, the SQL syntax trees that we are comparing have neither unique keys nor object identifiers that can be used for the purposes of matching. So, how do we suppose to find pairs of nodes that are related?</p>
+
+<p>To better illustrate the problem, consider comparing the following SQL expressions: <code>SELECT a + b + c, d, e</code> and <code>SELECT a - b + c, e, f</code>. Matching individual nodes from respective syntax trees can be visualized as follows:</p>
+
+<p><img src="sql_diff_images/figure_1.png" alt="Figure 1: Example of node matching for two SQL expression trees" />
+<em>Figure 1: Example of node matching for two SQL expression trees.</em></p>
+
+<p>By looking at the figure of node matching for two SQL expression trees above, we conclude that the following changes should be captured by our solution:</p>
+
+<ul>
+<li>Inserted nodes: <code>Sub</code> and <code>f</code>. These are the nodes from the target AST which do not have a matching node in the source AST.</li>
+<li>Removed nodes: <code>Add</code> and <code>d</code>. These are the nodes from the source AST which do not have a counterpart in the target AST.</li>
+<li>Remaining nodes must be identified as unchanged.</li>
+</ul>
+
+<p>It should be clear at this point that if we manage to match nodes in the source tree with their counterparts in the target tree, then computing the diff becomes a trivial matter.</p>
+
+<h3 id="naive-brute-force">Naïve Brute-Force</h3>
+
+<p>The naïve solution would be to try all different permutations of node pair combinations, and see which set of pairs performs the best based on some type of heuristics. The runtime cost of such a solution quickly reaches the escape velocity; if both trees had only 10 nodes each, the number of such sets would approximately be 10! ^ 2 = 3.6M ^ 2 ~= 13 * 10^12. This is a very bad case of factorial complexity (to be precise, it’s actually much worse - O(n! ^ 2) - but I couldn’t come up with a name for it), so there is little need to explore this approach any further.</p>
+
+<h3 id="myers-algorithm">Myers Algorithm</h3>
+
+<p>After the naïve approach was proven to be infeasible, the next question I asked myself was “how does git diff work?”. This question led me to discover the Myers diff algorithm [1]. This algorithm has been designed to compare sequences of strings. At its core, it’s looking for the shortest path on a graph of possible edits that transform the first sequence into the second one, while heavily rewarding those paths that lead to longest subsequences of unchanged elements. There’s a lot of material out there describing this algorithm in greater detail. I found James Coglan’s series of <a href="https://blog.jcoglan.com/2017/02/12/the-myers-diff-algorithm-part-1/">blog posts</a> to be the most comprehensive.</p>
+
+<p>Therefore, I had this “brilliant” (actually not) idea to transform trees into sequences by traversing them in topological order, and then applying the Myers algorithm on resulting sequences while using a custom heuristics when checking the equality of two nodes. Unsurprisingly, comparing sequences of strings is quite different from comparing hierarchical tree structures, and by flattening trees into sequences, we lose a lot of relevant context. This resulted in a terrible performance of this algorithm on ASTs. It often matched completely unrelated nodes, even when the two trees were mostly the same, and produced extremely inaccurate lists of changes overall. After playing around with it a little and tweaking my equality heuristics to improve accuracy, I ultimately scrapped the whole implementation and went back to the drawing board.</p>
+
+<h2 id="change-distiller">Change Distiller</h2>
+
+<p>The algorithm I settled on at the end was Change Distiller, created by Fluri et al. [2], which in turn is an improvement over the core idea described by Chawathe et al. [3].</p>
+
+<p>The algorithm consists of two high-level steps:</p>
+
+<ol>
+<li><strong>Finding appropriate matchings between pairs of nodes that are part of compared ASTs.</strong> Identifying what is meant by “appropriate” matching is also a part of this step.</li>
+<li><strong>Generating the so-called “edit script” from the matching set built in the 1st step.</strong> The edit script is a sequence of edit operations (for example, insert, remove, update, etc.) on individual tree nodes, such that when applied as transformations on the source AST, it eventually becomes the target AST. In general, the shorter the sequence, the better. The length of the edit script can be used to compare the performance of different algorithms, though this is not the only metric that matters.</li>
+</ol>
+
+<p>The rest of this section is dedicated to the Python implementation of the steps above using the AST implementation provided by the SQLGlot library.</p>
+
+<h3 id="building-the-matching-set">Building the Matching Set</h3>
+
+<h4 id="matching-leaves">Matching Leaves</h4>
+
+<p>We begin composing the matching set by matching the leaf nodes. Leaf nodes are the nodes that do not have any children nodes (such as literals, identifiers, etc.). In order to match them, we gather all the leaf nodes from the source tree and generate a cartesian product with all the leaves from the target tree, while comparing pairs created this way and assigning them a similarity score. During this stage, we also exclude pairs that don’t pass basic matching criteria. Then, we pick pairs that scored the highest while making sure that each node is matched no more than once.</p>
+
+<p>Using the example provided at the beginning of the post, the process of building an initial set of candidate matchings can be seen on Figure 2.</p>
+
+<p><img src="sql_diff_images/figure_2.gif" alt="Figure 2: Building a set of candidate matchings between leaf nodes. The third item in each triplet represents a similarity score between two nodes." />
+<em>Figure 2: Building a set of candidate matchings between leaf nodes. The third item in each triplet represents a similarity score between two nodes.</em></p>
+
+<p>First, let’s analyze the similarity score. Then, we’ll discuss matching criteria.</p>
+
+<p>The similarity score proposed by Fluri et al. [2] is a <a href="https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient">dice coefficient </a>applied to <a href="https://en.wikipedia.org/wiki/Bigram">bigrams</a> of respective node values. A bigram is a sequence of two adjacent elements from a string computed in a sliding window fashion:</p>
+
+<div class="pdoc-code codehilite">
+<pre><span></span><code><span class="k">def</span> <span class="nf">bigram</span><span class="p">(</span><span class="n">string</span><span class="p">):</span>
+ <span class="n">count</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">string</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="k">return</span> <span class="p">[</span><span class="n">string</span><span class="p">[</span><span class="n">i</span> <span class="p">:</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">2</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">count</span><span class="p">)]</span>
+</code></pre>
+</div>
+
+<p>For reasons that will become clear shortly, we actually need to compute bigram histograms rather than just sequences:</p>
+
+<div class="pdoc-code codehilite">
+<pre><span></span><code><span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">defaultdict</span>
+
+<span class="k">def</span> <span class="nf">bigram_histo</span><span class="p">(</span><span class="n">string</span><span class="p">):</span>
+ <span class="n">count</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">string</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="n">bigram_histo</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">count</span><span class="p">):</span>
+ <span class="n">bigram_histo</span><span class="p">[</span><span class="n">string</span><span class="p">[</span><span class="n">i</span> <span class="p">:</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">2</span><span class="p">]]</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="k">return</span> <span class="n">bigram_histo</span>
+</code></pre>
+</div>
+
+<p>The dice coefficient formula looks like following:</p>
+
+<p><img src="sql_diff_images/dice_coef.png" alt="Dice Coefficient" /></p>
+
+<p>Where X is a bigram of the source node and Y is a bigram of the second one. What this essentially does is count the number of bigram elements the two nodes have in common, multiply it by 2, and then divide by the total number of elements in both bigrams. This is where bigram histograms come in handy:</p>
+
+<div class="pdoc-code codehilite">
+<pre><span></span><code><span class="k">def</span> <span class="nf">dice_coefficient</span><span class="p">(</span><span class="n">source</span><span class="p">,</span> <span class="n">target</span><span class="p">):</span>
+ <span class="n">source_histo</span> <span class="o">=</span> <span class="n">bigram_histo</span><span class="p">(</span><span class="n">source</span><span class="o">.</span><span class="n">sql</span><span class="p">())</span>
+ <span class="n">target_histo</span> <span class="o">=</span> <span class="n">bigram_histo</span><span class="p">(</span><span class="n">target</span><span class="o">.</span><span class="n">sql</span><span class="p">())</span>
+
+ <span class="n">total_grams</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="nb">sum</span><span class="p">(</span><span class="n">source_histo</span><span class="o">.</span><span class="n">values</span><span class="p">())</span> <span class="o">+</span> <span class="nb">sum</span><span class="p">(</span><span class="n">target_histo</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
+ <span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">total_grams</span><span class="p">:</span>
+ <span class="k">return</span> <span class="mf">1.0</span> <span class="k">if</span> <span class="n">source</span> <span class="o">==</span> <span class="n">target</span> <span class="k">else</span> <span class="mf">0.0</span>
+
+ <span class="n">overlap_len</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="n">overlapping_grams</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">source_histo</span><span class="p">)</span> <span class="o">&amp;</span> <span class="nb">set</span><span class="p">(</span><span class="n">target_histo</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">g</span> <span class="ow">in</span> <span class="n">overlapping_grams</span><span class="p">:</span>
+ <span class="n">overlap_len</span> <span class="o">+=</span> <span class="nb">min</span><span class="p">(</span><span class="n">source_histo</span><span class="p">[</span><span class="n">g</span><span class="p">],</span> <span class="n">target_histo</span><span class="p">[</span><span class="n">g</span><span class="p">])</span>
+
+ <span class="k">return</span> <span class="mi">2</span> <span class="o">*</span> <span class="n">overlap_len</span> <span class="o">/</span> <span class="n">total_grams</span>
+</code></pre>
+</div>
+
+<p>To compute a bigram given a tree node, we first transform the node into its canonical SQL representation,so that the <code>Literal(123)</code> node becomes just “123” and the <code>Identifier(“a”)</code> node becomes just “a”. We also handle a scenario when strings are too short to derive bigrams. In this case, we fallback to checking the two nodes for equality.</p>
+
+<p>Now when we know how to compute the similarity score, we can take care of the matching criteria for leaf nodes. In the original paper [2], the matching criteria is formalized as follows:</p>
+
+<p><img src="sql_diff_images/matching_criteria_1.png" alt="Matching criteria for leaf nodes" /></p>
+
+<p>The two nodes are matched if two conditions are met:</p>
+
+<ol>
+<li>The node labels match (in our case labels are just node types).</li>
+<li>The similarity score for node values is greater than or equal to some threshold “f”. The authors of the paper recommend setting the value of “f” to 0.6.</li>
+</ol>
+
+<p>With building blocks in place, we can now build a matching set for leaf nodes. First, we generate a list of candidates for matching:</p>
+
+<div class="pdoc-code codehilite">
+<pre><span></span><code><span class="kn">from</span> <span class="nn">heapq</span> <span class="kn">import</span> <span class="n">heappush</span><span class="p">,</span> <span class="n">heappop</span>
+
+<span class="n">candidate_matchings</span> <span class="o">=</span> <span class="p">[]</span>
+<span class="n">source_leaves</span> <span class="o">=</span> <span class="n">_get_leaves</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source</span><span class="p">)</span>
+<span class="n">target_leaves</span> <span class="o">=</span> <span class="n">_get_leaves</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_target</span><span class="p">)</span>
+<span class="k">for</span> <span class="n">source_leaf</span> <span class="ow">in</span> <span class="n">source_leaves</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">target_leaf</span> <span class="ow">in</span> <span class="n">target_leaves</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">_is_same_type</span><span class="p">(</span><span class="n">source_leaf</span><span class="p">,</span> <span class="n">target_leaf</span><span class="p">):</span>
+ <span class="n">similarity_score</span> <span class="o">=</span> <span class="n">dice_coefficient</span><span class="p">(</span>
+ <span class="n">source_leaf</span><span class="p">,</span> <span class="n">target_leaf</span>
+ <span class="p">)</span>
+ <span class="k">if</span> <span class="n">similarity_score</span> <span class="o">&gt;=</span> <span class="mf">0.6</span><span class="p">:</span>
+ <span class="n">heappush</span><span class="p">(</span>
+ <span class="n">candidate_matchings</span><span class="p">,</span>
+ <span class="p">(</span>
+ <span class="o">-</span><span class="n">similarity_score</span><span class="p">,</span>
+ <span class="nb">len</span><span class="p">(</span><span class="n">candidate_matchings</span><span class="p">),</span>
+ <span class="n">source_leaf</span><span class="p">,</span>
+ <span class="n">target_leaf</span><span class="p">,</span>
+ <span class="p">),</span>
+ <span class="p">)</span>
+</code></pre>
+</div>
+
+<p>In the implementation above, we push each matching pair onto the heap to automatically maintain the correct order based on the assigned similarity score.</p>
+
+<p>Finally, we build the initial matching set by picking leaf pairs with the highest score:</p>
+
+<div class="pdoc-code codehilite">
+<pre><span></span><code><span class="n">matching_set</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
+<span class="k">while</span> <span class="n">candidate_matchings</span><span class="p">:</span>
+ <span class="n">_</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">source_leaf</span><span class="p">,</span> <span class="n">target_leaf</span> <span class="o">=</span> <span class="n">heappop</span><span class="p">(</span><span class="n">candidate_matchings</span><span class="p">)</span>
+ <span class="k">if</span> <span class="p">(</span>
+ <span class="n">source_leaf</span> <span class="ow">in</span> <span class="n">unmatched_source_nodes</span>
+ <span class="ow">and</span> <span class="n">target_leaf</span> <span class="ow">in</span> <span class="n">unmatched_target_nodes</span>
+ <span class="p">):</span>
+ <span class="n">matching_set</span><span class="o">.</span><span class="n">add</span><span class="p">((</span><span class="n">source_leaf</span><span class="p">,</span> <span class="n">target_leaf</span><span class="p">))</span>
+ <span class="n">unmatched_source_nodes</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">source_leaf</span><span class="p">)</span>
+ <span class="n">unmatched_target_nodes</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">target_leaf</span><span class="p">)</span>
+</code></pre>
+</div>
+
+<p>To finalize the matching set, we should now proceed with matching inner nodes.</p>
+
+<h4 id="matching-inner-nodes">Matching Inner Nodes</h4>
+
+<p>Matching inner nodes is quite similar to matching leaf nodes, with the following two distinctions:</p>
+
+<ul>
+<li>Rather than ranking a set of possible candidates, we pick the first node pair that passes the matching criteria.</li>
+<li>The matching criteria itself has been extended to account for the number of leaf nodes the pair of inner nodes have in common.</li>
+</ul>
+
+<p><img src="sql_diff_images/figure_3.gif" alt="Figure 3: Matching inner nodes based on their type as well as how many of their leaf nodes have been previously matched." />
+<em>Figure 3: Matching inner nodes based on their type as well as how many of their leaf nodes have been previously matched.</em></p>
+
+<p>Let’s start with the matching criteria. The criteria is formalized as follows:</p>
+
+<p><img src="sql_diff_images/matching_criteria_2.png" alt="Matching criteria for inner nodes" /></p>
+
+<p>Alongside already familiar similarity score and node type criteria, there is a new one in the middle: the ratio of leaf nodes that the two nodes have in common must exceed some threshold “t”. The recommended value for “t” is also 0.6. Counting the number of common leaf nodes is pretty straightforward, since we already have the complete matching set for leaves. All we need to do is count how many matching pairs do leaf nodes from the two compared inner nodes form.</p>
+
+<p>There are two additional heuristics associated with this matching criteria:</p>
+
+<ul>
+<li>Inner node similarity weighting: if the similarity score between the node values doesn’t pass the threshold “f” but the ratio of common leaf nodes (“t”) is greater than or equal to 0.8, then the matching is considered successful.</li>
+<li>The threshold “t” is reduced to 0.4 for inner nodes with the number of leaf nodes equal to 4 or less, in order to decrease the false negative rate for small subtrees.</li>
+</ul>
+
+<p>We now only have to iterate through the remaining unmatched nodes and form matching pairs based on the outlined criteria:</p>
+
+<div class="pdoc-code codehilite">
+<pre><span></span><code><span class="n">leaves_matching_set</span> <span class="o">=</span> <span class="n">matching_set</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+
+<span class="k">for</span> <span class="n">source_node</span> <span class="ow">in</span> <span class="n">unmatched_source_nodes</span><span class="o">.</span><span class="n">copy</span><span class="p">():</span>
+ <span class="k">for</span> <span class="n">target_node</span> <span class="ow">in</span> <span class="n">unmatched_target_nodes</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">_is_same_type</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">):</span>
+ <span class="n">source_leaves</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">_get_leaves</span><span class="p">(</span><span class="n">source_node</span><span class="p">))</span>
+ <span class="n">target_leaves</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">_get_leaves</span><span class="p">(</span><span class="n">target_node</span><span class="p">))</span>
+
+ <span class="n">max_leaves_num</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">source_leaves</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">target_leaves</span><span class="p">))</span>
+ <span class="k">if</span> <span class="n">max_leaves_num</span><span class="p">:</span>
+ <span class="n">common_leaves_num</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span>
+ <span class="mi">1</span> <span class="k">if</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">source_leaves</span> <span class="ow">and</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">target_leaves</span> <span class="k">else</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">s</span><span class="p">,</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">leaves_matching_set</span>
+ <span class="p">)</span>
+ <span class="n">leaf_similarity_score</span> <span class="o">=</span> <span class="n">common_leaves_num</span> <span class="o">/</span> <span class="n">max_leaves_num</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">leaf_similarity_score</span> <span class="o">=</span> <span class="mf">0.0</span>
+
+ <span class="n">adjusted_t</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="mf">0.6</span>
+ <span class="k">if</span> <span class="nb">min</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">source_leaves</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">target_leaves</span><span class="p">))</span> <span class="o">&gt;</span> <span class="mi">4</span>
+ <span class="k">else</span> <span class="mf">0.4</span>
+ <span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">leaf_similarity_score</span> <span class="o">&gt;=</span> <span class="mf">0.8</span> <span class="ow">or</span> <span class="p">(</span>
+ <span class="n">leaf_similarity_score</span> <span class="o">&gt;=</span> <span class="n">adjusted_t</span>
+ <span class="ow">and</span> <span class="n">dice_coefficient</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="mf">0.6</span>
+ <span class="p">):</span>
+ <span class="n">matching_set</span><span class="o">.</span><span class="n">add</span><span class="p">((</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">))</span>
+ <span class="n">unmatched_source_nodes</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">source_node</span><span class="p">)</span>
+ <span class="n">unmatched_target_nodes</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">target_node</span><span class="p">)</span>
+ <span class="k">break</span>
+</code></pre>
+</div>
+
+<p>After the matching set is formed, we can proceed with generation of the edit script, which will be the algorithm’s output.</p>
+
+<h3 id="generating-the-edit-script">Generating the Edit Script</h3>
+
+<p>At this point, we should have the following 3 sets at our disposal:</p>
+
+<ul>
+<li>The set of matched node pairs.</li>
+<li>The set of remaining unmatched nodes from the source tree.</li>
+<li>The set of remaining unmatched nodes from the target tree.</li>
+</ul>
+
+<p>We can derive 3 kinds of edits from the matching set: either the node’s value was updated (<strong>Update</strong>), the node was moved to a different position within the tree (<strong>Move</strong>), or the node remained unchanged (<strong>Keep</strong>). Note that the <strong>Move</strong> case is not mutually exclusive with the other two. The node could have been updated or could have remained the same while at the same time its position within its parent node or the parent node itself could have changed. All unmatched nodes from the source tree are the ones that were removed (<strong>Remove</strong>), while unmatched nodes from the target tree are the ones that were inserted (<strong>Insert</strong>).</p>
+
+<p>The latter two cases are pretty straightforward to implement:</p>
+
+<div class="pdoc-code codehilite">
+<pre><span></span><code><span class="n">edit_script</span> <span class="o">=</span> <span class="p">[]</span>
+
+<span class="k">for</span> <span class="n">removed_node</span> <span class="ow">in</span> <span class="n">unmatched_source_nodes</span><span class="p">:</span>
+ <span class="n">edit_script</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Remove</span><span class="p">(</span><span class="n">removed_node</span><span class="p">))</span>
+<span class="k">for</span> <span class="n">inserted_node</span> <span class="ow">in</span> <span class="n">unmatched_target_nodes</span><span class="p">:</span>
+ <span class="n">edit_script</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Insert</span><span class="p">(</span><span class="n">inserted_node</span><span class="p">))</span>
+</code></pre>
+</div>
+
+<p>Traversing the matching set requires a little more thought:</p>
+
+<div class="pdoc-code codehilite">
+<pre><span></span><code><span class="k">for</span> <span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span> <span class="ow">in</span> <span class="n">matching_set</span><span class="p">:</span>
+ <span class="k">if</span> <span class="p">(</span>
+ <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">LEAF_EXPRESSION_TYPES</span><span class="p">)</span>
+ <span class="ow">or</span> <span class="n">source_node</span> <span class="o">==</span> <span class="n">target_node</span>
+ <span class="p">):</span>
+ <span class="n">move_edits</span> <span class="o">=</span> <span class="n">generate_move_edits</span><span class="p">(</span>
+ <span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">,</span> <span class="n">matching_set</span>
+ <span class="p">)</span>
+ <span class="n">edit_script</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">move_edits</span><span class="p">)</span>
+ <span class="n">edit_script</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Keep</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">))</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">edit_script</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Update</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">))</span>
+</code></pre>
+</div>
+
+<p>If a matching pair represents a pair of leaf nodes, we check if they are the same to decide whether an update took place. For inner node pairs, we also need to compare the positions of their respective children to detect node movements. Chawathe et al. [3] suggest applying the <a href="https://en.wikipedia.org/wiki/Longest_common_subsequence_problem">longest common subsequence </a>(LCS) algorithm which, no surprise here, was described by Myers himself [1]. There is a small catch, however: instead of checking the equality of two children nodes, we need to check whether the two nodes form a pair that is a part of our matching set.</p>
+
+<p>Now with this knowledge, the implementation becomes straightforward:</p>
+
+<div class="pdoc-code codehilite">
+<pre><span></span><code><span class="k">def</span> <span class="nf">generate_move_edits</span><span class="p">(</span><span class="n">source</span><span class="p">,</span> <span class="n">target</span><span class="p">,</span> <span class="n">matching_set</span><span class="p">):</span>
+ <span class="n">source_children</span> <span class="o">=</span> <span class="n">_get_child_nodes</span><span class="p">(</span><span class="n">source</span><span class="p">)</span>
+ <span class="n">target_children</span> <span class="o">=</span> <span class="n">_get_child_nodes</span><span class="p">(</span><span class="n">target</span><span class="p">)</span>
+
+ <span class="n">lcs</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span>
+ <span class="n">_longest_common_subsequence</span><span class="p">(</span>
+ <span class="n">source_children</span><span class="p">,</span>
+ <span class="n">target_children</span><span class="p">,</span>
+ <span class="k">lambda</span> <span class="n">l</span><span class="p">,</span> <span class="n">r</span><span class="p">:</span> <span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="n">r</span><span class="p">)</span> <span class="ow">in</span> <span class="n">matching_set</span>
+ <span class="p">)</span>
+ <span class="p">)</span>
+
+ <span class="n">move_edits</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">for</span> <span class="n">node</span> <span class="ow">in</span> <span class="n">source_children</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">node</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">lcs</span> <span class="ow">and</span> <span class="n">node</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">unmatched_source_nodes</span><span class="p">:</span>
+ <span class="n">move_edits</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Move</span><span class="p">(</span><span class="n">node</span><span class="p">))</span>
+
+ <span class="k">return</span> <span class="n">move_edits</span>
+</code></pre>
+</div>
+
+<p>I left out the implementation of the LCS algorithm itself here, but there are plenty of implementation choices out there that can be easily looked up.</p>
+
+<h3 id="output">Output</h3>
+
+<p>The implemented algorithm produces the output that resembles the following:</p>
+
+<div class="pdoc-code codehilite">
+<pre><span></span><code><span class="o">&gt;&gt;&gt;</span> <span class="kn">from</span> <span class="nn">sqlglot</span> <span class="kn">import</span> <span class="n">parse_one</span><span class="p">,</span> <span class="n">diff</span>
+<span class="o">&gt;&gt;&gt;</span> <span class="n">diff</span><span class="p">(</span><span class="n">parse_one</span><span class="p">(</span><span class="s2">&quot;SELECT a + b + c, d, e&quot;</span><span class="p">),</span> <span class="n">parse_one</span><span class="p">(</span><span class="s2">&quot;SELECT a - b + c, e, f&quot;</span><span class="p">))</span>
+
+<span class="n">Remove</span><span class="p">(</span><span class="n">Add</span><span class="p">)</span>
+<span class="n">Remove</span><span class="p">(</span><span class="n">Column</span><span class="p">(</span><span class="n">d</span><span class="p">))</span>
+<span class="n">Remove</span><span class="p">(</span><span class="n">Identifier</span><span class="p">(</span><span class="n">d</span><span class="p">))</span>
+<span class="n">Insert</span><span class="p">(</span><span class="n">Sub</span><span class="p">)</span>
+<span class="n">Insert</span><span class="p">(</span><span class="n">Column</span><span class="p">(</span><span class="n">f</span><span class="p">))</span>
+<span class="n">Insert</span><span class="p">(</span><span class="n">Identifier</span><span class="p">(</span><span class="n">f</span><span class="p">))</span>
+<span class="n">Keep</span><span class="p">(</span><span class="n">Select</span><span class="p">,</span> <span class="n">Select</span><span class="p">)</span>
+<span class="n">Keep</span><span class="p">(</span><span class="n">Add</span><span class="p">,</span> <span class="n">Add</span><span class="p">)</span>
+<span class="n">Keep</span><span class="p">(</span><span class="n">Column</span><span class="p">(</span><span class="n">a</span><span class="p">),</span> <span class="n">Column</span><span class="p">(</span><span class="n">a</span><span class="p">))</span>
+<span class="n">Keep</span><span class="p">(</span><span class="n">Identifier</span><span class="p">(</span><span class="n">a</span><span class="p">),</span> <span class="n">Identifier</span><span class="p">(</span><span class="n">a</span><span class="p">))</span>
+<span class="n">Keep</span><span class="p">(</span><span class="n">Column</span><span class="p">(</span><span class="n">b</span><span class="p">),</span> <span class="n">Column</span><span class="p">(</span><span class="n">b</span><span class="p">))</span>
+<span class="n">Keep</span><span class="p">(</span><span class="n">Identifier</span><span class="p">(</span><span class="n">b</span><span class="p">),</span> <span class="n">Identifier</span><span class="p">(</span><span class="n">b</span><span class="p">))</span>
+<span class="n">Keep</span><span class="p">(</span><span class="n">Column</span><span class="p">(</span><span class="n">c</span><span class="p">),</span> <span class="n">Column</span><span class="p">(</span><span class="n">c</span><span class="p">))</span>
+<span class="n">Keep</span><span class="p">(</span><span class="n">Identifier</span><span class="p">(</span><span class="n">c</span><span class="p">),</span> <span class="n">Identifier</span><span class="p">(</span><span class="n">c</span><span class="p">))</span>
+<span class="n">Keep</span><span class="p">(</span><span class="n">Column</span><span class="p">(</span><span class="n">e</span><span class="p">),</span> <span class="n">Column</span><span class="p">(</span><span class="n">e</span><span class="p">))</span>
+<span class="n">Keep</span><span class="p">(</span><span class="n">Identifier</span><span class="p">(</span><span class="n">e</span><span class="p">),</span> <span class="n">Identifier</span><span class="p">(</span><span class="n">e</span><span class="p">))</span>
+</code></pre>
+</div>
+
+<p>Note that the output above is abbreviated. The string representation of actual AST nodes is significantly more verbose.</p>
+
+<p>The implementation works especially well when coupled with the SQLGlot’s query optimizer which can be used to produce canonical representations of compared queries:</p>
+
+<div class="pdoc-code codehilite">
+<pre><span></span><code><span class="o">&gt;&gt;&gt;</span> <span class="n">schema</span><span class="o">=</span><span class="p">{</span><span class="s2">&quot;t&quot;</span><span class="p">:</span> <span class="p">{</span><span class="s2">&quot;a&quot;</span><span class="p">:</span> <span class="s2">&quot;INT&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">:</span> <span class="s2">&quot;INT&quot;</span><span class="p">,</span> <span class="s2">&quot;c&quot;</span><span class="p">:</span> <span class="s2">&quot;INT&quot;</span><span class="p">,</span> <span class="s2">&quot;d&quot;</span><span class="p">:</span> <span class="s2">&quot;INT&quot;</span><span class="p">}}</span>
+<span class="o">&gt;&gt;&gt;</span> <span class="n">source</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
+<span class="s2">... SELECT 1 + 1 + a</span>
+<span class="s2">... FROM t</span>
+<span class="s2">... WHERE b = 1 OR (c = 2 AND d = 3)</span>
+<span class="s2">... &quot;&quot;&quot;</span>
+<span class="o">&gt;&gt;&gt;</span> <span class="n">target</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
+<span class="s2">... SELECT 2 + a</span>
+<span class="s2">... FROM t</span>
+<span class="s2">... WHERE (b = 1 OR c = 2) AND (b = 1 OR d = 3)</span>
+<span class="s2">... &quot;&quot;&quot;</span>
+<span class="o">&gt;&gt;&gt;</span> <span class="n">optimized_source</span> <span class="o">=</span> <span class="n">optimize</span><span class="p">(</span><span class="n">parse_one</span><span class="p">(</span><span class="n">source</span><span class="p">),</span> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">)</span>
+<span class="o">&gt;&gt;&gt;</span> <span class="n">optimized_target</span> <span class="o">=</span> <span class="n">optimize</span><span class="p">(</span><span class="n">parse_one</span><span class="p">(</span><span class="n">target</span><span class="p">),</span> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">)</span>
+<span class="o">&gt;&gt;&gt;</span> <span class="n">edit_script</span> <span class="o">=</span> <span class="n">diff</span><span class="p">(</span><span class="n">optimized_source</span><span class="p">,</span> <span class="n">optimized_target</span><span class="p">)</span>
+<span class="o">&gt;&gt;&gt;</span> <span class="nb">sum</span><span class="p">(</span><span class="mi">0</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="n">Keep</span><span class="p">)</span> <span class="k">else</span> <span class="mi">1</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">edit_script</span><span class="p">)</span>
+<span class="mi">0</span>
+</code></pre>
+</div>
+
+<h3 id="optimizations">Optimizations</h3>
+
+<p>The worst case runtime complexity of this algorithm is not exactly stellar: O(n^2 * log n^2). This is because of the leaf matching process, which involves ranking a cartesian product between all leaf nodes of compared trees. Unsurprisingly, the algorithm takes a considerable time to finish for bigger queries.</p>
+
+<p>There are still a few basic things we can do in our implementation to help improve performance:</p>
+
+<ul>
+<li>Refer to individual node objects using their identifiers (Python’s <a href="https://docs.python.org/3/library/functions.html#id">id()</a>) instead of direct references in sets. This helps avoid costly recursive hash calculations and equality checks.</li>
+<li>Cache bigram histograms to avoid computing them more than once for the same node.</li>
+<li>Compute the canonical SQL string representation for each tree once while caching string representations of all inner nodes. This prevents redundant tree traversals when bigrams are computed.</li>
+</ul>
+
+<p>At the time of writing only the first two optimizations have been implemented, so there is an opportunity to contribute for anyone who’s interested.</p>
+
+<h2 id="alternative-solutions">Alternative Solutions</h2>
+
+<p>This section is dedicated to solutions that I’ve investigated, but haven’t tried.</p>
+
+<p>First, this section wouldn’t be complete without Tristan Hume’s <a href="https://thume.ca/2017/06/17/tree-diffing/">blog post</a>. Tristan’s solution has a lot in common with the Myers algorithm plus heuristics that is much more clever than what I came up with. The implementation relies on a combination of <a href="https://en.wikipedia.org/wiki/Dynamic_programming">dynamic programming</a> and <a href="https://en.wikipedia.org/wiki/A*_search_algorithm">A* search algorithm</a> to explore the space of possible matchings and pick the best ones. It seemed to have worked well for Tistan’s specific use case, but after my negative experience with the Myers algorithm, I decided to try something different.</p>
+
+<p>Another notable approach is the Gumtree algorithm by Falleri et al. [4]. I discovered this paper after I’d already implemented the algorithm that is the main focus of this post. In sections 5.2 and 5.3 of their paper, the authors compare the two algorithms side by side and claim that Gumtree is significantly better in terms of both runtime performance and accuracy when evaluated on 12 792 pairs of Java source files. This doesn’t surprise me, as the algorithm takes the height of subtrees into account. In my tests, I definitely saw scenarios in which this context would have helped. On top of that, the authors promise O(n^2) runtime complexity in the worst case which, given the Change Distiller's O(n^2 * log n^2), looks particularly tempting. I hope to try this algorithm out at some point, and there is a good chance you see me writing about it in my future posts.</p>
+
+<h2 id="conclusion">Conclusion</h2>
+
+<p>The Change Distiller algorithm yielded quite satisfactory results in most of my tests. The scenarios in which it fell short mostly concerned identical (or very similar) subtrees located in different parts of the AST. In those cases, node mismatches were frequent and, as a result, edit scripts were somewhat suboptimal.</p>
+
+<p>Additionally, the runtime performance of the algorithm leaves a lot to be desired. On trees with 1000 leaf nodes each, the algorithm takes a little under 2 seconds to complete. My implementation still has room for improvement, but this should give you a rough idea of what to expect. It appears that the Gumtree algorithm [4] can help address both of these points. I hope to find bandwidth to work on it soon and then compare the two algorithms side-by-side to find out which one performs better on SQL specifically. In the meantime, Change Distiller definitely gets the job done, and I can now proceed with applying it to some of the use cases I mentioned at the beginning of this post.</p>
+
+<p>I’m also curious to learn whether other folks in the industry faced a similar problem, and how they approached it. If you did something similar, I’m interested to hear about your experience.</p>
+
+<h2 id="references">References</h2>
+
+<p>[1] Eugene W. Myers. <a href="http://www.xmailserver.org/diff2.pdf">An O(ND) Difference Algorithm and Its Variations</a>. Algorithmica 1(2): 251-266 (1986)</p>
+
+<p>[2] B. Fluri, M. Wursch, M. Pinzger, and H. Gall. <a href="https://www.researchgate.net/publication/3189787_Change_DistillingTree_Differencing_for_Fine-Grained_Source_Code_Change_Extraction">Change Distilling: Tree differencing for fine-grained source code change extraction</a>. IEEE Trans. Software Eng., 33(11):725–743, 2007.</p>
+
+<p>[3] S.S. Chawathe, A. Rajaraman, H. Garcia-Molina, and J. Widom. <a href="http://ilpubs.stanford.edu:8090/115/1/1995-46.pdf">Change Detection in Hierarchically Structured Information</a>. Proc. ACM Sigmod Int’l Conf. Management of Data, pp. 493-504, June 1996</p>
+
+<p>[4] Jean-Rémy Falleri, Floréal Morandat, Xavier Blanc, Matias Martinez, Martin Monperrus. <a href="https://hal.archives-ouvertes.fr/hal-01054552/document">Fine-grained and Accurate Source Code Differencing</a>. Proceedings of the International Conference on Automated Software Engineering, 2014, Västeras, Sweden. pp.313-324, 10.1145/2642937.2642982. hal-01054552</p>
+
+<hr />
+</div>
+
+ <input id="mod-diff-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
+
+ <label class="view-source-button" for="mod-diff-view-source"><span>View Source</span></label>
+
+ <div class="pdoc-code codehilite"><pre><span></span><span id="L-1"><a href="#L-1"><span class="linenos"> 1</span></a><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="L-2"><a href="#L-2"><span class="linenos"> 2</span></a><span class="sd">.. include:: ../posts/sql_diff.md</span>
+</span><span id="L-3"><a href="#L-3"><span class="linenos"> 3</span></a>
+</span><span id="L-4"><a href="#L-4"><span class="linenos"> 4</span></a><span class="sd">----</span>
+</span><span id="L-5"><a href="#L-5"><span class="linenos"> 5</span></a><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="L-6"><a href="#L-6"><span class="linenos"> 6</span></a>
+</span><span id="L-7"><a href="#L-7"><span class="linenos"> 7</span></a><span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span>
+</span><span id="L-8"><a href="#L-8"><span class="linenos"> 8</span></a>
+</span><span id="L-9"><a href="#L-9"><span class="linenos"> 9</span></a><span class="kn">import</span> <span class="nn">typing</span> <span class="k">as</span> <span class="nn">t</span>
+</span><span id="L-10"><a href="#L-10"><span class="linenos"> 10</span></a><span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">defaultdict</span>
+</span><span id="L-11"><a href="#L-11"><span class="linenos"> 11</span></a><span class="kn">from</span> <span class="nn">dataclasses</span> <span class="kn">import</span> <span class="n">dataclass</span>
+</span><span id="L-12"><a href="#L-12"><span class="linenos"> 12</span></a><span class="kn">from</span> <span class="nn">heapq</span> <span class="kn">import</span> <span class="n">heappop</span><span class="p">,</span> <span class="n">heappush</span>
+</span><span id="L-13"><a href="#L-13"><span class="linenos"> 13</span></a>
+</span><span id="L-14"><a href="#L-14"><span class="linenos"> 14</span></a><span class="kn">from</span> <span class="nn">sqlglot</span> <span class="kn">import</span> <span class="n">Dialect</span>
+</span><span id="L-15"><a href="#L-15"><span class="linenos"> 15</span></a><span class="kn">from</span> <span class="nn">sqlglot</span> <span class="kn">import</span> <span class="n">expressions</span> <span class="k">as</span> <span class="n">exp</span>
+</span><span id="L-16"><a href="#L-16"><span class="linenos"> 16</span></a><span class="kn">from</span> <span class="nn">sqlglot.helper</span> <span class="kn">import</span> <span class="n">ensure_collection</span>
+</span><span id="L-17"><a href="#L-17"><span class="linenos"> 17</span></a>
+</span><span id="L-18"><a href="#L-18"><span class="linenos"> 18</span></a>
+</span><span id="L-19"><a href="#L-19"><span class="linenos"> 19</span></a><span class="nd">@dataclass</span><span class="p">(</span><span class="n">frozen</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="L-20"><a href="#L-20"><span class="linenos"> 20</span></a><span class="k">class</span> <span class="nc">Insert</span><span class="p">:</span>
+</span><span id="L-21"><a href="#L-21"><span class="linenos"> 21</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Indicates that a new node has been inserted&quot;&quot;&quot;</span>
+</span><span id="L-22"><a href="#L-22"><span class="linenos"> 22</span></a>
+</span><span id="L-23"><a href="#L-23"><span class="linenos"> 23</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span><span id="L-24"><a href="#L-24"><span class="linenos"> 24</span></a>
+</span><span id="L-25"><a href="#L-25"><span class="linenos"> 25</span></a>
+</span><span id="L-26"><a href="#L-26"><span class="linenos"> 26</span></a><span class="nd">@dataclass</span><span class="p">(</span><span class="n">frozen</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="L-27"><a href="#L-27"><span class="linenos"> 27</span></a><span class="k">class</span> <span class="nc">Remove</span><span class="p">:</span>
+</span><span id="L-28"><a href="#L-28"><span class="linenos"> 28</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Indicates that an existing node has been removed&quot;&quot;&quot;</span>
+</span><span id="L-29"><a href="#L-29"><span class="linenos"> 29</span></a>
+</span><span id="L-30"><a href="#L-30"><span class="linenos"> 30</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span><span id="L-31"><a href="#L-31"><span class="linenos"> 31</span></a>
+</span><span id="L-32"><a href="#L-32"><span class="linenos"> 32</span></a>
+</span><span id="L-33"><a href="#L-33"><span class="linenos"> 33</span></a><span class="nd">@dataclass</span><span class="p">(</span><span class="n">frozen</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="L-34"><a href="#L-34"><span class="linenos"> 34</span></a><span class="k">class</span> <span class="nc">Move</span><span class="p">:</span>
+</span><span id="L-35"><a href="#L-35"><span class="linenos"> 35</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Indicates that an existing node&#39;s position within the tree has changed&quot;&quot;&quot;</span>
+</span><span id="L-36"><a href="#L-36"><span class="linenos"> 36</span></a>
+</span><span id="L-37"><a href="#L-37"><span class="linenos"> 37</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span><span id="L-38"><a href="#L-38"><span class="linenos"> 38</span></a>
+</span><span id="L-39"><a href="#L-39"><span class="linenos"> 39</span></a>
+</span><span id="L-40"><a href="#L-40"><span class="linenos"> 40</span></a><span class="nd">@dataclass</span><span class="p">(</span><span class="n">frozen</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="L-41"><a href="#L-41"><span class="linenos"> 41</span></a><span class="k">class</span> <span class="nc">Update</span><span class="p">:</span>
+</span><span id="L-42"><a href="#L-42"><span class="linenos"> 42</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Indicates that an existing node has been updated&quot;&quot;&quot;</span>
+</span><span id="L-43"><a href="#L-43"><span class="linenos"> 43</span></a>
+</span><span id="L-44"><a href="#L-44"><span class="linenos"> 44</span></a> <span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span><span id="L-45"><a href="#L-45"><span class="linenos"> 45</span></a> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span><span id="L-46"><a href="#L-46"><span class="linenos"> 46</span></a>
+</span><span id="L-47"><a href="#L-47"><span class="linenos"> 47</span></a>
+</span><span id="L-48"><a href="#L-48"><span class="linenos"> 48</span></a><span class="nd">@dataclass</span><span class="p">(</span><span class="n">frozen</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="L-49"><a href="#L-49"><span class="linenos"> 49</span></a><span class="k">class</span> <span class="nc">Keep</span><span class="p">:</span>
+</span><span id="L-50"><a href="#L-50"><span class="linenos"> 50</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Indicates that an existing node hasn&#39;t been changed&quot;&quot;&quot;</span>
+</span><span id="L-51"><a href="#L-51"><span class="linenos"> 51</span></a>
+</span><span id="L-52"><a href="#L-52"><span class="linenos"> 52</span></a> <span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span><span id="L-53"><a href="#L-53"><span class="linenos"> 53</span></a> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span><span id="L-54"><a href="#L-54"><span class="linenos"> 54</span></a>
+</span><span id="L-55"><a href="#L-55"><span class="linenos"> 55</span></a>
+</span><span id="L-56"><a href="#L-56"><span class="linenos"> 56</span></a><span class="k">if</span> <span class="n">t</span><span class="o">.</span><span class="n">TYPE_CHECKING</span><span class="p">:</span>
+</span><span id="L-57"><a href="#L-57"><span class="linenos"> 57</span></a> <span class="n">T</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">)</span>
+</span><span id="L-58"><a href="#L-58"><span class="linenos"> 58</span></a> <span class="n">Edit</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Insert</span><span class="p">,</span> <span class="n">Remove</span><span class="p">,</span> <span class="n">Move</span><span class="p">,</span> <span class="n">Update</span><span class="p">,</span> <span class="n">Keep</span><span class="p">]</span>
+</span><span id="L-59"><a href="#L-59"><span class="linenos"> 59</span></a>
+</span><span id="L-60"><a href="#L-60"><span class="linenos"> 60</span></a>
+</span><span id="L-61"><a href="#L-61"><span class="linenos"> 61</span></a><span class="k">def</span> <span class="nf">diff</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Edit</span><span class="p">]:</span>
+</span><span id="L-62"><a href="#L-62"><span class="linenos"> 62</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="L-63"><a href="#L-63"><span class="linenos"> 63</span></a><span class="sd"> Returns the list of changes between the source and the target expressions.</span>
+</span><span id="L-64"><a href="#L-64"><span class="linenos"> 64</span></a>
+</span><span id="L-65"><a href="#L-65"><span class="linenos"> 65</span></a><span class="sd"> Examples:</span>
+</span><span id="L-66"><a href="#L-66"><span class="linenos"> 66</span></a><span class="sd"> &gt;&gt;&gt; diff(parse_one(&quot;a + b&quot;), parse_one(&quot;a + c&quot;))</span>
+</span><span id="L-67"><a href="#L-67"><span class="linenos"> 67</span></a><span class="sd"> [</span>
+</span><span id="L-68"><a href="#L-68"><span class="linenos"> 68</span></a><span class="sd"> Remove(expression=(COLUMN this: (IDENTIFIER this: b, quoted: False))),</span>
+</span><span id="L-69"><a href="#L-69"><span class="linenos"> 69</span></a><span class="sd"> Insert(expression=(COLUMN this: (IDENTIFIER this: c, quoted: False))),</span>
+</span><span id="L-70"><a href="#L-70"><span class="linenos"> 70</span></a><span class="sd"> Keep(</span>
+</span><span id="L-71"><a href="#L-71"><span class="linenos"> 71</span></a><span class="sd"> source=(ADD this: ...),</span>
+</span><span id="L-72"><a href="#L-72"><span class="linenos"> 72</span></a><span class="sd"> target=(ADD this: ...)</span>
+</span><span id="L-73"><a href="#L-73"><span class="linenos"> 73</span></a><span class="sd"> ),</span>
+</span><span id="L-74"><a href="#L-74"><span class="linenos"> 74</span></a><span class="sd"> Keep(</span>
+</span><span id="L-75"><a href="#L-75"><span class="linenos"> 75</span></a><span class="sd"> source=(COLUMN this: (IDENTIFIER this: a, quoted: False)),</span>
+</span><span id="L-76"><a href="#L-76"><span class="linenos"> 76</span></a><span class="sd"> target=(COLUMN this: (IDENTIFIER this: a, quoted: False))</span>
+</span><span id="L-77"><a href="#L-77"><span class="linenos"> 77</span></a><span class="sd"> ),</span>
+</span><span id="L-78"><a href="#L-78"><span class="linenos"> 78</span></a><span class="sd"> ]</span>
+</span><span id="L-79"><a href="#L-79"><span class="linenos"> 79</span></a>
+</span><span id="L-80"><a href="#L-80"><span class="linenos"> 80</span></a><span class="sd"> Args:</span>
+</span><span id="L-81"><a href="#L-81"><span class="linenos"> 81</span></a><span class="sd"> source: the source expression.</span>
+</span><span id="L-82"><a href="#L-82"><span class="linenos"> 82</span></a><span class="sd"> target: the target expression against which the diff should be calculated.</span>
+</span><span id="L-83"><a href="#L-83"><span class="linenos"> 83</span></a>
+</span><span id="L-84"><a href="#L-84"><span class="linenos"> 84</span></a><span class="sd"> Returns:</span>
+</span><span id="L-85"><a href="#L-85"><span class="linenos"> 85</span></a><span class="sd"> the list of Insert, Remove, Move, Update and Keep objects for each node in the source and the</span>
+</span><span id="L-86"><a href="#L-86"><span class="linenos"> 86</span></a><span class="sd"> target expression trees. This list represents a sequence of steps needed to transform the source</span>
+</span><span id="L-87"><a href="#L-87"><span class="linenos"> 87</span></a><span class="sd"> expression tree into the target one.</span>
+</span><span id="L-88"><a href="#L-88"><span class="linenos"> 88</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="L-89"><a href="#L-89"><span class="linenos"> 89</span></a> <span class="k">return</span> <span class="n">ChangeDistiller</span><span class="p">()</span><span class="o">.</span><span class="n">diff</span><span class="p">(</span><span class="n">source</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">target</span><span class="o">.</span><span class="n">copy</span><span class="p">())</span>
+</span><span id="L-90"><a href="#L-90"><span class="linenos"> 90</span></a>
+</span><span id="L-91"><a href="#L-91"><span class="linenos"> 91</span></a>
+</span><span id="L-92"><a href="#L-92"><span class="linenos"> 92</span></a><span class="n">LEAF_EXPRESSION_TYPES</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="L-93"><a href="#L-93"><span class="linenos"> 93</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Boolean</span><span class="p">,</span>
+</span><span id="L-94"><a href="#L-94"><span class="linenos"> 94</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">DataType</span><span class="p">,</span>
+</span><span id="L-95"><a href="#L-95"><span class="linenos"> 95</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Identifier</span><span class="p">,</span>
+</span><span id="L-96"><a href="#L-96"><span class="linenos"> 96</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Literal</span><span class="p">,</span>
+</span><span id="L-97"><a href="#L-97"><span class="linenos"> 97</span></a><span class="p">)</span>
+</span><span id="L-98"><a href="#L-98"><span class="linenos"> 98</span></a>
+</span><span id="L-99"><a href="#L-99"><span class="linenos"> 99</span></a>
+</span><span id="L-100"><a href="#L-100"><span class="linenos">100</span></a><span class="k">class</span> <span class="nc">ChangeDistiller</span><span class="p">:</span>
+</span><span id="L-101"><a href="#L-101"><span class="linenos">101</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="L-102"><a href="#L-102"><span class="linenos">102</span></a><span class="sd"> The implementation of the Change Distiller algorithm described by Beat Fluri and Martin Pinzger in</span>
+</span><span id="L-103"><a href="#L-103"><span class="linenos">103</span></a><span class="sd"> their paper https://ieeexplore.ieee.org/document/4339230, which in turn is based on the algorithm by</span>
+</span><span id="L-104"><a href="#L-104"><span class="linenos">104</span></a><span class="sd"> Chawathe et al. described in http://ilpubs.stanford.edu:8090/115/1/1995-46.pdf.</span>
+</span><span id="L-105"><a href="#L-105"><span class="linenos">105</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="L-106"><a href="#L-106"><span class="linenos">106</span></a>
+</span><span id="L-107"><a href="#L-107"><span class="linenos">107</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">f</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.6</span><span class="p">,</span> <span class="n">t</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.6</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="L-108"><a href="#L-108"><span class="linenos">108</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">f</span> <span class="o">=</span> <span class="n">f</span>
+</span><span id="L-109"><a href="#L-109"><span class="linenos">109</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">t</span> <span class="o">=</span> <span class="n">t</span>
+</span><span id="L-110"><a href="#L-110"><span class="linenos">110</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_sql_generator</span> <span class="o">=</span> <span class="n">Dialect</span><span class="p">()</span><span class="o">.</span><span class="n">generator</span><span class="p">()</span>
+</span><span id="L-111"><a href="#L-111"><span class="linenos">111</span></a>
+</span><span id="L-112"><a href="#L-112"><span class="linenos">112</span></a> <span class="k">def</span> <span class="nf">diff</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Edit</span><span class="p">]:</span>
+</span><span id="L-113"><a href="#L-113"><span class="linenos">113</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_source</span> <span class="o">=</span> <span class="n">source</span>
+</span><span id="L-114"><a href="#L-114"><span class="linenos">114</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_target</span> <span class="o">=</span> <span class="n">target</span>
+</span><span id="L-115"><a href="#L-115"><span class="linenos">115</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span> <span class="o">=</span> <span class="p">{</span><span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span> <span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">source</span><span class="o">.</span><span class="n">bfs</span><span class="p">()}</span>
+</span><span id="L-116"><a href="#L-116"><span class="linenos">116</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_target_index</span> <span class="o">=</span> <span class="p">{</span><span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span> <span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">target</span><span class="o">.</span><span class="n">bfs</span><span class="p">()}</span>
+</span><span id="L-117"><a href="#L-117"><span class="linenos">117</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span><span class="p">)</span>
+</span><span id="L-118"><a href="#L-118"><span class="linenos">118</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_target_index</span><span class="p">)</span>
+</span><span id="L-119"><a href="#L-119"><span class="linenos">119</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo_cache</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">DefaultDict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="p">{}</span>
+</span><span id="L-120"><a href="#L-120"><span class="linenos">120</span></a>
+</span><span id="L-121"><a href="#L-121"><span class="linenos">121</span></a> <span class="n">matching_set</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_matching_set</span><span class="p">()</span>
+</span><span id="L-122"><a href="#L-122"><span class="linenos">122</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_generate_edit_script</span><span class="p">(</span><span class="n">matching_set</span><span class="p">)</span>
+</span><span id="L-123"><a href="#L-123"><span class="linenos">123</span></a>
+</span><span id="L-124"><a href="#L-124"><span class="linenos">124</span></a> <span class="k">def</span> <span class="nf">_generate_edit_script</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">matching_set</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Set</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Edit</span><span class="p">]:</span>
+</span><span id="L-125"><a href="#L-125"><span class="linenos">125</span></a> <span class="n">edit_script</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Edit</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="L-126"><a href="#L-126"><span class="linenos">126</span></a> <span class="k">for</span> <span class="n">removed_node_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span><span class="p">:</span>
+</span><span id="L-127"><a href="#L-127"><span class="linenos">127</span></a> <span class="n">edit_script</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Remove</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span><span class="p">[</span><span class="n">removed_node_id</span><span class="p">]))</span>
+</span><span id="L-128"><a href="#L-128"><span class="linenos">128</span></a> <span class="k">for</span> <span class="n">inserted_node_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span><span class="p">:</span>
+</span><span id="L-129"><a href="#L-129"><span class="linenos">129</span></a> <span class="n">edit_script</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Insert</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_target_index</span><span class="p">[</span><span class="n">inserted_node_id</span><span class="p">]))</span>
+</span><span id="L-130"><a href="#L-130"><span class="linenos">130</span></a> <span class="k">for</span> <span class="n">kept_source_node_id</span><span class="p">,</span> <span class="n">kept_target_node_id</span> <span class="ow">in</span> <span class="n">matching_set</span><span class="p">:</span>
+</span><span id="L-131"><a href="#L-131"><span class="linenos">131</span></a> <span class="n">source_node</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span><span class="p">[</span><span class="n">kept_source_node_id</span><span class="p">]</span>
+</span><span id="L-132"><a href="#L-132"><span class="linenos">132</span></a> <span class="n">target_node</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target_index</span><span class="p">[</span><span class="n">kept_target_node_id</span><span class="p">]</span>
+</span><span id="L-133"><a href="#L-133"><span class="linenos">133</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">LEAF_EXPRESSION_TYPES</span><span class="p">)</span> <span class="ow">or</span> <span class="n">source_node</span> <span class="o">==</span> <span class="n">target_node</span><span class="p">:</span>
+</span><span id="L-134"><a href="#L-134"><span class="linenos">134</span></a> <span class="n">edit_script</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span>
+</span><span id="L-135"><a href="#L-135"><span class="linenos">135</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_generate_move_edits</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">,</span> <span class="n">matching_set</span><span class="p">)</span>
+</span><span id="L-136"><a href="#L-136"><span class="linenos">136</span></a> <span class="p">)</span>
+</span><span id="L-137"><a href="#L-137"><span class="linenos">137</span></a> <span class="n">edit_script</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Keep</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">))</span>
+</span><span id="L-138"><a href="#L-138"><span class="linenos">138</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="L-139"><a href="#L-139"><span class="linenos">139</span></a> <span class="n">edit_script</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Update</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">))</span>
+</span><span id="L-140"><a href="#L-140"><span class="linenos">140</span></a>
+</span><span id="L-141"><a href="#L-141"><span class="linenos">141</span></a> <span class="k">return</span> <span class="n">edit_script</span>
+</span><span id="L-142"><a href="#L-142"><span class="linenos">142</span></a>
+</span><span id="L-143"><a href="#L-143"><span class="linenos">143</span></a> <span class="k">def</span> <span class="nf">_generate_move_edits</span><span class="p">(</span>
+</span><span id="L-144"><a href="#L-144"><span class="linenos">144</span></a> <span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">matching_set</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Set</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]</span>
+</span><span id="L-145"><a href="#L-145"><span class="linenos">145</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Move</span><span class="p">]:</span>
+</span><span id="L-146"><a href="#L-146"><span class="linenos">146</span></a> <span class="n">source_args</span> <span class="o">=</span> <span class="p">[</span><span class="nb">id</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">_expression_only_args</span><span class="p">(</span><span class="n">source</span><span class="p">)]</span>
+</span><span id="L-147"><a href="#L-147"><span class="linenos">147</span></a> <span class="n">target_args</span> <span class="o">=</span> <span class="p">[</span><span class="nb">id</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">_expression_only_args</span><span class="p">(</span><span class="n">target</span><span class="p">)]</span>
+</span><span id="L-148"><a href="#L-148"><span class="linenos">148</span></a>
+</span><span id="L-149"><a href="#L-149"><span class="linenos">149</span></a> <span class="n">args_lcs</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">_lcs</span><span class="p">(</span><span class="n">source_args</span><span class="p">,</span> <span class="n">target_args</span><span class="p">,</span> <span class="k">lambda</span> <span class="n">l</span><span class="p">,</span> <span class="n">r</span><span class="p">:</span> <span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="n">r</span><span class="p">)</span> <span class="ow">in</span> <span class="n">matching_set</span><span class="p">))</span>
+</span><span id="L-150"><a href="#L-150"><span class="linenos">150</span></a>
+</span><span id="L-151"><a href="#L-151"><span class="linenos">151</span></a> <span class="n">move_edits</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="L-152"><a href="#L-152"><span class="linenos">152</span></a> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">source_args</span><span class="p">:</span>
+</span><span id="L-153"><a href="#L-153"><span class="linenos">153</span></a> <span class="k">if</span> <span class="n">a</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">args_lcs</span> <span class="ow">and</span> <span class="n">a</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span><span class="p">:</span>
+</span><span id="L-154"><a href="#L-154"><span class="linenos">154</span></a> <span class="n">move_edits</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Move</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span><span class="p">[</span><span class="n">a</span><span class="p">]))</span>
+</span><span id="L-155"><a href="#L-155"><span class="linenos">155</span></a>
+</span><span id="L-156"><a href="#L-156"><span class="linenos">156</span></a> <span class="k">return</span> <span class="n">move_edits</span>
+</span><span id="L-157"><a href="#L-157"><span class="linenos">157</span></a>
+</span><span id="L-158"><a href="#L-158"><span class="linenos">158</span></a> <span class="k">def</span> <span class="nf">_compute_matching_set</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">Set</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]:</span>
+</span><span id="L-159"><a href="#L-159"><span class="linenos">159</span></a> <span class="n">leaves_matching_set</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_leaf_matching_set</span><span class="p">()</span>
+</span><span id="L-160"><a href="#L-160"><span class="linenos">160</span></a> <span class="n">matching_set</span> <span class="o">=</span> <span class="n">leaves_matching_set</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="L-161"><a href="#L-161"><span class="linenos">161</span></a>
+</span><span id="L-162"><a href="#L-162"><span class="linenos">162</span></a> <span class="n">ordered_unmatched_source_nodes</span> <span class="o">=</span> <span class="p">{</span>
+</span><span id="L-163"><a href="#L-163"><span class="linenos">163</span></a> <span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source</span><span class="o">.</span><span class="n">bfs</span><span class="p">()</span> <span class="k">if</span> <span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span>
+</span><span id="L-164"><a href="#L-164"><span class="linenos">164</span></a> <span class="p">}</span>
+</span><span id="L-165"><a href="#L-165"><span class="linenos">165</span></a> <span class="n">ordered_unmatched_target_nodes</span> <span class="o">=</span> <span class="p">{</span>
+</span><span id="L-166"><a href="#L-166"><span class="linenos">166</span></a> <span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target</span><span class="o">.</span><span class="n">bfs</span><span class="p">()</span> <span class="k">if</span> <span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span>
+</span><span id="L-167"><a href="#L-167"><span class="linenos">167</span></a> <span class="p">}</span>
+</span><span id="L-168"><a href="#L-168"><span class="linenos">168</span></a>
+</span><span id="L-169"><a href="#L-169"><span class="linenos">169</span></a> <span class="k">for</span> <span class="n">source_node_id</span> <span class="ow">in</span> <span class="n">ordered_unmatched_source_nodes</span><span class="p">:</span>
+</span><span id="L-170"><a href="#L-170"><span class="linenos">170</span></a> <span class="k">for</span> <span class="n">target_node_id</span> <span class="ow">in</span> <span class="n">ordered_unmatched_target_nodes</span><span class="p">:</span>
+</span><span id="L-171"><a href="#L-171"><span class="linenos">171</span></a> <span class="n">source_node</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span><span class="p">[</span><span class="n">source_node_id</span><span class="p">]</span>
+</span><span id="L-172"><a href="#L-172"><span class="linenos">172</span></a> <span class="n">target_node</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target_index</span><span class="p">[</span><span class="n">target_node_id</span><span class="p">]</span>
+</span><span id="L-173"><a href="#L-173"><span class="linenos">173</span></a> <span class="k">if</span> <span class="n">_is_same_type</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">):</span>
+</span><span id="L-174"><a href="#L-174"><span class="linenos">174</span></a> <span class="n">source_leaf_ids</span> <span class="o">=</span> <span class="p">{</span><span class="nb">id</span><span class="p">(</span><span class="n">l</span><span class="p">)</span> <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="n">_get_leaves</span><span class="p">(</span><span class="n">source_node</span><span class="p">)}</span>
+</span><span id="L-175"><a href="#L-175"><span class="linenos">175</span></a> <span class="n">target_leaf_ids</span> <span class="o">=</span> <span class="p">{</span><span class="nb">id</span><span class="p">(</span><span class="n">l</span><span class="p">)</span> <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="n">_get_leaves</span><span class="p">(</span><span class="n">target_node</span><span class="p">)}</span>
+</span><span id="L-176"><a href="#L-176"><span class="linenos">176</span></a>
+</span><span id="L-177"><a href="#L-177"><span class="linenos">177</span></a> <span class="n">max_leaves_num</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">source_leaf_ids</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">target_leaf_ids</span><span class="p">))</span>
+</span><span id="L-178"><a href="#L-178"><span class="linenos">178</span></a> <span class="k">if</span> <span class="n">max_leaves_num</span><span class="p">:</span>
+</span><span id="L-179"><a href="#L-179"><span class="linenos">179</span></a> <span class="n">common_leaves_num</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span>
+</span><span id="L-180"><a href="#L-180"><span class="linenos">180</span></a> <span class="mi">1</span> <span class="k">if</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">source_leaf_ids</span> <span class="ow">and</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">target_leaf_ids</span> <span class="k">else</span> <span class="mi">0</span>
+</span><span id="L-181"><a href="#L-181"><span class="linenos">181</span></a> <span class="k">for</span> <span class="n">s</span><span class="p">,</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">leaves_matching_set</span>
+</span><span id="L-182"><a href="#L-182"><span class="linenos">182</span></a> <span class="p">)</span>
+</span><span id="L-183"><a href="#L-183"><span class="linenos">183</span></a> <span class="n">leaf_similarity_score</span> <span class="o">=</span> <span class="n">common_leaves_num</span> <span class="o">/</span> <span class="n">max_leaves_num</span>
+</span><span id="L-184"><a href="#L-184"><span class="linenos">184</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="L-185"><a href="#L-185"><span class="linenos">185</span></a> <span class="n">leaf_similarity_score</span> <span class="o">=</span> <span class="mf">0.0</span>
+</span><span id="L-186"><a href="#L-186"><span class="linenos">186</span></a>
+</span><span id="L-187"><a href="#L-187"><span class="linenos">187</span></a> <span class="n">adjusted_t</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="L-188"><a href="#L-188"><span class="linenos">188</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">t</span> <span class="k">if</span> <span class="nb">min</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">source_leaf_ids</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">target_leaf_ids</span><span class="p">))</span> <span class="o">&gt;</span> <span class="mi">4</span> <span class="k">else</span> <span class="mf">0.4</span>
+</span><span id="L-189"><a href="#L-189"><span class="linenos">189</span></a> <span class="p">)</span>
+</span><span id="L-190"><a href="#L-190"><span class="linenos">190</span></a>
+</span><span id="L-191"><a href="#L-191"><span class="linenos">191</span></a> <span class="k">if</span> <span class="n">leaf_similarity_score</span> <span class="o">&gt;=</span> <span class="mf">0.8</span> <span class="ow">or</span> <span class="p">(</span>
+</span><span id="L-192"><a href="#L-192"><span class="linenos">192</span></a> <span class="n">leaf_similarity_score</span> <span class="o">&gt;=</span> <span class="n">adjusted_t</span>
+</span><span id="L-193"><a href="#L-193"><span class="linenos">193</span></a> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dice_coefficient</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">f</span>
+</span><span id="L-194"><a href="#L-194"><span class="linenos">194</span></a> <span class="p">):</span>
+</span><span id="L-195"><a href="#L-195"><span class="linenos">195</span></a> <span class="n">matching_set</span><span class="o">.</span><span class="n">add</span><span class="p">((</span><span class="n">source_node_id</span><span class="p">,</span> <span class="n">target_node_id</span><span class="p">))</span>
+</span><span id="L-196"><a href="#L-196"><span class="linenos">196</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">source_node_id</span><span class="p">)</span>
+</span><span id="L-197"><a href="#L-197"><span class="linenos">197</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">target_node_id</span><span class="p">)</span>
+</span><span id="L-198"><a href="#L-198"><span class="linenos">198</span></a> <span class="n">ordered_unmatched_target_nodes</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">target_node_id</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+</span><span id="L-199"><a href="#L-199"><span class="linenos">199</span></a> <span class="k">break</span>
+</span><span id="L-200"><a href="#L-200"><span class="linenos">200</span></a>
+</span><span id="L-201"><a href="#L-201"><span class="linenos">201</span></a> <span class="k">return</span> <span class="n">matching_set</span>
+</span><span id="L-202"><a href="#L-202"><span class="linenos">202</span></a>
+</span><span id="L-203"><a href="#L-203"><span class="linenos">203</span></a> <span class="k">def</span> <span class="nf">_compute_leaf_matching_set</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">Set</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]:</span>
+</span><span id="L-204"><a href="#L-204"><span class="linenos">204</span></a> <span class="n">candidate_matchings</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">]]</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="L-205"><a href="#L-205"><span class="linenos">205</span></a> <span class="n">source_leaves</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">_get_leaves</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source</span><span class="p">))</span>
+</span><span id="L-206"><a href="#L-206"><span class="linenos">206</span></a> <span class="n">target_leaves</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">_get_leaves</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_target</span><span class="p">))</span>
+</span><span id="L-207"><a href="#L-207"><span class="linenos">207</span></a> <span class="k">for</span> <span class="n">source_leaf</span> <span class="ow">in</span> <span class="n">source_leaves</span><span class="p">:</span>
+</span><span id="L-208"><a href="#L-208"><span class="linenos">208</span></a> <span class="k">for</span> <span class="n">target_leaf</span> <span class="ow">in</span> <span class="n">target_leaves</span><span class="p">:</span>
+</span><span id="L-209"><a href="#L-209"><span class="linenos">209</span></a> <span class="k">if</span> <span class="n">_is_same_type</span><span class="p">(</span><span class="n">source_leaf</span><span class="p">,</span> <span class="n">target_leaf</span><span class="p">):</span>
+</span><span id="L-210"><a href="#L-210"><span class="linenos">210</span></a> <span class="n">similarity_score</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dice_coefficient</span><span class="p">(</span><span class="n">source_leaf</span><span class="p">,</span> <span class="n">target_leaf</span><span class="p">)</span>
+</span><span id="L-211"><a href="#L-211"><span class="linenos">211</span></a> <span class="k">if</span> <span class="n">similarity_score</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">f</span><span class="p">:</span>
+</span><span id="L-212"><a href="#L-212"><span class="linenos">212</span></a> <span class="n">heappush</span><span class="p">(</span>
+</span><span id="L-213"><a href="#L-213"><span class="linenos">213</span></a> <span class="n">candidate_matchings</span><span class="p">,</span>
+</span><span id="L-214"><a href="#L-214"><span class="linenos">214</span></a> <span class="p">(</span>
+</span><span id="L-215"><a href="#L-215"><span class="linenos">215</span></a> <span class="o">-</span><span class="n">similarity_score</span><span class="p">,</span>
+</span><span id="L-216"><a href="#L-216"><span class="linenos">216</span></a> <span class="nb">len</span><span class="p">(</span><span class="n">candidate_matchings</span><span class="p">),</span>
+</span><span id="L-217"><a href="#L-217"><span class="linenos">217</span></a> <span class="n">source_leaf</span><span class="p">,</span>
+</span><span id="L-218"><a href="#L-218"><span class="linenos">218</span></a> <span class="n">target_leaf</span><span class="p">,</span>
+</span><span id="L-219"><a href="#L-219"><span class="linenos">219</span></a> <span class="p">),</span>
+</span><span id="L-220"><a href="#L-220"><span class="linenos">220</span></a> <span class="p">)</span>
+</span><span id="L-221"><a href="#L-221"><span class="linenos">221</span></a>
+</span><span id="L-222"><a href="#L-222"><span class="linenos">222</span></a> <span class="c1"># Pick best matchings based on the highest score</span>
+</span><span id="L-223"><a href="#L-223"><span class="linenos">223</span></a> <span class="n">matching_set</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
+</span><span id="L-224"><a href="#L-224"><span class="linenos">224</span></a> <span class="k">while</span> <span class="n">candidate_matchings</span><span class="p">:</span>
+</span><span id="L-225"><a href="#L-225"><span class="linenos">225</span></a> <span class="n">_</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">source_leaf</span><span class="p">,</span> <span class="n">target_leaf</span> <span class="o">=</span> <span class="n">heappop</span><span class="p">(</span><span class="n">candidate_matchings</span><span class="p">)</span>
+</span><span id="L-226"><a href="#L-226"><span class="linenos">226</span></a> <span class="k">if</span> <span class="p">(</span>
+</span><span id="L-227"><a href="#L-227"><span class="linenos">227</span></a> <span class="nb">id</span><span class="p">(</span><span class="n">source_leaf</span><span class="p">)</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span>
+</span><span id="L-228"><a href="#L-228"><span class="linenos">228</span></a> <span class="ow">and</span> <span class="nb">id</span><span class="p">(</span><span class="n">target_leaf</span><span class="p">)</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span>
+</span><span id="L-229"><a href="#L-229"><span class="linenos">229</span></a> <span class="p">):</span>
+</span><span id="L-230"><a href="#L-230"><span class="linenos">230</span></a> <span class="n">matching_set</span><span class="o">.</span><span class="n">add</span><span class="p">((</span><span class="nb">id</span><span class="p">(</span><span class="n">source_leaf</span><span class="p">),</span> <span class="nb">id</span><span class="p">(</span><span class="n">target_leaf</span><span class="p">)))</span>
+</span><span id="L-231"><a href="#L-231"><span class="linenos">231</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="nb">id</span><span class="p">(</span><span class="n">source_leaf</span><span class="p">))</span>
+</span><span id="L-232"><a href="#L-232"><span class="linenos">232</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="nb">id</span><span class="p">(</span><span class="n">target_leaf</span><span class="p">))</span>
+</span><span id="L-233"><a href="#L-233"><span class="linenos">233</span></a>
+</span><span id="L-234"><a href="#L-234"><span class="linenos">234</span></a> <span class="k">return</span> <span class="n">matching_set</span>
+</span><span id="L-235"><a href="#L-235"><span class="linenos">235</span></a>
+</span><span id="L-236"><a href="#L-236"><span class="linenos">236</span></a> <span class="k">def</span> <span class="nf">_dice_coefficient</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
+</span><span id="L-237"><a href="#L-237"><span class="linenos">237</span></a> <span class="n">source_histo</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo</span><span class="p">(</span><span class="n">source</span><span class="p">)</span>
+</span><span id="L-238"><a href="#L-238"><span class="linenos">238</span></a> <span class="n">target_histo</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo</span><span class="p">(</span><span class="n">target</span><span class="p">)</span>
+</span><span id="L-239"><a href="#L-239"><span class="linenos">239</span></a>
+</span><span id="L-240"><a href="#L-240"><span class="linenos">240</span></a> <span class="n">total_grams</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">source_histo</span><span class="o">.</span><span class="n">values</span><span class="p">())</span> <span class="o">+</span> <span class="nb">sum</span><span class="p">(</span><span class="n">target_histo</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
+</span><span id="L-241"><a href="#L-241"><span class="linenos">241</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">total_grams</span><span class="p">:</span>
+</span><span id="L-242"><a href="#L-242"><span class="linenos">242</span></a> <span class="k">return</span> <span class="mf">1.0</span> <span class="k">if</span> <span class="n">source</span> <span class="o">==</span> <span class="n">target</span> <span class="k">else</span> <span class="mf">0.0</span>
+</span><span id="L-243"><a href="#L-243"><span class="linenos">243</span></a>
+</span><span id="L-244"><a href="#L-244"><span class="linenos">244</span></a> <span class="n">overlap_len</span> <span class="o">=</span> <span class="mi">0</span>
+</span><span id="L-245"><a href="#L-245"><span class="linenos">245</span></a> <span class="n">overlapping_grams</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">source_histo</span><span class="p">)</span> <span class="o">&amp;</span> <span class="nb">set</span><span class="p">(</span><span class="n">target_histo</span><span class="p">)</span>
+</span><span id="L-246"><a href="#L-246"><span class="linenos">246</span></a> <span class="k">for</span> <span class="n">g</span> <span class="ow">in</span> <span class="n">overlapping_grams</span><span class="p">:</span>
+</span><span id="L-247"><a href="#L-247"><span class="linenos">247</span></a> <span class="n">overlap_len</span> <span class="o">+=</span> <span class="nb">min</span><span class="p">(</span><span class="n">source_histo</span><span class="p">[</span><span class="n">g</span><span class="p">],</span> <span class="n">target_histo</span><span class="p">[</span><span class="n">g</span><span class="p">])</span>
+</span><span id="L-248"><a href="#L-248"><span class="linenos">248</span></a>
+</span><span id="L-249"><a href="#L-249"><span class="linenos">249</span></a> <span class="k">return</span> <span class="mi">2</span> <span class="o">*</span> <span class="n">overlap_len</span> <span class="o">/</span> <span class="n">total_grams</span>
+</span><span id="L-250"><a href="#L-250"><span class="linenos">250</span></a>
+</span><span id="L-251"><a href="#L-251"><span class="linenos">251</span></a> <span class="k">def</span> <span class="nf">_bigram_histo</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">DefaultDict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]:</span>
+</span><span id="L-252"><a href="#L-252"><span class="linenos">252</span></a> <span class="k">if</span> <span class="nb">id</span><span class="p">(</span><span class="n">expression</span><span class="p">)</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo_cache</span><span class="p">:</span>
+</span><span id="L-253"><a href="#L-253"><span class="linenos">253</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo_cache</span><span class="p">[</span><span class="nb">id</span><span class="p">(</span><span class="n">expression</span><span class="p">)]</span>
+</span><span id="L-254"><a href="#L-254"><span class="linenos">254</span></a>
+</span><span id="L-255"><a href="#L-255"><span class="linenos">255</span></a> <span class="n">expression_str</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sql_generator</span><span class="o">.</span><span class="n">generate</span><span class="p">(</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="L-256"><a href="#L-256"><span class="linenos">256</span></a> <span class="n">count</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">expression_str</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
+</span><span id="L-257"><a href="#L-257"><span class="linenos">257</span></a> <span class="n">bigram_histo</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">DefaultDict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span>
+</span><span id="L-258"><a href="#L-258"><span class="linenos">258</span></a> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">count</span><span class="p">):</span>
+</span><span id="L-259"><a href="#L-259"><span class="linenos">259</span></a> <span class="n">bigram_histo</span><span class="p">[</span><span class="n">expression_str</span><span class="p">[</span><span class="n">i</span> <span class="p">:</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">2</span><span class="p">]]</span> <span class="o">+=</span> <span class="mi">1</span>
+</span><span id="L-260"><a href="#L-260"><span class="linenos">260</span></a>
+</span><span id="L-261"><a href="#L-261"><span class="linenos">261</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo_cache</span><span class="p">[</span><span class="nb">id</span><span class="p">(</span><span class="n">expression</span><span class="p">)]</span> <span class="o">=</span> <span class="n">bigram_histo</span>
+</span><span id="L-262"><a href="#L-262"><span class="linenos">262</span></a> <span class="k">return</span> <span class="n">bigram_histo</span>
+</span><span id="L-263"><a href="#L-263"><span class="linenos">263</span></a>
+</span><span id="L-264"><a href="#L-264"><span class="linenos">264</span></a>
+</span><span id="L-265"><a href="#L-265"><span class="linenos">265</span></a><span class="k">def</span> <span class="nf">_get_leaves</span><span class="p">(</span><span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">Iterator</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">]:</span>
+</span><span id="L-266"><a href="#L-266"><span class="linenos">266</span></a> <span class="n">has_child_exprs</span> <span class="o">=</span> <span class="kc">False</span>
+</span><span id="L-267"><a href="#L-267"><span class="linenos">267</span></a>
+</span><span id="L-268"><a href="#L-268"><span class="linenos">268</span></a> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">values</span><span class="p">():</span>
+</span><span id="L-269"><a href="#L-269"><span class="linenos">269</span></a> <span class="k">for</span> <span class="n">node</span> <span class="ow">in</span> <span class="n">ensure_collection</span><span class="p">(</span><span class="n">a</span><span class="p">):</span>
+</span><span id="L-270"><a href="#L-270"><span class="linenos">270</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">):</span>
+</span><span id="L-271"><a href="#L-271"><span class="linenos">271</span></a> <span class="n">has_child_exprs</span> <span class="o">=</span> <span class="kc">True</span>
+</span><span id="L-272"><a href="#L-272"><span class="linenos">272</span></a> <span class="k">yield from</span> <span class="n">_get_leaves</span><span class="p">(</span><span class="n">node</span><span class="p">)</span>
+</span><span id="L-273"><a href="#L-273"><span class="linenos">273</span></a>
+</span><span id="L-274"><a href="#L-274"><span class="linenos">274</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">has_child_exprs</span><span class="p">:</span>
+</span><span id="L-275"><a href="#L-275"><span class="linenos">275</span></a> <span class="k">yield</span> <span class="n">expression</span>
+</span><span id="L-276"><a href="#L-276"><span class="linenos">276</span></a>
+</span><span id="L-277"><a href="#L-277"><span class="linenos">277</span></a>
+</span><span id="L-278"><a href="#L-278"><span class="linenos">278</span></a><span class="k">def</span> <span class="nf">_is_same_type</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
+</span><span id="L-279"><a href="#L-279"><span class="linenos">279</span></a> <span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">source</span><span class="p">)</span> <span class="ow">is</span> <span class="nb">type</span><span class="p">(</span><span class="n">target</span><span class="p">):</span>
+</span><span id="L-280"><a href="#L-280"><span class="linenos">280</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">source</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Join</span><span class="p">):</span>
+</span><span id="L-281"><a href="#L-281"><span class="linenos">281</span></a> <span class="k">return</span> <span class="n">source</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;side&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="n">target</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;side&quot;</span><span class="p">)</span>
+</span><span id="L-282"><a href="#L-282"><span class="linenos">282</span></a>
+</span><span id="L-283"><a href="#L-283"><span class="linenos">283</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">source</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Anonymous</span><span class="p">):</span>
+</span><span id="L-284"><a href="#L-284"><span class="linenos">284</span></a> <span class="k">return</span> <span class="n">source</span><span class="o">.</span><span class="n">this</span> <span class="o">==</span> <span class="n">target</span><span class="o">.</span><span class="n">this</span>
+</span><span id="L-285"><a href="#L-285"><span class="linenos">285</span></a>
+</span><span id="L-286"><a href="#L-286"><span class="linenos">286</span></a> <span class="k">return</span> <span class="kc">True</span>
+</span><span id="L-287"><a href="#L-287"><span class="linenos">287</span></a>
+</span><span id="L-288"><a href="#L-288"><span class="linenos">288</span></a> <span class="k">return</span> <span class="kc">False</span>
+</span><span id="L-289"><a href="#L-289"><span class="linenos">289</span></a>
+</span><span id="L-290"><a href="#L-290"><span class="linenos">290</span></a>
+</span><span id="L-291"><a href="#L-291"><span class="linenos">291</span></a><span class="k">def</span> <span class="nf">_expression_only_args</span><span class="p">(</span><span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">]:</span>
+</span><span id="L-292"><a href="#L-292"><span class="linenos">292</span></a> <span class="n">args</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="L-293"><a href="#L-293"><span class="linenos">293</span></a> <span class="k">if</span> <span class="n">expression</span><span class="p">:</span>
+</span><span id="L-294"><a href="#L-294"><span class="linenos">294</span></a> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">values</span><span class="p">():</span>
+</span><span id="L-295"><a href="#L-295"><span class="linenos">295</span></a> <span class="n">args</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">ensure_collection</span><span class="p">(</span><span class="n">a</span><span class="p">))</span>
+</span><span id="L-296"><a href="#L-296"><span class="linenos">296</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">a</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">args</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)]</span>
+</span><span id="L-297"><a href="#L-297"><span class="linenos">297</span></a>
+</span><span id="L-298"><a href="#L-298"><span class="linenos">298</span></a>
+</span><span id="L-299"><a href="#L-299"><span class="linenos">299</span></a><span class="k">def</span> <span class="nf">_lcs</span><span class="p">(</span>
+</span><span id="L-300"><a href="#L-300"><span class="linenos">300</span></a> <span class="n">seq_a</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Sequence</span><span class="p">[</span><span class="n">T</span><span class="p">],</span> <span class="n">seq_b</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Sequence</span><span class="p">[</span><span class="n">T</span><span class="p">],</span> <span class="n">equal</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Callable</span><span class="p">[[</span><span class="n">T</span><span class="p">,</span> <span class="n">T</span><span class="p">],</span> <span class="nb">bool</span><span class="p">]</span>
+</span><span id="L-301"><a href="#L-301"><span class="linenos">301</span></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">Sequence</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">T</span><span class="p">]]:</span>
+</span><span id="L-302"><a href="#L-302"><span class="linenos">302</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Calculates the longest common subsequence&quot;&quot;&quot;</span>
+</span><span id="L-303"><a href="#L-303"><span class="linenos">303</span></a>
+</span><span id="L-304"><a href="#L-304"><span class="linenos">304</span></a> <span class="n">len_a</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">seq_a</span><span class="p">)</span>
+</span><span id="L-305"><a href="#L-305"><span class="linenos">305</span></a> <span class="n">len_b</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">seq_b</span><span class="p">)</span>
+</span><span id="L-306"><a href="#L-306"><span class="linenos">306</span></a> <span class="n">lcs_result</span> <span class="o">=</span> <span class="p">[[</span><span class="kc">None</span><span class="p">]</span> <span class="o">*</span> <span class="p">(</span><span class="n">len_b</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">len_a</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)]</span>
+</span><span id="L-307"><a href="#L-307"><span class="linenos">307</span></a>
+</span><span id="L-308"><a href="#L-308"><span class="linenos">308</span></a> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">len_a</span> <span class="o">+</span> <span class="mi">1</span><span class="p">):</span>
+</span><span id="L-309"><a href="#L-309"><span class="linenos">309</span></a> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">len_b</span> <span class="o">+</span> <span class="mi">1</span><span class="p">):</span>
+</span><span id="L-310"><a href="#L-310"><span class="linenos">310</span></a> <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">or</span> <span class="n">j</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+</span><span id="L-311"><a href="#L-311"><span class="linenos">311</span></a> <span class="n">lcs_result</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> <span class="c1"># type: ignore</span>
+</span><span id="L-312"><a href="#L-312"><span class="linenos">312</span></a> <span class="k">elif</span> <span class="n">equal</span><span class="p">(</span><span class="n">seq_a</span><span class="p">[</span><span class="n">i</span> <span class="o">-</span> <span class="mi">1</span><span class="p">],</span> <span class="n">seq_b</span><span class="p">[</span><span class="n">j</span> <span class="o">-</span> <span class="mi">1</span><span class="p">]):</span>
+</span><span id="L-313"><a href="#L-313"><span class="linenos">313</span></a> <span class="n">lcs_result</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">lcs_result</span><span class="p">[</span><span class="n">i</span> <span class="o">-</span> <span class="mi">1</span><span class="p">][</span><span class="n">j</span> <span class="o">-</span> <span class="mi">1</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="n">seq_a</span><span class="p">[</span><span class="n">i</span> <span class="o">-</span> <span class="mi">1</span><span class="p">]]</span> <span class="c1"># type: ignore</span>
+</span><span id="L-314"><a href="#L-314"><span class="linenos">314</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="L-315"><a href="#L-315"><span class="linenos">315</span></a> <span class="n">lcs_result</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="L-316"><a href="#L-316"><span class="linenos">316</span></a> <span class="n">lcs_result</span><span class="p">[</span><span class="n">i</span> <span class="o">-</span> <span class="mi">1</span><span class="p">][</span><span class="n">j</span><span class="p">]</span>
+</span><span id="L-317"><a href="#L-317"><span class="linenos">317</span></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">lcs_result</span><span class="p">[</span><span class="n">i</span> <span class="o">-</span> <span class="mi">1</span><span class="p">][</span><span class="n">j</span><span class="p">])</span> <span class="o">&gt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">lcs_result</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">j</span> <span class="o">-</span> <span class="mi">1</span><span class="p">])</span> <span class="c1"># type: ignore</span>
+</span><span id="L-318"><a href="#L-318"><span class="linenos">318</span></a> <span class="k">else</span> <span class="n">lcs_result</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">j</span> <span class="o">-</span> <span class="mi">1</span><span class="p">]</span>
+</span><span id="L-319"><a href="#L-319"><span class="linenos">319</span></a> <span class="p">)</span>
+</span><span id="L-320"><a href="#L-320"><span class="linenos">320</span></a>
+</span><span id="L-321"><a href="#L-321"><span class="linenos">321</span></a> <span class="k">return</span> <span class="n">lcs_result</span><span class="p">[</span><span class="n">len_a</span><span class="p">][</span><span class="n">len_b</span><span class="p">]</span> <span class="c1"># type: ignore</span>
+</span></pre></div>
+
+
+ </section>
+ <section id="Insert">
+ <input id="Insert-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
+<div class="attr class">
+ <div class="decorator">@dataclass(frozen=True)</div>
+
+ <span class="def">class</span>
+ <span class="name">Insert</span>:
+
+ <label class="view-source-button" for="Insert-view-source"><span>View Source</span></label>
+
+ </div>
+ <a class="headerlink" href="#Insert"></a>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="Insert-20"><a href="#Insert-20"><span class="linenos">20</span></a><span class="nd">@dataclass</span><span class="p">(</span><span class="n">frozen</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="Insert-21"><a href="#Insert-21"><span class="linenos">21</span></a><span class="k">class</span> <span class="nc">Insert</span><span class="p">:</span>
+</span><span id="Insert-22"><a href="#Insert-22"><span class="linenos">22</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Indicates that a new node has been inserted&quot;&quot;&quot;</span>
+</span><span id="Insert-23"><a href="#Insert-23"><span class="linenos">23</span></a>
+</span><span id="Insert-24"><a href="#Insert-24"><span class="linenos">24</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span></pre></div>
+
+
+ <div class="docstring"><p>Indicates that a new node has been inserted</p>
+</div>
+
+
+ <div id="Insert.__init__" class="classattr">
+ <div class="attr function">
+
+ <span class="name">Insert</span><span class="signature pdoc-code condensed">(<span class="param"><span class="n">expression</span><span class="p">:</span> <span class="n"><a href="expressions.html#Expression">sqlglot.expressions.Expression</a></span></span>)</span>
+
+
+ </div>
+ <a class="headerlink" href="#Insert.__init__"></a>
+
+
+
+ </div>
+ </section>
+ <section id="Remove">
+ <input id="Remove-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
+<div class="attr class">
+ <div class="decorator">@dataclass(frozen=True)</div>
+
+ <span class="def">class</span>
+ <span class="name">Remove</span>:
+
+ <label class="view-source-button" for="Remove-view-source"><span>View Source</span></label>
+
+ </div>
+ <a class="headerlink" href="#Remove"></a>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="Remove-27"><a href="#Remove-27"><span class="linenos">27</span></a><span class="nd">@dataclass</span><span class="p">(</span><span class="n">frozen</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="Remove-28"><a href="#Remove-28"><span class="linenos">28</span></a><span class="k">class</span> <span class="nc">Remove</span><span class="p">:</span>
+</span><span id="Remove-29"><a href="#Remove-29"><span class="linenos">29</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Indicates that an existing node has been removed&quot;&quot;&quot;</span>
+</span><span id="Remove-30"><a href="#Remove-30"><span class="linenos">30</span></a>
+</span><span id="Remove-31"><a href="#Remove-31"><span class="linenos">31</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span></pre></div>
+
+
+ <div class="docstring"><p>Indicates that an existing node has been removed</p>
+</div>
+
+
+ <div id="Remove.__init__" class="classattr">
+ <div class="attr function">
+
+ <span class="name">Remove</span><span class="signature pdoc-code condensed">(<span class="param"><span class="n">expression</span><span class="p">:</span> <span class="n"><a href="expressions.html#Expression">sqlglot.expressions.Expression</a></span></span>)</span>
+
+
+ </div>
+ <a class="headerlink" href="#Remove.__init__"></a>
+
+
+
+ </div>
+ </section>
+ <section id="Move">
+ <input id="Move-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
+<div class="attr class">
+ <div class="decorator">@dataclass(frozen=True)</div>
+
+ <span class="def">class</span>
+ <span class="name">Move</span>:
+
+ <label class="view-source-button" for="Move-view-source"><span>View Source</span></label>
+
+ </div>
+ <a class="headerlink" href="#Move"></a>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="Move-34"><a href="#Move-34"><span class="linenos">34</span></a><span class="nd">@dataclass</span><span class="p">(</span><span class="n">frozen</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="Move-35"><a href="#Move-35"><span class="linenos">35</span></a><span class="k">class</span> <span class="nc">Move</span><span class="p">:</span>
+</span><span id="Move-36"><a href="#Move-36"><span class="linenos">36</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Indicates that an existing node&#39;s position within the tree has changed&quot;&quot;&quot;</span>
+</span><span id="Move-37"><a href="#Move-37"><span class="linenos">37</span></a>
+</span><span id="Move-38"><a href="#Move-38"><span class="linenos">38</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span></pre></div>
+
+
+ <div class="docstring"><p>Indicates that an existing node's position within the tree has changed</p>
+</div>
+
+
+ <div id="Move.__init__" class="classattr">
+ <div class="attr function">
+
+ <span class="name">Move</span><span class="signature pdoc-code condensed">(<span class="param"><span class="n">expression</span><span class="p">:</span> <span class="n"><a href="expressions.html#Expression">sqlglot.expressions.Expression</a></span></span>)</span>
+
+
+ </div>
+ <a class="headerlink" href="#Move.__init__"></a>
+
+
+
+ </div>
+ </section>
+ <section id="Update">
+ <input id="Update-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
+<div class="attr class">
+ <div class="decorator">@dataclass(frozen=True)</div>
+
+ <span class="def">class</span>
+ <span class="name">Update</span>:
+
+ <label class="view-source-button" for="Update-view-source"><span>View Source</span></label>
+
+ </div>
+ <a class="headerlink" href="#Update"></a>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="Update-41"><a href="#Update-41"><span class="linenos">41</span></a><span class="nd">@dataclass</span><span class="p">(</span><span class="n">frozen</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="Update-42"><a href="#Update-42"><span class="linenos">42</span></a><span class="k">class</span> <span class="nc">Update</span><span class="p">:</span>
+</span><span id="Update-43"><a href="#Update-43"><span class="linenos">43</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Indicates that an existing node has been updated&quot;&quot;&quot;</span>
+</span><span id="Update-44"><a href="#Update-44"><span class="linenos">44</span></a>
+</span><span id="Update-45"><a href="#Update-45"><span class="linenos">45</span></a> <span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span><span id="Update-46"><a href="#Update-46"><span class="linenos">46</span></a> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span></pre></div>
+
+
+ <div class="docstring"><p>Indicates that an existing node has been updated</p>
+</div>
+
+
+ <div id="Update.__init__" class="classattr">
+ <div class="attr function">
+
+ <span class="name">Update</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">source</span><span class="p">:</span> <span class="n"><a href="expressions.html#Expression">sqlglot.expressions.Expression</a></span>,</span><span class="param"> <span class="n">target</span><span class="p">:</span> <span class="n"><a href="expressions.html#Expression">sqlglot.expressions.Expression</a></span></span>)</span>
+
+
+ </div>
+ <a class="headerlink" href="#Update.__init__"></a>
+
+
+
+ </div>
+ </section>
+ <section id="Keep">
+ <input id="Keep-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
+<div class="attr class">
+ <div class="decorator">@dataclass(frozen=True)</div>
+
+ <span class="def">class</span>
+ <span class="name">Keep</span>:
+
+ <label class="view-source-button" for="Keep-view-source"><span>View Source</span></label>
+
+ </div>
+ <a class="headerlink" href="#Keep"></a>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="Keep-49"><a href="#Keep-49"><span class="linenos">49</span></a><span class="nd">@dataclass</span><span class="p">(</span><span class="n">frozen</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="Keep-50"><a href="#Keep-50"><span class="linenos">50</span></a><span class="k">class</span> <span class="nc">Keep</span><span class="p">:</span>
+</span><span id="Keep-51"><a href="#Keep-51"><span class="linenos">51</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Indicates that an existing node hasn&#39;t been changed&quot;&quot;&quot;</span>
+</span><span id="Keep-52"><a href="#Keep-52"><span class="linenos">52</span></a>
+</span><span id="Keep-53"><a href="#Keep-53"><span class="linenos">53</span></a> <span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span><span id="Keep-54"><a href="#Keep-54"><span class="linenos">54</span></a> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span>
+</span></pre></div>
+
+
+ <div class="docstring"><p>Indicates that an existing node hasn't been changed</p>
+</div>
+
+
+ <div id="Keep.__init__" class="classattr">
+ <div class="attr function">
+
+ <span class="name">Keep</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">source</span><span class="p">:</span> <span class="n"><a href="expressions.html#Expression">sqlglot.expressions.Expression</a></span>,</span><span class="param"> <span class="n">target</span><span class="p">:</span> <span class="n"><a href="expressions.html#Expression">sqlglot.expressions.Expression</a></span></span>)</span>
+
+
+ </div>
+ <a class="headerlink" href="#Keep.__init__"></a>
+
+
+
+ </div>
+ </section>
+ <section id="diff">
+ <input id="diff-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
+<div class="attr function">
+
+ <span class="def">def</span>
+ <span class="name">diff</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">source</span><span class="p">:</span> <span class="n"><a href="expressions.html#Expression">sqlglot.expressions.Expression</a></span>,</span><span class="param"> <span class="n">target</span><span class="p">:</span> <span class="n"><a href="expressions.html#Expression">sqlglot.expressions.Expression</a></span></span><span class="return-annotation">) -> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n"><a href="#Insert">sqlglot.diff.Insert</a></span><span class="p">,</span> <span class="n"><a href="#Remove">sqlglot.diff.Remove</a></span><span class="p">,</span> <span class="n"><a href="#Move">sqlglot.diff.Move</a></span><span class="p">,</span> <span class="n"><a href="#Update">sqlglot.diff.Update</a></span><span class="p">,</span> <span class="n"><a href="#Keep">sqlglot.diff.Keep</a></span><span class="p">]]</span>:</span></span>
+
+ <label class="view-source-button" for="diff-view-source"><span>View Source</span></label>
+
+ </div>
+ <a class="headerlink" href="#diff"></a>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="diff-62"><a href="#diff-62"><span class="linenos">62</span></a><span class="k">def</span> <span class="nf">diff</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Edit</span><span class="p">]:</span>
+</span><span id="diff-63"><a href="#diff-63"><span class="linenos">63</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="diff-64"><a href="#diff-64"><span class="linenos">64</span></a><span class="sd"> Returns the list of changes between the source and the target expressions.</span>
+</span><span id="diff-65"><a href="#diff-65"><span class="linenos">65</span></a>
+</span><span id="diff-66"><a href="#diff-66"><span class="linenos">66</span></a><span class="sd"> Examples:</span>
+</span><span id="diff-67"><a href="#diff-67"><span class="linenos">67</span></a><span class="sd"> &gt;&gt;&gt; diff(parse_one(&quot;a + b&quot;), parse_one(&quot;a + c&quot;))</span>
+</span><span id="diff-68"><a href="#diff-68"><span class="linenos">68</span></a><span class="sd"> [</span>
+</span><span id="diff-69"><a href="#diff-69"><span class="linenos">69</span></a><span class="sd"> Remove(expression=(COLUMN this: (IDENTIFIER this: b, quoted: False))),</span>
+</span><span id="diff-70"><a href="#diff-70"><span class="linenos">70</span></a><span class="sd"> Insert(expression=(COLUMN this: (IDENTIFIER this: c, quoted: False))),</span>
+</span><span id="diff-71"><a href="#diff-71"><span class="linenos">71</span></a><span class="sd"> Keep(</span>
+</span><span id="diff-72"><a href="#diff-72"><span class="linenos">72</span></a><span class="sd"> source=(ADD this: ...),</span>
+</span><span id="diff-73"><a href="#diff-73"><span class="linenos">73</span></a><span class="sd"> target=(ADD this: ...)</span>
+</span><span id="diff-74"><a href="#diff-74"><span class="linenos">74</span></a><span class="sd"> ),</span>
+</span><span id="diff-75"><a href="#diff-75"><span class="linenos">75</span></a><span class="sd"> Keep(</span>
+</span><span id="diff-76"><a href="#diff-76"><span class="linenos">76</span></a><span class="sd"> source=(COLUMN this: (IDENTIFIER this: a, quoted: False)),</span>
+</span><span id="diff-77"><a href="#diff-77"><span class="linenos">77</span></a><span class="sd"> target=(COLUMN this: (IDENTIFIER this: a, quoted: False))</span>
+</span><span id="diff-78"><a href="#diff-78"><span class="linenos">78</span></a><span class="sd"> ),</span>
+</span><span id="diff-79"><a href="#diff-79"><span class="linenos">79</span></a><span class="sd"> ]</span>
+</span><span id="diff-80"><a href="#diff-80"><span class="linenos">80</span></a>
+</span><span id="diff-81"><a href="#diff-81"><span class="linenos">81</span></a><span class="sd"> Args:</span>
+</span><span id="diff-82"><a href="#diff-82"><span class="linenos">82</span></a><span class="sd"> source: the source expression.</span>
+</span><span id="diff-83"><a href="#diff-83"><span class="linenos">83</span></a><span class="sd"> target: the target expression against which the diff should be calculated.</span>
+</span><span id="diff-84"><a href="#diff-84"><span class="linenos">84</span></a>
+</span><span id="diff-85"><a href="#diff-85"><span class="linenos">85</span></a><span class="sd"> Returns:</span>
+</span><span id="diff-86"><a href="#diff-86"><span class="linenos">86</span></a><span class="sd"> the list of Insert, Remove, Move, Update and Keep objects for each node in the source and the</span>
+</span><span id="diff-87"><a href="#diff-87"><span class="linenos">87</span></a><span class="sd"> target expression trees. This list represents a sequence of steps needed to transform the source</span>
+</span><span id="diff-88"><a href="#diff-88"><span class="linenos">88</span></a><span class="sd"> expression tree into the target one.</span>
+</span><span id="diff-89"><a href="#diff-89"><span class="linenos">89</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="diff-90"><a href="#diff-90"><span class="linenos">90</span></a> <span class="k">return</span> <span class="n">ChangeDistiller</span><span class="p">()</span><span class="o">.</span><span class="n">diff</span><span class="p">(</span><span class="n">source</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">target</span><span class="o">.</span><span class="n">copy</span><span class="p">())</span>
+</span></pre></div>
+
+
+ <div class="docstring"><p>Returns the list of changes between the source and the target expressions.</p>
+
+<h6 id="examples">Examples:</h6>
+
+<blockquote>
+ <div class="pdoc-code codehilite">
+<pre><span></span><code><span class="gp">&gt;&gt;&gt; </span><span class="n">diff</span><span class="p">(</span><span class="n">parse_one</span><span class="p">(</span><span class="s2">&quot;a + b&quot;</span><span class="p">),</span> <span class="n">parse_one</span><span class="p">(</span><span class="s2">&quot;a + c&quot;</span><span class="p">))</span>
+<span class="go">[</span>
+<span class="go"> Remove(expression=(COLUMN this: (IDENTIFIER this: b, quoted: False))),</span>
+<span class="go"> Insert(expression=(COLUMN this: (IDENTIFIER this: c, quoted: False))),</span>
+<span class="go"> Keep(</span>
+<span class="go"> source=(ADD this: ...),</span>
+<span class="go"> target=(ADD this: ...)</span>
+<span class="go"> ),</span>
+<span class="go"> Keep(</span>
+<span class="go"> source=(COLUMN this: (IDENTIFIER this: a, quoted: False)),</span>
+<span class="go"> target=(COLUMN this: (IDENTIFIER this: a, quoted: False))</span>
+<span class="go"> ),</span>
+<span class="go">]</span>
+</code></pre>
+ </div>
+</blockquote>
+
+<h6 id="arguments">Arguments:</h6>
+
+<ul>
+<li><strong>source:</strong> the source expression.</li>
+<li><strong>target:</strong> the target expression against which the diff should be calculated.</li>
+</ul>
+
+<h6 id="returns">Returns:</h6>
+
+<blockquote>
+ <p>the list of Insert, Remove, Move, Update and Keep objects for each node in the source and the
+ target expression trees. This list represents a sequence of steps needed to transform the source
+ expression tree into the target one.</p>
+</blockquote>
+</div>
+
+
+ </section>
+ <section id="ChangeDistiller">
+ <input id="ChangeDistiller-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
+<div class="attr class">
+
+ <span class="def">class</span>
+ <span class="name">ChangeDistiller</span>:
+
+ <label class="view-source-button" for="ChangeDistiller-view-source"><span>View Source</span></label>
+
+ </div>
+ <a class="headerlink" href="#ChangeDistiller"></a>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="ChangeDistiller-101"><a href="#ChangeDistiller-101"><span class="linenos">101</span></a><span class="k">class</span> <span class="nc">ChangeDistiller</span><span class="p">:</span>
+</span><span id="ChangeDistiller-102"><a href="#ChangeDistiller-102"><span class="linenos">102</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="ChangeDistiller-103"><a href="#ChangeDistiller-103"><span class="linenos">103</span></a><span class="sd"> The implementation of the Change Distiller algorithm described by Beat Fluri and Martin Pinzger in</span>
+</span><span id="ChangeDistiller-104"><a href="#ChangeDistiller-104"><span class="linenos">104</span></a><span class="sd"> their paper https://ieeexplore.ieee.org/document/4339230, which in turn is based on the algorithm by</span>
+</span><span id="ChangeDistiller-105"><a href="#ChangeDistiller-105"><span class="linenos">105</span></a><span class="sd"> Chawathe et al. described in http://ilpubs.stanford.edu:8090/115/1/1995-46.pdf.</span>
+</span><span id="ChangeDistiller-106"><a href="#ChangeDistiller-106"><span class="linenos">106</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="ChangeDistiller-107"><a href="#ChangeDistiller-107"><span class="linenos">107</span></a>
+</span><span id="ChangeDistiller-108"><a href="#ChangeDistiller-108"><span class="linenos">108</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">f</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.6</span><span class="p">,</span> <span class="n">t</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.6</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="ChangeDistiller-109"><a href="#ChangeDistiller-109"><span class="linenos">109</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">f</span> <span class="o">=</span> <span class="n">f</span>
+</span><span id="ChangeDistiller-110"><a href="#ChangeDistiller-110"><span class="linenos">110</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">t</span> <span class="o">=</span> <span class="n">t</span>
+</span><span id="ChangeDistiller-111"><a href="#ChangeDistiller-111"><span class="linenos">111</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_sql_generator</span> <span class="o">=</span> <span class="n">Dialect</span><span class="p">()</span><span class="o">.</span><span class="n">generator</span><span class="p">()</span>
+</span><span id="ChangeDistiller-112"><a href="#ChangeDistiller-112"><span class="linenos">112</span></a>
+</span><span id="ChangeDistiller-113"><a href="#ChangeDistiller-113"><span class="linenos">113</span></a> <span class="k">def</span> <span class="nf">diff</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Edit</span><span class="p">]:</span>
+</span><span id="ChangeDistiller-114"><a href="#ChangeDistiller-114"><span class="linenos">114</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_source</span> <span class="o">=</span> <span class="n">source</span>
+</span><span id="ChangeDistiller-115"><a href="#ChangeDistiller-115"><span class="linenos">115</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_target</span> <span class="o">=</span> <span class="n">target</span>
+</span><span id="ChangeDistiller-116"><a href="#ChangeDistiller-116"><span class="linenos">116</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span> <span class="o">=</span> <span class="p">{</span><span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span> <span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">source</span><span class="o">.</span><span class="n">bfs</span><span class="p">()}</span>
+</span><span id="ChangeDistiller-117"><a href="#ChangeDistiller-117"><span class="linenos">117</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_target_index</span> <span class="o">=</span> <span class="p">{</span><span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span> <span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">target</span><span class="o">.</span><span class="n">bfs</span><span class="p">()}</span>
+</span><span id="ChangeDistiller-118"><a href="#ChangeDistiller-118"><span class="linenos">118</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span><span class="p">)</span>
+</span><span id="ChangeDistiller-119"><a href="#ChangeDistiller-119"><span class="linenos">119</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_target_index</span><span class="p">)</span>
+</span><span id="ChangeDistiller-120"><a href="#ChangeDistiller-120"><span class="linenos">120</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo_cache</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">DefaultDict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="p">{}</span>
+</span><span id="ChangeDistiller-121"><a href="#ChangeDistiller-121"><span class="linenos">121</span></a>
+</span><span id="ChangeDistiller-122"><a href="#ChangeDistiller-122"><span class="linenos">122</span></a> <span class="n">matching_set</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_matching_set</span><span class="p">()</span>
+</span><span id="ChangeDistiller-123"><a href="#ChangeDistiller-123"><span class="linenos">123</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_generate_edit_script</span><span class="p">(</span><span class="n">matching_set</span><span class="p">)</span>
+</span><span id="ChangeDistiller-124"><a href="#ChangeDistiller-124"><span class="linenos">124</span></a>
+</span><span id="ChangeDistiller-125"><a href="#ChangeDistiller-125"><span class="linenos">125</span></a> <span class="k">def</span> <span class="nf">_generate_edit_script</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">matching_set</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Set</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Edit</span><span class="p">]:</span>
+</span><span id="ChangeDistiller-126"><a href="#ChangeDistiller-126"><span class="linenos">126</span></a> <span class="n">edit_script</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Edit</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="ChangeDistiller-127"><a href="#ChangeDistiller-127"><span class="linenos">127</span></a> <span class="k">for</span> <span class="n">removed_node_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span><span class="p">:</span>
+</span><span id="ChangeDistiller-128"><a href="#ChangeDistiller-128"><span class="linenos">128</span></a> <span class="n">edit_script</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Remove</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span><span class="p">[</span><span class="n">removed_node_id</span><span class="p">]))</span>
+</span><span id="ChangeDistiller-129"><a href="#ChangeDistiller-129"><span class="linenos">129</span></a> <span class="k">for</span> <span class="n">inserted_node_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span><span class="p">:</span>
+</span><span id="ChangeDistiller-130"><a href="#ChangeDistiller-130"><span class="linenos">130</span></a> <span class="n">edit_script</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Insert</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_target_index</span><span class="p">[</span><span class="n">inserted_node_id</span><span class="p">]))</span>
+</span><span id="ChangeDistiller-131"><a href="#ChangeDistiller-131"><span class="linenos">131</span></a> <span class="k">for</span> <span class="n">kept_source_node_id</span><span class="p">,</span> <span class="n">kept_target_node_id</span> <span class="ow">in</span> <span class="n">matching_set</span><span class="p">:</span>
+</span><span id="ChangeDistiller-132"><a href="#ChangeDistiller-132"><span class="linenos">132</span></a> <span class="n">source_node</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span><span class="p">[</span><span class="n">kept_source_node_id</span><span class="p">]</span>
+</span><span id="ChangeDistiller-133"><a href="#ChangeDistiller-133"><span class="linenos">133</span></a> <span class="n">target_node</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target_index</span><span class="p">[</span><span class="n">kept_target_node_id</span><span class="p">]</span>
+</span><span id="ChangeDistiller-134"><a href="#ChangeDistiller-134"><span class="linenos">134</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">LEAF_EXPRESSION_TYPES</span><span class="p">)</span> <span class="ow">or</span> <span class="n">source_node</span> <span class="o">==</span> <span class="n">target_node</span><span class="p">:</span>
+</span><span id="ChangeDistiller-135"><a href="#ChangeDistiller-135"><span class="linenos">135</span></a> <span class="n">edit_script</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span>
+</span><span id="ChangeDistiller-136"><a href="#ChangeDistiller-136"><span class="linenos">136</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_generate_move_edits</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">,</span> <span class="n">matching_set</span><span class="p">)</span>
+</span><span id="ChangeDistiller-137"><a href="#ChangeDistiller-137"><span class="linenos">137</span></a> <span class="p">)</span>
+</span><span id="ChangeDistiller-138"><a href="#ChangeDistiller-138"><span class="linenos">138</span></a> <span class="n">edit_script</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Keep</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">))</span>
+</span><span id="ChangeDistiller-139"><a href="#ChangeDistiller-139"><span class="linenos">139</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="ChangeDistiller-140"><a href="#ChangeDistiller-140"><span class="linenos">140</span></a> <span class="n">edit_script</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Update</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">))</span>
+</span><span id="ChangeDistiller-141"><a href="#ChangeDistiller-141"><span class="linenos">141</span></a>
+</span><span id="ChangeDistiller-142"><a href="#ChangeDistiller-142"><span class="linenos">142</span></a> <span class="k">return</span> <span class="n">edit_script</span>
+</span><span id="ChangeDistiller-143"><a href="#ChangeDistiller-143"><span class="linenos">143</span></a>
+</span><span id="ChangeDistiller-144"><a href="#ChangeDistiller-144"><span class="linenos">144</span></a> <span class="k">def</span> <span class="nf">_generate_move_edits</span><span class="p">(</span>
+</span><span id="ChangeDistiller-145"><a href="#ChangeDistiller-145"><span class="linenos">145</span></a> <span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">matching_set</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Set</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]</span>
+</span><span id="ChangeDistiller-146"><a href="#ChangeDistiller-146"><span class="linenos">146</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Move</span><span class="p">]:</span>
+</span><span id="ChangeDistiller-147"><a href="#ChangeDistiller-147"><span class="linenos">147</span></a> <span class="n">source_args</span> <span class="o">=</span> <span class="p">[</span><span class="nb">id</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">_expression_only_args</span><span class="p">(</span><span class="n">source</span><span class="p">)]</span>
+</span><span id="ChangeDistiller-148"><a href="#ChangeDistiller-148"><span class="linenos">148</span></a> <span class="n">target_args</span> <span class="o">=</span> <span class="p">[</span><span class="nb">id</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">_expression_only_args</span><span class="p">(</span><span class="n">target</span><span class="p">)]</span>
+</span><span id="ChangeDistiller-149"><a href="#ChangeDistiller-149"><span class="linenos">149</span></a>
+</span><span id="ChangeDistiller-150"><a href="#ChangeDistiller-150"><span class="linenos">150</span></a> <span class="n">args_lcs</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">_lcs</span><span class="p">(</span><span class="n">source_args</span><span class="p">,</span> <span class="n">target_args</span><span class="p">,</span> <span class="k">lambda</span> <span class="n">l</span><span class="p">,</span> <span class="n">r</span><span class="p">:</span> <span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="n">r</span><span class="p">)</span> <span class="ow">in</span> <span class="n">matching_set</span><span class="p">))</span>
+</span><span id="ChangeDistiller-151"><a href="#ChangeDistiller-151"><span class="linenos">151</span></a>
+</span><span id="ChangeDistiller-152"><a href="#ChangeDistiller-152"><span class="linenos">152</span></a> <span class="n">move_edits</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="ChangeDistiller-153"><a href="#ChangeDistiller-153"><span class="linenos">153</span></a> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">source_args</span><span class="p">:</span>
+</span><span id="ChangeDistiller-154"><a href="#ChangeDistiller-154"><span class="linenos">154</span></a> <span class="k">if</span> <span class="n">a</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">args_lcs</span> <span class="ow">and</span> <span class="n">a</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span><span class="p">:</span>
+</span><span id="ChangeDistiller-155"><a href="#ChangeDistiller-155"><span class="linenos">155</span></a> <span class="n">move_edits</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Move</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span><span class="p">[</span><span class="n">a</span><span class="p">]))</span>
+</span><span id="ChangeDistiller-156"><a href="#ChangeDistiller-156"><span class="linenos">156</span></a>
+</span><span id="ChangeDistiller-157"><a href="#ChangeDistiller-157"><span class="linenos">157</span></a> <span class="k">return</span> <span class="n">move_edits</span>
+</span><span id="ChangeDistiller-158"><a href="#ChangeDistiller-158"><span class="linenos">158</span></a>
+</span><span id="ChangeDistiller-159"><a href="#ChangeDistiller-159"><span class="linenos">159</span></a> <span class="k">def</span> <span class="nf">_compute_matching_set</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">Set</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]:</span>
+</span><span id="ChangeDistiller-160"><a href="#ChangeDistiller-160"><span class="linenos">160</span></a> <span class="n">leaves_matching_set</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_leaf_matching_set</span><span class="p">()</span>
+</span><span id="ChangeDistiller-161"><a href="#ChangeDistiller-161"><span class="linenos">161</span></a> <span class="n">matching_set</span> <span class="o">=</span> <span class="n">leaves_matching_set</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="ChangeDistiller-162"><a href="#ChangeDistiller-162"><span class="linenos">162</span></a>
+</span><span id="ChangeDistiller-163"><a href="#ChangeDistiller-163"><span class="linenos">163</span></a> <span class="n">ordered_unmatched_source_nodes</span> <span class="o">=</span> <span class="p">{</span>
+</span><span id="ChangeDistiller-164"><a href="#ChangeDistiller-164"><span class="linenos">164</span></a> <span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source</span><span class="o">.</span><span class="n">bfs</span><span class="p">()</span> <span class="k">if</span> <span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span>
+</span><span id="ChangeDistiller-165"><a href="#ChangeDistiller-165"><span class="linenos">165</span></a> <span class="p">}</span>
+</span><span id="ChangeDistiller-166"><a href="#ChangeDistiller-166"><span class="linenos">166</span></a> <span class="n">ordered_unmatched_target_nodes</span> <span class="o">=</span> <span class="p">{</span>
+</span><span id="ChangeDistiller-167"><a href="#ChangeDistiller-167"><span class="linenos">167</span></a> <span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target</span><span class="o">.</span><span class="n">bfs</span><span class="p">()</span> <span class="k">if</span> <span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span>
+</span><span id="ChangeDistiller-168"><a href="#ChangeDistiller-168"><span class="linenos">168</span></a> <span class="p">}</span>
+</span><span id="ChangeDistiller-169"><a href="#ChangeDistiller-169"><span class="linenos">169</span></a>
+</span><span id="ChangeDistiller-170"><a href="#ChangeDistiller-170"><span class="linenos">170</span></a> <span class="k">for</span> <span class="n">source_node_id</span> <span class="ow">in</span> <span class="n">ordered_unmatched_source_nodes</span><span class="p">:</span>
+</span><span id="ChangeDistiller-171"><a href="#ChangeDistiller-171"><span class="linenos">171</span></a> <span class="k">for</span> <span class="n">target_node_id</span> <span class="ow">in</span> <span class="n">ordered_unmatched_target_nodes</span><span class="p">:</span>
+</span><span id="ChangeDistiller-172"><a href="#ChangeDistiller-172"><span class="linenos">172</span></a> <span class="n">source_node</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span><span class="p">[</span><span class="n">source_node_id</span><span class="p">]</span>
+</span><span id="ChangeDistiller-173"><a href="#ChangeDistiller-173"><span class="linenos">173</span></a> <span class="n">target_node</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target_index</span><span class="p">[</span><span class="n">target_node_id</span><span class="p">]</span>
+</span><span id="ChangeDistiller-174"><a href="#ChangeDistiller-174"><span class="linenos">174</span></a> <span class="k">if</span> <span class="n">_is_same_type</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">):</span>
+</span><span id="ChangeDistiller-175"><a href="#ChangeDistiller-175"><span class="linenos">175</span></a> <span class="n">source_leaf_ids</span> <span class="o">=</span> <span class="p">{</span><span class="nb">id</span><span class="p">(</span><span class="n">l</span><span class="p">)</span> <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="n">_get_leaves</span><span class="p">(</span><span class="n">source_node</span><span class="p">)}</span>
+</span><span id="ChangeDistiller-176"><a href="#ChangeDistiller-176"><span class="linenos">176</span></a> <span class="n">target_leaf_ids</span> <span class="o">=</span> <span class="p">{</span><span class="nb">id</span><span class="p">(</span><span class="n">l</span><span class="p">)</span> <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="n">_get_leaves</span><span class="p">(</span><span class="n">target_node</span><span class="p">)}</span>
+</span><span id="ChangeDistiller-177"><a href="#ChangeDistiller-177"><span class="linenos">177</span></a>
+</span><span id="ChangeDistiller-178"><a href="#ChangeDistiller-178"><span class="linenos">178</span></a> <span class="n">max_leaves_num</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">source_leaf_ids</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">target_leaf_ids</span><span class="p">))</span>
+</span><span id="ChangeDistiller-179"><a href="#ChangeDistiller-179"><span class="linenos">179</span></a> <span class="k">if</span> <span class="n">max_leaves_num</span><span class="p">:</span>
+</span><span id="ChangeDistiller-180"><a href="#ChangeDistiller-180"><span class="linenos">180</span></a> <span class="n">common_leaves_num</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span>
+</span><span id="ChangeDistiller-181"><a href="#ChangeDistiller-181"><span class="linenos">181</span></a> <span class="mi">1</span> <span class="k">if</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">source_leaf_ids</span> <span class="ow">and</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">target_leaf_ids</span> <span class="k">else</span> <span class="mi">0</span>
+</span><span id="ChangeDistiller-182"><a href="#ChangeDistiller-182"><span class="linenos">182</span></a> <span class="k">for</span> <span class="n">s</span><span class="p">,</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">leaves_matching_set</span>
+</span><span id="ChangeDistiller-183"><a href="#ChangeDistiller-183"><span class="linenos">183</span></a> <span class="p">)</span>
+</span><span id="ChangeDistiller-184"><a href="#ChangeDistiller-184"><span class="linenos">184</span></a> <span class="n">leaf_similarity_score</span> <span class="o">=</span> <span class="n">common_leaves_num</span> <span class="o">/</span> <span class="n">max_leaves_num</span>
+</span><span id="ChangeDistiller-185"><a href="#ChangeDistiller-185"><span class="linenos">185</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="ChangeDistiller-186"><a href="#ChangeDistiller-186"><span class="linenos">186</span></a> <span class="n">leaf_similarity_score</span> <span class="o">=</span> <span class="mf">0.0</span>
+</span><span id="ChangeDistiller-187"><a href="#ChangeDistiller-187"><span class="linenos">187</span></a>
+</span><span id="ChangeDistiller-188"><a href="#ChangeDistiller-188"><span class="linenos">188</span></a> <span class="n">adjusted_t</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="ChangeDistiller-189"><a href="#ChangeDistiller-189"><span class="linenos">189</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">t</span> <span class="k">if</span> <span class="nb">min</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">source_leaf_ids</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">target_leaf_ids</span><span class="p">))</span> <span class="o">&gt;</span> <span class="mi">4</span> <span class="k">else</span> <span class="mf">0.4</span>
+</span><span id="ChangeDistiller-190"><a href="#ChangeDistiller-190"><span class="linenos">190</span></a> <span class="p">)</span>
+</span><span id="ChangeDistiller-191"><a href="#ChangeDistiller-191"><span class="linenos">191</span></a>
+</span><span id="ChangeDistiller-192"><a href="#ChangeDistiller-192"><span class="linenos">192</span></a> <span class="k">if</span> <span class="n">leaf_similarity_score</span> <span class="o">&gt;=</span> <span class="mf">0.8</span> <span class="ow">or</span> <span class="p">(</span>
+</span><span id="ChangeDistiller-193"><a href="#ChangeDistiller-193"><span class="linenos">193</span></a> <span class="n">leaf_similarity_score</span> <span class="o">&gt;=</span> <span class="n">adjusted_t</span>
+</span><span id="ChangeDistiller-194"><a href="#ChangeDistiller-194"><span class="linenos">194</span></a> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dice_coefficient</span><span class="p">(</span><span class="n">source_node</span><span class="p">,</span> <span class="n">target_node</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">f</span>
+</span><span id="ChangeDistiller-195"><a href="#ChangeDistiller-195"><span class="linenos">195</span></a> <span class="p">):</span>
+</span><span id="ChangeDistiller-196"><a href="#ChangeDistiller-196"><span class="linenos">196</span></a> <span class="n">matching_set</span><span class="o">.</span><span class="n">add</span><span class="p">((</span><span class="n">source_node_id</span><span class="p">,</span> <span class="n">target_node_id</span><span class="p">))</span>
+</span><span id="ChangeDistiller-197"><a href="#ChangeDistiller-197"><span class="linenos">197</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">source_node_id</span><span class="p">)</span>
+</span><span id="ChangeDistiller-198"><a href="#ChangeDistiller-198"><span class="linenos">198</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">target_node_id</span><span class="p">)</span>
+</span><span id="ChangeDistiller-199"><a href="#ChangeDistiller-199"><span class="linenos">199</span></a> <span class="n">ordered_unmatched_target_nodes</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">target_node_id</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+</span><span id="ChangeDistiller-200"><a href="#ChangeDistiller-200"><span class="linenos">200</span></a> <span class="k">break</span>
+</span><span id="ChangeDistiller-201"><a href="#ChangeDistiller-201"><span class="linenos">201</span></a>
+</span><span id="ChangeDistiller-202"><a href="#ChangeDistiller-202"><span class="linenos">202</span></a> <span class="k">return</span> <span class="n">matching_set</span>
+</span><span id="ChangeDistiller-203"><a href="#ChangeDistiller-203"><span class="linenos">203</span></a>
+</span><span id="ChangeDistiller-204"><a href="#ChangeDistiller-204"><span class="linenos">204</span></a> <span class="k">def</span> <span class="nf">_compute_leaf_matching_set</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">Set</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]:</span>
+</span><span id="ChangeDistiller-205"><a href="#ChangeDistiller-205"><span class="linenos">205</span></a> <span class="n">candidate_matchings</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">]]</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="ChangeDistiller-206"><a href="#ChangeDistiller-206"><span class="linenos">206</span></a> <span class="n">source_leaves</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">_get_leaves</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source</span><span class="p">))</span>
+</span><span id="ChangeDistiller-207"><a href="#ChangeDistiller-207"><span class="linenos">207</span></a> <span class="n">target_leaves</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">_get_leaves</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_target</span><span class="p">))</span>
+</span><span id="ChangeDistiller-208"><a href="#ChangeDistiller-208"><span class="linenos">208</span></a> <span class="k">for</span> <span class="n">source_leaf</span> <span class="ow">in</span> <span class="n">source_leaves</span><span class="p">:</span>
+</span><span id="ChangeDistiller-209"><a href="#ChangeDistiller-209"><span class="linenos">209</span></a> <span class="k">for</span> <span class="n">target_leaf</span> <span class="ow">in</span> <span class="n">target_leaves</span><span class="p">:</span>
+</span><span id="ChangeDistiller-210"><a href="#ChangeDistiller-210"><span class="linenos">210</span></a> <span class="k">if</span> <span class="n">_is_same_type</span><span class="p">(</span><span class="n">source_leaf</span><span class="p">,</span> <span class="n">target_leaf</span><span class="p">):</span>
+</span><span id="ChangeDistiller-211"><a href="#ChangeDistiller-211"><span class="linenos">211</span></a> <span class="n">similarity_score</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dice_coefficient</span><span class="p">(</span><span class="n">source_leaf</span><span class="p">,</span> <span class="n">target_leaf</span><span class="p">)</span>
+</span><span id="ChangeDistiller-212"><a href="#ChangeDistiller-212"><span class="linenos">212</span></a> <span class="k">if</span> <span class="n">similarity_score</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">f</span><span class="p">:</span>
+</span><span id="ChangeDistiller-213"><a href="#ChangeDistiller-213"><span class="linenos">213</span></a> <span class="n">heappush</span><span class="p">(</span>
+</span><span id="ChangeDistiller-214"><a href="#ChangeDistiller-214"><span class="linenos">214</span></a> <span class="n">candidate_matchings</span><span class="p">,</span>
+</span><span id="ChangeDistiller-215"><a href="#ChangeDistiller-215"><span class="linenos">215</span></a> <span class="p">(</span>
+</span><span id="ChangeDistiller-216"><a href="#ChangeDistiller-216"><span class="linenos">216</span></a> <span class="o">-</span><span class="n">similarity_score</span><span class="p">,</span>
+</span><span id="ChangeDistiller-217"><a href="#ChangeDistiller-217"><span class="linenos">217</span></a> <span class="nb">len</span><span class="p">(</span><span class="n">candidate_matchings</span><span class="p">),</span>
+</span><span id="ChangeDistiller-218"><a href="#ChangeDistiller-218"><span class="linenos">218</span></a> <span class="n">source_leaf</span><span class="p">,</span>
+</span><span id="ChangeDistiller-219"><a href="#ChangeDistiller-219"><span class="linenos">219</span></a> <span class="n">target_leaf</span><span class="p">,</span>
+</span><span id="ChangeDistiller-220"><a href="#ChangeDistiller-220"><span class="linenos">220</span></a> <span class="p">),</span>
+</span><span id="ChangeDistiller-221"><a href="#ChangeDistiller-221"><span class="linenos">221</span></a> <span class="p">)</span>
+</span><span id="ChangeDistiller-222"><a href="#ChangeDistiller-222"><span class="linenos">222</span></a>
+</span><span id="ChangeDistiller-223"><a href="#ChangeDistiller-223"><span class="linenos">223</span></a> <span class="c1"># Pick best matchings based on the highest score</span>
+</span><span id="ChangeDistiller-224"><a href="#ChangeDistiller-224"><span class="linenos">224</span></a> <span class="n">matching_set</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
+</span><span id="ChangeDistiller-225"><a href="#ChangeDistiller-225"><span class="linenos">225</span></a> <span class="k">while</span> <span class="n">candidate_matchings</span><span class="p">:</span>
+</span><span id="ChangeDistiller-226"><a href="#ChangeDistiller-226"><span class="linenos">226</span></a> <span class="n">_</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">source_leaf</span><span class="p">,</span> <span class="n">target_leaf</span> <span class="o">=</span> <span class="n">heappop</span><span class="p">(</span><span class="n">candidate_matchings</span><span class="p">)</span>
+</span><span id="ChangeDistiller-227"><a href="#ChangeDistiller-227"><span class="linenos">227</span></a> <span class="k">if</span> <span class="p">(</span>
+</span><span id="ChangeDistiller-228"><a href="#ChangeDistiller-228"><span class="linenos">228</span></a> <span class="nb">id</span><span class="p">(</span><span class="n">source_leaf</span><span class="p">)</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span>
+</span><span id="ChangeDistiller-229"><a href="#ChangeDistiller-229"><span class="linenos">229</span></a> <span class="ow">and</span> <span class="nb">id</span><span class="p">(</span><span class="n">target_leaf</span><span class="p">)</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span>
+</span><span id="ChangeDistiller-230"><a href="#ChangeDistiller-230"><span class="linenos">230</span></a> <span class="p">):</span>
+</span><span id="ChangeDistiller-231"><a href="#ChangeDistiller-231"><span class="linenos">231</span></a> <span class="n">matching_set</span><span class="o">.</span><span class="n">add</span><span class="p">((</span><span class="nb">id</span><span class="p">(</span><span class="n">source_leaf</span><span class="p">),</span> <span class="nb">id</span><span class="p">(</span><span class="n">target_leaf</span><span class="p">)))</span>
+</span><span id="ChangeDistiller-232"><a href="#ChangeDistiller-232"><span class="linenos">232</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="nb">id</span><span class="p">(</span><span class="n">source_leaf</span><span class="p">))</span>
+</span><span id="ChangeDistiller-233"><a href="#ChangeDistiller-233"><span class="linenos">233</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="nb">id</span><span class="p">(</span><span class="n">target_leaf</span><span class="p">))</span>
+</span><span id="ChangeDistiller-234"><a href="#ChangeDistiller-234"><span class="linenos">234</span></a>
+</span><span id="ChangeDistiller-235"><a href="#ChangeDistiller-235"><span class="linenos">235</span></a> <span class="k">return</span> <span class="n">matching_set</span>
+</span><span id="ChangeDistiller-236"><a href="#ChangeDistiller-236"><span class="linenos">236</span></a>
+</span><span id="ChangeDistiller-237"><a href="#ChangeDistiller-237"><span class="linenos">237</span></a> <span class="k">def</span> <span class="nf">_dice_coefficient</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
+</span><span id="ChangeDistiller-238"><a href="#ChangeDistiller-238"><span class="linenos">238</span></a> <span class="n">source_histo</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo</span><span class="p">(</span><span class="n">source</span><span class="p">)</span>
+</span><span id="ChangeDistiller-239"><a href="#ChangeDistiller-239"><span class="linenos">239</span></a> <span class="n">target_histo</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo</span><span class="p">(</span><span class="n">target</span><span class="p">)</span>
+</span><span id="ChangeDistiller-240"><a href="#ChangeDistiller-240"><span class="linenos">240</span></a>
+</span><span id="ChangeDistiller-241"><a href="#ChangeDistiller-241"><span class="linenos">241</span></a> <span class="n">total_grams</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">source_histo</span><span class="o">.</span><span class="n">values</span><span class="p">())</span> <span class="o">+</span> <span class="nb">sum</span><span class="p">(</span><span class="n">target_histo</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
+</span><span id="ChangeDistiller-242"><a href="#ChangeDistiller-242"><span class="linenos">242</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">total_grams</span><span class="p">:</span>
+</span><span id="ChangeDistiller-243"><a href="#ChangeDistiller-243"><span class="linenos">243</span></a> <span class="k">return</span> <span class="mf">1.0</span> <span class="k">if</span> <span class="n">source</span> <span class="o">==</span> <span class="n">target</span> <span class="k">else</span> <span class="mf">0.0</span>
+</span><span id="ChangeDistiller-244"><a href="#ChangeDistiller-244"><span class="linenos">244</span></a>
+</span><span id="ChangeDistiller-245"><a href="#ChangeDistiller-245"><span class="linenos">245</span></a> <span class="n">overlap_len</span> <span class="o">=</span> <span class="mi">0</span>
+</span><span id="ChangeDistiller-246"><a href="#ChangeDistiller-246"><span class="linenos">246</span></a> <span class="n">overlapping_grams</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">source_histo</span><span class="p">)</span> <span class="o">&amp;</span> <span class="nb">set</span><span class="p">(</span><span class="n">target_histo</span><span class="p">)</span>
+</span><span id="ChangeDistiller-247"><a href="#ChangeDistiller-247"><span class="linenos">247</span></a> <span class="k">for</span> <span class="n">g</span> <span class="ow">in</span> <span class="n">overlapping_grams</span><span class="p">:</span>
+</span><span id="ChangeDistiller-248"><a href="#ChangeDistiller-248"><span class="linenos">248</span></a> <span class="n">overlap_len</span> <span class="o">+=</span> <span class="nb">min</span><span class="p">(</span><span class="n">source_histo</span><span class="p">[</span><span class="n">g</span><span class="p">],</span> <span class="n">target_histo</span><span class="p">[</span><span class="n">g</span><span class="p">])</span>
+</span><span id="ChangeDistiller-249"><a href="#ChangeDistiller-249"><span class="linenos">249</span></a>
+</span><span id="ChangeDistiller-250"><a href="#ChangeDistiller-250"><span class="linenos">250</span></a> <span class="k">return</span> <span class="mi">2</span> <span class="o">*</span> <span class="n">overlap_len</span> <span class="o">/</span> <span class="n">total_grams</span>
+</span><span id="ChangeDistiller-251"><a href="#ChangeDistiller-251"><span class="linenos">251</span></a>
+</span><span id="ChangeDistiller-252"><a href="#ChangeDistiller-252"><span class="linenos">252</span></a> <span class="k">def</span> <span class="nf">_bigram_histo</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">DefaultDict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]:</span>
+</span><span id="ChangeDistiller-253"><a href="#ChangeDistiller-253"><span class="linenos">253</span></a> <span class="k">if</span> <span class="nb">id</span><span class="p">(</span><span class="n">expression</span><span class="p">)</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo_cache</span><span class="p">:</span>
+</span><span id="ChangeDistiller-254"><a href="#ChangeDistiller-254"><span class="linenos">254</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo_cache</span><span class="p">[</span><span class="nb">id</span><span class="p">(</span><span class="n">expression</span><span class="p">)]</span>
+</span><span id="ChangeDistiller-255"><a href="#ChangeDistiller-255"><span class="linenos">255</span></a>
+</span><span id="ChangeDistiller-256"><a href="#ChangeDistiller-256"><span class="linenos">256</span></a> <span class="n">expression_str</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sql_generator</span><span class="o">.</span><span class="n">generate</span><span class="p">(</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="ChangeDistiller-257"><a href="#ChangeDistiller-257"><span class="linenos">257</span></a> <span class="n">count</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">expression_str</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
+</span><span id="ChangeDistiller-258"><a href="#ChangeDistiller-258"><span class="linenos">258</span></a> <span class="n">bigram_histo</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">DefaultDict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span>
+</span><span id="ChangeDistiller-259"><a href="#ChangeDistiller-259"><span class="linenos">259</span></a> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">count</span><span class="p">):</span>
+</span><span id="ChangeDistiller-260"><a href="#ChangeDistiller-260"><span class="linenos">260</span></a> <span class="n">bigram_histo</span><span class="p">[</span><span class="n">expression_str</span><span class="p">[</span><span class="n">i</span> <span class="p">:</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">2</span><span class="p">]]</span> <span class="o">+=</span> <span class="mi">1</span>
+</span><span id="ChangeDistiller-261"><a href="#ChangeDistiller-261"><span class="linenos">261</span></a>
+</span><span id="ChangeDistiller-262"><a href="#ChangeDistiller-262"><span class="linenos">262</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo_cache</span><span class="p">[</span><span class="nb">id</span><span class="p">(</span><span class="n">expression</span><span class="p">)]</span> <span class="o">=</span> <span class="n">bigram_histo</span>
+</span><span id="ChangeDistiller-263"><a href="#ChangeDistiller-263"><span class="linenos">263</span></a> <span class="k">return</span> <span class="n">bigram_histo</span>
+</span></pre></div>
+
+
+ <div class="docstring"><p>The implementation of the Change Distiller algorithm described by Beat Fluri and Martin Pinzger in
+their paper <a href="https://ieeexplore.ieee.org/document/4339230">https://ieeexplore.ieee.org/document/4339230</a>, which in turn is based on the algorithm by
+Chawathe et al. described in <a href="http://ilpubs.stanford.edu:8090/115/1/1995-46.pdf">http://ilpubs.stanford.edu:8090/115/1/1995-46.pdf</a>.</p>
+</div>
+
+
+ <div id="ChangeDistiller.__init__" class="classattr">
+ <input id="ChangeDistiller.__init__-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
+<div class="attr function">
+
+ <span class="name">ChangeDistiller</span><span class="signature pdoc-code condensed">(<span class="param"><span class="n">f</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.6</span>, </span><span class="param"><span class="n">t</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.6</span></span>)</span>
+
+ <label class="view-source-button" for="ChangeDistiller.__init__-view-source"><span>View Source</span></label>
+
+ </div>
+ <a class="headerlink" href="#ChangeDistiller.__init__"></a>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="ChangeDistiller.__init__-108"><a href="#ChangeDistiller.__init__-108"><span class="linenos">108</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">f</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.6</span><span class="p">,</span> <span class="n">t</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.6</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="ChangeDistiller.__init__-109"><a href="#ChangeDistiller.__init__-109"><span class="linenos">109</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">f</span> <span class="o">=</span> <span class="n">f</span>
+</span><span id="ChangeDistiller.__init__-110"><a href="#ChangeDistiller.__init__-110"><span class="linenos">110</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">t</span> <span class="o">=</span> <span class="n">t</span>
+</span><span id="ChangeDistiller.__init__-111"><a href="#ChangeDistiller.__init__-111"><span class="linenos">111</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_sql_generator</span> <span class="o">=</span> <span class="n">Dialect</span><span class="p">()</span><span class="o">.</span><span class="n">generator</span><span class="p">()</span>
+</span></pre></div>
+
+
+
+
+ </div>
+ <div id="ChangeDistiller.diff" class="classattr">
+ <input id="ChangeDistiller.diff-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
+<div class="attr function">
+
+ <span class="def">def</span>
+ <span class="name">diff</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">source</span><span class="p">:</span> <span class="n"><a href="expressions.html#Expression">sqlglot.expressions.Expression</a></span>,</span><span class="param"> <span class="n">target</span><span class="p">:</span> <span class="n"><a href="expressions.html#Expression">sqlglot.expressions.Expression</a></span></span><span class="return-annotation">) -> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n"><a href="#Insert">sqlglot.diff.Insert</a></span><span class="p">,</span> <span class="n"><a href="#Remove">sqlglot.diff.Remove</a></span><span class="p">,</span> <span class="n"><a href="#Move">sqlglot.diff.Move</a></span><span class="p">,</span> <span class="n"><a href="#Update">sqlglot.diff.Update</a></span><span class="p">,</span> <span class="n"><a href="#Keep">sqlglot.diff.Keep</a></span><span class="p">]]</span>:</span></span>
+
+ <label class="view-source-button" for="ChangeDistiller.diff-view-source"><span>View Source</span></label>
+
+ </div>
+ <a class="headerlink" href="#ChangeDistiller.diff"></a>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="ChangeDistiller.diff-113"><a href="#ChangeDistiller.diff-113"><span class="linenos">113</span></a> <span class="k">def</span> <span class="nf">diff</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">target</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Edit</span><span class="p">]:</span>
+</span><span id="ChangeDistiller.diff-114"><a href="#ChangeDistiller.diff-114"><span class="linenos">114</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_source</span> <span class="o">=</span> <span class="n">source</span>
+</span><span id="ChangeDistiller.diff-115"><a href="#ChangeDistiller.diff-115"><span class="linenos">115</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_target</span> <span class="o">=</span> <span class="n">target</span>
+</span><span id="ChangeDistiller.diff-116"><a href="#ChangeDistiller.diff-116"><span class="linenos">116</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span> <span class="o">=</span> <span class="p">{</span><span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span> <span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">source</span><span class="o">.</span><span class="n">bfs</span><span class="p">()}</span>
+</span><span id="ChangeDistiller.diff-117"><a href="#ChangeDistiller.diff-117"><span class="linenos">117</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_target_index</span> <span class="o">=</span> <span class="p">{</span><span class="nb">id</span><span class="p">(</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span> <span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">target</span><span class="o">.</span><span class="n">bfs</span><span class="p">()}</span>
+</span><span id="ChangeDistiller.diff-118"><a href="#ChangeDistiller.diff-118"><span class="linenos">118</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_source_nodes</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source_index</span><span class="p">)</span>
+</span><span id="ChangeDistiller.diff-119"><a href="#ChangeDistiller.diff-119"><span class="linenos">119</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_unmatched_target_nodes</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_target_index</span><span class="p">)</span>
+</span><span id="ChangeDistiller.diff-120"><a href="#ChangeDistiller.diff-120"><span class="linenos">120</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bigram_histo_cache</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">DefaultDict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="p">{}</span>
+</span><span id="ChangeDistiller.diff-121"><a href="#ChangeDistiller.diff-121"><span class="linenos">121</span></a>
+</span><span id="ChangeDistiller.diff-122"><a href="#ChangeDistiller.diff-122"><span class="linenos">122</span></a> <span class="n">matching_set</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_matching_set</span><span class="p">()</span>
+</span><span id="ChangeDistiller.diff-123"><a href="#ChangeDistiller.diff-123"><span class="linenos">123</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_generate_edit_script</span><span class="p">(</span><span class="n">matching_set</span><span class="p">)</span>
+</span></pre></div>
+
+
+
+
+ </div>
+ </section>
+ </main>
+<script>
+ function escapeHTML(html) {
+ return document.createElement('div').appendChild(document.createTextNode(html)).parentNode.innerHTML;
+ }
+
+ const originalContent = document.querySelector("main.pdoc");
+ let currentContent = originalContent;
+
+ function setContent(innerHTML) {
+ let elem;
+ if (innerHTML) {
+ elem = document.createElement("main");
+ elem.classList.add("pdoc");
+ elem.innerHTML = innerHTML;
+ } else {
+ elem = originalContent;
+ }
+ if (currentContent !== elem) {
+ currentContent.replaceWith(elem);
+ currentContent = elem;
+ }
+ }
+
+ function getSearchTerm() {
+ return (new URL(window.location)).searchParams.get("search");
+ }
+
+ const searchBox = document.querySelector(".pdoc input[type=search]");
+ searchBox.addEventListener("input", function () {
+ let url = new URL(window.location);
+ if (searchBox.value.trim()) {
+ url.hash = "";
+ url.searchParams.set("search", searchBox.value);
+ } else {
+ url.searchParams.delete("search");
+ }
+ history.replaceState("", "", url.toString());
+ onInput();
+ });
+ window.addEventListener("popstate", onInput);
+
+
+ let search, searchErr;
+
+ async function initialize() {
+ try {
+ search = await new Promise((resolve, reject) => {
+ const script = document.createElement("script");
+ script.type = "text/javascript";
+ script.async = true;
+ script.onload = () => resolve(window.pdocSearch);
+ script.onerror = (e) => reject(e);
+ script.src = "../search.js";
+ document.getElementsByTagName("head")[0].appendChild(script);
+ });
+ } catch (e) {
+ console.error("Cannot fetch pdoc search index");
+ searchErr = "Cannot fetch search index.";
+ }
+ onInput();
+
+ document.querySelector("nav.pdoc").addEventListener("click", e => {
+ if (e.target.hash) {
+ searchBox.value = "";
+ searchBox.dispatchEvent(new Event("input"));
+ }
+ });
+ }
+
+ function onInput() {
+ setContent((() => {
+ const term = getSearchTerm();
+ if (!term) {
+ return null
+ }
+ if (searchErr) {
+ return `<h3>Error: ${searchErr}</h3>`
+ }
+ if (!search) {
+ return "<h3>Searching...</h3>"
+ }
+
+ window.scrollTo({top: 0, left: 0, behavior: 'auto'});
+
+ const results = search(term);
+
+ let html;
+ if (results.length === 0) {
+ html = `No search results for '${escapeHTML(term)}'.`
+ } else {
+ html = `<h4>${results.length} search result${results.length > 1 ? "s" : ""} for '${escapeHTML(term)}'.</h4>`;
+ }
+ for (let result of results.slice(0, 10)) {
+ let doc = result.doc;
+ let url = `../${doc.modulename.replaceAll(".", "/")}.html`;
+ if (doc.qualname) {
+ url += `#${doc.qualname}`;
+ }
+
+ let heading;
+ switch (result.doc.kind) {
+ case "function":
+ if (doc.fullname.endsWith(".__init__")) {
+ heading = `<span class="name">${doc.fullname.replace(/\.__init__$/, "")}</span>${doc.signature}`;
+ } else {
+ heading = `<span class="def">${doc.funcdef}</span> <span class="name">${doc.fullname}</span>${doc.signature}`;
+ }
+ break;
+ case "class":
+ heading = `<span class="def">class</span> <span class="name">${doc.fullname}</span>`;
+ if (doc.bases)
+ heading += `<wbr>(<span class="base">${doc.bases}</span>)`;
+ heading += `:`;
+ break;
+ case "variable":
+ heading = `<span class="name">${doc.fullname}</span>`;
+ if (doc.annotation)
+ heading += `<span class="annotation">${doc.annotation}</span>`;
+ if (doc.default_value)
+ heading += `<span class="default_value">${doc.default_value}</span>`;
+ break;
+ default:
+ heading = `<span class="name">${doc.fullname}</span>`;
+ break;
+ }
+ html += `
+ <section class="search-result">
+ <a href="${url}" class="attr ${doc.kind}">${heading}</a>
+ <div class="docstring">${doc.doc}</div>
+ </section>
+ `;
+
+ }
+ return html;
+ })());
+ }
+
+ if (getSearchTerm()) {
+ initialize();
+ searchBox.value = getSearchTerm();
+ onInput();
+ } else {
+ searchBox.addEventListener("focus", initialize, {once: true});
+ }
+
+ searchBox.addEventListener("keydown", e => {
+ if (["ArrowDown", "ArrowUp", "Enter"].includes(e.key)) {
+ let focused = currentContent.querySelector(".search-result.focused");
+ if (!focused) {
+ currentContent.querySelector(".search-result").classList.add("focused");
+ } else if (
+ e.key === "ArrowDown"
+ && focused.nextElementSibling
+ && focused.nextElementSibling.classList.contains("search-result")
+ ) {
+ focused.classList.remove("focused");
+ focused.nextElementSibling.classList.add("focused");
+ focused.nextElementSibling.scrollIntoView({
+ behavior: "smooth",
+ block: "nearest",
+ inline: "nearest"
+ });
+ } else if (
+ e.key === "ArrowUp"
+ && focused.previousElementSibling
+ && focused.previousElementSibling.classList.contains("search-result")
+ ) {
+ focused.classList.remove("focused");
+ focused.previousElementSibling.classList.add("focused");
+ focused.previousElementSibling.scrollIntoView({
+ behavior: "smooth",
+ block: "nearest",
+ inline: "nearest"
+ });
+ } else if (
+ e.key === "Enter"
+ ) {
+ focused.querySelector("a").click();
+ }
+ }
+ });
+</script></body>
+</html> \ No newline at end of file