summaryrefslogtreecommitdiffstats
path: root/docs/sqlglot/dataframe/sql.html
diff options
context:
space:
mode:
Diffstat (limited to 'docs/sqlglot/dataframe/sql.html')
-rw-r--r--docs/sqlglot/dataframe/sql.html2182
1 files changed, 1136 insertions, 1046 deletions
diff --git a/docs/sqlglot/dataframe/sql.html b/docs/sqlglot/dataframe/sql.html
index 9fd995a..a9a7f9d 100644
--- a/docs/sqlglot/dataframe/sql.html
+++ b/docs/sqlglot/dataframe/sql.html
@@ -3,7 +3,7 @@
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
- <meta name="generator" content="pdoc 13.1.0"/>
+ <meta name="generator" content="pdoc 13.1.1"/>
<title>sqlglot.dataframe.sql API documentation</title>
<style>/*! * Bootstrap Reboot v5.0.0 (https://getbootstrap.com/) * Copyright 2011-2021 The Bootstrap Authors * Copyright 2011-2021 Twitter, Inc. * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) * Forked from Normalize.css, licensed MIT (https://github.com/necolas/normalize.css/blob/master/LICENSE.md) */*,::after,::before{box-sizing:border-box}@media (prefers-reduced-motion:no-preference){:root{scroll-behavior:smooth}}body{margin:0;font-family:system-ui,-apple-system,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans","Liberation Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";font-size:1rem;font-weight:400;line-height:1.5;color:#212529;background-color:#fff;-webkit-text-size-adjust:100%;-webkit-tap-highlight-color:transparent}hr{margin:1rem 0;color:inherit;background-color:currentColor;border:0;opacity:.25}hr:not([size]){height:1px}h1,h2,h3,h4,h5,h6{margin-top:0;margin-bottom:.5rem;font-weight:500;line-height:1.2}h1{font-size:calc(1.375rem + 1.5vw)}@media (min-width:1200px){h1{font-size:2.5rem}}h2{font-size:calc(1.325rem + .9vw)}@media (min-width:1200px){h2{font-size:2rem}}h3{font-size:calc(1.3rem + .6vw)}@media (min-width:1200px){h3{font-size:1.75rem}}h4{font-size:calc(1.275rem + .3vw)}@media (min-width:1200px){h4{font-size:1.5rem}}h5{font-size:1.25rem}h6{font-size:1rem}p{margin-top:0;margin-bottom:1rem}abbr[data-bs-original-title],abbr[title]{-webkit-text-decoration:underline dotted;text-decoration:underline dotted;cursor:help;-webkit-text-decoration-skip-ink:none;text-decoration-skip-ink:none}address{margin-bottom:1rem;font-style:normal;line-height:inherit}ol,ul{padding-left:2rem}dl,ol,ul{margin-top:0;margin-bottom:1rem}ol ol,ol ul,ul ol,ul ul{margin-bottom:0}dt{font-weight:700}dd{margin-bottom:.5rem;margin-left:0}blockquote{margin:0 0 
1rem}b,strong{font-weight:bolder}small{font-size:.875em}mark{padding:.2em;background-color:#fcf8e3}sub,sup{position:relative;font-size:.75em;line-height:0;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}a{color:#0d6efd;text-decoration:underline}a:hover{color:#0a58ca}a:not([href]):not([class]),a:not([href]):not([class]):hover{color:inherit;text-decoration:none}code,kbd,pre,samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;font-size:1em;direction:ltr;unicode-bidi:bidi-override}pre{display:block;margin-top:0;margin-bottom:1rem;overflow:auto;font-size:.875em}pre code{font-size:inherit;color:inherit;word-break:normal}code{font-size:.875em;color:#d63384;word-wrap:break-word}a>code{color:inherit}kbd{padding:.2rem .4rem;font-size:.875em;color:#fff;background-color:#212529;border-radius:.2rem}kbd kbd{padding:0;font-size:1em;font-weight:700}figure{margin:0 0 1rem}img,svg{vertical-align:middle}table{caption-side:bottom;border-collapse:collapse}caption{padding-top:.5rem;padding-bottom:.5rem;color:#6c757d;text-align:left}th{text-align:inherit;text-align:-webkit-match-parent}tbody,td,tfoot,th,thead,tr{border-color:inherit;border-style:solid;border-width:0}label{display:inline-block}button{border-radius:0}button:focus:not(:focus-visible){outline:0}button,input,optgroup,select,textarea{margin:0;font-family:inherit;font-size:inherit;line-height:inherit}button,select{text-transform:none}[role=button]{cursor:pointer}select{word-wrap:normal}select:disabled{opacity:1}[list]::-webkit-calendar-picker-indicator{display:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}[type=button]:not(:disabled),[type=reset]:not(:disabled),[type=submit]:not(:disabled),button:not(:disabled){cursor:pointer}::-moz-focus-inner{padding:0;border-style:none}textarea{resize:vertical}fieldset{min-width:0;padding:0;margin:0;border:0}legend{float:left;width:100%;padding:0;margin-bottom:.5rem;font-size:calc(1.275rem + 
.3vw);line-height:inherit}@media (min-width:1200px){legend{font-size:1.5rem}}legend+*{clear:left}::-webkit-datetime-edit-day-field,::-webkit-datetime-edit-fields-wrapper,::-webkit-datetime-edit-hour-field,::-webkit-datetime-edit-minute,::-webkit-datetime-edit-month-field,::-webkit-datetime-edit-text,::-webkit-datetime-edit-year-field{padding:0}::-webkit-inner-spin-button{height:auto}[type=search]{outline-offset:-2px;-webkit-appearance:textfield}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-color-swatch-wrapper{padding:0}::file-selector-button{font:inherit}::-webkit-file-upload-button{font:inherit;-webkit-appearance:button}output{display:inline-block}iframe{border:0}summary{display:list-item;cursor:pointer}progress{vertical-align:baseline}[hidden]{display:none!important}</style>
@@ -608,7 +608,7 @@
<div class="attr function">
<span class="def">def</span>
- <span class="name">createDataFrame</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">data</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719846869552&#39;</span><span class="o">&gt;</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719846869552&#39;</span><span class="o">&gt;</span><span class="p">],</span> <span class="n">Tuple</span><span class="p">]]</span>,</span><span class="param"> <span class="n">schema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719847382416&#39;</span><span class="o">&gt;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">samplingRatio</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">verifySchema</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">sqlglot.dataframe.sql.DataFrame</a></span>:</span></span>
+ <span class="name">createDataFrame</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">data</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377708850400&#39;</span><span class="o">&gt;</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377708850400&#39;</span><span class="o">&gt;</span><span class="p">],</span> <span class="n">Tuple</span><span class="p">]]</span>,</span><span class="param"> <span class="n">schema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377708766032&#39;</span><span class="o">&gt;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">samplingRatio</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">verifySchema</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">sqlglot.dataframe.sql.DataFrame</a></span>:</span></span>
<label class="view-source-button" for="SparkSession.createDataFrame-view-source"><span>View Source</span></label>
@@ -835,9 +835,9 @@
</span><span id="DataFrame-147"><a href="#DataFrame-147"><span class="linenos">147</span></a> <span class="k">def</span> <span class="nf">_ensure_list_of_columns</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cols</span><span class="p">):</span>
</span><span id="DataFrame-148"><a href="#DataFrame-148"><span class="linenos">148</span></a> <span class="k">return</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">(</span><span class="n">ensure_list</span><span class="p">(</span><span class="n">cols</span><span class="p">))</span>
</span><span id="DataFrame-149"><a href="#DataFrame-149"><span class="linenos">149</span></a>
-</span><span id="DataFrame-150"><a href="#DataFrame-150"><span class="linenos">150</span></a> <span class="k">def</span> <span class="nf">_ensure_and_normalize_cols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cols</span><span class="p">):</span>
+</span><span id="DataFrame-150"><a href="#DataFrame-150"><span class="linenos">150</span></a> <span class="k">def</span> <span class="nf">_ensure_and_normalize_cols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cols</span><span class="p">,</span> <span class="n">expression</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
</span><span id="DataFrame-151"><a href="#DataFrame-151"><span class="linenos">151</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame-152"><a href="#DataFrame-152"><span class="linenos">152</span></a> <span class="n">normalize</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame-152"><a href="#DataFrame-152"><span class="linenos">152</span></a> <span class="n">normalize</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="p">,</span> <span class="n">expression</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">cols</span><span class="p">)</span>
</span><span id="DataFrame-153"><a href="#DataFrame-153"><span class="linenos">153</span></a> <span class="k">return</span> <span class="n">cols</span>
</span><span id="DataFrame-154"><a href="#DataFrame-154"><span class="linenos">154</span></a>
</span><span id="DataFrame-155"><a href="#DataFrame-155"><span class="linenos">155</span></a> <span class="k">def</span> <span class="nf">_ensure_and_normalize_col</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">col</span><span class="p">):</span>
@@ -1044,429 +1044,466 @@
</span><span id="DataFrame-356"><a href="#DataFrame-356"><span class="linenos">356</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
</span><span id="DataFrame-357"><a href="#DataFrame-357"><span class="linenos">357</span></a> <span class="n">kwargs</span><span class="p">[</span><span class="s2">&quot;append&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;append&quot;</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span><span id="DataFrame-358"><a href="#DataFrame-358"><span class="linenos">358</span></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;joins&quot;</span><span class="p">):</span>
-</span><span id="DataFrame-359"><a href="#DataFrame-359"><span class="linenos">359</span></a> <span class="n">ambiguous_cols</span> <span class="o">=</span> <span class="p">[</span><span class="n">col</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="o">.</span><span class="n">table</span><span class="p">]</span>
-</span><span id="DataFrame-360"><a href="#DataFrame-360"><span class="linenos">360</span></a> <span class="k">if</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
-</span><span id="DataFrame-361"><a href="#DataFrame-361"><span class="linenos">361</span></a> <span class="n">join_table_identifiers</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-362"><a href="#DataFrame-362"><span class="linenos">362</span></a> <span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-363"><a href="#DataFrame-363"><span class="linenos">363</span></a> <span class="p">]</span>
-</span><span id="DataFrame-364"><a href="#DataFrame-364"><span class="linenos">364</span></a> <span class="n">cte_names_in_join</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">join_table_identifiers</span><span class="p">]</span>
-</span><span id="DataFrame-365"><a href="#DataFrame-365"><span class="linenos">365</span></a> <span class="k">for</span> <span class="n">ambiguous_col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
-</span><span id="DataFrame-366"><a href="#DataFrame-366"><span class="linenos">366</span></a> <span class="n">ctes_with_column</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-367"><a href="#DataFrame-367"><span class="linenos">367</span></a> <span class="n">cte</span>
-</span><span id="DataFrame-368"><a href="#DataFrame-368"><span class="linenos">368</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span>
-</span><span id="DataFrame-369"><a href="#DataFrame-369"><span class="linenos">369</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte_names_in_join</span>
-</span><span id="DataFrame-370"><a href="#DataFrame-370"><span class="linenos">370</span></a> <span class="ow">and</span> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span>
-</span><span id="DataFrame-371"><a href="#DataFrame-371"><span class="linenos">371</span></a> <span class="p">]</span>
-</span><span id="DataFrame-372"><a href="#DataFrame-372"><span class="linenos">372</span></a> <span class="c1"># If the select column does not specify a table and there is a join</span>
-</span><span id="DataFrame-373"><a href="#DataFrame-373"><span class="linenos">373</span></a> <span class="c1"># then we assume they are referring to the left table</span>
-</span><span id="DataFrame-374"><a href="#DataFrame-374"><span class="linenos">374</span></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">ctes_with_column</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
-</span><span id="DataFrame-375"><a href="#DataFrame-375"><span class="linenos">375</span></a> <span class="n">table_identifier</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;expressions&quot;</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">this</span>
-</span><span id="DataFrame-376"><a href="#DataFrame-376"><span class="linenos">376</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-377"><a href="#DataFrame-377"><span class="linenos">377</span></a> <span class="n">table_identifier</span> <span class="o">=</span> <span class="n">ctes_with_column</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;alias&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">this</span>
-</span><span id="DataFrame-378"><a href="#DataFrame-378"><span class="linenos">378</span></a> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;table&quot;</span><span class="p">,</span> <span class="n">table_identifier</span><span class="p">)</span>
-</span><span id="DataFrame-379"><a href="#DataFrame-379"><span class="linenos">379</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
-</span><span id="DataFrame-380"><a href="#DataFrame-380"><span class="linenos">380</span></a> <span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">expression</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">),</span> <span class="o">**</span><span class="n">kwargs</span>
-</span><span id="DataFrame-381"><a href="#DataFrame-381"><span class="linenos">381</span></a> <span class="p">)</span>
-</span><span id="DataFrame-382"><a href="#DataFrame-382"><span class="linenos">382</span></a>
-</span><span id="DataFrame-383"><a href="#DataFrame-383"><span class="linenos">383</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame-384"><a href="#DataFrame-384"><span class="linenos">384</span></a> <span class="k">def</span> <span class="nf">alias</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-385"><a href="#DataFrame-385"><span class="linenos">385</span></a> <span class="n">new_sequence_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_sequence_id</span>
-</span><span id="DataFrame-386"><a href="#DataFrame-386"><span class="linenos">386</span></a> <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-387"><a href="#DataFrame-387"><span class="linenos">387</span></a> <span class="k">for</span> <span class="n">join_hint</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">:</span>
-</span><span id="DataFrame-388"><a href="#DataFrame-388"><span class="linenos">388</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">join_hint</span><span class="o">.</span><span class="n">expressions</span><span class="p">:</span>
-</span><span id="DataFrame-389"><a href="#DataFrame-389"><span class="linenos">389</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">:</span>
-</span><span id="DataFrame-390"><a href="#DataFrame-390"><span class="linenos">390</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;this&quot;</span><span class="p">,</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_col</span><span class="p">(</span><span class="n">new_sequence_id</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-391"><a href="#DataFrame-391"><span class="linenos">391</span></a> <span class="n">df</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_add_alias_to_mapping</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">new_sequence_id</span><span class="p">)</span>
-</span><span id="DataFrame-392"><a href="#DataFrame-392"><span class="linenos">392</span></a> <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">(</span><span class="n">sequence_id</span><span class="o">=</span><span class="n">new_sequence_id</span><span class="p">)</span>
-</span><span id="DataFrame-393"><a href="#DataFrame-393"><span class="linenos">393</span></a>
-</span><span id="DataFrame-394"><a href="#DataFrame-394"><span class="linenos">394</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">WHERE</span><span class="p">)</span>
-</span><span id="DataFrame-395"><a href="#DataFrame-395"><span class="linenos">395</span></a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">bool</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-396"><a href="#DataFrame-396"><span class="linenos">396</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">column</span><span class="p">)</span>
-</span><span id="DataFrame-397"><a href="#DataFrame-397"><span class="linenos">397</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">))</span>
-</span><span id="DataFrame-398"><a href="#DataFrame-398"><span class="linenos">398</span></a>
-</span><span id="DataFrame-399"><a href="#DataFrame-399"><span class="linenos">399</span></a> <span class="nb">filter</span> <span class="o">=</span> <span class="n">where</span>
-</span><span id="DataFrame-400"><a href="#DataFrame-400"><span class="linenos">400</span></a>
-</span><span id="DataFrame-401"><a href="#DataFrame-401"><span class="linenos">401</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">GROUP_BY</span><span class="p">)</span>
-</span><span id="DataFrame-402"><a href="#DataFrame-402"><span class="linenos">402</span></a> <span class="k">def</span> <span class="nf">groupBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">GroupedData</span><span class="p">:</span>
-</span><span id="DataFrame-403"><a href="#DataFrame-403"><span class="linenos">403</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame-404"><a href="#DataFrame-404"><span class="linenos">404</span></a> <span class="k">return</span> <span class="n">GroupedData</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_op</span><span class="p">)</span>
-</span><span id="DataFrame-405"><a href="#DataFrame-405"><span class="linenos">405</span></a>
-</span><span id="DataFrame-406"><a href="#DataFrame-406"><span class="linenos">406</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-407"><a href="#DataFrame-407"><span class="linenos">407</span></a> <span class="k">def</span> <span class="nf">agg</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">exprs</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-408"><a href="#DataFrame-408"><span class="linenos">408</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">exprs</span><span class="p">)</span>
-</span><span id="DataFrame-409"><a href="#DataFrame-409"><span class="linenos">409</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="o">*</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame-410"><a href="#DataFrame-410"><span class="linenos">410</span></a>
-</span><span id="DataFrame-411"><a href="#DataFrame-411"><span class="linenos">411</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-412"><a href="#DataFrame-412"><span class="linenos">412</span></a> <span class="k">def</span> <span class="nf">join</span><span class="p">(</span>
-</span><span id="DataFrame-413"><a href="#DataFrame-413"><span class="linenos">413</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame-414"><a href="#DataFrame-414"><span class="linenos">414</span></a> <span class="n">other_df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
-</span><span id="DataFrame-415"><a href="#DataFrame-415"><span class="linenos">415</span></a> <span class="n">on</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">Column</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Column</span><span class="p">]],</span>
-</span><span id="DataFrame-416"><a href="#DataFrame-416"><span class="linenos">416</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;inner&quot;</span><span class="p">,</span>
-</span><span id="DataFrame-417"><a href="#DataFrame-417"><span class="linenos">417</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
-</span><span id="DataFrame-418"><a href="#DataFrame-418"><span class="linenos">418</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-419"><a href="#DataFrame-419"><span class="linenos">419</span></a> <span class="n">other_df</span> <span class="o">=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
-</span><span id="DataFrame-420"><a href="#DataFrame-420"><span class="linenos">420</span></a> <span class="n">pre_join_self_latest_cte_name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">latest_cte_name</span>
-</span><span id="DataFrame-421"><a href="#DataFrame-421"><span class="linenos">421</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">on</span><span class="p">)</span>
-</span><span id="DataFrame-422"><a href="#DataFrame-422"><span class="linenos">422</span></a> <span class="n">join_type</span> <span class="o">=</span> <span class="n">how</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
-</span><span id="DataFrame-423"><a href="#DataFrame-423"><span class="linenos">423</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
-</span><span id="DataFrame-424"><a href="#DataFrame-424"><span class="linenos">424</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-425"><a href="#DataFrame-425"><span class="linenos">425</span></a> <span class="n">Column</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">pre_join_self_latest_cte_name</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">columns</span>
-</span><span id="DataFrame-426"><a href="#DataFrame-426"><span class="linenos">426</span></a> <span class="p">]</span>
-</span><span id="DataFrame-427"><a href="#DataFrame-427"><span class="linenos">427</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span>
-</span><span id="DataFrame-428"><a href="#DataFrame-428"><span class="linenos">428</span></a> <span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span>
-</span><span id="DataFrame-429"><a href="#DataFrame-429"><span class="linenos">429</span></a> <span class="p">[</span>
-</span><span id="DataFrame-430"><a href="#DataFrame-430"><span class="linenos">430</span></a> <span class="n">col</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">pre_join_self_latest_cte_name</span><span class="p">)</span>
-</span><span id="DataFrame-431"><a href="#DataFrame-431"><span class="linenos">431</span></a> <span class="o">==</span> <span class="n">col</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">)</span>
-</span><span id="DataFrame-432"><a href="#DataFrame-432"><span class="linenos">432</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">columns</span>
-</span><span id="DataFrame-433"><a href="#DataFrame-433"><span class="linenos">433</span></a> <span class="p">],</span>
-</span><span id="DataFrame-434"><a href="#DataFrame-434"><span class="linenos">434</span></a> <span class="p">)</span>
-</span><span id="DataFrame-435"><a href="#DataFrame-435"><span class="linenos">435</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-436"><a href="#DataFrame-436"><span class="linenos">436</span></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
-</span><span id="DataFrame-437"><a href="#DataFrame-437"><span class="linenos">437</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">columns</span><span class="p">)]</span>
-</span><span id="DataFrame-438"><a href="#DataFrame-438"><span class="linenos">438</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
-</span><span id="DataFrame-439"><a href="#DataFrame-439"><span class="linenos">439</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-440"><a href="#DataFrame-440"><span class="linenos">440</span></a> <span class="n">Column</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">pre_join_self_latest_cte_name</span><span class="p">)</span>
-</span><span id="DataFrame-441"><a href="#DataFrame-441"><span class="linenos">441</span></a> <span class="k">if</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span>
-</span><span id="DataFrame-442"><a href="#DataFrame-442"><span class="linenos">442</span></a> <span class="k">else</span> <span class="n">Column</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">)</span>
-</span><span id="DataFrame-443"><a href="#DataFrame-443"><span class="linenos">443</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">join_clause</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">))</span>
-</span><span id="DataFrame-444"><a href="#DataFrame-444"><span class="linenos">444</span></a> <span class="p">]</span>
-</span><span id="DataFrame-445"><a href="#DataFrame-445"><span class="linenos">445</span></a> <span class="n">self_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-446"><a href="#DataFrame-446"><span class="linenos">446</span></a> <span class="n">column</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">pre_join_self_latest_cte_name</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrame-447"><a href="#DataFrame-447"><span class="linenos">447</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
-</span><span id="DataFrame-448"><a href="#DataFrame-448"><span class="linenos">448</span></a> <span class="p">]</span>
-</span><span id="DataFrame-449"><a href="#DataFrame-449"><span class="linenos">449</span></a> <span class="n">other_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-450"><a href="#DataFrame-450"><span class="linenos">450</span></a> <span class="n">column</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrame-451"><a href="#DataFrame-451"><span class="linenos">451</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">other_df</span><span class="p">)</span>
-</span><span id="DataFrame-452"><a href="#DataFrame-452"><span class="linenos">452</span></a> <span class="p">]</span>
-</span><span id="DataFrame-453"><a href="#DataFrame-453"><span class="linenos">453</span></a> <span class="n">column_value_mapping</span> <span class="o">=</span> <span class="p">{</span>
-</span><span id="DataFrame-454"><a href="#DataFrame-454"><span class="linenos">454</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame-455"><a href="#DataFrame-455"><span class="linenos">455</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">this</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Star</span><span class="p">)</span>
-</span><span id="DataFrame-456"><a href="#DataFrame-456"><span class="linenos">456</span></a> <span class="k">else</span> <span class="n">column</span><span class="o">.</span><span class="n">sql</span><span class="p">():</span> <span class="n">column</span>
-</span><span id="DataFrame-457"><a href="#DataFrame-457"><span class="linenos">457</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">other_columns</span> <span class="o">+</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">join_columns</span>
-</span><span id="DataFrame-458"><a href="#DataFrame-458"><span class="linenos">458</span></a> <span class="p">}</span>
-</span><span id="DataFrame-459"><a href="#DataFrame-459"><span class="linenos">459</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-460"><a href="#DataFrame-460"><span class="linenos">460</span></a> <span class="n">column_value_mapping</span><span class="p">[</span><span class="n">name</span><span class="p">]</span>
-</span><span id="DataFrame-461"><a href="#DataFrame-461"><span class="linenos">461</span></a> <span class="k">for</span> <span class="n">name</span> <span class="ow">in</span> <span class="p">{</span><span class="n">x</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">join_columns</span> <span class="o">+</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span><span class="p">}</span>
-</span><span id="DataFrame-462"><a href="#DataFrame-462"><span class="linenos">462</span></a> <span class="p">]</span>
-</span><span id="DataFrame-463"><a href="#DataFrame-463"><span class="linenos">463</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
-</span><span id="DataFrame-464"><a href="#DataFrame-464"><span class="linenos">464</span></a> <span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
-</span><span id="DataFrame-465"><a href="#DataFrame-465"><span class="linenos">465</span></a> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">,</span> <span class="n">on</span><span class="o">=</span><span class="n">join_clause</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">join_type</span><span class="o">=</span><span class="n">join_type</span>
-</span><span id="DataFrame-466"><a href="#DataFrame-466"><span class="linenos">466</span></a> <span class="p">)</span>
-</span><span id="DataFrame-467"><a href="#DataFrame-467"><span class="linenos">467</span></a> <span class="p">)</span>
-</span><span id="DataFrame-468"><a href="#DataFrame-468"><span class="linenos">468</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">expression</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">_add_ctes_to_expression</span><span class="p">(</span>
-</span><span id="DataFrame-469"><a href="#DataFrame-469"><span class="linenos">469</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">other_df</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span>
-</span><span id="DataFrame-470"><a href="#DataFrame-470"><span class="linenos">470</span></a> <span class="p">)</span>
-</span><span id="DataFrame-471"><a href="#DataFrame-471"><span class="linenos">471</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="p">)</span>
-</span><span id="DataFrame-472"><a href="#DataFrame-472"><span class="linenos">472</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="o">.</span><span class="n">__wrapped__</span><span class="p">(</span><span class="n">new_df</span><span class="p">,</span> <span class="o">*</span><span class="n">all_columns</span><span class="p">)</span>
-</span><span id="DataFrame-473"><a href="#DataFrame-473"><span class="linenos">473</span></a> <span class="k">return</span> <span class="n">new_df</span>
-</span><span id="DataFrame-474"><a href="#DataFrame-474"><span class="linenos">474</span></a>
-</span><span id="DataFrame-475"><a href="#DataFrame-475"><span class="linenos">475</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">ORDER_BY</span><span class="p">)</span>
-</span><span id="DataFrame-476"><a href="#DataFrame-476"><span class="linenos">476</span></a> <span class="k">def</span> <span class="nf">orderBy</span><span class="p">(</span>
-</span><span id="DataFrame-477"><a href="#DataFrame-477"><span class="linenos">477</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame-478"><a href="#DataFrame-478"><span class="linenos">478</span></a> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span>
-</span><span id="DataFrame-479"><a href="#DataFrame-479"><span class="linenos">479</span></a> <span class="n">ascending</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-480"><a href="#DataFrame-480"><span class="linenos">480</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-481"><a href="#DataFrame-481"><span class="linenos">481</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame-482"><a href="#DataFrame-482"><span class="linenos">482</span></a><span class="sd"> This implementation lets any ordered columns take priority over whatever is provided in `ascending`. Spark</span>
-</span><span id="DataFrame-483"><a href="#DataFrame-483"><span class="linenos">483</span></a><span class="sd"> has irregular behavior and can result in runtime errors. Users shouldn&#39;t be mixing the two anyways so this</span>
-</span><span id="DataFrame-484"><a href="#DataFrame-484"><span class="linenos">484</span></a><span class="sd"> is unlikely to come up.</span>
-</span><span id="DataFrame-485"><a href="#DataFrame-485"><span class="linenos">485</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame-486"><a href="#DataFrame-486"><span class="linenos">486</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame-487"><a href="#DataFrame-487"><span class="linenos">487</span></a> <span class="n">pre_ordered_col_indexes</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-488"><a href="#DataFrame-488"><span class="linenos">488</span></a> <span class="n">x</span>
-</span><span id="DataFrame-489"><a href="#DataFrame-489"><span class="linenos">489</span></a> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="p">[</span>
-</span><span id="DataFrame-490"><a href="#DataFrame-490"><span class="linenos">490</span></a> <span class="n">i</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
-</span><span id="DataFrame-491"><a href="#DataFrame-491"><span class="linenos">491</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame-492"><a href="#DataFrame-492"><span class="linenos">492</span></a> <span class="p">]</span>
-</span><span id="DataFrame-493"><a href="#DataFrame-493"><span class="linenos">493</span></a> <span class="k">if</span> <span class="n">x</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
-</span><span id="DataFrame-494"><a href="#DataFrame-494"><span class="linenos">494</span></a> <span class="p">]</span>
-</span><span id="DataFrame-495"><a href="#DataFrame-495"><span class="linenos">495</span></a> <span class="k">if</span> <span class="n">ascending</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="DataFrame-496"><a href="#DataFrame-496"><span class="linenos">496</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="kc">True</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame-497"><a href="#DataFrame-497"><span class="linenos">497</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ascending</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
-</span><span id="DataFrame-498"><a href="#DataFrame-498"><span class="linenos">498</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="n">ascending</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame-499"><a href="#DataFrame-499"><span class="linenos">499</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ascending</span><span class="p">)]</span>
-</span><span id="DataFrame-500"><a href="#DataFrame-500"><span class="linenos">500</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
-</span><span id="DataFrame-501"><a href="#DataFrame-501"><span class="linenos">501</span></a> <span class="n">ascending</span>
-</span><span id="DataFrame-502"><a href="#DataFrame-502"><span class="linenos">502</span></a> <span class="p">),</span> <span class="s2">&quot;The length of items in ascending must equal the number of columns provided&quot;</span>
-</span><span id="DataFrame-503"><a href="#DataFrame-503"><span class="linenos">503</span></a> <span class="n">col_and_ascending</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">ascending</span><span class="p">))</span>
-</span><span id="DataFrame-504"><a href="#DataFrame-504"><span class="linenos">504</span></a> <span class="n">order_by_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-505"><a href="#DataFrame-505"><span class="linenos">505</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="ow">not</span> <span class="n">asc</span><span class="p">)</span>
-</span><span id="DataFrame-506"><a href="#DataFrame-506"><span class="linenos">506</span></a> <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">pre_ordered_col_indexes</span>
-</span><span id="DataFrame-507"><a href="#DataFrame-507"><span class="linenos">507</span></a> <span class="k">else</span> <span class="n">columns</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">column_expression</span>
-</span><span id="DataFrame-508"><a href="#DataFrame-508"><span class="linenos">508</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">asc</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">col_and_ascending</span><span class="p">)</span>
-</span><span id="DataFrame-509"><a href="#DataFrame-509"><span class="linenos">509</span></a> <span class="p">]</span>
-</span><span id="DataFrame-510"><a href="#DataFrame-510"><span class="linenos">510</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="o">*</span><span class="n">order_by_columns</span><span class="p">))</span>
+</span><span id="DataFrame-359"><a href="#DataFrame-359"><span class="linenos">359</span></a> <span class="n">ambiguous_cols</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-360"><a href="#DataFrame-360"><span class="linenos">360</span></a> <span class="n">col</span>
+</span><span id="DataFrame-361"><a href="#DataFrame-361"><span class="linenos">361</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span>
+</span><span id="DataFrame-362"><a href="#DataFrame-362"><span class="linenos">362</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="o">.</span><span class="n">table</span>
+</span><span id="DataFrame-363"><a href="#DataFrame-363"><span class="linenos">363</span></a> <span class="p">]</span>
+</span><span id="DataFrame-364"><a href="#DataFrame-364"><span class="linenos">364</span></a> <span class="k">if</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
+</span><span id="DataFrame-365"><a href="#DataFrame-365"><span class="linenos">365</span></a> <span class="n">join_table_identifiers</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-366"><a href="#DataFrame-366"><span class="linenos">366</span></a> <span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-367"><a href="#DataFrame-367"><span class="linenos">367</span></a> <span class="p">]</span>
+</span><span id="DataFrame-368"><a href="#DataFrame-368"><span class="linenos">368</span></a> <span class="n">cte_names_in_join</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">join_table_identifiers</span><span class="p">]</span>
+</span><span id="DataFrame-369"><a href="#DataFrame-369"><span class="linenos">369</span></a> <span class="c1"># If we have columns that resolve to multiple CTE expressions then we want to use each CTE left-to-right</span>
+</span><span id="DataFrame-370"><a href="#DataFrame-370"><span class="linenos">370</span></a> <span class="c1"># and therefore we allow multiple columns with the same name in the result. This matches the behavior</span>
+</span><span id="DataFrame-371"><a href="#DataFrame-371"><span class="linenos">371</span></a> <span class="c1"># of Spark.</span>
+</span><span id="DataFrame-372"><a href="#DataFrame-372"><span class="linenos">372</span></a> <span class="n">resolved_column_position</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">col</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">}</span>
+</span><span id="DataFrame-373"><a href="#DataFrame-373"><span class="linenos">373</span></a> <span class="k">for</span> <span class="n">ambiguous_col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
+</span><span id="DataFrame-374"><a href="#DataFrame-374"><span class="linenos">374</span></a> <span class="n">ctes_with_column</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-375"><a href="#DataFrame-375"><span class="linenos">375</span></a> <span class="n">cte</span>
+</span><span id="DataFrame-376"><a href="#DataFrame-376"><span class="linenos">376</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span>
+</span><span id="DataFrame-377"><a href="#DataFrame-377"><span class="linenos">377</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte_names_in_join</span>
+</span><span id="DataFrame-378"><a href="#DataFrame-378"><span class="linenos">378</span></a> <span class="ow">and</span> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span>
+</span><span id="DataFrame-379"><a href="#DataFrame-379"><span class="linenos">379</span></a> <span class="p">]</span>
+</span><span id="DataFrame-380"><a href="#DataFrame-380"><span class="linenos">380</span></a> <span class="c1"># Check if there is a CTE with this column that we haven&#39;t used before. If so, use it. Otherwise,</span>
+</span><span id="DataFrame-381"><a href="#DataFrame-381"><span class="linenos">381</span></a> <span class="c1"># use the same CTE we used before</span>
+</span><span id="DataFrame-382"><a href="#DataFrame-382"><span class="linenos">382</span></a> <span class="n">cte</span> <span class="o">=</span> <span class="n">seq_get</span><span class="p">(</span><span class="n">ctes_with_column</span><span class="p">,</span> <span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
+</span><span id="DataFrame-383"><a href="#DataFrame-383"><span class="linenos">383</span></a> <span class="k">if</span> <span class="n">cte</span><span class="p">:</span>
+</span><span id="DataFrame-384"><a href="#DataFrame-384"><span class="linenos">384</span></a> <span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
+</span><span id="DataFrame-385"><a href="#DataFrame-385"><span class="linenos">385</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-386"><a href="#DataFrame-386"><span class="linenos">386</span></a> <span class="n">cte</span> <span class="o">=</span> <span class="n">ctes_with_column</span><span class="p">[</span><span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]]</span>
+</span><span id="DataFrame-387"><a href="#DataFrame-387"><span class="linenos">387</span></a> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;table&quot;</span><span class="p">,</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
+</span><span id="DataFrame-388"><a href="#DataFrame-388"><span class="linenos">388</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
+</span><span id="DataFrame-389"><a href="#DataFrame-389"><span class="linenos">389</span></a> <span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">expression</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">),</span> <span class="o">**</span><span class="n">kwargs</span>
+</span><span id="DataFrame-390"><a href="#DataFrame-390"><span class="linenos">390</span></a> <span class="p">)</span>
+</span><span id="DataFrame-391"><a href="#DataFrame-391"><span class="linenos">391</span></a>
+</span><span id="DataFrame-392"><a href="#DataFrame-392"><span class="linenos">392</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame-393"><a href="#DataFrame-393"><span class="linenos">393</span></a> <span class="k">def</span> <span class="nf">alias</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-394"><a href="#DataFrame-394"><span class="linenos">394</span></a> <span class="n">new_sequence_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_sequence_id</span>
+</span><span id="DataFrame-395"><a href="#DataFrame-395"><span class="linenos">395</span></a> <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-396"><a href="#DataFrame-396"><span class="linenos">396</span></a> <span class="k">for</span> <span class="n">join_hint</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">:</span>
+</span><span id="DataFrame-397"><a href="#DataFrame-397"><span class="linenos">397</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">join_hint</span><span class="o">.</span><span class="n">expressions</span><span class="p">:</span>
+</span><span id="DataFrame-398"><a href="#DataFrame-398"><span class="linenos">398</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">:</span>
+</span><span id="DataFrame-399"><a href="#DataFrame-399"><span class="linenos">399</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;this&quot;</span><span class="p">,</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_col</span><span class="p">(</span><span class="n">new_sequence_id</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-400"><a href="#DataFrame-400"><span class="linenos">400</span></a> <span class="n">df</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_add_alias_to_mapping</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">new_sequence_id</span><span class="p">)</span>
+</span><span id="DataFrame-401"><a href="#DataFrame-401"><span class="linenos">401</span></a> <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">(</span><span class="n">sequence_id</span><span class="o">=</span><span class="n">new_sequence_id</span><span class="p">)</span>
+</span><span id="DataFrame-402"><a href="#DataFrame-402"><span class="linenos">402</span></a>
+</span><span id="DataFrame-403"><a href="#DataFrame-403"><span class="linenos">403</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">WHERE</span><span class="p">)</span>
+</span><span id="DataFrame-404"><a href="#DataFrame-404"><span class="linenos">404</span></a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">bool</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-405"><a href="#DataFrame-405"><span class="linenos">405</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">column</span><span class="p">)</span>
+</span><span id="DataFrame-406"><a href="#DataFrame-406"><span class="linenos">406</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">))</span>
+</span><span id="DataFrame-407"><a href="#DataFrame-407"><span class="linenos">407</span></a>
+</span><span id="DataFrame-408"><a href="#DataFrame-408"><span class="linenos">408</span></a> <span class="nb">filter</span> <span class="o">=</span> <span class="n">where</span>
+</span><span id="DataFrame-409"><a href="#DataFrame-409"><span class="linenos">409</span></a>
+</span><span id="DataFrame-410"><a href="#DataFrame-410"><span class="linenos">410</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">GROUP_BY</span><span class="p">)</span>
+</span><span id="DataFrame-411"><a href="#DataFrame-411"><span class="linenos">411</span></a> <span class="k">def</span> <span class="nf">groupBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">GroupedData</span><span class="p">:</span>
+</span><span id="DataFrame-412"><a href="#DataFrame-412"><span class="linenos">412</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame-413"><a href="#DataFrame-413"><span class="linenos">413</span></a> <span class="k">return</span> <span class="n">GroupedData</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_op</span><span class="p">)</span>
+</span><span id="DataFrame-414"><a href="#DataFrame-414"><span class="linenos">414</span></a>
+</span><span id="DataFrame-415"><a href="#DataFrame-415"><span class="linenos">415</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-416"><a href="#DataFrame-416"><span class="linenos">416</span></a> <span class="k">def</span> <span class="nf">agg</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">exprs</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-417"><a href="#DataFrame-417"><span class="linenos">417</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">exprs</span><span class="p">)</span>
+</span><span id="DataFrame-418"><a href="#DataFrame-418"><span class="linenos">418</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="o">*</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame-419"><a href="#DataFrame-419"><span class="linenos">419</span></a>
+</span><span id="DataFrame-420"><a href="#DataFrame-420"><span class="linenos">420</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-421"><a href="#DataFrame-421"><span class="linenos">421</span></a> <span class="k">def</span> <span class="nf">join</span><span class="p">(</span>
+</span><span id="DataFrame-422"><a href="#DataFrame-422"><span class="linenos">422</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame-423"><a href="#DataFrame-423"><span class="linenos">423</span></a> <span class="n">other_df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
+</span><span id="DataFrame-424"><a href="#DataFrame-424"><span class="linenos">424</span></a> <span class="n">on</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">Column</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Column</span><span class="p">]],</span>
+</span><span id="DataFrame-425"><a href="#DataFrame-425"><span class="linenos">425</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;inner&quot;</span><span class="p">,</span>
+</span><span id="DataFrame-426"><a href="#DataFrame-426"><span class="linenos">426</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
+</span><span id="DataFrame-427"><a href="#DataFrame-427"><span class="linenos">427</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-428"><a href="#DataFrame-428"><span class="linenos">428</span></a> <span class="n">other_df</span> <span class="o">=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
+</span><span id="DataFrame-429"><a href="#DataFrame-429"><span class="linenos">429</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">on</span><span class="p">)</span>
+</span><span id="DataFrame-430"><a href="#DataFrame-430"><span class="linenos">430</span></a> <span class="c1"># We will determine actual &quot;join on&quot; expression later so we don&#39;t provide it at first</span>
+</span><span id="DataFrame-431"><a href="#DataFrame-431"><span class="linenos">431</span></a> <span class="n">join_expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
+</span><span id="DataFrame-432"><a href="#DataFrame-432"><span class="linenos">432</span></a> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">,</span> <span class="n">join_type</span><span class="o">=</span><span class="n">how</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
+</span><span id="DataFrame-433"><a href="#DataFrame-433"><span class="linenos">433</span></a> <span class="p">)</span>
+</span><span id="DataFrame-434"><a href="#DataFrame-434"><span class="linenos">434</span></a> <span class="n">join_expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_add_ctes_to_expression</span><span class="p">(</span><span class="n">join_expression</span><span class="p">,</span> <span class="n">other_df</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">)</span>
+</span><span id="DataFrame-435"><a href="#DataFrame-435"><span class="linenos">435</span></a> <span class="n">self_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame-436"><a href="#DataFrame-436"><span class="linenos">436</span></a> <span class="n">other_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">other_df</span><span class="p">)</span>
+</span><span id="DataFrame-437"><a href="#DataFrame-437"><span class="linenos">437</span></a> <span class="c1"># Determines the join clause and select columns to be used based on what type of columns were provided for</span>
+</span><span id="DataFrame-438"><a href="#DataFrame-438"><span class="linenos">438</span></a> <span class="c1"># the join. The columns returned change based on how the on expression is provided.</span>
+</span><span id="DataFrame-439"><a href="#DataFrame-439"><span class="linenos">439</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">join_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
+</span><span id="DataFrame-440"><a href="#DataFrame-440"><span class="linenos">440</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame-441"><a href="#DataFrame-441"><span class="linenos">441</span></a><span class="sd"> Unique characteristics of join on column names only:</span>
+</span><span id="DataFrame-442"><a href="#DataFrame-442"><span class="linenos">442</span></a><span class="sd"> * The column names are put at the front of the select list</span>
+</span><span id="DataFrame-443"><a href="#DataFrame-443"><span class="linenos">443</span></a><span class="sd"> * The column names are deduplicated across the entire select list and only the column names (other dups are allowed)</span>
+</span><span id="DataFrame-444"><a href="#DataFrame-444"><span class="linenos">444</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame-445"><a href="#DataFrame-445"><span class="linenos">445</span></a> <span class="n">table_names</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-446"><a href="#DataFrame-446"><span class="linenos">446</span></a> <span class="n">table</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame-447"><a href="#DataFrame-447"><span class="linenos">447</span></a> <span class="k">for</span> <span class="n">table</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame-448"><a href="#DataFrame-448"><span class="linenos">448</span></a> <span class="p">]</span>
+</span><span id="DataFrame-449"><a href="#DataFrame-449"><span class="linenos">449</span></a> <span class="n">potential_ctes</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-450"><a href="#DataFrame-450"><span class="linenos">450</span></a> <span class="n">cte</span>
+</span><span id="DataFrame-451"><a href="#DataFrame-451"><span class="linenos">451</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">join_expression</span><span class="o">.</span><span class="n">ctes</span>
+</span><span id="DataFrame-452"><a href="#DataFrame-452"><span class="linenos">452</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">table_names</span>
+</span><span id="DataFrame-453"><a href="#DataFrame-453"><span class="linenos">453</span></a> <span class="ow">and</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">!=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span>
+</span><span id="DataFrame-454"><a href="#DataFrame-454"><span class="linenos">454</span></a> <span class="p">]</span>
+</span><span id="DataFrame-455"><a href="#DataFrame-455"><span class="linenos">455</span></a> <span class="c1"># Determine the table to reference for the left side of the join by checking each of the left side</span>
+</span><span id="DataFrame-456"><a href="#DataFrame-456"><span class="linenos">456</span></a> <span class="c1"># tables to see if they have the column being referenced.</span>
+</span><span id="DataFrame-457"><a href="#DataFrame-457"><span class="linenos">457</span></a> <span class="n">join_column_pairs</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="DataFrame-458"><a href="#DataFrame-458"><span class="linenos">458</span></a> <span class="k">for</span> <span class="n">join_column</span> <span class="ow">in</span> <span class="n">join_columns</span><span class="p">:</span>
+</span><span id="DataFrame-459"><a href="#DataFrame-459"><span class="linenos">459</span></a> <span class="n">num_matching_ctes</span> <span class="o">=</span> <span class="mi">0</span>
+</span><span id="DataFrame-460"><a href="#DataFrame-460"><span class="linenos">460</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">potential_ctes</span><span class="p">:</span>
+</span><span id="DataFrame-461"><a href="#DataFrame-461"><span class="linenos">461</span></a> <span class="k">if</span> <span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span><span class="p">:</span>
+</span><span id="DataFrame-462"><a href="#DataFrame-462"><span class="linenos">462</span></a> <span class="n">left_column</span> <span class="o">=</span> <span class="n">join_column</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
+</span><span id="DataFrame-463"><a href="#DataFrame-463"><span class="linenos">463</span></a> <span class="n">right_column</span> <span class="o">=</span> <span class="n">join_column</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">)</span>
+</span><span id="DataFrame-464"><a href="#DataFrame-464"><span class="linenos">464</span></a> <span class="n">join_column_pairs</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">left_column</span><span class="p">,</span> <span class="n">right_column</span><span class="p">))</span>
+</span><span id="DataFrame-465"><a href="#DataFrame-465"><span class="linenos">465</span></a> <span class="n">num_matching_ctes</span> <span class="o">+=</span> <span class="mi">1</span>
+</span><span id="DataFrame-466"><a href="#DataFrame-466"><span class="linenos">466</span></a> <span class="k">if</span> <span class="n">num_matching_ctes</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
+</span><span id="DataFrame-467"><a href="#DataFrame-467"><span class="linenos">467</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+</span><span id="DataFrame-468"><a href="#DataFrame-468"><span class="linenos">468</span></a> <span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="si">}</span><span class="s2"> is ambiguous. Please specify the table name.&quot;</span>
+</span><span id="DataFrame-469"><a href="#DataFrame-469"><span class="linenos">469</span></a> <span class="p">)</span>
+</span><span id="DataFrame-470"><a href="#DataFrame-470"><span class="linenos">470</span></a> <span class="k">elif</span> <span class="n">num_matching_ctes</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+</span><span id="DataFrame-471"><a href="#DataFrame-471"><span class="linenos">471</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+</span><span id="DataFrame-472"><a href="#DataFrame-472"><span class="linenos">472</span></a> <span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="si">}</span><span class="s2"> does not exist in any of the tables.&quot;</span>
+</span><span id="DataFrame-473"><a href="#DataFrame-473"><span class="linenos">473</span></a> <span class="p">)</span>
+</span><span id="DataFrame-474"><a href="#DataFrame-474"><span class="linenos">474</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span>
+</span><span id="DataFrame-475"><a href="#DataFrame-475"><span class="linenos">475</span></a> <span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span>
+</span><span id="DataFrame-476"><a href="#DataFrame-476"><span class="linenos">476</span></a> <span class="p">[</span><span class="n">left_column</span> <span class="o">==</span> <span class="n">right_column</span> <span class="k">for</span> <span class="n">left_column</span><span class="p">,</span> <span class="n">right_column</span> <span class="ow">in</span> <span class="n">join_column_pairs</span><span class="p">],</span>
+</span><span id="DataFrame-477"><a href="#DataFrame-477"><span class="linenos">477</span></a> <span class="p">)</span>
+</span><span id="DataFrame-478"><a href="#DataFrame-478"><span class="linenos">478</span></a> <span class="n">join_column_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">left_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">left_col</span><span class="p">,</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">join_column_pairs</span><span class="p">]</span>
+</span><span id="DataFrame-479"><a href="#DataFrame-479"><span class="linenos">479</span></a> <span class="c1"># To match spark behavior only the join clause gets deduplicated and it gets put in the front of the column list</span>
+</span><span id="DataFrame-480"><a href="#DataFrame-480"><span class="linenos">480</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-481"><a href="#DataFrame-481"><span class="linenos">481</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame-482"><a href="#DataFrame-482"><span class="linenos">482</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">this</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Star</span><span class="p">)</span>
+</span><span id="DataFrame-483"><a href="#DataFrame-483"><span class="linenos">483</span></a> <span class="k">else</span> <span class="n">column</span><span class="o">.</span><span class="n">sql</span><span class="p">()</span>
+</span><span id="DataFrame-484"><a href="#DataFrame-484"><span class="linenos">484</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span>
+</span><span id="DataFrame-485"><a href="#DataFrame-485"><span class="linenos">485</span></a> <span class="p">]</span>
+</span><span id="DataFrame-486"><a href="#DataFrame-486"><span class="linenos">486</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-487"><a href="#DataFrame-487"><span class="linenos">487</span></a> <span class="n">column_name</span>
+</span><span id="DataFrame-488"><a href="#DataFrame-488"><span class="linenos">488</span></a> <span class="k">for</span> <span class="n">column_name</span> <span class="ow">in</span> <span class="n">select_column_names</span>
+</span><span id="DataFrame-489"><a href="#DataFrame-489"><span class="linenos">489</span></a> <span class="k">if</span> <span class="n">column_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">join_column_names</span>
+</span><span id="DataFrame-490"><a href="#DataFrame-490"><span class="linenos">490</span></a> <span class="p">]</span>
+</span><span id="DataFrame-491"><a href="#DataFrame-491"><span class="linenos">491</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="n">join_column_names</span> <span class="o">+</span> <span class="n">select_column_names</span>
+</span><span id="DataFrame-492"><a href="#DataFrame-492"><span class="linenos">492</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-493"><a href="#DataFrame-493"><span class="linenos">493</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame-494"><a href="#DataFrame-494"><span class="linenos">494</span></a><span class="sd"> Unique characteristics of join on expressions:</span>
+</span><span id="DataFrame-495"><a href="#DataFrame-495"><span class="linenos">495</span></a><span class="sd"> * There is no deduplication of the results.</span>
+</span><span id="DataFrame-496"><a href="#DataFrame-496"><span class="linenos">496</span></a><span class="sd"> * The left join dataframe columns go first and right come after. No sort preference is given to join columns</span>
+</span><span id="DataFrame-497"><a href="#DataFrame-497"><span class="linenos">497</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame-498"><a href="#DataFrame-498"><span class="linenos">498</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">join_columns</span><span class="p">,</span> <span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame-499"><a href="#DataFrame-499"><span class="linenos">499</span></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">join_columns</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
+</span><span id="DataFrame-500"><a href="#DataFrame-500"><span class="linenos">500</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">join_columns</span><span class="p">)]</span>
+</span><span id="DataFrame-501"><a href="#DataFrame-501"><span class="linenos">501</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">join_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+</span><span id="DataFrame-502"><a href="#DataFrame-502"><span class="linenos">502</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span><span class="p">]</span>
+</span><span id="DataFrame-503"><a href="#DataFrame-503"><span class="linenos">503</span></a>
+</span><span id="DataFrame-504"><a href="#DataFrame-504"><span class="linenos">504</span></a> <span class="c1"># Update the on expression with the actual join clause to replace the dummy one from before</span>
+</span><span id="DataFrame-505"><a href="#DataFrame-505"><span class="linenos">505</span></a> <span class="n">join_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;joins&quot;</span><span class="p">][</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;on&quot;</span><span class="p">,</span> <span class="n">join_clause</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-506"><a href="#DataFrame-506"><span class="linenos">506</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame-507"><a href="#DataFrame-507"><span class="linenos">507</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">)</span>
+</span><span id="DataFrame-508"><a href="#DataFrame-508"><span class="linenos">508</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="p">)</span>
+</span><span id="DataFrame-509"><a href="#DataFrame-509"><span class="linenos">509</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="o">.</span><span class="n">__wrapped__</span><span class="p">(</span><span class="n">new_df</span><span class="p">,</span> <span class="o">*</span><span class="n">select_column_names</span><span class="p">)</span>
+</span><span id="DataFrame-510"><a href="#DataFrame-510"><span class="linenos">510</span></a> <span class="k">return</span> <span class="n">new_df</span>
</span><span id="DataFrame-511"><a href="#DataFrame-511"><span class="linenos">511</span></a>
-</span><span id="DataFrame-512"><a href="#DataFrame-512"><span class="linenos">512</span></a> <span class="n">sort</span> <span class="o">=</span> <span class="n">orderBy</span>
-</span><span id="DataFrame-513"><a href="#DataFrame-513"><span class="linenos">513</span></a>
-</span><span id="DataFrame-514"><a href="#DataFrame-514"><span class="linenos">514</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-515"><a href="#DataFrame-515"><span class="linenos">515</span></a> <span class="k">def</span> <span class="nf">union</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-516"><a href="#DataFrame-516"><span class="linenos">516</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
-</span><span id="DataFrame-517"><a href="#DataFrame-517"><span class="linenos">517</span></a>
-</span><span id="DataFrame-518"><a href="#DataFrame-518"><span class="linenos">518</span></a> <span class="n">unionAll</span> <span class="o">=</span> <span class="n">union</span>
-</span><span id="DataFrame-519"><a href="#DataFrame-519"><span class="linenos">519</span></a>
-</span><span id="DataFrame-520"><a href="#DataFrame-520"><span class="linenos">520</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-521"><a href="#DataFrame-521"><span class="linenos">521</span></a> <span class="k">def</span> <span class="nf">unionByName</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">allowMissingColumns</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
-</span><span id="DataFrame-522"><a href="#DataFrame-522"><span class="linenos">522</span></a> <span class="n">l_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">columns</span>
-</span><span id="DataFrame-523"><a href="#DataFrame-523"><span class="linenos">523</span></a> <span class="n">r_columns</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">columns</span>
-</span><span id="DataFrame-524"><a href="#DataFrame-524"><span class="linenos">524</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
-</span><span id="DataFrame-525"><a href="#DataFrame-525"><span class="linenos">525</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
-</span><span id="DataFrame-526"><a href="#DataFrame-526"><span class="linenos">526</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
-</span><span id="DataFrame-527"><a href="#DataFrame-527"><span class="linenos">527</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-528"><a href="#DataFrame-528"><span class="linenos">528</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="p">[]</span>
-</span><span id="DataFrame-529"><a href="#DataFrame-529"><span class="linenos">529</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="p">[]</span>
-</span><span id="DataFrame-530"><a href="#DataFrame-530"><span class="linenos">530</span></a> <span class="n">r_columns_unused</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="n">r_columns</span><span class="p">)</span>
-</span><span id="DataFrame-531"><a href="#DataFrame-531"><span class="linenos">531</span></a> <span class="k">for</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">l_columns</span><span class="p">:</span>
-</span><span id="DataFrame-532"><a href="#DataFrame-532"><span class="linenos">532</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
-</span><span id="DataFrame-533"><a href="#DataFrame-533"><span class="linenos">533</span></a> <span class="k">if</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">r_columns</span><span class="p">:</span>
-</span><span id="DataFrame-534"><a href="#DataFrame-534"><span class="linenos">534</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
-</span><span id="DataFrame-535"><a href="#DataFrame-535"><span class="linenos">535</span></a> <span class="n">r_columns_unused</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
-</span><span id="DataFrame-536"><a href="#DataFrame-536"><span class="linenos">536</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-537"><a href="#DataFrame-537"><span class="linenos">537</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">l_column</span><span class="p">))</span>
-</span><span id="DataFrame-538"><a href="#DataFrame-538"><span class="linenos">538</span></a> <span class="k">for</span> <span class="n">r_column</span> <span class="ow">in</span> <span class="n">r_columns_unused</span><span class="p">:</span>
-</span><span id="DataFrame-539"><a href="#DataFrame-539"><span class="linenos">539</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">r_column</span><span class="p">))</span>
-</span><span id="DataFrame-540"><a href="#DataFrame-540"><span class="linenos">540</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">r_column</span><span class="p">)</span>
-</span><span id="DataFrame-541"><a href="#DataFrame-541"><span class="linenos">541</span></a> <span class="n">r_df</span> <span class="o">=</span> <span class="p">(</span>
-</span><span id="DataFrame-542"><a href="#DataFrame-542"><span class="linenos">542</span></a> <span class="n">other</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">r_expressions</span><span class="p">))</span>
-</span><span id="DataFrame-543"><a href="#DataFrame-543"><span class="linenos">543</span></a> <span class="p">)</span>
-</span><span id="DataFrame-544"><a href="#DataFrame-544"><span class="linenos">544</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-545"><a href="#DataFrame-545"><span class="linenos">545</span></a> <span class="k">if</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
-</span><span id="DataFrame-546"><a href="#DataFrame-546"><span class="linenos">546</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">l_expressions</span><span class="p">))</span>
-</span><span id="DataFrame-547"><a href="#DataFrame-547"><span class="linenos">547</span></a> <span class="k">return</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">r_df</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-512"><a href="#DataFrame-512"><span class="linenos">512</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">ORDER_BY</span><span class="p">)</span>
+</span><span id="DataFrame-513"><a href="#DataFrame-513"><span class="linenos">513</span></a> <span class="k">def</span> <span class="nf">orderBy</span><span class="p">(</span>
+</span><span id="DataFrame-514"><a href="#DataFrame-514"><span class="linenos">514</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame-515"><a href="#DataFrame-515"><span class="linenos">515</span></a> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span>
+</span><span id="DataFrame-516"><a href="#DataFrame-516"><span class="linenos">516</span></a> <span class="n">ascending</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-517"><a href="#DataFrame-517"><span class="linenos">517</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-518"><a href="#DataFrame-518"><span class="linenos">518</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame-519"><a href="#DataFrame-519"><span class="linenos">519</span></a><span class="sd"> This implementation lets any ordered columns take priority over whatever is provided in `ascending`. Spark</span>
+</span><span id="DataFrame-520"><a href="#DataFrame-520"><span class="linenos">520</span></a><span class="sd"> has irregular behavior and can result in runtime errors. Users shouldn&#39;t be mixing the two anyways so this</span>
+</span><span id="DataFrame-521"><a href="#DataFrame-521"><span class="linenos">521</span></a><span class="sd"> is unlikely to come up.</span>
+</span><span id="DataFrame-522"><a href="#DataFrame-522"><span class="linenos">522</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame-523"><a href="#DataFrame-523"><span class="linenos">523</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame-524"><a href="#DataFrame-524"><span class="linenos">524</span></a> <span class="n">pre_ordered_col_indexes</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-525"><a href="#DataFrame-525"><span class="linenos">525</span></a> <span class="n">x</span>
+</span><span id="DataFrame-526"><a href="#DataFrame-526"><span class="linenos">526</span></a> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="p">[</span>
+</span><span id="DataFrame-527"><a href="#DataFrame-527"><span class="linenos">527</span></a> <span class="n">i</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
+</span><span id="DataFrame-528"><a href="#DataFrame-528"><span class="linenos">528</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame-529"><a href="#DataFrame-529"><span class="linenos">529</span></a> <span class="p">]</span>
+</span><span id="DataFrame-530"><a href="#DataFrame-530"><span class="linenos">530</span></a> <span class="k">if</span> <span class="n">x</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
+</span><span id="DataFrame-531"><a href="#DataFrame-531"><span class="linenos">531</span></a> <span class="p">]</span>
+</span><span id="DataFrame-532"><a href="#DataFrame-532"><span class="linenos">532</span></a> <span class="k">if</span> <span class="n">ascending</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="DataFrame-533"><a href="#DataFrame-533"><span class="linenos">533</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="kc">True</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame-534"><a href="#DataFrame-534"><span class="linenos">534</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ascending</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
+</span><span id="DataFrame-535"><a href="#DataFrame-535"><span class="linenos">535</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="n">ascending</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame-536"><a href="#DataFrame-536"><span class="linenos">536</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ascending</span><span class="p">)]</span>
+</span><span id="DataFrame-537"><a href="#DataFrame-537"><span class="linenos">537</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
+</span><span id="DataFrame-538"><a href="#DataFrame-538"><span class="linenos">538</span></a> <span class="n">ascending</span>
+</span><span id="DataFrame-539"><a href="#DataFrame-539"><span class="linenos">539</span></a> <span class="p">),</span> <span class="s2">&quot;The length of items in ascending must equal the number of columns provided&quot;</span>
+</span><span id="DataFrame-540"><a href="#DataFrame-540"><span class="linenos">540</span></a> <span class="n">col_and_ascending</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">ascending</span><span class="p">))</span>
+</span><span id="DataFrame-541"><a href="#DataFrame-541"><span class="linenos">541</span></a> <span class="n">order_by_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-542"><a href="#DataFrame-542"><span class="linenos">542</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="ow">not</span> <span class="n">asc</span><span class="p">)</span>
+</span><span id="DataFrame-543"><a href="#DataFrame-543"><span class="linenos">543</span></a> <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">pre_ordered_col_indexes</span>
+</span><span id="DataFrame-544"><a href="#DataFrame-544"><span class="linenos">544</span></a> <span class="k">else</span> <span class="n">columns</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">column_expression</span>
+</span><span id="DataFrame-545"><a href="#DataFrame-545"><span class="linenos">545</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">asc</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">col_and_ascending</span><span class="p">)</span>
+</span><span id="DataFrame-546"><a href="#DataFrame-546"><span class="linenos">546</span></a> <span class="p">]</span>
+</span><span id="DataFrame-547"><a href="#DataFrame-547"><span class="linenos">547</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="o">*</span><span class="n">order_by_columns</span><span class="p">))</span>
</span><span id="DataFrame-548"><a href="#DataFrame-548"><span class="linenos">548</span></a>
-</span><span id="DataFrame-549"><a href="#DataFrame-549"><span class="linenos">549</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-550"><a href="#DataFrame-550"><span class="linenos">550</span></a> <span class="k">def</span> <span class="nf">intersect</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-551"><a href="#DataFrame-551"><span class="linenos">551</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrame-552"><a href="#DataFrame-552"><span class="linenos">552</span></a>
-</span><span id="DataFrame-553"><a href="#DataFrame-553"><span class="linenos">553</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-554"><a href="#DataFrame-554"><span class="linenos">554</span></a> <span class="k">def</span> <span class="nf">intersectAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-555"><a href="#DataFrame-555"><span class="linenos">555</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-549"><a href="#DataFrame-549"><span class="linenos">549</span></a> <span class="n">sort</span> <span class="o">=</span> <span class="n">orderBy</span>
+</span><span id="DataFrame-550"><a href="#DataFrame-550"><span class="linenos">550</span></a>
+</span><span id="DataFrame-551"><a href="#DataFrame-551"><span class="linenos">551</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-552"><a href="#DataFrame-552"><span class="linenos">552</span></a> <span class="k">def</span> <span class="nf">union</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-553"><a href="#DataFrame-553"><span class="linenos">553</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-554"><a href="#DataFrame-554"><span class="linenos">554</span></a>
+</span><span id="DataFrame-555"><a href="#DataFrame-555"><span class="linenos">555</span></a> <span class="n">unionAll</span> <span class="o">=</span> <span class="n">union</span>
</span><span id="DataFrame-556"><a href="#DataFrame-556"><span class="linenos">556</span></a>
</span><span id="DataFrame-557"><a href="#DataFrame-557"><span class="linenos">557</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-558"><a href="#DataFrame-558"><span class="linenos">558</span></a> <span class="k">def</span> <span class="nf">exceptAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-559"><a href="#DataFrame-559"><span class="linenos">559</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Except</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
-</span><span id="DataFrame-560"><a href="#DataFrame-560"><span class="linenos">560</span></a>
-</span><span id="DataFrame-561"><a href="#DataFrame-561"><span class="linenos">561</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-562"><a href="#DataFrame-562"><span class="linenos">562</span></a> <span class="k">def</span> <span class="nf">distinct</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-563"><a href="#DataFrame-563"><span class="linenos">563</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">distinct</span><span class="p">())</span>
-</span><span id="DataFrame-564"><a href="#DataFrame-564"><span class="linenos">564</span></a>
-</span><span id="DataFrame-565"><a href="#DataFrame-565"><span class="linenos">565</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-566"><a href="#DataFrame-566"><span class="linenos">566</span></a> <span class="k">def</span> <span class="nf">dropDuplicates</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
-</span><span id="DataFrame-567"><a href="#DataFrame-567"><span class="linenos">567</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">subset</span><span class="p">:</span>
-</span><span id="DataFrame-568"><a href="#DataFrame-568"><span class="linenos">568</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
-</span><span id="DataFrame-569"><a href="#DataFrame-569"><span class="linenos">569</span></a> <span class="n">column_names</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
-</span><span id="DataFrame-570"><a href="#DataFrame-570"><span class="linenos">570</span></a> <span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span>
-</span><span id="DataFrame-571"><a href="#DataFrame-571"><span class="linenos">571</span></a> <span class="k">return</span> <span class="p">(</span>
-</span><span id="DataFrame-572"><a href="#DataFrame-572"><span class="linenos">572</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-573"><a href="#DataFrame-573"><span class="linenos">573</span></a> <span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span>
-</span><span id="DataFrame-574"><a href="#DataFrame-574"><span class="linenos">574</span></a> <span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
-</span><span id="DataFrame-575"><a href="#DataFrame-575"><span class="linenos">575</span></a> <span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span>
-</span><span id="DataFrame-576"><a href="#DataFrame-576"><span class="linenos">576</span></a> <span class="p">)</span>
-</span><span id="DataFrame-577"><a href="#DataFrame-577"><span class="linenos">577</span></a>
-</span><span id="DataFrame-578"><a href="#DataFrame-578"><span class="linenos">578</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-579"><a href="#DataFrame-579"><span class="linenos">579</span></a> <span class="k">def</span> <span class="nf">dropna</span><span class="p">(</span>
-</span><span id="DataFrame-580"><a href="#DataFrame-580"><span class="linenos">580</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame-581"><a href="#DataFrame-581"><span class="linenos">581</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
-</span><span id="DataFrame-582"><a href="#DataFrame-582"><span class="linenos">582</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-583"><a href="#DataFrame-583"><span class="linenos">583</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-584"><a href="#DataFrame-584"><span class="linenos">584</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-585"><a href="#DataFrame-585"><span class="linenos">585</span></a> <span class="n">minimum_non_null</span> <span class="o">=</span> <span class="n">thresh</span> <span class="ow">or</span> <span class="mi">0</span> <span class="c1"># will be determined later if thresh is null</span>
-</span><span id="DataFrame-586"><a href="#DataFrame-586"><span class="linenos">586</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-587"><a href="#DataFrame-587"><span class="linenos">587</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-588"><a href="#DataFrame-588"><span class="linenos">588</span></a> <span class="k">if</span> <span class="n">subset</span><span class="p">:</span>
-</span><span id="DataFrame-589"><a href="#DataFrame-589"><span class="linenos">589</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
-</span><span id="DataFrame-590"><a href="#DataFrame-590"><span class="linenos">590</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-591"><a href="#DataFrame-591"><span class="linenos">591</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="n">all_columns</span>
-</span><span id="DataFrame-592"><a href="#DataFrame-592"><span class="linenos">592</span></a> <span class="k">if</span> <span class="n">thresh</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="DataFrame-593"><a href="#DataFrame-593"><span class="linenos">593</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">how</span> <span class="o">==</span> <span class="s2">&quot;any&quot;</span> <span class="k">else</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span>
-</span><span id="DataFrame-594"><a href="#DataFrame-594"><span class="linenos">594</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-595"><a href="#DataFrame-595"><span class="linenos">595</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span> <span class="o">-</span> <span class="n">minimum_non_null</span> <span class="o">+</span> <span class="mi">1</span>
-</span><span id="DataFrame-596"><a href="#DataFrame-596"><span class="linenos">596</span></a> <span class="k">if</span> <span class="n">minimum_num_nulls</span> <span class="o">&gt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">):</span>
-</span><span id="DataFrame-597"><a href="#DataFrame-597"><span class="linenos">597</span></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
-</span><span id="DataFrame-598"><a href="#DataFrame-598"><span class="linenos">598</span></a> <span class="sa">f</span><span class="s2">&quot;The minimum num nulls for dropna must be less than or equal to the number of columns. &quot;</span>
-</span><span id="DataFrame-599"><a href="#DataFrame-599"><span class="linenos">599</span></a> <span class="sa">f</span><span class="s2">&quot;Minimum num nulls: </span><span class="si">{</span><span class="n">minimum_num_nulls</span><span class="si">}</span><span class="s2">, Num Columns: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
-</span><span id="DataFrame-600"><a href="#DataFrame-600"><span class="linenos">600</span></a> <span class="p">)</span>
-</span><span id="DataFrame-601"><a href="#DataFrame-601"><span class="linenos">601</span></a> <span class="n">if_null_checks</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-602"><a href="#DataFrame-602"><span class="linenos">602</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">null_check_columns</span>
-</span><span id="DataFrame-603"><a href="#DataFrame-603"><span class="linenos">603</span></a> <span class="p">]</span>
-</span><span id="DataFrame-604"><a href="#DataFrame-604"><span class="linenos">604</span></a> <span class="n">nulls_added_together</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">+</span> <span class="n">y</span><span class="p">,</span> <span class="n">if_null_checks</span><span class="p">)</span>
-</span><span id="DataFrame-605"><a href="#DataFrame-605"><span class="linenos">605</span></a> <span class="n">num_nulls</span> <span class="o">=</span> <span class="n">nulls_added_together</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span>
-</span><span id="DataFrame-606"><a href="#DataFrame-606"><span class="linenos">606</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">num_nulls</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrame-607"><a href="#DataFrame-607"><span class="linenos">607</span></a> <span class="n">filtered_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">minimum_num_nulls</span><span class="p">))</span>
-</span><span id="DataFrame-608"><a href="#DataFrame-608"><span class="linenos">608</span></a> <span class="n">final_df</span> <span class="o">=</span> <span class="n">filtered_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">all_columns</span><span class="p">)</span>
-</span><span id="DataFrame-609"><a href="#DataFrame-609"><span class="linenos">609</span></a> <span class="k">return</span> <span class="n">final_df</span>
-</span><span id="DataFrame-610"><a href="#DataFrame-610"><span class="linenos">610</span></a>
-</span><span id="DataFrame-611"><a href="#DataFrame-611"><span class="linenos">611</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-612"><a href="#DataFrame-612"><span class="linenos">612</span></a> <span class="k">def</span> <span class="nf">fillna</span><span class="p">(</span>
-</span><span id="DataFrame-613"><a href="#DataFrame-613"><span class="linenos">613</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame-614"><a href="#DataFrame-614"><span class="linenos">614</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">ColumnLiterals</span><span class="p">],</span>
-</span><span id="DataFrame-615"><a href="#DataFrame-615"><span class="linenos">615</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-616"><a href="#DataFrame-616"><span class="linenos">616</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-617"><a href="#DataFrame-617"><span class="linenos">617</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame-618"><a href="#DataFrame-618"><span class="linenos">618</span></a><span class="sd"> Functionality Difference: If you provide a value to replace a null and that type conflicts</span>
-</span><span id="DataFrame-619"><a href="#DataFrame-619"><span class="linenos">619</span></a><span class="sd"> with the type of the column then PySpark will just ignore your replacement.</span>
-</span><span id="DataFrame-620"><a href="#DataFrame-620"><span class="linenos">620</span></a><span class="sd"> This will try to cast them to be the same in some cases. So they won&#39;t always match.</span>
-</span><span id="DataFrame-621"><a href="#DataFrame-621"><span class="linenos">621</span></a><span class="sd"> Best to not mix types so make sure replacement is the same type as the column</span>
-</span><span id="DataFrame-622"><a href="#DataFrame-622"><span class="linenos">622</span></a>
-</span><span id="DataFrame-623"><a href="#DataFrame-623"><span class="linenos">623</span></a><span class="sd"> Possibility for improvement: Use `typeof` function to get the type of the column</span>
-</span><span id="DataFrame-624"><a href="#DataFrame-624"><span class="linenos">624</span></a><span class="sd"> and check if it matches the type of the value provided. If not then make it null.</span>
-</span><span id="DataFrame-625"><a href="#DataFrame-625"><span class="linenos">625</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame-626"><a href="#DataFrame-626"><span class="linenos">626</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
-</span><span id="DataFrame-627"><a href="#DataFrame-627"><span class="linenos">627</span></a>
-</span><span id="DataFrame-628"><a href="#DataFrame-628"><span class="linenos">628</span></a> <span class="n">values</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="DataFrame-629"><a href="#DataFrame-629"><span class="linenos">629</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="DataFrame-630"><a href="#DataFrame-630"><span class="linenos">630</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-631"><a href="#DataFrame-631"><span class="linenos">631</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-632"><a href="#DataFrame-632"><span class="linenos">632</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
-</span><span id="DataFrame-633"><a href="#DataFrame-633"><span class="linenos">633</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
-</span><span id="DataFrame-634"><a href="#DataFrame-634"><span class="linenos">634</span></a> <span class="n">values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
-</span><span id="DataFrame-635"><a href="#DataFrame-635"><span class="linenos">635</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="p">))</span>
-</span><span id="DataFrame-636"><a href="#DataFrame-636"><span class="linenos">636</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">columns</span><span class="p">:</span>
-</span><span id="DataFrame-637"><a href="#DataFrame-637"><span class="linenos">637</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
-</span><span id="DataFrame-638"><a href="#DataFrame-638"><span class="linenos">638</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">values</span><span class="p">:</span>
-</span><span id="DataFrame-639"><a href="#DataFrame-639"><span class="linenos">639</span></a> <span class="n">values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame-640"><a href="#DataFrame-640"><span class="linenos">640</span></a> <span class="n">value_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">values</span><span class="p">]</span>
-</span><span id="DataFrame-641"><a href="#DataFrame-641"><span class="linenos">641</span></a>
-</span><span id="DataFrame-642"><a href="#DataFrame-642"><span class="linenos">642</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span>
-</span><span id="DataFrame-643"><a href="#DataFrame-643"><span class="linenos">643</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="p">(</span>
-</span><span id="DataFrame-644"><a href="#DataFrame-644"><span class="linenos">644</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
-</span><span id="DataFrame-645"><a href="#DataFrame-645"><span class="linenos">645</span></a> <span class="p">)</span>
-</span><span id="DataFrame-646"><a href="#DataFrame-646"><span class="linenos">646</span></a> <span class="k">for</span> <span class="n">column</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">value_columns</span><span class="p">)</span>
-</span><span id="DataFrame-647"><a href="#DataFrame-647"><span class="linenos">647</span></a> <span class="p">}</span>
-</span><span id="DataFrame-648"><a href="#DataFrame-648"><span class="linenos">648</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">null_replacement_mapping</span><span class="p">}</span>
-</span><span id="DataFrame-649"><a href="#DataFrame-649"><span class="linenos">649</span></a> <span class="n">null_replacement_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-650"><a href="#DataFrame-650"><span class="linenos">650</span></a> <span class="n">null_replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span>
-</span><span id="DataFrame-651"><a href="#DataFrame-651"><span class="linenos">651</span></a> <span class="p">]</span>
-</span><span id="DataFrame-652"><a href="#DataFrame-652"><span class="linenos">652</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">null_replacement_columns</span><span class="p">)</span>
-</span><span id="DataFrame-653"><a href="#DataFrame-653"><span class="linenos">653</span></a> <span class="k">return</span> <span class="n">new_df</span>
-</span><span id="DataFrame-654"><a href="#DataFrame-654"><span class="linenos">654</span></a>
-</span><span id="DataFrame-655"><a href="#DataFrame-655"><span class="linenos">655</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-656"><a href="#DataFrame-656"><span class="linenos">656</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
-</span><span id="DataFrame-657"><a href="#DataFrame-657"><span class="linenos">657</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame-658"><a href="#DataFrame-658"><span class="linenos">658</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
-</span><span id="DataFrame-659"><a href="#DataFrame-659"><span class="linenos">659</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-660"><a href="#DataFrame-660"><span class="linenos">660</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Collection</span><span class="p">[</span><span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">|</span> <span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-661"><a href="#DataFrame-661"><span class="linenos">661</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-662"><a href="#DataFrame-662"><span class="linenos">662</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
-</span><span id="DataFrame-663"><a href="#DataFrame-663"><span class="linenos">663</span></a>
-</span><span id="DataFrame-664"><a href="#DataFrame-664"><span class="linenos">664</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="DataFrame-665"><a href="#DataFrame-665"><span class="linenos">665</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-666"><a href="#DataFrame-666"><span class="linenos">666</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-667"><a href="#DataFrame-667"><span class="linenos">667</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
-</span><span id="DataFrame-668"><a href="#DataFrame-668"><span class="linenos">668</span></a>
-</span><span id="DataFrame-669"><a href="#DataFrame-669"><span class="linenos">669</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
-</span><span id="DataFrame-670"><a href="#DataFrame-670"><span class="linenos">670</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
-</span><span id="DataFrame-671"><a href="#DataFrame-671"><span class="linenos">671</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span>
-</span><span id="DataFrame-672"><a href="#DataFrame-672"><span class="linenos">672</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
-</span><span id="DataFrame-673"><a href="#DataFrame-673"><span class="linenos">673</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="n">old_values</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
-</span><span id="DataFrame-674"><a href="#DataFrame-674"><span class="linenos">674</span></a> <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">list</span><span class="p">),</span> <span class="s2">&quot;value must be a list since the replacements are a list&quot;</span>
-</span><span id="DataFrame-675"><a href="#DataFrame-675"><span class="linenos">675</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
-</span><span id="DataFrame-676"><a href="#DataFrame-676"><span class="linenos">676</span></a> <span class="n">value</span>
-</span><span id="DataFrame-677"><a href="#DataFrame-677"><span class="linenos">677</span></a> <span class="p">),</span> <span class="s2">&quot;the replacements and values must be the same length&quot;</span>
-</span><span id="DataFrame-678"><a href="#DataFrame-678"><span class="linenos">678</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="n">to_replace</span>
-</span><span id="DataFrame-679"><a href="#DataFrame-679"><span class="linenos">679</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="n">value</span>
-</span><span id="DataFrame-680"><a href="#DataFrame-680"><span class="linenos">680</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-681"><a href="#DataFrame-681"><span class="linenos">681</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">to_replace</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame-682"><a href="#DataFrame-682"><span class="linenos">682</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame-683"><a href="#DataFrame-683"><span class="linenos">683</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">old_values</span><span class="p">]</span>
-</span><span id="DataFrame-684"><a href="#DataFrame-684"><span class="linenos">684</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">new_values</span><span class="p">]</span>
-</span><span id="DataFrame-685"><a href="#DataFrame-685"><span class="linenos">685</span></a>
-</span><span id="DataFrame-686"><a href="#DataFrame-686"><span class="linenos">686</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{}</span>
-</span><span id="DataFrame-687"><a href="#DataFrame-687"><span class="linenos">687</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span>
-</span><span id="DataFrame-688"><a href="#DataFrame-688"><span class="linenos">688</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
-</span><span id="DataFrame-689"><a href="#DataFrame-689"><span class="linenos">689</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">old_values</span><span class="p">,</span> <span class="n">new_values</span><span class="p">)):</span>
-</span><span id="DataFrame-690"><a href="#DataFrame-690"><span class="linenos">690</span></a> <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
-</span><span id="DataFrame-691"><a href="#DataFrame-691"><span class="linenos">691</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span>
-</span><span id="DataFrame-692"><a href="#DataFrame-692"><span class="linenos">692</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-693"><a href="#DataFrame-693"><span class="linenos">693</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="c1"># type: ignore</span>
-</span><span id="DataFrame-694"><a href="#DataFrame-694"><span class="linenos">694</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span>
-</span><span id="DataFrame-695"><a href="#DataFrame-695"><span class="linenos">695</span></a> <span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame-696"><a href="#DataFrame-696"><span class="linenos">696</span></a> <span class="p">)</span>
-</span><span id="DataFrame-697"><a href="#DataFrame-697"><span class="linenos">697</span></a>
-</span><span id="DataFrame-698"><a href="#DataFrame-698"><span class="linenos">698</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">replacement_mapping</span><span class="p">}</span>
-</span><span id="DataFrame-699"><a href="#DataFrame-699"><span class="linenos">699</span></a> <span class="n">replacement_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">]</span>
-</span><span id="DataFrame-700"><a href="#DataFrame-700"><span class="linenos">700</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">replacement_columns</span><span class="p">)</span>
-</span><span id="DataFrame-701"><a href="#DataFrame-701"><span class="linenos">701</span></a> <span class="k">return</span> <span class="n">new_df</span>
-</span><span id="DataFrame-702"><a href="#DataFrame-702"><span class="linenos">702</span></a>
-</span><span id="DataFrame-703"><a href="#DataFrame-703"><span class="linenos">703</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-704"><a href="#DataFrame-704"><span class="linenos">704</span></a> <span class="k">def</span> <span class="nf">withColumn</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">colName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-705"><a href="#DataFrame-705"><span class="linenos">705</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
-</span><span id="DataFrame-706"><a href="#DataFrame-706"><span class="linenos">706</span></a> <span class="n">existing_col_names</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">named_selects</span>
-</span><span id="DataFrame-707"><a href="#DataFrame-707"><span class="linenos">707</span></a> <span class="n">existing_col_index</span> <span class="o">=</span> <span class="p">(</span>
-</span><span id="DataFrame-708"><a href="#DataFrame-708"><span class="linenos">708</span></a> <span class="n">existing_col_names</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">colName</span><span class="p">)</span> <span class="k">if</span> <span class="n">colName</span> <span class="ow">in</span> <span class="n">existing_col_names</span> <span class="k">else</span> <span class="kc">None</span>
-</span><span id="DataFrame-709"><a href="#DataFrame-709"><span class="linenos">709</span></a> <span class="p">)</span>
-</span><span id="DataFrame-710"><a href="#DataFrame-710"><span class="linenos">710</span></a> <span class="k">if</span> <span class="n">existing_col_index</span><span class="p">:</span>
-</span><span id="DataFrame-711"><a href="#DataFrame-711"><span class="linenos">711</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-712"><a href="#DataFrame-712"><span class="linenos">712</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span><span class="p">[</span><span class="n">existing_col_index</span><span class="p">]</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">expression</span>
-</span><span id="DataFrame-713"><a href="#DataFrame-713"><span class="linenos">713</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-714"><a href="#DataFrame-714"><span class="linenos">714</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">colName</span><span class="p">),</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrame-715"><a href="#DataFrame-715"><span class="linenos">715</span></a>
-</span><span id="DataFrame-716"><a href="#DataFrame-716"><span class="linenos">716</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-717"><a href="#DataFrame-717"><span class="linenos">717</span></a> <span class="k">def</span> <span class="nf">withColumnRenamed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">existing</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">new</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
-</span><span id="DataFrame-718"><a href="#DataFrame-718"><span class="linenos">718</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-719"><a href="#DataFrame-719"><span class="linenos">719</span></a> <span class="n">existing_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-720"><a href="#DataFrame-720"><span class="linenos">720</span></a> <span class="n">expression</span>
-</span><span id="DataFrame-721"><a href="#DataFrame-721"><span class="linenos">721</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span>
-</span><span id="DataFrame-722"><a href="#DataFrame-722"><span class="linenos">722</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="n">existing</span>
-</span><span id="DataFrame-723"><a href="#DataFrame-723"><span class="linenos">723</span></a> <span class="p">]</span>
-</span><span id="DataFrame-724"><a href="#DataFrame-724"><span class="linenos">724</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">existing_columns</span><span class="p">:</span>
-</span><span id="DataFrame-725"><a href="#DataFrame-725"><span class="linenos">725</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Tried to rename a column that doesn&#39;t exist&quot;</span><span class="p">)</span>
-</span><span id="DataFrame-726"><a href="#DataFrame-726"><span class="linenos">726</span></a> <span class="k">for</span> <span class="n">existing_column</span> <span class="ow">in</span> <span class="n">existing_columns</span><span class="p">:</span>
-</span><span id="DataFrame-727"><a href="#DataFrame-727"><span class="linenos">727</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">existing_column</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
-</span><span id="DataFrame-728"><a href="#DataFrame-728"><span class="linenos">728</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">existing_column</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">new</span><span class="p">))</span>
-</span><span id="DataFrame-729"><a href="#DataFrame-729"><span class="linenos">729</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-730"><a href="#DataFrame-730"><span class="linenos">730</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;alias&quot;</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">new</span><span class="p">))</span>
-</span><span id="DataFrame-731"><a href="#DataFrame-731"><span class="linenos">731</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-732"><a href="#DataFrame-732"><span class="linenos">732</span></a>
-</span><span id="DataFrame-733"><a href="#DataFrame-733"><span class="linenos">733</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-734"><a href="#DataFrame-734"><span class="linenos">734</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-735"><a href="#DataFrame-735"><span class="linenos">735</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-736"><a href="#DataFrame-736"><span class="linenos">736</span></a> <span class="n">drop_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame-737"><a href="#DataFrame-737"><span class="linenos">737</span></a> <span class="n">new_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-738"><a href="#DataFrame-738"><span class="linenos">738</span></a> <span class="n">col</span>
-</span><span id="DataFrame-739"><a href="#DataFrame-739"><span class="linenos">739</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">all_columns</span>
-</span><span id="DataFrame-740"><a href="#DataFrame-740"><span class="linenos">740</span></a> <span class="k">if</span> <span class="n">col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="n">drop_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">drop_column</span> <span class="ow">in</span> <span class="n">drop_cols</span><span class="p">]</span>
-</span><span id="DataFrame-741"><a href="#DataFrame-741"><span class="linenos">741</span></a> <span class="p">]</span>
-</span><span id="DataFrame-742"><a href="#DataFrame-742"><span class="linenos">742</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">new_columns</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
-</span><span id="DataFrame-743"><a href="#DataFrame-743"><span class="linenos">743</span></a>
-</span><span id="DataFrame-744"><a href="#DataFrame-744"><span class="linenos">744</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">LIMIT</span><span class="p">)</span>
-</span><span id="DataFrame-745"><a href="#DataFrame-745"><span class="linenos">745</span></a> <span class="k">def</span> <span class="nf">limit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-746"><a href="#DataFrame-746"><span class="linenos">746</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="n">num</span><span class="p">))</span>
-</span><span id="DataFrame-747"><a href="#DataFrame-747"><span class="linenos">747</span></a>
-</span><span id="DataFrame-748"><a href="#DataFrame-748"><span class="linenos">748</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame-749"><a href="#DataFrame-749"><span class="linenos">749</span></a> <span class="k">def</span> <span class="nf">hint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="n">parameters</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-750"><a href="#DataFrame-750"><span class="linenos">750</span></a> <span class="n">parameter_list</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
-</span><span id="DataFrame-751"><a href="#DataFrame-751"><span class="linenos">751</span></a> <span class="n">parameter_columns</span> <span class="o">=</span> <span class="p">(</span>
-</span><span id="DataFrame-752"><a href="#DataFrame-752"><span class="linenos">752</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">parameter_list</span><span class="p">)</span>
-</span><span id="DataFrame-753"><a href="#DataFrame-753"><span class="linenos">753</span></a> <span class="k">if</span> <span class="n">parameters</span>
-</span><span id="DataFrame-754"><a href="#DataFrame-754"><span class="linenos">754</span></a> <span class="k">else</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">])</span>
-</span><span id="DataFrame-755"><a href="#DataFrame-755"><span class="linenos">755</span></a> <span class="p">)</span>
-</span><span id="DataFrame-756"><a href="#DataFrame-756"><span class="linenos">756</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">parameter_columns</span><span class="p">)</span>
-</span><span id="DataFrame-757"><a href="#DataFrame-757"><span class="linenos">757</span></a>
-</span><span id="DataFrame-758"><a href="#DataFrame-758"><span class="linenos">758</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame-759"><a href="#DataFrame-759"><span class="linenos">759</span></a> <span class="k">def</span> <span class="nf">repartition</span><span class="p">(</span>
-</span><span id="DataFrame-760"><a href="#DataFrame-760"><span class="linenos">760</span></a> <span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">ColumnOrName</span><span class="p">],</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">ColumnOrName</span>
-</span><span id="DataFrame-761"><a href="#DataFrame-761"><span class="linenos">761</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-762"><a href="#DataFrame-762"><span class="linenos">762</span></a> <span class="n">num_partition_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">numPartitions</span><span class="p">)</span>
-</span><span id="DataFrame-763"><a href="#DataFrame-763"><span class="linenos">763</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame-764"><a href="#DataFrame-764"><span class="linenos">764</span></a> <span class="n">args</span> <span class="o">=</span> <span class="n">num_partition_cols</span> <span class="o">+</span> <span class="n">columns</span>
-</span><span id="DataFrame-765"><a href="#DataFrame-765"><span class="linenos">765</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;repartition&quot;</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
-</span><span id="DataFrame-766"><a href="#DataFrame-766"><span class="linenos">766</span></a>
-</span><span id="DataFrame-767"><a href="#DataFrame-767"><span class="linenos">767</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame-768"><a href="#DataFrame-768"><span class="linenos">768</span></a> <span class="k">def</span> <span class="nf">coalesce</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-769"><a href="#DataFrame-769"><span class="linenos">769</span></a> <span class="n">num_partitions</span> <span class="o">=</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="n">numPartitions</span><span class="p">])</span>
-</span><span id="DataFrame-770"><a href="#DataFrame-770"><span class="linenos">770</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;coalesce&quot;</span><span class="p">,</span> <span class="n">num_partitions</span><span class="p">)</span>
-</span><span id="DataFrame-771"><a href="#DataFrame-771"><span class="linenos">771</span></a>
-</span><span id="DataFrame-772"><a href="#DataFrame-772"><span class="linenos">772</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame-773"><a href="#DataFrame-773"><span class="linenos">773</span></a> <span class="k">def</span> <span class="nf">cache</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-774"><a href="#DataFrame-774"><span class="linenos">774</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storage_level</span><span class="o">=</span><span class="s2">&quot;MEMORY_AND_DISK&quot;</span><span class="p">)</span>
-</span><span id="DataFrame-775"><a href="#DataFrame-775"><span class="linenos">775</span></a>
-</span><span id="DataFrame-776"><a href="#DataFrame-776"><span class="linenos">776</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame-777"><a href="#DataFrame-777"><span class="linenos">777</span></a> <span class="k">def</span> <span class="nf">persist</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">storageLevel</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;MEMORY_AND_DISK_SER&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-778"><a href="#DataFrame-778"><span class="linenos">778</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame-779"><a href="#DataFrame-779"><span class="linenos">779</span></a><span class="sd"> Storage Level Options: https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-aux-cache-cache-table.html</span>
-</span><span id="DataFrame-780"><a href="#DataFrame-780"><span class="linenos">780</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame-781"><a href="#DataFrame-781"><span class="linenos">781</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storageLevel</span><span class="p">)</span>
+</span><span id="DataFrame-558"><a href="#DataFrame-558"><span class="linenos">558</span></a> <span class="k">def</span> <span class="nf">unionByName</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">allowMissingColumns</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
+</span><span id="DataFrame-559"><a href="#DataFrame-559"><span class="linenos">559</span></a> <span class="n">l_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">columns</span>
+</span><span id="DataFrame-560"><a href="#DataFrame-560"><span class="linenos">560</span></a> <span class="n">r_columns</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">columns</span>
+</span><span id="DataFrame-561"><a href="#DataFrame-561"><span class="linenos">561</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
+</span><span id="DataFrame-562"><a href="#DataFrame-562"><span class="linenos">562</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
+</span><span id="DataFrame-563"><a href="#DataFrame-563"><span class="linenos">563</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
+</span><span id="DataFrame-564"><a href="#DataFrame-564"><span class="linenos">564</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-565"><a href="#DataFrame-565"><span class="linenos">565</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="DataFrame-566"><a href="#DataFrame-566"><span class="linenos">566</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="DataFrame-567"><a href="#DataFrame-567"><span class="linenos">567</span></a> <span class="n">r_columns_unused</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="n">r_columns</span><span class="p">)</span>
+</span><span id="DataFrame-568"><a href="#DataFrame-568"><span class="linenos">568</span></a> <span class="k">for</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">l_columns</span><span class="p">:</span>
+</span><span id="DataFrame-569"><a href="#DataFrame-569"><span class="linenos">569</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
+</span><span id="DataFrame-570"><a href="#DataFrame-570"><span class="linenos">570</span></a> <span class="k">if</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">r_columns</span><span class="p">:</span>
+</span><span id="DataFrame-571"><a href="#DataFrame-571"><span class="linenos">571</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
+</span><span id="DataFrame-572"><a href="#DataFrame-572"><span class="linenos">572</span></a> <span class="n">r_columns_unused</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
+</span><span id="DataFrame-573"><a href="#DataFrame-573"><span class="linenos">573</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-574"><a href="#DataFrame-574"><span class="linenos">574</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">l_column</span><span class="p">))</span>
+</span><span id="DataFrame-575"><a href="#DataFrame-575"><span class="linenos">575</span></a> <span class="k">for</span> <span class="n">r_column</span> <span class="ow">in</span> <span class="n">r_columns_unused</span><span class="p">:</span>
+</span><span id="DataFrame-576"><a href="#DataFrame-576"><span class="linenos">576</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">r_column</span><span class="p">))</span>
+</span><span id="DataFrame-577"><a href="#DataFrame-577"><span class="linenos">577</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">r_column</span><span class="p">)</span>
+</span><span id="DataFrame-578"><a href="#DataFrame-578"><span class="linenos">578</span></a> <span class="n">r_df</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="DataFrame-579"><a href="#DataFrame-579"><span class="linenos">579</span></a> <span class="n">other</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">r_expressions</span><span class="p">))</span>
+</span><span id="DataFrame-580"><a href="#DataFrame-580"><span class="linenos">580</span></a> <span class="p">)</span>
+</span><span id="DataFrame-581"><a href="#DataFrame-581"><span class="linenos">581</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-582"><a href="#DataFrame-582"><span class="linenos">582</span></a> <span class="k">if</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
+</span><span id="DataFrame-583"><a href="#DataFrame-583"><span class="linenos">583</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">l_expressions</span><span class="p">))</span>
+</span><span id="DataFrame-584"><a href="#DataFrame-584"><span class="linenos">584</span></a> <span class="k">return</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">r_df</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-585"><a href="#DataFrame-585"><span class="linenos">585</span></a>
+</span><span id="DataFrame-586"><a href="#DataFrame-586"><span class="linenos">586</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-587"><a href="#DataFrame-587"><span class="linenos">587</span></a> <span class="k">def</span> <span class="nf">intersect</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-588"><a href="#DataFrame-588"><span class="linenos">588</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
+</span><span id="DataFrame-589"><a href="#DataFrame-589"><span class="linenos">589</span></a>
+</span><span id="DataFrame-590"><a href="#DataFrame-590"><span class="linenos">590</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-591"><a href="#DataFrame-591"><span class="linenos">591</span></a> <span class="k">def</span> <span class="nf">intersectAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-592"><a href="#DataFrame-592"><span class="linenos">592</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-593"><a href="#DataFrame-593"><span class="linenos">593</span></a>
+</span><span id="DataFrame-594"><a href="#DataFrame-594"><span class="linenos">594</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-595"><a href="#DataFrame-595"><span class="linenos">595</span></a> <span class="k">def</span> <span class="nf">exceptAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-596"><a href="#DataFrame-596"><span class="linenos">596</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Except</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-597"><a href="#DataFrame-597"><span class="linenos">597</span></a>
+</span><span id="DataFrame-598"><a href="#DataFrame-598"><span class="linenos">598</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-599"><a href="#DataFrame-599"><span class="linenos">599</span></a> <span class="k">def</span> <span class="nf">distinct</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-600"><a href="#DataFrame-600"><span class="linenos">600</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">distinct</span><span class="p">())</span>
+</span><span id="DataFrame-601"><a href="#DataFrame-601"><span class="linenos">601</span></a>
+</span><span id="DataFrame-602"><a href="#DataFrame-602"><span class="linenos">602</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-603"><a href="#DataFrame-603"><span class="linenos">603</span></a> <span class="k">def</span> <span class="nf">dropDuplicates</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
+</span><span id="DataFrame-604"><a href="#DataFrame-604"><span class="linenos">604</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">subset</span><span class="p">:</span>
+</span><span id="DataFrame-605"><a href="#DataFrame-605"><span class="linenos">605</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
+</span><span id="DataFrame-606"><a href="#DataFrame-606"><span class="linenos">606</span></a> <span class="n">column_names</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
+</span><span id="DataFrame-607"><a href="#DataFrame-607"><span class="linenos">607</span></a> <span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span>
+</span><span id="DataFrame-608"><a href="#DataFrame-608"><span class="linenos">608</span></a> <span class="k">return</span> <span class="p">(</span>
+</span><span id="DataFrame-609"><a href="#DataFrame-609"><span class="linenos">609</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-610"><a href="#DataFrame-610"><span class="linenos">610</span></a> <span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span>
+</span><span id="DataFrame-611"><a href="#DataFrame-611"><span class="linenos">611</span></a> <span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
+</span><span id="DataFrame-612"><a href="#DataFrame-612"><span class="linenos">612</span></a> <span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span>
+</span><span id="DataFrame-613"><a href="#DataFrame-613"><span class="linenos">613</span></a> <span class="p">)</span>
+</span><span id="DataFrame-614"><a href="#DataFrame-614"><span class="linenos">614</span></a>
+</span><span id="DataFrame-615"><a href="#DataFrame-615"><span class="linenos">615</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-616"><a href="#DataFrame-616"><span class="linenos">616</span></a> <span class="k">def</span> <span class="nf">dropna</span><span class="p">(</span>
+</span><span id="DataFrame-617"><a href="#DataFrame-617"><span class="linenos">617</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame-618"><a href="#DataFrame-618"><span class="linenos">618</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
+</span><span id="DataFrame-619"><a href="#DataFrame-619"><span class="linenos">619</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-620"><a href="#DataFrame-620"><span class="linenos">620</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-621"><a href="#DataFrame-621"><span class="linenos">621</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-622"><a href="#DataFrame-622"><span class="linenos">622</span></a> <span class="n">minimum_non_null</span> <span class="o">=</span> <span class="n">thresh</span> <span class="ow">or</span> <span class="mi">0</span> <span class="c1"># will be determined later if thresh is null</span>
+</span><span id="DataFrame-623"><a href="#DataFrame-623"><span class="linenos">623</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-624"><a href="#DataFrame-624"><span class="linenos">624</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-625"><a href="#DataFrame-625"><span class="linenos">625</span></a> <span class="k">if</span> <span class="n">subset</span><span class="p">:</span>
+</span><span id="DataFrame-626"><a href="#DataFrame-626"><span class="linenos">626</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
+</span><span id="DataFrame-627"><a href="#DataFrame-627"><span class="linenos">627</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-628"><a href="#DataFrame-628"><span class="linenos">628</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="n">all_columns</span>
+</span><span id="DataFrame-629"><a href="#DataFrame-629"><span class="linenos">629</span></a> <span class="k">if</span> <span class="n">thresh</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="DataFrame-630"><a href="#DataFrame-630"><span class="linenos">630</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">how</span> <span class="o">==</span> <span class="s2">&quot;any&quot;</span> <span class="k">else</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span>
+</span><span id="DataFrame-631"><a href="#DataFrame-631"><span class="linenos">631</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-632"><a href="#DataFrame-632"><span class="linenos">632</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span> <span class="o">-</span> <span class="n">minimum_non_null</span> <span class="o">+</span> <span class="mi">1</span>
+</span><span id="DataFrame-633"><a href="#DataFrame-633"><span class="linenos">633</span></a> <span class="k">if</span> <span class="n">minimum_num_nulls</span> <span class="o">&gt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">):</span>
+</span><span id="DataFrame-634"><a href="#DataFrame-634"><span class="linenos">634</span></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
+</span><span id="DataFrame-635"><a href="#DataFrame-635"><span class="linenos">635</span></a> <span class="sa">f</span><span class="s2">&quot;The minimum num nulls for dropna must be less than or equal to the number of columns. &quot;</span>
+</span><span id="DataFrame-636"><a href="#DataFrame-636"><span class="linenos">636</span></a> <span class="sa">f</span><span class="s2">&quot;Minimum num nulls: </span><span class="si">{</span><span class="n">minimum_num_nulls</span><span class="si">}</span><span class="s2">, Num Columns: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
+</span><span id="DataFrame-637"><a href="#DataFrame-637"><span class="linenos">637</span></a> <span class="p">)</span>
+</span><span id="DataFrame-638"><a href="#DataFrame-638"><span class="linenos">638</span></a> <span class="n">if_null_checks</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-639"><a href="#DataFrame-639"><span class="linenos">639</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">null_check_columns</span>
+</span><span id="DataFrame-640"><a href="#DataFrame-640"><span class="linenos">640</span></a> <span class="p">]</span>
+</span><span id="DataFrame-641"><a href="#DataFrame-641"><span class="linenos">641</span></a> <span class="n">nulls_added_together</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">+</span> <span class="n">y</span><span class="p">,</span> <span class="n">if_null_checks</span><span class="p">)</span>
+</span><span id="DataFrame-642"><a href="#DataFrame-642"><span class="linenos">642</span></a> <span class="n">num_nulls</span> <span class="o">=</span> <span class="n">nulls_added_together</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span>
+</span><span id="DataFrame-643"><a href="#DataFrame-643"><span class="linenos">643</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">num_nulls</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="DataFrame-644"><a href="#DataFrame-644"><span class="linenos">644</span></a> <span class="n">filtered_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">minimum_num_nulls</span><span class="p">))</span>
+</span><span id="DataFrame-645"><a href="#DataFrame-645"><span class="linenos">645</span></a> <span class="n">final_df</span> <span class="o">=</span> <span class="n">filtered_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">all_columns</span><span class="p">)</span>
+</span><span id="DataFrame-646"><a href="#DataFrame-646"><span class="linenos">646</span></a> <span class="k">return</span> <span class="n">final_df</span>
+</span><span id="DataFrame-647"><a href="#DataFrame-647"><span class="linenos">647</span></a>
+</span><span id="DataFrame-648"><a href="#DataFrame-648"><span class="linenos">648</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-649"><a href="#DataFrame-649"><span class="linenos">649</span></a> <span class="k">def</span> <span class="nf">fillna</span><span class="p">(</span>
+</span><span id="DataFrame-650"><a href="#DataFrame-650"><span class="linenos">650</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame-651"><a href="#DataFrame-651"><span class="linenos">651</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">ColumnLiterals</span><span class="p">],</span>
+</span><span id="DataFrame-652"><a href="#DataFrame-652"><span class="linenos">652</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-653"><a href="#DataFrame-653"><span class="linenos">653</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-654"><a href="#DataFrame-654"><span class="linenos">654</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame-655"><a href="#DataFrame-655"><span class="linenos">655</span></a><span class="sd"> Functionality Difference: If you provide a value to replace a null and that type conflicts</span>
+</span><span id="DataFrame-656"><a href="#DataFrame-656"><span class="linenos">656</span></a><span class="sd"> with the type of the column then PySpark will just ignore your replacement.</span>
+</span><span id="DataFrame-657"><a href="#DataFrame-657"><span class="linenos">657</span></a><span class="sd"> This will try to cast them to be the same in some cases. So they won&#39;t always match.</span>
+</span><span id="DataFrame-658"><a href="#DataFrame-658"><span class="linenos">658</span></a><span class="sd"> Best to not mix types so make sure replacement is the same type as the column</span>
+</span><span id="DataFrame-659"><a href="#DataFrame-659"><span class="linenos">659</span></a>
+</span><span id="DataFrame-660"><a href="#DataFrame-660"><span class="linenos">660</span></a><span class="sd"> Possibility for improvement: Use `typeof` function to get the type of the column</span>
+</span><span id="DataFrame-661"><a href="#DataFrame-661"><span class="linenos">661</span></a><span class="sd"> and check if it matches the type of the value provided. If not then make it null.</span>
+</span><span id="DataFrame-662"><a href="#DataFrame-662"><span class="linenos">662</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame-663"><a href="#DataFrame-663"><span class="linenos">663</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
+</span><span id="DataFrame-664"><a href="#DataFrame-664"><span class="linenos">664</span></a>
+</span><span id="DataFrame-665"><a href="#DataFrame-665"><span class="linenos">665</span></a> <span class="n">values</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="DataFrame-666"><a href="#DataFrame-666"><span class="linenos">666</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="DataFrame-667"><a href="#DataFrame-667"><span class="linenos">667</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-668"><a href="#DataFrame-668"><span class="linenos">668</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-669"><a href="#DataFrame-669"><span class="linenos">669</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
+</span><span id="DataFrame-670"><a href="#DataFrame-670"><span class="linenos">670</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+</span><span id="DataFrame-671"><a href="#DataFrame-671"><span class="linenos">671</span></a> <span class="n">values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
+</span><span id="DataFrame-672"><a href="#DataFrame-672"><span class="linenos">672</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="p">))</span>
+</span><span id="DataFrame-673"><a href="#DataFrame-673"><span class="linenos">673</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">columns</span><span class="p">:</span>
+</span><span id="DataFrame-674"><a href="#DataFrame-674"><span class="linenos">674</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
+</span><span id="DataFrame-675"><a href="#DataFrame-675"><span class="linenos">675</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">values</span><span class="p">:</span>
+</span><span id="DataFrame-676"><a href="#DataFrame-676"><span class="linenos">676</span></a> <span class="n">values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame-677"><a href="#DataFrame-677"><span class="linenos">677</span></a> <span class="n">value_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">values</span><span class="p">]</span>
+</span><span id="DataFrame-678"><a href="#DataFrame-678"><span class="linenos">678</span></a>
+</span><span id="DataFrame-679"><a href="#DataFrame-679"><span class="linenos">679</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span>
+</span><span id="DataFrame-680"><a href="#DataFrame-680"><span class="linenos">680</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="p">(</span>
+</span><span id="DataFrame-681"><a href="#DataFrame-681"><span class="linenos">681</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
+</span><span id="DataFrame-682"><a href="#DataFrame-682"><span class="linenos">682</span></a> <span class="p">)</span>
+</span><span id="DataFrame-683"><a href="#DataFrame-683"><span class="linenos">683</span></a> <span class="k">for</span> <span class="n">column</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">value_columns</span><span class="p">)</span>
+</span><span id="DataFrame-684"><a href="#DataFrame-684"><span class="linenos">684</span></a> <span class="p">}</span>
+</span><span id="DataFrame-685"><a href="#DataFrame-685"><span class="linenos">685</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">null_replacement_mapping</span><span class="p">}</span>
+</span><span id="DataFrame-686"><a href="#DataFrame-686"><span class="linenos">686</span></a> <span class="n">null_replacement_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-687"><a href="#DataFrame-687"><span class="linenos">687</span></a> <span class="n">null_replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span>
+</span><span id="DataFrame-688"><a href="#DataFrame-688"><span class="linenos">688</span></a> <span class="p">]</span>
+</span><span id="DataFrame-689"><a href="#DataFrame-689"><span class="linenos">689</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">null_replacement_columns</span><span class="p">)</span>
+</span><span id="DataFrame-690"><a href="#DataFrame-690"><span class="linenos">690</span></a> <span class="k">return</span> <span class="n">new_df</span>
+</span><span id="DataFrame-691"><a href="#DataFrame-691"><span class="linenos">691</span></a>
+</span><span id="DataFrame-692"><a href="#DataFrame-692"><span class="linenos">692</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-693"><a href="#DataFrame-693"><span class="linenos">693</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
+</span><span id="DataFrame-694"><a href="#DataFrame-694"><span class="linenos">694</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame-695"><a href="#DataFrame-695"><span class="linenos">695</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
+</span><span id="DataFrame-696"><a href="#DataFrame-696"><span class="linenos">696</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-697"><a href="#DataFrame-697"><span class="linenos">697</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Collection</span><span class="p">[</span><span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">|</span> <span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-698"><a href="#DataFrame-698"><span class="linenos">698</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-699"><a href="#DataFrame-699"><span class="linenos">699</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
+</span><span id="DataFrame-700"><a href="#DataFrame-700"><span class="linenos">700</span></a>
+</span><span id="DataFrame-701"><a href="#DataFrame-701"><span class="linenos">701</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="DataFrame-702"><a href="#DataFrame-702"><span class="linenos">702</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-703"><a href="#DataFrame-703"><span class="linenos">703</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-704"><a href="#DataFrame-704"><span class="linenos">704</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
+</span><span id="DataFrame-705"><a href="#DataFrame-705"><span class="linenos">705</span></a>
+</span><span id="DataFrame-706"><a href="#DataFrame-706"><span class="linenos">706</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
+</span><span id="DataFrame-707"><a href="#DataFrame-707"><span class="linenos">707</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+</span><span id="DataFrame-708"><a href="#DataFrame-708"><span class="linenos">708</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span>
+</span><span id="DataFrame-709"><a href="#DataFrame-709"><span class="linenos">709</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
+</span><span id="DataFrame-710"><a href="#DataFrame-710"><span class="linenos">710</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="n">old_values</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
+</span><span id="DataFrame-711"><a href="#DataFrame-711"><span class="linenos">711</span></a> <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">list</span><span class="p">),</span> <span class="s2">&quot;value must be a list since the replacements are a list&quot;</span>
+</span><span id="DataFrame-712"><a href="#DataFrame-712"><span class="linenos">712</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
+</span><span id="DataFrame-713"><a href="#DataFrame-713"><span class="linenos">713</span></a> <span class="n">value</span>
+</span><span id="DataFrame-714"><a href="#DataFrame-714"><span class="linenos">714</span></a> <span class="p">),</span> <span class="s2">&quot;the replacements and values must be the same length&quot;</span>
+</span><span id="DataFrame-715"><a href="#DataFrame-715"><span class="linenos">715</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="n">to_replace</span>
+</span><span id="DataFrame-716"><a href="#DataFrame-716"><span class="linenos">716</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="n">value</span>
+</span><span id="DataFrame-717"><a href="#DataFrame-717"><span class="linenos">717</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-718"><a href="#DataFrame-718"><span class="linenos">718</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">to_replace</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame-719"><a href="#DataFrame-719"><span class="linenos">719</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame-720"><a href="#DataFrame-720"><span class="linenos">720</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">old_values</span><span class="p">]</span>
+</span><span id="DataFrame-721"><a href="#DataFrame-721"><span class="linenos">721</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">new_values</span><span class="p">]</span>
+</span><span id="DataFrame-722"><a href="#DataFrame-722"><span class="linenos">722</span></a>
+</span><span id="DataFrame-723"><a href="#DataFrame-723"><span class="linenos">723</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{}</span>
+</span><span id="DataFrame-724"><a href="#DataFrame-724"><span class="linenos">724</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span>
+</span><span id="DataFrame-725"><a href="#DataFrame-725"><span class="linenos">725</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
+</span><span id="DataFrame-726"><a href="#DataFrame-726"><span class="linenos">726</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">old_values</span><span class="p">,</span> <span class="n">new_values</span><span class="p">)):</span>
+</span><span id="DataFrame-727"><a href="#DataFrame-727"><span class="linenos">727</span></a> <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+</span><span id="DataFrame-728"><a href="#DataFrame-728"><span class="linenos">728</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span>
+</span><span id="DataFrame-729"><a href="#DataFrame-729"><span class="linenos">729</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-730"><a href="#DataFrame-730"><span class="linenos">730</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="c1"># type: ignore</span>
+</span><span id="DataFrame-731"><a href="#DataFrame-731"><span class="linenos">731</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span>
+</span><span id="DataFrame-732"><a href="#DataFrame-732"><span class="linenos">732</span></a> <span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame-733"><a href="#DataFrame-733"><span class="linenos">733</span></a> <span class="p">)</span>
+</span><span id="DataFrame-734"><a href="#DataFrame-734"><span class="linenos">734</span></a>
+</span><span id="DataFrame-735"><a href="#DataFrame-735"><span class="linenos">735</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">replacement_mapping</span><span class="p">}</span>
+</span><span id="DataFrame-736"><a href="#DataFrame-736"><span class="linenos">736</span></a> <span class="n">replacement_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">]</span>
+</span><span id="DataFrame-737"><a href="#DataFrame-737"><span class="linenos">737</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">replacement_columns</span><span class="p">)</span>
+</span><span id="DataFrame-738"><a href="#DataFrame-738"><span class="linenos">738</span></a> <span class="k">return</span> <span class="n">new_df</span>
+</span><span id="DataFrame-739"><a href="#DataFrame-739"><span class="linenos">739</span></a>
+</span><span id="DataFrame-740"><a href="#DataFrame-740"><span class="linenos">740</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-741"><a href="#DataFrame-741"><span class="linenos">741</span></a> <span class="k">def</span> <span class="nf">withColumn</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">colName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-742"><a href="#DataFrame-742"><span class="linenos">742</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
+</span><span id="DataFrame-743"><a href="#DataFrame-743"><span class="linenos">743</span></a> <span class="n">existing_col_names</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">named_selects</span>
+</span><span id="DataFrame-744"><a href="#DataFrame-744"><span class="linenos">744</span></a> <span class="n">existing_col_index</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="DataFrame-745"><a href="#DataFrame-745"><span class="linenos">745</span></a> <span class="n">existing_col_names</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">colName</span><span class="p">)</span> <span class="k">if</span> <span class="n">colName</span> <span class="ow">in</span> <span class="n">existing_col_names</span> <span class="k">else</span> <span class="kc">None</span>
+</span><span id="DataFrame-746"><a href="#DataFrame-746"><span class="linenos">746</span></a> <span class="p">)</span>
+</span><span id="DataFrame-747"><a href="#DataFrame-747"><span class="linenos">747</span></a> <span class="k">if</span> <span class="n">existing_col_index</span><span class="p">:</span>
+</span><span id="DataFrame-748"><a href="#DataFrame-748"><span class="linenos">748</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-749"><a href="#DataFrame-749"><span class="linenos">749</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span><span class="p">[</span><span class="n">existing_col_index</span><span class="p">]</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">expression</span>
+</span><span id="DataFrame-750"><a href="#DataFrame-750"><span class="linenos">750</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-751"><a href="#DataFrame-751"><span class="linenos">751</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">colName</span><span class="p">),</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="DataFrame-752"><a href="#DataFrame-752"><span class="linenos">752</span></a>
+</span><span id="DataFrame-753"><a href="#DataFrame-753"><span class="linenos">753</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-754"><a href="#DataFrame-754"><span class="linenos">754</span></a> <span class="k">def</span> <span class="nf">withColumnRenamed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">existing</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">new</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
+</span><span id="DataFrame-755"><a href="#DataFrame-755"><span class="linenos">755</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-756"><a href="#DataFrame-756"><span class="linenos">756</span></a> <span class="n">existing_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-757"><a href="#DataFrame-757"><span class="linenos">757</span></a> <span class="n">expression</span>
+</span><span id="DataFrame-758"><a href="#DataFrame-758"><span class="linenos">758</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span>
+</span><span id="DataFrame-759"><a href="#DataFrame-759"><span class="linenos">759</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="n">existing</span>
+</span><span id="DataFrame-760"><a href="#DataFrame-760"><span class="linenos">760</span></a> <span class="p">]</span>
+</span><span id="DataFrame-761"><a href="#DataFrame-761"><span class="linenos">761</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">existing_columns</span><span class="p">:</span>
+</span><span id="DataFrame-762"><a href="#DataFrame-762"><span class="linenos">762</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Tried to rename a column that doesn&#39;t exist&quot;</span><span class="p">)</span>
+</span><span id="DataFrame-763"><a href="#DataFrame-763"><span class="linenos">763</span></a> <span class="k">for</span> <span class="n">existing_column</span> <span class="ow">in</span> <span class="n">existing_columns</span><span class="p">:</span>
+</span><span id="DataFrame-764"><a href="#DataFrame-764"><span class="linenos">764</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">existing_column</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
+</span><span id="DataFrame-765"><a href="#DataFrame-765"><span class="linenos">765</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">existing_column</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">new</span><span class="p">))</span>
+</span><span id="DataFrame-766"><a href="#DataFrame-766"><span class="linenos">766</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-767"><a href="#DataFrame-767"><span class="linenos">767</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;alias&quot;</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">new</span><span class="p">))</span>
+</span><span id="DataFrame-768"><a href="#DataFrame-768"><span class="linenos">768</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-769"><a href="#DataFrame-769"><span class="linenos">769</span></a>
+</span><span id="DataFrame-770"><a href="#DataFrame-770"><span class="linenos">770</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-771"><a href="#DataFrame-771"><span class="linenos">771</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-772"><a href="#DataFrame-772"><span class="linenos">772</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-773"><a href="#DataFrame-773"><span class="linenos">773</span></a> <span class="n">drop_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame-774"><a href="#DataFrame-774"><span class="linenos">774</span></a> <span class="n">new_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-775"><a href="#DataFrame-775"><span class="linenos">775</span></a> <span class="n">col</span>
+</span><span id="DataFrame-776"><a href="#DataFrame-776"><span class="linenos">776</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">all_columns</span>
+</span><span id="DataFrame-777"><a href="#DataFrame-777"><span class="linenos">777</span></a> <span class="k">if</span> <span class="n">col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="n">drop_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">drop_column</span> <span class="ow">in</span> <span class="n">drop_cols</span><span class="p">]</span>
+</span><span id="DataFrame-778"><a href="#DataFrame-778"><span class="linenos">778</span></a> <span class="p">]</span>
+</span><span id="DataFrame-779"><a href="#DataFrame-779"><span class="linenos">779</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">new_columns</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-780"><a href="#DataFrame-780"><span class="linenos">780</span></a>
+</span><span id="DataFrame-781"><a href="#DataFrame-781"><span class="linenos">781</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">LIMIT</span><span class="p">)</span>
+</span><span id="DataFrame-782"><a href="#DataFrame-782"><span class="linenos">782</span></a> <span class="k">def</span> <span class="nf">limit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-783"><a href="#DataFrame-783"><span class="linenos">783</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="n">num</span><span class="p">))</span>
+</span><span id="DataFrame-784"><a href="#DataFrame-784"><span class="linenos">784</span></a>
+</span><span id="DataFrame-785"><a href="#DataFrame-785"><span class="linenos">785</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame-786"><a href="#DataFrame-786"><span class="linenos">786</span></a> <span class="k">def</span> <span class="nf">hint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="n">parameters</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-787"><a href="#DataFrame-787"><span class="linenos">787</span></a> <span class="n">parameter_list</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+</span><span id="DataFrame-788"><a href="#DataFrame-788"><span class="linenos">788</span></a> <span class="n">parameter_columns</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="DataFrame-789"><a href="#DataFrame-789"><span class="linenos">789</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">parameter_list</span><span class="p">)</span>
+</span><span id="DataFrame-790"><a href="#DataFrame-790"><span class="linenos">790</span></a> <span class="k">if</span> <span class="n">parameters</span>
+</span><span id="DataFrame-791"><a href="#DataFrame-791"><span class="linenos">791</span></a> <span class="k">else</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">])</span>
+</span><span id="DataFrame-792"><a href="#DataFrame-792"><span class="linenos">792</span></a> <span class="p">)</span>
+</span><span id="DataFrame-793"><a href="#DataFrame-793"><span class="linenos">793</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">parameter_columns</span><span class="p">)</span>
+</span><span id="DataFrame-794"><a href="#DataFrame-794"><span class="linenos">794</span></a>
+</span><span id="DataFrame-795"><a href="#DataFrame-795"><span class="linenos">795</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame-796"><a href="#DataFrame-796"><span class="linenos">796</span></a> <span class="k">def</span> <span class="nf">repartition</span><span class="p">(</span>
+</span><span id="DataFrame-797"><a href="#DataFrame-797"><span class="linenos">797</span></a> <span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">ColumnOrName</span><span class="p">],</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">ColumnOrName</span>
+</span><span id="DataFrame-798"><a href="#DataFrame-798"><span class="linenos">798</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-799"><a href="#DataFrame-799"><span class="linenos">799</span></a> <span class="n">num_partition_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">numPartitions</span><span class="p">)</span>
+</span><span id="DataFrame-800"><a href="#DataFrame-800"><span class="linenos">800</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame-801"><a href="#DataFrame-801"><span class="linenos">801</span></a> <span class="n">args</span> <span class="o">=</span> <span class="n">num_partition_cols</span> <span class="o">+</span> <span class="n">columns</span>
+</span><span id="DataFrame-802"><a href="#DataFrame-802"><span class="linenos">802</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;repartition&quot;</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
+</span><span id="DataFrame-803"><a href="#DataFrame-803"><span class="linenos">803</span></a>
+</span><span id="DataFrame-804"><a href="#DataFrame-804"><span class="linenos">804</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame-805"><a href="#DataFrame-805"><span class="linenos">805</span></a> <span class="k">def</span> <span class="nf">coalesce</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-806"><a href="#DataFrame-806"><span class="linenos">806</span></a> <span class="n">num_partitions</span> <span class="o">=</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="n">numPartitions</span><span class="p">])</span>
+</span><span id="DataFrame-807"><a href="#DataFrame-807"><span class="linenos">807</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;coalesce&quot;</span><span class="p">,</span> <span class="n">num_partitions</span><span class="p">)</span>
+</span><span id="DataFrame-808"><a href="#DataFrame-808"><span class="linenos">808</span></a>
+</span><span id="DataFrame-809"><a href="#DataFrame-809"><span class="linenos">809</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame-810"><a href="#DataFrame-810"><span class="linenos">810</span></a> <span class="k">def</span> <span class="nf">cache</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-811"><a href="#DataFrame-811"><span class="linenos">811</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storage_level</span><span class="o">=</span><span class="s2">&quot;MEMORY_AND_DISK&quot;</span><span class="p">)</span>
+</span><span id="DataFrame-812"><a href="#DataFrame-812"><span class="linenos">812</span></a>
+</span><span id="DataFrame-813"><a href="#DataFrame-813"><span class="linenos">813</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame-814"><a href="#DataFrame-814"><span class="linenos">814</span></a> <span class="k">def</span> <span class="nf">persist</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">storageLevel</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;MEMORY_AND_DISK_SER&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-815"><a href="#DataFrame-815"><span class="linenos">815</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame-816"><a href="#DataFrame-816"><span class="linenos">816</span></a><span class="sd"> Storage Level Options: https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-aux-cache-cache-table.html</span>
+</span><span id="DataFrame-817"><a href="#DataFrame-817"><span class="linenos">817</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame-818"><a href="#DataFrame-818"><span class="linenos">818</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storageLevel</span><span class="p">)</span>
</span></pre></div>
@@ -1476,7 +1513,7 @@
<input id="DataFrame.__init__-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
<div class="attr function">
- <span class="name">DataFrame</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">spark</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719851707024&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">expression</span><span class="p">:</span> <span class="n"><a href="../expressions.html#Select">sqlglot.expressions.Select</a></span>,</span><span class="param"> <span class="n">branch_id</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">sequence_id</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">last_op</span><span class="p">:</span> <span class="n">sqlglot</span><span class="o">.</span><span class="n">dataframe</span><span class="o">.</span><span class="n">sql</span><span class="o">.</span><span class="n">operations</span><span class="o">.</span><span class="n">Operation</span> <span class="o">=</span> <span class="o">&lt;</span><span class="n">Operation</span><span class="o">.</span><span class="n">INIT</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">pending_hints</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span 
class="n">output_expression_container</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719851535440&#39;</span><span class="o">&gt;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span>)</span>
+ <span class="name">DataFrame</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">spark</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377712280432&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">expression</span><span class="p">:</span> <span class="n"><a href="../expressions.html#Select">sqlglot.expressions.Select</a></span>,</span><span class="param"> <span class="n">branch_id</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">sequence_id</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">last_op</span><span class="p">:</span> <span class="n">sqlglot</span><span class="o">.</span><span class="n">dataframe</span><span class="o">.</span><span class="n">sql</span><span class="o">.</span><span class="n">operations</span><span class="o">.</span><span class="n">Operation</span> <span class="o">=</span> <span class="o">&lt;</span><span class="n">Operation</span><span class="o">.</span><span class="n">INIT</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">pending_hints</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span 
class="n">output_expression_container</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377711871072&#39;</span><span class="o">&gt;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span>)</span>
<label class="view-source-button" for="DataFrame.__init__-view-source"><span>View Source</span></label>
@@ -1614,29 +1651,38 @@
</span><span id="DataFrame.select-356"><a href="#DataFrame.select-356"><span class="linenos">356</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
</span><span id="DataFrame.select-357"><a href="#DataFrame.select-357"><span class="linenos">357</span></a> <span class="n">kwargs</span><span class="p">[</span><span class="s2">&quot;append&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;append&quot;</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span><span id="DataFrame.select-358"><a href="#DataFrame.select-358"><span class="linenos">358</span></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;joins&quot;</span><span class="p">):</span>
-</span><span id="DataFrame.select-359"><a href="#DataFrame.select-359"><span class="linenos">359</span></a> <span class="n">ambiguous_cols</span> <span class="o">=</span> <span class="p">[</span><span class="n">col</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="o">.</span><span class="n">table</span><span class="p">]</span>
-</span><span id="DataFrame.select-360"><a href="#DataFrame.select-360"><span class="linenos">360</span></a> <span class="k">if</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
-</span><span id="DataFrame.select-361"><a href="#DataFrame.select-361"><span class="linenos">361</span></a> <span class="n">join_table_identifiers</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.select-362"><a href="#DataFrame.select-362"><span class="linenos">362</span></a> <span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.select-363"><a href="#DataFrame.select-363"><span class="linenos">363</span></a> <span class="p">]</span>
-</span><span id="DataFrame.select-364"><a href="#DataFrame.select-364"><span class="linenos">364</span></a> <span class="n">cte_names_in_join</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">join_table_identifiers</span><span class="p">]</span>
-</span><span id="DataFrame.select-365"><a href="#DataFrame.select-365"><span class="linenos">365</span></a> <span class="k">for</span> <span class="n">ambiguous_col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
-</span><span id="DataFrame.select-366"><a href="#DataFrame.select-366"><span class="linenos">366</span></a> <span class="n">ctes_with_column</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.select-367"><a href="#DataFrame.select-367"><span class="linenos">367</span></a> <span class="n">cte</span>
-</span><span id="DataFrame.select-368"><a href="#DataFrame.select-368"><span class="linenos">368</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span>
-</span><span id="DataFrame.select-369"><a href="#DataFrame.select-369"><span class="linenos">369</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte_names_in_join</span>
-</span><span id="DataFrame.select-370"><a href="#DataFrame.select-370"><span class="linenos">370</span></a> <span class="ow">and</span> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span>
-</span><span id="DataFrame.select-371"><a href="#DataFrame.select-371"><span class="linenos">371</span></a> <span class="p">]</span>
-</span><span id="DataFrame.select-372"><a href="#DataFrame.select-372"><span class="linenos">372</span></a> <span class="c1"># If the select column does not specify a table and there is a join</span>
-</span><span id="DataFrame.select-373"><a href="#DataFrame.select-373"><span class="linenos">373</span></a> <span class="c1"># then we assume they are referring to the left table</span>
-</span><span id="DataFrame.select-374"><a href="#DataFrame.select-374"><span class="linenos">374</span></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">ctes_with_column</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
-</span><span id="DataFrame.select-375"><a href="#DataFrame.select-375"><span class="linenos">375</span></a> <span class="n">table_identifier</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;expressions&quot;</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">this</span>
-</span><span id="DataFrame.select-376"><a href="#DataFrame.select-376"><span class="linenos">376</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.select-377"><a href="#DataFrame.select-377"><span class="linenos">377</span></a> <span class="n">table_identifier</span> <span class="o">=</span> <span class="n">ctes_with_column</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;alias&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">this</span>
-</span><span id="DataFrame.select-378"><a href="#DataFrame.select-378"><span class="linenos">378</span></a> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;table&quot;</span><span class="p">,</span> <span class="n">table_identifier</span><span class="p">)</span>
-</span><span id="DataFrame.select-379"><a href="#DataFrame.select-379"><span class="linenos">379</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
-</span><span id="DataFrame.select-380"><a href="#DataFrame.select-380"><span class="linenos">380</span></a> <span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">expression</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">),</span> <span class="o">**</span><span class="n">kwargs</span>
-</span><span id="DataFrame.select-381"><a href="#DataFrame.select-381"><span class="linenos">381</span></a> <span class="p">)</span>
+</span><span id="DataFrame.select-359"><a href="#DataFrame.select-359"><span class="linenos">359</span></a> <span class="n">ambiguous_cols</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.select-360"><a href="#DataFrame.select-360"><span class="linenos">360</span></a> <span class="n">col</span>
+</span><span id="DataFrame.select-361"><a href="#DataFrame.select-361"><span class="linenos">361</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span>
+</span><span id="DataFrame.select-362"><a href="#DataFrame.select-362"><span class="linenos">362</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="o">.</span><span class="n">table</span>
+</span><span id="DataFrame.select-363"><a href="#DataFrame.select-363"><span class="linenos">363</span></a> <span class="p">]</span>
+</span><span id="DataFrame.select-364"><a href="#DataFrame.select-364"><span class="linenos">364</span></a> <span class="k">if</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
+</span><span id="DataFrame.select-365"><a href="#DataFrame.select-365"><span class="linenos">365</span></a> <span class="n">join_table_identifiers</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.select-366"><a href="#DataFrame.select-366"><span class="linenos">366</span></a> <span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.select-367"><a href="#DataFrame.select-367"><span class="linenos">367</span></a> <span class="p">]</span>
+</span><span id="DataFrame.select-368"><a href="#DataFrame.select-368"><span class="linenos">368</span></a> <span class="n">cte_names_in_join</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">join_table_identifiers</span><span class="p">]</span>
+</span><span id="DataFrame.select-369"><a href="#DataFrame.select-369"><span class="linenos">369</span></a> <span class="c1"># If we have columns that resolve to multiple CTE expressions then we want to use each CTE left-to-right</span>
+</span><span id="DataFrame.select-370"><a href="#DataFrame.select-370"><span class="linenos">370</span></a> <span class="c1"># and therefore we allow multiple columns with the same name in the result. This matches the behavior</span>
+</span><span id="DataFrame.select-371"><a href="#DataFrame.select-371"><span class="linenos">371</span></a> <span class="c1"># of Spark.</span>
+</span><span id="DataFrame.select-372"><a href="#DataFrame.select-372"><span class="linenos">372</span></a> <span class="n">resolved_column_position</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">col</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">}</span>
+</span><span id="DataFrame.select-373"><a href="#DataFrame.select-373"><span class="linenos">373</span></a> <span class="k">for</span> <span class="n">ambiguous_col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
+</span><span id="DataFrame.select-374"><a href="#DataFrame.select-374"><span class="linenos">374</span></a> <span class="n">ctes_with_column</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.select-375"><a href="#DataFrame.select-375"><span class="linenos">375</span></a> <span class="n">cte</span>
+</span><span id="DataFrame.select-376"><a href="#DataFrame.select-376"><span class="linenos">376</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span>
+</span><span id="DataFrame.select-377"><a href="#DataFrame.select-377"><span class="linenos">377</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte_names_in_join</span>
+</span><span id="DataFrame.select-378"><a href="#DataFrame.select-378"><span class="linenos">378</span></a> <span class="ow">and</span> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span>
+</span><span id="DataFrame.select-379"><a href="#DataFrame.select-379"><span class="linenos">379</span></a> <span class="p">]</span>
+</span><span id="DataFrame.select-380"><a href="#DataFrame.select-380"><span class="linenos">380</span></a> <span class="c1"># Check if there is a CTE with this column that we haven&#39;t used before. If so, use it. Otherwise,</span>
+</span><span id="DataFrame.select-381"><a href="#DataFrame.select-381"><span class="linenos">381</span></a> <span class="c1"># use the same CTE we used before</span>
+</span><span id="DataFrame.select-382"><a href="#DataFrame.select-382"><span class="linenos">382</span></a> <span class="n">cte</span> <span class="o">=</span> <span class="n">seq_get</span><span class="p">(</span><span class="n">ctes_with_column</span><span class="p">,</span> <span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
+</span><span id="DataFrame.select-383"><a href="#DataFrame.select-383"><span class="linenos">383</span></a> <span class="k">if</span> <span class="n">cte</span><span class="p">:</span>
+</span><span id="DataFrame.select-384"><a href="#DataFrame.select-384"><span class="linenos">384</span></a> <span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
+</span><span id="DataFrame.select-385"><a href="#DataFrame.select-385"><span class="linenos">385</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.select-386"><a href="#DataFrame.select-386"><span class="linenos">386</span></a> <span class="n">cte</span> <span class="o">=</span> <span class="n">ctes_with_column</span><span class="p">[</span><span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]]</span>
+</span><span id="DataFrame.select-387"><a href="#DataFrame.select-387"><span class="linenos">387</span></a> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;table&quot;</span><span class="p">,</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
+</span><span id="DataFrame.select-388"><a href="#DataFrame.select-388"><span class="linenos">388</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
+</span><span id="DataFrame.select-389"><a href="#DataFrame.select-389"><span class="linenos">389</span></a> <span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">expression</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">),</span> <span class="o">**</span><span class="n">kwargs</span>
+</span><span id="DataFrame.select-390"><a href="#DataFrame.select-390"><span class="linenos">390</span></a> <span class="p">)</span>
</span></pre></div>
@@ -1655,16 +1701,16 @@
</div>
<a class="headerlink" href="#DataFrame.alias"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.alias-383"><a href="#DataFrame.alias-383"><span class="linenos">383</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame.alias-384"><a href="#DataFrame.alias-384"><span class="linenos">384</span></a> <span class="k">def</span> <span class="nf">alias</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.alias-385"><a href="#DataFrame.alias-385"><span class="linenos">385</span></a> <span class="n">new_sequence_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_sequence_id</span>
-</span><span id="DataFrame.alias-386"><a href="#DataFrame.alias-386"><span class="linenos">386</span></a> <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.alias-387"><a href="#DataFrame.alias-387"><span class="linenos">387</span></a> <span class="k">for</span> <span class="n">join_hint</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">:</span>
-</span><span id="DataFrame.alias-388"><a href="#DataFrame.alias-388"><span class="linenos">388</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">join_hint</span><span class="o">.</span><span class="n">expressions</span><span class="p">:</span>
-</span><span id="DataFrame.alias-389"><a href="#DataFrame.alias-389"><span class="linenos">389</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">:</span>
-</span><span id="DataFrame.alias-390"><a href="#DataFrame.alias-390"><span class="linenos">390</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;this&quot;</span><span class="p">,</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_col</span><span class="p">(</span><span class="n">new_sequence_id</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.alias-391"><a href="#DataFrame.alias-391"><span class="linenos">391</span></a> <span class="n">df</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_add_alias_to_mapping</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">new_sequence_id</span><span class="p">)</span>
-</span><span id="DataFrame.alias-392"><a href="#DataFrame.alias-392"><span class="linenos">392</span></a> <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">(</span><span class="n">sequence_id</span><span class="o">=</span><span class="n">new_sequence_id</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.alias-392"><a href="#DataFrame.alias-392"><span class="linenos">392</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame.alias-393"><a href="#DataFrame.alias-393"><span class="linenos">393</span></a> <span class="k">def</span> <span class="nf">alias</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.alias-394"><a href="#DataFrame.alias-394"><span class="linenos">394</span></a> <span class="n">new_sequence_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_sequence_id</span>
+</span><span id="DataFrame.alias-395"><a href="#DataFrame.alias-395"><span class="linenos">395</span></a> <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.alias-396"><a href="#DataFrame.alias-396"><span class="linenos">396</span></a> <span class="k">for</span> <span class="n">join_hint</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">:</span>
+</span><span id="DataFrame.alias-397"><a href="#DataFrame.alias-397"><span class="linenos">397</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">join_hint</span><span class="o">.</span><span class="n">expressions</span><span class="p">:</span>
+</span><span id="DataFrame.alias-398"><a href="#DataFrame.alias-398"><span class="linenos">398</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">:</span>
+</span><span id="DataFrame.alias-399"><a href="#DataFrame.alias-399"><span class="linenos">399</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;this&quot;</span><span class="p">,</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_col</span><span class="p">(</span><span class="n">new_sequence_id</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.alias-400"><a href="#DataFrame.alias-400"><span class="linenos">400</span></a> <span class="n">df</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_add_alias_to_mapping</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">new_sequence_id</span><span class="p">)</span>
+</span><span id="DataFrame.alias-401"><a href="#DataFrame.alias-401"><span class="linenos">401</span></a> <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">(</span><span class="n">sequence_id</span><span class="o">=</span><span class="n">new_sequence_id</span><span class="p">)</span>
</span></pre></div>
@@ -1683,10 +1729,10 @@
</div>
<a class="headerlink" href="#DataFrame.where"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.where-394"><a href="#DataFrame.where-394"><span class="linenos">394</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">WHERE</span><span class="p">)</span>
-</span><span id="DataFrame.where-395"><a href="#DataFrame.where-395"><span class="linenos">395</span></a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">bool</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.where-396"><a href="#DataFrame.where-396"><span class="linenos">396</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">column</span><span class="p">)</span>
-</span><span id="DataFrame.where-397"><a href="#DataFrame.where-397"><span class="linenos">397</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">))</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.where-403"><a href="#DataFrame.where-403"><span class="linenos">403</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">WHERE</span><span class="p">)</span>
+</span><span id="DataFrame.where-404"><a href="#DataFrame.where-404"><span class="linenos">404</span></a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">bool</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.where-405"><a href="#DataFrame.where-405"><span class="linenos">405</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">column</span><span class="p">)</span>
+</span><span id="DataFrame.where-406"><a href="#DataFrame.where-406"><span class="linenos">406</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">))</span>
</span></pre></div>
@@ -1705,10 +1751,10 @@
</div>
<a class="headerlink" href="#DataFrame.filter"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.filter-394"><a href="#DataFrame.filter-394"><span class="linenos">394</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">WHERE</span><span class="p">)</span>
-</span><span id="DataFrame.filter-395"><a href="#DataFrame.filter-395"><span class="linenos">395</span></a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">bool</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.filter-396"><a href="#DataFrame.filter-396"><span class="linenos">396</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">column</span><span class="p">)</span>
-</span><span id="DataFrame.filter-397"><a href="#DataFrame.filter-397"><span class="linenos">397</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">))</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.filter-403"><a href="#DataFrame.filter-403"><span class="linenos">403</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">WHERE</span><span class="p">)</span>
+</span><span id="DataFrame.filter-404"><a href="#DataFrame.filter-404"><span class="linenos">404</span></a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">bool</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.filter-405"><a href="#DataFrame.filter-405"><span class="linenos">405</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">column</span><span class="p">)</span>
+</span><span id="DataFrame.filter-406"><a href="#DataFrame.filter-406"><span class="linenos">406</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">))</span>
</span></pre></div>
@@ -1727,10 +1773,10 @@
</div>
<a class="headerlink" href="#DataFrame.groupBy"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.groupBy-401"><a href="#DataFrame.groupBy-401"><span class="linenos">401</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">GROUP_BY</span><span class="p">)</span>
-</span><span id="DataFrame.groupBy-402"><a href="#DataFrame.groupBy-402"><span class="linenos">402</span></a> <span class="k">def</span> <span class="nf">groupBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">GroupedData</span><span class="p">:</span>
-</span><span id="DataFrame.groupBy-403"><a href="#DataFrame.groupBy-403"><span class="linenos">403</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame.groupBy-404"><a href="#DataFrame.groupBy-404"><span class="linenos">404</span></a> <span class="k">return</span> <span class="n">GroupedData</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_op</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.groupBy-410"><a href="#DataFrame.groupBy-410"><span class="linenos">410</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">GROUP_BY</span><span class="p">)</span>
+</span><span id="DataFrame.groupBy-411"><a href="#DataFrame.groupBy-411"><span class="linenos">411</span></a> <span class="k">def</span> <span class="nf">groupBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">GroupedData</span><span class="p">:</span>
+</span><span id="DataFrame.groupBy-412"><a href="#DataFrame.groupBy-412"><span class="linenos">412</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame.groupBy-413"><a href="#DataFrame.groupBy-413"><span class="linenos">413</span></a> <span class="k">return</span> <span class="n">GroupedData</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_op</span><span class="p">)</span>
</span></pre></div>
@@ -1749,10 +1795,10 @@
</div>
<a class="headerlink" href="#DataFrame.agg"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.agg-406"><a href="#DataFrame.agg-406"><span class="linenos">406</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.agg-407"><a href="#DataFrame.agg-407"><span class="linenos">407</span></a> <span class="k">def</span> <span class="nf">agg</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">exprs</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.agg-408"><a href="#DataFrame.agg-408"><span class="linenos">408</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">exprs</span><span class="p">)</span>
-</span><span id="DataFrame.agg-409"><a href="#DataFrame.agg-409"><span class="linenos">409</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="o">*</span><span class="n">cols</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.agg-415"><a href="#DataFrame.agg-415"><span class="linenos">415</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.agg-416"><a href="#DataFrame.agg-416"><span class="linenos">416</span></a> <span class="k">def</span> <span class="nf">agg</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">exprs</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.agg-417"><a href="#DataFrame.agg-417"><span class="linenos">417</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">exprs</span><span class="p">)</span>
+</span><span id="DataFrame.agg-418"><a href="#DataFrame.agg-418"><span class="linenos">418</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="o">*</span><span class="n">cols</span><span class="p">)</span>
</span></pre></div>
@@ -1771,69 +1817,97 @@
</div>
<a class="headerlink" href="#DataFrame.join"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.join-411"><a href="#DataFrame.join-411"><span class="linenos">411</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.join-412"><a href="#DataFrame.join-412"><span class="linenos">412</span></a> <span class="k">def</span> <span class="nf">join</span><span class="p">(</span>
-</span><span id="DataFrame.join-413"><a href="#DataFrame.join-413"><span class="linenos">413</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.join-414"><a href="#DataFrame.join-414"><span class="linenos">414</span></a> <span class="n">other_df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
-</span><span id="DataFrame.join-415"><a href="#DataFrame.join-415"><span class="linenos">415</span></a> <span class="n">on</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">Column</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Column</span><span class="p">]],</span>
-</span><span id="DataFrame.join-416"><a href="#DataFrame.join-416"><span class="linenos">416</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;inner&quot;</span><span class="p">,</span>
-</span><span id="DataFrame.join-417"><a href="#DataFrame.join-417"><span class="linenos">417</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
-</span><span id="DataFrame.join-418"><a href="#DataFrame.join-418"><span class="linenos">418</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.join-419"><a href="#DataFrame.join-419"><span class="linenos">419</span></a> <span class="n">other_df</span> <span class="o">=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
-</span><span id="DataFrame.join-420"><a href="#DataFrame.join-420"><span class="linenos">420</span></a> <span class="n">pre_join_self_latest_cte_name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">latest_cte_name</span>
-</span><span id="DataFrame.join-421"><a href="#DataFrame.join-421"><span class="linenos">421</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">on</span><span class="p">)</span>
-</span><span id="DataFrame.join-422"><a href="#DataFrame.join-422"><span class="linenos">422</span></a> <span class="n">join_type</span> <span class="o">=</span> <span class="n">how</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
-</span><span id="DataFrame.join-423"><a href="#DataFrame.join-423"><span class="linenos">423</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
-</span><span id="DataFrame.join-424"><a href="#DataFrame.join-424"><span class="linenos">424</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.join-425"><a href="#DataFrame.join-425"><span class="linenos">425</span></a> <span class="n">Column</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">pre_join_self_latest_cte_name</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">columns</span>
-</span><span id="DataFrame.join-426"><a href="#DataFrame.join-426"><span class="linenos">426</span></a> <span class="p">]</span>
-</span><span id="DataFrame.join-427"><a href="#DataFrame.join-427"><span class="linenos">427</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span>
-</span><span id="DataFrame.join-428"><a href="#DataFrame.join-428"><span class="linenos">428</span></a> <span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span>
-</span><span id="DataFrame.join-429"><a href="#DataFrame.join-429"><span class="linenos">429</span></a> <span class="p">[</span>
-</span><span id="DataFrame.join-430"><a href="#DataFrame.join-430"><span class="linenos">430</span></a> <span class="n">col</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">pre_join_self_latest_cte_name</span><span class="p">)</span>
-</span><span id="DataFrame.join-431"><a href="#DataFrame.join-431"><span class="linenos">431</span></a> <span class="o">==</span> <span class="n">col</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">)</span>
-</span><span id="DataFrame.join-432"><a href="#DataFrame.join-432"><span class="linenos">432</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">columns</span>
-</span><span id="DataFrame.join-433"><a href="#DataFrame.join-433"><span class="linenos">433</span></a> <span class="p">],</span>
-</span><span id="DataFrame.join-434"><a href="#DataFrame.join-434"><span class="linenos">434</span></a> <span class="p">)</span>
-</span><span id="DataFrame.join-435"><a href="#DataFrame.join-435"><span class="linenos">435</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.join-436"><a href="#DataFrame.join-436"><span class="linenos">436</span></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
-</span><span id="DataFrame.join-437"><a href="#DataFrame.join-437"><span class="linenos">437</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">columns</span><span class="p">)]</span>
-</span><span id="DataFrame.join-438"><a href="#DataFrame.join-438"><span class="linenos">438</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
-</span><span id="DataFrame.join-439"><a href="#DataFrame.join-439"><span class="linenos">439</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.join-440"><a href="#DataFrame.join-440"><span class="linenos">440</span></a> <span class="n">Column</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">pre_join_self_latest_cte_name</span><span class="p">)</span>
-</span><span id="DataFrame.join-441"><a href="#DataFrame.join-441"><span class="linenos">441</span></a> <span class="k">if</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span>
-</span><span id="DataFrame.join-442"><a href="#DataFrame.join-442"><span class="linenos">442</span></a> <span class="k">else</span> <span class="n">Column</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">)</span>
-</span><span id="DataFrame.join-443"><a href="#DataFrame.join-443"><span class="linenos">443</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">join_clause</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">))</span>
-</span><span id="DataFrame.join-444"><a href="#DataFrame.join-444"><span class="linenos">444</span></a> <span class="p">]</span>
-</span><span id="DataFrame.join-445"><a href="#DataFrame.join-445"><span class="linenos">445</span></a> <span class="n">self_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.join-446"><a href="#DataFrame.join-446"><span class="linenos">446</span></a> <span class="n">column</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">pre_join_self_latest_cte_name</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrame.join-447"><a href="#DataFrame.join-447"><span class="linenos">447</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
-</span><span id="DataFrame.join-448"><a href="#DataFrame.join-448"><span class="linenos">448</span></a> <span class="p">]</span>
-</span><span id="DataFrame.join-449"><a href="#DataFrame.join-449"><span class="linenos">449</span></a> <span class="n">other_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.join-450"><a href="#DataFrame.join-450"><span class="linenos">450</span></a> <span class="n">column</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrame.join-451"><a href="#DataFrame.join-451"><span class="linenos">451</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">other_df</span><span class="p">)</span>
-</span><span id="DataFrame.join-452"><a href="#DataFrame.join-452"><span class="linenos">452</span></a> <span class="p">]</span>
-</span><span id="DataFrame.join-453"><a href="#DataFrame.join-453"><span class="linenos">453</span></a> <span class="n">column_value_mapping</span> <span class="o">=</span> <span class="p">{</span>
-</span><span id="DataFrame.join-454"><a href="#DataFrame.join-454"><span class="linenos">454</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame.join-455"><a href="#DataFrame.join-455"><span class="linenos">455</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">this</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Star</span><span class="p">)</span>
-</span><span id="DataFrame.join-456"><a href="#DataFrame.join-456"><span class="linenos">456</span></a> <span class="k">else</span> <span class="n">column</span><span class="o">.</span><span class="n">sql</span><span class="p">():</span> <span class="n">column</span>
-</span><span id="DataFrame.join-457"><a href="#DataFrame.join-457"><span class="linenos">457</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">other_columns</span> <span class="o">+</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">join_columns</span>
-</span><span id="DataFrame.join-458"><a href="#DataFrame.join-458"><span class="linenos">458</span></a> <span class="p">}</span>
-</span><span id="DataFrame.join-459"><a href="#DataFrame.join-459"><span class="linenos">459</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.join-460"><a href="#DataFrame.join-460"><span class="linenos">460</span></a> <span class="n">column_value_mapping</span><span class="p">[</span><span class="n">name</span><span class="p">]</span>
-</span><span id="DataFrame.join-461"><a href="#DataFrame.join-461"><span class="linenos">461</span></a> <span class="k">for</span> <span class="n">name</span> <span class="ow">in</span> <span class="p">{</span><span class="n">x</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">join_columns</span> <span class="o">+</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span><span class="p">}</span>
-</span><span id="DataFrame.join-462"><a href="#DataFrame.join-462"><span class="linenos">462</span></a> <span class="p">]</span>
-</span><span id="DataFrame.join-463"><a href="#DataFrame.join-463"><span class="linenos">463</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
-</span><span id="DataFrame.join-464"><a href="#DataFrame.join-464"><span class="linenos">464</span></a> <span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
-</span><span id="DataFrame.join-465"><a href="#DataFrame.join-465"><span class="linenos">465</span></a> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">,</span> <span class="n">on</span><span class="o">=</span><span class="n">join_clause</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">join_type</span><span class="o">=</span><span class="n">join_type</span>
-</span><span id="DataFrame.join-466"><a href="#DataFrame.join-466"><span class="linenos">466</span></a> <span class="p">)</span>
-</span><span id="DataFrame.join-467"><a href="#DataFrame.join-467"><span class="linenos">467</span></a> <span class="p">)</span>
-</span><span id="DataFrame.join-468"><a href="#DataFrame.join-468"><span class="linenos">468</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">expression</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">_add_ctes_to_expression</span><span class="p">(</span>
-</span><span id="DataFrame.join-469"><a href="#DataFrame.join-469"><span class="linenos">469</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">other_df</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span>
-</span><span id="DataFrame.join-470"><a href="#DataFrame.join-470"><span class="linenos">470</span></a> <span class="p">)</span>
-</span><span id="DataFrame.join-471"><a href="#DataFrame.join-471"><span class="linenos">471</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="p">)</span>
-</span><span id="DataFrame.join-472"><a href="#DataFrame.join-472"><span class="linenos">472</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="o">.</span><span class="n">__wrapped__</span><span class="p">(</span><span class="n">new_df</span><span class="p">,</span> <span class="o">*</span><span class="n">all_columns</span><span class="p">)</span>
-</span><span id="DataFrame.join-473"><a href="#DataFrame.join-473"><span class="linenos">473</span></a> <span class="k">return</span> <span class="n">new_df</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.join-420"><a href="#DataFrame.join-420"><span class="linenos">420</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.join-421"><a href="#DataFrame.join-421"><span class="linenos">421</span></a> <span class="k">def</span> <span class="nf">join</span><span class="p">(</span>
+</span><span id="DataFrame.join-422"><a href="#DataFrame.join-422"><span class="linenos">422</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.join-423"><a href="#DataFrame.join-423"><span class="linenos">423</span></a> <span class="n">other_df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
+</span><span id="DataFrame.join-424"><a href="#DataFrame.join-424"><span class="linenos">424</span></a> <span class="n">on</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">Column</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Column</span><span class="p">]],</span>
+</span><span id="DataFrame.join-425"><a href="#DataFrame.join-425"><span class="linenos">425</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;inner&quot;</span><span class="p">,</span>
+</span><span id="DataFrame.join-426"><a href="#DataFrame.join-426"><span class="linenos">426</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
+</span><span id="DataFrame.join-427"><a href="#DataFrame.join-427"><span class="linenos">427</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.join-428"><a href="#DataFrame.join-428"><span class="linenos">428</span></a> <span class="n">other_df</span> <span class="o">=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
+</span><span id="DataFrame.join-429"><a href="#DataFrame.join-429"><span class="linenos">429</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">on</span><span class="p">)</span>
+</span><span id="DataFrame.join-430"><a href="#DataFrame.join-430"><span class="linenos">430</span></a> <span class="c1"># We will determine actual &quot;join on&quot; expression later so we don&#39;t provide it at first</span>
+</span><span id="DataFrame.join-431"><a href="#DataFrame.join-431"><span class="linenos">431</span></a> <span class="n">join_expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
+</span><span id="DataFrame.join-432"><a href="#DataFrame.join-432"><span class="linenos">432</span></a> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">,</span> <span class="n">join_type</span><span class="o">=</span><span class="n">how</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
+</span><span id="DataFrame.join-433"><a href="#DataFrame.join-433"><span class="linenos">433</span></a> <span class="p">)</span>
+</span><span id="DataFrame.join-434"><a href="#DataFrame.join-434"><span class="linenos">434</span></a> <span class="n">join_expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_add_ctes_to_expression</span><span class="p">(</span><span class="n">join_expression</span><span class="p">,</span> <span class="n">other_df</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">)</span>
+</span><span id="DataFrame.join-435"><a href="#DataFrame.join-435"><span class="linenos">435</span></a> <span class="n">self_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame.join-436"><a href="#DataFrame.join-436"><span class="linenos">436</span></a> <span class="n">other_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">other_df</span><span class="p">)</span>
+</span><span id="DataFrame.join-437"><a href="#DataFrame.join-437"><span class="linenos">437</span></a> <span class="c1"># Determines the join clause and select columns to be used passed on what type of columns were provided for</span>
+</span><span id="DataFrame.join-438"><a href="#DataFrame.join-438"><span class="linenos">438</span></a> <span class="c1"># the join. The columns returned changes based on how the on expression is provided.</span>
+</span><span id="DataFrame.join-439"><a href="#DataFrame.join-439"><span class="linenos">439</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">join_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
+</span><span id="DataFrame.join-440"><a href="#DataFrame.join-440"><span class="linenos">440</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame.join-441"><a href="#DataFrame.join-441"><span class="linenos">441</span></a><span class="sd"> Unique characteristics of join on column names only:</span>
+</span><span id="DataFrame.join-442"><a href="#DataFrame.join-442"><span class="linenos">442</span></a><span class="sd"> * The column names are put at the front of the select list</span>
+</span><span id="DataFrame.join-443"><a href="#DataFrame.join-443"><span class="linenos">443</span></a><span class="sd"> * The column names are deduplicated across the entire select list and only the column names (other dups are allowed)</span>
+</span><span id="DataFrame.join-444"><a href="#DataFrame.join-444"><span class="linenos">444</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame.join-445"><a href="#DataFrame.join-445"><span class="linenos">445</span></a> <span class="n">table_names</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.join-446"><a href="#DataFrame.join-446"><span class="linenos">446</span></a> <span class="n">table</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame.join-447"><a href="#DataFrame.join-447"><span class="linenos">447</span></a> <span class="k">for</span> <span class="n">table</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame.join-448"><a href="#DataFrame.join-448"><span class="linenos">448</span></a> <span class="p">]</span>
+</span><span id="DataFrame.join-449"><a href="#DataFrame.join-449"><span class="linenos">449</span></a> <span class="n">potential_ctes</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.join-450"><a href="#DataFrame.join-450"><span class="linenos">450</span></a> <span class="n">cte</span>
+</span><span id="DataFrame.join-451"><a href="#DataFrame.join-451"><span class="linenos">451</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">join_expression</span><span class="o">.</span><span class="n">ctes</span>
+</span><span id="DataFrame.join-452"><a href="#DataFrame.join-452"><span class="linenos">452</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">table_names</span>
+</span><span id="DataFrame.join-453"><a href="#DataFrame.join-453"><span class="linenos">453</span></a> <span class="ow">and</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">!=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span>
+</span><span id="DataFrame.join-454"><a href="#DataFrame.join-454"><span class="linenos">454</span></a> <span class="p">]</span>
+</span><span id="DataFrame.join-455"><a href="#DataFrame.join-455"><span class="linenos">455</span></a> <span class="c1"># Determine the table to reference for the left side of the join by checking each of the left side</span>
+</span><span id="DataFrame.join-456"><a href="#DataFrame.join-456"><span class="linenos">456</span></a> <span class="c1"># tables and see if they have the column being referenced.</span>
+</span><span id="DataFrame.join-457"><a href="#DataFrame.join-457"><span class="linenos">457</span></a> <span class="n">join_column_pairs</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="DataFrame.join-458"><a href="#DataFrame.join-458"><span class="linenos">458</span></a> <span class="k">for</span> <span class="n">join_column</span> <span class="ow">in</span> <span class="n">join_columns</span><span class="p">:</span>
+</span><span id="DataFrame.join-459"><a href="#DataFrame.join-459"><span class="linenos">459</span></a> <span class="n">num_matching_ctes</span> <span class="o">=</span> <span class="mi">0</span>
+</span><span id="DataFrame.join-460"><a href="#DataFrame.join-460"><span class="linenos">460</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">potential_ctes</span><span class="p">:</span>
+</span><span id="DataFrame.join-461"><a href="#DataFrame.join-461"><span class="linenos">461</span></a> <span class="k">if</span> <span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span><span class="p">:</span>
+</span><span id="DataFrame.join-462"><a href="#DataFrame.join-462"><span class="linenos">462</span></a> <span class="n">left_column</span> <span class="o">=</span> <span class="n">join_column</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
+</span><span id="DataFrame.join-463"><a href="#DataFrame.join-463"><span class="linenos">463</span></a> <span class="n">right_column</span> <span class="o">=</span> <span class="n">join_column</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">)</span>
+</span><span id="DataFrame.join-464"><a href="#DataFrame.join-464"><span class="linenos">464</span></a> <span class="n">join_column_pairs</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">left_column</span><span class="p">,</span> <span class="n">right_column</span><span class="p">))</span>
+</span><span id="DataFrame.join-465"><a href="#DataFrame.join-465"><span class="linenos">465</span></a> <span class="n">num_matching_ctes</span> <span class="o">+=</span> <span class="mi">1</span>
+</span><span id="DataFrame.join-466"><a href="#DataFrame.join-466"><span class="linenos">466</span></a> <span class="k">if</span> <span class="n">num_matching_ctes</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
+</span><span id="DataFrame.join-467"><a href="#DataFrame.join-467"><span class="linenos">467</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+</span><span id="DataFrame.join-468"><a href="#DataFrame.join-468"><span class="linenos">468</span></a> <span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="si">}</span><span class="s2"> is ambiguous. Please specify the table name.&quot;</span>
+</span><span id="DataFrame.join-469"><a href="#DataFrame.join-469"><span class="linenos">469</span></a> <span class="p">)</span>
+</span><span id="DataFrame.join-470"><a href="#DataFrame.join-470"><span class="linenos">470</span></a> <span class="k">elif</span> <span class="n">num_matching_ctes</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+</span><span id="DataFrame.join-471"><a href="#DataFrame.join-471"><span class="linenos">471</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+</span><span id="DataFrame.join-472"><a href="#DataFrame.join-472"><span class="linenos">472</span></a> <span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="si">}</span><span class="s2"> does not exist in any of the tables.&quot;</span>
+</span><span id="DataFrame.join-473"><a href="#DataFrame.join-473"><span class="linenos">473</span></a> <span class="p">)</span>
+</span><span id="DataFrame.join-474"><a href="#DataFrame.join-474"><span class="linenos">474</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span>
+</span><span id="DataFrame.join-475"><a href="#DataFrame.join-475"><span class="linenos">475</span></a> <span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span>
+</span><span id="DataFrame.join-476"><a href="#DataFrame.join-476"><span class="linenos">476</span></a> <span class="p">[</span><span class="n">left_column</span> <span class="o">==</span> <span class="n">right_column</span> <span class="k">for</span> <span class="n">left_column</span><span class="p">,</span> <span class="n">right_column</span> <span class="ow">in</span> <span class="n">join_column_pairs</span><span class="p">],</span>
+</span><span id="DataFrame.join-477"><a href="#DataFrame.join-477"><span class="linenos">477</span></a> <span class="p">)</span>
+</span><span id="DataFrame.join-478"><a href="#DataFrame.join-478"><span class="linenos">478</span></a> <span class="n">join_column_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">left_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">left_col</span><span class="p">,</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">join_column_pairs</span><span class="p">]</span>
+</span><span id="DataFrame.join-479"><a href="#DataFrame.join-479"><span class="linenos">479</span></a> <span class="c1"># To match spark behavior only the join clause gets deduplicated and it gets put in the front of the column list</span>
+</span><span id="DataFrame.join-480"><a href="#DataFrame.join-480"><span class="linenos">480</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.join-481"><a href="#DataFrame.join-481"><span class="linenos">481</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame.join-482"><a href="#DataFrame.join-482"><span class="linenos">482</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">this</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Star</span><span class="p">)</span>
+</span><span id="DataFrame.join-483"><a href="#DataFrame.join-483"><span class="linenos">483</span></a> <span class="k">else</span> <span class="n">column</span><span class="o">.</span><span class="n">sql</span><span class="p">()</span>
+</span><span id="DataFrame.join-484"><a href="#DataFrame.join-484"><span class="linenos">484</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span>
+</span><span id="DataFrame.join-485"><a href="#DataFrame.join-485"><span class="linenos">485</span></a> <span class="p">]</span>
+</span><span id="DataFrame.join-486"><a href="#DataFrame.join-486"><span class="linenos">486</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.join-487"><a href="#DataFrame.join-487"><span class="linenos">487</span></a> <span class="n">column_name</span>
+</span><span id="DataFrame.join-488"><a href="#DataFrame.join-488"><span class="linenos">488</span></a> <span class="k">for</span> <span class="n">column_name</span> <span class="ow">in</span> <span class="n">select_column_names</span>
+</span><span id="DataFrame.join-489"><a href="#DataFrame.join-489"><span class="linenos">489</span></a> <span class="k">if</span> <span class="n">column_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">join_column_names</span>
+</span><span id="DataFrame.join-490"><a href="#DataFrame.join-490"><span class="linenos">490</span></a> <span class="p">]</span>
+</span><span id="DataFrame.join-491"><a href="#DataFrame.join-491"><span class="linenos">491</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="n">join_column_names</span> <span class="o">+</span> <span class="n">select_column_names</span>
+</span><span id="DataFrame.join-492"><a href="#DataFrame.join-492"><span class="linenos">492</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.join-493"><a href="#DataFrame.join-493"><span class="linenos">493</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame.join-494"><a href="#DataFrame.join-494"><span class="linenos">494</span></a><span class="sd"> Unique characteristics of join on expressions:</span>
+</span><span id="DataFrame.join-495"><a href="#DataFrame.join-495"><span class="linenos">495</span></a><span class="sd"> * There is no deduplication of the results.</span>
+</span><span id="DataFrame.join-496"><a href="#DataFrame.join-496"><span class="linenos">496</span></a><span class="sd"> * The left join dataframe columns go first and right come after. No sort preference is given to join columns</span>
+</span><span id="DataFrame.join-497"><a href="#DataFrame.join-497"><span class="linenos">497</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame.join-498"><a href="#DataFrame.join-498"><span class="linenos">498</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">join_columns</span><span class="p">,</span> <span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame.join-499"><a href="#DataFrame.join-499"><span class="linenos">499</span></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">join_columns</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
+</span><span id="DataFrame.join-500"><a href="#DataFrame.join-500"><span class="linenos">500</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">join_columns</span><span class="p">)]</span>
+</span><span id="DataFrame.join-501"><a href="#DataFrame.join-501"><span class="linenos">501</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">join_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+</span><span id="DataFrame.join-502"><a href="#DataFrame.join-502"><span class="linenos">502</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span><span class="p">]</span>
+</span><span id="DataFrame.join-503"><a href="#DataFrame.join-503"><span class="linenos">503</span></a>
+</span><span id="DataFrame.join-504"><a href="#DataFrame.join-504"><span class="linenos">504</span></a> <span class="c1"># Update the on expression with the actual join clause to replace the dummy one from before</span>
+</span><span id="DataFrame.join-505"><a href="#DataFrame.join-505"><span class="linenos">505</span></a> <span class="n">join_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;joins&quot;</span><span class="p">][</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;on&quot;</span><span class="p">,</span> <span class="n">join_clause</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.join-506"><a href="#DataFrame.join-506"><span class="linenos">506</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame.join-507"><a href="#DataFrame.join-507"><span class="linenos">507</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">)</span>
+</span><span id="DataFrame.join-508"><a href="#DataFrame.join-508"><span class="linenos">508</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="p">)</span>
+</span><span id="DataFrame.join-509"><a href="#DataFrame.join-509"><span class="linenos">509</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="o">.</span><span class="n">__wrapped__</span><span class="p">(</span><span class="n">new_df</span><span class="p">,</span> <span class="o">*</span><span class="n">select_column_names</span><span class="p">)</span>
+</span><span id="DataFrame.join-510"><a href="#DataFrame.join-510"><span class="linenos">510</span></a> <span class="k">return</span> <span class="n">new_df</span>
</span></pre></div>
@@ -1852,42 +1926,42 @@
</div>
<a class="headerlink" href="#DataFrame.orderBy"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.orderBy-475"><a href="#DataFrame.orderBy-475"><span class="linenos">475</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">ORDER_BY</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-476"><a href="#DataFrame.orderBy-476"><span class="linenos">476</span></a> <span class="k">def</span> <span class="nf">orderBy</span><span class="p">(</span>
-</span><span id="DataFrame.orderBy-477"><a href="#DataFrame.orderBy-477"><span class="linenos">477</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.orderBy-478"><a href="#DataFrame.orderBy-478"><span class="linenos">478</span></a> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span>
-</span><span id="DataFrame.orderBy-479"><a href="#DataFrame.orderBy-479"><span class="linenos">479</span></a> <span class="n">ascending</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.orderBy-480"><a href="#DataFrame.orderBy-480"><span class="linenos">480</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.orderBy-481"><a href="#DataFrame.orderBy-481"><span class="linenos">481</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame.orderBy-482"><a href="#DataFrame.orderBy-482"><span class="linenos">482</span></a><span class="sd"> This implementation lets any ordered columns take priority over whatever is provided in `ascending`. Spark</span>
-</span><span id="DataFrame.orderBy-483"><a href="#DataFrame.orderBy-483"><span class="linenos">483</span></a><span class="sd"> has irregular behavior and can result in runtime errors. Users shouldn&#39;t be mixing the two anyways so this</span>
-</span><span id="DataFrame.orderBy-484"><a href="#DataFrame.orderBy-484"><span class="linenos">484</span></a><span class="sd"> is unlikely to come up.</span>
-</span><span id="DataFrame.orderBy-485"><a href="#DataFrame.orderBy-485"><span class="linenos">485</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame.orderBy-486"><a href="#DataFrame.orderBy-486"><span class="linenos">486</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-487"><a href="#DataFrame.orderBy-487"><span class="linenos">487</span></a> <span class="n">pre_ordered_col_indexes</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.orderBy-488"><a href="#DataFrame.orderBy-488"><span class="linenos">488</span></a> <span class="n">x</span>
-</span><span id="DataFrame.orderBy-489"><a href="#DataFrame.orderBy-489"><span class="linenos">489</span></a> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="p">[</span>
-</span><span id="DataFrame.orderBy-490"><a href="#DataFrame.orderBy-490"><span class="linenos">490</span></a> <span class="n">i</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
-</span><span id="DataFrame.orderBy-491"><a href="#DataFrame.orderBy-491"><span class="linenos">491</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-492"><a href="#DataFrame.orderBy-492"><span class="linenos">492</span></a> <span class="p">]</span>
-</span><span id="DataFrame.orderBy-493"><a href="#DataFrame.orderBy-493"><span class="linenos">493</span></a> <span class="k">if</span> <span class="n">x</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
-</span><span id="DataFrame.orderBy-494"><a href="#DataFrame.orderBy-494"><span class="linenos">494</span></a> <span class="p">]</span>
-</span><span id="DataFrame.orderBy-495"><a href="#DataFrame.orderBy-495"><span class="linenos">495</span></a> <span class="k">if</span> <span class="n">ascending</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="DataFrame.orderBy-496"><a href="#DataFrame.orderBy-496"><span class="linenos">496</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="kc">True</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-497"><a href="#DataFrame.orderBy-497"><span class="linenos">497</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ascending</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
-</span><span id="DataFrame.orderBy-498"><a href="#DataFrame.orderBy-498"><span class="linenos">498</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="n">ascending</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-499"><a href="#DataFrame.orderBy-499"><span class="linenos">499</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ascending</span><span class="p">)]</span>
-</span><span id="DataFrame.orderBy-500"><a href="#DataFrame.orderBy-500"><span class="linenos">500</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
-</span><span id="DataFrame.orderBy-501"><a href="#DataFrame.orderBy-501"><span class="linenos">501</span></a> <span class="n">ascending</span>
-</span><span id="DataFrame.orderBy-502"><a href="#DataFrame.orderBy-502"><span class="linenos">502</span></a> <span class="p">),</span> <span class="s2">&quot;The length of items in ascending must equal the number of columns provided&quot;</span>
-</span><span id="DataFrame.orderBy-503"><a href="#DataFrame.orderBy-503"><span class="linenos">503</span></a> <span class="n">col_and_ascending</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">ascending</span><span class="p">))</span>
-</span><span id="DataFrame.orderBy-504"><a href="#DataFrame.orderBy-504"><span class="linenos">504</span></a> <span class="n">order_by_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.orderBy-505"><a href="#DataFrame.orderBy-505"><span class="linenos">505</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="ow">not</span> <span class="n">asc</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-506"><a href="#DataFrame.orderBy-506"><span class="linenos">506</span></a> <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">pre_ordered_col_indexes</span>
-</span><span id="DataFrame.orderBy-507"><a href="#DataFrame.orderBy-507"><span class="linenos">507</span></a> <span class="k">else</span> <span class="n">columns</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">column_expression</span>
-</span><span id="DataFrame.orderBy-508"><a href="#DataFrame.orderBy-508"><span class="linenos">508</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">asc</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">col_and_ascending</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-509"><a href="#DataFrame.orderBy-509"><span class="linenos">509</span></a> <span class="p">]</span>
-</span><span id="DataFrame.orderBy-510"><a href="#DataFrame.orderBy-510"><span class="linenos">510</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="o">*</span><span class="n">order_by_columns</span><span class="p">))</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.orderBy-512"><a href="#DataFrame.orderBy-512"><span class="linenos">512</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">ORDER_BY</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-513"><a href="#DataFrame.orderBy-513"><span class="linenos">513</span></a> <span class="k">def</span> <span class="nf">orderBy</span><span class="p">(</span>
+</span><span id="DataFrame.orderBy-514"><a href="#DataFrame.orderBy-514"><span class="linenos">514</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.orderBy-515"><a href="#DataFrame.orderBy-515"><span class="linenos">515</span></a> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span>
+</span><span id="DataFrame.orderBy-516"><a href="#DataFrame.orderBy-516"><span class="linenos">516</span></a> <span class="n">ascending</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.orderBy-517"><a href="#DataFrame.orderBy-517"><span class="linenos">517</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.orderBy-518"><a href="#DataFrame.orderBy-518"><span class="linenos">518</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame.orderBy-519"><a href="#DataFrame.orderBy-519"><span class="linenos">519</span></a><span class="sd"> This implementation lets any ordered columns take priority over whatever is provided in `ascending`. Spark</span>
+</span><span id="DataFrame.orderBy-520"><a href="#DataFrame.orderBy-520"><span class="linenos">520</span></a><span class="sd"> has irregular behavior and can result in runtime errors. Users shouldn&#39;t be mixing the two anyways so this</span>
+</span><span id="DataFrame.orderBy-521"><a href="#DataFrame.orderBy-521"><span class="linenos">521</span></a><span class="sd"> is unlikely to come up.</span>
+</span><span id="DataFrame.orderBy-522"><a href="#DataFrame.orderBy-522"><span class="linenos">522</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame.orderBy-523"><a href="#DataFrame.orderBy-523"><span class="linenos">523</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-524"><a href="#DataFrame.orderBy-524"><span class="linenos">524</span></a> <span class="n">pre_ordered_col_indexes</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.orderBy-525"><a href="#DataFrame.orderBy-525"><span class="linenos">525</span></a> <span class="n">x</span>
+</span><span id="DataFrame.orderBy-526"><a href="#DataFrame.orderBy-526"><span class="linenos">526</span></a> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="p">[</span>
+</span><span id="DataFrame.orderBy-527"><a href="#DataFrame.orderBy-527"><span class="linenos">527</span></a> <span class="n">i</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
+</span><span id="DataFrame.orderBy-528"><a href="#DataFrame.orderBy-528"><span class="linenos">528</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-529"><a href="#DataFrame.orderBy-529"><span class="linenos">529</span></a> <span class="p">]</span>
+</span><span id="DataFrame.orderBy-530"><a href="#DataFrame.orderBy-530"><span class="linenos">530</span></a> <span class="k">if</span> <span class="n">x</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
+</span><span id="DataFrame.orderBy-531"><a href="#DataFrame.orderBy-531"><span class="linenos">531</span></a> <span class="p">]</span>
+</span><span id="DataFrame.orderBy-532"><a href="#DataFrame.orderBy-532"><span class="linenos">532</span></a> <span class="k">if</span> <span class="n">ascending</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="DataFrame.orderBy-533"><a href="#DataFrame.orderBy-533"><span class="linenos">533</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="kc">True</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-534"><a href="#DataFrame.orderBy-534"><span class="linenos">534</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ascending</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
+</span><span id="DataFrame.orderBy-535"><a href="#DataFrame.orderBy-535"><span class="linenos">535</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="n">ascending</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-536"><a href="#DataFrame.orderBy-536"><span class="linenos">536</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ascending</span><span class="p">)]</span>
+</span><span id="DataFrame.orderBy-537"><a href="#DataFrame.orderBy-537"><span class="linenos">537</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
+</span><span id="DataFrame.orderBy-538"><a href="#DataFrame.orderBy-538"><span class="linenos">538</span></a> <span class="n">ascending</span>
+</span><span id="DataFrame.orderBy-539"><a href="#DataFrame.orderBy-539"><span class="linenos">539</span></a> <span class="p">),</span> <span class="s2">&quot;The length of items in ascending must equal the number of columns provided&quot;</span>
+</span><span id="DataFrame.orderBy-540"><a href="#DataFrame.orderBy-540"><span class="linenos">540</span></a> <span class="n">col_and_ascending</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">ascending</span><span class="p">))</span>
+</span><span id="DataFrame.orderBy-541"><a href="#DataFrame.orderBy-541"><span class="linenos">541</span></a> <span class="n">order_by_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.orderBy-542"><a href="#DataFrame.orderBy-542"><span class="linenos">542</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="ow">not</span> <span class="n">asc</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-543"><a href="#DataFrame.orderBy-543"><span class="linenos">543</span></a> <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">pre_ordered_col_indexes</span>
+</span><span id="DataFrame.orderBy-544"><a href="#DataFrame.orderBy-544"><span class="linenos">544</span></a> <span class="k">else</span> <span class="n">columns</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">column_expression</span>
+</span><span id="DataFrame.orderBy-545"><a href="#DataFrame.orderBy-545"><span class="linenos">545</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">asc</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">col_and_ascending</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-546"><a href="#DataFrame.orderBy-546"><span class="linenos">546</span></a> <span class="p">]</span>
+</span><span id="DataFrame.orderBy-547"><a href="#DataFrame.orderBy-547"><span class="linenos">547</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="o">*</span><span class="n">order_by_columns</span><span class="p">))</span>
</span></pre></div>
@@ -1910,42 +1984,42 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.sort"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.sort-475"><a href="#DataFrame.sort-475"><span class="linenos">475</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">ORDER_BY</span><span class="p">)</span>
-</span><span id="DataFrame.sort-476"><a href="#DataFrame.sort-476"><span class="linenos">476</span></a> <span class="k">def</span> <span class="nf">orderBy</span><span class="p">(</span>
-</span><span id="DataFrame.sort-477"><a href="#DataFrame.sort-477"><span class="linenos">477</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.sort-478"><a href="#DataFrame.sort-478"><span class="linenos">478</span></a> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span>
-</span><span id="DataFrame.sort-479"><a href="#DataFrame.sort-479"><span class="linenos">479</span></a> <span class="n">ascending</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.sort-480"><a href="#DataFrame.sort-480"><span class="linenos">480</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.sort-481"><a href="#DataFrame.sort-481"><span class="linenos">481</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame.sort-482"><a href="#DataFrame.sort-482"><span class="linenos">482</span></a><span class="sd"> This implementation lets any ordered columns take priority over whatever is provided in `ascending`. Spark</span>
-</span><span id="DataFrame.sort-483"><a href="#DataFrame.sort-483"><span class="linenos">483</span></a><span class="sd"> has irregular behavior and can result in runtime errors. Users shouldn&#39;t be mixing the two anyways so this</span>
-</span><span id="DataFrame.sort-484"><a href="#DataFrame.sort-484"><span class="linenos">484</span></a><span class="sd"> is unlikely to come up.</span>
-</span><span id="DataFrame.sort-485"><a href="#DataFrame.sort-485"><span class="linenos">485</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame.sort-486"><a href="#DataFrame.sort-486"><span class="linenos">486</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame.sort-487"><a href="#DataFrame.sort-487"><span class="linenos">487</span></a> <span class="n">pre_ordered_col_indexes</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.sort-488"><a href="#DataFrame.sort-488"><span class="linenos">488</span></a> <span class="n">x</span>
-</span><span id="DataFrame.sort-489"><a href="#DataFrame.sort-489"><span class="linenos">489</span></a> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="p">[</span>
-</span><span id="DataFrame.sort-490"><a href="#DataFrame.sort-490"><span class="linenos">490</span></a> <span class="n">i</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
-</span><span id="DataFrame.sort-491"><a href="#DataFrame.sort-491"><span class="linenos">491</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.sort-492"><a href="#DataFrame.sort-492"><span class="linenos">492</span></a> <span class="p">]</span>
-</span><span id="DataFrame.sort-493"><a href="#DataFrame.sort-493"><span class="linenos">493</span></a> <span class="k">if</span> <span class="n">x</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
-</span><span id="DataFrame.sort-494"><a href="#DataFrame.sort-494"><span class="linenos">494</span></a> <span class="p">]</span>
-</span><span id="DataFrame.sort-495"><a href="#DataFrame.sort-495"><span class="linenos">495</span></a> <span class="k">if</span> <span class="n">ascending</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="DataFrame.sort-496"><a href="#DataFrame.sort-496"><span class="linenos">496</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="kc">True</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.sort-497"><a href="#DataFrame.sort-497"><span class="linenos">497</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ascending</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
-</span><span id="DataFrame.sort-498"><a href="#DataFrame.sort-498"><span class="linenos">498</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="n">ascending</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.sort-499"><a href="#DataFrame.sort-499"><span class="linenos">499</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ascending</span><span class="p">)]</span>
-</span><span id="DataFrame.sort-500"><a href="#DataFrame.sort-500"><span class="linenos">500</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
-</span><span id="DataFrame.sort-501"><a href="#DataFrame.sort-501"><span class="linenos">501</span></a> <span class="n">ascending</span>
-</span><span id="DataFrame.sort-502"><a href="#DataFrame.sort-502"><span class="linenos">502</span></a> <span class="p">),</span> <span class="s2">&quot;The length of items in ascending must equal the number of columns provided&quot;</span>
-</span><span id="DataFrame.sort-503"><a href="#DataFrame.sort-503"><span class="linenos">503</span></a> <span class="n">col_and_ascending</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">ascending</span><span class="p">))</span>
-</span><span id="DataFrame.sort-504"><a href="#DataFrame.sort-504"><span class="linenos">504</span></a> <span class="n">order_by_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.sort-505"><a href="#DataFrame.sort-505"><span class="linenos">505</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="ow">not</span> <span class="n">asc</span><span class="p">)</span>
-</span><span id="DataFrame.sort-506"><a href="#DataFrame.sort-506"><span class="linenos">506</span></a> <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">pre_ordered_col_indexes</span>
-</span><span id="DataFrame.sort-507"><a href="#DataFrame.sort-507"><span class="linenos">507</span></a> <span class="k">else</span> <span class="n">columns</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">column_expression</span>
-</span><span id="DataFrame.sort-508"><a href="#DataFrame.sort-508"><span class="linenos">508</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">asc</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">col_and_ascending</span><span class="p">)</span>
-</span><span id="DataFrame.sort-509"><a href="#DataFrame.sort-509"><span class="linenos">509</span></a> <span class="p">]</span>
-</span><span id="DataFrame.sort-510"><a href="#DataFrame.sort-510"><span class="linenos">510</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="o">*</span><span class="n">order_by_columns</span><span class="p">))</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.sort-512"><a href="#DataFrame.sort-512"><span class="linenos">512</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">ORDER_BY</span><span class="p">)</span>
+</span><span id="DataFrame.sort-513"><a href="#DataFrame.sort-513"><span class="linenos">513</span></a> <span class="k">def</span> <span class="nf">orderBy</span><span class="p">(</span>
+</span><span id="DataFrame.sort-514"><a href="#DataFrame.sort-514"><span class="linenos">514</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.sort-515"><a href="#DataFrame.sort-515"><span class="linenos">515</span></a> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span>
+</span><span id="DataFrame.sort-516"><a href="#DataFrame.sort-516"><span class="linenos">516</span></a> <span class="n">ascending</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.sort-517"><a href="#DataFrame.sort-517"><span class="linenos">517</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.sort-518"><a href="#DataFrame.sort-518"><span class="linenos">518</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame.sort-519"><a href="#DataFrame.sort-519"><span class="linenos">519</span></a><span class="sd"> This implementation lets any ordered columns take priority over whatever is provided in `ascending`. Spark</span>
+</span><span id="DataFrame.sort-520"><a href="#DataFrame.sort-520"><span class="linenos">520</span></a><span class="sd"> has irregular behavior and can result in runtime errors. Users shouldn&#39;t be mixing the two anyways so this</span>
+</span><span id="DataFrame.sort-521"><a href="#DataFrame.sort-521"><span class="linenos">521</span></a><span class="sd"> is unlikely to come up.</span>
+</span><span id="DataFrame.sort-522"><a href="#DataFrame.sort-522"><span class="linenos">522</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame.sort-523"><a href="#DataFrame.sort-523"><span class="linenos">523</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame.sort-524"><a href="#DataFrame.sort-524"><span class="linenos">524</span></a> <span class="n">pre_ordered_col_indexes</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.sort-525"><a href="#DataFrame.sort-525"><span class="linenos">525</span></a> <span class="n">x</span>
+</span><span id="DataFrame.sort-526"><a href="#DataFrame.sort-526"><span class="linenos">526</span></a> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="p">[</span>
+</span><span id="DataFrame.sort-527"><a href="#DataFrame.sort-527"><span class="linenos">527</span></a> <span class="n">i</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
+</span><span id="DataFrame.sort-528"><a href="#DataFrame.sort-528"><span class="linenos">528</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.sort-529"><a href="#DataFrame.sort-529"><span class="linenos">529</span></a> <span class="p">]</span>
+</span><span id="DataFrame.sort-530"><a href="#DataFrame.sort-530"><span class="linenos">530</span></a> <span class="k">if</span> <span class="n">x</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
+</span><span id="DataFrame.sort-531"><a href="#DataFrame.sort-531"><span class="linenos">531</span></a> <span class="p">]</span>
+</span><span id="DataFrame.sort-532"><a href="#DataFrame.sort-532"><span class="linenos">532</span></a> <span class="k">if</span> <span class="n">ascending</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="DataFrame.sort-533"><a href="#DataFrame.sort-533"><span class="linenos">533</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="kc">True</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.sort-534"><a href="#DataFrame.sort-534"><span class="linenos">534</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ascending</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
+</span><span id="DataFrame.sort-535"><a href="#DataFrame.sort-535"><span class="linenos">535</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="n">ascending</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.sort-536"><a href="#DataFrame.sort-536"><span class="linenos">536</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ascending</span><span class="p">)]</span>
+</span><span id="DataFrame.sort-537"><a href="#DataFrame.sort-537"><span class="linenos">537</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
+</span><span id="DataFrame.sort-538"><a href="#DataFrame.sort-538"><span class="linenos">538</span></a> <span class="n">ascending</span>
+</span><span id="DataFrame.sort-539"><a href="#DataFrame.sort-539"><span class="linenos">539</span></a> <span class="p">),</span> <span class="s2">&quot;The length of items in ascending must equal the number of columns provided&quot;</span>
+</span><span id="DataFrame.sort-540"><a href="#DataFrame.sort-540"><span class="linenos">540</span></a> <span class="n">col_and_ascending</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">ascending</span><span class="p">))</span>
+</span><span id="DataFrame.sort-541"><a href="#DataFrame.sort-541"><span class="linenos">541</span></a> <span class="n">order_by_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.sort-542"><a href="#DataFrame.sort-542"><span class="linenos">542</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="ow">not</span> <span class="n">asc</span><span class="p">)</span>
+</span><span id="DataFrame.sort-543"><a href="#DataFrame.sort-543"><span class="linenos">543</span></a> <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">pre_ordered_col_indexes</span>
+</span><span id="DataFrame.sort-544"><a href="#DataFrame.sort-544"><span class="linenos">544</span></a> <span class="k">else</span> <span class="n">columns</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">column_expression</span>
+</span><span id="DataFrame.sort-545"><a href="#DataFrame.sort-545"><span class="linenos">545</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">asc</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">col_and_ascending</span><span class="p">)</span>
+</span><span id="DataFrame.sort-546"><a href="#DataFrame.sort-546"><span class="linenos">546</span></a> <span class="p">]</span>
+</span><span id="DataFrame.sort-547"><a href="#DataFrame.sort-547"><span class="linenos">547</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="o">*</span><span class="n">order_by_columns</span><span class="p">))</span>
</span></pre></div>
@@ -1968,9 +2042,9 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.union"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.union-514"><a href="#DataFrame.union-514"><span class="linenos">514</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.union-515"><a href="#DataFrame.union-515"><span class="linenos">515</span></a> <span class="k">def</span> <span class="nf">union</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.union-516"><a href="#DataFrame.union-516"><span class="linenos">516</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.union-551"><a href="#DataFrame.union-551"><span class="linenos">551</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.union-552"><a href="#DataFrame.union-552"><span class="linenos">552</span></a> <span class="k">def</span> <span class="nf">union</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.union-553"><a href="#DataFrame.union-553"><span class="linenos">553</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span></pre></div>
@@ -1989,9 +2063,9 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.unionAll"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.unionAll-514"><a href="#DataFrame.unionAll-514"><span class="linenos">514</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.unionAll-515"><a href="#DataFrame.unionAll-515"><span class="linenos">515</span></a> <span class="k">def</span> <span class="nf">union</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.unionAll-516"><a href="#DataFrame.unionAll-516"><span class="linenos">516</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.unionAll-551"><a href="#DataFrame.unionAll-551"><span class="linenos">551</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.unionAll-552"><a href="#DataFrame.unionAll-552"><span class="linenos">552</span></a> <span class="k">def</span> <span class="nf">union</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.unionAll-553"><a href="#DataFrame.unionAll-553"><span class="linenos">553</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span></pre></div>
@@ -2010,34 +2084,34 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.unionByName"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.unionByName-520"><a href="#DataFrame.unionByName-520"><span class="linenos">520</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.unionByName-521"><a href="#DataFrame.unionByName-521"><span class="linenos">521</span></a> <span class="k">def</span> <span class="nf">unionByName</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">allowMissingColumns</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
-</span><span id="DataFrame.unionByName-522"><a href="#DataFrame.unionByName-522"><span class="linenos">522</span></a> <span class="n">l_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">columns</span>
-</span><span id="DataFrame.unionByName-523"><a href="#DataFrame.unionByName-523"><span class="linenos">523</span></a> <span class="n">r_columns</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">columns</span>
-</span><span id="DataFrame.unionByName-524"><a href="#DataFrame.unionByName-524"><span class="linenos">524</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-525"><a href="#DataFrame.unionByName-525"><span class="linenos">525</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
-</span><span id="DataFrame.unionByName-526"><a href="#DataFrame.unionByName-526"><span class="linenos">526</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
-</span><span id="DataFrame.unionByName-527"><a href="#DataFrame.unionByName-527"><span class="linenos">527</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-528"><a href="#DataFrame.unionByName-528"><span class="linenos">528</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="p">[]</span>
-</span><span id="DataFrame.unionByName-529"><a href="#DataFrame.unionByName-529"><span class="linenos">529</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="p">[]</span>
-</span><span id="DataFrame.unionByName-530"><a href="#DataFrame.unionByName-530"><span class="linenos">530</span></a> <span class="n">r_columns_unused</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="n">r_columns</span><span class="p">)</span>
-</span><span id="DataFrame.unionByName-531"><a href="#DataFrame.unionByName-531"><span class="linenos">531</span></a> <span class="k">for</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">l_columns</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-532"><a href="#DataFrame.unionByName-532"><span class="linenos">532</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
-</span><span id="DataFrame.unionByName-533"><a href="#DataFrame.unionByName-533"><span class="linenos">533</span></a> <span class="k">if</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">r_columns</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-534"><a href="#DataFrame.unionByName-534"><span class="linenos">534</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
-</span><span id="DataFrame.unionByName-535"><a href="#DataFrame.unionByName-535"><span class="linenos">535</span></a> <span class="n">r_columns_unused</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
-</span><span id="DataFrame.unionByName-536"><a href="#DataFrame.unionByName-536"><span class="linenos">536</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-537"><a href="#DataFrame.unionByName-537"><span class="linenos">537</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">l_column</span><span class="p">))</span>
-</span><span id="DataFrame.unionByName-538"><a href="#DataFrame.unionByName-538"><span class="linenos">538</span></a> <span class="k">for</span> <span class="n">r_column</span> <span class="ow">in</span> <span class="n">r_columns_unused</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-539"><a href="#DataFrame.unionByName-539"><span class="linenos">539</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">r_column</span><span class="p">))</span>
-</span><span id="DataFrame.unionByName-540"><a href="#DataFrame.unionByName-540"><span class="linenos">540</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">r_column</span><span class="p">)</span>
-</span><span id="DataFrame.unionByName-541"><a href="#DataFrame.unionByName-541"><span class="linenos">541</span></a> <span class="n">r_df</span> <span class="o">=</span> <span class="p">(</span>
-</span><span id="DataFrame.unionByName-542"><a href="#DataFrame.unionByName-542"><span class="linenos">542</span></a> <span class="n">other</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">r_expressions</span><span class="p">))</span>
-</span><span id="DataFrame.unionByName-543"><a href="#DataFrame.unionByName-543"><span class="linenos">543</span></a> <span class="p">)</span>
-</span><span id="DataFrame.unionByName-544"><a href="#DataFrame.unionByName-544"><span class="linenos">544</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.unionByName-545"><a href="#DataFrame.unionByName-545"><span class="linenos">545</span></a> <span class="k">if</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-546"><a href="#DataFrame.unionByName-546"><span class="linenos">546</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">l_expressions</span><span class="p">))</span>
-</span><span id="DataFrame.unionByName-547"><a href="#DataFrame.unionByName-547"><span class="linenos">547</span></a> <span class="k">return</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">r_df</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.unionByName-557"><a href="#DataFrame.unionByName-557"><span class="linenos">557</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.unionByName-558"><a href="#DataFrame.unionByName-558"><span class="linenos">558</span></a> <span class="k">def</span> <span class="nf">unionByName</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">allowMissingColumns</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
+</span><span id="DataFrame.unionByName-559"><a href="#DataFrame.unionByName-559"><span class="linenos">559</span></a> <span class="n">l_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">columns</span>
+</span><span id="DataFrame.unionByName-560"><a href="#DataFrame.unionByName-560"><span class="linenos">560</span></a> <span class="n">r_columns</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">columns</span>
+</span><span id="DataFrame.unionByName-561"><a href="#DataFrame.unionByName-561"><span class="linenos">561</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-562"><a href="#DataFrame.unionByName-562"><span class="linenos">562</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
+</span><span id="DataFrame.unionByName-563"><a href="#DataFrame.unionByName-563"><span class="linenos">563</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
+</span><span id="DataFrame.unionByName-564"><a href="#DataFrame.unionByName-564"><span class="linenos">564</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-565"><a href="#DataFrame.unionByName-565"><span class="linenos">565</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="DataFrame.unionByName-566"><a href="#DataFrame.unionByName-566"><span class="linenos">566</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="DataFrame.unionByName-567"><a href="#DataFrame.unionByName-567"><span class="linenos">567</span></a> <span class="n">r_columns_unused</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="n">r_columns</span><span class="p">)</span>
+</span><span id="DataFrame.unionByName-568"><a href="#DataFrame.unionByName-568"><span class="linenos">568</span></a> <span class="k">for</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">l_columns</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-569"><a href="#DataFrame.unionByName-569"><span class="linenos">569</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
+</span><span id="DataFrame.unionByName-570"><a href="#DataFrame.unionByName-570"><span class="linenos">570</span></a> <span class="k">if</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">r_columns</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-571"><a href="#DataFrame.unionByName-571"><span class="linenos">571</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
+</span><span id="DataFrame.unionByName-572"><a href="#DataFrame.unionByName-572"><span class="linenos">572</span></a> <span class="n">r_columns_unused</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
+</span><span id="DataFrame.unionByName-573"><a href="#DataFrame.unionByName-573"><span class="linenos">573</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-574"><a href="#DataFrame.unionByName-574"><span class="linenos">574</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">l_column</span><span class="p">))</span>
+</span><span id="DataFrame.unionByName-575"><a href="#DataFrame.unionByName-575"><span class="linenos">575</span></a> <span class="k">for</span> <span class="n">r_column</span> <span class="ow">in</span> <span class="n">r_columns_unused</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-576"><a href="#DataFrame.unionByName-576"><span class="linenos">576</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">r_column</span><span class="p">))</span>
+</span><span id="DataFrame.unionByName-577"><a href="#DataFrame.unionByName-577"><span class="linenos">577</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">r_column</span><span class="p">)</span>
+</span><span id="DataFrame.unionByName-578"><a href="#DataFrame.unionByName-578"><span class="linenos">578</span></a> <span class="n">r_df</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="DataFrame.unionByName-579"><a href="#DataFrame.unionByName-579"><span class="linenos">579</span></a> <span class="n">other</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">r_expressions</span><span class="p">))</span>
+</span><span id="DataFrame.unionByName-580"><a href="#DataFrame.unionByName-580"><span class="linenos">580</span></a> <span class="p">)</span>
+</span><span id="DataFrame.unionByName-581"><a href="#DataFrame.unionByName-581"><span class="linenos">581</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.unionByName-582"><a href="#DataFrame.unionByName-582"><span class="linenos">582</span></a> <span class="k">if</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-583"><a href="#DataFrame.unionByName-583"><span class="linenos">583</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">l_expressions</span><span class="p">))</span>
+</span><span id="DataFrame.unionByName-584"><a href="#DataFrame.unionByName-584"><span class="linenos">584</span></a> <span class="k">return</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">r_df</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span></pre></div>
@@ -2056,9 +2130,9 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.intersect"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.intersect-549"><a href="#DataFrame.intersect-549"><span class="linenos">549</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.intersect-550"><a href="#DataFrame.intersect-550"><span class="linenos">550</span></a> <span class="k">def</span> <span class="nf">intersect</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.intersect-551"><a href="#DataFrame.intersect-551"><span class="linenos">551</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.intersect-586"><a href="#DataFrame.intersect-586"><span class="linenos">586</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.intersect-587"><a href="#DataFrame.intersect-587"><span class="linenos">587</span></a> <span class="k">def</span> <span class="nf">intersect</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.intersect-588"><a href="#DataFrame.intersect-588"><span class="linenos">588</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
</span></pre></div>
@@ -2077,9 +2151,9 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.intersectAll"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.intersectAll-553"><a href="#DataFrame.intersectAll-553"><span class="linenos">553</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.intersectAll-554"><a href="#DataFrame.intersectAll-554"><span class="linenos">554</span></a> <span class="k">def</span> <span class="nf">intersectAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.intersectAll-555"><a href="#DataFrame.intersectAll-555"><span class="linenos">555</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.intersectAll-590"><a href="#DataFrame.intersectAll-590"><span class="linenos">590</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.intersectAll-591"><a href="#DataFrame.intersectAll-591"><span class="linenos">591</span></a> <span class="k">def</span> <span class="nf">intersectAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.intersectAll-592"><a href="#DataFrame.intersectAll-592"><span class="linenos">592</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span></pre></div>
@@ -2098,9 +2172,9 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.exceptAll"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.exceptAll-557"><a href="#DataFrame.exceptAll-557"><span class="linenos">557</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.exceptAll-558"><a href="#DataFrame.exceptAll-558"><span class="linenos">558</span></a> <span class="k">def</span> <span class="nf">exceptAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.exceptAll-559"><a href="#DataFrame.exceptAll-559"><span class="linenos">559</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Except</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.exceptAll-594"><a href="#DataFrame.exceptAll-594"><span class="linenos">594</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.exceptAll-595"><a href="#DataFrame.exceptAll-595"><span class="linenos">595</span></a> <span class="k">def</span> <span class="nf">exceptAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.exceptAll-596"><a href="#DataFrame.exceptAll-596"><span class="linenos">596</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Except</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span></pre></div>
@@ -2119,9 +2193,9 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.distinct"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.distinct-561"><a href="#DataFrame.distinct-561"><span class="linenos">561</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.distinct-562"><a href="#DataFrame.distinct-562"><span class="linenos">562</span></a> <span class="k">def</span> <span class="nf">distinct</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.distinct-563"><a href="#DataFrame.distinct-563"><span class="linenos">563</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">distinct</span><span class="p">())</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.distinct-598"><a href="#DataFrame.distinct-598"><span class="linenos">598</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.distinct-599"><a href="#DataFrame.distinct-599"><span class="linenos">599</span></a> <span class="k">def</span> <span class="nf">distinct</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.distinct-600"><a href="#DataFrame.distinct-600"><span class="linenos">600</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">distinct</span><span class="p">())</span>
</span></pre></div>
@@ -2140,18 +2214,18 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.dropDuplicates"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.dropDuplicates-565"><a href="#DataFrame.dropDuplicates-565"><span class="linenos">565</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.dropDuplicates-566"><a href="#DataFrame.dropDuplicates-566"><span class="linenos">566</span></a> <span class="k">def</span> <span class="nf">dropDuplicates</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
-</span><span id="DataFrame.dropDuplicates-567"><a href="#DataFrame.dropDuplicates-567"><span class="linenos">567</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">subset</span><span class="p">:</span>
-</span><span id="DataFrame.dropDuplicates-568"><a href="#DataFrame.dropDuplicates-568"><span class="linenos">568</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
-</span><span id="DataFrame.dropDuplicates-569"><a href="#DataFrame.dropDuplicates-569"><span class="linenos">569</span></a> <span class="n">column_names</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
-</span><span id="DataFrame.dropDuplicates-570"><a href="#DataFrame.dropDuplicates-570"><span class="linenos">570</span></a> <span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span>
-</span><span id="DataFrame.dropDuplicates-571"><a href="#DataFrame.dropDuplicates-571"><span class="linenos">571</span></a> <span class="k">return</span> <span class="p">(</span>
-</span><span id="DataFrame.dropDuplicates-572"><a href="#DataFrame.dropDuplicates-572"><span class="linenos">572</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.dropDuplicates-573"><a href="#DataFrame.dropDuplicates-573"><span class="linenos">573</span></a> <span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span>
-</span><span id="DataFrame.dropDuplicates-574"><a href="#DataFrame.dropDuplicates-574"><span class="linenos">574</span></a> <span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
-</span><span id="DataFrame.dropDuplicates-575"><a href="#DataFrame.dropDuplicates-575"><span class="linenos">575</span></a> <span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span>
-</span><span id="DataFrame.dropDuplicates-576"><a href="#DataFrame.dropDuplicates-576"><span class="linenos">576</span></a> <span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.dropDuplicates-602"><a href="#DataFrame.dropDuplicates-602"><span class="linenos">602</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.dropDuplicates-603"><a href="#DataFrame.dropDuplicates-603"><span class="linenos">603</span></a> <span class="k">def</span> <span class="nf">dropDuplicates</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
+</span><span id="DataFrame.dropDuplicates-604"><a href="#DataFrame.dropDuplicates-604"><span class="linenos">604</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">subset</span><span class="p">:</span>
+</span><span id="DataFrame.dropDuplicates-605"><a href="#DataFrame.dropDuplicates-605"><span class="linenos">605</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
+</span><span id="DataFrame.dropDuplicates-606"><a href="#DataFrame.dropDuplicates-606"><span class="linenos">606</span></a> <span class="n">column_names</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
+</span><span id="DataFrame.dropDuplicates-607"><a href="#DataFrame.dropDuplicates-607"><span class="linenos">607</span></a> <span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span>
+</span><span id="DataFrame.dropDuplicates-608"><a href="#DataFrame.dropDuplicates-608"><span class="linenos">608</span></a> <span class="k">return</span> <span class="p">(</span>
+</span><span id="DataFrame.dropDuplicates-609"><a href="#DataFrame.dropDuplicates-609"><span class="linenos">609</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.dropDuplicates-610"><a href="#DataFrame.dropDuplicates-610"><span class="linenos">610</span></a> <span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span>
+</span><span id="DataFrame.dropDuplicates-611"><a href="#DataFrame.dropDuplicates-611"><span class="linenos">611</span></a> <span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
+</span><span id="DataFrame.dropDuplicates-612"><a href="#DataFrame.dropDuplicates-612"><span class="linenos">612</span></a> <span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span>
+</span><span id="DataFrame.dropDuplicates-613"><a href="#DataFrame.dropDuplicates-613"><span class="linenos">613</span></a> <span class="p">)</span>
</span></pre></div>
@@ -2170,38 +2244,38 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.dropna"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.dropna-578"><a href="#DataFrame.dropna-578"><span class="linenos">578</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-579"><a href="#DataFrame.dropna-579"><span class="linenos">579</span></a> <span class="k">def</span> <span class="nf">dropna</span><span class="p">(</span>
-</span><span id="DataFrame.dropna-580"><a href="#DataFrame.dropna-580"><span class="linenos">580</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.dropna-581"><a href="#DataFrame.dropna-581"><span class="linenos">581</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
-</span><span id="DataFrame.dropna-582"><a href="#DataFrame.dropna-582"><span class="linenos">582</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.dropna-583"><a href="#DataFrame.dropna-583"><span class="linenos">583</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.dropna-584"><a href="#DataFrame.dropna-584"><span class="linenos">584</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.dropna-585"><a href="#DataFrame.dropna-585"><span class="linenos">585</span></a> <span class="n">minimum_non_null</span> <span class="o">=</span> <span class="n">thresh</span> <span class="ow">or</span> <span class="mi">0</span> <span class="c1"># will be determined later if thresh is null</span>
-</span><span id="DataFrame.dropna-586"><a href="#DataFrame.dropna-586"><span class="linenos">586</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.dropna-587"><a href="#DataFrame.dropna-587"><span class="linenos">587</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-588"><a href="#DataFrame.dropna-588"><span class="linenos">588</span></a> <span class="k">if</span> <span class="n">subset</span><span class="p">:</span>
-</span><span id="DataFrame.dropna-589"><a href="#DataFrame.dropna-589"><span class="linenos">589</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-590"><a href="#DataFrame.dropna-590"><span class="linenos">590</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.dropna-591"><a href="#DataFrame.dropna-591"><span class="linenos">591</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="n">all_columns</span>
-</span><span id="DataFrame.dropna-592"><a href="#DataFrame.dropna-592"><span class="linenos">592</span></a> <span class="k">if</span> <span class="n">thresh</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="DataFrame.dropna-593"><a href="#DataFrame.dropna-593"><span class="linenos">593</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">how</span> <span class="o">==</span> <span class="s2">&quot;any&quot;</span> <span class="k">else</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-594"><a href="#DataFrame.dropna-594"><span class="linenos">594</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.dropna-595"><a href="#DataFrame.dropna-595"><span class="linenos">595</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span> <span class="o">-</span> <span class="n">minimum_non_null</span> <span class="o">+</span> <span class="mi">1</span>
-</span><span id="DataFrame.dropna-596"><a href="#DataFrame.dropna-596"><span class="linenos">596</span></a> <span class="k">if</span> <span class="n">minimum_num_nulls</span> <span class="o">&gt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">):</span>
-</span><span id="DataFrame.dropna-597"><a href="#DataFrame.dropna-597"><span class="linenos">597</span></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
-</span><span id="DataFrame.dropna-598"><a href="#DataFrame.dropna-598"><span class="linenos">598</span></a> <span class="sa">f</span><span class="s2">&quot;The minimum num nulls for dropna must be less than or equal to the number of columns. &quot;</span>
-</span><span id="DataFrame.dropna-599"><a href="#DataFrame.dropna-599"><span class="linenos">599</span></a> <span class="sa">f</span><span class="s2">&quot;Minimum num nulls: </span><span class="si">{</span><span class="n">minimum_num_nulls</span><span class="si">}</span><span class="s2">, Num Columns: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
-</span><span id="DataFrame.dropna-600"><a href="#DataFrame.dropna-600"><span class="linenos">600</span></a> <span class="p">)</span>
-</span><span id="DataFrame.dropna-601"><a href="#DataFrame.dropna-601"><span class="linenos">601</span></a> <span class="n">if_null_checks</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.dropna-602"><a href="#DataFrame.dropna-602"><span class="linenos">602</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">null_check_columns</span>
-</span><span id="DataFrame.dropna-603"><a href="#DataFrame.dropna-603"><span class="linenos">603</span></a> <span class="p">]</span>
-</span><span id="DataFrame.dropna-604"><a href="#DataFrame.dropna-604"><span class="linenos">604</span></a> <span class="n">nulls_added_together</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">+</span> <span class="n">y</span><span class="p">,</span> <span class="n">if_null_checks</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-605"><a href="#DataFrame.dropna-605"><span class="linenos">605</span></a> <span class="n">num_nulls</span> <span class="o">=</span> <span class="n">nulls_added_together</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-606"><a href="#DataFrame.dropna-606"><span class="linenos">606</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">num_nulls</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-607"><a href="#DataFrame.dropna-607"><span class="linenos">607</span></a> <span class="n">filtered_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">minimum_num_nulls</span><span class="p">))</span>
-</span><span id="DataFrame.dropna-608"><a href="#DataFrame.dropna-608"><span class="linenos">608</span></a> <span class="n">final_df</span> <span class="o">=</span> <span class="n">filtered_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">all_columns</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-609"><a href="#DataFrame.dropna-609"><span class="linenos">609</span></a> <span class="k">return</span> <span class="n">final_df</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.dropna-615"><a href="#DataFrame.dropna-615"><span class="linenos">615</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-616"><a href="#DataFrame.dropna-616"><span class="linenos">616</span></a> <span class="k">def</span> <span class="nf">dropna</span><span class="p">(</span>
+</span><span id="DataFrame.dropna-617"><a href="#DataFrame.dropna-617"><span class="linenos">617</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.dropna-618"><a href="#DataFrame.dropna-618"><span class="linenos">618</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
+</span><span id="DataFrame.dropna-619"><a href="#DataFrame.dropna-619"><span class="linenos">619</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.dropna-620"><a href="#DataFrame.dropna-620"><span class="linenos">620</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.dropna-621"><a href="#DataFrame.dropna-621"><span class="linenos">621</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.dropna-622"><a href="#DataFrame.dropna-622"><span class="linenos">622</span></a> <span class="n">minimum_non_null</span> <span class="o">=</span> <span class="n">thresh</span> <span class="ow">or</span> <span class="mi">0</span> <span class="c1"># will be determined later if thresh is null</span>
+</span><span id="DataFrame.dropna-623"><a href="#DataFrame.dropna-623"><span class="linenos">623</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.dropna-624"><a href="#DataFrame.dropna-624"><span class="linenos">624</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-625"><a href="#DataFrame.dropna-625"><span class="linenos">625</span></a> <span class="k">if</span> <span class="n">subset</span><span class="p">:</span>
+</span><span id="DataFrame.dropna-626"><a href="#DataFrame.dropna-626"><span class="linenos">626</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-627"><a href="#DataFrame.dropna-627"><span class="linenos">627</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.dropna-628"><a href="#DataFrame.dropna-628"><span class="linenos">628</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="n">all_columns</span>
+</span><span id="DataFrame.dropna-629"><a href="#DataFrame.dropna-629"><span class="linenos">629</span></a> <span class="k">if</span> <span class="n">thresh</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="DataFrame.dropna-630"><a href="#DataFrame.dropna-630"><span class="linenos">630</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">how</span> <span class="o">==</span> <span class="s2">&quot;any&quot;</span> <span class="k">else</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-631"><a href="#DataFrame.dropna-631"><span class="linenos">631</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.dropna-632"><a href="#DataFrame.dropna-632"><span class="linenos">632</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span> <span class="o">-</span> <span class="n">minimum_non_null</span> <span class="o">+</span> <span class="mi">1</span>
+</span><span id="DataFrame.dropna-633"><a href="#DataFrame.dropna-633"><span class="linenos">633</span></a> <span class="k">if</span> <span class="n">minimum_num_nulls</span> <span class="o">&gt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">):</span>
+</span><span id="DataFrame.dropna-634"><a href="#DataFrame.dropna-634"><span class="linenos">634</span></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
+</span><span id="DataFrame.dropna-635"><a href="#DataFrame.dropna-635"><span class="linenos">635</span></a> <span class="sa">f</span><span class="s2">&quot;The minimum num nulls for dropna must be less than or equal to the number of columns. &quot;</span>
+</span><span id="DataFrame.dropna-636"><a href="#DataFrame.dropna-636"><span class="linenos">636</span></a> <span class="sa">f</span><span class="s2">&quot;Minimum num nulls: </span><span class="si">{</span><span class="n">minimum_num_nulls</span><span class="si">}</span><span class="s2">, Num Columns: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
+</span><span id="DataFrame.dropna-637"><a href="#DataFrame.dropna-637"><span class="linenos">637</span></a> <span class="p">)</span>
+</span><span id="DataFrame.dropna-638"><a href="#DataFrame.dropna-638"><span class="linenos">638</span></a> <span class="n">if_null_checks</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.dropna-639"><a href="#DataFrame.dropna-639"><span class="linenos">639</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">null_check_columns</span>
+</span><span id="DataFrame.dropna-640"><a href="#DataFrame.dropna-640"><span class="linenos">640</span></a> <span class="p">]</span>
+</span><span id="DataFrame.dropna-641"><a href="#DataFrame.dropna-641"><span class="linenos">641</span></a> <span class="n">nulls_added_together</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">+</span> <span class="n">y</span><span class="p">,</span> <span class="n">if_null_checks</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-642"><a href="#DataFrame.dropna-642"><span class="linenos">642</span></a> <span class="n">num_nulls</span> <span class="o">=</span> <span class="n">nulls_added_together</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-643"><a href="#DataFrame.dropna-643"><span class="linenos">643</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">num_nulls</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-644"><a href="#DataFrame.dropna-644"><span class="linenos">644</span></a> <span class="n">filtered_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">minimum_num_nulls</span><span class="p">))</span>
+</span><span id="DataFrame.dropna-645"><a href="#DataFrame.dropna-645"><span class="linenos">645</span></a> <span class="n">final_df</span> <span class="o">=</span> <span class="n">filtered_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">all_columns</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-646"><a href="#DataFrame.dropna-646"><span class="linenos">646</span></a> <span class="k">return</span> <span class="n">final_df</span>
</span></pre></div>
@@ -2214,55 +2288,55 @@ is unlikely to come up.</p>
<div class="decorator">@operation(Operation.FROM)</div>
<span class="def">def</span>
- <span class="name">fillna</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">value</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719847202288&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">subset</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">NoneType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">sqlglot.dataframe.sql.DataFrame</a></span>:</span></span>
+ <span class="name">fillna</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">value</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377707652544&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">subset</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">NoneType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">sqlglot.dataframe.sql.DataFrame</a></span>:</span></span>
<label class="view-source-button" for="DataFrame.fillna-view-source"><span>View Source</span></label>
</div>
<a class="headerlink" href="#DataFrame.fillna"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.fillna-611"><a href="#DataFrame.fillna-611"><span class="linenos">611</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.fillna-612"><a href="#DataFrame.fillna-612"><span class="linenos">612</span></a> <span class="k">def</span> <span class="nf">fillna</span><span class="p">(</span>
-</span><span id="DataFrame.fillna-613"><a href="#DataFrame.fillna-613"><span class="linenos">613</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.fillna-614"><a href="#DataFrame.fillna-614"><span class="linenos">614</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">ColumnLiterals</span><span class="p">],</span>
-</span><span id="DataFrame.fillna-615"><a href="#DataFrame.fillna-615"><span class="linenos">615</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.fillna-616"><a href="#DataFrame.fillna-616"><span class="linenos">616</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.fillna-617"><a href="#DataFrame.fillna-617"><span class="linenos">617</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame.fillna-618"><a href="#DataFrame.fillna-618"><span class="linenos">618</span></a><span class="sd"> Functionality Difference: If you provide a value to replace a null and that type conflicts</span>
-</span><span id="DataFrame.fillna-619"><a href="#DataFrame.fillna-619"><span class="linenos">619</span></a><span class="sd"> with the type of the column then PySpark will just ignore your replacement.</span>
-</span><span id="DataFrame.fillna-620"><a href="#DataFrame.fillna-620"><span class="linenos">620</span></a><span class="sd"> This will try to cast them to be the same in some cases. So they won&#39;t always match.</span>
-</span><span id="DataFrame.fillna-621"><a href="#DataFrame.fillna-621"><span class="linenos">621</span></a><span class="sd"> Best to not mix types so make sure replacement is the same type as the column</span>
-</span><span id="DataFrame.fillna-622"><a href="#DataFrame.fillna-622"><span class="linenos">622</span></a>
-</span><span id="DataFrame.fillna-623"><a href="#DataFrame.fillna-623"><span class="linenos">623</span></a><span class="sd"> Possibility for improvement: Use `typeof` function to get the type of the column</span>
-</span><span id="DataFrame.fillna-624"><a href="#DataFrame.fillna-624"><span class="linenos">624</span></a><span class="sd"> and check if it matches the type of the value provided. If not then make it null.</span>
-</span><span id="DataFrame.fillna-625"><a href="#DataFrame.fillna-625"><span class="linenos">625</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame.fillna-626"><a href="#DataFrame.fillna-626"><span class="linenos">626</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
-</span><span id="DataFrame.fillna-627"><a href="#DataFrame.fillna-627"><span class="linenos">627</span></a>
-</span><span id="DataFrame.fillna-628"><a href="#DataFrame.fillna-628"><span class="linenos">628</span></a> <span class="n">values</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="DataFrame.fillna-629"><a href="#DataFrame.fillna-629"><span class="linenos">629</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="DataFrame.fillna-630"><a href="#DataFrame.fillna-630"><span class="linenos">630</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.fillna-631"><a href="#DataFrame.fillna-631"><span class="linenos">631</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.fillna-632"><a href="#DataFrame.fillna-632"><span class="linenos">632</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
-</span><span id="DataFrame.fillna-633"><a href="#DataFrame.fillna-633"><span class="linenos">633</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
-</span><span id="DataFrame.fillna-634"><a href="#DataFrame.fillna-634"><span class="linenos">634</span></a> <span class="n">values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
-</span><span id="DataFrame.fillna-635"><a href="#DataFrame.fillna-635"><span class="linenos">635</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="p">))</span>
-</span><span id="DataFrame.fillna-636"><a href="#DataFrame.fillna-636"><span class="linenos">636</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">columns</span><span class="p">:</span>
-</span><span id="DataFrame.fillna-637"><a href="#DataFrame.fillna-637"><span class="linenos">637</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
-</span><span id="DataFrame.fillna-638"><a href="#DataFrame.fillna-638"><span class="linenos">638</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">values</span><span class="p">:</span>
-</span><span id="DataFrame.fillna-639"><a href="#DataFrame.fillna-639"><span class="linenos">639</span></a> <span class="n">values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.fillna-640"><a href="#DataFrame.fillna-640"><span class="linenos">640</span></a> <span class="n">value_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">values</span><span class="p">]</span>
-</span><span id="DataFrame.fillna-641"><a href="#DataFrame.fillna-641"><span class="linenos">641</span></a>
-</span><span id="DataFrame.fillna-642"><a href="#DataFrame.fillna-642"><span class="linenos">642</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span>
-</span><span id="DataFrame.fillna-643"><a href="#DataFrame.fillna-643"><span class="linenos">643</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="p">(</span>
-</span><span id="DataFrame.fillna-644"><a href="#DataFrame.fillna-644"><span class="linenos">644</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
-</span><span id="DataFrame.fillna-645"><a href="#DataFrame.fillna-645"><span class="linenos">645</span></a> <span class="p">)</span>
-</span><span id="DataFrame.fillna-646"><a href="#DataFrame.fillna-646"><span class="linenos">646</span></a> <span class="k">for</span> <span class="n">column</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">value_columns</span><span class="p">)</span>
-</span><span id="DataFrame.fillna-647"><a href="#DataFrame.fillna-647"><span class="linenos">647</span></a> <span class="p">}</span>
-</span><span id="DataFrame.fillna-648"><a href="#DataFrame.fillna-648"><span class="linenos">648</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">null_replacement_mapping</span><span class="p">}</span>
-</span><span id="DataFrame.fillna-649"><a href="#DataFrame.fillna-649"><span class="linenos">649</span></a> <span class="n">null_replacement_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.fillna-650"><a href="#DataFrame.fillna-650"><span class="linenos">650</span></a> <span class="n">null_replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span>
-</span><span id="DataFrame.fillna-651"><a href="#DataFrame.fillna-651"><span class="linenos">651</span></a> <span class="p">]</span>
-</span><span id="DataFrame.fillna-652"><a href="#DataFrame.fillna-652"><span class="linenos">652</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">null_replacement_columns</span><span class="p">)</span>
-</span><span id="DataFrame.fillna-653"><a href="#DataFrame.fillna-653"><span class="linenos">653</span></a> <span class="k">return</span> <span class="n">new_df</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.fillna-648"><a href="#DataFrame.fillna-648"><span class="linenos">648</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.fillna-649"><a href="#DataFrame.fillna-649"><span class="linenos">649</span></a> <span class="k">def</span> <span class="nf">fillna</span><span class="p">(</span>
+</span><span id="DataFrame.fillna-650"><a href="#DataFrame.fillna-650"><span class="linenos">650</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.fillna-651"><a href="#DataFrame.fillna-651"><span class="linenos">651</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">ColumnLiterals</span><span class="p">],</span>
+</span><span id="DataFrame.fillna-652"><a href="#DataFrame.fillna-652"><span class="linenos">652</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.fillna-653"><a href="#DataFrame.fillna-653"><span class="linenos">653</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.fillna-654"><a href="#DataFrame.fillna-654"><span class="linenos">654</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame.fillna-655"><a href="#DataFrame.fillna-655"><span class="linenos">655</span></a><span class="sd"> Functionality Difference: If you provide a value to replace a null and that type conflicts</span>
+</span><span id="DataFrame.fillna-656"><a href="#DataFrame.fillna-656"><span class="linenos">656</span></a><span class="sd"> with the type of the column then PySpark will just ignore your replacement.</span>
+</span><span id="DataFrame.fillna-657"><a href="#DataFrame.fillna-657"><span class="linenos">657</span></a><span class="sd"> This will try to cast them to be the same in some cases. So they won&#39;t always match.</span>
+</span><span id="DataFrame.fillna-658"><a href="#DataFrame.fillna-658"><span class="linenos">658</span></a><span class="sd"> Best to not mix types so make sure replacement is the same type as the column</span>
+</span><span id="DataFrame.fillna-659"><a href="#DataFrame.fillna-659"><span class="linenos">659</span></a>
+</span><span id="DataFrame.fillna-660"><a href="#DataFrame.fillna-660"><span class="linenos">660</span></a><span class="sd"> Possibility for improvement: Use `typeof` function to get the type of the column</span>
+</span><span id="DataFrame.fillna-661"><a href="#DataFrame.fillna-661"><span class="linenos">661</span></a><span class="sd"> and check if it matches the type of the value provided. If not then make it null.</span>
+</span><span id="DataFrame.fillna-662"><a href="#DataFrame.fillna-662"><span class="linenos">662</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame.fillna-663"><a href="#DataFrame.fillna-663"><span class="linenos">663</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
+</span><span id="DataFrame.fillna-664"><a href="#DataFrame.fillna-664"><span class="linenos">664</span></a>
+</span><span id="DataFrame.fillna-665"><a href="#DataFrame.fillna-665"><span class="linenos">665</span></a> <span class="n">values</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="DataFrame.fillna-666"><a href="#DataFrame.fillna-666"><span class="linenos">666</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="DataFrame.fillna-667"><a href="#DataFrame.fillna-667"><span class="linenos">667</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.fillna-668"><a href="#DataFrame.fillna-668"><span class="linenos">668</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.fillna-669"><a href="#DataFrame.fillna-669"><span class="linenos">669</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
+</span><span id="DataFrame.fillna-670"><a href="#DataFrame.fillna-670"><span class="linenos">670</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+</span><span id="DataFrame.fillna-671"><a href="#DataFrame.fillna-671"><span class="linenos">671</span></a> <span class="n">values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
+</span><span id="DataFrame.fillna-672"><a href="#DataFrame.fillna-672"><span class="linenos">672</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="p">))</span>
+</span><span id="DataFrame.fillna-673"><a href="#DataFrame.fillna-673"><span class="linenos">673</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">columns</span><span class="p">:</span>
+</span><span id="DataFrame.fillna-674"><a href="#DataFrame.fillna-674"><span class="linenos">674</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
+</span><span id="DataFrame.fillna-675"><a href="#DataFrame.fillna-675"><span class="linenos">675</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">values</span><span class="p">:</span>
+</span><span id="DataFrame.fillna-676"><a href="#DataFrame.fillna-676"><span class="linenos">676</span></a> <span class="n">values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.fillna-677"><a href="#DataFrame.fillna-677"><span class="linenos">677</span></a> <span class="n">value_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">values</span><span class="p">]</span>
+</span><span id="DataFrame.fillna-678"><a href="#DataFrame.fillna-678"><span class="linenos">678</span></a>
+</span><span id="DataFrame.fillna-679"><a href="#DataFrame.fillna-679"><span class="linenos">679</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span>
+</span><span id="DataFrame.fillna-680"><a href="#DataFrame.fillna-680"><span class="linenos">680</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="p">(</span>
+</span><span id="DataFrame.fillna-681"><a href="#DataFrame.fillna-681"><span class="linenos">681</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
+</span><span id="DataFrame.fillna-682"><a href="#DataFrame.fillna-682"><span class="linenos">682</span></a> <span class="p">)</span>
+</span><span id="DataFrame.fillna-683"><a href="#DataFrame.fillna-683"><span class="linenos">683</span></a> <span class="k">for</span> <span class="n">column</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">value_columns</span><span class="p">)</span>
+</span><span id="DataFrame.fillna-684"><a href="#DataFrame.fillna-684"><span class="linenos">684</span></a> <span class="p">}</span>
+</span><span id="DataFrame.fillna-685"><a href="#DataFrame.fillna-685"><span class="linenos">685</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">null_replacement_mapping</span><span class="p">}</span>
+</span><span id="DataFrame.fillna-686"><a href="#DataFrame.fillna-686"><span class="linenos">686</span></a> <span class="n">null_replacement_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.fillna-687"><a href="#DataFrame.fillna-687"><span class="linenos">687</span></a> <span class="n">null_replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span>
+</span><span id="DataFrame.fillna-688"><a href="#DataFrame.fillna-688"><span class="linenos">688</span></a> <span class="p">]</span>
+</span><span id="DataFrame.fillna-689"><a href="#DataFrame.fillna-689"><span class="linenos">689</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">null_replacement_columns</span><span class="p">)</span>
+</span><span id="DataFrame.fillna-690"><a href="#DataFrame.fillna-690"><span class="linenos">690</span></a> <span class="k">return</span> <span class="n">new_df</span>
</span></pre></div>
@@ -2283,59 +2357,59 @@ and check if it matches the type of the value provided. If not then make it null
<div class="decorator">@operation(Operation.FROM)</div>
<span class="def">def</span>
- <span class="name">replace</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">to_replace</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Dict</span><span class="p">]</span>,</span><span class="param"> <span class="n">value</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">subset</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Collection</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719846105664&#39;</span><span class="o">&gt;</span><span class="p">],</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719846105664&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">sqlglot.dataframe.sql.DataFrame</a></span>:</span></span>
+ <span class="name">replace</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">to_replace</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Dict</span><span class="p">]</span>,</span><span class="param"> <span class="n">value</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">subset</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Collection</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377707950480&#39;</span><span class="o">&gt;</span><span class="p">],</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377707950480&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">sqlglot.dataframe.sql.DataFrame</a></span>:</span></span>
<label class="view-source-button" for="DataFrame.replace-view-source"><span>View Source</span></label>
</div>
<a class="headerlink" href="#DataFrame.replace"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.replace-655"><a href="#DataFrame.replace-655"><span class="linenos">655</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.replace-656"><a href="#DataFrame.replace-656"><span class="linenos">656</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
-</span><span id="DataFrame.replace-657"><a href="#DataFrame.replace-657"><span class="linenos">657</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.replace-658"><a href="#DataFrame.replace-658"><span class="linenos">658</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
-</span><span id="DataFrame.replace-659"><a href="#DataFrame.replace-659"><span class="linenos">659</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.replace-660"><a href="#DataFrame.replace-660"><span class="linenos">660</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Collection</span><span class="p">[</span><span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">|</span> <span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.replace-661"><a href="#DataFrame.replace-661"><span class="linenos">661</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.replace-662"><a href="#DataFrame.replace-662"><span class="linenos">662</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
-</span><span id="DataFrame.replace-663"><a href="#DataFrame.replace-663"><span class="linenos">663</span></a>
-</span><span id="DataFrame.replace-664"><a href="#DataFrame.replace-664"><span class="linenos">664</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="DataFrame.replace-665"><a href="#DataFrame.replace-665"><span class="linenos">665</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.replace-666"><a href="#DataFrame.replace-666"><span class="linenos">666</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.replace-667"><a href="#DataFrame.replace-667"><span class="linenos">667</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
-</span><span id="DataFrame.replace-668"><a href="#DataFrame.replace-668"><span class="linenos">668</span></a>
-</span><span id="DataFrame.replace-669"><a href="#DataFrame.replace-669"><span class="linenos">669</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
-</span><span id="DataFrame.replace-670"><a href="#DataFrame.replace-670"><span class="linenos">670</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
-</span><span id="DataFrame.replace-671"><a href="#DataFrame.replace-671"><span class="linenos">671</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span>
-</span><span id="DataFrame.replace-672"><a href="#DataFrame.replace-672"><span class="linenos">672</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
-</span><span id="DataFrame.replace-673"><a href="#DataFrame.replace-673"><span class="linenos">673</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="n">old_values</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
-</span><span id="DataFrame.replace-674"><a href="#DataFrame.replace-674"><span class="linenos">674</span></a> <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">list</span><span class="p">),</span> <span class="s2">&quot;value must be a list since the replacements are a list&quot;</span>
-</span><span id="DataFrame.replace-675"><a href="#DataFrame.replace-675"><span class="linenos">675</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
-</span><span id="DataFrame.replace-676"><a href="#DataFrame.replace-676"><span class="linenos">676</span></a> <span class="n">value</span>
-</span><span id="DataFrame.replace-677"><a href="#DataFrame.replace-677"><span class="linenos">677</span></a> <span class="p">),</span> <span class="s2">&quot;the replacements and values must be the same length&quot;</span>
-</span><span id="DataFrame.replace-678"><a href="#DataFrame.replace-678"><span class="linenos">678</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="n">to_replace</span>
-</span><span id="DataFrame.replace-679"><a href="#DataFrame.replace-679"><span class="linenos">679</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="n">value</span>
-</span><span id="DataFrame.replace-680"><a href="#DataFrame.replace-680"><span class="linenos">680</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.replace-681"><a href="#DataFrame.replace-681"><span class="linenos">681</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">to_replace</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.replace-682"><a href="#DataFrame.replace-682"><span class="linenos">682</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.replace-683"><a href="#DataFrame.replace-683"><span class="linenos">683</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">old_values</span><span class="p">]</span>
-</span><span id="DataFrame.replace-684"><a href="#DataFrame.replace-684"><span class="linenos">684</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">new_values</span><span class="p">]</span>
-</span><span id="DataFrame.replace-685"><a href="#DataFrame.replace-685"><span class="linenos">685</span></a>
-</span><span id="DataFrame.replace-686"><a href="#DataFrame.replace-686"><span class="linenos">686</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{}</span>
-</span><span id="DataFrame.replace-687"><a href="#DataFrame.replace-687"><span class="linenos">687</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span>
-</span><span id="DataFrame.replace-688"><a href="#DataFrame.replace-688"><span class="linenos">688</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
-</span><span id="DataFrame.replace-689"><a href="#DataFrame.replace-689"><span class="linenos">689</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">old_values</span><span class="p">,</span> <span class="n">new_values</span><span class="p">)):</span>
-</span><span id="DataFrame.replace-690"><a href="#DataFrame.replace-690"><span class="linenos">690</span></a> <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
-</span><span id="DataFrame.replace-691"><a href="#DataFrame.replace-691"><span class="linenos">691</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span>
-</span><span id="DataFrame.replace-692"><a href="#DataFrame.replace-692"><span class="linenos">692</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.replace-693"><a href="#DataFrame.replace-693"><span class="linenos">693</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="c1"># type: ignore</span>
-</span><span id="DataFrame.replace-694"><a href="#DataFrame.replace-694"><span class="linenos">694</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span>
-</span><span id="DataFrame.replace-695"><a href="#DataFrame.replace-695"><span class="linenos">695</span></a> <span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame.replace-696"><a href="#DataFrame.replace-696"><span class="linenos">696</span></a> <span class="p">)</span>
-</span><span id="DataFrame.replace-697"><a href="#DataFrame.replace-697"><span class="linenos">697</span></a>
-</span><span id="DataFrame.replace-698"><a href="#DataFrame.replace-698"><span class="linenos">698</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">replacement_mapping</span><span class="p">}</span>
-</span><span id="DataFrame.replace-699"><a href="#DataFrame.replace-699"><span class="linenos">699</span></a> <span class="n">replacement_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">]</span>
-</span><span id="DataFrame.replace-700"><a href="#DataFrame.replace-700"><span class="linenos">700</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">replacement_columns</span><span class="p">)</span>
-</span><span id="DataFrame.replace-701"><a href="#DataFrame.replace-701"><span class="linenos">701</span></a> <span class="k">return</span> <span class="n">new_df</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.replace-692"><a href="#DataFrame.replace-692"><span class="linenos">692</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.replace-693"><a href="#DataFrame.replace-693"><span class="linenos">693</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
+</span><span id="DataFrame.replace-694"><a href="#DataFrame.replace-694"><span class="linenos">694</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.replace-695"><a href="#DataFrame.replace-695"><span class="linenos">695</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
+</span><span id="DataFrame.replace-696"><a href="#DataFrame.replace-696"><span class="linenos">696</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.replace-697"><a href="#DataFrame.replace-697"><span class="linenos">697</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Collection</span><span class="p">[</span><span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">|</span> <span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.replace-698"><a href="#DataFrame.replace-698"><span class="linenos">698</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.replace-699"><a href="#DataFrame.replace-699"><span class="linenos">699</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
+</span><span id="DataFrame.replace-700"><a href="#DataFrame.replace-700"><span class="linenos">700</span></a>
+</span><span id="DataFrame.replace-701"><a href="#DataFrame.replace-701"><span class="linenos">701</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="DataFrame.replace-702"><a href="#DataFrame.replace-702"><span class="linenos">702</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.replace-703"><a href="#DataFrame.replace-703"><span class="linenos">703</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.replace-704"><a href="#DataFrame.replace-704"><span class="linenos">704</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
+</span><span id="DataFrame.replace-705"><a href="#DataFrame.replace-705"><span class="linenos">705</span></a>
+</span><span id="DataFrame.replace-706"><a href="#DataFrame.replace-706"><span class="linenos">706</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
+</span><span id="DataFrame.replace-707"><a href="#DataFrame.replace-707"><span class="linenos">707</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+</span><span id="DataFrame.replace-708"><a href="#DataFrame.replace-708"><span class="linenos">708</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span>
+</span><span id="DataFrame.replace-709"><a href="#DataFrame.replace-709"><span class="linenos">709</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
+</span><span id="DataFrame.replace-710"><a href="#DataFrame.replace-710"><span class="linenos">710</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="n">old_values</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
+</span><span id="DataFrame.replace-711"><a href="#DataFrame.replace-711"><span class="linenos">711</span></a> <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">list</span><span class="p">),</span> <span class="s2">&quot;value must be a list since the replacements are a list&quot;</span>
+</span><span id="DataFrame.replace-712"><a href="#DataFrame.replace-712"><span class="linenos">712</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
+</span><span id="DataFrame.replace-713"><a href="#DataFrame.replace-713"><span class="linenos">713</span></a> <span class="n">value</span>
+</span><span id="DataFrame.replace-714"><a href="#DataFrame.replace-714"><span class="linenos">714</span></a> <span class="p">),</span> <span class="s2">&quot;the replacements and values must be the same length&quot;</span>
+</span><span id="DataFrame.replace-715"><a href="#DataFrame.replace-715"><span class="linenos">715</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="n">to_replace</span>
+</span><span id="DataFrame.replace-716"><a href="#DataFrame.replace-716"><span class="linenos">716</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="n">value</span>
+</span><span id="DataFrame.replace-717"><a href="#DataFrame.replace-717"><span class="linenos">717</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.replace-718"><a href="#DataFrame.replace-718"><span class="linenos">718</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">to_replace</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.replace-719"><a href="#DataFrame.replace-719"><span class="linenos">719</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.replace-720"><a href="#DataFrame.replace-720"><span class="linenos">720</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">old_values</span><span class="p">]</span>
+</span><span id="DataFrame.replace-721"><a href="#DataFrame.replace-721"><span class="linenos">721</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">new_values</span><span class="p">]</span>
+</span><span id="DataFrame.replace-722"><a href="#DataFrame.replace-722"><span class="linenos">722</span></a>
+</span><span id="DataFrame.replace-723"><a href="#DataFrame.replace-723"><span class="linenos">723</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{}</span>
+</span><span id="DataFrame.replace-724"><a href="#DataFrame.replace-724"><span class="linenos">724</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span>
+</span><span id="DataFrame.replace-725"><a href="#DataFrame.replace-725"><span class="linenos">725</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
+</span><span id="DataFrame.replace-726"><a href="#DataFrame.replace-726"><span class="linenos">726</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">old_values</span><span class="p">,</span> <span class="n">new_values</span><span class="p">)):</span>
+</span><span id="DataFrame.replace-727"><a href="#DataFrame.replace-727"><span class="linenos">727</span></a> <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+</span><span id="DataFrame.replace-728"><a href="#DataFrame.replace-728"><span class="linenos">728</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span>
+</span><span id="DataFrame.replace-729"><a href="#DataFrame.replace-729"><span class="linenos">729</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.replace-730"><a href="#DataFrame.replace-730"><span class="linenos">730</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="c1"># type: ignore</span>
+</span><span id="DataFrame.replace-731"><a href="#DataFrame.replace-731"><span class="linenos">731</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span>
+</span><span id="DataFrame.replace-732"><a href="#DataFrame.replace-732"><span class="linenos">732</span></a> <span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame.replace-733"><a href="#DataFrame.replace-733"><span class="linenos">733</span></a> <span class="p">)</span>
+</span><span id="DataFrame.replace-734"><a href="#DataFrame.replace-734"><span class="linenos">734</span></a>
+</span><span id="DataFrame.replace-735"><a href="#DataFrame.replace-735"><span class="linenos">735</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">replacement_mapping</span><span class="p">}</span>
+</span><span id="DataFrame.replace-736"><a href="#DataFrame.replace-736"><span class="linenos">736</span></a> <span class="n">replacement_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">]</span>
+</span><span id="DataFrame.replace-737"><a href="#DataFrame.replace-737"><span class="linenos">737</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">replacement_columns</span><span class="p">)</span>
+</span><span id="DataFrame.replace-738"><a href="#DataFrame.replace-738"><span class="linenos">738</span></a> <span class="k">return</span> <span class="n">new_df</span>
</span></pre></div>
@@ -2354,18 +2428,18 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.withColumn"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.withColumn-703"><a href="#DataFrame.withColumn-703"><span class="linenos">703</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.withColumn-704"><a href="#DataFrame.withColumn-704"><span class="linenos">704</span></a> <span class="k">def</span> <span class="nf">withColumn</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">colName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.withColumn-705"><a href="#DataFrame.withColumn-705"><span class="linenos">705</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
-</span><span id="DataFrame.withColumn-706"><a href="#DataFrame.withColumn-706"><span class="linenos">706</span></a> <span class="n">existing_col_names</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">named_selects</span>
-</span><span id="DataFrame.withColumn-707"><a href="#DataFrame.withColumn-707"><span class="linenos">707</span></a> <span class="n">existing_col_index</span> <span class="o">=</span> <span class="p">(</span>
-</span><span id="DataFrame.withColumn-708"><a href="#DataFrame.withColumn-708"><span class="linenos">708</span></a> <span class="n">existing_col_names</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">colName</span><span class="p">)</span> <span class="k">if</span> <span class="n">colName</span> <span class="ow">in</span> <span class="n">existing_col_names</span> <span class="k">else</span> <span class="kc">None</span>
-</span><span id="DataFrame.withColumn-709"><a href="#DataFrame.withColumn-709"><span class="linenos">709</span></a> <span class="p">)</span>
-</span><span id="DataFrame.withColumn-710"><a href="#DataFrame.withColumn-710"><span class="linenos">710</span></a> <span class="k">if</span> <span class="n">existing_col_index</span><span class="p">:</span>
-</span><span id="DataFrame.withColumn-711"><a href="#DataFrame.withColumn-711"><span class="linenos">711</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.withColumn-712"><a href="#DataFrame.withColumn-712"><span class="linenos">712</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span><span class="p">[</span><span class="n">existing_col_index</span><span class="p">]</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">expression</span>
-</span><span id="DataFrame.withColumn-713"><a href="#DataFrame.withColumn-713"><span class="linenos">713</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.withColumn-714"><a href="#DataFrame.withColumn-714"><span class="linenos">714</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">colName</span><span class="p">),</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.withColumn-740"><a href="#DataFrame.withColumn-740"><span class="linenos">740</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.withColumn-741"><a href="#DataFrame.withColumn-741"><span class="linenos">741</span></a> <span class="k">def</span> <span class="nf">withColumn</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">colName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.withColumn-742"><a href="#DataFrame.withColumn-742"><span class="linenos">742</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
+</span><span id="DataFrame.withColumn-743"><a href="#DataFrame.withColumn-743"><span class="linenos">743</span></a> <span class="n">existing_col_names</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">named_selects</span>
+</span><span id="DataFrame.withColumn-744"><a href="#DataFrame.withColumn-744"><span class="linenos">744</span></a> <span class="n">existing_col_index</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="DataFrame.withColumn-745"><a href="#DataFrame.withColumn-745"><span class="linenos">745</span></a> <span class="n">existing_col_names</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">colName</span><span class="p">)</span> <span class="k">if</span> <span class="n">colName</span> <span class="ow">in</span> <span class="n">existing_col_names</span> <span class="k">else</span> <span class="kc">None</span>
+</span><span id="DataFrame.withColumn-746"><a href="#DataFrame.withColumn-746"><span class="linenos">746</span></a> <span class="p">)</span>
+</span><span id="DataFrame.withColumn-747"><a href="#DataFrame.withColumn-747"><span class="linenos">747</span></a> <span class="k">if</span> <span class="n">existing_col_index</span><span class="p">:</span>
+</span><span id="DataFrame.withColumn-748"><a href="#DataFrame.withColumn-748"><span class="linenos">748</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.withColumn-749"><a href="#DataFrame.withColumn-749"><span class="linenos">749</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span><span class="p">[</span><span class="n">existing_col_index</span><span class="p">]</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">expression</span>
+</span><span id="DataFrame.withColumn-750"><a href="#DataFrame.withColumn-750"><span class="linenos">750</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.withColumn-751"><a href="#DataFrame.withColumn-751"><span class="linenos">751</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">colName</span><span class="p">),</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
</span></pre></div>
@@ -2384,22 +2458,22 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.withColumnRenamed"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.withColumnRenamed-716"><a href="#DataFrame.withColumnRenamed-716"><span class="linenos">716</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.withColumnRenamed-717"><a href="#DataFrame.withColumnRenamed-717"><span class="linenos">717</span></a> <span class="k">def</span> <span class="nf">withColumnRenamed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">existing</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">new</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
-</span><span id="DataFrame.withColumnRenamed-718"><a href="#DataFrame.withColumnRenamed-718"><span class="linenos">718</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.withColumnRenamed-719"><a href="#DataFrame.withColumnRenamed-719"><span class="linenos">719</span></a> <span class="n">existing_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.withColumnRenamed-720"><a href="#DataFrame.withColumnRenamed-720"><span class="linenos">720</span></a> <span class="n">expression</span>
-</span><span id="DataFrame.withColumnRenamed-721"><a href="#DataFrame.withColumnRenamed-721"><span class="linenos">721</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span>
-</span><span id="DataFrame.withColumnRenamed-722"><a href="#DataFrame.withColumnRenamed-722"><span class="linenos">722</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="n">existing</span>
-</span><span id="DataFrame.withColumnRenamed-723"><a href="#DataFrame.withColumnRenamed-723"><span class="linenos">723</span></a> <span class="p">]</span>
-</span><span id="DataFrame.withColumnRenamed-724"><a href="#DataFrame.withColumnRenamed-724"><span class="linenos">724</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">existing_columns</span><span class="p">:</span>
-</span><span id="DataFrame.withColumnRenamed-725"><a href="#DataFrame.withColumnRenamed-725"><span class="linenos">725</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Tried to rename a column that doesn&#39;t exist&quot;</span><span class="p">)</span>
-</span><span id="DataFrame.withColumnRenamed-726"><a href="#DataFrame.withColumnRenamed-726"><span class="linenos">726</span></a> <span class="k">for</span> <span class="n">existing_column</span> <span class="ow">in</span> <span class="n">existing_columns</span><span class="p">:</span>
-</span><span id="DataFrame.withColumnRenamed-727"><a href="#DataFrame.withColumnRenamed-727"><span class="linenos">727</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">existing_column</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
-</span><span id="DataFrame.withColumnRenamed-728"><a href="#DataFrame.withColumnRenamed-728"><span class="linenos">728</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">existing_column</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">new</span><span class="p">))</span>
-</span><span id="DataFrame.withColumnRenamed-729"><a href="#DataFrame.withColumnRenamed-729"><span class="linenos">729</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.withColumnRenamed-730"><a href="#DataFrame.withColumnRenamed-730"><span class="linenos">730</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;alias&quot;</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">new</span><span class="p">))</span>
-</span><span id="DataFrame.withColumnRenamed-731"><a href="#DataFrame.withColumnRenamed-731"><span class="linenos">731</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.withColumnRenamed-753"><a href="#DataFrame.withColumnRenamed-753"><span class="linenos">753</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.withColumnRenamed-754"><a href="#DataFrame.withColumnRenamed-754"><span class="linenos">754</span></a> <span class="k">def</span> <span class="nf">withColumnRenamed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">existing</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">new</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
+</span><span id="DataFrame.withColumnRenamed-755"><a href="#DataFrame.withColumnRenamed-755"><span class="linenos">755</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.withColumnRenamed-756"><a href="#DataFrame.withColumnRenamed-756"><span class="linenos">756</span></a> <span class="n">existing_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.withColumnRenamed-757"><a href="#DataFrame.withColumnRenamed-757"><span class="linenos">757</span></a> <span class="n">expression</span>
+</span><span id="DataFrame.withColumnRenamed-758"><a href="#DataFrame.withColumnRenamed-758"><span class="linenos">758</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span>
+</span><span id="DataFrame.withColumnRenamed-759"><a href="#DataFrame.withColumnRenamed-759"><span class="linenos">759</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="n">existing</span>
+</span><span id="DataFrame.withColumnRenamed-760"><a href="#DataFrame.withColumnRenamed-760"><span class="linenos">760</span></a> <span class="p">]</span>
+</span><span id="DataFrame.withColumnRenamed-761"><a href="#DataFrame.withColumnRenamed-761"><span class="linenos">761</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">existing_columns</span><span class="p">:</span>
+</span><span id="DataFrame.withColumnRenamed-762"><a href="#DataFrame.withColumnRenamed-762"><span class="linenos">762</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Tried to rename a column that doesn&#39;t exist&quot;</span><span class="p">)</span>
+</span><span id="DataFrame.withColumnRenamed-763"><a href="#DataFrame.withColumnRenamed-763"><span class="linenos">763</span></a> <span class="k">for</span> <span class="n">existing_column</span> <span class="ow">in</span> <span class="n">existing_columns</span><span class="p">:</span>
+</span><span id="DataFrame.withColumnRenamed-764"><a href="#DataFrame.withColumnRenamed-764"><span class="linenos">764</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">existing_column</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
+</span><span id="DataFrame.withColumnRenamed-765"><a href="#DataFrame.withColumnRenamed-765"><span class="linenos">765</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">existing_column</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">new</span><span class="p">))</span>
+</span><span id="DataFrame.withColumnRenamed-766"><a href="#DataFrame.withColumnRenamed-766"><span class="linenos">766</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.withColumnRenamed-767"><a href="#DataFrame.withColumnRenamed-767"><span class="linenos">767</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;alias&quot;</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">new</span><span class="p">))</span>
+</span><span id="DataFrame.withColumnRenamed-768"><a href="#DataFrame.withColumnRenamed-768"><span class="linenos">768</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
</span></pre></div>
@@ -2418,16 +2492,16 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.drop"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.drop-733"><a href="#DataFrame.drop-733"><span class="linenos">733</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.drop-734"><a href="#DataFrame.drop-734"><span class="linenos">734</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.drop-735"><a href="#DataFrame.drop-735"><span class="linenos">735</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.drop-736"><a href="#DataFrame.drop-736"><span class="linenos">736</span></a> <span class="n">drop_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame.drop-737"><a href="#DataFrame.drop-737"><span class="linenos">737</span></a> <span class="n">new_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.drop-738"><a href="#DataFrame.drop-738"><span class="linenos">738</span></a> <span class="n">col</span>
-</span><span id="DataFrame.drop-739"><a href="#DataFrame.drop-739"><span class="linenos">739</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">all_columns</span>
-</span><span id="DataFrame.drop-740"><a href="#DataFrame.drop-740"><span class="linenos">740</span></a> <span class="k">if</span> <span class="n">col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="n">drop_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">drop_column</span> <span class="ow">in</span> <span class="n">drop_cols</span><span class="p">]</span>
-</span><span id="DataFrame.drop-741"><a href="#DataFrame.drop-741"><span class="linenos">741</span></a> <span class="p">]</span>
-</span><span id="DataFrame.drop-742"><a href="#DataFrame.drop-742"><span class="linenos">742</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">new_columns</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.drop-770"><a href="#DataFrame.drop-770"><span class="linenos">770</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.drop-771"><a href="#DataFrame.drop-771"><span class="linenos">771</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.drop-772"><a href="#DataFrame.drop-772"><span class="linenos">772</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.drop-773"><a href="#DataFrame.drop-773"><span class="linenos">773</span></a> <span class="n">drop_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame.drop-774"><a href="#DataFrame.drop-774"><span class="linenos">774</span></a> <span class="n">new_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.drop-775"><a href="#DataFrame.drop-775"><span class="linenos">775</span></a> <span class="n">col</span>
+</span><span id="DataFrame.drop-776"><a href="#DataFrame.drop-776"><span class="linenos">776</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">all_columns</span>
+</span><span id="DataFrame.drop-777"><a href="#DataFrame.drop-777"><span class="linenos">777</span></a> <span class="k">if</span> <span class="n">col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="n">drop_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">drop_column</span> <span class="ow">in</span> <span class="n">drop_cols</span><span class="p">]</span>
+</span><span id="DataFrame.drop-778"><a href="#DataFrame.drop-778"><span class="linenos">778</span></a> <span class="p">]</span>
+</span><span id="DataFrame.drop-779"><a href="#DataFrame.drop-779"><span class="linenos">779</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">new_columns</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
</span></pre></div>
@@ -2446,9 +2520,9 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.limit"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.limit-744"><a href="#DataFrame.limit-744"><span class="linenos">744</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">LIMIT</span><span class="p">)</span>
-</span><span id="DataFrame.limit-745"><a href="#DataFrame.limit-745"><span class="linenos">745</span></a> <span class="k">def</span> <span class="nf">limit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.limit-746"><a href="#DataFrame.limit-746"><span class="linenos">746</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="n">num</span><span class="p">))</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.limit-781"><a href="#DataFrame.limit-781"><span class="linenos">781</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">LIMIT</span><span class="p">)</span>
+</span><span id="DataFrame.limit-782"><a href="#DataFrame.limit-782"><span class="linenos">782</span></a> <span class="k">def</span> <span class="nf">limit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.limit-783"><a href="#DataFrame.limit-783"><span class="linenos">783</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="n">num</span><span class="p">))</span>
</span></pre></div>
@@ -2467,15 +2541,15 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.hint"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.hint-748"><a href="#DataFrame.hint-748"><span class="linenos">748</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame.hint-749"><a href="#DataFrame.hint-749"><span class="linenos">749</span></a> <span class="k">def</span> <span class="nf">hint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="n">parameters</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.hint-750"><a href="#DataFrame.hint-750"><span class="linenos">750</span></a> <span class="n">parameter_list</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
-</span><span id="DataFrame.hint-751"><a href="#DataFrame.hint-751"><span class="linenos">751</span></a> <span class="n">parameter_columns</span> <span class="o">=</span> <span class="p">(</span>
-</span><span id="DataFrame.hint-752"><a href="#DataFrame.hint-752"><span class="linenos">752</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">parameter_list</span><span class="p">)</span>
-</span><span id="DataFrame.hint-753"><a href="#DataFrame.hint-753"><span class="linenos">753</span></a> <span class="k">if</span> <span class="n">parameters</span>
-</span><span id="DataFrame.hint-754"><a href="#DataFrame.hint-754"><span class="linenos">754</span></a> <span class="k">else</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">])</span>
-</span><span id="DataFrame.hint-755"><a href="#DataFrame.hint-755"><span class="linenos">755</span></a> <span class="p">)</span>
-</span><span id="DataFrame.hint-756"><a href="#DataFrame.hint-756"><span class="linenos">756</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">parameter_columns</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.hint-785"><a href="#DataFrame.hint-785"><span class="linenos">785</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame.hint-786"><a href="#DataFrame.hint-786"><span class="linenos">786</span></a> <span class="k">def</span> <span class="nf">hint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="n">parameters</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.hint-787"><a href="#DataFrame.hint-787"><span class="linenos">787</span></a> <span class="n">parameter_list</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+</span><span id="DataFrame.hint-788"><a href="#DataFrame.hint-788"><span class="linenos">788</span></a> <span class="n">parameter_columns</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="DataFrame.hint-789"><a href="#DataFrame.hint-789"><span class="linenos">789</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">parameter_list</span><span class="p">)</span>
+</span><span id="DataFrame.hint-790"><a href="#DataFrame.hint-790"><span class="linenos">790</span></a> <span class="k">if</span> <span class="n">parameters</span>
+</span><span id="DataFrame.hint-791"><a href="#DataFrame.hint-791"><span class="linenos">791</span></a> <span class="k">else</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">])</span>
+</span><span id="DataFrame.hint-792"><a href="#DataFrame.hint-792"><span class="linenos">792</span></a> <span class="p">)</span>
+</span><span id="DataFrame.hint-793"><a href="#DataFrame.hint-793"><span class="linenos">793</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">parameter_columns</span><span class="p">)</span>
</span></pre></div>
@@ -2488,20 +2562,20 @@ and check if it matches the type of the value provided. If not then make it null
<div class="decorator">@operation(Operation.NO_OP)</div>
<span class="def">def</span>
- <span class="name">repartition</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">numPartitions</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719845685008&#39;</span><span class="o">&gt;</span><span class="p">]</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719845795232&#39;</span><span class="o">&gt;</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">sqlglot.dataframe.sql.DataFrame</a></span>:</span></span>
+ <span class="name">repartition</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">numPartitions</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377708152048&#39;</span><span class="o">&gt;</span><span class="p">]</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377708084144&#39;</span><span class="o">&gt;</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">sqlglot.dataframe.sql.DataFrame</a></span>:</span></span>
<label class="view-source-button" for="DataFrame.repartition-view-source"><span>View Source</span></label>
</div>
<a class="headerlink" href="#DataFrame.repartition"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.repartition-758"><a href="#DataFrame.repartition-758"><span class="linenos">758</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame.repartition-759"><a href="#DataFrame.repartition-759"><span class="linenos">759</span></a> <span class="k">def</span> <span class="nf">repartition</span><span class="p">(</span>
-</span><span id="DataFrame.repartition-760"><a href="#DataFrame.repartition-760"><span class="linenos">760</span></a> <span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">ColumnOrName</span><span class="p">],</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">ColumnOrName</span>
-</span><span id="DataFrame.repartition-761"><a href="#DataFrame.repartition-761"><span class="linenos">761</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.repartition-762"><a href="#DataFrame.repartition-762"><span class="linenos">762</span></a> <span class="n">num_partition_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">numPartitions</span><span class="p">)</span>
-</span><span id="DataFrame.repartition-763"><a href="#DataFrame.repartition-763"><span class="linenos">763</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame.repartition-764"><a href="#DataFrame.repartition-764"><span class="linenos">764</span></a> <span class="n">args</span> <span class="o">=</span> <span class="n">num_partition_cols</span> <span class="o">+</span> <span class="n">columns</span>
-</span><span id="DataFrame.repartition-765"><a href="#DataFrame.repartition-765"><span class="linenos">765</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;repartition&quot;</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.repartition-795"><a href="#DataFrame.repartition-795"><span class="linenos">795</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame.repartition-796"><a href="#DataFrame.repartition-796"><span class="linenos">796</span></a> <span class="k">def</span> <span class="nf">repartition</span><span class="p">(</span>
+</span><span id="DataFrame.repartition-797"><a href="#DataFrame.repartition-797"><span class="linenos">797</span></a> <span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">ColumnOrName</span><span class="p">],</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">ColumnOrName</span>
+</span><span id="DataFrame.repartition-798"><a href="#DataFrame.repartition-798"><span class="linenos">798</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.repartition-799"><a href="#DataFrame.repartition-799"><span class="linenos">799</span></a> <span class="n">num_partition_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">numPartitions</span><span class="p">)</span>
+</span><span id="DataFrame.repartition-800"><a href="#DataFrame.repartition-800"><span class="linenos">800</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame.repartition-801"><a href="#DataFrame.repartition-801"><span class="linenos">801</span></a> <span class="n">args</span> <span class="o">=</span> <span class="n">num_partition_cols</span> <span class="o">+</span> <span class="n">columns</span>
+</span><span id="DataFrame.repartition-802"><a href="#DataFrame.repartition-802"><span class="linenos">802</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;repartition&quot;</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
</span></pre></div>
@@ -2520,10 +2594,10 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.coalesce"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.coalesce-767"><a href="#DataFrame.coalesce-767"><span class="linenos">767</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame.coalesce-768"><a href="#DataFrame.coalesce-768"><span class="linenos">768</span></a> <span class="k">def</span> <span class="nf">coalesce</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.coalesce-769"><a href="#DataFrame.coalesce-769"><span class="linenos">769</span></a> <span class="n">num_partitions</span> <span class="o">=</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="n">numPartitions</span><span class="p">])</span>
-</span><span id="DataFrame.coalesce-770"><a href="#DataFrame.coalesce-770"><span class="linenos">770</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;coalesce&quot;</span><span class="p">,</span> <span class="n">num_partitions</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.coalesce-804"><a href="#DataFrame.coalesce-804"><span class="linenos">804</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame.coalesce-805"><a href="#DataFrame.coalesce-805"><span class="linenos">805</span></a> <span class="k">def</span> <span class="nf">coalesce</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.coalesce-806"><a href="#DataFrame.coalesce-806"><span class="linenos">806</span></a> <span class="n">num_partitions</span> <span class="o">=</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="n">numPartitions</span><span class="p">])</span>
+</span><span id="DataFrame.coalesce-807"><a href="#DataFrame.coalesce-807"><span class="linenos">807</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;coalesce&quot;</span><span class="p">,</span> <span class="n">num_partitions</span><span class="p">)</span>
</span></pre></div>
@@ -2542,9 +2616,9 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.cache"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.cache-772"><a href="#DataFrame.cache-772"><span class="linenos">772</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame.cache-773"><a href="#DataFrame.cache-773"><span class="linenos">773</span></a> <span class="k">def</span> <span class="nf">cache</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.cache-774"><a href="#DataFrame.cache-774"><span class="linenos">774</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storage_level</span><span class="o">=</span><span class="s2">&quot;MEMORY_AND_DISK&quot;</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.cache-809"><a href="#DataFrame.cache-809"><span class="linenos">809</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame.cache-810"><a href="#DataFrame.cache-810"><span class="linenos">810</span></a> <span class="k">def</span> <span class="nf">cache</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.cache-811"><a href="#DataFrame.cache-811"><span class="linenos">811</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storage_level</span><span class="o">=</span><span class="s2">&quot;MEMORY_AND_DISK&quot;</span><span class="p">)</span>
</span></pre></div>
@@ -2563,12 +2637,12 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.persist"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.persist-776"><a href="#DataFrame.persist-776"><span class="linenos">776</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame.persist-777"><a href="#DataFrame.persist-777"><span class="linenos">777</span></a> <span class="k">def</span> <span class="nf">persist</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">storageLevel</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;MEMORY_AND_DISK_SER&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.persist-778"><a href="#DataFrame.persist-778"><span class="linenos">778</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame.persist-779"><a href="#DataFrame.persist-779"><span class="linenos">779</span></a><span class="sd"> Storage Level Options: https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-aux-cache-cache-table.html</span>
-</span><span id="DataFrame.persist-780"><a href="#DataFrame.persist-780"><span class="linenos">780</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame.persist-781"><a href="#DataFrame.persist-781"><span class="linenos">781</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storageLevel</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.persist-813"><a href="#DataFrame.persist-813"><span class="linenos">813</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame.persist-814"><a href="#DataFrame.persist-814"><span class="linenos">814</span></a> <span class="k">def</span> <span class="nf">persist</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">storageLevel</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;MEMORY_AND_DISK_SER&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.persist-815"><a href="#DataFrame.persist-815"><span class="linenos">815</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame.persist-816"><a href="#DataFrame.persist-816"><span class="linenos">816</span></a><span class="sd"> Storage Level Options: https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-aux-cache-cache-table.html</span>
+</span><span id="DataFrame.persist-817"><a href="#DataFrame.persist-817"><span class="linenos">817</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame.persist-818"><a href="#DataFrame.persist-818"><span class="linenos">818</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storageLevel</span><span class="p">)</span>
</span></pre></div>
@@ -3002,7 +3076,7 @@ and check if it matches the type of the value provided. If not then make it null
</span><span id="Column-177"><a href="#Column-177"><span class="linenos">177</span></a> <span class="k">return</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">)</span>
</span><span id="Column-178"><a href="#Column-178"><span class="linenos">178</span></a>
</span><span id="Column-179"><a href="#Column-179"><span class="linenos">179</span></a> <span class="nd">@property</span>
-</span><span id="Column-180"><a href="#Column-180"><span class="linenos">180</span></a> <span class="k">def</span> <span class="nf">column_expression</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">:</span>
+</span><span id="Column-180"><a href="#Column-180"><span class="linenos">180</span></a> <span class="k">def</span> <span class="nf">column_expression</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Literal</span><span class="p">]:</span>
</span><span id="Column-181"><a href="#Column-181"><span class="linenos">181</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">unalias</span><span class="p">()</span>
</span><span id="Column-182"><a href="#Column-182"><span class="linenos">182</span></a>
</span><span id="Column-183"><a href="#Column-183"><span class="linenos">183</span></a> <span class="nd">@property</span>
@@ -3156,7 +3230,7 @@ and check if it matches the type of the value provided. If not then make it null
<input id="Column.__init__-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
<div class="attr function">
- <span class="name">Column</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">expression</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719847713120&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span></span>)</span>
+ <span class="name">Column</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">expression</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377710043744&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span></span>)</span>
<label class="view-source-button" for="Column.__init__-view-source"><span>View Source</span></label>
@@ -3184,7 +3258,7 @@ and check if it matches the type of the value provided. If not then make it null
<div class="decorator">@classmethod</div>
<span class="def">def</span>
- <span class="name">ensure_col</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">value</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719845612224&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span></span><span class="return-annotation">):</span></span>
+ <span class="name">ensure_col</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">value</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377708259664&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span></span><span class="return-annotation">):</span></span>
<label class="view-source-button" for="Column.ensure_col-view-source"><span>View Source</span></label>
@@ -3205,7 +3279,7 @@ and check if it matches the type of the value provided. If not then make it null
<div class="decorator">@classmethod</div>
<span class="def">def</span>
- <span class="name">ensure_cols</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">args</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719846275024&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n">List</span><span class="p">[</span><span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span><span class="p">]</span>:</span></span>
+ <span class="name">ensure_cols</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">args</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377708547664&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n">List</span><span class="p">[</span><span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span><span class="p">]</span>:</span></span>
<label class="view-source-button" for="Column.ensure_cols-view-source"><span>View Source</span></label>
@@ -3226,7 +3300,7 @@ and check if it matches the type of the value provided. If not then make it null
<div class="decorator">@classmethod</div>
<span class="def">def</span>
- <span class="name">invoke_anonymous_function</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">column</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719846030752&#39;</span><span class="o">&gt;</span><span class="p">]</span>,</span><span class="param"> <span class="n">func_name</span><span class="p">:</span> <span class="nb">str</span>,</span><span class="param"> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719845950944&#39;</span><span class="o">&gt;</span><span class="p">]</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span>:</span></span>
+ <span class="name">invoke_anonymous_function</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">column</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377708420048&#39;</span><span class="o">&gt;</span><span class="p">]</span>,</span><span class="param"> <span class="n">func_name</span><span class="p">:</span> <span class="nb">str</span>,</span><span class="param"> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377708523584&#39;</span><span class="o">&gt;</span><span class="p">]</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span>:</span></span>
<label class="view-source-button" for="Column.invoke_anonymous_function-view-source"><span>View Source</span></label>
@@ -3253,7 +3327,7 @@ and check if it matches the type of the value provided. If not then make it null
<div class="decorator">@classmethod</div>
<span class="def">def</span>
- <span class="name">invoke_expression_over_column</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">column</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719845906896&#39;</span><span class="o">&gt;</span><span class="p">]</span>,</span><span class="param"> <span class="n">callable_expression</span><span class="p">:</span> <span class="n">Callable</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span>:</span></span>
+ <span class="name">invoke_expression_over_column</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">column</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377708376176&#39;</span><span class="o">&gt;</span><span class="p">]</span>,</span><span class="param"> <span class="n">callable_expression</span><span class="p">:</span> <span class="n">Callable</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span>:</span></span>
<label class="view-source-button" for="Column.invoke_expression_over_column-view-source"><span>View Source</span></label>
@@ -3290,7 +3364,7 @@ and check if it matches the type of the value provided. If not then make it null
<div class="attr function">
<span class="def">def</span>
- <span class="name">binary_op</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">klass</span><span class="p">:</span> <span class="n">Callable</span>,</span><span class="param"> <span class="n">other</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719845931152&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span>:</span></span>
+ <span class="name">binary_op</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">klass</span><span class="p">:</span> <span class="n">Callable</span>,</span><span class="param"> <span class="n">other</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377706598112&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span>:</span></span>
<label class="view-source-button" for="Column.binary_op-view-source"><span>View Source</span></label>
@@ -3311,7 +3385,7 @@ and check if it matches the type of the value provided. If not then make it null
<div class="attr function">
<span class="def">def</span>
- <span class="name">inverse_binary_op</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">klass</span><span class="p">:</span> <span class="n">Callable</span>,</span><span class="param"> <span class="n">other</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719845815168&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span>:</span></span>
+ <span class="name">inverse_binary_op</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">klass</span><span class="p">:</span> <span class="n">Callable</span>,</span><span class="param"> <span class="n">other</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377706608240&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span>:</span></span>
<label class="view-source-button" for="Column.inverse_binary_op-view-source"><span>View Source</span></label>
@@ -3821,7 +3895,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="attr function">
<span class="def">def</span>
- <span class="name">isin</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719846379728&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">Iterable</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719846379728&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">):</span></span>
+ <span class="name">isin</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377706743696&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">Iterable</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377706743696&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">):</span></span>
<label class="view-source-button" for="Column.isin-view-source"><span>View Source</span></label>
@@ -3842,7 +3916,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="attr function">
<span class="def">def</span>
- <span class="name">between</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">lowerBound</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719846507952&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">upperBound</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719844481728&#39;</span><span class="o">&gt;</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span>:</span></span>
+ <span class="name">between</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">lowerBound</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377706814464&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">upperBound</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377706852560&#39;</span><span class="o">&gt;</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span>:</span></span>
<label class="view-source-button" for="Column.between-view-source"><span>View Source</span></label>
@@ -3877,7 +3951,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="attr function">
<span class="def">def</span>
- <span class="name">over</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">window</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719844522944&#39;</span><span class="o">&gt;</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span>:</span></span>
+ <span class="name">over</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">window</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377706910272&#39;</span><span class="o">&gt;</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">sqlglot.dataframe.sql.Column</a></span>:</span></span>
<label class="view-source-button" for="Column.over-view-source"><span>View Source</span></label>
@@ -3905,32 +3979,32 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameNaFunctions"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions-784"><a href="#DataFrameNaFunctions-784"><span class="linenos">784</span></a><span class="k">class</span> <span class="nc">DataFrameNaFunctions</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions-785"><a href="#DataFrameNaFunctions-785"><span class="linenos">785</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">):</span>
-</span><span id="DataFrameNaFunctions-786"><a href="#DataFrameNaFunctions-786"><span class="linenos">786</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
-</span><span id="DataFrameNaFunctions-787"><a href="#DataFrameNaFunctions-787"><span class="linenos">787</span></a>
-</span><span id="DataFrameNaFunctions-788"><a href="#DataFrameNaFunctions-788"><span class="linenos">788</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span>
-</span><span id="DataFrameNaFunctions-789"><a href="#DataFrameNaFunctions-789"><span class="linenos">789</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-790"><a href="#DataFrameNaFunctions-790"><span class="linenos">790</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-791"><a href="#DataFrameNaFunctions-791"><span class="linenos">791</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-792"><a href="#DataFrameNaFunctions-792"><span class="linenos">792</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-793"><a href="#DataFrameNaFunctions-793"><span class="linenos">793</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions-794"><a href="#DataFrameNaFunctions-794"><span class="linenos">794</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">how</span><span class="o">=</span><span class="n">how</span><span class="p">,</span> <span class="n">thresh</span><span class="o">=</span><span class="n">thresh</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
-</span><span id="DataFrameNaFunctions-795"><a href="#DataFrameNaFunctions-795"><span class="linenos">795</span></a>
-</span><span id="DataFrameNaFunctions-796"><a href="#DataFrameNaFunctions-796"><span class="linenos">796</span></a> <span class="k">def</span> <span class="nf">fill</span><span class="p">(</span>
-</span><span id="DataFrameNaFunctions-797"><a href="#DataFrameNaFunctions-797"><span class="linenos">797</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-798"><a href="#DataFrameNaFunctions-798"><span class="linenos">798</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">bool</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]],</span>
-</span><span id="DataFrameNaFunctions-799"><a href="#DataFrameNaFunctions-799"><span class="linenos">799</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-800"><a href="#DataFrameNaFunctions-800"><span class="linenos">800</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions-801"><a href="#DataFrameNaFunctions-801"><span class="linenos">801</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
-</span><span id="DataFrameNaFunctions-802"><a href="#DataFrameNaFunctions-802"><span class="linenos">802</span></a>
-</span><span id="DataFrameNaFunctions-803"><a href="#DataFrameNaFunctions-803"><span class="linenos">803</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
-</span><span id="DataFrameNaFunctions-804"><a href="#DataFrameNaFunctions-804"><span class="linenos">804</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-805"><a href="#DataFrameNaFunctions-805"><span class="linenos">805</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
-</span><span id="DataFrameNaFunctions-806"><a href="#DataFrameNaFunctions-806"><span class="linenos">806</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-807"><a href="#DataFrameNaFunctions-807"><span class="linenos">807</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-808"><a href="#DataFrameNaFunctions-808"><span class="linenos">808</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions-809"><a href="#DataFrameNaFunctions-809"><span class="linenos">809</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">to_replace</span><span class="o">=</span><span class="n">to_replace</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions-821"><a href="#DataFrameNaFunctions-821"><span class="linenos">821</span></a><span class="k">class</span> <span class="nc">DataFrameNaFunctions</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions-822"><a href="#DataFrameNaFunctions-822"><span class="linenos">822</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">):</span>
+</span><span id="DataFrameNaFunctions-823"><a href="#DataFrameNaFunctions-823"><span class="linenos">823</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
+</span><span id="DataFrameNaFunctions-824"><a href="#DataFrameNaFunctions-824"><span class="linenos">824</span></a>
+</span><span id="DataFrameNaFunctions-825"><a href="#DataFrameNaFunctions-825"><span class="linenos">825</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span>
+</span><span id="DataFrameNaFunctions-826"><a href="#DataFrameNaFunctions-826"><span class="linenos">826</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-827"><a href="#DataFrameNaFunctions-827"><span class="linenos">827</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-828"><a href="#DataFrameNaFunctions-828"><span class="linenos">828</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-829"><a href="#DataFrameNaFunctions-829"><span class="linenos">829</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-830"><a href="#DataFrameNaFunctions-830"><span class="linenos">830</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions-831"><a href="#DataFrameNaFunctions-831"><span class="linenos">831</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">how</span><span class="o">=</span><span class="n">how</span><span class="p">,</span> <span class="n">thresh</span><span class="o">=</span><span class="n">thresh</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
+</span><span id="DataFrameNaFunctions-832"><a href="#DataFrameNaFunctions-832"><span class="linenos">832</span></a>
+</span><span id="DataFrameNaFunctions-833"><a href="#DataFrameNaFunctions-833"><span class="linenos">833</span></a> <span class="k">def</span> <span class="nf">fill</span><span class="p">(</span>
+</span><span id="DataFrameNaFunctions-834"><a href="#DataFrameNaFunctions-834"><span class="linenos">834</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-835"><a href="#DataFrameNaFunctions-835"><span class="linenos">835</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">bool</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]],</span>
+</span><span id="DataFrameNaFunctions-836"><a href="#DataFrameNaFunctions-836"><span class="linenos">836</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-837"><a href="#DataFrameNaFunctions-837"><span class="linenos">837</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions-838"><a href="#DataFrameNaFunctions-838"><span class="linenos">838</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
+</span><span id="DataFrameNaFunctions-839"><a href="#DataFrameNaFunctions-839"><span class="linenos">839</span></a>
+</span><span id="DataFrameNaFunctions-840"><a href="#DataFrameNaFunctions-840"><span class="linenos">840</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
+</span><span id="DataFrameNaFunctions-841"><a href="#DataFrameNaFunctions-841"><span class="linenos">841</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-842"><a href="#DataFrameNaFunctions-842"><span class="linenos">842</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
+</span><span id="DataFrameNaFunctions-843"><a href="#DataFrameNaFunctions-843"><span class="linenos">843</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-844"><a href="#DataFrameNaFunctions-844"><span class="linenos">844</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-845"><a href="#DataFrameNaFunctions-845"><span class="linenos">845</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions-846"><a href="#DataFrameNaFunctions-846"><span class="linenos">846</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">to_replace</span><span class="o">=</span><span class="n">to_replace</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
</span></pre></div>
@@ -3946,8 +4020,8 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameNaFunctions.__init__"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.__init__-785"><a href="#DataFrameNaFunctions.__init__-785"><span class="linenos">785</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">):</span>
-</span><span id="DataFrameNaFunctions.__init__-786"><a href="#DataFrameNaFunctions.__init__-786"><span class="linenos">786</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.__init__-822"><a href="#DataFrameNaFunctions.__init__-822"><span class="linenos">822</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">):</span>
+</span><span id="DataFrameNaFunctions.__init__-823"><a href="#DataFrameNaFunctions.__init__-823"><span class="linenos">823</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
</span></pre></div>
@@ -3965,13 +4039,13 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameNaFunctions.drop"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.drop-788"><a href="#DataFrameNaFunctions.drop-788"><span class="linenos">788</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span>
-</span><span id="DataFrameNaFunctions.drop-789"><a href="#DataFrameNaFunctions.drop-789"><span class="linenos">789</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.drop-790"><a href="#DataFrameNaFunctions.drop-790"><span class="linenos">790</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.drop-791"><a href="#DataFrameNaFunctions.drop-791"><span class="linenos">791</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.drop-792"><a href="#DataFrameNaFunctions.drop-792"><span class="linenos">792</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.drop-793"><a href="#DataFrameNaFunctions.drop-793"><span class="linenos">793</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions.drop-794"><a href="#DataFrameNaFunctions.drop-794"><span class="linenos">794</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">how</span><span class="o">=</span><span class="n">how</span><span class="p">,</span> <span class="n">thresh</span><span class="o">=</span><span class="n">thresh</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.drop-825"><a href="#DataFrameNaFunctions.drop-825"><span class="linenos">825</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span>
+</span><span id="DataFrameNaFunctions.drop-826"><a href="#DataFrameNaFunctions.drop-826"><span class="linenos">826</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.drop-827"><a href="#DataFrameNaFunctions.drop-827"><span class="linenos">827</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.drop-828"><a href="#DataFrameNaFunctions.drop-828"><span class="linenos">828</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.drop-829"><a href="#DataFrameNaFunctions.drop-829"><span class="linenos">829</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.drop-830"><a href="#DataFrameNaFunctions.drop-830"><span class="linenos">830</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions.drop-831"><a href="#DataFrameNaFunctions.drop-831"><span class="linenos">831</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">how</span><span class="o">=</span><span class="n">how</span><span class="p">,</span> <span class="n">thresh</span><span class="o">=</span><span class="n">thresh</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
</span></pre></div>
@@ -3989,12 +4063,12 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameNaFunctions.fill"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.fill-796"><a href="#DataFrameNaFunctions.fill-796"><span class="linenos">796</span></a> <span class="k">def</span> <span class="nf">fill</span><span class="p">(</span>
-</span><span id="DataFrameNaFunctions.fill-797"><a href="#DataFrameNaFunctions.fill-797"><span class="linenos">797</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.fill-798"><a href="#DataFrameNaFunctions.fill-798"><span class="linenos">798</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">bool</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]],</span>
-</span><span id="DataFrameNaFunctions.fill-799"><a href="#DataFrameNaFunctions.fill-799"><span class="linenos">799</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.fill-800"><a href="#DataFrameNaFunctions.fill-800"><span class="linenos">800</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions.fill-801"><a href="#DataFrameNaFunctions.fill-801"><span class="linenos">801</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.fill-833"><a href="#DataFrameNaFunctions.fill-833"><span class="linenos">833</span></a> <span class="k">def</span> <span class="nf">fill</span><span class="p">(</span>
+</span><span id="DataFrameNaFunctions.fill-834"><a href="#DataFrameNaFunctions.fill-834"><span class="linenos">834</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.fill-835"><a href="#DataFrameNaFunctions.fill-835"><span class="linenos">835</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">bool</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]],</span>
+</span><span id="DataFrameNaFunctions.fill-836"><a href="#DataFrameNaFunctions.fill-836"><span class="linenos">836</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.fill-837"><a href="#DataFrameNaFunctions.fill-837"><span class="linenos">837</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions.fill-838"><a href="#DataFrameNaFunctions.fill-838"><span class="linenos">838</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
</span></pre></div>
@@ -4012,13 +4086,13 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameNaFunctions.replace"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.replace-803"><a href="#DataFrameNaFunctions.replace-803"><span class="linenos">803</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
-</span><span id="DataFrameNaFunctions.replace-804"><a href="#DataFrameNaFunctions.replace-804"><span class="linenos">804</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.replace-805"><a href="#DataFrameNaFunctions.replace-805"><span class="linenos">805</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
-</span><span id="DataFrameNaFunctions.replace-806"><a href="#DataFrameNaFunctions.replace-806"><span class="linenos">806</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.replace-807"><a href="#DataFrameNaFunctions.replace-807"><span class="linenos">807</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.replace-808"><a href="#DataFrameNaFunctions.replace-808"><span class="linenos">808</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions.replace-809"><a href="#DataFrameNaFunctions.replace-809"><span class="linenos">809</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">to_replace</span><span class="o">=</span><span class="n">to_replace</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.replace-840"><a href="#DataFrameNaFunctions.replace-840"><span class="linenos">840</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
+</span><span id="DataFrameNaFunctions.replace-841"><a href="#DataFrameNaFunctions.replace-841"><span class="linenos">841</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.replace-842"><a href="#DataFrameNaFunctions.replace-842"><span class="linenos">842</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
+</span><span id="DataFrameNaFunctions.replace-843"><a href="#DataFrameNaFunctions.replace-843"><span class="linenos">843</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.replace-844"><a href="#DataFrameNaFunctions.replace-844"><span class="linenos">844</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.replace-845"><a href="#DataFrameNaFunctions.replace-845"><span class="linenos">845</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions.replace-846"><a href="#DataFrameNaFunctions.replace-846"><span class="linenos">846</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">to_replace</span><span class="o">=</span><span class="n">to_replace</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
</span></pre></div>
@@ -4075,7 +4149,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="decorator">@classmethod</div>
<span class="def">def</span>
- <span class="name">partitionBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719845053776&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719845053776&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">sqlglot.dataframe.sql.WindowSpec</a></span>:</span></span>
+ <span class="name">partitionBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377707221328&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377707221328&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">sqlglot.dataframe.sql.WindowSpec</a></span>:</span></span>
<label class="view-source-button" for="Window.partitionBy-view-source"><span>View Source</span></label>
@@ -4096,7 +4170,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="decorator">@classmethod</div>
<span class="def">def</span>
- <span class="name">orderBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719844989536&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719844989536&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">sqlglot.dataframe.sql.WindowSpec</a></span>:</span></span>
+ <span class="name">orderBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377707360368&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377707360368&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">sqlglot.dataframe.sql.WindowSpec</a></span>:</span></span>
<label class="view-source-button" for="Window.orderBy-view-source"><span>View Source</span></label>
@@ -4321,7 +4395,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="attr function">
<span class="def">def</span>
- <span class="name">partitionBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719844913712&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719844913712&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">sqlglot.dataframe.sql.WindowSpec</a></span>:</span></span>
+ <span class="name">partitionBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377707291568&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377707291568&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">sqlglot.dataframe.sql.WindowSpec</a></span>:</span></span>
<label class="view-source-button" for="WindowSpec.partitionBy-view-source"><span>View Source</span></label>
@@ -4348,7 +4422,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="attr function">
<span class="def">def</span>
- <span class="name">orderBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719844813248&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139719844813248&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">sqlglot.dataframe.sql.WindowSpec</a></span>:</span></span>
+ <span class="name">orderBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377707161408&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;140377707161408&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">sqlglot.dataframe.sql.WindowSpec</a></span>:</span></span>
<label class="view-source-button" for="WindowSpec.orderBy-view-source"><span>View Source</span></label>
@@ -4448,10 +4522,18 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</span><span id="DataFrameReader-20"><a href="#DataFrameReader-20"><span class="linenos">20</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.dataframe</span> <span class="kn">import</span> <span class="n">DataFrame</span>
</span><span id="DataFrameReader-21"><a href="#DataFrameReader-21"><span class="linenos">21</span></a>
</span><span id="DataFrameReader-22"><a href="#DataFrameReader-22"><span class="linenos">22</span></a> <span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">add_table</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span>
-</span><span id="DataFrameReader-23"><a href="#DataFrameReader-23"><span class="linenos">23</span></a> <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span>
-</span><span id="DataFrameReader-24"><a href="#DataFrameReader-24"><span class="linenos">24</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="p">,</span>
-</span><span id="DataFrameReader-25"><a href="#DataFrameReader-25"><span class="linenos">25</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">()</span><span class="o">.</span><span class="n">from_</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">column_names</span><span class="p">(</span><span class="n">tableName</span><span class="p">)),</span>
-</span><span id="DataFrameReader-26"><a href="#DataFrameReader-26"><span class="linenos">26</span></a> <span class="p">)</span>
+</span><span id="DataFrameReader-23"><a href="#DataFrameReader-23"><span class="linenos">23</span></a>
+</span><span id="DataFrameReader-24"><a href="#DataFrameReader-24"><span class="linenos">24</span></a> <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span>
+</span><span id="DataFrameReader-25"><a href="#DataFrameReader-25"><span class="linenos">25</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="p">,</span>
+</span><span id="DataFrameReader-26"><a href="#DataFrameReader-26"><span class="linenos">26</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">()</span>
+</span><span id="DataFrameReader-27"><a href="#DataFrameReader-27"><span class="linenos">27</span></a> <span class="o">.</span><span class="n">from_</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span>
+</span><span id="DataFrameReader-28"><a href="#DataFrameReader-28"><span class="linenos">28</span></a> <span class="o">.</span><span class="n">select</span><span class="p">(</span>
+</span><span id="DataFrameReader-29"><a href="#DataFrameReader-29"><span class="linenos">29</span></a> <span class="o">*</span><span class="p">(</span>
+</span><span id="DataFrameReader-30"><a href="#DataFrameReader-30"><span class="linenos">30</span></a> <span class="n">column</span> <span class="k">if</span> <span class="n">should_identify</span><span class="p">(</span><span class="n">column</span><span class="p">,</span> <span class="s2">&quot;safe&quot;</span><span class="p">)</span> <span class="k">else</span> <span class="sa">f</span><span class="s1">&#39;&quot;</span><span class="si">{</span><span class="n">column</span><span class="si">}</span><span class="s1">&quot;&#39;</span>
+</span><span id="DataFrameReader-31"><a href="#DataFrameReader-31"><span class="linenos">31</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">column_names</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span>
+</span><span id="DataFrameReader-32"><a href="#DataFrameReader-32"><span class="linenos">32</span></a> <span class="p">)</span>
+</span><span id="DataFrameReader-33"><a href="#DataFrameReader-33"><span class="linenos">33</span></a> <span class="p">),</span>
+</span><span id="DataFrameReader-34"><a href="#DataFrameReader-34"><span class="linenos">34</span></a> <span class="p">)</span>
</span></pre></div>
@@ -4490,10 +4572,18 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</span><span id="DataFrameReader.table-20"><a href="#DataFrameReader.table-20"><span class="linenos">20</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.dataframe</span> <span class="kn">import</span> <span class="n">DataFrame</span>
</span><span id="DataFrameReader.table-21"><a href="#DataFrameReader.table-21"><span class="linenos">21</span></a>
</span><span id="DataFrameReader.table-22"><a href="#DataFrameReader.table-22"><span class="linenos">22</span></a> <span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">add_table</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span>
-</span><span id="DataFrameReader.table-23"><a href="#DataFrameReader.table-23"><span class="linenos">23</span></a> <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span>
-</span><span id="DataFrameReader.table-24"><a href="#DataFrameReader.table-24"><span class="linenos">24</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="p">,</span>
-</span><span id="DataFrameReader.table-25"><a href="#DataFrameReader.table-25"><span class="linenos">25</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">()</span><span class="o">.</span><span class="n">from_</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">column_names</span><span class="p">(</span><span class="n">tableName</span><span class="p">)),</span>
-</span><span id="DataFrameReader.table-26"><a href="#DataFrameReader.table-26"><span class="linenos">26</span></a> <span class="p">)</span>
+</span><span id="DataFrameReader.table-23"><a href="#DataFrameReader.table-23"><span class="linenos">23</span></a>
+</span><span id="DataFrameReader.table-24"><a href="#DataFrameReader.table-24"><span class="linenos">24</span></a> <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span>
+</span><span id="DataFrameReader.table-25"><a href="#DataFrameReader.table-25"><span class="linenos">25</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="p">,</span>
+</span><span id="DataFrameReader.table-26"><a href="#DataFrameReader.table-26"><span class="linenos">26</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">()</span>
+</span><span id="DataFrameReader.table-27"><a href="#DataFrameReader.table-27"><span class="linenos">27</span></a> <span class="o">.</span><span class="n">from_</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span>
+</span><span id="DataFrameReader.table-28"><a href="#DataFrameReader.table-28"><span class="linenos">28</span></a> <span class="o">.</span><span class="n">select</span><span class="p">(</span>
+</span><span id="DataFrameReader.table-29"><a href="#DataFrameReader.table-29"><span class="linenos">29</span></a> <span class="o">*</span><span class="p">(</span>
+</span><span id="DataFrameReader.table-30"><a href="#DataFrameReader.table-30"><span class="linenos">30</span></a> <span class="n">column</span> <span class="k">if</span> <span class="n">should_identify</span><span class="p">(</span><span class="n">column</span><span class="p">,</span> <span class="s2">&quot;safe&quot;</span><span class="p">)</span> <span class="k">else</span> <span class="sa">f</span><span class="s1">&#39;&quot;</span><span class="si">{</span><span class="n">column</span><span class="si">}</span><span class="s1">&quot;&#39;</span>
+</span><span id="DataFrameReader.table-31"><a href="#DataFrameReader.table-31"><span class="linenos">31</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">column_names</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span>
+</span><span id="DataFrameReader.table-32"><a href="#DataFrameReader.table-32"><span class="linenos">32</span></a> <span class="p">)</span>
+</span><span id="DataFrameReader.table-33"><a href="#DataFrameReader.table-33"><span class="linenos">33</span></a> <span class="p">),</span>
+</span><span id="DataFrameReader.table-34"><a href="#DataFrameReader.table-34"><span class="linenos">34</span></a> <span class="p">)</span>
</span></pre></div>
@@ -4512,68 +4602,68 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameWriter"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter-29"><a href="#DataFrameWriter-29"><span class="linenos">29</span></a><span class="k">class</span> <span class="nc">DataFrameWriter</span><span class="p">:</span>
-</span><span id="DataFrameWriter-30"><a href="#DataFrameWriter-30"><span class="linenos">30</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
-</span><span id="DataFrameWriter-31"><a href="#DataFrameWriter-31"><span class="linenos">31</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameWriter-32"><a href="#DataFrameWriter-32"><span class="linenos">32</span></a> <span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
-</span><span id="DataFrameWriter-33"><a href="#DataFrameWriter-33"><span class="linenos">33</span></a> <span class="n">spark</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">SparkSession</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameWriter-34"><a href="#DataFrameWriter-34"><span class="linenos">34</span></a> <span class="n">mode</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameWriter-35"><a href="#DataFrameWriter-35"><span class="linenos">35</span></a> <span class="n">by_name</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
-</span><span id="DataFrameWriter-36"><a href="#DataFrameWriter-36"><span class="linenos">36</span></a> <span class="p">):</span>
-</span><span id="DataFrameWriter-37"><a href="#DataFrameWriter-37"><span class="linenos">37</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span> <span class="o">=</span> <span class="n">df</span>
-</span><span id="DataFrameWriter-38"><a href="#DataFrameWriter-38"><span class="linenos">38</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span> <span class="o">=</span> <span class="n">spark</span> <span class="ow">or</span> <span class="n">df</span><span class="o">.</span><span class="n">spark</span>
-</span><span id="DataFrameWriter-39"><a href="#DataFrameWriter-39"><span class="linenos">39</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_mode</span> <span class="o">=</span> <span class="n">mode</span>
-</span><span id="DataFrameWriter-40"><a href="#DataFrameWriter-40"><span class="linenos">40</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_by_name</span> <span class="o">=</span> <span class="n">by_name</span>
-</span><span id="DataFrameWriter-41"><a href="#DataFrameWriter-41"><span class="linenos">41</span></a>
-</span><span id="DataFrameWriter-42"><a href="#DataFrameWriter-42"><span class="linenos">42</span></a> <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameWriter</span><span class="p">:</span>
-</span><span id="DataFrameWriter-43"><a href="#DataFrameWriter-43"><span class="linenos">43</span></a> <span class="k">return</span> <span class="n">DataFrameWriter</span><span class="p">(</span>
-</span><span id="DataFrameWriter-44"><a href="#DataFrameWriter-44"><span class="linenos">44</span></a> <span class="o">**</span><span class="p">{</span>
-</span><span id="DataFrameWriter-45"><a href="#DataFrameWriter-45"><span class="linenos">45</span></a> <span class="n">k</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="k">if</span> <span class="n">k</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">)</span> <span class="k">else</span> <span class="n">k</span><span class="p">:</span> <span class="n">v</span>
-</span><span id="DataFrameWriter-46"><a href="#DataFrameWriter-46"><span class="linenos">46</span></a> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">object_to_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
-</span><span id="DataFrameWriter-47"><a href="#DataFrameWriter-47"><span class="linenos">47</span></a> <span class="p">}</span>
-</span><span id="DataFrameWriter-48"><a href="#DataFrameWriter-48"><span class="linenos">48</span></a> <span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter-37"><a href="#DataFrameWriter-37"><span class="linenos">37</span></a><span class="k">class</span> <span class="nc">DataFrameWriter</span><span class="p">:</span>
+</span><span id="DataFrameWriter-38"><a href="#DataFrameWriter-38"><span class="linenos">38</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
+</span><span id="DataFrameWriter-39"><a href="#DataFrameWriter-39"><span class="linenos">39</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameWriter-40"><a href="#DataFrameWriter-40"><span class="linenos">40</span></a> <span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
+</span><span id="DataFrameWriter-41"><a href="#DataFrameWriter-41"><span class="linenos">41</span></a> <span class="n">spark</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">SparkSession</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameWriter-42"><a href="#DataFrameWriter-42"><span class="linenos">42</span></a> <span class="n">mode</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameWriter-43"><a href="#DataFrameWriter-43"><span class="linenos">43</span></a> <span class="n">by_name</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
+</span><span id="DataFrameWriter-44"><a href="#DataFrameWriter-44"><span class="linenos">44</span></a> <span class="p">):</span>
+</span><span id="DataFrameWriter-45"><a href="#DataFrameWriter-45"><span class="linenos">45</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span> <span class="o">=</span> <span class="n">df</span>
+</span><span id="DataFrameWriter-46"><a href="#DataFrameWriter-46"><span class="linenos">46</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span> <span class="o">=</span> <span class="n">spark</span> <span class="ow">or</span> <span class="n">df</span><span class="o">.</span><span class="n">spark</span>
+</span><span id="DataFrameWriter-47"><a href="#DataFrameWriter-47"><span class="linenos">47</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_mode</span> <span class="o">=</span> <span class="n">mode</span>
+</span><span id="DataFrameWriter-48"><a href="#DataFrameWriter-48"><span class="linenos">48</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_by_name</span> <span class="o">=</span> <span class="n">by_name</span>
</span><span id="DataFrameWriter-49"><a href="#DataFrameWriter-49"><span class="linenos">49</span></a>
-</span><span id="DataFrameWriter-50"><a href="#DataFrameWriter-50"><span class="linenos">50</span></a> <span class="k">def</span> <span class="nf">sql</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
-</span><span id="DataFrameWriter-51"><a href="#DataFrameWriter-51"><span class="linenos">51</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
-</span><span id="DataFrameWriter-52"><a href="#DataFrameWriter-52"><span class="linenos">52</span></a>
-</span><span id="DataFrameWriter-53"><a href="#DataFrameWriter-53"><span class="linenos">53</span></a> <span class="k">def</span> <span class="nf">mode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">saveMode</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">DataFrameWriter</span><span class="p">:</span>
-</span><span id="DataFrameWriter-54"><a href="#DataFrameWriter-54"><span class="linenos">54</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">_mode</span><span class="o">=</span><span class="n">saveMode</span><span class="p">)</span>
-</span><span id="DataFrameWriter-55"><a href="#DataFrameWriter-55"><span class="linenos">55</span></a>
-</span><span id="DataFrameWriter-56"><a href="#DataFrameWriter-56"><span class="linenos">56</span></a> <span class="nd">@property</span>
-</span><span id="DataFrameWriter-57"><a href="#DataFrameWriter-57"><span class="linenos">57</span></a> <span class="k">def</span> <span class="nf">byName</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="DataFrameWriter-58"><a href="#DataFrameWriter-58"><span class="linenos">58</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">by_name</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrameWriter-59"><a href="#DataFrameWriter-59"><span class="linenos">59</span></a>
-</span><span id="DataFrameWriter-60"><a href="#DataFrameWriter-60"><span class="linenos">60</span></a> <span class="k">def</span> <span class="nf">insertInto</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tableName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameWriter</span><span class="p">:</span>
-</span><span id="DataFrameWriter-61"><a href="#DataFrameWriter-61"><span class="linenos">61</span></a> <span class="n">output_expression_container</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">(</span>
-</span><span id="DataFrameWriter-62"><a href="#DataFrameWriter-62"><span class="linenos">62</span></a> <span class="o">**</span><span class="p">{</span>
-</span><span id="DataFrameWriter-63"><a href="#DataFrameWriter-63"><span class="linenos">63</span></a> <span class="s2">&quot;this&quot;</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">tableName</span><span class="p">),</span>
-</span><span id="DataFrameWriter-64"><a href="#DataFrameWriter-64"><span class="linenos">64</span></a> <span class="s2">&quot;overwrite&quot;</span><span class="p">:</span> <span class="n">overwrite</span><span class="p">,</span>
-</span><span id="DataFrameWriter-65"><a href="#DataFrameWriter-65"><span class="linenos">65</span></a> <span class="p">}</span>
-</span><span id="DataFrameWriter-66"><a href="#DataFrameWriter-66"><span class="linenos">66</span></a> <span class="p">)</span>
-</span><span id="DataFrameWriter-67"><a href="#DataFrameWriter-67"><span class="linenos">67</span></a> <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">output_expression_container</span><span class="o">=</span><span class="n">output_expression_container</span><span class="p">)</span>
-</span><span id="DataFrameWriter-68"><a href="#DataFrameWriter-68"><span class="linenos">68</span></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_by_name</span><span class="p">:</span>
-</span><span id="DataFrameWriter-69"><a href="#DataFrameWriter-69"><span class="linenos">69</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">column_names</span><span class="p">(</span><span class="n">tableName</span><span class="p">,</span> <span class="n">only_visible</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrameWriter-70"><a href="#DataFrameWriter-70"><span class="linenos">70</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrameWriter-71"><a href="#DataFrameWriter-71"><span class="linenos">71</span></a>
-</span><span id="DataFrameWriter-72"><a href="#DataFrameWriter-72"><span class="linenos">72</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">_df</span><span class="o">=</span><span class="n">df</span><span class="p">)</span>
-</span><span id="DataFrameWriter-73"><a href="#DataFrameWriter-73"><span class="linenos">73</span></a>
-</span><span id="DataFrameWriter-74"><a href="#DataFrameWriter-74"><span class="linenos">74</span></a> <span class="k">def</span> <span class="nf">saveAsTable</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">format</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">mode</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
-</span><span id="DataFrameWriter-75"><a href="#DataFrameWriter-75"><span class="linenos">75</span></a> <span class="k">if</span> <span class="nb">format</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="DataFrameWriter-76"><a href="#DataFrameWriter-76"><span class="linenos">76</span></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Providing Format in the save as table is not supported&quot;</span><span class="p">)</span>
-</span><span id="DataFrameWriter-77"><a href="#DataFrameWriter-77"><span class="linenos">77</span></a> <span class="n">exists</span><span class="p">,</span> <span class="n">replace</span><span class="p">,</span> <span class="n">mode</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="n">mode</span> <span class="ow">or</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_mode</span><span class="p">)</span>
-</span><span id="DataFrameWriter-78"><a href="#DataFrameWriter-78"><span class="linenos">78</span></a> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s2">&quot;append&quot;</span><span class="p">:</span>
-</span><span id="DataFrameWriter-79"><a href="#DataFrameWriter-79"><span class="linenos">79</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">insertInto</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
-</span><span id="DataFrameWriter-80"><a href="#DataFrameWriter-80"><span class="linenos">80</span></a> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s2">&quot;ignore&quot;</span><span class="p">:</span>
-</span><span id="DataFrameWriter-81"><a href="#DataFrameWriter-81"><span class="linenos">81</span></a> <span class="n">exists</span> <span class="o">=</span> <span class="kc">True</span>
-</span><span id="DataFrameWriter-82"><a href="#DataFrameWriter-82"><span class="linenos">82</span></a> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s2">&quot;overwrite&quot;</span><span class="p">:</span>
-</span><span id="DataFrameWriter-83"><a href="#DataFrameWriter-83"><span class="linenos">83</span></a> <span class="n">replace</span> <span class="o">=</span> <span class="kc">True</span>
-</span><span id="DataFrameWriter-84"><a href="#DataFrameWriter-84"><span class="linenos">84</span></a> <span class="n">output_expression_container</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span>
-</span><span id="DataFrameWriter-85"><a href="#DataFrameWriter-85"><span class="linenos">85</span></a> <span class="n">this</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">name</span><span class="p">),</span>
-</span><span id="DataFrameWriter-86"><a href="#DataFrameWriter-86"><span class="linenos">86</span></a> <span class="n">kind</span><span class="o">=</span><span class="s2">&quot;TABLE&quot;</span><span class="p">,</span>
-</span><span id="DataFrameWriter-87"><a href="#DataFrameWriter-87"><span class="linenos">87</span></a> <span class="n">exists</span><span class="o">=</span><span class="n">exists</span><span class="p">,</span>
-</span><span id="DataFrameWriter-88"><a href="#DataFrameWriter-88"><span class="linenos">88</span></a> <span class="n">replace</span><span class="o">=</span><span class="n">replace</span><span class="p">,</span>
-</span><span id="DataFrameWriter-89"><a href="#DataFrameWriter-89"><span class="linenos">89</span></a> <span class="p">)</span>
-</span><span id="DataFrameWriter-90"><a href="#DataFrameWriter-90"><span class="linenos">90</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">_df</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">output_expression_container</span><span class="o">=</span><span class="n">output_expression_container</span><span class="p">))</span>
+</span><span id="DataFrameWriter-50"><a href="#DataFrameWriter-50"><span class="linenos">50</span></a> <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameWriter</span><span class="p">:</span>
+</span><span id="DataFrameWriter-51"><a href="#DataFrameWriter-51"><span class="linenos">51</span></a> <span class="k">return</span> <span class="n">DataFrameWriter</span><span class="p">(</span>
+</span><span id="DataFrameWriter-52"><a href="#DataFrameWriter-52"><span class="linenos">52</span></a> <span class="o">**</span><span class="p">{</span>
+</span><span id="DataFrameWriter-53"><a href="#DataFrameWriter-53"><span class="linenos">53</span></a> <span class="n">k</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="k">if</span> <span class="n">k</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">)</span> <span class="k">else</span> <span class="n">k</span><span class="p">:</span> <span class="n">v</span>
+</span><span id="DataFrameWriter-54"><a href="#DataFrameWriter-54"><span class="linenos">54</span></a> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">object_to_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
+</span><span id="DataFrameWriter-55"><a href="#DataFrameWriter-55"><span class="linenos">55</span></a> <span class="p">}</span>
+</span><span id="DataFrameWriter-56"><a href="#DataFrameWriter-56"><span class="linenos">56</span></a> <span class="p">)</span>
+</span><span id="DataFrameWriter-57"><a href="#DataFrameWriter-57"><span class="linenos">57</span></a>
+</span><span id="DataFrameWriter-58"><a href="#DataFrameWriter-58"><span class="linenos">58</span></a> <span class="k">def</span> <span class="nf">sql</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
+</span><span id="DataFrameWriter-59"><a href="#DataFrameWriter-59"><span class="linenos">59</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+</span><span id="DataFrameWriter-60"><a href="#DataFrameWriter-60"><span class="linenos">60</span></a>
+</span><span id="DataFrameWriter-61"><a href="#DataFrameWriter-61"><span class="linenos">61</span></a> <span class="k">def</span> <span class="nf">mode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">saveMode</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">DataFrameWriter</span><span class="p">:</span>
+</span><span id="DataFrameWriter-62"><a href="#DataFrameWriter-62"><span class="linenos">62</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">_mode</span><span class="o">=</span><span class="n">saveMode</span><span class="p">)</span>
+</span><span id="DataFrameWriter-63"><a href="#DataFrameWriter-63"><span class="linenos">63</span></a>
+</span><span id="DataFrameWriter-64"><a href="#DataFrameWriter-64"><span class="linenos">64</span></a> <span class="nd">@property</span>
+</span><span id="DataFrameWriter-65"><a href="#DataFrameWriter-65"><span class="linenos">65</span></a> <span class="k">def</span> <span class="nf">byName</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="DataFrameWriter-66"><a href="#DataFrameWriter-66"><span class="linenos">66</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">by_name</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="DataFrameWriter-67"><a href="#DataFrameWriter-67"><span class="linenos">67</span></a>
+</span><span id="DataFrameWriter-68"><a href="#DataFrameWriter-68"><span class="linenos">68</span></a> <span class="k">def</span> <span class="nf">insertInto</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tableName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameWriter</span><span class="p">:</span>
+</span><span id="DataFrameWriter-69"><a href="#DataFrameWriter-69"><span class="linenos">69</span></a> <span class="n">output_expression_container</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">(</span>
+</span><span id="DataFrameWriter-70"><a href="#DataFrameWriter-70"><span class="linenos">70</span></a> <span class="o">**</span><span class="p">{</span>
+</span><span id="DataFrameWriter-71"><a href="#DataFrameWriter-71"><span class="linenos">71</span></a> <span class="s2">&quot;this&quot;</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">tableName</span><span class="p">),</span>
+</span><span id="DataFrameWriter-72"><a href="#DataFrameWriter-72"><span class="linenos">72</span></a> <span class="s2">&quot;overwrite&quot;</span><span class="p">:</span> <span class="n">overwrite</span><span class="p">,</span>
+</span><span id="DataFrameWriter-73"><a href="#DataFrameWriter-73"><span class="linenos">73</span></a> <span class="p">}</span>
+</span><span id="DataFrameWriter-74"><a href="#DataFrameWriter-74"><span class="linenos">74</span></a> <span class="p">)</span>
+</span><span id="DataFrameWriter-75"><a href="#DataFrameWriter-75"><span class="linenos">75</span></a> <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">output_expression_container</span><span class="o">=</span><span class="n">output_expression_container</span><span class="p">)</span>
+</span><span id="DataFrameWriter-76"><a href="#DataFrameWriter-76"><span class="linenos">76</span></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_by_name</span><span class="p">:</span>
+</span><span id="DataFrameWriter-77"><a href="#DataFrameWriter-77"><span class="linenos">77</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">column_names</span><span class="p">(</span><span class="n">tableName</span><span class="p">,</span> <span class="n">only_visible</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="DataFrameWriter-78"><a href="#DataFrameWriter-78"><span class="linenos">78</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrameWriter-79"><a href="#DataFrameWriter-79"><span class="linenos">79</span></a>
+</span><span id="DataFrameWriter-80"><a href="#DataFrameWriter-80"><span class="linenos">80</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">_df</span><span class="o">=</span><span class="n">df</span><span class="p">)</span>
+</span><span id="DataFrameWriter-81"><a href="#DataFrameWriter-81"><span class="linenos">81</span></a>
+</span><span id="DataFrameWriter-82"><a href="#DataFrameWriter-82"><span class="linenos">82</span></a> <span class="k">def</span> <span class="nf">saveAsTable</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">format</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">mode</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
+</span><span id="DataFrameWriter-83"><a href="#DataFrameWriter-83"><span class="linenos">83</span></a> <span class="k">if</span> <span class="nb">format</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="DataFrameWriter-84"><a href="#DataFrameWriter-84"><span class="linenos">84</span></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Providing Format in the save as table is not supported&quot;</span><span class="p">)</span>
+</span><span id="DataFrameWriter-85"><a href="#DataFrameWriter-85"><span class="linenos">85</span></a> <span class="n">exists</span><span class="p">,</span> <span class="n">replace</span><span class="p">,</span> <span class="n">mode</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="n">mode</span> <span class="ow">or</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_mode</span><span class="p">)</span>
+</span><span id="DataFrameWriter-86"><a href="#DataFrameWriter-86"><span class="linenos">86</span></a> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s2">&quot;append&quot;</span><span class="p">:</span>
+</span><span id="DataFrameWriter-87"><a href="#DataFrameWriter-87"><span class="linenos">87</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">insertInto</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
+</span><span id="DataFrameWriter-88"><a href="#DataFrameWriter-88"><span class="linenos">88</span></a> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s2">&quot;ignore&quot;</span><span class="p">:</span>
+</span><span id="DataFrameWriter-89"><a href="#DataFrameWriter-89"><span class="linenos">89</span></a> <span class="n">exists</span> <span class="o">=</span> <span class="kc">True</span>
+</span><span id="DataFrameWriter-90"><a href="#DataFrameWriter-90"><span class="linenos">90</span></a> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s2">&quot;overwrite&quot;</span><span class="p">:</span>
+</span><span id="DataFrameWriter-91"><a href="#DataFrameWriter-91"><span class="linenos">91</span></a> <span class="n">replace</span> <span class="o">=</span> <span class="kc">True</span>
+</span><span id="DataFrameWriter-92"><a href="#DataFrameWriter-92"><span class="linenos">92</span></a> <span class="n">output_expression_container</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span>
+</span><span id="DataFrameWriter-93"><a href="#DataFrameWriter-93"><span class="linenos">93</span></a> <span class="n">this</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">name</span><span class="p">),</span>
+</span><span id="DataFrameWriter-94"><a href="#DataFrameWriter-94"><span class="linenos">94</span></a> <span class="n">kind</span><span class="o">=</span><span class="s2">&quot;TABLE&quot;</span><span class="p">,</span>
+</span><span id="DataFrameWriter-95"><a href="#DataFrameWriter-95"><span class="linenos">95</span></a> <span class="n">exists</span><span class="o">=</span><span class="n">exists</span><span class="p">,</span>
+</span><span id="DataFrameWriter-96"><a href="#DataFrameWriter-96"><span class="linenos">96</span></a> <span class="n">replace</span><span class="o">=</span><span class="n">replace</span><span class="p">,</span>
+</span><span id="DataFrameWriter-97"><a href="#DataFrameWriter-97"><span class="linenos">97</span></a> <span class="p">)</span>
+</span><span id="DataFrameWriter-98"><a href="#DataFrameWriter-98"><span class="linenos">98</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">_df</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">output_expression_container</span><span class="o">=</span><span class="n">output_expression_container</span><span class="p">))</span>
</span></pre></div>
@@ -4589,17 +4679,17 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameWriter.__init__"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter.__init__-30"><a href="#DataFrameWriter.__init__-30"><span class="linenos">30</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
-</span><span id="DataFrameWriter.__init__-31"><a href="#DataFrameWriter.__init__-31"><span class="linenos">31</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameWriter.__init__-32"><a href="#DataFrameWriter.__init__-32"><span class="linenos">32</span></a> <span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
-</span><span id="DataFrameWriter.__init__-33"><a href="#DataFrameWriter.__init__-33"><span class="linenos">33</span></a> <span class="n">spark</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">SparkSession</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameWriter.__init__-34"><a href="#DataFrameWriter.__init__-34"><span class="linenos">34</span></a> <span class="n">mode</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameWriter.__init__-35"><a href="#DataFrameWriter.__init__-35"><span class="linenos">35</span></a> <span class="n">by_name</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
-</span><span id="DataFrameWriter.__init__-36"><a href="#DataFrameWriter.__init__-36"><span class="linenos">36</span></a> <span class="p">):</span>
-</span><span id="DataFrameWriter.__init__-37"><a href="#DataFrameWriter.__init__-37"><span class="linenos">37</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span> <span class="o">=</span> <span class="n">df</span>
-</span><span id="DataFrameWriter.__init__-38"><a href="#DataFrameWriter.__init__-38"><span class="linenos">38</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span> <span class="o">=</span> <span class="n">spark</span> <span class="ow">or</span> <span class="n">df</span><span class="o">.</span><span class="n">spark</span>
-</span><span id="DataFrameWriter.__init__-39"><a href="#DataFrameWriter.__init__-39"><span class="linenos">39</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_mode</span> <span class="o">=</span> <span class="n">mode</span>
-</span><span id="DataFrameWriter.__init__-40"><a href="#DataFrameWriter.__init__-40"><span class="linenos">40</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_by_name</span> <span class="o">=</span> <span class="n">by_name</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter.__init__-38"><a href="#DataFrameWriter.__init__-38"><span class="linenos">38</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
+</span><span id="DataFrameWriter.__init__-39"><a href="#DataFrameWriter.__init__-39"><span class="linenos">39</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameWriter.__init__-40"><a href="#DataFrameWriter.__init__-40"><span class="linenos">40</span></a> <span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
+</span><span id="DataFrameWriter.__init__-41"><a href="#DataFrameWriter.__init__-41"><span class="linenos">41</span></a> <span class="n">spark</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">SparkSession</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameWriter.__init__-42"><a href="#DataFrameWriter.__init__-42"><span class="linenos">42</span></a> <span class="n">mode</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameWriter.__init__-43"><a href="#DataFrameWriter.__init__-43"><span class="linenos">43</span></a> <span class="n">by_name</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
+</span><span id="DataFrameWriter.__init__-44"><a href="#DataFrameWriter.__init__-44"><span class="linenos">44</span></a> <span class="p">):</span>
+</span><span id="DataFrameWriter.__init__-45"><a href="#DataFrameWriter.__init__-45"><span class="linenos">45</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span> <span class="o">=</span> <span class="n">df</span>
+</span><span id="DataFrameWriter.__init__-46"><a href="#DataFrameWriter.__init__-46"><span class="linenos">46</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span> <span class="o">=</span> <span class="n">spark</span> <span class="ow">or</span> <span class="n">df</span><span class="o">.</span><span class="n">spark</span>
+</span><span id="DataFrameWriter.__init__-47"><a href="#DataFrameWriter.__init__-47"><span class="linenos">47</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_mode</span> <span class="o">=</span> <span class="n">mode</span>
+</span><span id="DataFrameWriter.__init__-48"><a href="#DataFrameWriter.__init__-48"><span class="linenos">48</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_by_name</span> <span class="o">=</span> <span class="n">by_name</span>
</span></pre></div>
@@ -4617,13 +4707,13 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameWriter.copy"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter.copy-42"><a href="#DataFrameWriter.copy-42"><span class="linenos">42</span></a> <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameWriter</span><span class="p">:</span>
-</span><span id="DataFrameWriter.copy-43"><a href="#DataFrameWriter.copy-43"><span class="linenos">43</span></a> <span class="k">return</span> <span class="n">DataFrameWriter</span><span class="p">(</span>
-</span><span id="DataFrameWriter.copy-44"><a href="#DataFrameWriter.copy-44"><span class="linenos">44</span></a> <span class="o">**</span><span class="p">{</span>
-</span><span id="DataFrameWriter.copy-45"><a href="#DataFrameWriter.copy-45"><span class="linenos">45</span></a> <span class="n">k</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="k">if</span> <span class="n">k</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">)</span> <span class="k">else</span> <span class="n">k</span><span class="p">:</span> <span class="n">v</span>
-</span><span id="DataFrameWriter.copy-46"><a href="#DataFrameWriter.copy-46"><span class="linenos">46</span></a> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">object_to_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
-</span><span id="DataFrameWriter.copy-47"><a href="#DataFrameWriter.copy-47"><span class="linenos">47</span></a> <span class="p">}</span>
-</span><span id="DataFrameWriter.copy-48"><a href="#DataFrameWriter.copy-48"><span class="linenos">48</span></a> <span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter.copy-50"><a href="#DataFrameWriter.copy-50"><span class="linenos">50</span></a> <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameWriter</span><span class="p">:</span>
+</span><span id="DataFrameWriter.copy-51"><a href="#DataFrameWriter.copy-51"><span class="linenos">51</span></a> <span class="k">return</span> <span class="n">DataFrameWriter</span><span class="p">(</span>
+</span><span id="DataFrameWriter.copy-52"><a href="#DataFrameWriter.copy-52"><span class="linenos">52</span></a> <span class="o">**</span><span class="p">{</span>
+</span><span id="DataFrameWriter.copy-53"><a href="#DataFrameWriter.copy-53"><span class="linenos">53</span></a> <span class="n">k</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="k">if</span> <span class="n">k</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">)</span> <span class="k">else</span> <span class="n">k</span><span class="p">:</span> <span class="n">v</span>
+</span><span id="DataFrameWriter.copy-54"><a href="#DataFrameWriter.copy-54"><span class="linenos">54</span></a> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">object_to_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
+</span><span id="DataFrameWriter.copy-55"><a href="#DataFrameWriter.copy-55"><span class="linenos">55</span></a> <span class="p">}</span>
+</span><span id="DataFrameWriter.copy-56"><a href="#DataFrameWriter.copy-56"><span class="linenos">56</span></a> <span class="p">)</span>
</span></pre></div>
@@ -4641,8 +4731,8 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameWriter.sql"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter.sql-50"><a href="#DataFrameWriter.sql-50"><span class="linenos">50</span></a> <span class="k">def</span> <span class="nf">sql</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
-</span><span id="DataFrameWriter.sql-51"><a href="#DataFrameWriter.sql-51"><span class="linenos">51</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter.sql-58"><a href="#DataFrameWriter.sql-58"><span class="linenos">58</span></a> <span class="k">def</span> <span class="nf">sql</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
+</span><span id="DataFrameWriter.sql-59"><a href="#DataFrameWriter.sql-59"><span class="linenos">59</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
</span></pre></div>
@@ -4660,8 +4750,8 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameWriter.mode"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter.mode-53"><a href="#DataFrameWriter.mode-53"><span class="linenos">53</span></a> <span class="k">def</span> <span class="nf">mode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">saveMode</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">DataFrameWriter</span><span class="p">:</span>
-</span><span id="DataFrameWriter.mode-54"><a href="#DataFrameWriter.mode-54"><span class="linenos">54</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">_mode</span><span class="o">=</span><span class="n">saveMode</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter.mode-61"><a href="#DataFrameWriter.mode-61"><span class="linenos">61</span></a> <span class="k">def</span> <span class="nf">mode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">saveMode</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">DataFrameWriter</span><span class="p">:</span>
+</span><span id="DataFrameWriter.mode-62"><a href="#DataFrameWriter.mode-62"><span class="linenos">62</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">_mode</span><span class="o">=</span><span class="n">saveMode</span><span class="p">)</span>
</span></pre></div>
@@ -4679,19 +4769,19 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameWriter.insertInto"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter.insertInto-60"><a href="#DataFrameWriter.insertInto-60"><span class="linenos">60</span></a> <span class="k">def</span> <span class="nf">insertInto</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tableName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameWriter</span><span class="p">:</span>
-</span><span id="DataFrameWriter.insertInto-61"><a href="#DataFrameWriter.insertInto-61"><span class="linenos">61</span></a> <span class="n">output_expression_container</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">(</span>
-</span><span id="DataFrameWriter.insertInto-62"><a href="#DataFrameWriter.insertInto-62"><span class="linenos">62</span></a> <span class="o">**</span><span class="p">{</span>
-</span><span id="DataFrameWriter.insertInto-63"><a href="#DataFrameWriter.insertInto-63"><span class="linenos">63</span></a> <span class="s2">&quot;this&quot;</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">tableName</span><span class="p">),</span>
-</span><span id="DataFrameWriter.insertInto-64"><a href="#DataFrameWriter.insertInto-64"><span class="linenos">64</span></a> <span class="s2">&quot;overwrite&quot;</span><span class="p">:</span> <span class="n">overwrite</span><span class="p">,</span>
-</span><span id="DataFrameWriter.insertInto-65"><a href="#DataFrameWriter.insertInto-65"><span class="linenos">65</span></a> <span class="p">}</span>
-</span><span id="DataFrameWriter.insertInto-66"><a href="#DataFrameWriter.insertInto-66"><span class="linenos">66</span></a> <span class="p">)</span>
-</span><span id="DataFrameWriter.insertInto-67"><a href="#DataFrameWriter.insertInto-67"><span class="linenos">67</span></a> <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">output_expression_container</span><span class="o">=</span><span class="n">output_expression_container</span><span class="p">)</span>
-</span><span id="DataFrameWriter.insertInto-68"><a href="#DataFrameWriter.insertInto-68"><span class="linenos">68</span></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_by_name</span><span class="p">:</span>
-</span><span id="DataFrameWriter.insertInto-69"><a href="#DataFrameWriter.insertInto-69"><span class="linenos">69</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">column_names</span><span class="p">(</span><span class="n">tableName</span><span class="p">,</span> <span class="n">only_visible</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrameWriter.insertInto-70"><a href="#DataFrameWriter.insertInto-70"><span class="linenos">70</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrameWriter.insertInto-71"><a href="#DataFrameWriter.insertInto-71"><span class="linenos">71</span></a>
-</span><span id="DataFrameWriter.insertInto-72"><a href="#DataFrameWriter.insertInto-72"><span class="linenos">72</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">_df</span><span class="o">=</span><span class="n">df</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter.insertInto-68"><a href="#DataFrameWriter.insertInto-68"><span class="linenos">68</span></a> <span class="k">def</span> <span class="nf">insertInto</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tableName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameWriter</span><span class="p">:</span>
+</span><span id="DataFrameWriter.insertInto-69"><a href="#DataFrameWriter.insertInto-69"><span class="linenos">69</span></a> <span class="n">output_expression_container</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">(</span>
+</span><span id="DataFrameWriter.insertInto-70"><a href="#DataFrameWriter.insertInto-70"><span class="linenos">70</span></a> <span class="o">**</span><span class="p">{</span>
+</span><span id="DataFrameWriter.insertInto-71"><a href="#DataFrameWriter.insertInto-71"><span class="linenos">71</span></a> <span class="s2">&quot;this&quot;</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">tableName</span><span class="p">),</span>
+</span><span id="DataFrameWriter.insertInto-72"><a href="#DataFrameWriter.insertInto-72"><span class="linenos">72</span></a> <span class="s2">&quot;overwrite&quot;</span><span class="p">:</span> <span class="n">overwrite</span><span class="p">,</span>
+</span><span id="DataFrameWriter.insertInto-73"><a href="#DataFrameWriter.insertInto-73"><span class="linenos">73</span></a> <span class="p">}</span>
+</span><span id="DataFrameWriter.insertInto-74"><a href="#DataFrameWriter.insertInto-74"><span class="linenos">74</span></a> <span class="p">)</span>
+</span><span id="DataFrameWriter.insertInto-75"><a href="#DataFrameWriter.insertInto-75"><span class="linenos">75</span></a> <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">output_expression_container</span><span class="o">=</span><span class="n">output_expression_container</span><span class="p">)</span>
+</span><span id="DataFrameWriter.insertInto-76"><a href="#DataFrameWriter.insertInto-76"><span class="linenos">76</span></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_by_name</span><span class="p">:</span>
+</span><span id="DataFrameWriter.insertInto-77"><a href="#DataFrameWriter.insertInto-77"><span class="linenos">77</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">column_names</span><span class="p">(</span><span class="n">tableName</span><span class="p">,</span> <span class="n">only_visible</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="DataFrameWriter.insertInto-78"><a href="#DataFrameWriter.insertInto-78"><span class="linenos">78</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrameWriter.insertInto-79"><a href="#DataFrameWriter.insertInto-79"><span class="linenos">79</span></a>
+</span><span id="DataFrameWriter.insertInto-80"><a href="#DataFrameWriter.insertInto-80"><span class="linenos">80</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">_df</span><span class="o">=</span><span class="n">df</span><span class="p">)</span>
</span></pre></div>
@@ -4709,23 +4799,23 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameWriter.saveAsTable"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter.saveAsTable-74"><a href="#DataFrameWriter.saveAsTable-74"><span class="linenos">74</span></a> <span class="k">def</span> <span class="nf">saveAsTable</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">format</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">mode</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
-</span><span id="DataFrameWriter.saveAsTable-75"><a href="#DataFrameWriter.saveAsTable-75"><span class="linenos">75</span></a> <span class="k">if</span> <span class="nb">format</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="DataFrameWriter.saveAsTable-76"><a href="#DataFrameWriter.saveAsTable-76"><span class="linenos">76</span></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Providing Format in the save as table is not supported&quot;</span><span class="p">)</span>
-</span><span id="DataFrameWriter.saveAsTable-77"><a href="#DataFrameWriter.saveAsTable-77"><span class="linenos">77</span></a> <span class="n">exists</span><span class="p">,</span> <span class="n">replace</span><span class="p">,</span> <span class="n">mode</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="n">mode</span> <span class="ow">or</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_mode</span><span class="p">)</span>
-</span><span id="DataFrameWriter.saveAsTable-78"><a href="#DataFrameWriter.saveAsTable-78"><span class="linenos">78</span></a> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s2">&quot;append&quot;</span><span class="p">:</span>
-</span><span id="DataFrameWriter.saveAsTable-79"><a href="#DataFrameWriter.saveAsTable-79"><span class="linenos">79</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">insertInto</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
-</span><span id="DataFrameWriter.saveAsTable-80"><a href="#DataFrameWriter.saveAsTable-80"><span class="linenos">80</span></a> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s2">&quot;ignore&quot;</span><span class="p">:</span>
-</span><span id="DataFrameWriter.saveAsTable-81"><a href="#DataFrameWriter.saveAsTable-81"><span class="linenos">81</span></a> <span class="n">exists</span> <span class="o">=</span> <span class="kc">True</span>
-</span><span id="DataFrameWriter.saveAsTable-82"><a href="#DataFrameWriter.saveAsTable-82"><span class="linenos">82</span></a> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s2">&quot;overwrite&quot;</span><span class="p">:</span>
-</span><span id="DataFrameWriter.saveAsTable-83"><a href="#DataFrameWriter.saveAsTable-83"><span class="linenos">83</span></a> <span class="n">replace</span> <span class="o">=</span> <span class="kc">True</span>
-</span><span id="DataFrameWriter.saveAsTable-84"><a href="#DataFrameWriter.saveAsTable-84"><span class="linenos">84</span></a> <span class="n">output_expression_container</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span>
-</span><span id="DataFrameWriter.saveAsTable-85"><a href="#DataFrameWriter.saveAsTable-85"><span class="linenos">85</span></a> <span class="n">this</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">name</span><span class="p">),</span>
-</span><span id="DataFrameWriter.saveAsTable-86"><a href="#DataFrameWriter.saveAsTable-86"><span class="linenos">86</span></a> <span class="n">kind</span><span class="o">=</span><span class="s2">&quot;TABLE&quot;</span><span class="p">,</span>
-</span><span id="DataFrameWriter.saveAsTable-87"><a href="#DataFrameWriter.saveAsTable-87"><span class="linenos">87</span></a> <span class="n">exists</span><span class="o">=</span><span class="n">exists</span><span class="p">,</span>
-</span><span id="DataFrameWriter.saveAsTable-88"><a href="#DataFrameWriter.saveAsTable-88"><span class="linenos">88</span></a> <span class="n">replace</span><span class="o">=</span><span class="n">replace</span><span class="p">,</span>
-</span><span id="DataFrameWriter.saveAsTable-89"><a href="#DataFrameWriter.saveAsTable-89"><span class="linenos">89</span></a> <span class="p">)</span>
-</span><span id="DataFrameWriter.saveAsTable-90"><a href="#DataFrameWriter.saveAsTable-90"><span class="linenos">90</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">_df</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">output_expression_container</span><span class="o">=</span><span class="n">output_expression_container</span><span class="p">))</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameWriter.saveAsTable-82"><a href="#DataFrameWriter.saveAsTable-82"><span class="linenos">82</span></a> <span class="k">def</span> <span class="nf">saveAsTable</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">format</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">mode</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
+</span><span id="DataFrameWriter.saveAsTable-83"><a href="#DataFrameWriter.saveAsTable-83"><span class="linenos">83</span></a> <span class="k">if</span> <span class="nb">format</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="DataFrameWriter.saveAsTable-84"><a href="#DataFrameWriter.saveAsTable-84"><span class="linenos">84</span></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Providing Format in the save as table is not supported&quot;</span><span class="p">)</span>
+</span><span id="DataFrameWriter.saveAsTable-85"><a href="#DataFrameWriter.saveAsTable-85"><span class="linenos">85</span></a> <span class="n">exists</span><span class="p">,</span> <span class="n">replace</span><span class="p">,</span> <span class="n">mode</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="n">mode</span> <span class="ow">or</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_mode</span><span class="p">)</span>
+</span><span id="DataFrameWriter.saveAsTable-86"><a href="#DataFrameWriter.saveAsTable-86"><span class="linenos">86</span></a> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s2">&quot;append&quot;</span><span class="p">:</span>
+</span><span id="DataFrameWriter.saveAsTable-87"><a href="#DataFrameWriter.saveAsTable-87"><span class="linenos">87</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">insertInto</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
+</span><span id="DataFrameWriter.saveAsTable-88"><a href="#DataFrameWriter.saveAsTable-88"><span class="linenos">88</span></a> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s2">&quot;ignore&quot;</span><span class="p">:</span>
+</span><span id="DataFrameWriter.saveAsTable-89"><a href="#DataFrameWriter.saveAsTable-89"><span class="linenos">89</span></a> <span class="n">exists</span> <span class="o">=</span> <span class="kc">True</span>
+</span><span id="DataFrameWriter.saveAsTable-90"><a href="#DataFrameWriter.saveAsTable-90"><span class="linenos">90</span></a> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s2">&quot;overwrite&quot;</span><span class="p">:</span>
+</span><span id="DataFrameWriter.saveAsTable-91"><a href="#DataFrameWriter.saveAsTable-91"><span class="linenos">91</span></a> <span class="n">replace</span> <span class="o">=</span> <span class="kc">True</span>
+</span><span id="DataFrameWriter.saveAsTable-92"><a href="#DataFrameWriter.saveAsTable-92"><span class="linenos">92</span></a> <span class="n">output_expression_container</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span>
+</span><span id="DataFrameWriter.saveAsTable-93"><a href="#DataFrameWriter.saveAsTable-93"><span class="linenos">93</span></a> <span class="n">this</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">name</span><span class="p">),</span>
+</span><span id="DataFrameWriter.saveAsTable-94"><a href="#DataFrameWriter.saveAsTable-94"><span class="linenos">94</span></a> <span class="n">kind</span><span class="o">=</span><span class="s2">&quot;TABLE&quot;</span><span class="p">,</span>
+</span><span id="DataFrameWriter.saveAsTable-95"><a href="#DataFrameWriter.saveAsTable-95"><span class="linenos">95</span></a> <span class="n">exists</span><span class="o">=</span><span class="n">exists</span><span class="p">,</span>
+</span><span id="DataFrameWriter.saveAsTable-96"><a href="#DataFrameWriter.saveAsTable-96"><span class="linenos">96</span></a> <span class="n">replace</span><span class="o">=</span><span class="n">replace</span><span class="p">,</span>
+</span><span id="DataFrameWriter.saveAsTable-97"><a href="#DataFrameWriter.saveAsTable-97"><span class="linenos">97</span></a> <span class="p">)</span>
+</span><span id="DataFrameWriter.saveAsTable-98"><a href="#DataFrameWriter.saveAsTable-98"><span class="linenos">98</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">_df</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">output_expression_container</span><span class="o">=</span><span class="n">output_expression_container</span><span class="p">))</span>
</span></pre></div>