summaryrefslogtreecommitdiffstats
path: root/docs/sqlglot/dataframe
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-08 08:11:53 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-08 08:12:02 +0000
commit8d36f5966675e23bee7026ba37ae0647fbf47300 (patch)
treedf4227bbb3b07cb70df87237bcff03c8efd7822d /docs/sqlglot/dataframe
parentReleasing debian version 22.2.0-1. (diff)
downloadsqlglot-8d36f5966675e23bee7026ba37ae0647fbf47300.tar.xz
sqlglot-8d36f5966675e23bee7026ba37ae0647fbf47300.zip
Merging upstream version 23.7.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'docs/sqlglot/dataframe')
-rw-r--r--docs/sqlglot/dataframe/sql.html3115
1 files changed, 1563 insertions, 1552 deletions
diff --git a/docs/sqlglot/dataframe/sql.html b/docs/sqlglot/dataframe/sql.html
index 3fa4f61..571ddae 100644
--- a/docs/sqlglot/dataframe/sql.html
+++ b/docs/sqlglot/dataframe/sql.html
@@ -554,177 +554,184 @@
</div>
<a class="headerlink" href="#SparkSession"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession-21"><a href="#SparkSession-21"><span class="linenos"> 21</span></a><span class="k">class</span> <span class="nc">SparkSession</span><span class="p">:</span>
-</span><span id="SparkSession-22"><a href="#SparkSession-22"><span class="linenos"> 22</span></a> <span class="n">DEFAULT_DIALECT</span> <span class="o">=</span> <span class="s2">&quot;spark&quot;</span>
-</span><span id="SparkSession-23"><a href="#SparkSession-23"><span class="linenos"> 23</span></a> <span class="n">_instance</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="SparkSession-24"><a href="#SparkSession-24"><span class="linenos"> 24</span></a>
-</span><span id="SparkSession-25"><a href="#SparkSession-25"><span class="linenos"> 25</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="SparkSession-26"><a href="#SparkSession-26"><span class="linenos"> 26</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s2">&quot;known_ids&quot;</span><span class="p">):</span>
-</span><span id="SparkSession-27"><a href="#SparkSession-27"><span class="linenos"> 27</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">known_ids</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
-</span><span id="SparkSession-28"><a href="#SparkSession-28"><span class="linenos"> 28</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">known_branch_ids</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
-</span><span id="SparkSession-29"><a href="#SparkSession-29"><span class="linenos"> 29</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">known_sequence_ids</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
-</span><span id="SparkSession-30"><a href="#SparkSession-30"><span class="linenos"> 30</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">name_to_sequence_id_mapping</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">list</span><span class="p">)</span>
-</span><span id="SparkSession-31"><a href="#SparkSession-31"><span class="linenos"> 31</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">incrementing_id</span> <span class="o">=</span> <span class="mi">1</span>
-</span><span id="SparkSession-32"><a href="#SparkSession-32"><span class="linenos"> 32</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">Dialect</span><span class="o">.</span><span class="n">get_or_raise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">DEFAULT_DIALECT</span><span class="p">)</span>
-</span><span id="SparkSession-33"><a href="#SparkSession-33"><span class="linenos"> 33</span></a>
-</span><span id="SparkSession-34"><a href="#SparkSession-34"><span class="linenos"> 34</span></a> <span class="k">def</span> <span class="fm">__new__</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="p">:</span>
-</span><span id="SparkSession-35"><a href="#SparkSession-35"><span class="linenos"> 35</span></a> <span class="k">if</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_instance</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="SparkSession-36"><a href="#SparkSession-36"><span class="linenos"> 36</span></a> <span class="bp">cls</span><span class="o">.</span><span class="n">_instance</span> <span class="o">=</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__new__</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span>
-</span><span id="SparkSession-37"><a href="#SparkSession-37"><span class="linenos"> 37</span></a> <span class="k">return</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_instance</span>
-</span><span id="SparkSession-38"><a href="#SparkSession-38"><span class="linenos"> 38</span></a>
-</span><span id="SparkSession-39"><a href="#SparkSession-39"><span class="linenos"> 39</span></a> <span class="nd">@property</span>
-</span><span id="SparkSession-40"><a href="#SparkSession-40"><span class="linenos"> 40</span></a> <span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameReader</span><span class="p">:</span>
-</span><span id="SparkSession-41"><a href="#SparkSession-41"><span class="linenos"> 41</span></a> <span class="k">return</span> <span class="n">DataFrameReader</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
-</span><span id="SparkSession-42"><a href="#SparkSession-42"><span class="linenos"> 42</span></a>
-</span><span id="SparkSession-43"><a href="#SparkSession-43"><span class="linenos"> 43</span></a> <span class="k">def</span> <span class="nf">table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tableName</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="SparkSession-44"><a href="#SparkSession-44"><span class="linenos"> 44</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">table</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span>
-</span><span id="SparkSession-45"><a href="#SparkSession-45"><span class="linenos"> 45</span></a>
-</span><span id="SparkSession-46"><a href="#SparkSession-46"><span class="linenos"> 46</span></a> <span class="k">def</span> <span class="nf">createDataFrame</span><span class="p">(</span>
-</span><span id="SparkSession-47"><a href="#SparkSession-47"><span class="linenos"> 47</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="SparkSession-48"><a href="#SparkSession-48"><span class="linenos"> 48</span></a> <span class="n">data</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Sequence</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">ColumnLiterals</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">ColumnLiterals</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">]],</span>
-</span><span id="SparkSession-49"><a href="#SparkSession-49"><span class="linenos"> 49</span></a> <span class="n">schema</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">SchemaInput</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession-50"><a href="#SparkSession-50"><span class="linenos"> 50</span></a> <span class="n">samplingRatio</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession-51"><a href="#SparkSession-51"><span class="linenos"> 51</span></a> <span class="n">verifySchema</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
-</span><span id="SparkSession-52"><a href="#SparkSession-52"><span class="linenos"> 52</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="SparkSession-53"><a href="#SparkSession-53"><span class="linenos"> 53</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.dataframe</span> <span class="kn">import</span> <span class="n">DataFrame</span>
-</span><span id="SparkSession-54"><a href="#SparkSession-54"><span class="linenos"> 54</span></a>
-</span><span id="SparkSession-55"><a href="#SparkSession-55"><span class="linenos"> 55</span></a> <span class="k">if</span> <span class="n">samplingRatio</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">verifySchema</span><span class="p">:</span>
-</span><span id="SparkSession-56"><a href="#SparkSession-56"><span class="linenos"> 56</span></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Sampling Ratio and Verify Schema are not supported&quot;</span><span class="p">)</span>
-</span><span id="SparkSession-57"><a href="#SparkSession-57"><span class="linenos"> 57</span></a> <span class="k">if</span> <span class="n">schema</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="p">(</span>
-</span><span id="SparkSession-58"><a href="#SparkSession-58"><span class="linenos"> 58</span></a> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="p">(</span><span class="n">StructType</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">list</span><span class="p">))</span>
-</span><span id="SparkSession-59"><a href="#SparkSession-59"><span class="linenos"> 59</span></a> <span class="ow">or</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="nb">list</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">str</span><span class="p">))</span>
-</span><span id="SparkSession-60"><a href="#SparkSession-60"><span class="linenos"> 60</span></a> <span class="p">):</span>
-</span><span id="SparkSession-61"><a href="#SparkSession-61"><span class="linenos"> 61</span></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Only schema of either list or string of list supported&quot;</span><span class="p">)</span>
-</span><span id="SparkSession-62"><a href="#SparkSession-62"><span class="linenos"> 62</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">data</span><span class="p">:</span>
-</span><span id="SparkSession-63"><a href="#SparkSession-63"><span class="linenos"> 63</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Must provide data to create into a DataFrame&quot;</span><span class="p">)</span>
-</span><span id="SparkSession-64"><a href="#SparkSession-64"><span class="linenos"> 64</span></a>
-</span><span id="SparkSession-65"><a href="#SparkSession-65"><span class="linenos"> 65</span></a> <span class="n">column_mapping</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
-</span><span id="SparkSession-66"><a href="#SparkSession-66"><span class="linenos"> 66</span></a> <span class="k">if</span> <span class="n">schema</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="SparkSession-67"><a href="#SparkSession-67"><span class="linenos"> 67</span></a> <span class="n">column_mapping</span> <span class="o">=</span> <span class="n">get_column_mapping_from_schema_input</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span>
-</span><span id="SparkSession-68"><a href="#SparkSession-68"><span class="linenos"> 68</span></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">dict</span><span class="p">):</span>
-</span><span id="SparkSession-69"><a href="#SparkSession-69"><span class="linenos"> 69</span></a> <span class="n">column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">col_name</span><span class="o">.</span><span class="n">strip</span><span class="p">():</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">col_name</span> <span class="ow">in</span> <span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">]}</span>
-</span><span id="SparkSession-70"><a href="#SparkSession-70"><span class="linenos"> 70</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="SparkSession-71"><a href="#SparkSession-71"><span class="linenos"> 71</span></a> <span class="n">column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="sa">f</span><span class="s2">&quot;_</span><span class="si">{</span><span class="n">i</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">:</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)}</span>
-</span><span id="SparkSession-72"><a href="#SparkSession-72"><span class="linenos"> 72</span></a>
-</span><span id="SparkSession-73"><a href="#SparkSession-73"><span class="linenos"> 73</span></a> <span class="n">data_expressions</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="SparkSession-74"><a href="#SparkSession-74"><span class="linenos"> 74</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">tuple_</span><span class="p">(</span>
-</span><span id="SparkSession-75"><a href="#SparkSession-75"><span class="linenos"> 75</span></a> <span class="o">*</span><span class="nb">map</span><span class="p">(</span>
-</span><span id="SparkSession-76"><a href="#SparkSession-76"><span class="linenos"> 76</span></a> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span>
-</span><span id="SparkSession-77"><a href="#SparkSession-77"><span class="linenos"> 77</span></a> <span class="n">row</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">row</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span> <span class="k">else</span> <span class="n">row</span><span class="o">.</span><span class="n">values</span><span class="p">(),</span>
-</span><span id="SparkSession-78"><a href="#SparkSession-78"><span class="linenos"> 78</span></a> <span class="p">)</span>
-</span><span id="SparkSession-79"><a href="#SparkSession-79"><span class="linenos"> 79</span></a> <span class="p">)</span>
-</span><span id="SparkSession-80"><a href="#SparkSession-80"><span class="linenos"> 80</span></a> <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">data</span>
-</span><span id="SparkSession-81"><a href="#SparkSession-81"><span class="linenos"> 81</span></a> <span class="p">]</span>
-</span><span id="SparkSession-82"><a href="#SparkSession-82"><span class="linenos"> 82</span></a>
-</span><span id="SparkSession-83"><a href="#SparkSession-83"><span class="linenos"> 83</span></a> <span class="n">sel_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="SparkSession-84"><a href="#SparkSession-84"><span class="linenos"> 84</span></a> <span class="p">(</span>
-</span><span id="SparkSession-85"><a href="#SparkSession-85"><span class="linenos"> 85</span></a> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">data_type</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span>
-</span><span id="SparkSession-86"><a href="#SparkSession-86"><span class="linenos"> 86</span></a> <span class="k">if</span> <span class="n">data_type</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
-</span><span id="SparkSession-87"><a href="#SparkSession-87"><span class="linenos"> 87</span></a> <span class="k">else</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span>
-</span><span id="SparkSession-88"><a href="#SparkSession-88"><span class="linenos"> 88</span></a> <span class="p">)</span>
-</span><span id="SparkSession-89"><a href="#SparkSession-89"><span class="linenos"> 89</span></a> <span class="k">for</span> <span class="n">name</span><span class="p">,</span> <span class="n">data_type</span> <span class="ow">in</span> <span class="n">column_mapping</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
-</span><span id="SparkSession-90"><a href="#SparkSession-90"><span class="linenos"> 90</span></a> <span class="p">]</span>
-</span><span id="SparkSession-91"><a href="#SparkSession-91"><span class="linenos"> 91</span></a>
-</span><span id="SparkSession-92"><a href="#SparkSession-92"><span class="linenos"> 92</span></a> <span class="n">select_kwargs</span> <span class="o">=</span> <span class="p">{</span>
-</span><span id="SparkSession-93"><a href="#SparkSession-93"><span class="linenos"> 93</span></a> <span class="s2">&quot;expressions&quot;</span><span class="p">:</span> <span class="n">sel_columns</span><span class="p">,</span>
-</span><span id="SparkSession-94"><a href="#SparkSession-94"><span class="linenos"> 94</span></a> <span class="s2">&quot;from&quot;</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">From</span><span class="p">(</span>
-</span><span id="SparkSession-95"><a href="#SparkSession-95"><span class="linenos"> 95</span></a> <span class="n">this</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">Values</span><span class="p">(</span>
-</span><span id="SparkSession-96"><a href="#SparkSession-96"><span class="linenos"> 96</span></a> <span class="n">expressions</span><span class="o">=</span><span class="n">data_expressions</span><span class="p">,</span>
-</span><span id="SparkSession-97"><a href="#SparkSession-97"><span class="linenos"> 97</span></a> <span class="n">alias</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">TableAlias</span><span class="p">(</span>
-</span><span id="SparkSession-98"><a href="#SparkSession-98"><span class="linenos"> 98</span></a> <span class="n">this</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_auto_incrementing_name</span><span class="p">),</span>
-</span><span id="SparkSession-99"><a href="#SparkSession-99"><span class="linenos"> 99</span></a> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">col_name</span><span class="p">)</span> <span class="k">for</span> <span class="n">col_name</span> <span class="ow">in</span> <span class="n">column_mapping</span><span class="p">],</span>
-</span><span id="SparkSession-100"><a href="#SparkSession-100"><span class="linenos">100</span></a> <span class="p">),</span>
-</span><span id="SparkSession-101"><a href="#SparkSession-101"><span class="linenos">101</span></a> <span class="p">),</span>
-</span><span id="SparkSession-102"><a href="#SparkSession-102"><span class="linenos">102</span></a> <span class="p">),</span>
-</span><span id="SparkSession-103"><a href="#SparkSession-103"><span class="linenos">103</span></a> <span class="p">}</span>
-</span><span id="SparkSession-104"><a href="#SparkSession-104"><span class="linenos">104</span></a>
-</span><span id="SparkSession-105"><a href="#SparkSession-105"><span class="linenos">105</span></a> <span class="n">sel_expression</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">(</span><span class="o">**</span><span class="n">select_kwargs</span><span class="p">)</span>
-</span><span id="SparkSession-106"><a href="#SparkSession-106"><span class="linenos">106</span></a> <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sel_expression</span><span class="p">)</span>
-</span><span id="SparkSession-107"><a href="#SparkSession-107"><span class="linenos">107</span></a>
-</span><span id="SparkSession-108"><a href="#SparkSession-108"><span class="linenos">108</span></a> <span class="k">def</span> <span class="nf">sql</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sqlQuery</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="SparkSession-109"><a href="#SparkSession-109"><span class="linenos">109</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">sqlglot</span><span class="o">.</span><span class="n">parse_one</span><span class="p">(</span><span class="n">sqlQuery</span><span class="p">,</span> <span class="n">read</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dialect</span><span class="p">)</span>
-</span><span id="SparkSession-110"><a href="#SparkSession-110"><span class="linenos">110</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">):</span>
-</span><span id="SparkSession-111"><a href="#SparkSession-111"><span class="linenos">111</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">expression</span><span class="p">)</span>
-</span><span id="SparkSession-112"><a href="#SparkSession-112"><span class="linenos">112</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
-</span><span id="SparkSession-113"><a href="#SparkSession-113"><span class="linenos">113</span></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Create</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">)):</span>
-</span><span id="SparkSession-114"><a href="#SparkSession-114"><span class="linenos">114</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="SparkSession-115"><a href="#SparkSession-115"><span class="linenos">115</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">):</span>
-</span><span id="SparkSession-116"><a href="#SparkSession-116"><span class="linenos">116</span></a> <span class="n">select_expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">))</span>
-</span><span id="SparkSession-117"><a href="#SparkSession-117"><span class="linenos">117</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
-</span><span id="SparkSession-118"><a href="#SparkSession-118"><span class="linenos">118</span></a> <span class="k">del</span> <span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;expression&quot;</span><span class="p">]</span>
-</span><span id="SparkSession-119"><a href="#SparkSession-119"><span class="linenos">119</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">select_expression</span><span class="p">,</span> <span class="n">output_expression_container</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span> <span class="c1"># type: ignore</span>
-</span><span id="SparkSession-120"><a href="#SparkSession-120"><span class="linenos">120</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
-</span><span id="SparkSession-121"><a href="#SparkSession-121"><span class="linenos">121</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="SparkSession-122"><a href="#SparkSession-122"><span class="linenos">122</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
-</span><span id="SparkSession-123"><a href="#SparkSession-123"><span class="linenos">123</span></a> <span class="s2">&quot;Unknown expression type provided in the SQL. Please create an issue with the SQL.&quot;</span>
-</span><span id="SparkSession-124"><a href="#SparkSession-124"><span class="linenos">124</span></a> <span class="p">)</span>
-</span><span id="SparkSession-125"><a href="#SparkSession-125"><span class="linenos">125</span></a> <span class="k">return</span> <span class="n">df</span>
-</span><span id="SparkSession-126"><a href="#SparkSession-126"><span class="linenos">126</span></a>
-</span><span id="SparkSession-127"><a href="#SparkSession-127"><span class="linenos">127</span></a> <span class="nd">@property</span>
-</span><span id="SparkSession-128"><a href="#SparkSession-128"><span class="linenos">128</span></a> <span class="k">def</span> <span class="nf">_auto_incrementing_name</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
-</span><span id="SparkSession-129"><a href="#SparkSession-129"><span class="linenos">129</span></a> <span class="n">name</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;a</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">incrementing_id</span><span class="si">}</span><span class="s2">&quot;</span>
-</span><span id="SparkSession-130"><a href="#SparkSession-130"><span class="linenos">130</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">incrementing_id</span> <span class="o">+=</span> <span class="mi">1</span>
-</span><span id="SparkSession-131"><a href="#SparkSession-131"><span class="linenos">131</span></a> <span class="k">return</span> <span class="n">name</span>
-</span><span id="SparkSession-132"><a href="#SparkSession-132"><span class="linenos">132</span></a>
-</span><span id="SparkSession-133"><a href="#SparkSession-133"><span class="linenos">133</span></a> <span class="nd">@property</span>
-</span><span id="SparkSession-134"><a href="#SparkSession-134"><span class="linenos">134</span></a> <span class="k">def</span> <span class="nf">_random_branch_id</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
-</span><span id="SparkSession-135"><a href="#SparkSession-135"><span class="linenos">135</span></a> <span class="nb">id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_random_id</span>
-</span><span id="SparkSession-136"><a href="#SparkSession-136"><span class="linenos">136</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">known_branch_ids</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="nb">id</span><span class="p">)</span>
-</span><span id="SparkSession-137"><a href="#SparkSession-137"><span class="linenos">137</span></a> <span class="k">return</span> <span class="nb">id</span>
-</span><span id="SparkSession-138"><a href="#SparkSession-138"><span class="linenos">138</span></a>
-</span><span id="SparkSession-139"><a href="#SparkSession-139"><span class="linenos">139</span></a> <span class="nd">@property</span>
-</span><span id="SparkSession-140"><a href="#SparkSession-140"><span class="linenos">140</span></a> <span class="k">def</span> <span class="nf">_random_sequence_id</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="SparkSession-141"><a href="#SparkSession-141"><span class="linenos">141</span></a> <span class="nb">id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_random_id</span>
-</span><span id="SparkSession-142"><a href="#SparkSession-142"><span class="linenos">142</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">known_sequence_ids</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="nb">id</span><span class="p">)</span>
-</span><span id="SparkSession-143"><a href="#SparkSession-143"><span class="linenos">143</span></a> <span class="k">return</span> <span class="nb">id</span>
-</span><span id="SparkSession-144"><a href="#SparkSession-144"><span class="linenos">144</span></a>
-</span><span id="SparkSession-145"><a href="#SparkSession-145"><span class="linenos">145</span></a> <span class="nd">@property</span>
-</span><span id="SparkSession-146"><a href="#SparkSession-146"><span class="linenos">146</span></a> <span class="k">def</span> <span class="nf">_random_id</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
-</span><span id="SparkSession-147"><a href="#SparkSession-147"><span class="linenos">147</span></a> <span class="nb">id</span> <span class="o">=</span> <span class="s2">&quot;r&quot;</span> <span class="o">+</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">()</span><span class="o">.</span><span class="n">hex</span>
-</span><span id="SparkSession-148"><a href="#SparkSession-148"><span class="linenos">148</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">known_ids</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="nb">id</span><span class="p">)</span>
-</span><span id="SparkSession-149"><a href="#SparkSession-149"><span class="linenos">149</span></a> <span class="k">return</span> <span class="nb">id</span>
-</span><span id="SparkSession-150"><a href="#SparkSession-150"><span class="linenos">150</span></a>
-</span><span id="SparkSession-151"><a href="#SparkSession-151"><span class="linenos">151</span></a> <span class="nd">@property</span>
-</span><span id="SparkSession-152"><a href="#SparkSession-152"><span class="linenos">152</span></a> <span class="k">def</span> <span class="nf">_join_hint_names</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">Set</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
-</span><span id="SparkSession-153"><a href="#SparkSession-153"><span class="linenos">153</span></a> <span class="k">return</span> <span class="p">{</span><span class="s2">&quot;BROADCAST&quot;</span><span class="p">,</span> <span class="s2">&quot;MERGE&quot;</span><span class="p">,</span> <span class="s2">&quot;SHUFFLE_HASH&quot;</span><span class="p">,</span> <span class="s2">&quot;SHUFFLE_REPLICATE_NL&quot;</span><span class="p">}</span>
-</span><span id="SparkSession-154"><a href="#SparkSession-154"><span class="linenos">154</span></a>
-</span><span id="SparkSession-155"><a href="#SparkSession-155"><span class="linenos">155</span></a> <span class="k">def</span> <span class="nf">_add_alias_to_mapping</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">sequence_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
-</span><span id="SparkSession-156"><a href="#SparkSession-156"><span class="linenos">156</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">name_to_sequence_id_mapping</span><span class="p">[</span><span class="n">name</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sequence_id</span><span class="p">)</span>
-</span><span id="SparkSession-157"><a href="#SparkSession-157"><span class="linenos">157</span></a>
-</span><span id="SparkSession-158"><a href="#SparkSession-158"><span class="linenos">158</span></a> <span class="k">class</span> <span class="nc">Builder</span><span class="p">:</span>
-</span><span id="SparkSession-159"><a href="#SparkSession-159"><span class="linenos">159</span></a> <span class="n">SQLFRAME_DIALECT_KEY</span> <span class="o">=</span> <span class="s2">&quot;sqlframe.dialect&quot;</span>
-</span><span id="SparkSession-160"><a href="#SparkSession-160"><span class="linenos">160</span></a>
-</span><span id="SparkSession-161"><a href="#SparkSession-161"><span class="linenos">161</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="SparkSession-162"><a href="#SparkSession-162"><span class="linenos">162</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="s2">&quot;spark&quot;</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession-23"><a href="#SparkSession-23"><span class="linenos"> 23</span></a><span class="k">class</span> <span class="nc">SparkSession</span><span class="p">:</span>
+</span><span id="SparkSession-24"><a href="#SparkSession-24"><span class="linenos"> 24</span></a> <span class="n">DEFAULT_DIALECT</span> <span class="o">=</span> <span class="s2">&quot;spark&quot;</span>
+</span><span id="SparkSession-25"><a href="#SparkSession-25"><span class="linenos"> 25</span></a> <span class="n">_instance</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="SparkSession-26"><a href="#SparkSession-26"><span class="linenos"> 26</span></a>
+</span><span id="SparkSession-27"><a href="#SparkSession-27"><span class="linenos"> 27</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="SparkSession-28"><a href="#SparkSession-28"><span class="linenos"> 28</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s2">&quot;known_ids&quot;</span><span class="p">):</span>
+</span><span id="SparkSession-29"><a href="#SparkSession-29"><span class="linenos"> 29</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">known_ids</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
+</span><span id="SparkSession-30"><a href="#SparkSession-30"><span class="linenos"> 30</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">known_branch_ids</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
+</span><span id="SparkSession-31"><a href="#SparkSession-31"><span class="linenos"> 31</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">known_sequence_ids</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
+</span><span id="SparkSession-32"><a href="#SparkSession-32"><span class="linenos"> 32</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">name_to_sequence_id_mapping</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">list</span><span class="p">)</span>
+</span><span id="SparkSession-33"><a href="#SparkSession-33"><span class="linenos"> 33</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">incrementing_id</span> <span class="o">=</span> <span class="mi">1</span>
+</span><span id="SparkSession-34"><a href="#SparkSession-34"><span class="linenos"> 34</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">Dialect</span><span class="o">.</span><span class="n">get_or_raise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">DEFAULT_DIALECT</span><span class="p">)</span>
+</span><span id="SparkSession-35"><a href="#SparkSession-35"><span class="linenos"> 35</span></a>
+</span><span id="SparkSession-36"><a href="#SparkSession-36"><span class="linenos"> 36</span></a> <span class="k">def</span> <span class="fm">__new__</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="p">:</span>
+</span><span id="SparkSession-37"><a href="#SparkSession-37"><span class="linenos"> 37</span></a> <span class="k">if</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_instance</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="SparkSession-38"><a href="#SparkSession-38"><span class="linenos"> 38</span></a> <span class="bp">cls</span><span class="o">.</span><span class="n">_instance</span> <span class="o">=</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__new__</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span>
+</span><span id="SparkSession-39"><a href="#SparkSession-39"><span class="linenos"> 39</span></a> <span class="k">return</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_instance</span>
+</span><span id="SparkSession-40"><a href="#SparkSession-40"><span class="linenos"> 40</span></a>
+</span><span id="SparkSession-41"><a href="#SparkSession-41"><span class="linenos"> 41</span></a> <span class="nd">@property</span>
+</span><span id="SparkSession-42"><a href="#SparkSession-42"><span class="linenos"> 42</span></a> <span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameReader</span><span class="p">:</span>
+</span><span id="SparkSession-43"><a href="#SparkSession-43"><span class="linenos"> 43</span></a> <span class="k">return</span> <span class="n">DataFrameReader</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+</span><span id="SparkSession-44"><a href="#SparkSession-44"><span class="linenos"> 44</span></a>
+</span><span id="SparkSession-45"><a href="#SparkSession-45"><span class="linenos"> 45</span></a> <span class="k">def</span> <span class="nf">table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tableName</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="SparkSession-46"><a href="#SparkSession-46"><span class="linenos"> 46</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">table</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span>
+</span><span id="SparkSession-47"><a href="#SparkSession-47"><span class="linenos"> 47</span></a>
+</span><span id="SparkSession-48"><a href="#SparkSession-48"><span class="linenos"> 48</span></a> <span class="k">def</span> <span class="nf">createDataFrame</span><span class="p">(</span>
+</span><span id="SparkSession-49"><a href="#SparkSession-49"><span class="linenos"> 49</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="SparkSession-50"><a href="#SparkSession-50"><span class="linenos"> 50</span></a> <span class="n">data</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Sequence</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">ColumnLiterals</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">ColumnLiterals</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">]],</span>
+</span><span id="SparkSession-51"><a href="#SparkSession-51"><span class="linenos"> 51</span></a> <span class="n">schema</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">SchemaInput</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession-52"><a href="#SparkSession-52"><span class="linenos"> 52</span></a> <span class="n">samplingRatio</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession-53"><a href="#SparkSession-53"><span class="linenos"> 53</span></a> <span class="n">verifySchema</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
+</span><span id="SparkSession-54"><a href="#SparkSession-54"><span class="linenos"> 54</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="SparkSession-55"><a href="#SparkSession-55"><span class="linenos"> 55</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.dataframe</span> <span class="kn">import</span> <span class="n">DataFrame</span>
+</span><span id="SparkSession-56"><a href="#SparkSession-56"><span class="linenos"> 56</span></a>
+</span><span id="SparkSession-57"><a href="#SparkSession-57"><span class="linenos"> 57</span></a> <span class="k">if</span> <span class="n">samplingRatio</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">verifySchema</span><span class="p">:</span>
+</span><span id="SparkSession-58"><a href="#SparkSession-58"><span class="linenos"> 58</span></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Sampling Ratio and Verify Schema are not supported&quot;</span><span class="p">)</span>
+</span><span id="SparkSession-59"><a href="#SparkSession-59"><span class="linenos"> 59</span></a> <span class="k">if</span> <span class="n">schema</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="p">(</span>
+</span><span id="SparkSession-60"><a href="#SparkSession-60"><span class="linenos"> 60</span></a> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="p">(</span><span class="n">StructType</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">list</span><span class="p">))</span>
+</span><span id="SparkSession-61"><a href="#SparkSession-61"><span class="linenos"> 61</span></a> <span class="ow">or</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="nb">list</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">str</span><span class="p">))</span>
+</span><span id="SparkSession-62"><a href="#SparkSession-62"><span class="linenos"> 62</span></a> <span class="p">):</span>
+</span><span id="SparkSession-63"><a href="#SparkSession-63"><span class="linenos"> 63</span></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Only schema of either list or string of list supported&quot;</span><span class="p">)</span>
+</span><span id="SparkSession-64"><a href="#SparkSession-64"><span class="linenos"> 64</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">data</span><span class="p">:</span>
+</span><span id="SparkSession-65"><a href="#SparkSession-65"><span class="linenos"> 65</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Must provide data to create into a DataFrame&quot;</span><span class="p">)</span>
+</span><span id="SparkSession-66"><a href="#SparkSession-66"><span class="linenos"> 66</span></a>
+</span><span id="SparkSession-67"><a href="#SparkSession-67"><span class="linenos"> 67</span></a> <span class="n">column_mapping</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
+</span><span id="SparkSession-68"><a href="#SparkSession-68"><span class="linenos"> 68</span></a> <span class="k">if</span> <span class="n">schema</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="SparkSession-69"><a href="#SparkSession-69"><span class="linenos"> 69</span></a> <span class="n">column_mapping</span> <span class="o">=</span> <span class="n">get_column_mapping_from_schema_input</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span>
+</span><span id="SparkSession-70"><a href="#SparkSession-70"><span class="linenos"> 70</span></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">dict</span><span class="p">):</span>
+</span><span id="SparkSession-71"><a href="#SparkSession-71"><span class="linenos"> 71</span></a> <span class="n">column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">col_name</span><span class="o">.</span><span class="n">strip</span><span class="p">():</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">col_name</span> <span class="ow">in</span> <span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">]}</span>
+</span><span id="SparkSession-72"><a href="#SparkSession-72"><span class="linenos"> 72</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="SparkSession-73"><a href="#SparkSession-73"><span class="linenos"> 73</span></a> <span class="n">column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="sa">f</span><span class="s2">&quot;_</span><span class="si">{</span><span class="n">i</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">:</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)}</span>
+</span><span id="SparkSession-74"><a href="#SparkSession-74"><span class="linenos"> 74</span></a>
+</span><span id="SparkSession-75"><a href="#SparkSession-75"><span class="linenos"> 75</span></a> <span class="n">data_expressions</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="SparkSession-76"><a href="#SparkSession-76"><span class="linenos"> 76</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">tuple_</span><span class="p">(</span>
+</span><span id="SparkSession-77"><a href="#SparkSession-77"><span class="linenos"> 77</span></a> <span class="o">*</span><span class="nb">map</span><span class="p">(</span>
+</span><span id="SparkSession-78"><a href="#SparkSession-78"><span class="linenos"> 78</span></a> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span>
+</span><span id="SparkSession-79"><a href="#SparkSession-79"><span class="linenos"> 79</span></a> <span class="n">row</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">row</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span> <span class="k">else</span> <span class="n">row</span><span class="o">.</span><span class="n">values</span><span class="p">(),</span>
+</span><span id="SparkSession-80"><a href="#SparkSession-80"><span class="linenos"> 80</span></a> <span class="p">)</span>
+</span><span id="SparkSession-81"><a href="#SparkSession-81"><span class="linenos"> 81</span></a> <span class="p">)</span>
+</span><span id="SparkSession-82"><a href="#SparkSession-82"><span class="linenos"> 82</span></a> <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">data</span>
+</span><span id="SparkSession-83"><a href="#SparkSession-83"><span class="linenos"> 83</span></a> <span class="p">]</span>
+</span><span id="SparkSession-84"><a href="#SparkSession-84"><span class="linenos"> 84</span></a>
+</span><span id="SparkSession-85"><a href="#SparkSession-85"><span class="linenos"> 85</span></a> <span class="n">sel_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="SparkSession-86"><a href="#SparkSession-86"><span class="linenos"> 86</span></a> <span class="p">(</span>
+</span><span id="SparkSession-87"><a href="#SparkSession-87"><span class="linenos"> 87</span></a> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">data_type</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span>
+</span><span id="SparkSession-88"><a href="#SparkSession-88"><span class="linenos"> 88</span></a> <span class="k">if</span> <span class="n">data_type</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
+</span><span id="SparkSession-89"><a href="#SparkSession-89"><span class="linenos"> 89</span></a> <span class="k">else</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span>
+</span><span id="SparkSession-90"><a href="#SparkSession-90"><span class="linenos"> 90</span></a> <span class="p">)</span>
+</span><span id="SparkSession-91"><a href="#SparkSession-91"><span class="linenos"> 91</span></a> <span class="k">for</span> <span class="n">name</span><span class="p">,</span> <span class="n">data_type</span> <span class="ow">in</span> <span class="n">column_mapping</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
+</span><span id="SparkSession-92"><a href="#SparkSession-92"><span class="linenos"> 92</span></a> <span class="p">]</span>
+</span><span id="SparkSession-93"><a href="#SparkSession-93"><span class="linenos"> 93</span></a>
+</span><span id="SparkSession-94"><a href="#SparkSession-94"><span class="linenos"> 94</span></a> <span class="n">select_kwargs</span> <span class="o">=</span> <span class="p">{</span>
+</span><span id="SparkSession-95"><a href="#SparkSession-95"><span class="linenos"> 95</span></a> <span class="s2">&quot;expressions&quot;</span><span class="p">:</span> <span class="n">sel_columns</span><span class="p">,</span>
+</span><span id="SparkSession-96"><a href="#SparkSession-96"><span class="linenos"> 96</span></a> <span class="s2">&quot;from&quot;</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">From</span><span class="p">(</span>
+</span><span id="SparkSession-97"><a href="#SparkSession-97"><span class="linenos"> 97</span></a> <span class="n">this</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">Values</span><span class="p">(</span>
+</span><span id="SparkSession-98"><a href="#SparkSession-98"><span class="linenos"> 98</span></a> <span class="n">expressions</span><span class="o">=</span><span class="n">data_expressions</span><span class="p">,</span>
+</span><span id="SparkSession-99"><a href="#SparkSession-99"><span class="linenos"> 99</span></a> <span class="n">alias</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">TableAlias</span><span class="p">(</span>
+</span><span id="SparkSession-100"><a href="#SparkSession-100"><span class="linenos">100</span></a> <span class="n">this</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_auto_incrementing_name</span><span class="p">),</span>
+</span><span id="SparkSession-101"><a href="#SparkSession-101"><span class="linenos">101</span></a> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">col_name</span><span class="p">)</span> <span class="k">for</span> <span class="n">col_name</span> <span class="ow">in</span> <span class="n">column_mapping</span><span class="p">],</span>
+</span><span id="SparkSession-102"><a href="#SparkSession-102"><span class="linenos">102</span></a> <span class="p">),</span>
+</span><span id="SparkSession-103"><a href="#SparkSession-103"><span class="linenos">103</span></a> <span class="p">),</span>
+</span><span id="SparkSession-104"><a href="#SparkSession-104"><span class="linenos">104</span></a> <span class="p">),</span>
+</span><span id="SparkSession-105"><a href="#SparkSession-105"><span class="linenos">105</span></a> <span class="p">}</span>
+</span><span id="SparkSession-106"><a href="#SparkSession-106"><span class="linenos">106</span></a>
+</span><span id="SparkSession-107"><a href="#SparkSession-107"><span class="linenos">107</span></a> <span class="n">sel_expression</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">(</span><span class="o">**</span><span class="n">select_kwargs</span><span class="p">)</span>
+</span><span id="SparkSession-108"><a href="#SparkSession-108"><span class="linenos">108</span></a> <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sel_expression</span><span class="p">)</span>
+</span><span id="SparkSession-109"><a href="#SparkSession-109"><span class="linenos">109</span></a>
+</span><span id="SparkSession-110"><a href="#SparkSession-110"><span class="linenos">110</span></a> <span class="k">def</span> <span class="nf">_optimize</span><span class="p">(</span>
+</span><span id="SparkSession-111"><a href="#SparkSession-111"><span class="linenos">111</span></a> <span class="bp">self</span><span class="p">,</span> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">,</span> <span class="n">dialect</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">Dialect</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="SparkSession-112"><a href="#SparkSession-112"><span class="linenos">112</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">:</span>
+</span><span id="SparkSession-113"><a href="#SparkSession-113"><span class="linenos">113</span></a> <span class="n">dialect</span> <span class="o">=</span> <span class="n">dialect</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span>
+</span><span id="SparkSession-114"><a href="#SparkSession-114"><span class="linenos">114</span></a> <span class="n">quote_identifiers</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">)</span>
+</span><span id="SparkSession-115"><a href="#SparkSession-115"><span class="linenos">115</span></a> <span class="k">return</span> <span class="n">optimize</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">)</span>
+</span><span id="SparkSession-116"><a href="#SparkSession-116"><span class="linenos">116</span></a>
+</span><span id="SparkSession-117"><a href="#SparkSession-117"><span class="linenos">117</span></a> <span class="k">def</span> <span class="nf">sql</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sqlQuery</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="SparkSession-118"><a href="#SparkSession-118"><span class="linenos">118</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_optimize</span><span class="p">(</span><span class="n">sqlglot</span><span class="o">.</span><span class="n">parse_one</span><span class="p">(</span><span class="n">sqlQuery</span><span class="p">,</span> <span class="n">read</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dialect</span><span class="p">))</span>
+</span><span id="SparkSession-119"><a href="#SparkSession-119"><span class="linenos">119</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">):</span>
+</span><span id="SparkSession-120"><a href="#SparkSession-120"><span class="linenos">120</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">expression</span><span class="p">)</span>
+</span><span id="SparkSession-121"><a href="#SparkSession-121"><span class="linenos">121</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
+</span><span id="SparkSession-122"><a href="#SparkSession-122"><span class="linenos">122</span></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Create</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">)):</span>
+</span><span id="SparkSession-123"><a href="#SparkSession-123"><span class="linenos">123</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="SparkSession-124"><a href="#SparkSession-124"><span class="linenos">124</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">):</span>
+</span><span id="SparkSession-125"><a href="#SparkSession-125"><span class="linenos">125</span></a> <span class="n">select_expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">))</span>
+</span><span id="SparkSession-126"><a href="#SparkSession-126"><span class="linenos">126</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+</span><span id="SparkSession-127"><a href="#SparkSession-127"><span class="linenos">127</span></a> <span class="k">del</span> <span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;expression&quot;</span><span class="p">]</span>
+</span><span id="SparkSession-128"><a href="#SparkSession-128"><span class="linenos">128</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">select_expression</span><span class="p">,</span> <span class="n">output_expression_container</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span> <span class="c1"># type: ignore</span>
+</span><span id="SparkSession-129"><a href="#SparkSession-129"><span class="linenos">129</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
+</span><span id="SparkSession-130"><a href="#SparkSession-130"><span class="linenos">130</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="SparkSession-131"><a href="#SparkSession-131"><span class="linenos">131</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+</span><span id="SparkSession-132"><a href="#SparkSession-132"><span class="linenos">132</span></a> <span class="s2">&quot;Unknown expression type provided in the SQL. Please create an issue with the SQL.&quot;</span>
+</span><span id="SparkSession-133"><a href="#SparkSession-133"><span class="linenos">133</span></a> <span class="p">)</span>
+</span><span id="SparkSession-134"><a href="#SparkSession-134"><span class="linenos">134</span></a> <span class="k">return</span> <span class="n">df</span>
+</span><span id="SparkSession-135"><a href="#SparkSession-135"><span class="linenos">135</span></a>
+</span><span id="SparkSession-136"><a href="#SparkSession-136"><span class="linenos">136</span></a> <span class="nd">@property</span>
+</span><span id="SparkSession-137"><a href="#SparkSession-137"><span class="linenos">137</span></a> <span class="k">def</span> <span class="nf">_auto_incrementing_name</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
+</span><span id="SparkSession-138"><a href="#SparkSession-138"><span class="linenos">138</span></a> <span class="n">name</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;a</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">incrementing_id</span><span class="si">}</span><span class="s2">&quot;</span>
+</span><span id="SparkSession-139"><a href="#SparkSession-139"><span class="linenos">139</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">incrementing_id</span> <span class="o">+=</span> <span class="mi">1</span>
+</span><span id="SparkSession-140"><a href="#SparkSession-140"><span class="linenos">140</span></a> <span class="k">return</span> <span class="n">name</span>
+</span><span id="SparkSession-141"><a href="#SparkSession-141"><span class="linenos">141</span></a>
+</span><span id="SparkSession-142"><a href="#SparkSession-142"><span class="linenos">142</span></a> <span class="nd">@property</span>
+</span><span id="SparkSession-143"><a href="#SparkSession-143"><span class="linenos">143</span></a> <span class="k">def</span> <span class="nf">_random_branch_id</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
+</span><span id="SparkSession-144"><a href="#SparkSession-144"><span class="linenos">144</span></a> <span class="nb">id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_random_id</span>
+</span><span id="SparkSession-145"><a href="#SparkSession-145"><span class="linenos">145</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">known_branch_ids</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="nb">id</span><span class="p">)</span>
+</span><span id="SparkSession-146"><a href="#SparkSession-146"><span class="linenos">146</span></a> <span class="k">return</span> <span class="nb">id</span>
+</span><span id="SparkSession-147"><a href="#SparkSession-147"><span class="linenos">147</span></a>
+</span><span id="SparkSession-148"><a href="#SparkSession-148"><span class="linenos">148</span></a> <span class="nd">@property</span>
+</span><span id="SparkSession-149"><a href="#SparkSession-149"><span class="linenos">149</span></a> <span class="k">def</span> <span class="nf">_random_sequence_id</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="SparkSession-150"><a href="#SparkSession-150"><span class="linenos">150</span></a> <span class="nb">id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_random_id</span>
+</span><span id="SparkSession-151"><a href="#SparkSession-151"><span class="linenos">151</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">known_sequence_ids</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="nb">id</span><span class="p">)</span>
+</span><span id="SparkSession-152"><a href="#SparkSession-152"><span class="linenos">152</span></a> <span class="k">return</span> <span class="nb">id</span>
+</span><span id="SparkSession-153"><a href="#SparkSession-153"><span class="linenos">153</span></a>
+</span><span id="SparkSession-154"><a href="#SparkSession-154"><span class="linenos">154</span></a> <span class="nd">@property</span>
+</span><span id="SparkSession-155"><a href="#SparkSession-155"><span class="linenos">155</span></a> <span class="k">def</span> <span class="nf">_random_id</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
+</span><span id="SparkSession-156"><a href="#SparkSession-156"><span class="linenos">156</span></a> <span class="nb">id</span> <span class="o">=</span> <span class="s2">&quot;r&quot;</span> <span class="o">+</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">()</span><span class="o">.</span><span class="n">hex</span>
+</span><span id="SparkSession-157"><a href="#SparkSession-157"><span class="linenos">157</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">known_ids</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="nb">id</span><span class="p">)</span>
+</span><span id="SparkSession-158"><a href="#SparkSession-158"><span class="linenos">158</span></a> <span class="k">return</span> <span class="nb">id</span>
+</span><span id="SparkSession-159"><a href="#SparkSession-159"><span class="linenos">159</span></a>
+</span><span id="SparkSession-160"><a href="#SparkSession-160"><span class="linenos">160</span></a> <span class="nd">@property</span>
+</span><span id="SparkSession-161"><a href="#SparkSession-161"><span class="linenos">161</span></a> <span class="k">def</span> <span class="nf">_join_hint_names</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">Set</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
+</span><span id="SparkSession-162"><a href="#SparkSession-162"><span class="linenos">162</span></a> <span class="k">return</span> <span class="p">{</span><span class="s2">&quot;BROADCAST&quot;</span><span class="p">,</span> <span class="s2">&quot;MERGE&quot;</span><span class="p">,</span> <span class="s2">&quot;SHUFFLE_HASH&quot;</span><span class="p">,</span> <span class="s2">&quot;SHUFFLE_REPLICATE_NL&quot;</span><span class="p">}</span>
</span><span id="SparkSession-163"><a href="#SparkSession-163"><span class="linenos">163</span></a>
-</span><span id="SparkSession-164"><a href="#SparkSession-164"><span class="linenos">164</span></a> <span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">Builder</span><span class="p">:</span>
-</span><span id="SparkSession-165"><a href="#SparkSession-165"><span class="linenos">165</span></a> <span class="k">return</span> <span class="bp">self</span>
+</span><span id="SparkSession-164"><a href="#SparkSession-164"><span class="linenos">164</span></a> <span class="k">def</span> <span class="nf">_add_alias_to_mapping</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">sequence_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
+</span><span id="SparkSession-165"><a href="#SparkSession-165"><span class="linenos">165</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">name_to_sequence_id_mapping</span><span class="p">[</span><span class="n">name</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sequence_id</span><span class="p">)</span>
</span><span id="SparkSession-166"><a href="#SparkSession-166"><span class="linenos">166</span></a>
-</span><span id="SparkSession-167"><a href="#SparkSession-167"><span class="linenos">167</span></a> <span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
-</span><span id="SparkSession-168"><a href="#SparkSession-168"><span class="linenos">168</span></a> <span class="k">return</span> <span class="bp">self</span>
+</span><span id="SparkSession-167"><a href="#SparkSession-167"><span class="linenos">167</span></a> <span class="k">class</span> <span class="nc">Builder</span><span class="p">:</span>
+</span><span id="SparkSession-168"><a href="#SparkSession-168"><span class="linenos">168</span></a> <span class="n">SQLFRAME_DIALECT_KEY</span> <span class="o">=</span> <span class="s2">&quot;sqlframe.dialect&quot;</span>
</span><span id="SparkSession-169"><a href="#SparkSession-169"><span class="linenos">169</span></a>
-</span><span id="SparkSession-170"><a href="#SparkSession-170"><span class="linenos">170</span></a> <span class="k">def</span> <span class="nf">config</span><span class="p">(</span>
-</span><span id="SparkSession-171"><a href="#SparkSession-171"><span class="linenos">171</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="SparkSession-172"><a href="#SparkSession-172"><span class="linenos">172</span></a> <span class="n">key</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession-173"><a href="#SparkSession-173"><span class="linenos">173</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession-174"><a href="#SparkSession-174"><span class="linenos">174</span></a> <span class="o">*</span><span class="p">,</span>
-</span><span id="SparkSession-175"><a href="#SparkSession-175"><span class="linenos">175</span></a> <span class="nb">map</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession-176"><a href="#SparkSession-176"><span class="linenos">176</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span>
-</span><span id="SparkSession-177"><a href="#SparkSession-177"><span class="linenos">177</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">Builder</span><span class="p">:</span>
-</span><span id="SparkSession-178"><a href="#SparkSession-178"><span class="linenos">178</span></a> <span class="k">if</span> <span class="n">key</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span><span class="p">:</span>
-</span><span id="SparkSession-179"><a href="#SparkSession-179"><span class="linenos">179</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">value</span>
-</span><span id="SparkSession-180"><a href="#SparkSession-180"><span class="linenos">180</span></a> <span class="k">elif</span> <span class="nb">map</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span> <span class="ow">in</span> <span class="nb">map</span><span class="p">:</span>
-</span><span id="SparkSession-181"><a href="#SparkSession-181"><span class="linenos">181</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="nb">map</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span><span class="p">]</span>
-</span><span id="SparkSession-182"><a href="#SparkSession-182"><span class="linenos">182</span></a> <span class="k">return</span> <span class="bp">self</span>
-</span><span id="SparkSession-183"><a href="#SparkSession-183"><span class="linenos">183</span></a>
-</span><span id="SparkSession-184"><a href="#SparkSession-184"><span class="linenos">184</span></a> <span class="k">def</span> <span class="nf">getOrCreate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="p">:</span>
-</span><span id="SparkSession-185"><a href="#SparkSession-185"><span class="linenos">185</span></a> <span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="p">()</span>
-</span><span id="SparkSession-186"><a href="#SparkSession-186"><span class="linenos">186</span></a> <span class="n">spark</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">Dialect</span><span class="o">.</span><span class="n">get_or_raise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dialect</span><span class="p">)</span>
-</span><span id="SparkSession-187"><a href="#SparkSession-187"><span class="linenos">187</span></a> <span class="k">return</span> <span class="n">spark</span>
-</span><span id="SparkSession-188"><a href="#SparkSession-188"><span class="linenos">188</span></a>
-</span><span id="SparkSession-189"><a href="#SparkSession-189"><span class="linenos">189</span></a> <span class="nd">@classproperty</span>
-</span><span id="SparkSession-190"><a href="#SparkSession-190"><span class="linenos">190</span></a> <span class="k">def</span> <span class="nf">builder</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Builder</span><span class="p">:</span>
-</span><span id="SparkSession-191"><a href="#SparkSession-191"><span class="linenos">191</span></a> <span class="k">return</span> <span class="bp">cls</span><span class="o">.</span><span class="n">Builder</span><span class="p">()</span>
+</span><span id="SparkSession-170"><a href="#SparkSession-170"><span class="linenos">170</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="SparkSession-171"><a href="#SparkSession-171"><span class="linenos">171</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="s2">&quot;spark&quot;</span>
+</span><span id="SparkSession-172"><a href="#SparkSession-172"><span class="linenos">172</span></a>
+</span><span id="SparkSession-173"><a href="#SparkSession-173"><span class="linenos">173</span></a> <span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">Builder</span><span class="p">:</span>
+</span><span id="SparkSession-174"><a href="#SparkSession-174"><span class="linenos">174</span></a> <span class="k">return</span> <span class="bp">self</span>
+</span><span id="SparkSession-175"><a href="#SparkSession-175"><span class="linenos">175</span></a>
+</span><span id="SparkSession-176"><a href="#SparkSession-176"><span class="linenos">176</span></a> <span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+</span><span id="SparkSession-177"><a href="#SparkSession-177"><span class="linenos">177</span></a> <span class="k">return</span> <span class="bp">self</span>
+</span><span id="SparkSession-178"><a href="#SparkSession-178"><span class="linenos">178</span></a>
+</span><span id="SparkSession-179"><a href="#SparkSession-179"><span class="linenos">179</span></a> <span class="k">def</span> <span class="nf">config</span><span class="p">(</span>
+</span><span id="SparkSession-180"><a href="#SparkSession-180"><span class="linenos">180</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="SparkSession-181"><a href="#SparkSession-181"><span class="linenos">181</span></a> <span class="n">key</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession-182"><a href="#SparkSession-182"><span class="linenos">182</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession-183"><a href="#SparkSession-183"><span class="linenos">183</span></a> <span class="o">*</span><span class="p">,</span>
+</span><span id="SparkSession-184"><a href="#SparkSession-184"><span class="linenos">184</span></a> <span class="nb">map</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession-185"><a href="#SparkSession-185"><span class="linenos">185</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span>
+</span><span id="SparkSession-186"><a href="#SparkSession-186"><span class="linenos">186</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">Builder</span><span class="p">:</span>
+</span><span id="SparkSession-187"><a href="#SparkSession-187"><span class="linenos">187</span></a> <span class="k">if</span> <span class="n">key</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span><span class="p">:</span>
+</span><span id="SparkSession-188"><a href="#SparkSession-188"><span class="linenos">188</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">value</span>
+</span><span id="SparkSession-189"><a href="#SparkSession-189"><span class="linenos">189</span></a> <span class="k">elif</span> <span class="nb">map</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span> <span class="ow">in</span> <span class="nb">map</span><span class="p">:</span>
+</span><span id="SparkSession-190"><a href="#SparkSession-190"><span class="linenos">190</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="nb">map</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span><span class="p">]</span>
+</span><span id="SparkSession-191"><a href="#SparkSession-191"><span class="linenos">191</span></a> <span class="k">return</span> <span class="bp">self</span>
+</span><span id="SparkSession-192"><a href="#SparkSession-192"><span class="linenos">192</span></a>
+</span><span id="SparkSession-193"><a href="#SparkSession-193"><span class="linenos">193</span></a> <span class="k">def</span> <span class="nf">getOrCreate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="p">:</span>
+</span><span id="SparkSession-194"><a href="#SparkSession-194"><span class="linenos">194</span></a> <span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="p">()</span>
+</span><span id="SparkSession-195"><a href="#SparkSession-195"><span class="linenos">195</span></a> <span class="n">spark</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">Dialect</span><span class="o">.</span><span class="n">get_or_raise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dialect</span><span class="p">)</span>
+</span><span id="SparkSession-196"><a href="#SparkSession-196"><span class="linenos">196</span></a> <span class="k">return</span> <span class="n">spark</span>
+</span><span id="SparkSession-197"><a href="#SparkSession-197"><span class="linenos">197</span></a>
+</span><span id="SparkSession-198"><a href="#SparkSession-198"><span class="linenos">198</span></a> <span class="nd">@classproperty</span>
+</span><span id="SparkSession-199"><a href="#SparkSession-199"><span class="linenos">199</span></a> <span class="k">def</span> <span class="nf">builder</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Builder</span><span class="p">:</span>
+</span><span id="SparkSession-200"><a href="#SparkSession-200"><span class="linenos">200</span></a> <span class="k">return</span> <span class="bp">cls</span><span class="o">.</span><span class="n">Builder</span><span class="p">()</span>
</span></pre></div>
@@ -751,9 +758,9 @@
</div>
<a class="headerlink" href="#SparkSession.read"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.read-39"><a href="#SparkSession.read-39"><span class="linenos">39</span></a> <span class="nd">@property</span>
-</span><span id="SparkSession.read-40"><a href="#SparkSession.read-40"><span class="linenos">40</span></a> <span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameReader</span><span class="p">:</span>
-</span><span id="SparkSession.read-41"><a href="#SparkSession.read-41"><span class="linenos">41</span></a> <span class="k">return</span> <span class="n">DataFrameReader</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.read-41"><a href="#SparkSession.read-41"><span class="linenos">41</span></a> <span class="nd">@property</span>
+</span><span id="SparkSession.read-42"><a href="#SparkSession.read-42"><span class="linenos">42</span></a> <span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameReader</span><span class="p">:</span>
+</span><span id="SparkSession.read-43"><a href="#SparkSession.read-43"><span class="linenos">43</span></a> <span class="k">return</span> <span class="n">DataFrameReader</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
</span></pre></div>
@@ -771,8 +778,8 @@
</div>
<a class="headerlink" href="#SparkSession.table"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.table-43"><a href="#SparkSession.table-43"><span class="linenos">43</span></a> <span class="k">def</span> <span class="nf">table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tableName</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="SparkSession.table-44"><a href="#SparkSession.table-44"><span class="linenos">44</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">table</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.table-45"><a href="#SparkSession.table-45"><span class="linenos">45</span></a> <span class="k">def</span> <span class="nf">table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tableName</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="SparkSession.table-46"><a href="#SparkSession.table-46"><span class="linenos">46</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">table</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span>
</span></pre></div>
@@ -784,73 +791,73 @@
<div class="attr function">
<span class="def">def</span>
- <span class="name">createDataFrame</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">data</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552193600&#39;</span><span class="o">&gt;</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552193600&#39;</span><span class="o">&gt;</span><span class="p">],</span> <span class="n">Tuple</span><span class="p">]]</span>,</span><span class="param"> <span class="n">schema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552268224&#39;</span><span class="o">&gt;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">samplingRatio</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">verifySchema</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">DataFrame</a></span>:</span></span>
+ <span class="name">createDataFrame</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">data</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844518578496&#39;</span><span class="o">&gt;</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844518578496&#39;</span><span class="o">&gt;</span><span class="p">],</span> <span class="n">Tuple</span><span class="p">]]</span>,</span><span class="param"> <span class="n">schema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844518514208&#39;</span><span class="o">&gt;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">samplingRatio</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">verifySchema</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">DataFrame</a></span>:</span></span>
<label class="view-source-button" for="SparkSession.createDataFrame-view-source"><span>View Source</span></label>
</div>
<a class="headerlink" href="#SparkSession.createDataFrame"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.createDataFrame-46"><a href="#SparkSession.createDataFrame-46"><span class="linenos"> 46</span></a> <span class="k">def</span> <span class="nf">createDataFrame</span><span class="p">(</span>
-</span><span id="SparkSession.createDataFrame-47"><a href="#SparkSession.createDataFrame-47"><span class="linenos"> 47</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="SparkSession.createDataFrame-48"><a href="#SparkSession.createDataFrame-48"><span class="linenos"> 48</span></a> <span class="n">data</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Sequence</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">ColumnLiterals</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">ColumnLiterals</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">]],</span>
-</span><span id="SparkSession.createDataFrame-49"><a href="#SparkSession.createDataFrame-49"><span class="linenos"> 49</span></a> <span class="n">schema</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">SchemaInput</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession.createDataFrame-50"><a href="#SparkSession.createDataFrame-50"><span class="linenos"> 50</span></a> <span class="n">samplingRatio</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession.createDataFrame-51"><a href="#SparkSession.createDataFrame-51"><span class="linenos"> 51</span></a> <span class="n">verifySchema</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
-</span><span id="SparkSession.createDataFrame-52"><a href="#SparkSession.createDataFrame-52"><span class="linenos"> 52</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="SparkSession.createDataFrame-53"><a href="#SparkSession.createDataFrame-53"><span class="linenos"> 53</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.dataframe</span> <span class="kn">import</span> <span class="n">DataFrame</span>
-</span><span id="SparkSession.createDataFrame-54"><a href="#SparkSession.createDataFrame-54"><span class="linenos"> 54</span></a>
-</span><span id="SparkSession.createDataFrame-55"><a href="#SparkSession.createDataFrame-55"><span class="linenos"> 55</span></a> <span class="k">if</span> <span class="n">samplingRatio</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">verifySchema</span><span class="p">:</span>
-</span><span id="SparkSession.createDataFrame-56"><a href="#SparkSession.createDataFrame-56"><span class="linenos"> 56</span></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Sampling Ratio and Verify Schema are not supported&quot;</span><span class="p">)</span>
-</span><span id="SparkSession.createDataFrame-57"><a href="#SparkSession.createDataFrame-57"><span class="linenos"> 57</span></a> <span class="k">if</span> <span class="n">schema</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="p">(</span>
-</span><span id="SparkSession.createDataFrame-58"><a href="#SparkSession.createDataFrame-58"><span class="linenos"> 58</span></a> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="p">(</span><span class="n">StructType</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">list</span><span class="p">))</span>
-</span><span id="SparkSession.createDataFrame-59"><a href="#SparkSession.createDataFrame-59"><span class="linenos"> 59</span></a> <span class="ow">or</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="nb">list</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">str</span><span class="p">))</span>
-</span><span id="SparkSession.createDataFrame-60"><a href="#SparkSession.createDataFrame-60"><span class="linenos"> 60</span></a> <span class="p">):</span>
-</span><span id="SparkSession.createDataFrame-61"><a href="#SparkSession.createDataFrame-61"><span class="linenos"> 61</span></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Only schema of either list or string of list supported&quot;</span><span class="p">)</span>
-</span><span id="SparkSession.createDataFrame-62"><a href="#SparkSession.createDataFrame-62"><span class="linenos"> 62</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">data</span><span class="p">:</span>
-</span><span id="SparkSession.createDataFrame-63"><a href="#SparkSession.createDataFrame-63"><span class="linenos"> 63</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Must provide data to create into a DataFrame&quot;</span><span class="p">)</span>
-</span><span id="SparkSession.createDataFrame-64"><a href="#SparkSession.createDataFrame-64"><span class="linenos"> 64</span></a>
-</span><span id="SparkSession.createDataFrame-65"><a href="#SparkSession.createDataFrame-65"><span class="linenos"> 65</span></a> <span class="n">column_mapping</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
-</span><span id="SparkSession.createDataFrame-66"><a href="#SparkSession.createDataFrame-66"><span class="linenos"> 66</span></a> <span class="k">if</span> <span class="n">schema</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="SparkSession.createDataFrame-67"><a href="#SparkSession.createDataFrame-67"><span class="linenos"> 67</span></a> <span class="n">column_mapping</span> <span class="o">=</span> <span class="n">get_column_mapping_from_schema_input</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span>
-</span><span id="SparkSession.createDataFrame-68"><a href="#SparkSession.createDataFrame-68"><span class="linenos"> 68</span></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">dict</span><span class="p">):</span>
-</span><span id="SparkSession.createDataFrame-69"><a href="#SparkSession.createDataFrame-69"><span class="linenos"> 69</span></a> <span class="n">column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">col_name</span><span class="o">.</span><span class="n">strip</span><span class="p">():</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">col_name</span> <span class="ow">in</span> <span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">]}</span>
-</span><span id="SparkSession.createDataFrame-70"><a href="#SparkSession.createDataFrame-70"><span class="linenos"> 70</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="SparkSession.createDataFrame-71"><a href="#SparkSession.createDataFrame-71"><span class="linenos"> 71</span></a> <span class="n">column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="sa">f</span><span class="s2">&quot;_</span><span class="si">{</span><span class="n">i</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">:</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)}</span>
-</span><span id="SparkSession.createDataFrame-72"><a href="#SparkSession.createDataFrame-72"><span class="linenos"> 72</span></a>
-</span><span id="SparkSession.createDataFrame-73"><a href="#SparkSession.createDataFrame-73"><span class="linenos"> 73</span></a> <span class="n">data_expressions</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="SparkSession.createDataFrame-74"><a href="#SparkSession.createDataFrame-74"><span class="linenos"> 74</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">tuple_</span><span class="p">(</span>
-</span><span id="SparkSession.createDataFrame-75"><a href="#SparkSession.createDataFrame-75"><span class="linenos"> 75</span></a> <span class="o">*</span><span class="nb">map</span><span class="p">(</span>
-</span><span id="SparkSession.createDataFrame-76"><a href="#SparkSession.createDataFrame-76"><span class="linenos"> 76</span></a> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span>
-</span><span id="SparkSession.createDataFrame-77"><a href="#SparkSession.createDataFrame-77"><span class="linenos"> 77</span></a> <span class="n">row</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">row</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span> <span class="k">else</span> <span class="n">row</span><span class="o">.</span><span class="n">values</span><span class="p">(),</span>
-</span><span id="SparkSession.createDataFrame-78"><a href="#SparkSession.createDataFrame-78"><span class="linenos"> 78</span></a> <span class="p">)</span>
-</span><span id="SparkSession.createDataFrame-79"><a href="#SparkSession.createDataFrame-79"><span class="linenos"> 79</span></a> <span class="p">)</span>
-</span><span id="SparkSession.createDataFrame-80"><a href="#SparkSession.createDataFrame-80"><span class="linenos"> 80</span></a> <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">data</span>
-</span><span id="SparkSession.createDataFrame-81"><a href="#SparkSession.createDataFrame-81"><span class="linenos"> 81</span></a> <span class="p">]</span>
-</span><span id="SparkSession.createDataFrame-82"><a href="#SparkSession.createDataFrame-82"><span class="linenos"> 82</span></a>
-</span><span id="SparkSession.createDataFrame-83"><a href="#SparkSession.createDataFrame-83"><span class="linenos"> 83</span></a> <span class="n">sel_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="SparkSession.createDataFrame-84"><a href="#SparkSession.createDataFrame-84"><span class="linenos"> 84</span></a> <span class="p">(</span>
-</span><span id="SparkSession.createDataFrame-85"><a href="#SparkSession.createDataFrame-85"><span class="linenos"> 85</span></a> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">data_type</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span>
-</span><span id="SparkSession.createDataFrame-86"><a href="#SparkSession.createDataFrame-86"><span class="linenos"> 86</span></a> <span class="k">if</span> <span class="n">data_type</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
-</span><span id="SparkSession.createDataFrame-87"><a href="#SparkSession.createDataFrame-87"><span class="linenos"> 87</span></a> <span class="k">else</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span>
-</span><span id="SparkSession.createDataFrame-88"><a href="#SparkSession.createDataFrame-88"><span class="linenos"> 88</span></a> <span class="p">)</span>
-</span><span id="SparkSession.createDataFrame-89"><a href="#SparkSession.createDataFrame-89"><span class="linenos"> 89</span></a> <span class="k">for</span> <span class="n">name</span><span class="p">,</span> <span class="n">data_type</span> <span class="ow">in</span> <span class="n">column_mapping</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
-</span><span id="SparkSession.createDataFrame-90"><a href="#SparkSession.createDataFrame-90"><span class="linenos"> 90</span></a> <span class="p">]</span>
-</span><span id="SparkSession.createDataFrame-91"><a href="#SparkSession.createDataFrame-91"><span class="linenos"> 91</span></a>
-</span><span id="SparkSession.createDataFrame-92"><a href="#SparkSession.createDataFrame-92"><span class="linenos"> 92</span></a> <span class="n">select_kwargs</span> <span class="o">=</span> <span class="p">{</span>
-</span><span id="SparkSession.createDataFrame-93"><a href="#SparkSession.createDataFrame-93"><span class="linenos"> 93</span></a> <span class="s2">&quot;expressions&quot;</span><span class="p">:</span> <span class="n">sel_columns</span><span class="p">,</span>
-</span><span id="SparkSession.createDataFrame-94"><a href="#SparkSession.createDataFrame-94"><span class="linenos"> 94</span></a> <span class="s2">&quot;from&quot;</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">From</span><span class="p">(</span>
-</span><span id="SparkSession.createDataFrame-95"><a href="#SparkSession.createDataFrame-95"><span class="linenos"> 95</span></a> <span class="n">this</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">Values</span><span class="p">(</span>
-</span><span id="SparkSession.createDataFrame-96"><a href="#SparkSession.createDataFrame-96"><span class="linenos"> 96</span></a> <span class="n">expressions</span><span class="o">=</span><span class="n">data_expressions</span><span class="p">,</span>
-</span><span id="SparkSession.createDataFrame-97"><a href="#SparkSession.createDataFrame-97"><span class="linenos"> 97</span></a> <span class="n">alias</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">TableAlias</span><span class="p">(</span>
-</span><span id="SparkSession.createDataFrame-98"><a href="#SparkSession.createDataFrame-98"><span class="linenos"> 98</span></a> <span class="n">this</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_auto_incrementing_name</span><span class="p">),</span>
-</span><span id="SparkSession.createDataFrame-99"><a href="#SparkSession.createDataFrame-99"><span class="linenos"> 99</span></a> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">col_name</span><span class="p">)</span> <span class="k">for</span> <span class="n">col_name</span> <span class="ow">in</span> <span class="n">column_mapping</span><span class="p">],</span>
-</span><span id="SparkSession.createDataFrame-100"><a href="#SparkSession.createDataFrame-100"><span class="linenos">100</span></a> <span class="p">),</span>
-</span><span id="SparkSession.createDataFrame-101"><a href="#SparkSession.createDataFrame-101"><span class="linenos">101</span></a> <span class="p">),</span>
-</span><span id="SparkSession.createDataFrame-102"><a href="#SparkSession.createDataFrame-102"><span class="linenos">102</span></a> <span class="p">),</span>
-</span><span id="SparkSession.createDataFrame-103"><a href="#SparkSession.createDataFrame-103"><span class="linenos">103</span></a> <span class="p">}</span>
-</span><span id="SparkSession.createDataFrame-104"><a href="#SparkSession.createDataFrame-104"><span class="linenos">104</span></a>
-</span><span id="SparkSession.createDataFrame-105"><a href="#SparkSession.createDataFrame-105"><span class="linenos">105</span></a> <span class="n">sel_expression</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">(</span><span class="o">**</span><span class="n">select_kwargs</span><span class="p">)</span>
-</span><span id="SparkSession.createDataFrame-106"><a href="#SparkSession.createDataFrame-106"><span class="linenos">106</span></a> <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sel_expression</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.createDataFrame-48"><a href="#SparkSession.createDataFrame-48"><span class="linenos"> 48</span></a> <span class="k">def</span> <span class="nf">createDataFrame</span><span class="p">(</span>
+</span><span id="SparkSession.createDataFrame-49"><a href="#SparkSession.createDataFrame-49"><span class="linenos"> 49</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="SparkSession.createDataFrame-50"><a href="#SparkSession.createDataFrame-50"><span class="linenos"> 50</span></a> <span class="n">data</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Sequence</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">ColumnLiterals</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">ColumnLiterals</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">]],</span>
+</span><span id="SparkSession.createDataFrame-51"><a href="#SparkSession.createDataFrame-51"><span class="linenos"> 51</span></a> <span class="n">schema</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">SchemaInput</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession.createDataFrame-52"><a href="#SparkSession.createDataFrame-52"><span class="linenos"> 52</span></a> <span class="n">samplingRatio</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession.createDataFrame-53"><a href="#SparkSession.createDataFrame-53"><span class="linenos"> 53</span></a> <span class="n">verifySchema</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
+</span><span id="SparkSession.createDataFrame-54"><a href="#SparkSession.createDataFrame-54"><span class="linenos"> 54</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="SparkSession.createDataFrame-55"><a href="#SparkSession.createDataFrame-55"><span class="linenos"> 55</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.dataframe</span> <span class="kn">import</span> <span class="n">DataFrame</span>
+</span><span id="SparkSession.createDataFrame-56"><a href="#SparkSession.createDataFrame-56"><span class="linenos"> 56</span></a>
+</span><span id="SparkSession.createDataFrame-57"><a href="#SparkSession.createDataFrame-57"><span class="linenos"> 57</span></a> <span class="k">if</span> <span class="n">samplingRatio</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">verifySchema</span><span class="p">:</span>
+</span><span id="SparkSession.createDataFrame-58"><a href="#SparkSession.createDataFrame-58"><span class="linenos"> 58</span></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Sampling Ratio and Verify Schema are not supported&quot;</span><span class="p">)</span>
+</span><span id="SparkSession.createDataFrame-59"><a href="#SparkSession.createDataFrame-59"><span class="linenos"> 59</span></a> <span class="k">if</span> <span class="n">schema</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="p">(</span>
+</span><span id="SparkSession.createDataFrame-60"><a href="#SparkSession.createDataFrame-60"><span class="linenos"> 60</span></a> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="p">(</span><span class="n">StructType</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">list</span><span class="p">))</span>
+</span><span id="SparkSession.createDataFrame-61"><a href="#SparkSession.createDataFrame-61"><span class="linenos"> 61</span></a> <span class="ow">or</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="nb">list</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">str</span><span class="p">))</span>
+</span><span id="SparkSession.createDataFrame-62"><a href="#SparkSession.createDataFrame-62"><span class="linenos"> 62</span></a> <span class="p">):</span>
+</span><span id="SparkSession.createDataFrame-63"><a href="#SparkSession.createDataFrame-63"><span class="linenos"> 63</span></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Only schema of either list or string of list supported&quot;</span><span class="p">)</span>
+</span><span id="SparkSession.createDataFrame-64"><a href="#SparkSession.createDataFrame-64"><span class="linenos"> 64</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">data</span><span class="p">:</span>
+</span><span id="SparkSession.createDataFrame-65"><a href="#SparkSession.createDataFrame-65"><span class="linenos"> 65</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Must provide data to create into a DataFrame&quot;</span><span class="p">)</span>
+</span><span id="SparkSession.createDataFrame-66"><a href="#SparkSession.createDataFrame-66"><span class="linenos"> 66</span></a>
+</span><span id="SparkSession.createDataFrame-67"><a href="#SparkSession.createDataFrame-67"><span class="linenos"> 67</span></a> <span class="n">column_mapping</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
+</span><span id="SparkSession.createDataFrame-68"><a href="#SparkSession.createDataFrame-68"><span class="linenos"> 68</span></a> <span class="k">if</span> <span class="n">schema</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="SparkSession.createDataFrame-69"><a href="#SparkSession.createDataFrame-69"><span class="linenos"> 69</span></a> <span class="n">column_mapping</span> <span class="o">=</span> <span class="n">get_column_mapping_from_schema_input</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span>
+</span><span id="SparkSession.createDataFrame-70"><a href="#SparkSession.createDataFrame-70"><span class="linenos"> 70</span></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">dict</span><span class="p">):</span>
+</span><span id="SparkSession.createDataFrame-71"><a href="#SparkSession.createDataFrame-71"><span class="linenos"> 71</span></a> <span class="n">column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">col_name</span><span class="o">.</span><span class="n">strip</span><span class="p">():</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">col_name</span> <span class="ow">in</span> <span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">]}</span>
+</span><span id="SparkSession.createDataFrame-72"><a href="#SparkSession.createDataFrame-72"><span class="linenos"> 72</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="SparkSession.createDataFrame-73"><a href="#SparkSession.createDataFrame-73"><span class="linenos"> 73</span></a> <span class="n">column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="sa">f</span><span class="s2">&quot;_</span><span class="si">{</span><span class="n">i</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">:</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)}</span>
+</span><span id="SparkSession.createDataFrame-74"><a href="#SparkSession.createDataFrame-74"><span class="linenos"> 74</span></a>
+</span><span id="SparkSession.createDataFrame-75"><a href="#SparkSession.createDataFrame-75"><span class="linenos"> 75</span></a> <span class="n">data_expressions</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="SparkSession.createDataFrame-76"><a href="#SparkSession.createDataFrame-76"><span class="linenos"> 76</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">tuple_</span><span class="p">(</span>
+</span><span id="SparkSession.createDataFrame-77"><a href="#SparkSession.createDataFrame-77"><span class="linenos"> 77</span></a> <span class="o">*</span><span class="nb">map</span><span class="p">(</span>
+</span><span id="SparkSession.createDataFrame-78"><a href="#SparkSession.createDataFrame-78"><span class="linenos"> 78</span></a> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span>
+</span><span id="SparkSession.createDataFrame-79"><a href="#SparkSession.createDataFrame-79"><span class="linenos"> 79</span></a> <span class="n">row</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">row</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span> <span class="k">else</span> <span class="n">row</span><span class="o">.</span><span class="n">values</span><span class="p">(),</span>
+</span><span id="SparkSession.createDataFrame-80"><a href="#SparkSession.createDataFrame-80"><span class="linenos"> 80</span></a> <span class="p">)</span>
+</span><span id="SparkSession.createDataFrame-81"><a href="#SparkSession.createDataFrame-81"><span class="linenos"> 81</span></a> <span class="p">)</span>
+</span><span id="SparkSession.createDataFrame-82"><a href="#SparkSession.createDataFrame-82"><span class="linenos"> 82</span></a> <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">data</span>
+</span><span id="SparkSession.createDataFrame-83"><a href="#SparkSession.createDataFrame-83"><span class="linenos"> 83</span></a> <span class="p">]</span>
+</span><span id="SparkSession.createDataFrame-84"><a href="#SparkSession.createDataFrame-84"><span class="linenos"> 84</span></a>
+</span><span id="SparkSession.createDataFrame-85"><a href="#SparkSession.createDataFrame-85"><span class="linenos"> 85</span></a> <span class="n">sel_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="SparkSession.createDataFrame-86"><a href="#SparkSession.createDataFrame-86"><span class="linenos"> 86</span></a> <span class="p">(</span>
+</span><span id="SparkSession.createDataFrame-87"><a href="#SparkSession.createDataFrame-87"><span class="linenos"> 87</span></a> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">data_type</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span>
+</span><span id="SparkSession.createDataFrame-88"><a href="#SparkSession.createDataFrame-88"><span class="linenos"> 88</span></a> <span class="k">if</span> <span class="n">data_type</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
+</span><span id="SparkSession.createDataFrame-89"><a href="#SparkSession.createDataFrame-89"><span class="linenos"> 89</span></a> <span class="k">else</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span>
+</span><span id="SparkSession.createDataFrame-90"><a href="#SparkSession.createDataFrame-90"><span class="linenos"> 90</span></a> <span class="p">)</span>
+</span><span id="SparkSession.createDataFrame-91"><a href="#SparkSession.createDataFrame-91"><span class="linenos"> 91</span></a> <span class="k">for</span> <span class="n">name</span><span class="p">,</span> <span class="n">data_type</span> <span class="ow">in</span> <span class="n">column_mapping</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
+</span><span id="SparkSession.createDataFrame-92"><a href="#SparkSession.createDataFrame-92"><span class="linenos"> 92</span></a> <span class="p">]</span>
+</span><span id="SparkSession.createDataFrame-93"><a href="#SparkSession.createDataFrame-93"><span class="linenos"> 93</span></a>
+</span><span id="SparkSession.createDataFrame-94"><a href="#SparkSession.createDataFrame-94"><span class="linenos"> 94</span></a> <span class="n">select_kwargs</span> <span class="o">=</span> <span class="p">{</span>
+</span><span id="SparkSession.createDataFrame-95"><a href="#SparkSession.createDataFrame-95"><span class="linenos"> 95</span></a> <span class="s2">&quot;expressions&quot;</span><span class="p">:</span> <span class="n">sel_columns</span><span class="p">,</span>
+</span><span id="SparkSession.createDataFrame-96"><a href="#SparkSession.createDataFrame-96"><span class="linenos"> 96</span></a> <span class="s2">&quot;from&quot;</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">From</span><span class="p">(</span>
+</span><span id="SparkSession.createDataFrame-97"><a href="#SparkSession.createDataFrame-97"><span class="linenos"> 97</span></a> <span class="n">this</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">Values</span><span class="p">(</span>
+</span><span id="SparkSession.createDataFrame-98"><a href="#SparkSession.createDataFrame-98"><span class="linenos"> 98</span></a> <span class="n">expressions</span><span class="o">=</span><span class="n">data_expressions</span><span class="p">,</span>
+</span><span id="SparkSession.createDataFrame-99"><a href="#SparkSession.createDataFrame-99"><span class="linenos"> 99</span></a> <span class="n">alias</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">TableAlias</span><span class="p">(</span>
+</span><span id="SparkSession.createDataFrame-100"><a href="#SparkSession.createDataFrame-100"><span class="linenos">100</span></a> <span class="n">this</span><span class="o">=</span><span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_auto_incrementing_name</span><span class="p">),</span>
+</span><span id="SparkSession.createDataFrame-101"><a href="#SparkSession.createDataFrame-101"><span class="linenos">101</span></a> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">col_name</span><span class="p">)</span> <span class="k">for</span> <span class="n">col_name</span> <span class="ow">in</span> <span class="n">column_mapping</span><span class="p">],</span>
+</span><span id="SparkSession.createDataFrame-102"><a href="#SparkSession.createDataFrame-102"><span class="linenos">102</span></a> <span class="p">),</span>
+</span><span id="SparkSession.createDataFrame-103"><a href="#SparkSession.createDataFrame-103"><span class="linenos">103</span></a> <span class="p">),</span>
+</span><span id="SparkSession.createDataFrame-104"><a href="#SparkSession.createDataFrame-104"><span class="linenos">104</span></a> <span class="p">),</span>
+</span><span id="SparkSession.createDataFrame-105"><a href="#SparkSession.createDataFrame-105"><span class="linenos">105</span></a> <span class="p">}</span>
+</span><span id="SparkSession.createDataFrame-106"><a href="#SparkSession.createDataFrame-106"><span class="linenos">106</span></a>
+</span><span id="SparkSession.createDataFrame-107"><a href="#SparkSession.createDataFrame-107"><span class="linenos">107</span></a> <span class="n">sel_expression</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">(</span><span class="o">**</span><span class="n">select_kwargs</span><span class="p">)</span>
+</span><span id="SparkSession.createDataFrame-108"><a href="#SparkSession.createDataFrame-108"><span class="linenos">108</span></a> <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sel_expression</span><span class="p">)</span>
</span></pre></div>
@@ -868,24 +875,24 @@
</div>
<a class="headerlink" href="#SparkSession.sql"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.sql-108"><a href="#SparkSession.sql-108"><span class="linenos">108</span></a> <span class="k">def</span> <span class="nf">sql</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sqlQuery</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="SparkSession.sql-109"><a href="#SparkSession.sql-109"><span class="linenos">109</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">sqlglot</span><span class="o">.</span><span class="n">parse_one</span><span class="p">(</span><span class="n">sqlQuery</span><span class="p">,</span> <span class="n">read</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dialect</span><span class="p">)</span>
-</span><span id="SparkSession.sql-110"><a href="#SparkSession.sql-110"><span class="linenos">110</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">):</span>
-</span><span id="SparkSession.sql-111"><a href="#SparkSession.sql-111"><span class="linenos">111</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">expression</span><span class="p">)</span>
-</span><span id="SparkSession.sql-112"><a href="#SparkSession.sql-112"><span class="linenos">112</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
-</span><span id="SparkSession.sql-113"><a href="#SparkSession.sql-113"><span class="linenos">113</span></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Create</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">)):</span>
-</span><span id="SparkSession.sql-114"><a href="#SparkSession.sql-114"><span class="linenos">114</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="SparkSession.sql-115"><a href="#SparkSession.sql-115"><span class="linenos">115</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">):</span>
-</span><span id="SparkSession.sql-116"><a href="#SparkSession.sql-116"><span class="linenos">116</span></a> <span class="n">select_expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">))</span>
-</span><span id="SparkSession.sql-117"><a href="#SparkSession.sql-117"><span class="linenos">117</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
-</span><span id="SparkSession.sql-118"><a href="#SparkSession.sql-118"><span class="linenos">118</span></a> <span class="k">del</span> <span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;expression&quot;</span><span class="p">]</span>
-</span><span id="SparkSession.sql-119"><a href="#SparkSession.sql-119"><span class="linenos">119</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">select_expression</span><span class="p">,</span> <span class="n">output_expression_container</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span> <span class="c1"># type: ignore</span>
-</span><span id="SparkSession.sql-120"><a href="#SparkSession.sql-120"><span class="linenos">120</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
-</span><span id="SparkSession.sql-121"><a href="#SparkSession.sql-121"><span class="linenos">121</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="SparkSession.sql-122"><a href="#SparkSession.sql-122"><span class="linenos">122</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
-</span><span id="SparkSession.sql-123"><a href="#SparkSession.sql-123"><span class="linenos">123</span></a> <span class="s2">&quot;Unknown expression type provided in the SQL. Please create an issue with the SQL.&quot;</span>
-</span><span id="SparkSession.sql-124"><a href="#SparkSession.sql-124"><span class="linenos">124</span></a> <span class="p">)</span>
-</span><span id="SparkSession.sql-125"><a href="#SparkSession.sql-125"><span class="linenos">125</span></a> <span class="k">return</span> <span class="n">df</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.sql-117"><a href="#SparkSession.sql-117"><span class="linenos">117</span></a> <span class="k">def</span> <span class="nf">sql</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sqlQuery</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="SparkSession.sql-118"><a href="#SparkSession.sql-118"><span class="linenos">118</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_optimize</span><span class="p">(</span><span class="n">sqlglot</span><span class="o">.</span><span class="n">parse_one</span><span class="p">(</span><span class="n">sqlQuery</span><span class="p">,</span> <span class="n">read</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dialect</span><span class="p">))</span>
+</span><span id="SparkSession.sql-119"><a href="#SparkSession.sql-119"><span class="linenos">119</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">):</span>
+</span><span id="SparkSession.sql-120"><a href="#SparkSession.sql-120"><span class="linenos">120</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">expression</span><span class="p">)</span>
+</span><span id="SparkSession.sql-121"><a href="#SparkSession.sql-121"><span class="linenos">121</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
+</span><span id="SparkSession.sql-122"><a href="#SparkSession.sql-122"><span class="linenos">122</span></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Create</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">)):</span>
+</span><span id="SparkSession.sql-123"><a href="#SparkSession.sql-123"><span class="linenos">123</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="SparkSession.sql-124"><a href="#SparkSession.sql-124"><span class="linenos">124</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">):</span>
+</span><span id="SparkSession.sql-125"><a href="#SparkSession.sql-125"><span class="linenos">125</span></a> <span class="n">select_expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">))</span>
+</span><span id="SparkSession.sql-126"><a href="#SparkSession.sql-126"><span class="linenos">126</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+</span><span id="SparkSession.sql-127"><a href="#SparkSession.sql-127"><span class="linenos">127</span></a> <span class="k">del</span> <span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;expression&quot;</span><span class="p">]</span>
+</span><span id="SparkSession.sql-128"><a href="#SparkSession.sql-128"><span class="linenos">128</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">select_expression</span><span class="p">,</span> <span class="n">output_expression_container</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span> <span class="c1"># type: ignore</span>
+</span><span id="SparkSession.sql-129"><a href="#SparkSession.sql-129"><span class="linenos">129</span></a> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
+</span><span id="SparkSession.sql-130"><a href="#SparkSession.sql-130"><span class="linenos">130</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="SparkSession.sql-131"><a href="#SparkSession.sql-131"><span class="linenos">131</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+</span><span id="SparkSession.sql-132"><a href="#SparkSession.sql-132"><span class="linenos">132</span></a> <span class="s2">&quot;Unknown expression type provided in the SQL. Please create an issue with the SQL.&quot;</span>
+</span><span id="SparkSession.sql-133"><a href="#SparkSession.sql-133"><span class="linenos">133</span></a> <span class="p">)</span>
+</span><span id="SparkSession.sql-134"><a href="#SparkSession.sql-134"><span class="linenos">134</span></a> <span class="k">return</span> <span class="n">df</span>
</span></pre></div>
@@ -901,9 +908,9 @@
</div>
<a class="headerlink" href="#SparkSession.builder"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.builder-189"><a href="#SparkSession.builder-189"><span class="linenos">189</span></a> <span class="nd">@classproperty</span>
-</span><span id="SparkSession.builder-190"><a href="#SparkSession.builder-190"><span class="linenos">190</span></a> <span class="k">def</span> <span class="nf">builder</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Builder</span><span class="p">:</span>
-</span><span id="SparkSession.builder-191"><a href="#SparkSession.builder-191"><span class="linenos">191</span></a> <span class="k">return</span> <span class="bp">cls</span><span class="o">.</span><span class="n">Builder</span><span class="p">()</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.builder-198"><a href="#SparkSession.builder-198"><span class="linenos">198</span></a> <span class="nd">@classproperty</span>
+</span><span id="SparkSession.builder-199"><a href="#SparkSession.builder-199"><span class="linenos">199</span></a> <span class="k">def</span> <span class="nf">builder</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Builder</span><span class="p">:</span>
+</span><span id="SparkSession.builder-200"><a href="#SparkSession.builder-200"><span class="linenos">200</span></a> <span class="k">return</span> <span class="bp">cls</span><span class="o">.</span><span class="n">Builder</span><span class="p">()</span>
</span></pre></div>
@@ -922,36 +929,36 @@
</div>
<a class="headerlink" href="#SparkSession.Builder"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.Builder-158"><a href="#SparkSession.Builder-158"><span class="linenos">158</span></a> <span class="k">class</span> <span class="nc">Builder</span><span class="p">:</span>
-</span><span id="SparkSession.Builder-159"><a href="#SparkSession.Builder-159"><span class="linenos">159</span></a> <span class="n">SQLFRAME_DIALECT_KEY</span> <span class="o">=</span> <span class="s2">&quot;sqlframe.dialect&quot;</span>
-</span><span id="SparkSession.Builder-160"><a href="#SparkSession.Builder-160"><span class="linenos">160</span></a>
-</span><span id="SparkSession.Builder-161"><a href="#SparkSession.Builder-161"><span class="linenos">161</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="SparkSession.Builder-162"><a href="#SparkSession.Builder-162"><span class="linenos">162</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="s2">&quot;spark&quot;</span>
-</span><span id="SparkSession.Builder-163"><a href="#SparkSession.Builder-163"><span class="linenos">163</span></a>
-</span><span id="SparkSession.Builder-164"><a href="#SparkSession.Builder-164"><span class="linenos">164</span></a> <span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">Builder</span><span class="p">:</span>
-</span><span id="SparkSession.Builder-165"><a href="#SparkSession.Builder-165"><span class="linenos">165</span></a> <span class="k">return</span> <span class="bp">self</span>
-</span><span id="SparkSession.Builder-166"><a href="#SparkSession.Builder-166"><span class="linenos">166</span></a>
-</span><span id="SparkSession.Builder-167"><a href="#SparkSession.Builder-167"><span class="linenos">167</span></a> <span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
-</span><span id="SparkSession.Builder-168"><a href="#SparkSession.Builder-168"><span class="linenos">168</span></a> <span class="k">return</span> <span class="bp">self</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.Builder-167"><a href="#SparkSession.Builder-167"><span class="linenos">167</span></a> <span class="k">class</span> <span class="nc">Builder</span><span class="p">:</span>
+</span><span id="SparkSession.Builder-168"><a href="#SparkSession.Builder-168"><span class="linenos">168</span></a> <span class="n">SQLFRAME_DIALECT_KEY</span> <span class="o">=</span> <span class="s2">&quot;sqlframe.dialect&quot;</span>
</span><span id="SparkSession.Builder-169"><a href="#SparkSession.Builder-169"><span class="linenos">169</span></a>
-</span><span id="SparkSession.Builder-170"><a href="#SparkSession.Builder-170"><span class="linenos">170</span></a> <span class="k">def</span> <span class="nf">config</span><span class="p">(</span>
-</span><span id="SparkSession.Builder-171"><a href="#SparkSession.Builder-171"><span class="linenos">171</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="SparkSession.Builder-172"><a href="#SparkSession.Builder-172"><span class="linenos">172</span></a> <span class="n">key</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession.Builder-173"><a href="#SparkSession.Builder-173"><span class="linenos">173</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession.Builder-174"><a href="#SparkSession.Builder-174"><span class="linenos">174</span></a> <span class="o">*</span><span class="p">,</span>
-</span><span id="SparkSession.Builder-175"><a href="#SparkSession.Builder-175"><span class="linenos">175</span></a> <span class="nb">map</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession.Builder-176"><a href="#SparkSession.Builder-176"><span class="linenos">176</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span>
-</span><span id="SparkSession.Builder-177"><a href="#SparkSession.Builder-177"><span class="linenos">177</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">Builder</span><span class="p">:</span>
-</span><span id="SparkSession.Builder-178"><a href="#SparkSession.Builder-178"><span class="linenos">178</span></a> <span class="k">if</span> <span class="n">key</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span><span class="p">:</span>
-</span><span id="SparkSession.Builder-179"><a href="#SparkSession.Builder-179"><span class="linenos">179</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">value</span>
-</span><span id="SparkSession.Builder-180"><a href="#SparkSession.Builder-180"><span class="linenos">180</span></a> <span class="k">elif</span> <span class="nb">map</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span> <span class="ow">in</span> <span class="nb">map</span><span class="p">:</span>
-</span><span id="SparkSession.Builder-181"><a href="#SparkSession.Builder-181"><span class="linenos">181</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="nb">map</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span><span class="p">]</span>
-</span><span id="SparkSession.Builder-182"><a href="#SparkSession.Builder-182"><span class="linenos">182</span></a> <span class="k">return</span> <span class="bp">self</span>
-</span><span id="SparkSession.Builder-183"><a href="#SparkSession.Builder-183"><span class="linenos">183</span></a>
-</span><span id="SparkSession.Builder-184"><a href="#SparkSession.Builder-184"><span class="linenos">184</span></a> <span class="k">def</span> <span class="nf">getOrCreate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="p">:</span>
-</span><span id="SparkSession.Builder-185"><a href="#SparkSession.Builder-185"><span class="linenos">185</span></a> <span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="p">()</span>
-</span><span id="SparkSession.Builder-186"><a href="#SparkSession.Builder-186"><span class="linenos">186</span></a> <span class="n">spark</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">Dialect</span><span class="o">.</span><span class="n">get_or_raise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dialect</span><span class="p">)</span>
-</span><span id="SparkSession.Builder-187"><a href="#SparkSession.Builder-187"><span class="linenos">187</span></a> <span class="k">return</span> <span class="n">spark</span>
+</span><span id="SparkSession.Builder-170"><a href="#SparkSession.Builder-170"><span class="linenos">170</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="SparkSession.Builder-171"><a href="#SparkSession.Builder-171"><span class="linenos">171</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="s2">&quot;spark&quot;</span>
+</span><span id="SparkSession.Builder-172"><a href="#SparkSession.Builder-172"><span class="linenos">172</span></a>
+</span><span id="SparkSession.Builder-173"><a href="#SparkSession.Builder-173"><span class="linenos">173</span></a> <span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">Builder</span><span class="p">:</span>
+</span><span id="SparkSession.Builder-174"><a href="#SparkSession.Builder-174"><span class="linenos">174</span></a> <span class="k">return</span> <span class="bp">self</span>
+</span><span id="SparkSession.Builder-175"><a href="#SparkSession.Builder-175"><span class="linenos">175</span></a>
+</span><span id="SparkSession.Builder-176"><a href="#SparkSession.Builder-176"><span class="linenos">176</span></a> <span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+</span><span id="SparkSession.Builder-177"><a href="#SparkSession.Builder-177"><span class="linenos">177</span></a> <span class="k">return</span> <span class="bp">self</span>
+</span><span id="SparkSession.Builder-178"><a href="#SparkSession.Builder-178"><span class="linenos">178</span></a>
+</span><span id="SparkSession.Builder-179"><a href="#SparkSession.Builder-179"><span class="linenos">179</span></a> <span class="k">def</span> <span class="nf">config</span><span class="p">(</span>
+</span><span id="SparkSession.Builder-180"><a href="#SparkSession.Builder-180"><span class="linenos">180</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="SparkSession.Builder-181"><a href="#SparkSession.Builder-181"><span class="linenos">181</span></a> <span class="n">key</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession.Builder-182"><a href="#SparkSession.Builder-182"><span class="linenos">182</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession.Builder-183"><a href="#SparkSession.Builder-183"><span class="linenos">183</span></a> <span class="o">*</span><span class="p">,</span>
+</span><span id="SparkSession.Builder-184"><a href="#SparkSession.Builder-184"><span class="linenos">184</span></a> <span class="nb">map</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession.Builder-185"><a href="#SparkSession.Builder-185"><span class="linenos">185</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span>
+</span><span id="SparkSession.Builder-186"><a href="#SparkSession.Builder-186"><span class="linenos">186</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">Builder</span><span class="p">:</span>
+</span><span id="SparkSession.Builder-187"><a href="#SparkSession.Builder-187"><span class="linenos">187</span></a> <span class="k">if</span> <span class="n">key</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span><span class="p">:</span>
+</span><span id="SparkSession.Builder-188"><a href="#SparkSession.Builder-188"><span class="linenos">188</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">value</span>
+</span><span id="SparkSession.Builder-189"><a href="#SparkSession.Builder-189"><span class="linenos">189</span></a> <span class="k">elif</span> <span class="nb">map</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span> <span class="ow">in</span> <span class="nb">map</span><span class="p">:</span>
+</span><span id="SparkSession.Builder-190"><a href="#SparkSession.Builder-190"><span class="linenos">190</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="nb">map</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span><span class="p">]</span>
+</span><span id="SparkSession.Builder-191"><a href="#SparkSession.Builder-191"><span class="linenos">191</span></a> <span class="k">return</span> <span class="bp">self</span>
+</span><span id="SparkSession.Builder-192"><a href="#SparkSession.Builder-192"><span class="linenos">192</span></a>
+</span><span id="SparkSession.Builder-193"><a href="#SparkSession.Builder-193"><span class="linenos">193</span></a> <span class="k">def</span> <span class="nf">getOrCreate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="p">:</span>
+</span><span id="SparkSession.Builder-194"><a href="#SparkSession.Builder-194"><span class="linenos">194</span></a> <span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="p">()</span>
+</span><span id="SparkSession.Builder-195"><a href="#SparkSession.Builder-195"><span class="linenos">195</span></a> <span class="n">spark</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">Dialect</span><span class="o">.</span><span class="n">get_or_raise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dialect</span><span class="p">)</span>
+</span><span id="SparkSession.Builder-196"><a href="#SparkSession.Builder-196"><span class="linenos">196</span></a> <span class="k">return</span> <span class="n">spark</span>
</span></pre></div>
@@ -991,19 +998,19 @@
</div>
<a class="headerlink" href="#SparkSession.Builder.config"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.Builder.config-170"><a href="#SparkSession.Builder.config-170"><span class="linenos">170</span></a> <span class="k">def</span> <span class="nf">config</span><span class="p">(</span>
-</span><span id="SparkSession.Builder.config-171"><a href="#SparkSession.Builder.config-171"><span class="linenos">171</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="SparkSession.Builder.config-172"><a href="#SparkSession.Builder.config-172"><span class="linenos">172</span></a> <span class="n">key</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession.Builder.config-173"><a href="#SparkSession.Builder.config-173"><span class="linenos">173</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession.Builder.config-174"><a href="#SparkSession.Builder.config-174"><span class="linenos">174</span></a> <span class="o">*</span><span class="p">,</span>
-</span><span id="SparkSession.Builder.config-175"><a href="#SparkSession.Builder.config-175"><span class="linenos">175</span></a> <span class="nb">map</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="SparkSession.Builder.config-176"><a href="#SparkSession.Builder.config-176"><span class="linenos">176</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span>
-</span><span id="SparkSession.Builder.config-177"><a href="#SparkSession.Builder.config-177"><span class="linenos">177</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">Builder</span><span class="p">:</span>
-</span><span id="SparkSession.Builder.config-178"><a href="#SparkSession.Builder.config-178"><span class="linenos">178</span></a> <span class="k">if</span> <span class="n">key</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span><span class="p">:</span>
-</span><span id="SparkSession.Builder.config-179"><a href="#SparkSession.Builder.config-179"><span class="linenos">179</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">value</span>
-</span><span id="SparkSession.Builder.config-180"><a href="#SparkSession.Builder.config-180"><span class="linenos">180</span></a> <span class="k">elif</span> <span class="nb">map</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span> <span class="ow">in</span> <span class="nb">map</span><span class="p">:</span>
-</span><span id="SparkSession.Builder.config-181"><a href="#SparkSession.Builder.config-181"><span class="linenos">181</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="nb">map</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span><span class="p">]</span>
-</span><span id="SparkSession.Builder.config-182"><a href="#SparkSession.Builder.config-182"><span class="linenos">182</span></a> <span class="k">return</span> <span class="bp">self</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.Builder.config-179"><a href="#SparkSession.Builder.config-179"><span class="linenos">179</span></a> <span class="k">def</span> <span class="nf">config</span><span class="p">(</span>
+</span><span id="SparkSession.Builder.config-180"><a href="#SparkSession.Builder.config-180"><span class="linenos">180</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="SparkSession.Builder.config-181"><a href="#SparkSession.Builder.config-181"><span class="linenos">181</span></a> <span class="n">key</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession.Builder.config-182"><a href="#SparkSession.Builder.config-182"><span class="linenos">182</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession.Builder.config-183"><a href="#SparkSession.Builder.config-183"><span class="linenos">183</span></a> <span class="o">*</span><span class="p">,</span>
+</span><span id="SparkSession.Builder.config-184"><a href="#SparkSession.Builder.config-184"><span class="linenos">184</span></a> <span class="nb">map</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="SparkSession.Builder.config-185"><a href="#SparkSession.Builder.config-185"><span class="linenos">185</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span>
+</span><span id="SparkSession.Builder.config-186"><a href="#SparkSession.Builder.config-186"><span class="linenos">186</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">Builder</span><span class="p">:</span>
+</span><span id="SparkSession.Builder.config-187"><a href="#SparkSession.Builder.config-187"><span class="linenos">187</span></a> <span class="k">if</span> <span class="n">key</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span><span class="p">:</span>
+</span><span id="SparkSession.Builder.config-188"><a href="#SparkSession.Builder.config-188"><span class="linenos">188</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">value</span>
+</span><span id="SparkSession.Builder.config-189"><a href="#SparkSession.Builder.config-189"><span class="linenos">189</span></a> <span class="k">elif</span> <span class="nb">map</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span> <span class="ow">in</span> <span class="nb">map</span><span class="p">:</span>
+</span><span id="SparkSession.Builder.config-190"><a href="#SparkSession.Builder.config-190"><span class="linenos">190</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="nb">map</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">SQLFRAME_DIALECT_KEY</span><span class="p">]</span>
+</span><span id="SparkSession.Builder.config-191"><a href="#SparkSession.Builder.config-191"><span class="linenos">191</span></a> <span class="k">return</span> <span class="bp">self</span>
</span></pre></div>
@@ -1021,10 +1028,10 @@
</div>
<a class="headerlink" href="#SparkSession.Builder.getOrCreate"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.Builder.getOrCreate-184"><a href="#SparkSession.Builder.getOrCreate-184"><span class="linenos">184</span></a> <span class="k">def</span> <span class="nf">getOrCreate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="p">:</span>
-</span><span id="SparkSession.Builder.getOrCreate-185"><a href="#SparkSession.Builder.getOrCreate-185"><span class="linenos">185</span></a> <span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="p">()</span>
-</span><span id="SparkSession.Builder.getOrCreate-186"><a href="#SparkSession.Builder.getOrCreate-186"><span class="linenos">186</span></a> <span class="n">spark</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">Dialect</span><span class="o">.</span><span class="n">get_or_raise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dialect</span><span class="p">)</span>
-</span><span id="SparkSession.Builder.getOrCreate-187"><a href="#SparkSession.Builder.getOrCreate-187"><span class="linenos">187</span></a> <span class="k">return</span> <span class="n">spark</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="SparkSession.Builder.getOrCreate-193"><a href="#SparkSession.Builder.getOrCreate-193"><span class="linenos">193</span></a> <span class="k">def</span> <span class="nf">getOrCreate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="p">:</span>
+</span><span id="SparkSession.Builder.getOrCreate-194"><a href="#SparkSession.Builder.getOrCreate-194"><span class="linenos">194</span></a> <span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="p">()</span>
+</span><span id="SparkSession.Builder.getOrCreate-195"><a href="#SparkSession.Builder.getOrCreate-195"><span class="linenos">195</span></a> <span class="n">spark</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">Dialect</span><span class="o">.</span><span class="n">get_or_raise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dialect</span><span class="p">)</span>
+</span><span id="SparkSession.Builder.getOrCreate-196"><a href="#SparkSession.Builder.getOrCreate-196"><span class="linenos">196</span></a> <span class="k">return</span> <span class="n">spark</span>
</span></pre></div>
@@ -1043,83 +1050,85 @@
</div>
<a class="headerlink" href="#DataFrame"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame-49"><a href="#DataFrame-49"><span class="linenos"> 49</span></a><span class="k">class</span> <span class="nc">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-50"><a href="#DataFrame-50"><span class="linenos"> 50</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
-</span><span id="DataFrame-51"><a href="#DataFrame-51"><span class="linenos"> 51</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame-52"><a href="#DataFrame-52"><span class="linenos"> 52</span></a> <span class="n">spark</span><span class="p">:</span> <span class="n">SparkSession</span><span class="p">,</span>
-</span><span id="DataFrame-53"><a href="#DataFrame-53"><span class="linenos"> 53</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">,</span>
-</span><span id="DataFrame-54"><a href="#DataFrame-54"><span class="linenos"> 54</span></a> <span class="n">branch_id</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-55"><a href="#DataFrame-55"><span class="linenos"> 55</span></a> <span class="n">sequence_id</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-56"><a href="#DataFrame-56"><span class="linenos"> 56</span></a> <span class="n">last_op</span><span class="p">:</span> <span class="n">Operation</span> <span class="o">=</span> <span class="n">Operation</span><span class="o">.</span><span class="n">INIT</span><span class="p">,</span>
-</span><span id="DataFrame-57"><a href="#DataFrame-57"><span class="linenos"> 57</span></a> <span class="n">pending_hints</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-58"><a href="#DataFrame-58"><span class="linenos"> 58</span></a> <span class="n">output_expression_container</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">OutputExpressionContainer</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-59"><a href="#DataFrame-59"><span class="linenos"> 59</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
-</span><span id="DataFrame-60"><a href="#DataFrame-60"><span class="linenos"> 60</span></a> <span class="p">):</span>
-</span><span id="DataFrame-61"><a href="#DataFrame-61"><span class="linenos"> 61</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span> <span class="o">=</span> <span class="n">spark</span>
-</span><span id="DataFrame-62"><a href="#DataFrame-62"><span class="linenos"> 62</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span>
-</span><span id="DataFrame-63"><a href="#DataFrame-63"><span class="linenos"> 63</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">branch_id</span> <span class="o">=</span> <span class="n">branch_id</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_branch_id</span>
-</span><span id="DataFrame-64"><a href="#DataFrame-64"><span class="linenos"> 64</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span> <span class="o">=</span> <span class="n">sequence_id</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_sequence_id</span>
-</span><span id="DataFrame-65"><a href="#DataFrame-65"><span class="linenos"> 65</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">last_op</span> <span class="o">=</span> <span class="n">last_op</span>
-</span><span id="DataFrame-66"><a href="#DataFrame-66"><span class="linenos"> 66</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">pending_hints</span> <span class="o">=</span> <span class="n">pending_hints</span> <span class="ow">or</span> <span class="p">[]</span>
-</span><span id="DataFrame-67"><a href="#DataFrame-67"><span class="linenos"> 67</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">output_expression_container</span> <span class="o">=</span> <span class="n">output_expression_container</span> <span class="ow">or</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">()</span>
-</span><span id="DataFrame-68"><a href="#DataFrame-68"><span class="linenos"> 68</span></a>
-</span><span id="DataFrame-69"><a href="#DataFrame-69"><span class="linenos"> 69</span></a> <span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
-</span><span id="DataFrame-70"><a href="#DataFrame-70"><span class="linenos"> 70</span></a> <span class="k">return</span> <span class="bp">self</span><span class="p">[</span><span class="n">column_name</span><span class="p">]</span>
-</span><span id="DataFrame-71"><a href="#DataFrame-71"><span class="linenos"> 71</span></a>
-</span><span id="DataFrame-72"><a href="#DataFrame-72"><span class="linenos"> 72</span></a> <span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
-</span><span id="DataFrame-73"><a href="#DataFrame-73"><span class="linenos"> 73</span></a> <span class="n">column_name</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">branch_id</span><span class="si">}</span><span class="s2">.</span><span class="si">{</span><span class="n">column_name</span><span class="si">}</span><span class="s2">&quot;</span>
-</span><span id="DataFrame-74"><a href="#DataFrame-74"><span class="linenos"> 74</span></a> <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">column_name</span><span class="p">)</span>
-</span><span id="DataFrame-75"><a href="#DataFrame-75"><span class="linenos"> 75</span></a>
-</span><span id="DataFrame-76"><a href="#DataFrame-76"><span class="linenos"> 76</span></a> <span class="k">def</span> <span class="nf">__copy__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="DataFrame-77"><a href="#DataFrame-77"><span class="linenos"> 77</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-78"><a href="#DataFrame-78"><span class="linenos"> 78</span></a>
-</span><span id="DataFrame-79"><a href="#DataFrame-79"><span class="linenos"> 79</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame-80"><a href="#DataFrame-80"><span class="linenos"> 80</span></a> <span class="k">def</span> <span class="nf">sparkSession</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="DataFrame-81"><a href="#DataFrame-81"><span class="linenos"> 81</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span>
-</span><span id="DataFrame-82"><a href="#DataFrame-82"><span class="linenos"> 82</span></a>
-</span><span id="DataFrame-83"><a href="#DataFrame-83"><span class="linenos"> 83</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame-84"><a href="#DataFrame-84"><span class="linenos"> 84</span></a> <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="DataFrame-85"><a href="#DataFrame-85"><span class="linenos"> 85</span></a> <span class="k">return</span> <span class="n">DataFrameWriter</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
-</span><span id="DataFrame-86"><a href="#DataFrame-86"><span class="linenos"> 86</span></a>
-</span><span id="DataFrame-87"><a href="#DataFrame-87"><span class="linenos"> 87</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame-88"><a href="#DataFrame-88"><span class="linenos"> 88</span></a> <span class="k">def</span> <span class="nf">latest_cte_name</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
-</span><span id="DataFrame-89"><a href="#DataFrame-89"><span class="linenos"> 89</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">:</span>
-</span><span id="DataFrame-90"><a href="#DataFrame-90"><span class="linenos"> 90</span></a> <span class="n">from_exp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span>
-</span><span id="DataFrame-91"><a href="#DataFrame-91"><span class="linenos"> 91</span></a> <span class="k">if</span> <span class="n">from_exp</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span>
-</span><span id="DataFrame-92"><a href="#DataFrame-92"><span class="linenos"> 92</span></a> <span class="k">return</span> <span class="n">from_exp</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame-93"><a href="#DataFrame-93"><span class="linenos"> 93</span></a> <span class="n">table_alias</span> <span class="o">=</span> <span class="n">from_exp</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">TableAlias</span><span class="p">)</span>
-</span><span id="DataFrame-94"><a href="#DataFrame-94"><span class="linenos"> 94</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">table_alias</span><span class="p">:</span>
-</span><span id="DataFrame-95"><a href="#DataFrame-95"><span class="linenos"> 95</span></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
-</span><span id="DataFrame-96"><a href="#DataFrame-96"><span class="linenos"> 96</span></a> <span class="sa">f</span><span class="s2">&quot;Could not find an alias name for this expression: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="si">}</span><span class="s2">&quot;</span>
-</span><span id="DataFrame-97"><a href="#DataFrame-97"><span class="linenos"> 97</span></a> <span class="p">)</span>
-</span><span id="DataFrame-98"><a href="#DataFrame-98"><span class="linenos"> 98</span></a> <span class="k">return</span> <span class="n">table_alias</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame-99"><a href="#DataFrame-99"><span class="linenos"> 99</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">alias</span>
-</span><span id="DataFrame-100"><a href="#DataFrame-100"><span class="linenos">100</span></a>
-</span><span id="DataFrame-101"><a href="#DataFrame-101"><span class="linenos">101</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame-102"><a href="#DataFrame-102"><span class="linenos">102</span></a> <span class="k">def</span> <span class="nf">pending_join_hints</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="DataFrame-103"><a href="#DataFrame-103"><span class="linenos">103</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">hint</span> <span class="k">for</span> <span class="n">hint</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">pending_hints</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">hint</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">JoinHint</span><span class="p">)]</span>
-</span><span id="DataFrame-104"><a href="#DataFrame-104"><span class="linenos">104</span></a>
-</span><span id="DataFrame-105"><a href="#DataFrame-105"><span class="linenos">105</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame-106"><a href="#DataFrame-106"><span class="linenos">106</span></a> <span class="k">def</span> <span class="nf">pending_partition_hints</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="DataFrame-107"><a href="#DataFrame-107"><span class="linenos">107</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">hint</span> <span class="k">for</span> <span class="n">hint</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">pending_hints</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">hint</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Anonymous</span><span class="p">)]</span>
-</span><span id="DataFrame-108"><a href="#DataFrame-108"><span class="linenos">108</span></a>
-</span><span id="DataFrame-109"><a href="#DataFrame-109"><span class="linenos">109</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame-110"><a href="#DataFrame-110"><span class="linenos">110</span></a> <span class="k">def</span> <span class="nf">columns</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
-</span><span id="DataFrame-111"><a href="#DataFrame-111"><span class="linenos">111</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">named_selects</span>
-</span><span id="DataFrame-112"><a href="#DataFrame-112"><span class="linenos">112</span></a>
-</span><span id="DataFrame-113"><a href="#DataFrame-113"><span class="linenos">113</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame-114"><a href="#DataFrame-114"><span class="linenos">114</span></a> <span class="k">def</span> <span class="nf">na</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameNaFunctions</span><span class="p">:</span>
-</span><span id="DataFrame-115"><a href="#DataFrame-115"><span class="linenos">115</span></a> <span class="k">return</span> <span class="n">DataFrameNaFunctions</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
-</span><span id="DataFrame-116"><a href="#DataFrame-116"><span class="linenos">116</span></a>
-</span><span id="DataFrame-117"><a href="#DataFrame-117"><span class="linenos">117</span></a> <span class="k">def</span> <span class="nf">_replace_cte_names_with_hashes</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">):</span>
-</span><span id="DataFrame-118"><a href="#DataFrame-118"><span class="linenos">118</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{}</span>
-</span><span id="DataFrame-119"><a href="#DataFrame-119"><span class="linenos">119</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">:</span>
-</span><span id="DataFrame-120"><a href="#DataFrame-120"><span class="linenos">120</span></a> <span class="n">old_name_id</span> <span class="o">=</span> <span class="n">cte</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;alias&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">this</span>
-</span><span id="DataFrame-121"><a href="#DataFrame-121"><span class="linenos">121</span></a> <span class="n">new_hashed_id</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span>
-</span><span id="DataFrame-122"><a href="#DataFrame-122"><span class="linenos">122</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_create_hash_from_expression</span><span class="p">(</span><span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="p">),</span> <span class="n">quoted</span><span class="o">=</span><span class="n">old_name_id</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;quoted&quot;</span><span class="p">]</span>
-</span><span id="DataFrame-123"><a href="#DataFrame-123"><span class="linenos">123</span></a> <span class="p">)</span>
-</span><span id="DataFrame-124"><a href="#DataFrame-124"><span class="linenos">124</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">old_name_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">new_hashed_id</span>
-</span><span id="DataFrame-125"><a href="#DataFrame-125"><span class="linenos">125</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">replace_id_value</span><span class="p">,</span> <span class="n">replacement_mapping</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame-47"><a href="#DataFrame-47"><span class="linenos"> 47</span></a><span class="k">class</span> <span class="nc">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-48"><a href="#DataFrame-48"><span class="linenos"> 48</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
+</span><span id="DataFrame-49"><a href="#DataFrame-49"><span class="linenos"> 49</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame-50"><a href="#DataFrame-50"><span class="linenos"> 50</span></a> <span class="n">spark</span><span class="p">:</span> <span class="n">SparkSession</span><span class="p">,</span>
+</span><span id="DataFrame-51"><a href="#DataFrame-51"><span class="linenos"> 51</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">,</span>
+</span><span id="DataFrame-52"><a href="#DataFrame-52"><span class="linenos"> 52</span></a> <span class="n">branch_id</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-53"><a href="#DataFrame-53"><span class="linenos"> 53</span></a> <span class="n">sequence_id</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-54"><a href="#DataFrame-54"><span class="linenos"> 54</span></a> <span class="n">last_op</span><span class="p">:</span> <span class="n">Operation</span> <span class="o">=</span> <span class="n">Operation</span><span class="o">.</span><span class="n">INIT</span><span class="p">,</span>
+</span><span id="DataFrame-55"><a href="#DataFrame-55"><span class="linenos"> 55</span></a> <span class="n">pending_hints</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-56"><a href="#DataFrame-56"><span class="linenos"> 56</span></a> <span class="n">output_expression_container</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">OutputExpressionContainer</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-57"><a href="#DataFrame-57"><span class="linenos"> 57</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
+</span><span id="DataFrame-58"><a href="#DataFrame-58"><span class="linenos"> 58</span></a> <span class="p">):</span>
+</span><span id="DataFrame-59"><a href="#DataFrame-59"><span class="linenos"> 59</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span> <span class="o">=</span> <span class="n">spark</span>
+</span><span id="DataFrame-60"><a href="#DataFrame-60"><span class="linenos"> 60</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span>
+</span><span id="DataFrame-61"><a href="#DataFrame-61"><span class="linenos"> 61</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">branch_id</span> <span class="o">=</span> <span class="n">branch_id</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_branch_id</span>
+</span><span id="DataFrame-62"><a href="#DataFrame-62"><span class="linenos"> 62</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span> <span class="o">=</span> <span class="n">sequence_id</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_sequence_id</span>
+</span><span id="DataFrame-63"><a href="#DataFrame-63"><span class="linenos"> 63</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">last_op</span> <span class="o">=</span> <span class="n">last_op</span>
+</span><span id="DataFrame-64"><a href="#DataFrame-64"><span class="linenos"> 64</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">pending_hints</span> <span class="o">=</span> <span class="n">pending_hints</span> <span class="ow">or</span> <span class="p">[]</span>
+</span><span id="DataFrame-65"><a href="#DataFrame-65"><span class="linenos"> 65</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">output_expression_container</span> <span class="o">=</span> <span class="n">output_expression_container</span> <span class="ow">or</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">()</span>
+</span><span id="DataFrame-66"><a href="#DataFrame-66"><span class="linenos"> 66</span></a>
+</span><span id="DataFrame-67"><a href="#DataFrame-67"><span class="linenos"> 67</span></a> <span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
+</span><span id="DataFrame-68"><a href="#DataFrame-68"><span class="linenos"> 68</span></a> <span class="k">return</span> <span class="bp">self</span><span class="p">[</span><span class="n">column_name</span><span class="p">]</span>
+</span><span id="DataFrame-69"><a href="#DataFrame-69"><span class="linenos"> 69</span></a>
+</span><span id="DataFrame-70"><a href="#DataFrame-70"><span class="linenos"> 70</span></a> <span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
+</span><span id="DataFrame-71"><a href="#DataFrame-71"><span class="linenos"> 71</span></a> <span class="n">column_name</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">branch_id</span><span class="si">}</span><span class="s2">.</span><span class="si">{</span><span class="n">column_name</span><span class="si">}</span><span class="s2">&quot;</span>
+</span><span id="DataFrame-72"><a href="#DataFrame-72"><span class="linenos"> 72</span></a> <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">column_name</span><span class="p">)</span>
+</span><span id="DataFrame-73"><a href="#DataFrame-73"><span class="linenos"> 73</span></a>
+</span><span id="DataFrame-74"><a href="#DataFrame-74"><span class="linenos"> 74</span></a> <span class="k">def</span> <span class="nf">__copy__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="DataFrame-75"><a href="#DataFrame-75"><span class="linenos"> 75</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-76"><a href="#DataFrame-76"><span class="linenos"> 76</span></a>
+</span><span id="DataFrame-77"><a href="#DataFrame-77"><span class="linenos"> 77</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame-78"><a href="#DataFrame-78"><span class="linenos"> 78</span></a> <span class="k">def</span> <span class="nf">sparkSession</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="DataFrame-79"><a href="#DataFrame-79"><span class="linenos"> 79</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span>
+</span><span id="DataFrame-80"><a href="#DataFrame-80"><span class="linenos"> 80</span></a>
+</span><span id="DataFrame-81"><a href="#DataFrame-81"><span class="linenos"> 81</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame-82"><a href="#DataFrame-82"><span class="linenos"> 82</span></a> <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="DataFrame-83"><a href="#DataFrame-83"><span class="linenos"> 83</span></a> <span class="k">return</span> <span class="n">DataFrameWriter</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+</span><span id="DataFrame-84"><a href="#DataFrame-84"><span class="linenos"> 84</span></a>
+</span><span id="DataFrame-85"><a href="#DataFrame-85"><span class="linenos"> 85</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame-86"><a href="#DataFrame-86"><span class="linenos"> 86</span></a> <span class="k">def</span> <span class="nf">latest_cte_name</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
+</span><span id="DataFrame-87"><a href="#DataFrame-87"><span class="linenos"> 87</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">:</span>
+</span><span id="DataFrame-88"><a href="#DataFrame-88"><span class="linenos"> 88</span></a> <span class="n">from_exp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span>
+</span><span id="DataFrame-89"><a href="#DataFrame-89"><span class="linenos"> 89</span></a> <span class="k">if</span> <span class="n">from_exp</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span>
+</span><span id="DataFrame-90"><a href="#DataFrame-90"><span class="linenos"> 90</span></a> <span class="k">return</span> <span class="n">from_exp</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame-91"><a href="#DataFrame-91"><span class="linenos"> 91</span></a> <span class="n">table_alias</span> <span class="o">=</span> <span class="n">from_exp</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">TableAlias</span><span class="p">)</span>
+</span><span id="DataFrame-92"><a href="#DataFrame-92"><span class="linenos"> 92</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">table_alias</span><span class="p">:</span>
+</span><span id="DataFrame-93"><a href="#DataFrame-93"><span class="linenos"> 93</span></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
+</span><span id="DataFrame-94"><a href="#DataFrame-94"><span class="linenos"> 94</span></a> <span class="sa">f</span><span class="s2">&quot;Could not find an alias name for this expression: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="si">}</span><span class="s2">&quot;</span>
+</span><span id="DataFrame-95"><a href="#DataFrame-95"><span class="linenos"> 95</span></a> <span class="p">)</span>
+</span><span id="DataFrame-96"><a href="#DataFrame-96"><span class="linenos"> 96</span></a> <span class="k">return</span> <span class="n">table_alias</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame-97"><a href="#DataFrame-97"><span class="linenos"> 97</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">alias</span>
+</span><span id="DataFrame-98"><a href="#DataFrame-98"><span class="linenos"> 98</span></a>
+</span><span id="DataFrame-99"><a href="#DataFrame-99"><span class="linenos"> 99</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame-100"><a href="#DataFrame-100"><span class="linenos">100</span></a> <span class="k">def</span> <span class="nf">pending_join_hints</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="DataFrame-101"><a href="#DataFrame-101"><span class="linenos">101</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">hint</span> <span class="k">for</span> <span class="n">hint</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">pending_hints</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">hint</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">JoinHint</span><span class="p">)]</span>
+</span><span id="DataFrame-102"><a href="#DataFrame-102"><span class="linenos">102</span></a>
+</span><span id="DataFrame-103"><a href="#DataFrame-103"><span class="linenos">103</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame-104"><a href="#DataFrame-104"><span class="linenos">104</span></a> <span class="k">def</span> <span class="nf">pending_partition_hints</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="DataFrame-105"><a href="#DataFrame-105"><span class="linenos">105</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">hint</span> <span class="k">for</span> <span class="n">hint</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">pending_hints</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">hint</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Anonymous</span><span class="p">)]</span>
+</span><span id="DataFrame-106"><a href="#DataFrame-106"><span class="linenos">106</span></a>
+</span><span id="DataFrame-107"><a href="#DataFrame-107"><span class="linenos">107</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame-108"><a href="#DataFrame-108"><span class="linenos">108</span></a> <span class="k">def</span> <span class="nf">columns</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
+</span><span id="DataFrame-109"><a href="#DataFrame-109"><span class="linenos">109</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">named_selects</span>
+</span><span id="DataFrame-110"><a href="#DataFrame-110"><span class="linenos">110</span></a>
+</span><span id="DataFrame-111"><a href="#DataFrame-111"><span class="linenos">111</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame-112"><a href="#DataFrame-112"><span class="linenos">112</span></a> <span class="k">def</span> <span class="nf">na</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameNaFunctions</span><span class="p">:</span>
+</span><span id="DataFrame-113"><a href="#DataFrame-113"><span class="linenos">113</span></a> <span class="k">return</span> <span class="n">DataFrameNaFunctions</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+</span><span id="DataFrame-114"><a href="#DataFrame-114"><span class="linenos">114</span></a>
+</span><span id="DataFrame-115"><a href="#DataFrame-115"><span class="linenos">115</span></a> <span class="k">def</span> <span class="nf">_replace_cte_names_with_hashes</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">):</span>
+</span><span id="DataFrame-116"><a href="#DataFrame-116"><span class="linenos">116</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{}</span>
+</span><span id="DataFrame-117"><a href="#DataFrame-117"><span class="linenos">117</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">:</span>
+</span><span id="DataFrame-118"><a href="#DataFrame-118"><span class="linenos">118</span></a> <span class="n">old_name_id</span> <span class="o">=</span> <span class="n">cte</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;alias&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">this</span>
+</span><span id="DataFrame-119"><a href="#DataFrame-119"><span class="linenos">119</span></a> <span class="n">new_hashed_id</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span>
+</span><span id="DataFrame-120"><a href="#DataFrame-120"><span class="linenos">120</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_create_hash_from_expression</span><span class="p">(</span><span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="p">),</span> <span class="n">quoted</span><span class="o">=</span><span class="n">old_name_id</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;quoted&quot;</span><span class="p">]</span>
+</span><span id="DataFrame-121"><a href="#DataFrame-121"><span class="linenos">121</span></a> <span class="p">)</span>
+</span><span id="DataFrame-122"><a href="#DataFrame-122"><span class="linenos">122</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">old_name_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">new_hashed_id</span>
+</span><span id="DataFrame-123"><a href="#DataFrame-123"><span class="linenos">123</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">replace_id_value</span><span class="p">,</span> <span class="n">replacement_mapping</span><span class="p">)</span><span class="o">.</span><span class="n">assert_is</span><span class="p">(</span>
+</span><span id="DataFrame-124"><a href="#DataFrame-124"><span class="linenos">124</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span>
+</span><span id="DataFrame-125"><a href="#DataFrame-125"><span class="linenos">125</span></a> <span class="p">)</span>
</span><span id="DataFrame-126"><a href="#DataFrame-126"><span class="linenos">126</span></a> <span class="k">return</span> <span class="n">expression</span>
</span><span id="DataFrame-127"><a href="#DataFrame-127"><span class="linenos">127</span></a>
</span><span id="DataFrame-128"><a href="#DataFrame-128"><span class="linenos">128</span></a> <span class="k">def</span> <span class="nf">_create_cte_from_expression</span><span class="p">(</span>
@@ -1304,531 +1313,532 @@
</span><span id="DataFrame-307"><a href="#DataFrame-307"><span class="linenos">307</span></a> <span class="n">replacement_mapping</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Identifier</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Identifier</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
</span><span id="DataFrame-308"><a href="#DataFrame-308"><span class="linenos">308</span></a>
</span><span id="DataFrame-309"><a href="#DataFrame-309"><span class="linenos">309</span></a> <span class="k">for</span> <span class="n">expression_type</span><span class="p">,</span> <span class="n">select_expression</span> <span class="ow">in</span> <span class="n">select_expressions</span><span class="p">:</span>
-</span><span id="DataFrame-310"><a href="#DataFrame-310"><span class="linenos">310</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">replace_id_value</span><span class="p">,</span> <span class="n">replacement_mapping</span><span class="p">)</span>
-</span><span id="DataFrame-311"><a href="#DataFrame-311"><span class="linenos">311</span></a> <span class="k">if</span> <span class="n">optimize</span><span class="p">:</span>
-</span><span id="DataFrame-312"><a href="#DataFrame-312"><span class="linenos">312</span></a> <span class="n">quote_identifiers</span><span class="p">(</span><span class="n">select_expression</span><span class="p">,</span> <span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">)</span>
-</span><span id="DataFrame-313"><a href="#DataFrame-313"><span class="linenos">313</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span>
-</span><span id="DataFrame-314"><a href="#DataFrame-314"><span class="linenos">314</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">,</span> <span class="n">optimize_func</span><span class="p">(</span><span class="n">select_expression</span><span class="p">,</span> <span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">)</span>
-</span><span id="DataFrame-315"><a href="#DataFrame-315"><span class="linenos">315</span></a> <span class="p">)</span>
-</span><span id="DataFrame-316"><a href="#DataFrame-316"><span class="linenos">316</span></a>
-</span><span id="DataFrame-317"><a href="#DataFrame-317"><span class="linenos">317</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_replace_cte_names_with_hashes</span><span class="p">(</span><span class="n">select_expression</span><span class="p">)</span>
-</span><span id="DataFrame-318"><a href="#DataFrame-318"><span class="linenos">318</span></a>
-</span><span id="DataFrame-319"><a href="#DataFrame-319"><span class="linenos">319</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Cache</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Drop</span><span class="p">]</span>
-</span><span id="DataFrame-320"><a href="#DataFrame-320"><span class="linenos">320</span></a> <span class="k">if</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Cache</span><span class="p">:</span>
-</span><span id="DataFrame-321"><a href="#DataFrame-321"><span class="linenos">321</span></a> <span class="n">cache_table_name</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_create_hash_from_expression</span><span class="p">(</span><span class="n">select_expression</span><span class="p">)</span>
-</span><span id="DataFrame-322"><a href="#DataFrame-322"><span class="linenos">322</span></a> <span class="n">cache_table</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">cache_table_name</span><span class="p">)</span>
-</span><span id="DataFrame-323"><a href="#DataFrame-323"><span class="linenos">323</span></a> <span class="n">original_alias_name</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;cte_alias_name&quot;</span><span class="p">]</span>
-</span><span id="DataFrame-324"><a href="#DataFrame-324"><span class="linenos">324</span></a>
-</span><span id="DataFrame-325"><a href="#DataFrame-325"><span class="linenos">325</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">original_alias_name</span><span class="p">)]</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span> <span class="c1"># type: ignore</span>
-</span><span id="DataFrame-326"><a href="#DataFrame-326"><span class="linenos">326</span></a> <span class="n">cache_table_name</span>
-</span><span id="DataFrame-327"><a href="#DataFrame-327"><span class="linenos">327</span></a> <span class="p">)</span>
-</span><span id="DataFrame-328"><a href="#DataFrame-328"><span class="linenos">328</span></a> <span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">add_table</span><span class="p">(</span>
-</span><span id="DataFrame-329"><a href="#DataFrame-329"><span class="linenos">329</span></a> <span class="n">cache_table_name</span><span class="p">,</span>
-</span><span id="DataFrame-330"><a href="#DataFrame-330"><span class="linenos">330</span></a> <span class="p">{</span>
-</span><span id="DataFrame-331"><a href="#DataFrame-331"><span class="linenos">331</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">expression</span><span class="o">.</span><span class="n">type</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">)</span>
-</span><span id="DataFrame-332"><a href="#DataFrame-332"><span class="linenos">332</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">expressions</span>
-</span><span id="DataFrame-333"><a href="#DataFrame-333"><span class="linenos">333</span></a> <span class="p">},</span>
-</span><span id="DataFrame-334"><a href="#DataFrame-334"><span class="linenos">334</span></a> <span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">,</span>
-</span><span id="DataFrame-335"><a href="#DataFrame-335"><span class="linenos">335</span></a> <span class="p">)</span>
-</span><span id="DataFrame-336"><a href="#DataFrame-336"><span class="linenos">336</span></a>
-</span><span id="DataFrame-337"><a href="#DataFrame-337"><span class="linenos">337</span></a> <span class="n">cache_storage_level</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;cache_storage_level&quot;</span><span class="p">]</span>
-</span><span id="DataFrame-338"><a href="#DataFrame-338"><span class="linenos">338</span></a> <span class="n">options</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-339"><a href="#DataFrame-339"><span class="linenos">339</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Literal</span><span class="o">.</span><span class="n">string</span><span class="p">(</span><span class="s2">&quot;storageLevel&quot;</span><span class="p">),</span>
-</span><span id="DataFrame-340"><a href="#DataFrame-340"><span class="linenos">340</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Literal</span><span class="o">.</span><span class="n">string</span><span class="p">(</span><span class="n">cache_storage_level</span><span class="p">),</span>
-</span><span id="DataFrame-341"><a href="#DataFrame-341"><span class="linenos">341</span></a> <span class="p">]</span>
-</span><span id="DataFrame-342"><a href="#DataFrame-342"><span class="linenos">342</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Cache</span><span class="p">(</span>
-</span><span id="DataFrame-343"><a href="#DataFrame-343"><span class="linenos">343</span></a> <span class="n">this</span><span class="o">=</span><span class="n">cache_table</span><span class="p">,</span> <span class="n">expression</span><span class="o">=</span><span class="n">select_expression</span><span class="p">,</span> <span class="n">lazy</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">options</span><span class="o">=</span><span class="n">options</span>
-</span><span id="DataFrame-344"><a href="#DataFrame-344"><span class="linenos">344</span></a> <span class="p">)</span>
-</span><span id="DataFrame-345"><a href="#DataFrame-345"><span class="linenos">345</span></a>
-</span><span id="DataFrame-346"><a href="#DataFrame-346"><span class="linenos">346</span></a> <span class="c1"># We will drop the &quot;view&quot; if it exists before running the cache table</span>
-</span><span id="DataFrame-347"><a href="#DataFrame-347"><span class="linenos">347</span></a> <span class="n">output_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Drop</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">cache_table</span><span class="p">,</span> <span class="n">exists</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">kind</span><span class="o">=</span><span class="s2">&quot;VIEW&quot;</span><span class="p">))</span>
-</span><span id="DataFrame-348"><a href="#DataFrame-348"><span class="linenos">348</span></a> <span class="k">elif</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Create</span><span class="p">:</span>
-</span><span id="DataFrame-349"><a href="#DataFrame-349"><span class="linenos">349</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">output_expression_container</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-350"><a href="#DataFrame-350"><span class="linenos">350</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;expression&quot;</span><span class="p">,</span> <span class="n">select_expression</span><span class="p">)</span>
-</span><span id="DataFrame-351"><a href="#DataFrame-351"><span class="linenos">351</span></a> <span class="k">elif</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">:</span>
-</span><span id="DataFrame-352"><a href="#DataFrame-352"><span class="linenos">352</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">output_expression_container</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-353"><a href="#DataFrame-353"><span class="linenos">353</span></a> <span class="n">select_without_ctes</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-354"><a href="#DataFrame-354"><span class="linenos">354</span></a> <span class="n">select_without_ctes</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
-</span><span id="DataFrame-355"><a href="#DataFrame-355"><span class="linenos">355</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;expression&quot;</span><span class="p">,</span> <span class="n">select_without_ctes</span><span class="p">)</span>
-</span><span id="DataFrame-356"><a href="#DataFrame-356"><span class="linenos">356</span></a>
-</span><span id="DataFrame-357"><a href="#DataFrame-357"><span class="linenos">357</span></a> <span class="k">if</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">:</span>
-</span><span id="DataFrame-358"><a href="#DataFrame-358"><span class="linenos">358</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">With</span><span class="p">(</span><span class="n">expressions</span><span class="o">=</span><span class="n">select_expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">))</span>
-</span><span id="DataFrame-359"><a href="#DataFrame-359"><span class="linenos">359</span></a> <span class="k">elif</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">:</span>
-</span><span id="DataFrame-360"><a href="#DataFrame-360"><span class="linenos">360</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">select_expression</span>
-</span><span id="DataFrame-361"><a href="#DataFrame-361"><span class="linenos">361</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-362"><a href="#DataFrame-362"><span class="linenos">362</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Invalid expression type: </span><span class="si">{</span><span class="n">expression_type</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
-</span><span id="DataFrame-363"><a href="#DataFrame-363"><span class="linenos">363</span></a>
-</span><span id="DataFrame-364"><a href="#DataFrame-364"><span class="linenos">364</span></a> <span class="n">output_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-365"><a href="#DataFrame-365"><span class="linenos">365</span></a>
-</span><span id="DataFrame-366"><a href="#DataFrame-366"><span class="linenos">366</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">expression</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">output_expressions</span><span class="p">]</span>
-</span><span id="DataFrame-367"><a href="#DataFrame-367"><span class="linenos">367</span></a>
-</span><span id="DataFrame-368"><a href="#DataFrame-368"><span class="linenos">368</span></a> <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-369"><a href="#DataFrame-369"><span class="linenos">369</span></a> <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="o">**</span><span class="n">object_to_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">))</span>
-</span><span id="DataFrame-370"><a href="#DataFrame-370"><span class="linenos">370</span></a>
-</span><span id="DataFrame-371"><a href="#DataFrame-371"><span class="linenos">371</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-372"><a href="#DataFrame-372"><span class="linenos">372</span></a> <span class="k">def</span> <span class="nf">select</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-373"><a href="#DataFrame-373"><span class="linenos">373</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame-374"><a href="#DataFrame-374"><span class="linenos">374</span></a> <span class="n">kwargs</span><span class="p">[</span><span class="s2">&quot;append&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;append&quot;</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
-</span><span id="DataFrame-375"><a href="#DataFrame-375"><span class="linenos">375</span></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;joins&quot;</span><span class="p">):</span>
-</span><span id="DataFrame-376"><a href="#DataFrame-376"><span class="linenos">376</span></a> <span class="n">ambiguous_cols</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-377"><a href="#DataFrame-377"><span class="linenos">377</span></a> <span class="n">col</span>
-</span><span id="DataFrame-378"><a href="#DataFrame-378"><span class="linenos">378</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span>
-</span><span id="DataFrame-379"><a href="#DataFrame-379"><span class="linenos">379</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="o">.</span><span class="n">table</span>
-</span><span id="DataFrame-380"><a href="#DataFrame-380"><span class="linenos">380</span></a> <span class="p">]</span>
-</span><span id="DataFrame-381"><a href="#DataFrame-381"><span class="linenos">381</span></a> <span class="k">if</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
-</span><span id="DataFrame-382"><a href="#DataFrame-382"><span class="linenos">382</span></a> <span class="n">join_table_identifiers</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-383"><a href="#DataFrame-383"><span class="linenos">383</span></a> <span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-384"><a href="#DataFrame-384"><span class="linenos">384</span></a> <span class="p">]</span>
-</span><span id="DataFrame-385"><a href="#DataFrame-385"><span class="linenos">385</span></a> <span class="n">cte_names_in_join</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">join_table_identifiers</span><span class="p">]</span>
-</span><span id="DataFrame-386"><a href="#DataFrame-386"><span class="linenos">386</span></a> <span class="c1"># If we have columns that resolve to multiple CTE expressions then we want to use each CTE left-to-right</span>
-</span><span id="DataFrame-387"><a href="#DataFrame-387"><span class="linenos">387</span></a> <span class="c1"># and therefore we allow multiple columns with the same name in the result. This matches the behavior</span>
-</span><span id="DataFrame-388"><a href="#DataFrame-388"><span class="linenos">388</span></a> <span class="c1"># of Spark.</span>
-</span><span id="DataFrame-389"><a href="#DataFrame-389"><span class="linenos">389</span></a> <span class="n">resolved_column_position</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">col</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">}</span>
-</span><span id="DataFrame-390"><a href="#DataFrame-390"><span class="linenos">390</span></a> <span class="k">for</span> <span class="n">ambiguous_col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
-</span><span id="DataFrame-391"><a href="#DataFrame-391"><span class="linenos">391</span></a> <span class="n">ctes_with_column</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-392"><a href="#DataFrame-392"><span class="linenos">392</span></a> <span class="n">cte</span>
-</span><span id="DataFrame-393"><a href="#DataFrame-393"><span class="linenos">393</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span>
-</span><span id="DataFrame-394"><a href="#DataFrame-394"><span class="linenos">394</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte_names_in_join</span>
-</span><span id="DataFrame-395"><a href="#DataFrame-395"><span class="linenos">395</span></a> <span class="ow">and</span> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span>
-</span><span id="DataFrame-396"><a href="#DataFrame-396"><span class="linenos">396</span></a> <span class="p">]</span>
-</span><span id="DataFrame-397"><a href="#DataFrame-397"><span class="linenos">397</span></a> <span class="c1"># Check if there is a CTE with this column that we haven&#39;t used before. If so, use it. Otherwise,</span>
-</span><span id="DataFrame-398"><a href="#DataFrame-398"><span class="linenos">398</span></a> <span class="c1"># use the same CTE we used before</span>
-</span><span id="DataFrame-399"><a href="#DataFrame-399"><span class="linenos">399</span></a> <span class="n">cte</span> <span class="o">=</span> <span class="n">seq_get</span><span class="p">(</span><span class="n">ctes_with_column</span><span class="p">,</span> <span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
-</span><span id="DataFrame-400"><a href="#DataFrame-400"><span class="linenos">400</span></a> <span class="k">if</span> <span class="n">cte</span><span class="p">:</span>
-</span><span id="DataFrame-401"><a href="#DataFrame-401"><span class="linenos">401</span></a> <span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
-</span><span id="DataFrame-402"><a href="#DataFrame-402"><span class="linenos">402</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-403"><a href="#DataFrame-403"><span class="linenos">403</span></a> <span class="n">cte</span> <span class="o">=</span> <span class="n">ctes_with_column</span><span class="p">[</span><span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]]</span>
-</span><span id="DataFrame-404"><a href="#DataFrame-404"><span class="linenos">404</span></a> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;table&quot;</span><span class="p">,</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
-</span><span id="DataFrame-405"><a href="#DataFrame-405"><span class="linenos">405</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
-</span><span id="DataFrame-406"><a href="#DataFrame-406"><span class="linenos">406</span></a> <span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">expression</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">),</span> <span class="o">**</span><span class="n">kwargs</span>
-</span><span id="DataFrame-407"><a href="#DataFrame-407"><span class="linenos">407</span></a> <span class="p">)</span>
-</span><span id="DataFrame-408"><a href="#DataFrame-408"><span class="linenos">408</span></a>
-</span><span id="DataFrame-409"><a href="#DataFrame-409"><span class="linenos">409</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame-410"><a href="#DataFrame-410"><span class="linenos">410</span></a> <span class="k">def</span> <span class="nf">alias</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-411"><a href="#DataFrame-411"><span class="linenos">411</span></a> <span class="n">new_sequence_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_sequence_id</span>
-</span><span id="DataFrame-412"><a href="#DataFrame-412"><span class="linenos">412</span></a> <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-413"><a href="#DataFrame-413"><span class="linenos">413</span></a> <span class="k">for</span> <span class="n">join_hint</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">:</span>
-</span><span id="DataFrame-414"><a href="#DataFrame-414"><span class="linenos">414</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">join_hint</span><span class="o">.</span><span class="n">expressions</span><span class="p">:</span>
-</span><span id="DataFrame-415"><a href="#DataFrame-415"><span class="linenos">415</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">:</span>
-</span><span id="DataFrame-416"><a href="#DataFrame-416"><span class="linenos">416</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;this&quot;</span><span class="p">,</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_col</span><span class="p">(</span><span class="n">new_sequence_id</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-417"><a href="#DataFrame-417"><span class="linenos">417</span></a> <span class="n">df</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_add_alias_to_mapping</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">new_sequence_id</span><span class="p">)</span>
-</span><span id="DataFrame-418"><a href="#DataFrame-418"><span class="linenos">418</span></a> <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">(</span><span class="n">sequence_id</span><span class="o">=</span><span class="n">new_sequence_id</span><span class="p">)</span>
-</span><span id="DataFrame-419"><a href="#DataFrame-419"><span class="linenos">419</span></a>
-</span><span id="DataFrame-420"><a href="#DataFrame-420"><span class="linenos">420</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">WHERE</span><span class="p">)</span>
-</span><span id="DataFrame-421"><a href="#DataFrame-421"><span class="linenos">421</span></a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">bool</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-422"><a href="#DataFrame-422"><span class="linenos">422</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">column</span><span class="p">)</span>
-</span><span id="DataFrame-423"><a href="#DataFrame-423"><span class="linenos">423</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">))</span>
-</span><span id="DataFrame-424"><a href="#DataFrame-424"><span class="linenos">424</span></a>
-</span><span id="DataFrame-425"><a href="#DataFrame-425"><span class="linenos">425</span></a> <span class="nb">filter</span> <span class="o">=</span> <span class="n">where</span>
-</span><span id="DataFrame-426"><a href="#DataFrame-426"><span class="linenos">426</span></a>
-</span><span id="DataFrame-427"><a href="#DataFrame-427"><span class="linenos">427</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">GROUP_BY</span><span class="p">)</span>
-</span><span id="DataFrame-428"><a href="#DataFrame-428"><span class="linenos">428</span></a> <span class="k">def</span> <span class="nf">groupBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">GroupedData</span><span class="p">:</span>
-</span><span id="DataFrame-429"><a href="#DataFrame-429"><span class="linenos">429</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame-430"><a href="#DataFrame-430"><span class="linenos">430</span></a> <span class="k">return</span> <span class="n">GroupedData</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_op</span><span class="p">)</span>
-</span><span id="DataFrame-431"><a href="#DataFrame-431"><span class="linenos">431</span></a>
-</span><span id="DataFrame-432"><a href="#DataFrame-432"><span class="linenos">432</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-433"><a href="#DataFrame-433"><span class="linenos">433</span></a> <span class="k">def</span> <span class="nf">agg</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">exprs</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-434"><a href="#DataFrame-434"><span class="linenos">434</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">exprs</span><span class="p">)</span>
-</span><span id="DataFrame-435"><a href="#DataFrame-435"><span class="linenos">435</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="o">*</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame-436"><a href="#DataFrame-436"><span class="linenos">436</span></a>
-</span><span id="DataFrame-437"><a href="#DataFrame-437"><span class="linenos">437</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-438"><a href="#DataFrame-438"><span class="linenos">438</span></a> <span class="k">def</span> <span class="nf">join</span><span class="p">(</span>
-</span><span id="DataFrame-439"><a href="#DataFrame-439"><span class="linenos">439</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame-440"><a href="#DataFrame-440"><span class="linenos">440</span></a> <span class="n">other_df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
-</span><span id="DataFrame-441"><a href="#DataFrame-441"><span class="linenos">441</span></a> <span class="n">on</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">Column</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Column</span><span class="p">]],</span>
-</span><span id="DataFrame-442"><a href="#DataFrame-442"><span class="linenos">442</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;inner&quot;</span><span class="p">,</span>
-</span><span id="DataFrame-443"><a href="#DataFrame-443"><span class="linenos">443</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
-</span><span id="DataFrame-444"><a href="#DataFrame-444"><span class="linenos">444</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-445"><a href="#DataFrame-445"><span class="linenos">445</span></a> <span class="n">other_df</span> <span class="o">=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
-</span><span id="DataFrame-446"><a href="#DataFrame-446"><span class="linenos">446</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">on</span><span class="p">)</span>
-</span><span id="DataFrame-447"><a href="#DataFrame-447"><span class="linenos">447</span></a> <span class="c1"># We will determine actual &quot;join on&quot; expression later so we don&#39;t provide it at first</span>
-</span><span id="DataFrame-448"><a href="#DataFrame-448"><span class="linenos">448</span></a> <span class="n">join_expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
-</span><span id="DataFrame-449"><a href="#DataFrame-449"><span class="linenos">449</span></a> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">,</span> <span class="n">join_type</span><span class="o">=</span><span class="n">how</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
-</span><span id="DataFrame-450"><a href="#DataFrame-450"><span class="linenos">450</span></a> <span class="p">)</span>
-</span><span id="DataFrame-451"><a href="#DataFrame-451"><span class="linenos">451</span></a> <span class="n">join_expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_add_ctes_to_expression</span><span class="p">(</span><span class="n">join_expression</span><span class="p">,</span> <span class="n">other_df</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">)</span>
-</span><span id="DataFrame-452"><a href="#DataFrame-452"><span class="linenos">452</span></a> <span class="n">self_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">join_expression</span><span class="p">)</span>
-</span><span id="DataFrame-453"><a href="#DataFrame-453"><span class="linenos">453</span></a> <span class="n">other_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">other_df</span><span class="p">)</span>
-</span><span id="DataFrame-454"><a href="#DataFrame-454"><span class="linenos">454</span></a> <span class="c1"># Determines the join clause and select columns to be used passed on what type of columns were provided for</span>
-</span><span id="DataFrame-455"><a href="#DataFrame-455"><span class="linenos">455</span></a> <span class="c1"># the join. The columns returned changes based on how the on expression is provided.</span>
-</span><span id="DataFrame-456"><a href="#DataFrame-456"><span class="linenos">456</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">join_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
-</span><span id="DataFrame-457"><a href="#DataFrame-457"><span class="linenos">457</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame-458"><a href="#DataFrame-458"><span class="linenos">458</span></a><span class="sd"> Unique characteristics of join on column names only:</span>
-</span><span id="DataFrame-459"><a href="#DataFrame-459"><span class="linenos">459</span></a><span class="sd"> * The column names are put at the front of the select list</span>
-</span><span id="DataFrame-460"><a href="#DataFrame-460"><span class="linenos">460</span></a><span class="sd"> * The column names are deduplicated across the entire select list and only the column names (other dups are allowed)</span>
-</span><span id="DataFrame-461"><a href="#DataFrame-461"><span class="linenos">461</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame-462"><a href="#DataFrame-462"><span class="linenos">462</span></a> <span class="n">table_names</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-463"><a href="#DataFrame-463"><span class="linenos">463</span></a> <span class="n">table</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame-464"><a href="#DataFrame-464"><span class="linenos">464</span></a> <span class="k">for</span> <span class="n">table</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="n">join_expression</span><span class="p">)</span>
-</span><span id="DataFrame-465"><a href="#DataFrame-465"><span class="linenos">465</span></a> <span class="p">]</span>
-</span><span id="DataFrame-466"><a href="#DataFrame-466"><span class="linenos">466</span></a> <span class="n">potential_ctes</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-467"><a href="#DataFrame-467"><span class="linenos">467</span></a> <span class="n">cte</span>
-</span><span id="DataFrame-468"><a href="#DataFrame-468"><span class="linenos">468</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">join_expression</span><span class="o">.</span><span class="n">ctes</span>
-</span><span id="DataFrame-469"><a href="#DataFrame-469"><span class="linenos">469</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">table_names</span>
-</span><span id="DataFrame-470"><a href="#DataFrame-470"><span class="linenos">470</span></a> <span class="ow">and</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">!=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span>
-</span><span id="DataFrame-471"><a href="#DataFrame-471"><span class="linenos">471</span></a> <span class="p">]</span>
-</span><span id="DataFrame-472"><a href="#DataFrame-472"><span class="linenos">472</span></a> <span class="c1"># Determine the table to reference for the left side of the join by checking each of the left side</span>
-</span><span id="DataFrame-473"><a href="#DataFrame-473"><span class="linenos">473</span></a> <span class="c1"># tables and see if they have the column being referenced.</span>
-</span><span id="DataFrame-474"><a href="#DataFrame-474"><span class="linenos">474</span></a> <span class="n">join_column_pairs</span> <span class="o">=</span> <span class="p">[]</span>
-</span><span id="DataFrame-475"><a href="#DataFrame-475"><span class="linenos">475</span></a> <span class="k">for</span> <span class="n">join_column</span> <span class="ow">in</span> <span class="n">join_columns</span><span class="p">:</span>
-</span><span id="DataFrame-476"><a href="#DataFrame-476"><span class="linenos">476</span></a> <span class="n">num_matching_ctes</span> <span class="o">=</span> <span class="mi">0</span>
-</span><span id="DataFrame-477"><a href="#DataFrame-477"><span class="linenos">477</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">potential_ctes</span><span class="p">:</span>
-</span><span id="DataFrame-478"><a href="#DataFrame-478"><span class="linenos">478</span></a> <span class="k">if</span> <span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span><span class="p">:</span>
-</span><span id="DataFrame-479"><a href="#DataFrame-479"><span class="linenos">479</span></a> <span class="n">left_column</span> <span class="o">=</span> <span class="n">join_column</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
-</span><span id="DataFrame-480"><a href="#DataFrame-480"><span class="linenos">480</span></a> <span class="n">right_column</span> <span class="o">=</span> <span class="n">join_column</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">)</span>
-</span><span id="DataFrame-481"><a href="#DataFrame-481"><span class="linenos">481</span></a> <span class="n">join_column_pairs</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">left_column</span><span class="p">,</span> <span class="n">right_column</span><span class="p">))</span>
-</span><span id="DataFrame-482"><a href="#DataFrame-482"><span class="linenos">482</span></a> <span class="n">num_matching_ctes</span> <span class="o">+=</span> <span class="mi">1</span>
-</span><span id="DataFrame-483"><a href="#DataFrame-483"><span class="linenos">483</span></a> <span class="k">if</span> <span class="n">num_matching_ctes</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
-</span><span id="DataFrame-484"><a href="#DataFrame-484"><span class="linenos">484</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
-</span><span id="DataFrame-485"><a href="#DataFrame-485"><span class="linenos">485</span></a> <span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="si">}</span><span class="s2"> is ambiguous. Please specify the table name.&quot;</span>
-</span><span id="DataFrame-486"><a href="#DataFrame-486"><span class="linenos">486</span></a> <span class="p">)</span>
-</span><span id="DataFrame-487"><a href="#DataFrame-487"><span class="linenos">487</span></a> <span class="k">elif</span> <span class="n">num_matching_ctes</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
-</span><span id="DataFrame-488"><a href="#DataFrame-488"><span class="linenos">488</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
-</span><span id="DataFrame-489"><a href="#DataFrame-489"><span class="linenos">489</span></a> <span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="si">}</span><span class="s2"> does not exist in any of the tables.&quot;</span>
-</span><span id="DataFrame-490"><a href="#DataFrame-490"><span class="linenos">490</span></a> <span class="p">)</span>
-</span><span id="DataFrame-491"><a href="#DataFrame-491"><span class="linenos">491</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span>
-</span><span id="DataFrame-492"><a href="#DataFrame-492"><span class="linenos">492</span></a> <span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span>
-</span><span id="DataFrame-493"><a href="#DataFrame-493"><span class="linenos">493</span></a> <span class="p">[</span><span class="n">left_column</span> <span class="o">==</span> <span class="n">right_column</span> <span class="k">for</span> <span class="n">left_column</span><span class="p">,</span> <span class="n">right_column</span> <span class="ow">in</span> <span class="n">join_column_pairs</span><span class="p">],</span>
-</span><span id="DataFrame-494"><a href="#DataFrame-494"><span class="linenos">494</span></a> <span class="p">)</span>
-</span><span id="DataFrame-495"><a href="#DataFrame-495"><span class="linenos">495</span></a> <span class="n">join_column_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">left_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">left_col</span><span class="p">,</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">join_column_pairs</span><span class="p">]</span>
-</span><span id="DataFrame-496"><a href="#DataFrame-496"><span class="linenos">496</span></a> <span class="c1"># To match spark behavior only the join clause gets deduplicated and it gets put in the front of the column list</span>
-</span><span id="DataFrame-497"><a href="#DataFrame-497"><span class="linenos">497</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-498"><a href="#DataFrame-498"><span class="linenos">498</span></a> <span class="p">(</span>
-</span><span id="DataFrame-499"><a href="#DataFrame-499"><span class="linenos">499</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame-500"><a href="#DataFrame-500"><span class="linenos">500</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">this</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Star</span><span class="p">)</span>
-</span><span id="DataFrame-501"><a href="#DataFrame-501"><span class="linenos">501</span></a> <span class="k">else</span> <span class="n">column</span><span class="o">.</span><span class="n">sql</span><span class="p">()</span>
-</span><span id="DataFrame-502"><a href="#DataFrame-502"><span class="linenos">502</span></a> <span class="p">)</span>
-</span><span id="DataFrame-503"><a href="#DataFrame-503"><span class="linenos">503</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span>
-</span><span id="DataFrame-504"><a href="#DataFrame-504"><span class="linenos">504</span></a> <span class="p">]</span>
-</span><span id="DataFrame-505"><a href="#DataFrame-505"><span class="linenos">505</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-506"><a href="#DataFrame-506"><span class="linenos">506</span></a> <span class="n">column_name</span>
-</span><span id="DataFrame-507"><a href="#DataFrame-507"><span class="linenos">507</span></a> <span class="k">for</span> <span class="n">column_name</span> <span class="ow">in</span> <span class="n">select_column_names</span>
-</span><span id="DataFrame-508"><a href="#DataFrame-508"><span class="linenos">508</span></a> <span class="k">if</span> <span class="n">column_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">join_column_names</span>
-</span><span id="DataFrame-509"><a href="#DataFrame-509"><span class="linenos">509</span></a> <span class="p">]</span>
-</span><span id="DataFrame-510"><a href="#DataFrame-510"><span class="linenos">510</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="n">join_column_names</span> <span class="o">+</span> <span class="n">select_column_names</span>
-</span><span id="DataFrame-511"><a href="#DataFrame-511"><span class="linenos">511</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-512"><a href="#DataFrame-512"><span class="linenos">512</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame-513"><a href="#DataFrame-513"><span class="linenos">513</span></a><span class="sd"> Unique characteristics of join on expressions:</span>
-</span><span id="DataFrame-514"><a href="#DataFrame-514"><span class="linenos">514</span></a><span class="sd"> * There is no deduplication of the results.</span>
-</span><span id="DataFrame-515"><a href="#DataFrame-515"><span class="linenos">515</span></a><span class="sd"> * The left join dataframe columns go first and right come after. No sort preference is given to join columns</span>
-</span><span id="DataFrame-516"><a href="#DataFrame-516"><span class="linenos">516</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame-517"><a href="#DataFrame-517"><span class="linenos">517</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">join_columns</span><span class="p">,</span> <span class="n">join_expression</span><span class="p">)</span>
-</span><span id="DataFrame-518"><a href="#DataFrame-518"><span class="linenos">518</span></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">join_columns</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
-</span><span id="DataFrame-519"><a href="#DataFrame-519"><span class="linenos">519</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">join_columns</span><span class="p">)]</span>
-</span><span id="DataFrame-520"><a href="#DataFrame-520"><span class="linenos">520</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">join_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
-</span><span id="DataFrame-521"><a href="#DataFrame-521"><span class="linenos">521</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span><span class="p">]</span>
-</span><span id="DataFrame-522"><a href="#DataFrame-522"><span class="linenos">522</span></a>
-</span><span id="DataFrame-523"><a href="#DataFrame-523"><span class="linenos">523</span></a> <span class="c1"># Update the on expression with the actual join clause to replace the dummy one from before</span>
-</span><span id="DataFrame-524"><a href="#DataFrame-524"><span class="linenos">524</span></a> <span class="n">join_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;joins&quot;</span><span class="p">][</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;on&quot;</span><span class="p">,</span> <span class="n">join_clause</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-525"><a href="#DataFrame-525"><span class="linenos">525</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">join_expression</span><span class="p">)</span>
-</span><span id="DataFrame-526"><a href="#DataFrame-526"><span class="linenos">526</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">)</span>
-</span><span id="DataFrame-527"><a href="#DataFrame-527"><span class="linenos">527</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="p">)</span>
-</span><span id="DataFrame-528"><a href="#DataFrame-528"><span class="linenos">528</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="o">.</span><span class="n">__wrapped__</span><span class="p">(</span><span class="n">new_df</span><span class="p">,</span> <span class="o">*</span><span class="n">select_column_names</span><span class="p">)</span>
-</span><span id="DataFrame-529"><a href="#DataFrame-529"><span class="linenos">529</span></a> <span class="k">return</span> <span class="n">new_df</span>
-</span><span id="DataFrame-530"><a href="#DataFrame-530"><span class="linenos">530</span></a>
-</span><span id="DataFrame-531"><a href="#DataFrame-531"><span class="linenos">531</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">ORDER_BY</span><span class="p">)</span>
-</span><span id="DataFrame-532"><a href="#DataFrame-532"><span class="linenos">532</span></a> <span class="k">def</span> <span class="nf">orderBy</span><span class="p">(</span>
-</span><span id="DataFrame-533"><a href="#DataFrame-533"><span class="linenos">533</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame-534"><a href="#DataFrame-534"><span class="linenos">534</span></a> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span>
-</span><span id="DataFrame-535"><a href="#DataFrame-535"><span class="linenos">535</span></a> <span class="n">ascending</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-536"><a href="#DataFrame-536"><span class="linenos">536</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-537"><a href="#DataFrame-537"><span class="linenos">537</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame-538"><a href="#DataFrame-538"><span class="linenos">538</span></a><span class="sd"> This implementation lets any ordered columns take priority over whatever is provided in `ascending`. Spark</span>
-</span><span id="DataFrame-539"><a href="#DataFrame-539"><span class="linenos">539</span></a><span class="sd"> has irregular behavior and can result in runtime errors. Users shouldn&#39;t be mixing the two anyways so this</span>
-</span><span id="DataFrame-540"><a href="#DataFrame-540"><span class="linenos">540</span></a><span class="sd"> is unlikely to come up.</span>
-</span><span id="DataFrame-541"><a href="#DataFrame-541"><span class="linenos">541</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame-542"><a href="#DataFrame-542"><span class="linenos">542</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame-543"><a href="#DataFrame-543"><span class="linenos">543</span></a> <span class="n">pre_ordered_col_indexes</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-544"><a href="#DataFrame-544"><span class="linenos">544</span></a> <span class="n">i</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">)</span>
-</span><span id="DataFrame-545"><a href="#DataFrame-545"><span class="linenos">545</span></a> <span class="p">]</span>
-</span><span id="DataFrame-546"><a href="#DataFrame-546"><span class="linenos">546</span></a> <span class="k">if</span> <span class="n">ascending</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="DataFrame-547"><a href="#DataFrame-547"><span class="linenos">547</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="kc">True</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame-548"><a href="#DataFrame-548"><span class="linenos">548</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ascending</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
-</span><span id="DataFrame-549"><a href="#DataFrame-549"><span class="linenos">549</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="n">ascending</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame-550"><a href="#DataFrame-550"><span class="linenos">550</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ascending</span><span class="p">)]</span>
-</span><span id="DataFrame-551"><a href="#DataFrame-551"><span class="linenos">551</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
-</span><span id="DataFrame-552"><a href="#DataFrame-552"><span class="linenos">552</span></a> <span class="n">ascending</span>
-</span><span id="DataFrame-553"><a href="#DataFrame-553"><span class="linenos">553</span></a> <span class="p">),</span> <span class="s2">&quot;The length of items in ascending must equal the number of columns provided&quot;</span>
-</span><span id="DataFrame-554"><a href="#DataFrame-554"><span class="linenos">554</span></a> <span class="n">col_and_ascending</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">ascending</span><span class="p">))</span>
-</span><span id="DataFrame-555"><a href="#DataFrame-555"><span class="linenos">555</span></a> <span class="n">order_by_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-556"><a href="#DataFrame-556"><span class="linenos">556</span></a> <span class="p">(</span>
-</span><span id="DataFrame-557"><a href="#DataFrame-557"><span class="linenos">557</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="ow">not</span> <span class="n">asc</span><span class="p">)</span>
-</span><span id="DataFrame-558"><a href="#DataFrame-558"><span class="linenos">558</span></a> <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">pre_ordered_col_indexes</span>
-</span><span id="DataFrame-559"><a href="#DataFrame-559"><span class="linenos">559</span></a> <span class="k">else</span> <span class="n">columns</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">column_expression</span>
-</span><span id="DataFrame-560"><a href="#DataFrame-560"><span class="linenos">560</span></a> <span class="p">)</span>
-</span><span id="DataFrame-561"><a href="#DataFrame-561"><span class="linenos">561</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">asc</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">col_and_ascending</span><span class="p">)</span>
-</span><span id="DataFrame-562"><a href="#DataFrame-562"><span class="linenos">562</span></a> <span class="p">]</span>
-</span><span id="DataFrame-563"><a href="#DataFrame-563"><span class="linenos">563</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="o">*</span><span class="n">order_by_columns</span><span class="p">))</span>
-</span><span id="DataFrame-564"><a href="#DataFrame-564"><span class="linenos">564</span></a>
-</span><span id="DataFrame-565"><a href="#DataFrame-565"><span class="linenos">565</span></a> <span class="n">sort</span> <span class="o">=</span> <span class="n">orderBy</span>
-</span><span id="DataFrame-566"><a href="#DataFrame-566"><span class="linenos">566</span></a>
-</span><span id="DataFrame-567"><a href="#DataFrame-567"><span class="linenos">567</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-568"><a href="#DataFrame-568"><span class="linenos">568</span></a> <span class="k">def</span> <span class="nf">union</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-569"><a href="#DataFrame-569"><span class="linenos">569</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
-</span><span id="DataFrame-570"><a href="#DataFrame-570"><span class="linenos">570</span></a>
-</span><span id="DataFrame-571"><a href="#DataFrame-571"><span class="linenos">571</span></a> <span class="n">unionAll</span> <span class="o">=</span> <span class="n">union</span>
-</span><span id="DataFrame-572"><a href="#DataFrame-572"><span class="linenos">572</span></a>
-</span><span id="DataFrame-573"><a href="#DataFrame-573"><span class="linenos">573</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-574"><a href="#DataFrame-574"><span class="linenos">574</span></a> <span class="k">def</span> <span class="nf">unionByName</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">allowMissingColumns</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
-</span><span id="DataFrame-575"><a href="#DataFrame-575"><span class="linenos">575</span></a> <span class="n">l_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">columns</span>
-</span><span id="DataFrame-576"><a href="#DataFrame-576"><span class="linenos">576</span></a> <span class="n">r_columns</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">columns</span>
-</span><span id="DataFrame-577"><a href="#DataFrame-577"><span class="linenos">577</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
-</span><span id="DataFrame-578"><a href="#DataFrame-578"><span class="linenos">578</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
-</span><span id="DataFrame-579"><a href="#DataFrame-579"><span class="linenos">579</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
-</span><span id="DataFrame-580"><a href="#DataFrame-580"><span class="linenos">580</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-581"><a href="#DataFrame-581"><span class="linenos">581</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="p">[]</span>
-</span><span id="DataFrame-582"><a href="#DataFrame-582"><span class="linenos">582</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="p">[]</span>
-</span><span id="DataFrame-583"><a href="#DataFrame-583"><span class="linenos">583</span></a> <span class="n">r_columns_unused</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="n">r_columns</span><span class="p">)</span>
-</span><span id="DataFrame-584"><a href="#DataFrame-584"><span class="linenos">584</span></a> <span class="k">for</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">l_columns</span><span class="p">:</span>
-</span><span id="DataFrame-585"><a href="#DataFrame-585"><span class="linenos">585</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
-</span><span id="DataFrame-586"><a href="#DataFrame-586"><span class="linenos">586</span></a> <span class="k">if</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">r_columns</span><span class="p">:</span>
-</span><span id="DataFrame-587"><a href="#DataFrame-587"><span class="linenos">587</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
-</span><span id="DataFrame-588"><a href="#DataFrame-588"><span class="linenos">588</span></a> <span class="n">r_columns_unused</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
-</span><span id="DataFrame-589"><a href="#DataFrame-589"><span class="linenos">589</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-590"><a href="#DataFrame-590"><span class="linenos">590</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">l_column</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
-</span><span id="DataFrame-591"><a href="#DataFrame-591"><span class="linenos">591</span></a> <span class="k">for</span> <span class="n">r_column</span> <span class="ow">in</span> <span class="n">r_columns_unused</span><span class="p">:</span>
-</span><span id="DataFrame-592"><a href="#DataFrame-592"><span class="linenos">592</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">r_column</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
-</span><span id="DataFrame-593"><a href="#DataFrame-593"><span class="linenos">593</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">r_column</span><span class="p">)</span>
-</span><span id="DataFrame-594"><a href="#DataFrame-594"><span class="linenos">594</span></a> <span class="n">r_df</span> <span class="o">=</span> <span class="p">(</span>
-</span><span id="DataFrame-595"><a href="#DataFrame-595"><span class="linenos">595</span></a> <span class="n">other</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">r_expressions</span><span class="p">))</span>
-</span><span id="DataFrame-596"><a href="#DataFrame-596"><span class="linenos">596</span></a> <span class="p">)</span>
-</span><span id="DataFrame-597"><a href="#DataFrame-597"><span class="linenos">597</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-598"><a href="#DataFrame-598"><span class="linenos">598</span></a> <span class="k">if</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
-</span><span id="DataFrame-599"><a href="#DataFrame-599"><span class="linenos">599</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">l_expressions</span><span class="p">))</span>
-</span><span id="DataFrame-600"><a href="#DataFrame-600"><span class="linenos">600</span></a> <span class="k">return</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">r_df</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
-</span><span id="DataFrame-601"><a href="#DataFrame-601"><span class="linenos">601</span></a>
-</span><span id="DataFrame-602"><a href="#DataFrame-602"><span class="linenos">602</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-603"><a href="#DataFrame-603"><span class="linenos">603</span></a> <span class="k">def</span> <span class="nf">intersect</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-604"><a href="#DataFrame-604"><span class="linenos">604</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrame-605"><a href="#DataFrame-605"><span class="linenos">605</span></a>
-</span><span id="DataFrame-606"><a href="#DataFrame-606"><span class="linenos">606</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-607"><a href="#DataFrame-607"><span class="linenos">607</span></a> <span class="k">def</span> <span class="nf">intersectAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-608"><a href="#DataFrame-608"><span class="linenos">608</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
-</span><span id="DataFrame-609"><a href="#DataFrame-609"><span class="linenos">609</span></a>
-</span><span id="DataFrame-610"><a href="#DataFrame-610"><span class="linenos">610</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-611"><a href="#DataFrame-611"><span class="linenos">611</span></a> <span class="k">def</span> <span class="nf">exceptAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-612"><a href="#DataFrame-612"><span class="linenos">612</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Except</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
-</span><span id="DataFrame-613"><a href="#DataFrame-613"><span class="linenos">613</span></a>
-</span><span id="DataFrame-614"><a href="#DataFrame-614"><span class="linenos">614</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-615"><a href="#DataFrame-615"><span class="linenos">615</span></a> <span class="k">def</span> <span class="nf">distinct</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-616"><a href="#DataFrame-616"><span class="linenos">616</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">distinct</span><span class="p">())</span>
-</span><span id="DataFrame-617"><a href="#DataFrame-617"><span class="linenos">617</span></a>
-</span><span id="DataFrame-618"><a href="#DataFrame-618"><span class="linenos">618</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-619"><a href="#DataFrame-619"><span class="linenos">619</span></a> <span class="k">def</span> <span class="nf">dropDuplicates</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
-</span><span id="DataFrame-620"><a href="#DataFrame-620"><span class="linenos">620</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">subset</span><span class="p">:</span>
-</span><span id="DataFrame-621"><a href="#DataFrame-621"><span class="linenos">621</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
-</span><span id="DataFrame-622"><a href="#DataFrame-622"><span class="linenos">622</span></a> <span class="n">column_names</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
-</span><span id="DataFrame-623"><a href="#DataFrame-623"><span class="linenos">623</span></a> <span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span>
-</span><span id="DataFrame-624"><a href="#DataFrame-624"><span class="linenos">624</span></a> <span class="k">return</span> <span class="p">(</span>
-</span><span id="DataFrame-625"><a href="#DataFrame-625"><span class="linenos">625</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-626"><a href="#DataFrame-626"><span class="linenos">626</span></a> <span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span>
-</span><span id="DataFrame-627"><a href="#DataFrame-627"><span class="linenos">627</span></a> <span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
-</span><span id="DataFrame-628"><a href="#DataFrame-628"><span class="linenos">628</span></a> <span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span>
-</span><span id="DataFrame-629"><a href="#DataFrame-629"><span class="linenos">629</span></a> <span class="p">)</span>
-</span><span id="DataFrame-630"><a href="#DataFrame-630"><span class="linenos">630</span></a>
-</span><span id="DataFrame-631"><a href="#DataFrame-631"><span class="linenos">631</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-632"><a href="#DataFrame-632"><span class="linenos">632</span></a> <span class="k">def</span> <span class="nf">dropna</span><span class="p">(</span>
-</span><span id="DataFrame-633"><a href="#DataFrame-633"><span class="linenos">633</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame-634"><a href="#DataFrame-634"><span class="linenos">634</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
-</span><span id="DataFrame-635"><a href="#DataFrame-635"><span class="linenos">635</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-636"><a href="#DataFrame-636"><span class="linenos">636</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-637"><a href="#DataFrame-637"><span class="linenos">637</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-638"><a href="#DataFrame-638"><span class="linenos">638</span></a> <span class="n">minimum_non_null</span> <span class="o">=</span> <span class="n">thresh</span> <span class="ow">or</span> <span class="mi">0</span> <span class="c1"># will be determined later if thresh is null</span>
-</span><span id="DataFrame-639"><a href="#DataFrame-639"><span class="linenos">639</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-640"><a href="#DataFrame-640"><span class="linenos">640</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-641"><a href="#DataFrame-641"><span class="linenos">641</span></a> <span class="k">if</span> <span class="n">subset</span><span class="p">:</span>
-</span><span id="DataFrame-642"><a href="#DataFrame-642"><span class="linenos">642</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
-</span><span id="DataFrame-643"><a href="#DataFrame-643"><span class="linenos">643</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-644"><a href="#DataFrame-644"><span class="linenos">644</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="n">all_columns</span>
-</span><span id="DataFrame-645"><a href="#DataFrame-645"><span class="linenos">645</span></a> <span class="k">if</span> <span class="n">thresh</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="DataFrame-646"><a href="#DataFrame-646"><span class="linenos">646</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">how</span> <span class="o">==</span> <span class="s2">&quot;any&quot;</span> <span class="k">else</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span>
-</span><span id="DataFrame-647"><a href="#DataFrame-647"><span class="linenos">647</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-648"><a href="#DataFrame-648"><span class="linenos">648</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span> <span class="o">-</span> <span class="n">minimum_non_null</span> <span class="o">+</span> <span class="mi">1</span>
-</span><span id="DataFrame-649"><a href="#DataFrame-649"><span class="linenos">649</span></a> <span class="k">if</span> <span class="n">minimum_num_nulls</span> <span class="o">&gt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">):</span>
-</span><span id="DataFrame-650"><a href="#DataFrame-650"><span class="linenos">650</span></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
-</span><span id="DataFrame-651"><a href="#DataFrame-651"><span class="linenos">651</span></a> <span class="sa">f</span><span class="s2">&quot;The minimum num nulls for dropna must be less than or equal to the number of columns. &quot;</span>
-</span><span id="DataFrame-652"><a href="#DataFrame-652"><span class="linenos">652</span></a> <span class="sa">f</span><span class="s2">&quot;Minimum num nulls: </span><span class="si">{</span><span class="n">minimum_num_nulls</span><span class="si">}</span><span class="s2">, Num Columns: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
-</span><span id="DataFrame-653"><a href="#DataFrame-653"><span class="linenos">653</span></a> <span class="p">)</span>
-</span><span id="DataFrame-654"><a href="#DataFrame-654"><span class="linenos">654</span></a> <span class="n">if_null_checks</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-655"><a href="#DataFrame-655"><span class="linenos">655</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">null_check_columns</span>
-</span><span id="DataFrame-656"><a href="#DataFrame-656"><span class="linenos">656</span></a> <span class="p">]</span>
-</span><span id="DataFrame-657"><a href="#DataFrame-657"><span class="linenos">657</span></a> <span class="n">nulls_added_together</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">+</span> <span class="n">y</span><span class="p">,</span> <span class="n">if_null_checks</span><span class="p">)</span>
-</span><span id="DataFrame-658"><a href="#DataFrame-658"><span class="linenos">658</span></a> <span class="n">num_nulls</span> <span class="o">=</span> <span class="n">nulls_added_together</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span>
-</span><span id="DataFrame-659"><a href="#DataFrame-659"><span class="linenos">659</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">num_nulls</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrame-660"><a href="#DataFrame-660"><span class="linenos">660</span></a> <span class="n">filtered_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">minimum_num_nulls</span><span class="p">))</span>
-</span><span id="DataFrame-661"><a href="#DataFrame-661"><span class="linenos">661</span></a> <span class="n">final_df</span> <span class="o">=</span> <span class="n">filtered_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">all_columns</span><span class="p">)</span>
-</span><span id="DataFrame-662"><a href="#DataFrame-662"><span class="linenos">662</span></a> <span class="k">return</span> <span class="n">final_df</span>
-</span><span id="DataFrame-663"><a href="#DataFrame-663"><span class="linenos">663</span></a>
-</span><span id="DataFrame-664"><a href="#DataFrame-664"><span class="linenos">664</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-665"><a href="#DataFrame-665"><span class="linenos">665</span></a> <span class="k">def</span> <span class="nf">fillna</span><span class="p">(</span>
-</span><span id="DataFrame-666"><a href="#DataFrame-666"><span class="linenos">666</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame-667"><a href="#DataFrame-667"><span class="linenos">667</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">ColumnLiterals</span><span class="p">],</span>
-</span><span id="DataFrame-668"><a href="#DataFrame-668"><span class="linenos">668</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-669"><a href="#DataFrame-669"><span class="linenos">669</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-670"><a href="#DataFrame-670"><span class="linenos">670</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame-671"><a href="#DataFrame-671"><span class="linenos">671</span></a><span class="sd"> Functionality Difference: If you provide a value to replace a null and that type conflicts</span>
-</span><span id="DataFrame-672"><a href="#DataFrame-672"><span class="linenos">672</span></a><span class="sd"> with the type of the column then PySpark will just ignore your replacement.</span>
-</span><span id="DataFrame-673"><a href="#DataFrame-673"><span class="linenos">673</span></a><span class="sd"> This will try to cast them to be the same in some cases. So they won&#39;t always match.</span>
-</span><span id="DataFrame-674"><a href="#DataFrame-674"><span class="linenos">674</span></a><span class="sd"> Best to not mix types so make sure replacement is the same type as the column</span>
-</span><span id="DataFrame-675"><a href="#DataFrame-675"><span class="linenos">675</span></a>
-</span><span id="DataFrame-676"><a href="#DataFrame-676"><span class="linenos">676</span></a><span class="sd"> Possibility for improvement: Use `typeof` function to get the type of the column</span>
-</span><span id="DataFrame-677"><a href="#DataFrame-677"><span class="linenos">677</span></a><span class="sd"> and check if it matches the type of the value provided. If not then make it null.</span>
-</span><span id="DataFrame-678"><a href="#DataFrame-678"><span class="linenos">678</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame-679"><a href="#DataFrame-679"><span class="linenos">679</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
-</span><span id="DataFrame-680"><a href="#DataFrame-680"><span class="linenos">680</span></a>
-</span><span id="DataFrame-681"><a href="#DataFrame-681"><span class="linenos">681</span></a> <span class="n">values</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="DataFrame-682"><a href="#DataFrame-682"><span class="linenos">682</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="DataFrame-683"><a href="#DataFrame-683"><span class="linenos">683</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-684"><a href="#DataFrame-684"><span class="linenos">684</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-685"><a href="#DataFrame-685"><span class="linenos">685</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
-</span><span id="DataFrame-686"><a href="#DataFrame-686"><span class="linenos">686</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
-</span><span id="DataFrame-687"><a href="#DataFrame-687"><span class="linenos">687</span></a> <span class="n">values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
-</span><span id="DataFrame-688"><a href="#DataFrame-688"><span class="linenos">688</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="p">))</span>
-</span><span id="DataFrame-689"><a href="#DataFrame-689"><span class="linenos">689</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">columns</span><span class="p">:</span>
-</span><span id="DataFrame-690"><a href="#DataFrame-690"><span class="linenos">690</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
-</span><span id="DataFrame-691"><a href="#DataFrame-691"><span class="linenos">691</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">values</span><span class="p">:</span>
-</span><span id="DataFrame-692"><a href="#DataFrame-692"><span class="linenos">692</span></a> <span class="n">values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame-693"><a href="#DataFrame-693"><span class="linenos">693</span></a> <span class="n">value_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">values</span><span class="p">]</span>
-</span><span id="DataFrame-694"><a href="#DataFrame-694"><span class="linenos">694</span></a>
-</span><span id="DataFrame-695"><a href="#DataFrame-695"><span class="linenos">695</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span>
-</span><span id="DataFrame-696"><a href="#DataFrame-696"><span class="linenos">696</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="p">(</span>
-</span><span id="DataFrame-697"><a href="#DataFrame-697"><span class="linenos">697</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
-</span><span id="DataFrame-698"><a href="#DataFrame-698"><span class="linenos">698</span></a> <span class="p">)</span>
-</span><span id="DataFrame-699"><a href="#DataFrame-699"><span class="linenos">699</span></a> <span class="k">for</span> <span class="n">column</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">value_columns</span><span class="p">)</span>
-</span><span id="DataFrame-700"><a href="#DataFrame-700"><span class="linenos">700</span></a> <span class="p">}</span>
-</span><span id="DataFrame-701"><a href="#DataFrame-701"><span class="linenos">701</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">null_replacement_mapping</span><span class="p">}</span>
-</span><span id="DataFrame-702"><a href="#DataFrame-702"><span class="linenos">702</span></a> <span class="n">null_replacement_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-703"><a href="#DataFrame-703"><span class="linenos">703</span></a> <span class="n">null_replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span>
-</span><span id="DataFrame-704"><a href="#DataFrame-704"><span class="linenos">704</span></a> <span class="p">]</span>
-</span><span id="DataFrame-705"><a href="#DataFrame-705"><span class="linenos">705</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">null_replacement_columns</span><span class="p">)</span>
-</span><span id="DataFrame-706"><a href="#DataFrame-706"><span class="linenos">706</span></a> <span class="k">return</span> <span class="n">new_df</span>
-</span><span id="DataFrame-707"><a href="#DataFrame-707"><span class="linenos">707</span></a>
-</span><span id="DataFrame-708"><a href="#DataFrame-708"><span class="linenos">708</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame-709"><a href="#DataFrame-709"><span class="linenos">709</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
-</span><span id="DataFrame-710"><a href="#DataFrame-710"><span class="linenos">710</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame-711"><a href="#DataFrame-711"><span class="linenos">711</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
-</span><span id="DataFrame-712"><a href="#DataFrame-712"><span class="linenos">712</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-713"><a href="#DataFrame-713"><span class="linenos">713</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Collection</span><span class="p">[</span><span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">|</span> <span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame-714"><a href="#DataFrame-714"><span class="linenos">714</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-715"><a href="#DataFrame-715"><span class="linenos">715</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
-</span><span id="DataFrame-716"><a href="#DataFrame-716"><span class="linenos">716</span></a>
-</span><span id="DataFrame-717"><a href="#DataFrame-717"><span class="linenos">717</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="DataFrame-718"><a href="#DataFrame-718"><span class="linenos">718</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-719"><a href="#DataFrame-719"><span class="linenos">719</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-720"><a href="#DataFrame-720"><span class="linenos">720</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
-</span><span id="DataFrame-721"><a href="#DataFrame-721"><span class="linenos">721</span></a>
-</span><span id="DataFrame-722"><a href="#DataFrame-722"><span class="linenos">722</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
-</span><span id="DataFrame-723"><a href="#DataFrame-723"><span class="linenos">723</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
-</span><span id="DataFrame-724"><a href="#DataFrame-724"><span class="linenos">724</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span>
-</span><span id="DataFrame-725"><a href="#DataFrame-725"><span class="linenos">725</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
-</span><span id="DataFrame-726"><a href="#DataFrame-726"><span class="linenos">726</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="n">old_values</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
-</span><span id="DataFrame-727"><a href="#DataFrame-727"><span class="linenos">727</span></a> <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">list</span><span class="p">),</span> <span class="s2">&quot;value must be a list since the replacements are a list&quot;</span>
-</span><span id="DataFrame-728"><a href="#DataFrame-728"><span class="linenos">728</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
-</span><span id="DataFrame-729"><a href="#DataFrame-729"><span class="linenos">729</span></a> <span class="n">value</span>
-</span><span id="DataFrame-730"><a href="#DataFrame-730"><span class="linenos">730</span></a> <span class="p">),</span> <span class="s2">&quot;the replacements and values must be the same length&quot;</span>
-</span><span id="DataFrame-731"><a href="#DataFrame-731"><span class="linenos">731</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="n">to_replace</span>
-</span><span id="DataFrame-732"><a href="#DataFrame-732"><span class="linenos">732</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="n">value</span>
-</span><span id="DataFrame-733"><a href="#DataFrame-733"><span class="linenos">733</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-734"><a href="#DataFrame-734"><span class="linenos">734</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">to_replace</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame-735"><a href="#DataFrame-735"><span class="linenos">735</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame-736"><a href="#DataFrame-736"><span class="linenos">736</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">old_values</span><span class="p">]</span>
-</span><span id="DataFrame-737"><a href="#DataFrame-737"><span class="linenos">737</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">new_values</span><span class="p">]</span>
-</span><span id="DataFrame-738"><a href="#DataFrame-738"><span class="linenos">738</span></a>
-</span><span id="DataFrame-739"><a href="#DataFrame-739"><span class="linenos">739</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{}</span>
-</span><span id="DataFrame-740"><a href="#DataFrame-740"><span class="linenos">740</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span>
-</span><span id="DataFrame-741"><a href="#DataFrame-741"><span class="linenos">741</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
-</span><span id="DataFrame-742"><a href="#DataFrame-742"><span class="linenos">742</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">old_values</span><span class="p">,</span> <span class="n">new_values</span><span class="p">)):</span>
-</span><span id="DataFrame-743"><a href="#DataFrame-743"><span class="linenos">743</span></a> <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
-</span><span id="DataFrame-744"><a href="#DataFrame-744"><span class="linenos">744</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span>
-</span><span id="DataFrame-745"><a href="#DataFrame-745"><span class="linenos">745</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-746"><a href="#DataFrame-746"><span class="linenos">746</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="c1"># type: ignore</span>
-</span><span id="DataFrame-747"><a href="#DataFrame-747"><span class="linenos">747</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span>
-</span><span id="DataFrame-748"><a href="#DataFrame-748"><span class="linenos">748</span></a> <span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame-749"><a href="#DataFrame-749"><span class="linenos">749</span></a> <span class="p">)</span>
-</span><span id="DataFrame-750"><a href="#DataFrame-750"><span class="linenos">750</span></a>
-</span><span id="DataFrame-751"><a href="#DataFrame-751"><span class="linenos">751</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">replacement_mapping</span><span class="p">}</span>
-</span><span id="DataFrame-752"><a href="#DataFrame-752"><span class="linenos">752</span></a> <span class="n">replacement_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">]</span>
-</span><span id="DataFrame-753"><a href="#DataFrame-753"><span class="linenos">753</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">replacement_columns</span><span class="p">)</span>
-</span><span id="DataFrame-754"><a href="#DataFrame-754"><span class="linenos">754</span></a> <span class="k">return</span> <span class="n">new_df</span>
-</span><span id="DataFrame-755"><a href="#DataFrame-755"><span class="linenos">755</span></a>
-</span><span id="DataFrame-756"><a href="#DataFrame-756"><span class="linenos">756</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-757"><a href="#DataFrame-757"><span class="linenos">757</span></a> <span class="k">def</span> <span class="nf">withColumn</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">colName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-758"><a href="#DataFrame-758"><span class="linenos">758</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
-</span><span id="DataFrame-759"><a href="#DataFrame-759"><span class="linenos">759</span></a> <span class="n">existing_col_names</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">named_selects</span>
-</span><span id="DataFrame-760"><a href="#DataFrame-760"><span class="linenos">760</span></a> <span class="n">existing_col_index</span> <span class="o">=</span> <span class="p">(</span>
-</span><span id="DataFrame-761"><a href="#DataFrame-761"><span class="linenos">761</span></a> <span class="n">existing_col_names</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">colName</span><span class="p">)</span> <span class="k">if</span> <span class="n">colName</span> <span class="ow">in</span> <span class="n">existing_col_names</span> <span class="k">else</span> <span class="kc">None</span>
-</span><span id="DataFrame-762"><a href="#DataFrame-762"><span class="linenos">762</span></a> <span class="p">)</span>
-</span><span id="DataFrame-763"><a href="#DataFrame-763"><span class="linenos">763</span></a> <span class="k">if</span> <span class="n">existing_col_index</span><span class="p">:</span>
-</span><span id="DataFrame-764"><a href="#DataFrame-764"><span class="linenos">764</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-765"><a href="#DataFrame-765"><span class="linenos">765</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span><span class="p">[</span><span class="n">existing_col_index</span><span class="p">]</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">expression</span>
-</span><span id="DataFrame-766"><a href="#DataFrame-766"><span class="linenos">766</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-767"><a href="#DataFrame-767"><span class="linenos">767</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">colName</span><span class="p">),</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrame-768"><a href="#DataFrame-768"><span class="linenos">768</span></a>
-</span><span id="DataFrame-769"><a href="#DataFrame-769"><span class="linenos">769</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-770"><a href="#DataFrame-770"><span class="linenos">770</span></a> <span class="k">def</span> <span class="nf">withColumnRenamed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">existing</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">new</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
-</span><span id="DataFrame-771"><a href="#DataFrame-771"><span class="linenos">771</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame-772"><a href="#DataFrame-772"><span class="linenos">772</span></a> <span class="n">existing_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-773"><a href="#DataFrame-773"><span class="linenos">773</span></a> <span class="n">expression</span>
-</span><span id="DataFrame-774"><a href="#DataFrame-774"><span class="linenos">774</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span>
-</span><span id="DataFrame-775"><a href="#DataFrame-775"><span class="linenos">775</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="n">existing</span>
-</span><span id="DataFrame-776"><a href="#DataFrame-776"><span class="linenos">776</span></a> <span class="p">]</span>
-</span><span id="DataFrame-777"><a href="#DataFrame-777"><span class="linenos">777</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">existing_columns</span><span class="p">:</span>
-</span><span id="DataFrame-778"><a href="#DataFrame-778"><span class="linenos">778</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Tried to rename a column that doesn&#39;t exist&quot;</span><span class="p">)</span>
-</span><span id="DataFrame-779"><a href="#DataFrame-779"><span class="linenos">779</span></a> <span class="k">for</span> <span class="n">existing_column</span> <span class="ow">in</span> <span class="n">existing_columns</span><span class="p">:</span>
-</span><span id="DataFrame-780"><a href="#DataFrame-780"><span class="linenos">780</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">existing_column</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
-</span><span id="DataFrame-781"><a href="#DataFrame-781"><span class="linenos">781</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">existing_column</span><span class="p">,</span> <span class="n">new</span><span class="p">))</span>
-</span><span id="DataFrame-782"><a href="#DataFrame-782"><span class="linenos">782</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame-783"><a href="#DataFrame-783"><span class="linenos">783</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;alias&quot;</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">new</span><span class="p">))</span>
-</span><span id="DataFrame-784"><a href="#DataFrame-784"><span class="linenos">784</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-785"><a href="#DataFrame-785"><span class="linenos">785</span></a>
-</span><span id="DataFrame-786"><a href="#DataFrame-786"><span class="linenos">786</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame-787"><a href="#DataFrame-787"><span class="linenos">787</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-788"><a href="#DataFrame-788"><span class="linenos">788</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame-789"><a href="#DataFrame-789"><span class="linenos">789</span></a> <span class="n">drop_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame-790"><a href="#DataFrame-790"><span class="linenos">790</span></a> <span class="n">new_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame-791"><a href="#DataFrame-791"><span class="linenos">791</span></a> <span class="n">col</span>
-</span><span id="DataFrame-792"><a href="#DataFrame-792"><span class="linenos">792</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">all_columns</span>
-</span><span id="DataFrame-793"><a href="#DataFrame-793"><span class="linenos">793</span></a> <span class="k">if</span> <span class="n">col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="n">drop_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">drop_column</span> <span class="ow">in</span> <span class="n">drop_cols</span><span class="p">]</span>
-</span><span id="DataFrame-794"><a href="#DataFrame-794"><span class="linenos">794</span></a> <span class="p">]</span>
-</span><span id="DataFrame-795"><a href="#DataFrame-795"><span class="linenos">795</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">new_columns</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
-</span><span id="DataFrame-796"><a href="#DataFrame-796"><span class="linenos">796</span></a>
-</span><span id="DataFrame-797"><a href="#DataFrame-797"><span class="linenos">797</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">LIMIT</span><span class="p">)</span>
-</span><span id="DataFrame-798"><a href="#DataFrame-798"><span class="linenos">798</span></a> <span class="k">def</span> <span class="nf">limit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-799"><a href="#DataFrame-799"><span class="linenos">799</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="n">num</span><span class="p">))</span>
-</span><span id="DataFrame-800"><a href="#DataFrame-800"><span class="linenos">800</span></a>
-</span><span id="DataFrame-801"><a href="#DataFrame-801"><span class="linenos">801</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame-802"><a href="#DataFrame-802"><span class="linenos">802</span></a> <span class="k">def</span> <span class="nf">hint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="n">parameters</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-803"><a href="#DataFrame-803"><span class="linenos">803</span></a> <span class="n">parameter_list</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
-</span><span id="DataFrame-804"><a href="#DataFrame-804"><span class="linenos">804</span></a> <span class="n">parameter_columns</span> <span class="o">=</span> <span class="p">(</span>
-</span><span id="DataFrame-805"><a href="#DataFrame-805"><span class="linenos">805</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">parameter_list</span><span class="p">)</span>
-</span><span id="DataFrame-806"><a href="#DataFrame-806"><span class="linenos">806</span></a> <span class="k">if</span> <span class="n">parameters</span>
-</span><span id="DataFrame-807"><a href="#DataFrame-807"><span class="linenos">807</span></a> <span class="k">else</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">])</span>
-</span><span id="DataFrame-808"><a href="#DataFrame-808"><span class="linenos">808</span></a> <span class="p">)</span>
-</span><span id="DataFrame-809"><a href="#DataFrame-809"><span class="linenos">809</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">parameter_columns</span><span class="p">)</span>
-</span><span id="DataFrame-810"><a href="#DataFrame-810"><span class="linenos">810</span></a>
-</span><span id="DataFrame-811"><a href="#DataFrame-811"><span class="linenos">811</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame-812"><a href="#DataFrame-812"><span class="linenos">812</span></a> <span class="k">def</span> <span class="nf">repartition</span><span class="p">(</span>
-</span><span id="DataFrame-813"><a href="#DataFrame-813"><span class="linenos">813</span></a> <span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">ColumnOrName</span><span class="p">],</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">ColumnOrName</span>
-</span><span id="DataFrame-814"><a href="#DataFrame-814"><span class="linenos">814</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-815"><a href="#DataFrame-815"><span class="linenos">815</span></a> <span class="n">num_partition_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">numPartitions</span><span class="p">)</span>
-</span><span id="DataFrame-816"><a href="#DataFrame-816"><span class="linenos">816</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame-817"><a href="#DataFrame-817"><span class="linenos">817</span></a> <span class="n">args</span> <span class="o">=</span> <span class="n">num_partition_cols</span> <span class="o">+</span> <span class="n">columns</span>
-</span><span id="DataFrame-818"><a href="#DataFrame-818"><span class="linenos">818</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;repartition&quot;</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
-</span><span id="DataFrame-819"><a href="#DataFrame-819"><span class="linenos">819</span></a>
-</span><span id="DataFrame-820"><a href="#DataFrame-820"><span class="linenos">820</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame-821"><a href="#DataFrame-821"><span class="linenos">821</span></a> <span class="k">def</span> <span class="nf">coalesce</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-822"><a href="#DataFrame-822"><span class="linenos">822</span></a> <span class="n">num_partitions</span> <span class="o">=</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="n">numPartitions</span><span class="p">])</span>
-</span><span id="DataFrame-823"><a href="#DataFrame-823"><span class="linenos">823</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;coalesce&quot;</span><span class="p">,</span> <span class="n">num_partitions</span><span class="p">)</span>
-</span><span id="DataFrame-824"><a href="#DataFrame-824"><span class="linenos">824</span></a>
-</span><span id="DataFrame-825"><a href="#DataFrame-825"><span class="linenos">825</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame-826"><a href="#DataFrame-826"><span class="linenos">826</span></a> <span class="k">def</span> <span class="nf">cache</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-827"><a href="#DataFrame-827"><span class="linenos">827</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storage_level</span><span class="o">=</span><span class="s2">&quot;MEMORY_AND_DISK&quot;</span><span class="p">)</span>
-</span><span id="DataFrame-828"><a href="#DataFrame-828"><span class="linenos">828</span></a>
-</span><span id="DataFrame-829"><a href="#DataFrame-829"><span class="linenos">829</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame-830"><a href="#DataFrame-830"><span class="linenos">830</span></a> <span class="k">def</span> <span class="nf">persist</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">storageLevel</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;MEMORY_AND_DISK_SER&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame-831"><a href="#DataFrame-831"><span class="linenos">831</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame-832"><a href="#DataFrame-832"><span class="linenos">832</span></a><span class="sd"> Storage Level Options: https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-aux-cache-cache-table.html</span>
-</span><span id="DataFrame-833"><a href="#DataFrame-833"><span class="linenos">833</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame-834"><a href="#DataFrame-834"><span class="linenos">834</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storageLevel</span><span class="p">)</span>
+</span><span id="DataFrame-310"><a href="#DataFrame-310"><span class="linenos">310</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span>
+</span><span id="DataFrame-311"><a href="#DataFrame-311"><span class="linenos">311</span></a> <span class="n">replace_id_value</span><span class="p">,</span> <span class="n">replacement_mapping</span>
+</span><span id="DataFrame-312"><a href="#DataFrame-312"><span class="linenos">312</span></a> <span class="p">)</span><span class="o">.</span><span class="n">assert_is</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">)</span>
+</span><span id="DataFrame-313"><a href="#DataFrame-313"><span class="linenos">313</span></a> <span class="k">if</span> <span class="n">optimize</span><span class="p">:</span>
+</span><span id="DataFrame-314"><a href="#DataFrame-314"><span class="linenos">314</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span>
+</span><span id="DataFrame-315"><a href="#DataFrame-315"><span class="linenos">315</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_optimize</span><span class="p">(</span><span class="n">select_expression</span><span class="p">,</span> <span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">)</span>
+</span><span id="DataFrame-316"><a href="#DataFrame-316"><span class="linenos">316</span></a> <span class="p">)</span>
+</span><span id="DataFrame-317"><a href="#DataFrame-317"><span class="linenos">317</span></a>
+</span><span id="DataFrame-318"><a href="#DataFrame-318"><span class="linenos">318</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_replace_cte_names_with_hashes</span><span class="p">(</span><span class="n">select_expression</span><span class="p">)</span>
+</span><span id="DataFrame-319"><a href="#DataFrame-319"><span class="linenos">319</span></a>
+</span><span id="DataFrame-320"><a href="#DataFrame-320"><span class="linenos">320</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Cache</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Drop</span><span class="p">]</span>
+</span><span id="DataFrame-321"><a href="#DataFrame-321"><span class="linenos">321</span></a> <span class="k">if</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Cache</span><span class="p">:</span>
+</span><span id="DataFrame-322"><a href="#DataFrame-322"><span class="linenos">322</span></a> <span class="n">cache_table_name</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_create_hash_from_expression</span><span class="p">(</span><span class="n">select_expression</span><span class="p">)</span>
+</span><span id="DataFrame-323"><a href="#DataFrame-323"><span class="linenos">323</span></a> <span class="n">cache_table</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">cache_table_name</span><span class="p">)</span>
+</span><span id="DataFrame-324"><a href="#DataFrame-324"><span class="linenos">324</span></a> <span class="n">original_alias_name</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;cte_alias_name&quot;</span><span class="p">]</span>
+</span><span id="DataFrame-325"><a href="#DataFrame-325"><span class="linenos">325</span></a>
+</span><span id="DataFrame-326"><a href="#DataFrame-326"><span class="linenos">326</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">original_alias_name</span><span class="p">)]</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span> <span class="c1"># type: ignore</span>
+</span><span id="DataFrame-327"><a href="#DataFrame-327"><span class="linenos">327</span></a> <span class="n">cache_table_name</span>
+</span><span id="DataFrame-328"><a href="#DataFrame-328"><span class="linenos">328</span></a> <span class="p">)</span>
+</span><span id="DataFrame-329"><a href="#DataFrame-329"><span class="linenos">329</span></a> <span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">add_table</span><span class="p">(</span>
+</span><span id="DataFrame-330"><a href="#DataFrame-330"><span class="linenos">330</span></a> <span class="n">cache_table_name</span><span class="p">,</span>
+</span><span id="DataFrame-331"><a href="#DataFrame-331"><span class="linenos">331</span></a> <span class="p">{</span>
+</span><span id="DataFrame-332"><a href="#DataFrame-332"><span class="linenos">332</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">expression</span><span class="o">.</span><span class="n">type</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">)</span>
+</span><span id="DataFrame-333"><a href="#DataFrame-333"><span class="linenos">333</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">expressions</span>
+</span><span id="DataFrame-334"><a href="#DataFrame-334"><span class="linenos">334</span></a> <span class="p">},</span>
+</span><span id="DataFrame-335"><a href="#DataFrame-335"><span class="linenos">335</span></a> <span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">,</span>
+</span><span id="DataFrame-336"><a href="#DataFrame-336"><span class="linenos">336</span></a> <span class="p">)</span>
+</span><span id="DataFrame-337"><a href="#DataFrame-337"><span class="linenos">337</span></a>
+</span><span id="DataFrame-338"><a href="#DataFrame-338"><span class="linenos">338</span></a> <span class="n">cache_storage_level</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;cache_storage_level&quot;</span><span class="p">]</span>
+</span><span id="DataFrame-339"><a href="#DataFrame-339"><span class="linenos">339</span></a> <span class="n">options</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-340"><a href="#DataFrame-340"><span class="linenos">340</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Literal</span><span class="o">.</span><span class="n">string</span><span class="p">(</span><span class="s2">&quot;storageLevel&quot;</span><span class="p">),</span>
+</span><span id="DataFrame-341"><a href="#DataFrame-341"><span class="linenos">341</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Literal</span><span class="o">.</span><span class="n">string</span><span class="p">(</span><span class="n">cache_storage_level</span><span class="p">),</span>
+</span><span id="DataFrame-342"><a href="#DataFrame-342"><span class="linenos">342</span></a> <span class="p">]</span>
+</span><span id="DataFrame-343"><a href="#DataFrame-343"><span class="linenos">343</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Cache</span><span class="p">(</span>
+</span><span id="DataFrame-344"><a href="#DataFrame-344"><span class="linenos">344</span></a> <span class="n">this</span><span class="o">=</span><span class="n">cache_table</span><span class="p">,</span> <span class="n">expression</span><span class="o">=</span><span class="n">select_expression</span><span class="p">,</span> <span class="n">lazy</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">options</span><span class="o">=</span><span class="n">options</span>
+</span><span id="DataFrame-345"><a href="#DataFrame-345"><span class="linenos">345</span></a> <span class="p">)</span>
+</span><span id="DataFrame-346"><a href="#DataFrame-346"><span class="linenos">346</span></a>
+</span><span id="DataFrame-347"><a href="#DataFrame-347"><span class="linenos">347</span></a> <span class="c1"># We will drop the &quot;view&quot; if it exists before running the cache table</span>
+</span><span id="DataFrame-348"><a href="#DataFrame-348"><span class="linenos">348</span></a> <span class="n">output_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Drop</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">cache_table</span><span class="p">,</span> <span class="n">exists</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">kind</span><span class="o">=</span><span class="s2">&quot;VIEW&quot;</span><span class="p">))</span>
+</span><span id="DataFrame-349"><a href="#DataFrame-349"><span class="linenos">349</span></a> <span class="k">elif</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Create</span><span class="p">:</span>
+</span><span id="DataFrame-350"><a href="#DataFrame-350"><span class="linenos">350</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">output_expression_container</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-351"><a href="#DataFrame-351"><span class="linenos">351</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;expression&quot;</span><span class="p">,</span> <span class="n">select_expression</span><span class="p">)</span>
+</span><span id="DataFrame-352"><a href="#DataFrame-352"><span class="linenos">352</span></a> <span class="k">elif</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">:</span>
+</span><span id="DataFrame-353"><a href="#DataFrame-353"><span class="linenos">353</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">output_expression_container</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-354"><a href="#DataFrame-354"><span class="linenos">354</span></a> <span class="n">select_without_ctes</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-355"><a href="#DataFrame-355"><span class="linenos">355</span></a> <span class="n">select_without_ctes</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+</span><span id="DataFrame-356"><a href="#DataFrame-356"><span class="linenos">356</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;expression&quot;</span><span class="p">,</span> <span class="n">select_without_ctes</span><span class="p">)</span>
+</span><span id="DataFrame-357"><a href="#DataFrame-357"><span class="linenos">357</span></a>
+</span><span id="DataFrame-358"><a href="#DataFrame-358"><span class="linenos">358</span></a> <span class="k">if</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">:</span>
+</span><span id="DataFrame-359"><a href="#DataFrame-359"><span class="linenos">359</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">With</span><span class="p">(</span><span class="n">expressions</span><span class="o">=</span><span class="n">select_expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">))</span>
+</span><span id="DataFrame-360"><a href="#DataFrame-360"><span class="linenos">360</span></a> <span class="k">elif</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">:</span>
+</span><span id="DataFrame-361"><a href="#DataFrame-361"><span class="linenos">361</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">select_expression</span>
+</span><span id="DataFrame-362"><a href="#DataFrame-362"><span class="linenos">362</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-363"><a href="#DataFrame-363"><span class="linenos">363</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Invalid expression type: </span><span class="si">{</span><span class="n">expression_type</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
+</span><span id="DataFrame-364"><a href="#DataFrame-364"><span class="linenos">364</span></a>
+</span><span id="DataFrame-365"><a href="#DataFrame-365"><span class="linenos">365</span></a> <span class="n">output_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-366"><a href="#DataFrame-366"><span class="linenos">366</span></a>
+</span><span id="DataFrame-367"><a href="#DataFrame-367"><span class="linenos">367</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">expression</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">output_expressions</span><span class="p">]</span>
+</span><span id="DataFrame-368"><a href="#DataFrame-368"><span class="linenos">368</span></a>
+</span><span id="DataFrame-369"><a href="#DataFrame-369"><span class="linenos">369</span></a> <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-370"><a href="#DataFrame-370"><span class="linenos">370</span></a> <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="o">**</span><span class="n">object_to_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">))</span>
+</span><span id="DataFrame-371"><a href="#DataFrame-371"><span class="linenos">371</span></a>
+</span><span id="DataFrame-372"><a href="#DataFrame-372"><span class="linenos">372</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-373"><a href="#DataFrame-373"><span class="linenos">373</span></a> <span class="k">def</span> <span class="nf">select</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-374"><a href="#DataFrame-374"><span class="linenos">374</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame-375"><a href="#DataFrame-375"><span class="linenos">375</span></a> <span class="n">kwargs</span><span class="p">[</span><span class="s2">&quot;append&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;append&quot;</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-376"><a href="#DataFrame-376"><span class="linenos">376</span></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;joins&quot;</span><span class="p">):</span>
+</span><span id="DataFrame-377"><a href="#DataFrame-377"><span class="linenos">377</span></a> <span class="n">ambiguous_cols</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-378"><a href="#DataFrame-378"><span class="linenos">378</span></a> <span class="n">col</span>
+</span><span id="DataFrame-379"><a href="#DataFrame-379"><span class="linenos">379</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span>
+</span><span id="DataFrame-380"><a href="#DataFrame-380"><span class="linenos">380</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="o">.</span><span class="n">table</span>
+</span><span id="DataFrame-381"><a href="#DataFrame-381"><span class="linenos">381</span></a> <span class="p">]</span>
+</span><span id="DataFrame-382"><a href="#DataFrame-382"><span class="linenos">382</span></a> <span class="k">if</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
+</span><span id="DataFrame-383"><a href="#DataFrame-383"><span class="linenos">383</span></a> <span class="n">join_table_identifiers</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-384"><a href="#DataFrame-384"><span class="linenos">384</span></a> <span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-385"><a href="#DataFrame-385"><span class="linenos">385</span></a> <span class="p">]</span>
+</span><span id="DataFrame-386"><a href="#DataFrame-386"><span class="linenos">386</span></a> <span class="n">cte_names_in_join</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">join_table_identifiers</span><span class="p">]</span>
+</span><span id="DataFrame-387"><a href="#DataFrame-387"><span class="linenos">387</span></a> <span class="c1"># If we have columns that resolve to multiple CTE expressions then we want to use each CTE left-to-right</span>
+</span><span id="DataFrame-388"><a href="#DataFrame-388"><span class="linenos">388</span></a> <span class="c1"># and therefore we allow multiple columns with the same name in the result. This matches the behavior</span>
+</span><span id="DataFrame-389"><a href="#DataFrame-389"><span class="linenos">389</span></a> <span class="c1"># of Spark.</span>
+</span><span id="DataFrame-390"><a href="#DataFrame-390"><span class="linenos">390</span></a> <span class="n">resolved_column_position</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">col</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">}</span>
+</span><span id="DataFrame-391"><a href="#DataFrame-391"><span class="linenos">391</span></a> <span class="k">for</span> <span class="n">ambiguous_col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
+</span><span id="DataFrame-392"><a href="#DataFrame-392"><span class="linenos">392</span></a> <span class="n">ctes_with_column</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-393"><a href="#DataFrame-393"><span class="linenos">393</span></a> <span class="n">cte</span>
+</span><span id="DataFrame-394"><a href="#DataFrame-394"><span class="linenos">394</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span>
+</span><span id="DataFrame-395"><a href="#DataFrame-395"><span class="linenos">395</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte_names_in_join</span>
+</span><span id="DataFrame-396"><a href="#DataFrame-396"><span class="linenos">396</span></a> <span class="ow">and</span> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span>
+</span><span id="DataFrame-397"><a href="#DataFrame-397"><span class="linenos">397</span></a> <span class="p">]</span>
+</span><span id="DataFrame-398"><a href="#DataFrame-398"><span class="linenos">398</span></a> <span class="c1"># Check if there is a CTE with this column that we haven&#39;t used before. If so, use it. Otherwise,</span>
+</span><span id="DataFrame-399"><a href="#DataFrame-399"><span class="linenos">399</span></a> <span class="c1"># use the same CTE we used before</span>
+</span><span id="DataFrame-400"><a href="#DataFrame-400"><span class="linenos">400</span></a> <span class="n">cte</span> <span class="o">=</span> <span class="n">seq_get</span><span class="p">(</span><span class="n">ctes_with_column</span><span class="p">,</span> <span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
+</span><span id="DataFrame-401"><a href="#DataFrame-401"><span class="linenos">401</span></a> <span class="k">if</span> <span class="n">cte</span><span class="p">:</span>
+</span><span id="DataFrame-402"><a href="#DataFrame-402"><span class="linenos">402</span></a> <span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
+</span><span id="DataFrame-403"><a href="#DataFrame-403"><span class="linenos">403</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-404"><a href="#DataFrame-404"><span class="linenos">404</span></a> <span class="n">cte</span> <span class="o">=</span> <span class="n">ctes_with_column</span><span class="p">[</span><span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]]</span>
+</span><span id="DataFrame-405"><a href="#DataFrame-405"><span class="linenos">405</span></a> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;table&quot;</span><span class="p">,</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
+</span><span id="DataFrame-406"><a href="#DataFrame-406"><span class="linenos">406</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
+</span><span id="DataFrame-407"><a href="#DataFrame-407"><span class="linenos">407</span></a> <span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">expression</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">),</span> <span class="o">**</span><span class="n">kwargs</span>
+</span><span id="DataFrame-408"><a href="#DataFrame-408"><span class="linenos">408</span></a> <span class="p">)</span>
+</span><span id="DataFrame-409"><a href="#DataFrame-409"><span class="linenos">409</span></a>
+</span><span id="DataFrame-410"><a href="#DataFrame-410"><span class="linenos">410</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame-411"><a href="#DataFrame-411"><span class="linenos">411</span></a> <span class="k">def</span> <span class="nf">alias</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-412"><a href="#DataFrame-412"><span class="linenos">412</span></a> <span class="n">new_sequence_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_sequence_id</span>
+</span><span id="DataFrame-413"><a href="#DataFrame-413"><span class="linenos">413</span></a> <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-414"><a href="#DataFrame-414"><span class="linenos">414</span></a> <span class="k">for</span> <span class="n">join_hint</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">:</span>
+</span><span id="DataFrame-415"><a href="#DataFrame-415"><span class="linenos">415</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">join_hint</span><span class="o">.</span><span class="n">expressions</span><span class="p">:</span>
+</span><span id="DataFrame-416"><a href="#DataFrame-416"><span class="linenos">416</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">:</span>
+</span><span id="DataFrame-417"><a href="#DataFrame-417"><span class="linenos">417</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;this&quot;</span><span class="p">,</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_col</span><span class="p">(</span><span class="n">new_sequence_id</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-418"><a href="#DataFrame-418"><span class="linenos">418</span></a> <span class="n">df</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_add_alias_to_mapping</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">new_sequence_id</span><span class="p">)</span>
+</span><span id="DataFrame-419"><a href="#DataFrame-419"><span class="linenos">419</span></a> <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">(</span><span class="n">sequence_id</span><span class="o">=</span><span class="n">new_sequence_id</span><span class="p">)</span>
+</span><span id="DataFrame-420"><a href="#DataFrame-420"><span class="linenos">420</span></a>
+</span><span id="DataFrame-421"><a href="#DataFrame-421"><span class="linenos">421</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">WHERE</span><span class="p">)</span>
+</span><span id="DataFrame-422"><a href="#DataFrame-422"><span class="linenos">422</span></a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">bool</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-423"><a href="#DataFrame-423"><span class="linenos">423</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">column</span><span class="p">)</span>
+</span><span id="DataFrame-424"><a href="#DataFrame-424"><span class="linenos">424</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">))</span>
+</span><span id="DataFrame-425"><a href="#DataFrame-425"><span class="linenos">425</span></a>
+</span><span id="DataFrame-426"><a href="#DataFrame-426"><span class="linenos">426</span></a> <span class="nb">filter</span> <span class="o">=</span> <span class="n">where</span>
+</span><span id="DataFrame-427"><a href="#DataFrame-427"><span class="linenos">427</span></a>
+</span><span id="DataFrame-428"><a href="#DataFrame-428"><span class="linenos">428</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">GROUP_BY</span><span class="p">)</span>
+</span><span id="DataFrame-429"><a href="#DataFrame-429"><span class="linenos">429</span></a> <span class="k">def</span> <span class="nf">groupBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">GroupedData</span><span class="p">:</span>
+</span><span id="DataFrame-430"><a href="#DataFrame-430"><span class="linenos">430</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame-431"><a href="#DataFrame-431"><span class="linenos">431</span></a> <span class="k">return</span> <span class="n">GroupedData</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_op</span><span class="p">)</span>
+</span><span id="DataFrame-432"><a href="#DataFrame-432"><span class="linenos">432</span></a>
+</span><span id="DataFrame-433"><a href="#DataFrame-433"><span class="linenos">433</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-434"><a href="#DataFrame-434"><span class="linenos">434</span></a> <span class="k">def</span> <span class="nf">agg</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">exprs</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-435"><a href="#DataFrame-435"><span class="linenos">435</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">exprs</span><span class="p">)</span>
+</span><span id="DataFrame-436"><a href="#DataFrame-436"><span class="linenos">436</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="o">*</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame-437"><a href="#DataFrame-437"><span class="linenos">437</span></a>
+</span><span id="DataFrame-438"><a href="#DataFrame-438"><span class="linenos">438</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-439"><a href="#DataFrame-439"><span class="linenos">439</span></a> <span class="k">def</span> <span class="nf">join</span><span class="p">(</span>
+</span><span id="DataFrame-440"><a href="#DataFrame-440"><span class="linenos">440</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame-441"><a href="#DataFrame-441"><span class="linenos">441</span></a> <span class="n">other_df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
+</span><span id="DataFrame-442"><a href="#DataFrame-442"><span class="linenos">442</span></a> <span class="n">on</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">Column</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Column</span><span class="p">]],</span>
+</span><span id="DataFrame-443"><a href="#DataFrame-443"><span class="linenos">443</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;inner&quot;</span><span class="p">,</span>
+</span><span id="DataFrame-444"><a href="#DataFrame-444"><span class="linenos">444</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
+</span><span id="DataFrame-445"><a href="#DataFrame-445"><span class="linenos">445</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-446"><a href="#DataFrame-446"><span class="linenos">446</span></a> <span class="n">other_df</span> <span class="o">=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
+</span><span id="DataFrame-447"><a href="#DataFrame-447"><span class="linenos">447</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">on</span><span class="p">)</span>
+</span><span id="DataFrame-448"><a href="#DataFrame-448"><span class="linenos">448</span></a> <span class="c1"># We will determine actual &quot;join on&quot; expression later so we don&#39;t provide it at first</span>
+</span><span id="DataFrame-449"><a href="#DataFrame-449"><span class="linenos">449</span></a> <span class="n">join_expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
+</span><span id="DataFrame-450"><a href="#DataFrame-450"><span class="linenos">450</span></a> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">,</span> <span class="n">join_type</span><span class="o">=</span><span class="n">how</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
+</span><span id="DataFrame-451"><a href="#DataFrame-451"><span class="linenos">451</span></a> <span class="p">)</span>
+</span><span id="DataFrame-452"><a href="#DataFrame-452"><span class="linenos">452</span></a> <span class="n">join_expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_add_ctes_to_expression</span><span class="p">(</span><span class="n">join_expression</span><span class="p">,</span> <span class="n">other_df</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">)</span>
+</span><span id="DataFrame-453"><a href="#DataFrame-453"><span class="linenos">453</span></a> <span class="n">self_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame-454"><a href="#DataFrame-454"><span class="linenos">454</span></a> <span class="n">other_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">other_df</span><span class="p">)</span>
+</span><span id="DataFrame-455"><a href="#DataFrame-455"><span class="linenos">455</span></a> <span class="c1"># Determines the join clause and select columns to be used passed on what type of columns were provided for</span>
+</span><span id="DataFrame-456"><a href="#DataFrame-456"><span class="linenos">456</span></a> <span class="c1"># the join. The columns returned changes based on how the on expression is provided.</span>
+</span><span id="DataFrame-457"><a href="#DataFrame-457"><span class="linenos">457</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">join_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
+</span><span id="DataFrame-458"><a href="#DataFrame-458"><span class="linenos">458</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame-459"><a href="#DataFrame-459"><span class="linenos">459</span></a><span class="sd"> Unique characteristics of join on column names only:</span>
+</span><span id="DataFrame-460"><a href="#DataFrame-460"><span class="linenos">460</span></a><span class="sd"> * The column names are put at the front of the select list</span>
+</span><span id="DataFrame-461"><a href="#DataFrame-461"><span class="linenos">461</span></a><span class="sd"> * The column names are deduplicated across the entire select list and only the column names (other dups are allowed)</span>
+</span><span id="DataFrame-462"><a href="#DataFrame-462"><span class="linenos">462</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame-463"><a href="#DataFrame-463"><span class="linenos">463</span></a> <span class="n">table_names</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-464"><a href="#DataFrame-464"><span class="linenos">464</span></a> <span class="n">table</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame-465"><a href="#DataFrame-465"><span class="linenos">465</span></a> <span class="k">for</span> <span class="n">table</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame-466"><a href="#DataFrame-466"><span class="linenos">466</span></a> <span class="p">]</span>
+</span><span id="DataFrame-467"><a href="#DataFrame-467"><span class="linenos">467</span></a> <span class="n">potential_ctes</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-468"><a href="#DataFrame-468"><span class="linenos">468</span></a> <span class="n">cte</span>
+</span><span id="DataFrame-469"><a href="#DataFrame-469"><span class="linenos">469</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">join_expression</span><span class="o">.</span><span class="n">ctes</span>
+</span><span id="DataFrame-470"><a href="#DataFrame-470"><span class="linenos">470</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">table_names</span>
+</span><span id="DataFrame-471"><a href="#DataFrame-471"><span class="linenos">471</span></a> <span class="ow">and</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">!=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span>
+</span><span id="DataFrame-472"><a href="#DataFrame-472"><span class="linenos">472</span></a> <span class="p">]</span>
+</span><span id="DataFrame-473"><a href="#DataFrame-473"><span class="linenos">473</span></a> <span class="c1"># Determine the table to reference for the left side of the join by checking each of the left side</span>
+</span><span id="DataFrame-474"><a href="#DataFrame-474"><span class="linenos">474</span></a> <span class="c1"># tables and see if they have the column being referenced.</span>
+</span><span id="DataFrame-475"><a href="#DataFrame-475"><span class="linenos">475</span></a> <span class="n">join_column_pairs</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="DataFrame-476"><a href="#DataFrame-476"><span class="linenos">476</span></a> <span class="k">for</span> <span class="n">join_column</span> <span class="ow">in</span> <span class="n">join_columns</span><span class="p">:</span>
+</span><span id="DataFrame-477"><a href="#DataFrame-477"><span class="linenos">477</span></a> <span class="n">num_matching_ctes</span> <span class="o">=</span> <span class="mi">0</span>
+</span><span id="DataFrame-478"><a href="#DataFrame-478"><span class="linenos">478</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">potential_ctes</span><span class="p">:</span>
+</span><span id="DataFrame-479"><a href="#DataFrame-479"><span class="linenos">479</span></a> <span class="k">if</span> <span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span><span class="p">:</span>
+</span><span id="DataFrame-480"><a href="#DataFrame-480"><span class="linenos">480</span></a> <span class="n">left_column</span> <span class="o">=</span> <span class="n">join_column</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
+</span><span id="DataFrame-481"><a href="#DataFrame-481"><span class="linenos">481</span></a> <span class="n">right_column</span> <span class="o">=</span> <span class="n">join_column</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">)</span>
+</span><span id="DataFrame-482"><a href="#DataFrame-482"><span class="linenos">482</span></a> <span class="n">join_column_pairs</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">left_column</span><span class="p">,</span> <span class="n">right_column</span><span class="p">))</span>
+</span><span id="DataFrame-483"><a href="#DataFrame-483"><span class="linenos">483</span></a> <span class="n">num_matching_ctes</span> <span class="o">+=</span> <span class="mi">1</span>
+</span><span id="DataFrame-484"><a href="#DataFrame-484"><span class="linenos">484</span></a> <span class="k">if</span> <span class="n">num_matching_ctes</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
+</span><span id="DataFrame-485"><a href="#DataFrame-485"><span class="linenos">485</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+</span><span id="DataFrame-486"><a href="#DataFrame-486"><span class="linenos">486</span></a> <span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="si">}</span><span class="s2"> is ambiguous. Please specify the table name.&quot;</span>
+</span><span id="DataFrame-487"><a href="#DataFrame-487"><span class="linenos">487</span></a> <span class="p">)</span>
+</span><span id="DataFrame-488"><a href="#DataFrame-488"><span class="linenos">488</span></a> <span class="k">elif</span> <span class="n">num_matching_ctes</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+</span><span id="DataFrame-489"><a href="#DataFrame-489"><span class="linenos">489</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+</span><span id="DataFrame-490"><a href="#DataFrame-490"><span class="linenos">490</span></a> <span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="si">}</span><span class="s2"> does not exist in any of the tables.&quot;</span>
+</span><span id="DataFrame-491"><a href="#DataFrame-491"><span class="linenos">491</span></a> <span class="p">)</span>
+</span><span id="DataFrame-492"><a href="#DataFrame-492"><span class="linenos">492</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span>
+</span><span id="DataFrame-493"><a href="#DataFrame-493"><span class="linenos">493</span></a> <span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span>
+</span><span id="DataFrame-494"><a href="#DataFrame-494"><span class="linenos">494</span></a> <span class="p">[</span><span class="n">left_column</span> <span class="o">==</span> <span class="n">right_column</span> <span class="k">for</span> <span class="n">left_column</span><span class="p">,</span> <span class="n">right_column</span> <span class="ow">in</span> <span class="n">join_column_pairs</span><span class="p">],</span>
+</span><span id="DataFrame-495"><a href="#DataFrame-495"><span class="linenos">495</span></a> <span class="p">)</span>
+</span><span id="DataFrame-496"><a href="#DataFrame-496"><span class="linenos">496</span></a> <span class="n">join_column_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">left_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">left_col</span><span class="p">,</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">join_column_pairs</span><span class="p">]</span>
+</span><span id="DataFrame-497"><a href="#DataFrame-497"><span class="linenos">497</span></a> <span class="c1"># To match spark behavior only the join clause gets deduplicated and it gets put in the front of the column list</span>
+</span><span id="DataFrame-498"><a href="#DataFrame-498"><span class="linenos">498</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-499"><a href="#DataFrame-499"><span class="linenos">499</span></a> <span class="p">(</span>
+</span><span id="DataFrame-500"><a href="#DataFrame-500"><span class="linenos">500</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame-501"><a href="#DataFrame-501"><span class="linenos">501</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">this</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Star</span><span class="p">)</span>
+</span><span id="DataFrame-502"><a href="#DataFrame-502"><span class="linenos">502</span></a> <span class="k">else</span> <span class="n">column</span><span class="o">.</span><span class="n">sql</span><span class="p">()</span>
+</span><span id="DataFrame-503"><a href="#DataFrame-503"><span class="linenos">503</span></a> <span class="p">)</span>
+</span><span id="DataFrame-504"><a href="#DataFrame-504"><span class="linenos">504</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span>
+</span><span id="DataFrame-505"><a href="#DataFrame-505"><span class="linenos">505</span></a> <span class="p">]</span>
+</span><span id="DataFrame-506"><a href="#DataFrame-506"><span class="linenos">506</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-507"><a href="#DataFrame-507"><span class="linenos">507</span></a> <span class="n">column_name</span>
+</span><span id="DataFrame-508"><a href="#DataFrame-508"><span class="linenos">508</span></a> <span class="k">for</span> <span class="n">column_name</span> <span class="ow">in</span> <span class="n">select_column_names</span>
+</span><span id="DataFrame-509"><a href="#DataFrame-509"><span class="linenos">509</span></a> <span class="k">if</span> <span class="n">column_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">join_column_names</span>
+</span><span id="DataFrame-510"><a href="#DataFrame-510"><span class="linenos">510</span></a> <span class="p">]</span>
+</span><span id="DataFrame-511"><a href="#DataFrame-511"><span class="linenos">511</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="n">join_column_names</span> <span class="o">+</span> <span class="n">select_column_names</span>
+</span><span id="DataFrame-512"><a href="#DataFrame-512"><span class="linenos">512</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-513"><a href="#DataFrame-513"><span class="linenos">513</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame-514"><a href="#DataFrame-514"><span class="linenos">514</span></a><span class="sd"> Unique characteristics of join on expressions:</span>
+</span><span id="DataFrame-515"><a href="#DataFrame-515"><span class="linenos">515</span></a><span class="sd"> * There is no deduplication of the results.</span>
+</span><span id="DataFrame-516"><a href="#DataFrame-516"><span class="linenos">516</span></a><span class="sd"> * The left join dataframe columns go first and right come after. No sort preference is given to join columns</span>
+</span><span id="DataFrame-517"><a href="#DataFrame-517"><span class="linenos">517</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame-518"><a href="#DataFrame-518"><span class="linenos">518</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">join_columns</span><span class="p">,</span> <span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame-519"><a href="#DataFrame-519"><span class="linenos">519</span></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">join_columns</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
+</span><span id="DataFrame-520"><a href="#DataFrame-520"><span class="linenos">520</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">join_columns</span><span class="p">)]</span>
+</span><span id="DataFrame-521"><a href="#DataFrame-521"><span class="linenos">521</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">join_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+</span><span id="DataFrame-522"><a href="#DataFrame-522"><span class="linenos">522</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span><span class="p">]</span>
+</span><span id="DataFrame-523"><a href="#DataFrame-523"><span class="linenos">523</span></a>
+</span><span id="DataFrame-524"><a href="#DataFrame-524"><span class="linenos">524</span></a> <span class="c1"># Update the on expression with the actual join clause to replace the dummy one from before</span>
+</span><span id="DataFrame-525"><a href="#DataFrame-525"><span class="linenos">525</span></a> <span class="n">join_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;joins&quot;</span><span class="p">][</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;on&quot;</span><span class="p">,</span> <span class="n">join_clause</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-526"><a href="#DataFrame-526"><span class="linenos">526</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame-527"><a href="#DataFrame-527"><span class="linenos">527</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">)</span>
+</span><span id="DataFrame-528"><a href="#DataFrame-528"><span class="linenos">528</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="p">)</span>
+</span><span id="DataFrame-529"><a href="#DataFrame-529"><span class="linenos">529</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="o">.</span><span class="n">__wrapped__</span><span class="p">(</span><span class="n">new_df</span><span class="p">,</span> <span class="o">*</span><span class="n">select_column_names</span><span class="p">)</span>
+</span><span id="DataFrame-530"><a href="#DataFrame-530"><span class="linenos">530</span></a> <span class="k">return</span> <span class="n">new_df</span>
+</span><span id="DataFrame-531"><a href="#DataFrame-531"><span class="linenos">531</span></a>
+</span><span id="DataFrame-532"><a href="#DataFrame-532"><span class="linenos">532</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">ORDER_BY</span><span class="p">)</span>
+</span><span id="DataFrame-533"><a href="#DataFrame-533"><span class="linenos">533</span></a> <span class="k">def</span> <span class="nf">orderBy</span><span class="p">(</span>
+</span><span id="DataFrame-534"><a href="#DataFrame-534"><span class="linenos">534</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame-535"><a href="#DataFrame-535"><span class="linenos">535</span></a> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span>
+</span><span id="DataFrame-536"><a href="#DataFrame-536"><span class="linenos">536</span></a> <span class="n">ascending</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-537"><a href="#DataFrame-537"><span class="linenos">537</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-538"><a href="#DataFrame-538"><span class="linenos">538</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame-539"><a href="#DataFrame-539"><span class="linenos">539</span></a><span class="sd"> This implementation lets any ordered columns take priority over whatever is provided in `ascending`. Spark</span>
+</span><span id="DataFrame-540"><a href="#DataFrame-540"><span class="linenos">540</span></a><span class="sd"> has irregular behavior and can result in runtime errors. Users shouldn&#39;t be mixing the two anyways so this</span>
+</span><span id="DataFrame-541"><a href="#DataFrame-541"><span class="linenos">541</span></a><span class="sd"> is unlikely to come up.</span>
+</span><span id="DataFrame-542"><a href="#DataFrame-542"><span class="linenos">542</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame-543"><a href="#DataFrame-543"><span class="linenos">543</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame-544"><a href="#DataFrame-544"><span class="linenos">544</span></a> <span class="n">pre_ordered_col_indexes</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-545"><a href="#DataFrame-545"><span class="linenos">545</span></a> <span class="n">i</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">)</span>
+</span><span id="DataFrame-546"><a href="#DataFrame-546"><span class="linenos">546</span></a> <span class="p">]</span>
+</span><span id="DataFrame-547"><a href="#DataFrame-547"><span class="linenos">547</span></a> <span class="k">if</span> <span class="n">ascending</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="DataFrame-548"><a href="#DataFrame-548"><span class="linenos">548</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="kc">True</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame-549"><a href="#DataFrame-549"><span class="linenos">549</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ascending</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
+</span><span id="DataFrame-550"><a href="#DataFrame-550"><span class="linenos">550</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="n">ascending</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame-551"><a href="#DataFrame-551"><span class="linenos">551</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ascending</span><span class="p">)]</span>
+</span><span id="DataFrame-552"><a href="#DataFrame-552"><span class="linenos">552</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
+</span><span id="DataFrame-553"><a href="#DataFrame-553"><span class="linenos">553</span></a> <span class="n">ascending</span>
+</span><span id="DataFrame-554"><a href="#DataFrame-554"><span class="linenos">554</span></a> <span class="p">),</span> <span class="s2">&quot;The length of items in ascending must equal the number of columns provided&quot;</span>
+</span><span id="DataFrame-555"><a href="#DataFrame-555"><span class="linenos">555</span></a> <span class="n">col_and_ascending</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">ascending</span><span class="p">))</span>
+</span><span id="DataFrame-556"><a href="#DataFrame-556"><span class="linenos">556</span></a> <span class="n">order_by_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-557"><a href="#DataFrame-557"><span class="linenos">557</span></a> <span class="p">(</span>
+</span><span id="DataFrame-558"><a href="#DataFrame-558"><span class="linenos">558</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="ow">not</span> <span class="n">asc</span><span class="p">)</span>
+</span><span id="DataFrame-559"><a href="#DataFrame-559"><span class="linenos">559</span></a> <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">pre_ordered_col_indexes</span>
+</span><span id="DataFrame-560"><a href="#DataFrame-560"><span class="linenos">560</span></a> <span class="k">else</span> <span class="n">columns</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">column_expression</span>
+</span><span id="DataFrame-561"><a href="#DataFrame-561"><span class="linenos">561</span></a> <span class="p">)</span>
+</span><span id="DataFrame-562"><a href="#DataFrame-562"><span class="linenos">562</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">asc</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">col_and_ascending</span><span class="p">)</span>
+</span><span id="DataFrame-563"><a href="#DataFrame-563"><span class="linenos">563</span></a> <span class="p">]</span>
+</span><span id="DataFrame-564"><a href="#DataFrame-564"><span class="linenos">564</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="o">*</span><span class="n">order_by_columns</span><span class="p">))</span>
+</span><span id="DataFrame-565"><a href="#DataFrame-565"><span class="linenos">565</span></a>
+</span><span id="DataFrame-566"><a href="#DataFrame-566"><span class="linenos">566</span></a> <span class="n">sort</span> <span class="o">=</span> <span class="n">orderBy</span>
+</span><span id="DataFrame-567"><a href="#DataFrame-567"><span class="linenos">567</span></a>
+</span><span id="DataFrame-568"><a href="#DataFrame-568"><span class="linenos">568</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-569"><a href="#DataFrame-569"><span class="linenos">569</span></a> <span class="k">def</span> <span class="nf">union</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-570"><a href="#DataFrame-570"><span class="linenos">570</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-571"><a href="#DataFrame-571"><span class="linenos">571</span></a>
+</span><span id="DataFrame-572"><a href="#DataFrame-572"><span class="linenos">572</span></a> <span class="n">unionAll</span> <span class="o">=</span> <span class="n">union</span>
+</span><span id="DataFrame-573"><a href="#DataFrame-573"><span class="linenos">573</span></a>
+</span><span id="DataFrame-574"><a href="#DataFrame-574"><span class="linenos">574</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-575"><a href="#DataFrame-575"><span class="linenos">575</span></a> <span class="k">def</span> <span class="nf">unionByName</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">allowMissingColumns</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
+</span><span id="DataFrame-576"><a href="#DataFrame-576"><span class="linenos">576</span></a> <span class="n">l_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">columns</span>
+</span><span id="DataFrame-577"><a href="#DataFrame-577"><span class="linenos">577</span></a> <span class="n">r_columns</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">columns</span>
+</span><span id="DataFrame-578"><a href="#DataFrame-578"><span class="linenos">578</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
+</span><span id="DataFrame-579"><a href="#DataFrame-579"><span class="linenos">579</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
+</span><span id="DataFrame-580"><a href="#DataFrame-580"><span class="linenos">580</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
+</span><span id="DataFrame-581"><a href="#DataFrame-581"><span class="linenos">581</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-582"><a href="#DataFrame-582"><span class="linenos">582</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="DataFrame-583"><a href="#DataFrame-583"><span class="linenos">583</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="DataFrame-584"><a href="#DataFrame-584"><span class="linenos">584</span></a> <span class="n">r_columns_unused</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="n">r_columns</span><span class="p">)</span>
+</span><span id="DataFrame-585"><a href="#DataFrame-585"><span class="linenos">585</span></a> <span class="k">for</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">l_columns</span><span class="p">:</span>
+</span><span id="DataFrame-586"><a href="#DataFrame-586"><span class="linenos">586</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
+</span><span id="DataFrame-587"><a href="#DataFrame-587"><span class="linenos">587</span></a> <span class="k">if</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">r_columns</span><span class="p">:</span>
+</span><span id="DataFrame-588"><a href="#DataFrame-588"><span class="linenos">588</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
+</span><span id="DataFrame-589"><a href="#DataFrame-589"><span class="linenos">589</span></a> <span class="n">r_columns_unused</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
+</span><span id="DataFrame-590"><a href="#DataFrame-590"><span class="linenos">590</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-591"><a href="#DataFrame-591"><span class="linenos">591</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">l_column</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
+</span><span id="DataFrame-592"><a href="#DataFrame-592"><span class="linenos">592</span></a> <span class="k">for</span> <span class="n">r_column</span> <span class="ow">in</span> <span class="n">r_columns_unused</span><span class="p">:</span>
+</span><span id="DataFrame-593"><a href="#DataFrame-593"><span class="linenos">593</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">r_column</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
+</span><span id="DataFrame-594"><a href="#DataFrame-594"><span class="linenos">594</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">r_column</span><span class="p">)</span>
+</span><span id="DataFrame-595"><a href="#DataFrame-595"><span class="linenos">595</span></a> <span class="n">r_df</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="DataFrame-596"><a href="#DataFrame-596"><span class="linenos">596</span></a> <span class="n">other</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">r_expressions</span><span class="p">))</span>
+</span><span id="DataFrame-597"><a href="#DataFrame-597"><span class="linenos">597</span></a> <span class="p">)</span>
+</span><span id="DataFrame-598"><a href="#DataFrame-598"><span class="linenos">598</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-599"><a href="#DataFrame-599"><span class="linenos">599</span></a> <span class="k">if</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
+</span><span id="DataFrame-600"><a href="#DataFrame-600"><span class="linenos">600</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">l_expressions</span><span class="p">))</span>
+</span><span id="DataFrame-601"><a href="#DataFrame-601"><span class="linenos">601</span></a> <span class="k">return</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">r_df</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-602"><a href="#DataFrame-602"><span class="linenos">602</span></a>
+</span><span id="DataFrame-603"><a href="#DataFrame-603"><span class="linenos">603</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-604"><a href="#DataFrame-604"><span class="linenos">604</span></a> <span class="k">def</span> <span class="nf">intersect</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-605"><a href="#DataFrame-605"><span class="linenos">605</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
+</span><span id="DataFrame-606"><a href="#DataFrame-606"><span class="linenos">606</span></a>
+</span><span id="DataFrame-607"><a href="#DataFrame-607"><span class="linenos">607</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-608"><a href="#DataFrame-608"><span class="linenos">608</span></a> <span class="k">def</span> <span class="nf">intersectAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-609"><a href="#DataFrame-609"><span class="linenos">609</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-610"><a href="#DataFrame-610"><span class="linenos">610</span></a>
+</span><span id="DataFrame-611"><a href="#DataFrame-611"><span class="linenos">611</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-612"><a href="#DataFrame-612"><span class="linenos">612</span></a> <span class="k">def</span> <span class="nf">exceptAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-613"><a href="#DataFrame-613"><span class="linenos">613</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Except</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-614"><a href="#DataFrame-614"><span class="linenos">614</span></a>
+</span><span id="DataFrame-615"><a href="#DataFrame-615"><span class="linenos">615</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-616"><a href="#DataFrame-616"><span class="linenos">616</span></a> <span class="k">def</span> <span class="nf">distinct</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-617"><a href="#DataFrame-617"><span class="linenos">617</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">distinct</span><span class="p">())</span>
+</span><span id="DataFrame-618"><a href="#DataFrame-618"><span class="linenos">618</span></a>
+</span><span id="DataFrame-619"><a href="#DataFrame-619"><span class="linenos">619</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-620"><a href="#DataFrame-620"><span class="linenos">620</span></a> <span class="k">def</span> <span class="nf">dropDuplicates</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
+</span><span id="DataFrame-621"><a href="#DataFrame-621"><span class="linenos">621</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">subset</span><span class="p">:</span>
+</span><span id="DataFrame-622"><a href="#DataFrame-622"><span class="linenos">622</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
+</span><span id="DataFrame-623"><a href="#DataFrame-623"><span class="linenos">623</span></a> <span class="n">column_names</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
+</span><span id="DataFrame-624"><a href="#DataFrame-624"><span class="linenos">624</span></a> <span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span>
+</span><span id="DataFrame-625"><a href="#DataFrame-625"><span class="linenos">625</span></a> <span class="k">return</span> <span class="p">(</span>
+</span><span id="DataFrame-626"><a href="#DataFrame-626"><span class="linenos">626</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-627"><a href="#DataFrame-627"><span class="linenos">627</span></a> <span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span>
+</span><span id="DataFrame-628"><a href="#DataFrame-628"><span class="linenos">628</span></a> <span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
+</span><span id="DataFrame-629"><a href="#DataFrame-629"><span class="linenos">629</span></a> <span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span>
+</span><span id="DataFrame-630"><a href="#DataFrame-630"><span class="linenos">630</span></a> <span class="p">)</span>
+</span><span id="DataFrame-631"><a href="#DataFrame-631"><span class="linenos">631</span></a>
+</span><span id="DataFrame-632"><a href="#DataFrame-632"><span class="linenos">632</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-633"><a href="#DataFrame-633"><span class="linenos">633</span></a> <span class="k">def</span> <span class="nf">dropna</span><span class="p">(</span>
+</span><span id="DataFrame-634"><a href="#DataFrame-634"><span class="linenos">634</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame-635"><a href="#DataFrame-635"><span class="linenos">635</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
+</span><span id="DataFrame-636"><a href="#DataFrame-636"><span class="linenos">636</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-637"><a href="#DataFrame-637"><span class="linenos">637</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-638"><a href="#DataFrame-638"><span class="linenos">638</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-639"><a href="#DataFrame-639"><span class="linenos">639</span></a> <span class="n">minimum_non_null</span> <span class="o">=</span> <span class="n">thresh</span> <span class="ow">or</span> <span class="mi">0</span> <span class="c1"># will be determined later if thresh is null</span>
+</span><span id="DataFrame-640"><a href="#DataFrame-640"><span class="linenos">640</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-641"><a href="#DataFrame-641"><span class="linenos">641</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-642"><a href="#DataFrame-642"><span class="linenos">642</span></a> <span class="k">if</span> <span class="n">subset</span><span class="p">:</span>
+</span><span id="DataFrame-643"><a href="#DataFrame-643"><span class="linenos">643</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
+</span><span id="DataFrame-644"><a href="#DataFrame-644"><span class="linenos">644</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-645"><a href="#DataFrame-645"><span class="linenos">645</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="n">all_columns</span>
+</span><span id="DataFrame-646"><a href="#DataFrame-646"><span class="linenos">646</span></a> <span class="k">if</span> <span class="n">thresh</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="DataFrame-647"><a href="#DataFrame-647"><span class="linenos">647</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">how</span> <span class="o">==</span> <span class="s2">&quot;any&quot;</span> <span class="k">else</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span>
+</span><span id="DataFrame-648"><a href="#DataFrame-648"><span class="linenos">648</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-649"><a href="#DataFrame-649"><span class="linenos">649</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span> <span class="o">-</span> <span class="n">minimum_non_null</span> <span class="o">+</span> <span class="mi">1</span>
+</span><span id="DataFrame-650"><a href="#DataFrame-650"><span class="linenos">650</span></a> <span class="k">if</span> <span class="n">minimum_num_nulls</span> <span class="o">&gt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">):</span>
+</span><span id="DataFrame-651"><a href="#DataFrame-651"><span class="linenos">651</span></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
+</span><span id="DataFrame-652"><a href="#DataFrame-652"><span class="linenos">652</span></a> <span class="sa">f</span><span class="s2">&quot;The minimum num nulls for dropna must be less than or equal to the number of columns. &quot;</span>
+</span><span id="DataFrame-653"><a href="#DataFrame-653"><span class="linenos">653</span></a> <span class="sa">f</span><span class="s2">&quot;Minimum num nulls: </span><span class="si">{</span><span class="n">minimum_num_nulls</span><span class="si">}</span><span class="s2">, Num Columns: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
+</span><span id="DataFrame-654"><a href="#DataFrame-654"><span class="linenos">654</span></a> <span class="p">)</span>
+</span><span id="DataFrame-655"><a href="#DataFrame-655"><span class="linenos">655</span></a> <span class="n">if_null_checks</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-656"><a href="#DataFrame-656"><span class="linenos">656</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">null_check_columns</span>
+</span><span id="DataFrame-657"><a href="#DataFrame-657"><span class="linenos">657</span></a> <span class="p">]</span>
+</span><span id="DataFrame-658"><a href="#DataFrame-658"><span class="linenos">658</span></a> <span class="n">nulls_added_together</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">+</span> <span class="n">y</span><span class="p">,</span> <span class="n">if_null_checks</span><span class="p">)</span>
+</span><span id="DataFrame-659"><a href="#DataFrame-659"><span class="linenos">659</span></a> <span class="n">num_nulls</span> <span class="o">=</span> <span class="n">nulls_added_together</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span>
+</span><span id="DataFrame-660"><a href="#DataFrame-660"><span class="linenos">660</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">num_nulls</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="DataFrame-661"><a href="#DataFrame-661"><span class="linenos">661</span></a> <span class="n">filtered_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">minimum_num_nulls</span><span class="p">))</span>
+</span><span id="DataFrame-662"><a href="#DataFrame-662"><span class="linenos">662</span></a> <span class="n">final_df</span> <span class="o">=</span> <span class="n">filtered_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">all_columns</span><span class="p">)</span>
+</span><span id="DataFrame-663"><a href="#DataFrame-663"><span class="linenos">663</span></a> <span class="k">return</span> <span class="n">final_df</span>
+</span><span id="DataFrame-664"><a href="#DataFrame-664"><span class="linenos">664</span></a>
+</span><span id="DataFrame-665"><a href="#DataFrame-665"><span class="linenos">665</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-666"><a href="#DataFrame-666"><span class="linenos">666</span></a> <span class="k">def</span> <span class="nf">fillna</span><span class="p">(</span>
+</span><span id="DataFrame-667"><a href="#DataFrame-667"><span class="linenos">667</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame-668"><a href="#DataFrame-668"><span class="linenos">668</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">ColumnLiterals</span><span class="p">],</span>
+</span><span id="DataFrame-669"><a href="#DataFrame-669"><span class="linenos">669</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-670"><a href="#DataFrame-670"><span class="linenos">670</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-671"><a href="#DataFrame-671"><span class="linenos">671</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame-672"><a href="#DataFrame-672"><span class="linenos">672</span></a><span class="sd"> Functionality Difference: If you provide a value to replace a null and that type conflicts</span>
+</span><span id="DataFrame-673"><a href="#DataFrame-673"><span class="linenos">673</span></a><span class="sd"> with the type of the column then PySpark will just ignore your replacement.</span>
+</span><span id="DataFrame-674"><a href="#DataFrame-674"><span class="linenos">674</span></a><span class="sd"> This will try to cast them to be the same in some cases. So they won&#39;t always match.</span>
+</span><span id="DataFrame-675"><a href="#DataFrame-675"><span class="linenos">675</span></a><span class="sd"> Best to not mix types so make sure replacement is the same type as the column</span>
+</span><span id="DataFrame-676"><a href="#DataFrame-676"><span class="linenos">676</span></a>
+</span><span id="DataFrame-677"><a href="#DataFrame-677"><span class="linenos">677</span></a><span class="sd"> Possibility for improvement: Use `typeof` function to get the type of the column</span>
+</span><span id="DataFrame-678"><a href="#DataFrame-678"><span class="linenos">678</span></a><span class="sd"> and check if it matches the type of the value provided. If not then make it null.</span>
+</span><span id="DataFrame-679"><a href="#DataFrame-679"><span class="linenos">679</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame-680"><a href="#DataFrame-680"><span class="linenos">680</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
+</span><span id="DataFrame-681"><a href="#DataFrame-681"><span class="linenos">681</span></a>
+</span><span id="DataFrame-682"><a href="#DataFrame-682"><span class="linenos">682</span></a> <span class="n">values</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="DataFrame-683"><a href="#DataFrame-683"><span class="linenos">683</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="DataFrame-684"><a href="#DataFrame-684"><span class="linenos">684</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-685"><a href="#DataFrame-685"><span class="linenos">685</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-686"><a href="#DataFrame-686"><span class="linenos">686</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
+</span><span id="DataFrame-687"><a href="#DataFrame-687"><span class="linenos">687</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+</span><span id="DataFrame-688"><a href="#DataFrame-688"><span class="linenos">688</span></a> <span class="n">values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
+</span><span id="DataFrame-689"><a href="#DataFrame-689"><span class="linenos">689</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="p">))</span>
+</span><span id="DataFrame-690"><a href="#DataFrame-690"><span class="linenos">690</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">columns</span><span class="p">:</span>
+</span><span id="DataFrame-691"><a href="#DataFrame-691"><span class="linenos">691</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
+</span><span id="DataFrame-692"><a href="#DataFrame-692"><span class="linenos">692</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">values</span><span class="p">:</span>
+</span><span id="DataFrame-693"><a href="#DataFrame-693"><span class="linenos">693</span></a> <span class="n">values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame-694"><a href="#DataFrame-694"><span class="linenos">694</span></a> <span class="n">value_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">values</span><span class="p">]</span>
+</span><span id="DataFrame-695"><a href="#DataFrame-695"><span class="linenos">695</span></a>
+</span><span id="DataFrame-696"><a href="#DataFrame-696"><span class="linenos">696</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span>
+</span><span id="DataFrame-697"><a href="#DataFrame-697"><span class="linenos">697</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="p">(</span>
+</span><span id="DataFrame-698"><a href="#DataFrame-698"><span class="linenos">698</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
+</span><span id="DataFrame-699"><a href="#DataFrame-699"><span class="linenos">699</span></a> <span class="p">)</span>
+</span><span id="DataFrame-700"><a href="#DataFrame-700"><span class="linenos">700</span></a> <span class="k">for</span> <span class="n">column</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">value_columns</span><span class="p">)</span>
+</span><span id="DataFrame-701"><a href="#DataFrame-701"><span class="linenos">701</span></a> <span class="p">}</span>
+</span><span id="DataFrame-702"><a href="#DataFrame-702"><span class="linenos">702</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">null_replacement_mapping</span><span class="p">}</span>
+</span><span id="DataFrame-703"><a href="#DataFrame-703"><span class="linenos">703</span></a> <span class="n">null_replacement_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-704"><a href="#DataFrame-704"><span class="linenos">704</span></a> <span class="n">null_replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span>
+</span><span id="DataFrame-705"><a href="#DataFrame-705"><span class="linenos">705</span></a> <span class="p">]</span>
+</span><span id="DataFrame-706"><a href="#DataFrame-706"><span class="linenos">706</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">null_replacement_columns</span><span class="p">)</span>
+</span><span id="DataFrame-707"><a href="#DataFrame-707"><span class="linenos">707</span></a> <span class="k">return</span> <span class="n">new_df</span>
+</span><span id="DataFrame-708"><a href="#DataFrame-708"><span class="linenos">708</span></a>
+</span><span id="DataFrame-709"><a href="#DataFrame-709"><span class="linenos">709</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame-710"><a href="#DataFrame-710"><span class="linenos">710</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
+</span><span id="DataFrame-711"><a href="#DataFrame-711"><span class="linenos">711</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame-712"><a href="#DataFrame-712"><span class="linenos">712</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
+</span><span id="DataFrame-713"><a href="#DataFrame-713"><span class="linenos">713</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-714"><a href="#DataFrame-714"><span class="linenos">714</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Collection</span><span class="p">[</span><span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">|</span> <span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame-715"><a href="#DataFrame-715"><span class="linenos">715</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-716"><a href="#DataFrame-716"><span class="linenos">716</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
+</span><span id="DataFrame-717"><a href="#DataFrame-717"><span class="linenos">717</span></a>
+</span><span id="DataFrame-718"><a href="#DataFrame-718"><span class="linenos">718</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="DataFrame-719"><a href="#DataFrame-719"><span class="linenos">719</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-720"><a href="#DataFrame-720"><span class="linenos">720</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-721"><a href="#DataFrame-721"><span class="linenos">721</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
+</span><span id="DataFrame-722"><a href="#DataFrame-722"><span class="linenos">722</span></a>
+</span><span id="DataFrame-723"><a href="#DataFrame-723"><span class="linenos">723</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
+</span><span id="DataFrame-724"><a href="#DataFrame-724"><span class="linenos">724</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+</span><span id="DataFrame-725"><a href="#DataFrame-725"><span class="linenos">725</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span>
+</span><span id="DataFrame-726"><a href="#DataFrame-726"><span class="linenos">726</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
+</span><span id="DataFrame-727"><a href="#DataFrame-727"><span class="linenos">727</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="n">old_values</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
+</span><span id="DataFrame-728"><a href="#DataFrame-728"><span class="linenos">728</span></a> <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">list</span><span class="p">),</span> <span class="s2">&quot;value must be a list since the replacements are a list&quot;</span>
+</span><span id="DataFrame-729"><a href="#DataFrame-729"><span class="linenos">729</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
+</span><span id="DataFrame-730"><a href="#DataFrame-730"><span class="linenos">730</span></a> <span class="n">value</span>
+</span><span id="DataFrame-731"><a href="#DataFrame-731"><span class="linenos">731</span></a> <span class="p">),</span> <span class="s2">&quot;the replacements and values must be the same length&quot;</span>
+</span><span id="DataFrame-732"><a href="#DataFrame-732"><span class="linenos">732</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="n">to_replace</span>
+</span><span id="DataFrame-733"><a href="#DataFrame-733"><span class="linenos">733</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="n">value</span>
+</span><span id="DataFrame-734"><a href="#DataFrame-734"><span class="linenos">734</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-735"><a href="#DataFrame-735"><span class="linenos">735</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">to_replace</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame-736"><a href="#DataFrame-736"><span class="linenos">736</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame-737"><a href="#DataFrame-737"><span class="linenos">737</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">old_values</span><span class="p">]</span>
+</span><span id="DataFrame-738"><a href="#DataFrame-738"><span class="linenos">738</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">new_values</span><span class="p">]</span>
+</span><span id="DataFrame-739"><a href="#DataFrame-739"><span class="linenos">739</span></a>
+</span><span id="DataFrame-740"><a href="#DataFrame-740"><span class="linenos">740</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{}</span>
+</span><span id="DataFrame-741"><a href="#DataFrame-741"><span class="linenos">741</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span>
+</span><span id="DataFrame-742"><a href="#DataFrame-742"><span class="linenos">742</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
+</span><span id="DataFrame-743"><a href="#DataFrame-743"><span class="linenos">743</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">old_values</span><span class="p">,</span> <span class="n">new_values</span><span class="p">)):</span>
+</span><span id="DataFrame-744"><a href="#DataFrame-744"><span class="linenos">744</span></a> <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+</span><span id="DataFrame-745"><a href="#DataFrame-745"><span class="linenos">745</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span>
+</span><span id="DataFrame-746"><a href="#DataFrame-746"><span class="linenos">746</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-747"><a href="#DataFrame-747"><span class="linenos">747</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="c1"># type: ignore</span>
+</span><span id="DataFrame-748"><a href="#DataFrame-748"><span class="linenos">748</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span>
+</span><span id="DataFrame-749"><a href="#DataFrame-749"><span class="linenos">749</span></a> <span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame-750"><a href="#DataFrame-750"><span class="linenos">750</span></a> <span class="p">)</span>
+</span><span id="DataFrame-751"><a href="#DataFrame-751"><span class="linenos">751</span></a>
+</span><span id="DataFrame-752"><a href="#DataFrame-752"><span class="linenos">752</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">replacement_mapping</span><span class="p">}</span>
+</span><span id="DataFrame-753"><a href="#DataFrame-753"><span class="linenos">753</span></a> <span class="n">replacement_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">]</span>
+</span><span id="DataFrame-754"><a href="#DataFrame-754"><span class="linenos">754</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">replacement_columns</span><span class="p">)</span>
+</span><span id="DataFrame-755"><a href="#DataFrame-755"><span class="linenos">755</span></a> <span class="k">return</span> <span class="n">new_df</span>
+</span><span id="DataFrame-756"><a href="#DataFrame-756"><span class="linenos">756</span></a>
+</span><span id="DataFrame-757"><a href="#DataFrame-757"><span class="linenos">757</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-758"><a href="#DataFrame-758"><span class="linenos">758</span></a> <span class="k">def</span> <span class="nf">withColumn</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">colName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-759"><a href="#DataFrame-759"><span class="linenos">759</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
+</span><span id="DataFrame-760"><a href="#DataFrame-760"><span class="linenos">760</span></a> <span class="n">existing_col_names</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">named_selects</span>
+</span><span id="DataFrame-761"><a href="#DataFrame-761"><span class="linenos">761</span></a> <span class="n">existing_col_index</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="DataFrame-762"><a href="#DataFrame-762"><span class="linenos">762</span></a> <span class="n">existing_col_names</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">colName</span><span class="p">)</span> <span class="k">if</span> <span class="n">colName</span> <span class="ow">in</span> <span class="n">existing_col_names</span> <span class="k">else</span> <span class="kc">None</span>
+</span><span id="DataFrame-763"><a href="#DataFrame-763"><span class="linenos">763</span></a> <span class="p">)</span>
+</span><span id="DataFrame-764"><a href="#DataFrame-764"><span class="linenos">764</span></a> <span class="k">if</span> <span class="n">existing_col_index</span><span class="p">:</span>
+</span><span id="DataFrame-765"><a href="#DataFrame-765"><span class="linenos">765</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-766"><a href="#DataFrame-766"><span class="linenos">766</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span><span class="p">[</span><span class="n">existing_col_index</span><span class="p">]</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">expression</span>
+</span><span id="DataFrame-767"><a href="#DataFrame-767"><span class="linenos">767</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-768"><a href="#DataFrame-768"><span class="linenos">768</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">colName</span><span class="p">),</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="DataFrame-769"><a href="#DataFrame-769"><span class="linenos">769</span></a>
+</span><span id="DataFrame-770"><a href="#DataFrame-770"><span class="linenos">770</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-771"><a href="#DataFrame-771"><span class="linenos">771</span></a> <span class="k">def</span> <span class="nf">withColumnRenamed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">existing</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">new</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
+</span><span id="DataFrame-772"><a href="#DataFrame-772"><span class="linenos">772</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame-773"><a href="#DataFrame-773"><span class="linenos">773</span></a> <span class="n">existing_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-774"><a href="#DataFrame-774"><span class="linenos">774</span></a> <span class="n">expression</span>
+</span><span id="DataFrame-775"><a href="#DataFrame-775"><span class="linenos">775</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span>
+</span><span id="DataFrame-776"><a href="#DataFrame-776"><span class="linenos">776</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="n">existing</span>
+</span><span id="DataFrame-777"><a href="#DataFrame-777"><span class="linenos">777</span></a> <span class="p">]</span>
+</span><span id="DataFrame-778"><a href="#DataFrame-778"><span class="linenos">778</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">existing_columns</span><span class="p">:</span>
+</span><span id="DataFrame-779"><a href="#DataFrame-779"><span class="linenos">779</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Tried to rename a column that doesn&#39;t exist&quot;</span><span class="p">)</span>
+</span><span id="DataFrame-780"><a href="#DataFrame-780"><span class="linenos">780</span></a> <span class="k">for</span> <span class="n">existing_column</span> <span class="ow">in</span> <span class="n">existing_columns</span><span class="p">:</span>
+</span><span id="DataFrame-781"><a href="#DataFrame-781"><span class="linenos">781</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">existing_column</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
+</span><span id="DataFrame-782"><a href="#DataFrame-782"><span class="linenos">782</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">existing_column</span><span class="p">,</span> <span class="n">new</span><span class="p">))</span>
+</span><span id="DataFrame-783"><a href="#DataFrame-783"><span class="linenos">783</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame-784"><a href="#DataFrame-784"><span class="linenos">784</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;alias&quot;</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">new</span><span class="p">))</span>
+</span><span id="DataFrame-785"><a href="#DataFrame-785"><span class="linenos">785</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-786"><a href="#DataFrame-786"><span class="linenos">786</span></a>
+</span><span id="DataFrame-787"><a href="#DataFrame-787"><span class="linenos">787</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame-788"><a href="#DataFrame-788"><span class="linenos">788</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-789"><a href="#DataFrame-789"><span class="linenos">789</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame-790"><a href="#DataFrame-790"><span class="linenos">790</span></a> <span class="n">drop_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame-791"><a href="#DataFrame-791"><span class="linenos">791</span></a> <span class="n">new_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame-792"><a href="#DataFrame-792"><span class="linenos">792</span></a> <span class="n">col</span>
+</span><span id="DataFrame-793"><a href="#DataFrame-793"><span class="linenos">793</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">all_columns</span>
+</span><span id="DataFrame-794"><a href="#DataFrame-794"><span class="linenos">794</span></a> <span class="k">if</span> <span class="n">col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="n">drop_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">drop_column</span> <span class="ow">in</span> <span class="n">drop_cols</span><span class="p">]</span>
+</span><span id="DataFrame-795"><a href="#DataFrame-795"><span class="linenos">795</span></a> <span class="p">]</span>
+</span><span id="DataFrame-796"><a href="#DataFrame-796"><span class="linenos">796</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">new_columns</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame-797"><a href="#DataFrame-797"><span class="linenos">797</span></a>
+</span><span id="DataFrame-798"><a href="#DataFrame-798"><span class="linenos">798</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">LIMIT</span><span class="p">)</span>
+</span><span id="DataFrame-799"><a href="#DataFrame-799"><span class="linenos">799</span></a> <span class="k">def</span> <span class="nf">limit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-800"><a href="#DataFrame-800"><span class="linenos">800</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="n">num</span><span class="p">))</span>
+</span><span id="DataFrame-801"><a href="#DataFrame-801"><span class="linenos">801</span></a>
+</span><span id="DataFrame-802"><a href="#DataFrame-802"><span class="linenos">802</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame-803"><a href="#DataFrame-803"><span class="linenos">803</span></a> <span class="k">def</span> <span class="nf">hint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="n">parameters</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-804"><a href="#DataFrame-804"><span class="linenos">804</span></a> <span class="n">parameter_list</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+</span><span id="DataFrame-805"><a href="#DataFrame-805"><span class="linenos">805</span></a> <span class="n">parameter_columns</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="DataFrame-806"><a href="#DataFrame-806"><span class="linenos">806</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">parameter_list</span><span class="p">)</span>
+</span><span id="DataFrame-807"><a href="#DataFrame-807"><span class="linenos">807</span></a> <span class="k">if</span> <span class="n">parameters</span>
+</span><span id="DataFrame-808"><a href="#DataFrame-808"><span class="linenos">808</span></a> <span class="k">else</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">])</span>
+</span><span id="DataFrame-809"><a href="#DataFrame-809"><span class="linenos">809</span></a> <span class="p">)</span>
+</span><span id="DataFrame-810"><a href="#DataFrame-810"><span class="linenos">810</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">parameter_columns</span><span class="p">)</span>
+</span><span id="DataFrame-811"><a href="#DataFrame-811"><span class="linenos">811</span></a>
+</span><span id="DataFrame-812"><a href="#DataFrame-812"><span class="linenos">812</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame-813"><a href="#DataFrame-813"><span class="linenos">813</span></a> <span class="k">def</span> <span class="nf">repartition</span><span class="p">(</span>
+</span><span id="DataFrame-814"><a href="#DataFrame-814"><span class="linenos">814</span></a> <span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">ColumnOrName</span><span class="p">],</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">ColumnOrName</span>
+</span><span id="DataFrame-815"><a href="#DataFrame-815"><span class="linenos">815</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-816"><a href="#DataFrame-816"><span class="linenos">816</span></a> <span class="n">num_partition_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">numPartitions</span><span class="p">)</span>
+</span><span id="DataFrame-817"><a href="#DataFrame-817"><span class="linenos">817</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame-818"><a href="#DataFrame-818"><span class="linenos">818</span></a> <span class="n">args</span> <span class="o">=</span> <span class="n">num_partition_cols</span> <span class="o">+</span> <span class="n">columns</span>
+</span><span id="DataFrame-819"><a href="#DataFrame-819"><span class="linenos">819</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;repartition&quot;</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
+</span><span id="DataFrame-820"><a href="#DataFrame-820"><span class="linenos">820</span></a>
+</span><span id="DataFrame-821"><a href="#DataFrame-821"><span class="linenos">821</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame-822"><a href="#DataFrame-822"><span class="linenos">822</span></a> <span class="k">def</span> <span class="nf">coalesce</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-823"><a href="#DataFrame-823"><span class="linenos">823</span></a> <span class="n">num_partitions</span> <span class="o">=</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="n">numPartitions</span><span class="p">])</span>
+</span><span id="DataFrame-824"><a href="#DataFrame-824"><span class="linenos">824</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;coalesce&quot;</span><span class="p">,</span> <span class="n">num_partitions</span><span class="p">)</span>
+</span><span id="DataFrame-825"><a href="#DataFrame-825"><span class="linenos">825</span></a>
+</span><span id="DataFrame-826"><a href="#DataFrame-826"><span class="linenos">826</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame-827"><a href="#DataFrame-827"><span class="linenos">827</span></a> <span class="k">def</span> <span class="nf">cache</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-828"><a href="#DataFrame-828"><span class="linenos">828</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storage_level</span><span class="o">=</span><span class="s2">&quot;MEMORY_AND_DISK&quot;</span><span class="p">)</span>
+</span><span id="DataFrame-829"><a href="#DataFrame-829"><span class="linenos">829</span></a>
+</span><span id="DataFrame-830"><a href="#DataFrame-830"><span class="linenos">830</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame-831"><a href="#DataFrame-831"><span class="linenos">831</span></a> <span class="k">def</span> <span class="nf">persist</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">storageLevel</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;MEMORY_AND_DISK_SER&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame-832"><a href="#DataFrame-832"><span class="linenos">832</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame-833"><a href="#DataFrame-833"><span class="linenos">833</span></a><span class="sd"> Storage Level Options: https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-aux-cache-cache-table.html</span>
+</span><span id="DataFrame-834"><a href="#DataFrame-834"><span class="linenos">834</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame-835"><a href="#DataFrame-835"><span class="linenos">835</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storageLevel</span><span class="p">)</span>
</span></pre></div>
@@ -1838,30 +1848,30 @@
<input id="DataFrame.__init__-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
<div class="attr function">
- <span class="name">DataFrame</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">spark</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791554931696&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">expression</span><span class="p">:</span> <span class="n"><a href="../expressions.html#Select">sqlglot.expressions.Select</a></span>,</span><span class="param"> <span class="n">branch_id</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">sequence_id</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">last_op</span><span class="p">:</span> <span class="n">sqlglot</span><span class="o">.</span><span class="n">dataframe</span><span class="o">.</span><span class="n">sql</span><span class="o">.</span><span class="n">operations</span><span class="o">.</span><span class="n">Operation</span> <span class="o">=</span> <span class="o">&lt;</span><span class="n">Operation</span><span class="o">.</span><span class="n">INIT</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">pending_hints</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">output_expression_container</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791555035296&#39;</span><span class="o">&gt;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span>)</span>
+ <span class="name">DataFrame</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">spark</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844522482208&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">expression</span><span class="p">:</span> <span class="n"><a href="../expressions.html#Select">sqlglot.expressions.Select</a></span>,</span><span class="param"> <span class="n">branch_id</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">sequence_id</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">last_op</span><span class="p">:</span> <span class="n">sqlglot</span><span class="o">.</span><span class="n">dataframe</span><span class="o">.</span><span class="n">sql</span><span class="o">.</span><span class="n">operations</span><span class="o">.</span><span class="n">Operation</span> <span class="o">=</span> <span class="o">&lt;</span><span class="n">Operation</span><span class="o">.</span><span class="n">INIT</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">pending_hints</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">output_expression_container</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844523683472&#39;</span><span class="o">&gt;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span>)</span>
<label class="view-source-button" for="DataFrame.__init__-view-source"><span>View Source</span></label>
</div>
<a class="headerlink" href="#DataFrame.__init__"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.__init__-50"><a href="#DataFrame.__init__-50"><span class="linenos">50</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
-</span><span id="DataFrame.__init__-51"><a href="#DataFrame.__init__-51"><span class="linenos">51</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.__init__-52"><a href="#DataFrame.__init__-52"><span class="linenos">52</span></a> <span class="n">spark</span><span class="p">:</span> <span class="n">SparkSession</span><span class="p">,</span>
-</span><span id="DataFrame.__init__-53"><a href="#DataFrame.__init__-53"><span class="linenos">53</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">,</span>
-</span><span id="DataFrame.__init__-54"><a href="#DataFrame.__init__-54"><span class="linenos">54</span></a> <span class="n">branch_id</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.__init__-55"><a href="#DataFrame.__init__-55"><span class="linenos">55</span></a> <span class="n">sequence_id</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.__init__-56"><a href="#DataFrame.__init__-56"><span class="linenos">56</span></a> <span class="n">last_op</span><span class="p">:</span> <span class="n">Operation</span> <span class="o">=</span> <span class="n">Operation</span><span class="o">.</span><span class="n">INIT</span><span class="p">,</span>
-</span><span id="DataFrame.__init__-57"><a href="#DataFrame.__init__-57"><span class="linenos">57</span></a> <span class="n">pending_hints</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.__init__-58"><a href="#DataFrame.__init__-58"><span class="linenos">58</span></a> <span class="n">output_expression_container</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">OutputExpressionContainer</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.__init__-59"><a href="#DataFrame.__init__-59"><span class="linenos">59</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
-</span><span id="DataFrame.__init__-60"><a href="#DataFrame.__init__-60"><span class="linenos">60</span></a> <span class="p">):</span>
-</span><span id="DataFrame.__init__-61"><a href="#DataFrame.__init__-61"><span class="linenos">61</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span> <span class="o">=</span> <span class="n">spark</span>
-</span><span id="DataFrame.__init__-62"><a href="#DataFrame.__init__-62"><span class="linenos">62</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span>
-</span><span id="DataFrame.__init__-63"><a href="#DataFrame.__init__-63"><span class="linenos">63</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">branch_id</span> <span class="o">=</span> <span class="n">branch_id</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_branch_id</span>
-</span><span id="DataFrame.__init__-64"><a href="#DataFrame.__init__-64"><span class="linenos">64</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span> <span class="o">=</span> <span class="n">sequence_id</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_sequence_id</span>
-</span><span id="DataFrame.__init__-65"><a href="#DataFrame.__init__-65"><span class="linenos">65</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">last_op</span> <span class="o">=</span> <span class="n">last_op</span>
-</span><span id="DataFrame.__init__-66"><a href="#DataFrame.__init__-66"><span class="linenos">66</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">pending_hints</span> <span class="o">=</span> <span class="n">pending_hints</span> <span class="ow">or</span> <span class="p">[]</span>
-</span><span id="DataFrame.__init__-67"><a href="#DataFrame.__init__-67"><span class="linenos">67</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">output_expression_container</span> <span class="o">=</span> <span class="n">output_expression_container</span> <span class="ow">or</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">()</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.__init__-48"><a href="#DataFrame.__init__-48"><span class="linenos">48</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
+</span><span id="DataFrame.__init__-49"><a href="#DataFrame.__init__-49"><span class="linenos">49</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.__init__-50"><a href="#DataFrame.__init__-50"><span class="linenos">50</span></a> <span class="n">spark</span><span class="p">:</span> <span class="n">SparkSession</span><span class="p">,</span>
+</span><span id="DataFrame.__init__-51"><a href="#DataFrame.__init__-51"><span class="linenos">51</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">,</span>
+</span><span id="DataFrame.__init__-52"><a href="#DataFrame.__init__-52"><span class="linenos">52</span></a> <span class="n">branch_id</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.__init__-53"><a href="#DataFrame.__init__-53"><span class="linenos">53</span></a> <span class="n">sequence_id</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.__init__-54"><a href="#DataFrame.__init__-54"><span class="linenos">54</span></a> <span class="n">last_op</span><span class="p">:</span> <span class="n">Operation</span> <span class="o">=</span> <span class="n">Operation</span><span class="o">.</span><span class="n">INIT</span><span class="p">,</span>
+</span><span id="DataFrame.__init__-55"><a href="#DataFrame.__init__-55"><span class="linenos">55</span></a> <span class="n">pending_hints</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Expression</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.__init__-56"><a href="#DataFrame.__init__-56"><span class="linenos">56</span></a> <span class="n">output_expression_container</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">OutputExpressionContainer</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.__init__-57"><a href="#DataFrame.__init__-57"><span class="linenos">57</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
+</span><span id="DataFrame.__init__-58"><a href="#DataFrame.__init__-58"><span class="linenos">58</span></a> <span class="p">):</span>
+</span><span id="DataFrame.__init__-59"><a href="#DataFrame.__init__-59"><span class="linenos">59</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span> <span class="o">=</span> <span class="n">spark</span>
+</span><span id="DataFrame.__init__-60"><a href="#DataFrame.__init__-60"><span class="linenos">60</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span>
+</span><span id="DataFrame.__init__-61"><a href="#DataFrame.__init__-61"><span class="linenos">61</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">branch_id</span> <span class="o">=</span> <span class="n">branch_id</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_branch_id</span>
+</span><span id="DataFrame.__init__-62"><a href="#DataFrame.__init__-62"><span class="linenos">62</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span> <span class="o">=</span> <span class="n">sequence_id</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_sequence_id</span>
+</span><span id="DataFrame.__init__-63"><a href="#DataFrame.__init__-63"><span class="linenos">63</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">last_op</span> <span class="o">=</span> <span class="n">last_op</span>
+</span><span id="DataFrame.__init__-64"><a href="#DataFrame.__init__-64"><span class="linenos">64</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">pending_hints</span> <span class="o">=</span> <span class="n">pending_hints</span> <span class="ow">or</span> <span class="p">[]</span>
+</span><span id="DataFrame.__init__-65"><a href="#DataFrame.__init__-65"><span class="linenos">65</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">output_expression_container</span> <span class="o">=</span> <span class="n">output_expression_container</span> <span class="ow">or</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">()</span>
</span></pre></div>
@@ -1954,9 +1964,9 @@
</div>
<a class="headerlink" href="#DataFrame.sparkSession"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.sparkSession-79"><a href="#DataFrame.sparkSession-79"><span class="linenos">79</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame.sparkSession-80"><a href="#DataFrame.sparkSession-80"><span class="linenos">80</span></a> <span class="k">def</span> <span class="nf">sparkSession</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="DataFrame.sparkSession-81"><a href="#DataFrame.sparkSession-81"><span class="linenos">81</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.sparkSession-77"><a href="#DataFrame.sparkSession-77"><span class="linenos">77</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame.sparkSession-78"><a href="#DataFrame.sparkSession-78"><span class="linenos">78</span></a> <span class="k">def</span> <span class="nf">sparkSession</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="DataFrame.sparkSession-79"><a href="#DataFrame.sparkSession-79"><span class="linenos">79</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span>
</span></pre></div>
@@ -1972,9 +1982,9 @@
</div>
<a class="headerlink" href="#DataFrame.write"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.write-83"><a href="#DataFrame.write-83"><span class="linenos">83</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame.write-84"><a href="#DataFrame.write-84"><span class="linenos">84</span></a> <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="DataFrame.write-85"><a href="#DataFrame.write-85"><span class="linenos">85</span></a> <span class="k">return</span> <span class="n">DataFrameWriter</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.write-81"><a href="#DataFrame.write-81"><span class="linenos">81</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame.write-82"><a href="#DataFrame.write-82"><span class="linenos">82</span></a> <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="DataFrame.write-83"><a href="#DataFrame.write-83"><span class="linenos">83</span></a> <span class="k">return</span> <span class="n">DataFrameWriter</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
</span></pre></div>
@@ -1990,19 +2000,19 @@
</div>
<a class="headerlink" href="#DataFrame.latest_cte_name"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.latest_cte_name-87"><a href="#DataFrame.latest_cte_name-87"><span class="linenos">87</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame.latest_cte_name-88"><a href="#DataFrame.latest_cte_name-88"><span class="linenos">88</span></a> <span class="k">def</span> <span class="nf">latest_cte_name</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
-</span><span id="DataFrame.latest_cte_name-89"><a href="#DataFrame.latest_cte_name-89"><span class="linenos">89</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">:</span>
-</span><span id="DataFrame.latest_cte_name-90"><a href="#DataFrame.latest_cte_name-90"><span class="linenos">90</span></a> <span class="n">from_exp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span>
-</span><span id="DataFrame.latest_cte_name-91"><a href="#DataFrame.latest_cte_name-91"><span class="linenos">91</span></a> <span class="k">if</span> <span class="n">from_exp</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span>
-</span><span id="DataFrame.latest_cte_name-92"><a href="#DataFrame.latest_cte_name-92"><span class="linenos">92</span></a> <span class="k">return</span> <span class="n">from_exp</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame.latest_cte_name-93"><a href="#DataFrame.latest_cte_name-93"><span class="linenos">93</span></a> <span class="n">table_alias</span> <span class="o">=</span> <span class="n">from_exp</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">TableAlias</span><span class="p">)</span>
-</span><span id="DataFrame.latest_cte_name-94"><a href="#DataFrame.latest_cte_name-94"><span class="linenos">94</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">table_alias</span><span class="p">:</span>
-</span><span id="DataFrame.latest_cte_name-95"><a href="#DataFrame.latest_cte_name-95"><span class="linenos">95</span></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
-</span><span id="DataFrame.latest_cte_name-96"><a href="#DataFrame.latest_cte_name-96"><span class="linenos">96</span></a> <span class="sa">f</span><span class="s2">&quot;Could not find an alias name for this expression: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="si">}</span><span class="s2">&quot;</span>
-</span><span id="DataFrame.latest_cte_name-97"><a href="#DataFrame.latest_cte_name-97"><span class="linenos">97</span></a> <span class="p">)</span>
-</span><span id="DataFrame.latest_cte_name-98"><a href="#DataFrame.latest_cte_name-98"><span class="linenos">98</span></a> <span class="k">return</span> <span class="n">table_alias</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame.latest_cte_name-99"><a href="#DataFrame.latest_cte_name-99"><span class="linenos">99</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">alias</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.latest_cte_name-85"><a href="#DataFrame.latest_cte_name-85"><span class="linenos">85</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame.latest_cte_name-86"><a href="#DataFrame.latest_cte_name-86"><span class="linenos">86</span></a> <span class="k">def</span> <span class="nf">latest_cte_name</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
+</span><span id="DataFrame.latest_cte_name-87"><a href="#DataFrame.latest_cte_name-87"><span class="linenos">87</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">:</span>
+</span><span id="DataFrame.latest_cte_name-88"><a href="#DataFrame.latest_cte_name-88"><span class="linenos">88</span></a> <span class="n">from_exp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span>
+</span><span id="DataFrame.latest_cte_name-89"><a href="#DataFrame.latest_cte_name-89"><span class="linenos">89</span></a> <span class="k">if</span> <span class="n">from_exp</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span>
+</span><span id="DataFrame.latest_cte_name-90"><a href="#DataFrame.latest_cte_name-90"><span class="linenos">90</span></a> <span class="k">return</span> <span class="n">from_exp</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame.latest_cte_name-91"><a href="#DataFrame.latest_cte_name-91"><span class="linenos">91</span></a> <span class="n">table_alias</span> <span class="o">=</span> <span class="n">from_exp</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">TableAlias</span><span class="p">)</span>
+</span><span id="DataFrame.latest_cte_name-92"><a href="#DataFrame.latest_cte_name-92"><span class="linenos">92</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">table_alias</span><span class="p">:</span>
+</span><span id="DataFrame.latest_cte_name-93"><a href="#DataFrame.latest_cte_name-93"><span class="linenos">93</span></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
+</span><span id="DataFrame.latest_cte_name-94"><a href="#DataFrame.latest_cte_name-94"><span class="linenos">94</span></a> <span class="sa">f</span><span class="s2">&quot;Could not find an alias name for this expression: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="si">}</span><span class="s2">&quot;</span>
+</span><span id="DataFrame.latest_cte_name-95"><a href="#DataFrame.latest_cte_name-95"><span class="linenos">95</span></a> <span class="p">)</span>
+</span><span id="DataFrame.latest_cte_name-96"><a href="#DataFrame.latest_cte_name-96"><span class="linenos">96</span></a> <span class="k">return</span> <span class="n">table_alias</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame.latest_cte_name-97"><a href="#DataFrame.latest_cte_name-97"><span class="linenos">97</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">alias</span>
</span></pre></div>
@@ -2018,9 +2028,9 @@
</div>
<a class="headerlink" href="#DataFrame.pending_join_hints"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.pending_join_hints-101"><a href="#DataFrame.pending_join_hints-101"><span class="linenos">101</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame.pending_join_hints-102"><a href="#DataFrame.pending_join_hints-102"><span class="linenos">102</span></a> <span class="k">def</span> <span class="nf">pending_join_hints</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="DataFrame.pending_join_hints-103"><a href="#DataFrame.pending_join_hints-103"><span class="linenos">103</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">hint</span> <span class="k">for</span> <span class="n">hint</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">pending_hints</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">hint</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">JoinHint</span><span class="p">)]</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.pending_join_hints-99"><a href="#DataFrame.pending_join_hints-99"><span class="linenos"> 99</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame.pending_join_hints-100"><a href="#DataFrame.pending_join_hints-100"><span class="linenos">100</span></a> <span class="k">def</span> <span class="nf">pending_join_hints</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="DataFrame.pending_join_hints-101"><a href="#DataFrame.pending_join_hints-101"><span class="linenos">101</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">hint</span> <span class="k">for</span> <span class="n">hint</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">pending_hints</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">hint</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">JoinHint</span><span class="p">)]</span>
</span></pre></div>
@@ -2036,9 +2046,9 @@
</div>
<a class="headerlink" href="#DataFrame.pending_partition_hints"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.pending_partition_hints-105"><a href="#DataFrame.pending_partition_hints-105"><span class="linenos">105</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame.pending_partition_hints-106"><a href="#DataFrame.pending_partition_hints-106"><span class="linenos">106</span></a> <span class="k">def</span> <span class="nf">pending_partition_hints</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-</span><span id="DataFrame.pending_partition_hints-107"><a href="#DataFrame.pending_partition_hints-107"><span class="linenos">107</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">hint</span> <span class="k">for</span> <span class="n">hint</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">pending_hints</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">hint</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Anonymous</span><span class="p">)]</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.pending_partition_hints-103"><a href="#DataFrame.pending_partition_hints-103"><span class="linenos">103</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame.pending_partition_hints-104"><a href="#DataFrame.pending_partition_hints-104"><span class="linenos">104</span></a> <span class="k">def</span> <span class="nf">pending_partition_hints</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+</span><span id="DataFrame.pending_partition_hints-105"><a href="#DataFrame.pending_partition_hints-105"><span class="linenos">105</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">hint</span> <span class="k">for</span> <span class="n">hint</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">pending_hints</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">hint</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Anonymous</span><span class="p">)]</span>
</span></pre></div>
@@ -2054,9 +2064,9 @@
</div>
<a class="headerlink" href="#DataFrame.columns"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.columns-109"><a href="#DataFrame.columns-109"><span class="linenos">109</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame.columns-110"><a href="#DataFrame.columns-110"><span class="linenos">110</span></a> <span class="k">def</span> <span class="nf">columns</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
-</span><span id="DataFrame.columns-111"><a href="#DataFrame.columns-111"><span class="linenos">111</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">named_selects</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.columns-107"><a href="#DataFrame.columns-107"><span class="linenos">107</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame.columns-108"><a href="#DataFrame.columns-108"><span class="linenos">108</span></a> <span class="k">def</span> <span class="nf">columns</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
+</span><span id="DataFrame.columns-109"><a href="#DataFrame.columns-109"><span class="linenos">109</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">named_selects</span>
</span></pre></div>
@@ -2072,9 +2082,9 @@
</div>
<a class="headerlink" href="#DataFrame.na"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.na-113"><a href="#DataFrame.na-113"><span class="linenos">113</span></a> <span class="nd">@property</span>
-</span><span id="DataFrame.na-114"><a href="#DataFrame.na-114"><span class="linenos">114</span></a> <span class="k">def</span> <span class="nf">na</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameNaFunctions</span><span class="p">:</span>
-</span><span id="DataFrame.na-115"><a href="#DataFrame.na-115"><span class="linenos">115</span></a> <span class="k">return</span> <span class="n">DataFrameNaFunctions</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.na-111"><a href="#DataFrame.na-111"><span class="linenos">111</span></a> <span class="nd">@property</span>
+</span><span id="DataFrame.na-112"><a href="#DataFrame.na-112"><span class="linenos">112</span></a> <span class="k">def</span> <span class="nf">na</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrameNaFunctions</span><span class="p">:</span>
+</span><span id="DataFrame.na-113"><a href="#DataFrame.na-113"><span class="linenos">113</span></a> <span class="k">return</span> <span class="n">DataFrameNaFunctions</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
</span></pre></div>
@@ -2086,7 +2096,7 @@
<div class="attr function">
<span class="def">def</span>
- <span class="name">sql</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">dialect</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791550901712&#39;</span><span class="o">&gt;</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">optimize</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>:</span></span>
+ <span class="name">sql</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">dialect</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519222032&#39;</span><span class="o">&gt;</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">optimize</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>:</span></span>
<label class="view-source-button" for="DataFrame.sql-view-source"><span>View Source</span></label>
@@ -2103,63 +2113,64 @@
</span><span id="DataFrame.sql-307"><a href="#DataFrame.sql-307"><span class="linenos">307</span></a> <span class="n">replacement_mapping</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Identifier</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Identifier</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
</span><span id="DataFrame.sql-308"><a href="#DataFrame.sql-308"><span class="linenos">308</span></a>
</span><span id="DataFrame.sql-309"><a href="#DataFrame.sql-309"><span class="linenos">309</span></a> <span class="k">for</span> <span class="n">expression_type</span><span class="p">,</span> <span class="n">select_expression</span> <span class="ow">in</span> <span class="n">select_expressions</span><span class="p">:</span>
-</span><span id="DataFrame.sql-310"><a href="#DataFrame.sql-310"><span class="linenos">310</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">replace_id_value</span><span class="p">,</span> <span class="n">replacement_mapping</span><span class="p">)</span>
-</span><span id="DataFrame.sql-311"><a href="#DataFrame.sql-311"><span class="linenos">311</span></a> <span class="k">if</span> <span class="n">optimize</span><span class="p">:</span>
-</span><span id="DataFrame.sql-312"><a href="#DataFrame.sql-312"><span class="linenos">312</span></a> <span class="n">quote_identifiers</span><span class="p">(</span><span class="n">select_expression</span><span class="p">,</span> <span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">)</span>
-</span><span id="DataFrame.sql-313"><a href="#DataFrame.sql-313"><span class="linenos">313</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span>
-</span><span id="DataFrame.sql-314"><a href="#DataFrame.sql-314"><span class="linenos">314</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">,</span> <span class="n">optimize_func</span><span class="p">(</span><span class="n">select_expression</span><span class="p">,</span> <span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">)</span>
-</span><span id="DataFrame.sql-315"><a href="#DataFrame.sql-315"><span class="linenos">315</span></a> <span class="p">)</span>
-</span><span id="DataFrame.sql-316"><a href="#DataFrame.sql-316"><span class="linenos">316</span></a>
-</span><span id="DataFrame.sql-317"><a href="#DataFrame.sql-317"><span class="linenos">317</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_replace_cte_names_with_hashes</span><span class="p">(</span><span class="n">select_expression</span><span class="p">)</span>
-</span><span id="DataFrame.sql-318"><a href="#DataFrame.sql-318"><span class="linenos">318</span></a>
-</span><span id="DataFrame.sql-319"><a href="#DataFrame.sql-319"><span class="linenos">319</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Cache</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Drop</span><span class="p">]</span>
-</span><span id="DataFrame.sql-320"><a href="#DataFrame.sql-320"><span class="linenos">320</span></a> <span class="k">if</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Cache</span><span class="p">:</span>
-</span><span id="DataFrame.sql-321"><a href="#DataFrame.sql-321"><span class="linenos">321</span></a> <span class="n">cache_table_name</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_create_hash_from_expression</span><span class="p">(</span><span class="n">select_expression</span><span class="p">)</span>
-</span><span id="DataFrame.sql-322"><a href="#DataFrame.sql-322"><span class="linenos">322</span></a> <span class="n">cache_table</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">cache_table_name</span><span class="p">)</span>
-</span><span id="DataFrame.sql-323"><a href="#DataFrame.sql-323"><span class="linenos">323</span></a> <span class="n">original_alias_name</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;cte_alias_name&quot;</span><span class="p">]</span>
-</span><span id="DataFrame.sql-324"><a href="#DataFrame.sql-324"><span class="linenos">324</span></a>
-</span><span id="DataFrame.sql-325"><a href="#DataFrame.sql-325"><span class="linenos">325</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">original_alias_name</span><span class="p">)]</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span> <span class="c1"># type: ignore</span>
-</span><span id="DataFrame.sql-326"><a href="#DataFrame.sql-326"><span class="linenos">326</span></a> <span class="n">cache_table_name</span>
-</span><span id="DataFrame.sql-327"><a href="#DataFrame.sql-327"><span class="linenos">327</span></a> <span class="p">)</span>
-</span><span id="DataFrame.sql-328"><a href="#DataFrame.sql-328"><span class="linenos">328</span></a> <span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">add_table</span><span class="p">(</span>
-</span><span id="DataFrame.sql-329"><a href="#DataFrame.sql-329"><span class="linenos">329</span></a> <span class="n">cache_table_name</span><span class="p">,</span>
-</span><span id="DataFrame.sql-330"><a href="#DataFrame.sql-330"><span class="linenos">330</span></a> <span class="p">{</span>
-</span><span id="DataFrame.sql-331"><a href="#DataFrame.sql-331"><span class="linenos">331</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">expression</span><span class="o">.</span><span class="n">type</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">)</span>
-</span><span id="DataFrame.sql-332"><a href="#DataFrame.sql-332"><span class="linenos">332</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">expressions</span>
-</span><span id="DataFrame.sql-333"><a href="#DataFrame.sql-333"><span class="linenos">333</span></a> <span class="p">},</span>
-</span><span id="DataFrame.sql-334"><a href="#DataFrame.sql-334"><span class="linenos">334</span></a> <span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">,</span>
-</span><span id="DataFrame.sql-335"><a href="#DataFrame.sql-335"><span class="linenos">335</span></a> <span class="p">)</span>
-</span><span id="DataFrame.sql-336"><a href="#DataFrame.sql-336"><span class="linenos">336</span></a>
-</span><span id="DataFrame.sql-337"><a href="#DataFrame.sql-337"><span class="linenos">337</span></a> <span class="n">cache_storage_level</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;cache_storage_level&quot;</span><span class="p">]</span>
-</span><span id="DataFrame.sql-338"><a href="#DataFrame.sql-338"><span class="linenos">338</span></a> <span class="n">options</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.sql-339"><a href="#DataFrame.sql-339"><span class="linenos">339</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Literal</span><span class="o">.</span><span class="n">string</span><span class="p">(</span><span class="s2">&quot;storageLevel&quot;</span><span class="p">),</span>
-</span><span id="DataFrame.sql-340"><a href="#DataFrame.sql-340"><span class="linenos">340</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Literal</span><span class="o">.</span><span class="n">string</span><span class="p">(</span><span class="n">cache_storage_level</span><span class="p">),</span>
-</span><span id="DataFrame.sql-341"><a href="#DataFrame.sql-341"><span class="linenos">341</span></a> <span class="p">]</span>
-</span><span id="DataFrame.sql-342"><a href="#DataFrame.sql-342"><span class="linenos">342</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Cache</span><span class="p">(</span>
-</span><span id="DataFrame.sql-343"><a href="#DataFrame.sql-343"><span class="linenos">343</span></a> <span class="n">this</span><span class="o">=</span><span class="n">cache_table</span><span class="p">,</span> <span class="n">expression</span><span class="o">=</span><span class="n">select_expression</span><span class="p">,</span> <span class="n">lazy</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">options</span><span class="o">=</span><span class="n">options</span>
-</span><span id="DataFrame.sql-344"><a href="#DataFrame.sql-344"><span class="linenos">344</span></a> <span class="p">)</span>
-</span><span id="DataFrame.sql-345"><a href="#DataFrame.sql-345"><span class="linenos">345</span></a>
-</span><span id="DataFrame.sql-346"><a href="#DataFrame.sql-346"><span class="linenos">346</span></a> <span class="c1"># We will drop the &quot;view&quot; if it exists before running the cache table</span>
-</span><span id="DataFrame.sql-347"><a href="#DataFrame.sql-347"><span class="linenos">347</span></a> <span class="n">output_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Drop</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">cache_table</span><span class="p">,</span> <span class="n">exists</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">kind</span><span class="o">=</span><span class="s2">&quot;VIEW&quot;</span><span class="p">))</span>
-</span><span id="DataFrame.sql-348"><a href="#DataFrame.sql-348"><span class="linenos">348</span></a> <span class="k">elif</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Create</span><span class="p">:</span>
-</span><span id="DataFrame.sql-349"><a href="#DataFrame.sql-349"><span class="linenos">349</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">output_expression_container</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.sql-350"><a href="#DataFrame.sql-350"><span class="linenos">350</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;expression&quot;</span><span class="p">,</span> <span class="n">select_expression</span><span class="p">)</span>
-</span><span id="DataFrame.sql-351"><a href="#DataFrame.sql-351"><span class="linenos">351</span></a> <span class="k">elif</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">:</span>
-</span><span id="DataFrame.sql-352"><a href="#DataFrame.sql-352"><span class="linenos">352</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">output_expression_container</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.sql-353"><a href="#DataFrame.sql-353"><span class="linenos">353</span></a> <span class="n">select_without_ctes</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.sql-354"><a href="#DataFrame.sql-354"><span class="linenos">354</span></a> <span class="n">select_without_ctes</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
-</span><span id="DataFrame.sql-355"><a href="#DataFrame.sql-355"><span class="linenos">355</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;expression&quot;</span><span class="p">,</span> <span class="n">select_without_ctes</span><span class="p">)</span>
-</span><span id="DataFrame.sql-356"><a href="#DataFrame.sql-356"><span class="linenos">356</span></a>
-</span><span id="DataFrame.sql-357"><a href="#DataFrame.sql-357"><span class="linenos">357</span></a> <span class="k">if</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">:</span>
-</span><span id="DataFrame.sql-358"><a href="#DataFrame.sql-358"><span class="linenos">358</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">With</span><span class="p">(</span><span class="n">expressions</span><span class="o">=</span><span class="n">select_expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">))</span>
-</span><span id="DataFrame.sql-359"><a href="#DataFrame.sql-359"><span class="linenos">359</span></a> <span class="k">elif</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">:</span>
-</span><span id="DataFrame.sql-360"><a href="#DataFrame.sql-360"><span class="linenos">360</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">select_expression</span>
-</span><span id="DataFrame.sql-361"><a href="#DataFrame.sql-361"><span class="linenos">361</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.sql-362"><a href="#DataFrame.sql-362"><span class="linenos">362</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Invalid expression type: </span><span class="si">{</span><span class="n">expression_type</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
-</span><span id="DataFrame.sql-363"><a href="#DataFrame.sql-363"><span class="linenos">363</span></a>
-</span><span id="DataFrame.sql-364"><a href="#DataFrame.sql-364"><span class="linenos">364</span></a> <span class="n">output_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.sql-365"><a href="#DataFrame.sql-365"><span class="linenos">365</span></a>
-</span><span id="DataFrame.sql-366"><a href="#DataFrame.sql-366"><span class="linenos">366</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">expression</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">output_expressions</span><span class="p">]</span>
+</span><span id="DataFrame.sql-310"><a href="#DataFrame.sql-310"><span class="linenos">310</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span>
+</span><span id="DataFrame.sql-311"><a href="#DataFrame.sql-311"><span class="linenos">311</span></a> <span class="n">replace_id_value</span><span class="p">,</span> <span class="n">replacement_mapping</span>
+</span><span id="DataFrame.sql-312"><a href="#DataFrame.sql-312"><span class="linenos">312</span></a> <span class="p">)</span><span class="o">.</span><span class="n">assert_is</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">)</span>
+</span><span id="DataFrame.sql-313"><a href="#DataFrame.sql-313"><span class="linenos">313</span></a> <span class="k">if</span> <span class="n">optimize</span><span class="p">:</span>
+</span><span id="DataFrame.sql-314"><a href="#DataFrame.sql-314"><span class="linenos">314</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">t</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span>
+</span><span id="DataFrame.sql-315"><a href="#DataFrame.sql-315"><span class="linenos">315</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_optimize</span><span class="p">(</span><span class="n">select_expression</span><span class="p">,</span> <span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">)</span>
+</span><span id="DataFrame.sql-316"><a href="#DataFrame.sql-316"><span class="linenos">316</span></a> <span class="p">)</span>
+</span><span id="DataFrame.sql-317"><a href="#DataFrame.sql-317"><span class="linenos">317</span></a>
+</span><span id="DataFrame.sql-318"><a href="#DataFrame.sql-318"><span class="linenos">318</span></a> <span class="n">select_expression</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_replace_cte_names_with_hashes</span><span class="p">(</span><span class="n">select_expression</span><span class="p">)</span>
+</span><span id="DataFrame.sql-319"><a href="#DataFrame.sql-319"><span class="linenos">319</span></a>
+</span><span id="DataFrame.sql-320"><a href="#DataFrame.sql-320"><span class="linenos">320</span></a> <span class="n">expression</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Cache</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Drop</span><span class="p">]</span>
+</span><span id="DataFrame.sql-321"><a href="#DataFrame.sql-321"><span class="linenos">321</span></a> <span class="k">if</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Cache</span><span class="p">:</span>
+</span><span id="DataFrame.sql-322"><a href="#DataFrame.sql-322"><span class="linenos">322</span></a> <span class="n">cache_table_name</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_create_hash_from_expression</span><span class="p">(</span><span class="n">select_expression</span><span class="p">)</span>
+</span><span id="DataFrame.sql-323"><a href="#DataFrame.sql-323"><span class="linenos">323</span></a> <span class="n">cache_table</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">cache_table_name</span><span class="p">)</span>
+</span><span id="DataFrame.sql-324"><a href="#DataFrame.sql-324"><span class="linenos">324</span></a> <span class="n">original_alias_name</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;cte_alias_name&quot;</span><span class="p">]</span>
+</span><span id="DataFrame.sql-325"><a href="#DataFrame.sql-325"><span class="linenos">325</span></a>
+</span><span id="DataFrame.sql-326"><a href="#DataFrame.sql-326"><span class="linenos">326</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">original_alias_name</span><span class="p">)]</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span> <span class="c1"># type: ignore</span>
+</span><span id="DataFrame.sql-327"><a href="#DataFrame.sql-327"><span class="linenos">327</span></a> <span class="n">cache_table_name</span>
+</span><span id="DataFrame.sql-328"><a href="#DataFrame.sql-328"><span class="linenos">328</span></a> <span class="p">)</span>
+</span><span id="DataFrame.sql-329"><a href="#DataFrame.sql-329"><span class="linenos">329</span></a> <span class="n">sqlglot</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">add_table</span><span class="p">(</span>
+</span><span id="DataFrame.sql-330"><a href="#DataFrame.sql-330"><span class="linenos">330</span></a> <span class="n">cache_table_name</span><span class="p">,</span>
+</span><span id="DataFrame.sql-331"><a href="#DataFrame.sql-331"><span class="linenos">331</span></a> <span class="p">{</span>
+</span><span id="DataFrame.sql-332"><a href="#DataFrame.sql-332"><span class="linenos">332</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">expression</span><span class="o">.</span><span class="n">type</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">)</span>
+</span><span id="DataFrame.sql-333"><a href="#DataFrame.sql-333"><span class="linenos">333</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">expressions</span>
+</span><span id="DataFrame.sql-334"><a href="#DataFrame.sql-334"><span class="linenos">334</span></a> <span class="p">},</span>
+</span><span id="DataFrame.sql-335"><a href="#DataFrame.sql-335"><span class="linenos">335</span></a> <span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">,</span>
+</span><span id="DataFrame.sql-336"><a href="#DataFrame.sql-336"><span class="linenos">336</span></a> <span class="p">)</span>
+</span><span id="DataFrame.sql-337"><a href="#DataFrame.sql-337"><span class="linenos">337</span></a>
+</span><span id="DataFrame.sql-338"><a href="#DataFrame.sql-338"><span class="linenos">338</span></a> <span class="n">cache_storage_level</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;cache_storage_level&quot;</span><span class="p">]</span>
+</span><span id="DataFrame.sql-339"><a href="#DataFrame.sql-339"><span class="linenos">339</span></a> <span class="n">options</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.sql-340"><a href="#DataFrame.sql-340"><span class="linenos">340</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Literal</span><span class="o">.</span><span class="n">string</span><span class="p">(</span><span class="s2">&quot;storageLevel&quot;</span><span class="p">),</span>
+</span><span id="DataFrame.sql-341"><a href="#DataFrame.sql-341"><span class="linenos">341</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Literal</span><span class="o">.</span><span class="n">string</span><span class="p">(</span><span class="n">cache_storage_level</span><span class="p">),</span>
+</span><span id="DataFrame.sql-342"><a href="#DataFrame.sql-342"><span class="linenos">342</span></a> <span class="p">]</span>
+</span><span id="DataFrame.sql-343"><a href="#DataFrame.sql-343"><span class="linenos">343</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">exp</span><span class="o">.</span><span class="n">Cache</span><span class="p">(</span>
+</span><span id="DataFrame.sql-344"><a href="#DataFrame.sql-344"><span class="linenos">344</span></a> <span class="n">this</span><span class="o">=</span><span class="n">cache_table</span><span class="p">,</span> <span class="n">expression</span><span class="o">=</span><span class="n">select_expression</span><span class="p">,</span> <span class="n">lazy</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">options</span><span class="o">=</span><span class="n">options</span>
+</span><span id="DataFrame.sql-345"><a href="#DataFrame.sql-345"><span class="linenos">345</span></a> <span class="p">)</span>
+</span><span id="DataFrame.sql-346"><a href="#DataFrame.sql-346"><span class="linenos">346</span></a>
+</span><span id="DataFrame.sql-347"><a href="#DataFrame.sql-347"><span class="linenos">347</span></a> <span class="c1"># We will drop the &quot;view&quot; if it exists before running the cache table</span>
+</span><span id="DataFrame.sql-348"><a href="#DataFrame.sql-348"><span class="linenos">348</span></a> <span class="n">output_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Drop</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">cache_table</span><span class="p">,</span> <span class="n">exists</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">kind</span><span class="o">=</span><span class="s2">&quot;VIEW&quot;</span><span class="p">))</span>
+</span><span id="DataFrame.sql-349"><a href="#DataFrame.sql-349"><span class="linenos">349</span></a> <span class="k">elif</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Create</span><span class="p">:</span>
+</span><span id="DataFrame.sql-350"><a href="#DataFrame.sql-350"><span class="linenos">350</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">output_expression_container</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.sql-351"><a href="#DataFrame.sql-351"><span class="linenos">351</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;expression&quot;</span><span class="p">,</span> <span class="n">select_expression</span><span class="p">)</span>
+</span><span id="DataFrame.sql-352"><a href="#DataFrame.sql-352"><span class="linenos">352</span></a> <span class="k">elif</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Insert</span><span class="p">:</span>
+</span><span id="DataFrame.sql-353"><a href="#DataFrame.sql-353"><span class="linenos">353</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">output_expression_container</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.sql-354"><a href="#DataFrame.sql-354"><span class="linenos">354</span></a> <span class="n">select_without_ctes</span> <span class="o">=</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.sql-355"><a href="#DataFrame.sql-355"><span class="linenos">355</span></a> <span class="n">select_without_ctes</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+</span><span id="DataFrame.sql-356"><a href="#DataFrame.sql-356"><span class="linenos">356</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;expression&quot;</span><span class="p">,</span> <span class="n">select_without_ctes</span><span class="p">)</span>
+</span><span id="DataFrame.sql-357"><a href="#DataFrame.sql-357"><span class="linenos">357</span></a>
+</span><span id="DataFrame.sql-358"><a href="#DataFrame.sql-358"><span class="linenos">358</span></a> <span class="k">if</span> <span class="n">select_expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">:</span>
+</span><span id="DataFrame.sql-359"><a href="#DataFrame.sql-359"><span class="linenos">359</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;with&quot;</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">With</span><span class="p">(</span><span class="n">expressions</span><span class="o">=</span><span class="n">select_expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">))</span>
+</span><span id="DataFrame.sql-360"><a href="#DataFrame.sql-360"><span class="linenos">360</span></a> <span class="k">elif</span> <span class="n">expression_type</span> <span class="o">==</span> <span class="n">exp</span><span class="o">.</span><span class="n">Select</span><span class="p">:</span>
+</span><span id="DataFrame.sql-361"><a href="#DataFrame.sql-361"><span class="linenos">361</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">select_expression</span>
+</span><span id="DataFrame.sql-362"><a href="#DataFrame.sql-362"><span class="linenos">362</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.sql-363"><a href="#DataFrame.sql-363"><span class="linenos">363</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Invalid expression type: </span><span class="si">{</span><span class="n">expression_type</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
+</span><span id="DataFrame.sql-364"><a href="#DataFrame.sql-364"><span class="linenos">364</span></a>
+</span><span id="DataFrame.sql-365"><a href="#DataFrame.sql-365"><span class="linenos">365</span></a> <span class="n">output_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.sql-366"><a href="#DataFrame.sql-366"><span class="linenos">366</span></a>
+</span><span id="DataFrame.sql-367"><a href="#DataFrame.sql-367"><span class="linenos">367</span></a> <span class="k">return</span> <span class="p">[</span><span class="n">expression</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">output_expressions</span><span class="p">]</span>
</span></pre></div>
@@ -2177,8 +2188,8 @@
</div>
<a class="headerlink" href="#DataFrame.copy"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.copy-368"><a href="#DataFrame.copy-368"><span class="linenos">368</span></a> <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.copy-369"><a href="#DataFrame.copy-369"><span class="linenos">369</span></a> <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="o">**</span><span class="n">object_to_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">))</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.copy-369"><a href="#DataFrame.copy-369"><span class="linenos">369</span></a> <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.copy-370"><a href="#DataFrame.copy-370"><span class="linenos">370</span></a> <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="o">**</span><span class="n">object_to_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">))</span>
</span></pre></div>
@@ -2197,43 +2208,43 @@
</div>
<a class="headerlink" href="#DataFrame.select"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.select-371"><a href="#DataFrame.select-371"><span class="linenos">371</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.select-372"><a href="#DataFrame.select-372"><span class="linenos">372</span></a> <span class="k">def</span> <span class="nf">select</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.select-373"><a href="#DataFrame.select-373"><span class="linenos">373</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame.select-374"><a href="#DataFrame.select-374"><span class="linenos">374</span></a> <span class="n">kwargs</span><span class="p">[</span><span class="s2">&quot;append&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;append&quot;</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
-</span><span id="DataFrame.select-375"><a href="#DataFrame.select-375"><span class="linenos">375</span></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;joins&quot;</span><span class="p">):</span>
-</span><span id="DataFrame.select-376"><a href="#DataFrame.select-376"><span class="linenos">376</span></a> <span class="n">ambiguous_cols</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.select-377"><a href="#DataFrame.select-377"><span class="linenos">377</span></a> <span class="n">col</span>
-</span><span id="DataFrame.select-378"><a href="#DataFrame.select-378"><span class="linenos">378</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span>
-</span><span id="DataFrame.select-379"><a href="#DataFrame.select-379"><span class="linenos">379</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="o">.</span><span class="n">table</span>
-</span><span id="DataFrame.select-380"><a href="#DataFrame.select-380"><span class="linenos">380</span></a> <span class="p">]</span>
-</span><span id="DataFrame.select-381"><a href="#DataFrame.select-381"><span class="linenos">381</span></a> <span class="k">if</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
-</span><span id="DataFrame.select-382"><a href="#DataFrame.select-382"><span class="linenos">382</span></a> <span class="n">join_table_identifiers</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.select-383"><a href="#DataFrame.select-383"><span class="linenos">383</span></a> <span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.select-384"><a href="#DataFrame.select-384"><span class="linenos">384</span></a> <span class="p">]</span>
-</span><span id="DataFrame.select-385"><a href="#DataFrame.select-385"><span class="linenos">385</span></a> <span class="n">cte_names_in_join</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">join_table_identifiers</span><span class="p">]</span>
-</span><span id="DataFrame.select-386"><a href="#DataFrame.select-386"><span class="linenos">386</span></a> <span class="c1"># If we have columns that resolve to multiple CTE expressions then we want to use each CTE left-to-right</span>
-</span><span id="DataFrame.select-387"><a href="#DataFrame.select-387"><span class="linenos">387</span></a> <span class="c1"># and therefore we allow multiple columns with the same name in the result. This matches the behavior</span>
-</span><span id="DataFrame.select-388"><a href="#DataFrame.select-388"><span class="linenos">388</span></a> <span class="c1"># of Spark.</span>
-</span><span id="DataFrame.select-389"><a href="#DataFrame.select-389"><span class="linenos">389</span></a> <span class="n">resolved_column_position</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">col</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">}</span>
-</span><span id="DataFrame.select-390"><a href="#DataFrame.select-390"><span class="linenos">390</span></a> <span class="k">for</span> <span class="n">ambiguous_col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
-</span><span id="DataFrame.select-391"><a href="#DataFrame.select-391"><span class="linenos">391</span></a> <span class="n">ctes_with_column</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.select-392"><a href="#DataFrame.select-392"><span class="linenos">392</span></a> <span class="n">cte</span>
-</span><span id="DataFrame.select-393"><a href="#DataFrame.select-393"><span class="linenos">393</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span>
-</span><span id="DataFrame.select-394"><a href="#DataFrame.select-394"><span class="linenos">394</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte_names_in_join</span>
-</span><span id="DataFrame.select-395"><a href="#DataFrame.select-395"><span class="linenos">395</span></a> <span class="ow">and</span> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span>
-</span><span id="DataFrame.select-396"><a href="#DataFrame.select-396"><span class="linenos">396</span></a> <span class="p">]</span>
-</span><span id="DataFrame.select-397"><a href="#DataFrame.select-397"><span class="linenos">397</span></a> <span class="c1"># Check if there is a CTE with this column that we haven&#39;t used before. If so, use it. Otherwise,</span>
-</span><span id="DataFrame.select-398"><a href="#DataFrame.select-398"><span class="linenos">398</span></a> <span class="c1"># use the same CTE we used before</span>
-</span><span id="DataFrame.select-399"><a href="#DataFrame.select-399"><span class="linenos">399</span></a> <span class="n">cte</span> <span class="o">=</span> <span class="n">seq_get</span><span class="p">(</span><span class="n">ctes_with_column</span><span class="p">,</span> <span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
-</span><span id="DataFrame.select-400"><a href="#DataFrame.select-400"><span class="linenos">400</span></a> <span class="k">if</span> <span class="n">cte</span><span class="p">:</span>
-</span><span id="DataFrame.select-401"><a href="#DataFrame.select-401"><span class="linenos">401</span></a> <span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
-</span><span id="DataFrame.select-402"><a href="#DataFrame.select-402"><span class="linenos">402</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.select-403"><a href="#DataFrame.select-403"><span class="linenos">403</span></a> <span class="n">cte</span> <span class="o">=</span> <span class="n">ctes_with_column</span><span class="p">[</span><span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]]</span>
-</span><span id="DataFrame.select-404"><a href="#DataFrame.select-404"><span class="linenos">404</span></a> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;table&quot;</span><span class="p">,</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
-</span><span id="DataFrame.select-405"><a href="#DataFrame.select-405"><span class="linenos">405</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
-</span><span id="DataFrame.select-406"><a href="#DataFrame.select-406"><span class="linenos">406</span></a> <span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">expression</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">),</span> <span class="o">**</span><span class="n">kwargs</span>
-</span><span id="DataFrame.select-407"><a href="#DataFrame.select-407"><span class="linenos">407</span></a> <span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.select-372"><a href="#DataFrame.select-372"><span class="linenos">372</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.select-373"><a href="#DataFrame.select-373"><span class="linenos">373</span></a> <span class="k">def</span> <span class="nf">select</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.select-374"><a href="#DataFrame.select-374"><span class="linenos">374</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame.select-375"><a href="#DataFrame.select-375"><span class="linenos">375</span></a> <span class="n">kwargs</span><span class="p">[</span><span class="s2">&quot;append&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;append&quot;</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+</span><span id="DataFrame.select-376"><a href="#DataFrame.select-376"><span class="linenos">376</span></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;joins&quot;</span><span class="p">):</span>
+</span><span id="DataFrame.select-377"><a href="#DataFrame.select-377"><span class="linenos">377</span></a> <span class="n">ambiguous_cols</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.select-378"><a href="#DataFrame.select-378"><span class="linenos">378</span></a> <span class="n">col</span>
+</span><span id="DataFrame.select-379"><a href="#DataFrame.select-379"><span class="linenos">379</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span>
+</span><span id="DataFrame.select-380"><a href="#DataFrame.select-380"><span class="linenos">380</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">col</span><span class="o">.</span><span class="n">column_expression</span><span class="o">.</span><span class="n">table</span>
+</span><span id="DataFrame.select-381"><a href="#DataFrame.select-381"><span class="linenos">381</span></a> <span class="p">]</span>
+</span><span id="DataFrame.select-382"><a href="#DataFrame.select-382"><span class="linenos">382</span></a> <span class="k">if</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
+</span><span id="DataFrame.select-383"><a href="#DataFrame.select-383"><span class="linenos">383</span></a> <span class="n">join_table_identifiers</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.select-384"><a href="#DataFrame.select-384"><span class="linenos">384</span></a> <span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.select-385"><a href="#DataFrame.select-385"><span class="linenos">385</span></a> <span class="p">]</span>
+</span><span id="DataFrame.select-386"><a href="#DataFrame.select-386"><span class="linenos">386</span></a> <span class="n">cte_names_in_join</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">this</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">join_table_identifiers</span><span class="p">]</span>
+</span><span id="DataFrame.select-387"><a href="#DataFrame.select-387"><span class="linenos">387</span></a> <span class="c1"># If we have columns that resolve to multiple CTE expressions then we want to use each CTE left-to-right</span>
+</span><span id="DataFrame.select-388"><a href="#DataFrame.select-388"><span class="linenos">388</span></a> <span class="c1"># and therefore we allow multiple columns with the same name in the result. This matches the behavior</span>
+</span><span id="DataFrame.select-389"><a href="#DataFrame.select-389"><span class="linenos">389</span></a> <span class="c1"># of Spark.</span>
+</span><span id="DataFrame.select-390"><a href="#DataFrame.select-390"><span class="linenos">390</span></a> <span class="n">resolved_column_position</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">col</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">}</span>
+</span><span id="DataFrame.select-391"><a href="#DataFrame.select-391"><span class="linenos">391</span></a> <span class="k">for</span> <span class="n">ambiguous_col</span> <span class="ow">in</span> <span class="n">ambiguous_cols</span><span class="p">:</span>
+</span><span id="DataFrame.select-392"><a href="#DataFrame.select-392"><span class="linenos">392</span></a> <span class="n">ctes_with_column</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.select-393"><a href="#DataFrame.select-393"><span class="linenos">393</span></a> <span class="n">cte</span>
+</span><span id="DataFrame.select-394"><a href="#DataFrame.select-394"><span class="linenos">394</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span>
+</span><span id="DataFrame.select-395"><a href="#DataFrame.select-395"><span class="linenos">395</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte_names_in_join</span>
+</span><span id="DataFrame.select-396"><a href="#DataFrame.select-396"><span class="linenos">396</span></a> <span class="ow">and</span> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span>
+</span><span id="DataFrame.select-397"><a href="#DataFrame.select-397"><span class="linenos">397</span></a> <span class="p">]</span>
+</span><span id="DataFrame.select-398"><a href="#DataFrame.select-398"><span class="linenos">398</span></a> <span class="c1"># Check if there is a CTE with this column that we haven&#39;t used before. If so, use it. Otherwise,</span>
+</span><span id="DataFrame.select-399"><a href="#DataFrame.select-399"><span class="linenos">399</span></a> <span class="c1"># use the same CTE we used before</span>
+</span><span id="DataFrame.select-400"><a href="#DataFrame.select-400"><span class="linenos">400</span></a> <span class="n">cte</span> <span class="o">=</span> <span class="n">seq_get</span><span class="p">(</span><span class="n">ctes_with_column</span><span class="p">,</span> <span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
+</span><span id="DataFrame.select-401"><a href="#DataFrame.select-401"><span class="linenos">401</span></a> <span class="k">if</span> <span class="n">cte</span><span class="p">:</span>
+</span><span id="DataFrame.select-402"><a href="#DataFrame.select-402"><span class="linenos">402</span></a> <span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
+</span><span id="DataFrame.select-403"><a href="#DataFrame.select-403"><span class="linenos">403</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.select-404"><a href="#DataFrame.select-404"><span class="linenos">404</span></a> <span class="n">cte</span> <span class="o">=</span> <span class="n">ctes_with_column</span><span class="p">[</span><span class="n">resolved_column_position</span><span class="p">[</span><span class="n">ambiguous_col</span><span class="p">]]</span>
+</span><span id="DataFrame.select-405"><a href="#DataFrame.select-405"><span class="linenos">405</span></a> <span class="n">ambiguous_col</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;table&quot;</span><span class="p">,</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
+</span><span id="DataFrame.select-406"><a href="#DataFrame.select-406"><span class="linenos">406</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
+</span><span id="DataFrame.select-407"><a href="#DataFrame.select-407"><span class="linenos">407</span></a> <span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">expression</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">),</span> <span class="o">**</span><span class="n">kwargs</span>
+</span><span id="DataFrame.select-408"><a href="#DataFrame.select-408"><span class="linenos">408</span></a> <span class="p">)</span>
</span></pre></div>
@@ -2252,16 +2263,16 @@
</div>
<a class="headerlink" href="#DataFrame.alias"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.alias-409"><a href="#DataFrame.alias-409"><span class="linenos">409</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame.alias-410"><a href="#DataFrame.alias-410"><span class="linenos">410</span></a> <span class="k">def</span> <span class="nf">alias</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.alias-411"><a href="#DataFrame.alias-411"><span class="linenos">411</span></a> <span class="n">new_sequence_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_sequence_id</span>
-</span><span id="DataFrame.alias-412"><a href="#DataFrame.alias-412"><span class="linenos">412</span></a> <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.alias-413"><a href="#DataFrame.alias-413"><span class="linenos">413</span></a> <span class="k">for</span> <span class="n">join_hint</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">:</span>
-</span><span id="DataFrame.alias-414"><a href="#DataFrame.alias-414"><span class="linenos">414</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">join_hint</span><span class="o">.</span><span class="n">expressions</span><span class="p">:</span>
-</span><span id="DataFrame.alias-415"><a href="#DataFrame.alias-415"><span class="linenos">415</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">:</span>
-</span><span id="DataFrame.alias-416"><a href="#DataFrame.alias-416"><span class="linenos">416</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;this&quot;</span><span class="p">,</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_col</span><span class="p">(</span><span class="n">new_sequence_id</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.alias-417"><a href="#DataFrame.alias-417"><span class="linenos">417</span></a> <span class="n">df</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_add_alias_to_mapping</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">new_sequence_id</span><span class="p">)</span>
-</span><span id="DataFrame.alias-418"><a href="#DataFrame.alias-418"><span class="linenos">418</span></a> <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">(</span><span class="n">sequence_id</span><span class="o">=</span><span class="n">new_sequence_id</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.alias-410"><a href="#DataFrame.alias-410"><span class="linenos">410</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame.alias-411"><a href="#DataFrame.alias-411"><span class="linenos">411</span></a> <span class="k">def</span> <span class="nf">alias</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.alias-412"><a href="#DataFrame.alias-412"><span class="linenos">412</span></a> <span class="n">new_sequence_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_random_sequence_id</span>
+</span><span id="DataFrame.alias-413"><a href="#DataFrame.alias-413"><span class="linenos">413</span></a> <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.alias-414"><a href="#DataFrame.alias-414"><span class="linenos">414</span></a> <span class="k">for</span> <span class="n">join_hint</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">:</span>
+</span><span id="DataFrame.alias-415"><a href="#DataFrame.alias-415"><span class="linenos">415</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">join_hint</span><span class="o">.</span><span class="n">expressions</span><span class="p">:</span>
+</span><span id="DataFrame.alias-416"><a href="#DataFrame.alias-416"><span class="linenos">416</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">:</span>
+</span><span id="DataFrame.alias-417"><a href="#DataFrame.alias-417"><span class="linenos">417</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;this&quot;</span><span class="p">,</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_col</span><span class="p">(</span><span class="n">new_sequence_id</span><span class="p">)</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.alias-418"><a href="#DataFrame.alias-418"><span class="linenos">418</span></a> <span class="n">df</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">_add_alias_to_mapping</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">new_sequence_id</span><span class="p">)</span>
+</span><span id="DataFrame.alias-419"><a href="#DataFrame.alias-419"><span class="linenos">419</span></a> <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">(</span><span class="n">sequence_id</span><span class="o">=</span><span class="n">new_sequence_id</span><span class="p">)</span>
</span></pre></div>
@@ -2280,10 +2291,10 @@
</div>
<a class="headerlink" href="#DataFrame.where"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.where-420"><a href="#DataFrame.where-420"><span class="linenos">420</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">WHERE</span><span class="p">)</span>
-</span><span id="DataFrame.where-421"><a href="#DataFrame.where-421"><span class="linenos">421</span></a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">bool</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.where-422"><a href="#DataFrame.where-422"><span class="linenos">422</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">column</span><span class="p">)</span>
-</span><span id="DataFrame.where-423"><a href="#DataFrame.where-423"><span class="linenos">423</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">))</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.where-421"><a href="#DataFrame.where-421"><span class="linenos">421</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">WHERE</span><span class="p">)</span>
+</span><span id="DataFrame.where-422"><a href="#DataFrame.where-422"><span class="linenos">422</span></a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">bool</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.where-423"><a href="#DataFrame.where-423"><span class="linenos">423</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">column</span><span class="p">)</span>
+</span><span id="DataFrame.where-424"><a href="#DataFrame.where-424"><span class="linenos">424</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">))</span>
</span></pre></div>
@@ -2302,10 +2313,10 @@
</div>
<a class="headerlink" href="#DataFrame.filter"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.filter-420"><a href="#DataFrame.filter-420"><span class="linenos">420</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">WHERE</span><span class="p">)</span>
-</span><span id="DataFrame.filter-421"><a href="#DataFrame.filter-421"><span class="linenos">421</span></a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">bool</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.filter-422"><a href="#DataFrame.filter-422"><span class="linenos">422</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">column</span><span class="p">)</span>
-</span><span id="DataFrame.filter-423"><a href="#DataFrame.filter-423"><span class="linenos">423</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">))</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.filter-421"><a href="#DataFrame.filter-421"><span class="linenos">421</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">WHERE</span><span class="p">)</span>
+</span><span id="DataFrame.filter-422"><a href="#DataFrame.filter-422"><span class="linenos">422</span></a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">column</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">Column</span><span class="p">,</span> <span class="nb">bool</span><span class="p">],</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.filter-423"><a href="#DataFrame.filter-423"><span class="linenos">423</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">column</span><span class="p">)</span>
+</span><span id="DataFrame.filter-424"><a href="#DataFrame.filter-424"><span class="linenos">424</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">))</span>
</span></pre></div>
@@ -2324,10 +2335,10 @@
</div>
<a class="headerlink" href="#DataFrame.groupBy"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.groupBy-427"><a href="#DataFrame.groupBy-427"><span class="linenos">427</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">GROUP_BY</span><span class="p">)</span>
-</span><span id="DataFrame.groupBy-428"><a href="#DataFrame.groupBy-428"><span class="linenos">428</span></a> <span class="k">def</span> <span class="nf">groupBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">GroupedData</span><span class="p">:</span>
-</span><span id="DataFrame.groupBy-429"><a href="#DataFrame.groupBy-429"><span class="linenos">429</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame.groupBy-430"><a href="#DataFrame.groupBy-430"><span class="linenos">430</span></a> <span class="k">return</span> <span class="n">GroupedData</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_op</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.groupBy-428"><a href="#DataFrame.groupBy-428"><span class="linenos">428</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">GROUP_BY</span><span class="p">)</span>
+</span><span id="DataFrame.groupBy-429"><a href="#DataFrame.groupBy-429"><span class="linenos">429</span></a> <span class="k">def</span> <span class="nf">groupBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">GroupedData</span><span class="p">:</span>
+</span><span id="DataFrame.groupBy-430"><a href="#DataFrame.groupBy-430"><span class="linenos">430</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame.groupBy-431"><a href="#DataFrame.groupBy-431"><span class="linenos">431</span></a> <span class="k">return</span> <span class="n">GroupedData</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_op</span><span class="p">)</span>
</span></pre></div>
@@ -2346,10 +2357,10 @@
</div>
<a class="headerlink" href="#DataFrame.agg"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.agg-432"><a href="#DataFrame.agg-432"><span class="linenos">432</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.agg-433"><a href="#DataFrame.agg-433"><span class="linenos">433</span></a> <span class="k">def</span> <span class="nf">agg</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">exprs</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.agg-434"><a href="#DataFrame.agg-434"><span class="linenos">434</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">exprs</span><span class="p">)</span>
-</span><span id="DataFrame.agg-435"><a href="#DataFrame.agg-435"><span class="linenos">435</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="o">*</span><span class="n">cols</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.agg-433"><a href="#DataFrame.agg-433"><span class="linenos">433</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.agg-434"><a href="#DataFrame.agg-434"><span class="linenos">434</span></a> <span class="k">def</span> <span class="nf">agg</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">exprs</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.agg-435"><a href="#DataFrame.agg-435"><span class="linenos">435</span></a> <span class="n">cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">exprs</span><span class="p">)</span>
+</span><span id="DataFrame.agg-436"><a href="#DataFrame.agg-436"><span class="linenos">436</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="o">*</span><span class="n">cols</span><span class="p">)</span>
</span></pre></div>
@@ -2368,99 +2379,99 @@
</div>
<a class="headerlink" href="#DataFrame.join"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.join-437"><a href="#DataFrame.join-437"><span class="linenos">437</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.join-438"><a href="#DataFrame.join-438"><span class="linenos">438</span></a> <span class="k">def</span> <span class="nf">join</span><span class="p">(</span>
-</span><span id="DataFrame.join-439"><a href="#DataFrame.join-439"><span class="linenos">439</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.join-440"><a href="#DataFrame.join-440"><span class="linenos">440</span></a> <span class="n">other_df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
-</span><span id="DataFrame.join-441"><a href="#DataFrame.join-441"><span class="linenos">441</span></a> <span class="n">on</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">Column</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Column</span><span class="p">]],</span>
-</span><span id="DataFrame.join-442"><a href="#DataFrame.join-442"><span class="linenos">442</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;inner&quot;</span><span class="p">,</span>
-</span><span id="DataFrame.join-443"><a href="#DataFrame.join-443"><span class="linenos">443</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
-</span><span id="DataFrame.join-444"><a href="#DataFrame.join-444"><span class="linenos">444</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.join-445"><a href="#DataFrame.join-445"><span class="linenos">445</span></a> <span class="n">other_df</span> <span class="o">=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
-</span><span id="DataFrame.join-446"><a href="#DataFrame.join-446"><span class="linenos">446</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">on</span><span class="p">)</span>
-</span><span id="DataFrame.join-447"><a href="#DataFrame.join-447"><span class="linenos">447</span></a> <span class="c1"># We will determine actual &quot;join on&quot; expression later so we don&#39;t provide it at first</span>
-</span><span id="DataFrame.join-448"><a href="#DataFrame.join-448"><span class="linenos">448</span></a> <span class="n">join_expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
-</span><span id="DataFrame.join-449"><a href="#DataFrame.join-449"><span class="linenos">449</span></a> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">,</span> <span class="n">join_type</span><span class="o">=</span><span class="n">how</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
-</span><span id="DataFrame.join-450"><a href="#DataFrame.join-450"><span class="linenos">450</span></a> <span class="p">)</span>
-</span><span id="DataFrame.join-451"><a href="#DataFrame.join-451"><span class="linenos">451</span></a> <span class="n">join_expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_add_ctes_to_expression</span><span class="p">(</span><span class="n">join_expression</span><span class="p">,</span> <span class="n">other_df</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">)</span>
-</span><span id="DataFrame.join-452"><a href="#DataFrame.join-452"><span class="linenos">452</span></a> <span class="n">self_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">join_expression</span><span class="p">)</span>
-</span><span id="DataFrame.join-453"><a href="#DataFrame.join-453"><span class="linenos">453</span></a> <span class="n">other_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">other_df</span><span class="p">)</span>
-</span><span id="DataFrame.join-454"><a href="#DataFrame.join-454"><span class="linenos">454</span></a> <span class="c1"># Determines the join clause and select columns to be used passed on what type of columns were provided for</span>
-</span><span id="DataFrame.join-455"><a href="#DataFrame.join-455"><span class="linenos">455</span></a> <span class="c1"># the join. The columns returned changes based on how the on expression is provided.</span>
-</span><span id="DataFrame.join-456"><a href="#DataFrame.join-456"><span class="linenos">456</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">join_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
-</span><span id="DataFrame.join-457"><a href="#DataFrame.join-457"><span class="linenos">457</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame.join-458"><a href="#DataFrame.join-458"><span class="linenos">458</span></a><span class="sd"> Unique characteristics of join on column names only:</span>
-</span><span id="DataFrame.join-459"><a href="#DataFrame.join-459"><span class="linenos">459</span></a><span class="sd"> * The column names are put at the front of the select list</span>
-</span><span id="DataFrame.join-460"><a href="#DataFrame.join-460"><span class="linenos">460</span></a><span class="sd"> * The column names are deduplicated across the entire select list and only the column names (other dups are allowed)</span>
-</span><span id="DataFrame.join-461"><a href="#DataFrame.join-461"><span class="linenos">461</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame.join-462"><a href="#DataFrame.join-462"><span class="linenos">462</span></a> <span class="n">table_names</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.join-463"><a href="#DataFrame.join-463"><span class="linenos">463</span></a> <span class="n">table</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame.join-464"><a href="#DataFrame.join-464"><span class="linenos">464</span></a> <span class="k">for</span> <span class="n">table</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="n">join_expression</span><span class="p">)</span>
-</span><span id="DataFrame.join-465"><a href="#DataFrame.join-465"><span class="linenos">465</span></a> <span class="p">]</span>
-</span><span id="DataFrame.join-466"><a href="#DataFrame.join-466"><span class="linenos">466</span></a> <span class="n">potential_ctes</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.join-467"><a href="#DataFrame.join-467"><span class="linenos">467</span></a> <span class="n">cte</span>
-</span><span id="DataFrame.join-468"><a href="#DataFrame.join-468"><span class="linenos">468</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">join_expression</span><span class="o">.</span><span class="n">ctes</span>
-</span><span id="DataFrame.join-469"><a href="#DataFrame.join-469"><span class="linenos">469</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">table_names</span>
-</span><span id="DataFrame.join-470"><a href="#DataFrame.join-470"><span class="linenos">470</span></a> <span class="ow">and</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">!=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span>
-</span><span id="DataFrame.join-471"><a href="#DataFrame.join-471"><span class="linenos">471</span></a> <span class="p">]</span>
-</span><span id="DataFrame.join-472"><a href="#DataFrame.join-472"><span class="linenos">472</span></a> <span class="c1"># Determine the table to reference for the left side of the join by checking each of the left side</span>
-</span><span id="DataFrame.join-473"><a href="#DataFrame.join-473"><span class="linenos">473</span></a> <span class="c1"># tables and see if they have the column being referenced.</span>
-</span><span id="DataFrame.join-474"><a href="#DataFrame.join-474"><span class="linenos">474</span></a> <span class="n">join_column_pairs</span> <span class="o">=</span> <span class="p">[]</span>
-</span><span id="DataFrame.join-475"><a href="#DataFrame.join-475"><span class="linenos">475</span></a> <span class="k">for</span> <span class="n">join_column</span> <span class="ow">in</span> <span class="n">join_columns</span><span class="p">:</span>
-</span><span id="DataFrame.join-476"><a href="#DataFrame.join-476"><span class="linenos">476</span></a> <span class="n">num_matching_ctes</span> <span class="o">=</span> <span class="mi">0</span>
-</span><span id="DataFrame.join-477"><a href="#DataFrame.join-477"><span class="linenos">477</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">potential_ctes</span><span class="p">:</span>
-</span><span id="DataFrame.join-478"><a href="#DataFrame.join-478"><span class="linenos">478</span></a> <span class="k">if</span> <span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span><span class="p">:</span>
-</span><span id="DataFrame.join-479"><a href="#DataFrame.join-479"><span class="linenos">479</span></a> <span class="n">left_column</span> <span class="o">=</span> <span class="n">join_column</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
-</span><span id="DataFrame.join-480"><a href="#DataFrame.join-480"><span class="linenos">480</span></a> <span class="n">right_column</span> <span class="o">=</span> <span class="n">join_column</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">)</span>
-</span><span id="DataFrame.join-481"><a href="#DataFrame.join-481"><span class="linenos">481</span></a> <span class="n">join_column_pairs</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">left_column</span><span class="p">,</span> <span class="n">right_column</span><span class="p">))</span>
-</span><span id="DataFrame.join-482"><a href="#DataFrame.join-482"><span class="linenos">482</span></a> <span class="n">num_matching_ctes</span> <span class="o">+=</span> <span class="mi">1</span>
-</span><span id="DataFrame.join-483"><a href="#DataFrame.join-483"><span class="linenos">483</span></a> <span class="k">if</span> <span class="n">num_matching_ctes</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
-</span><span id="DataFrame.join-484"><a href="#DataFrame.join-484"><span class="linenos">484</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
-</span><span id="DataFrame.join-485"><a href="#DataFrame.join-485"><span class="linenos">485</span></a> <span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="si">}</span><span class="s2"> is ambiguous. Please specify the table name.&quot;</span>
-</span><span id="DataFrame.join-486"><a href="#DataFrame.join-486"><span class="linenos">486</span></a> <span class="p">)</span>
-</span><span id="DataFrame.join-487"><a href="#DataFrame.join-487"><span class="linenos">487</span></a> <span class="k">elif</span> <span class="n">num_matching_ctes</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
-</span><span id="DataFrame.join-488"><a href="#DataFrame.join-488"><span class="linenos">488</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
-</span><span id="DataFrame.join-489"><a href="#DataFrame.join-489"><span class="linenos">489</span></a> <span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="si">}</span><span class="s2"> does not exist in any of the tables.&quot;</span>
-</span><span id="DataFrame.join-490"><a href="#DataFrame.join-490"><span class="linenos">490</span></a> <span class="p">)</span>
-</span><span id="DataFrame.join-491"><a href="#DataFrame.join-491"><span class="linenos">491</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span>
-</span><span id="DataFrame.join-492"><a href="#DataFrame.join-492"><span class="linenos">492</span></a> <span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span>
-</span><span id="DataFrame.join-493"><a href="#DataFrame.join-493"><span class="linenos">493</span></a> <span class="p">[</span><span class="n">left_column</span> <span class="o">==</span> <span class="n">right_column</span> <span class="k">for</span> <span class="n">left_column</span><span class="p">,</span> <span class="n">right_column</span> <span class="ow">in</span> <span class="n">join_column_pairs</span><span class="p">],</span>
-</span><span id="DataFrame.join-494"><a href="#DataFrame.join-494"><span class="linenos">494</span></a> <span class="p">)</span>
-</span><span id="DataFrame.join-495"><a href="#DataFrame.join-495"><span class="linenos">495</span></a> <span class="n">join_column_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">left_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">left_col</span><span class="p">,</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">join_column_pairs</span><span class="p">]</span>
-</span><span id="DataFrame.join-496"><a href="#DataFrame.join-496"><span class="linenos">496</span></a> <span class="c1"># To match spark behavior only the join clause gets deduplicated and it gets put in the front of the column list</span>
-</span><span id="DataFrame.join-497"><a href="#DataFrame.join-497"><span class="linenos">497</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.join-498"><a href="#DataFrame.join-498"><span class="linenos">498</span></a> <span class="p">(</span>
-</span><span id="DataFrame.join-499"><a href="#DataFrame.join-499"><span class="linenos">499</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame.join-500"><a href="#DataFrame.join-500"><span class="linenos">500</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">this</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Star</span><span class="p">)</span>
-</span><span id="DataFrame.join-501"><a href="#DataFrame.join-501"><span class="linenos">501</span></a> <span class="k">else</span> <span class="n">column</span><span class="o">.</span><span class="n">sql</span><span class="p">()</span>
-</span><span id="DataFrame.join-502"><a href="#DataFrame.join-502"><span class="linenos">502</span></a> <span class="p">)</span>
-</span><span id="DataFrame.join-503"><a href="#DataFrame.join-503"><span class="linenos">503</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span>
-</span><span id="DataFrame.join-504"><a href="#DataFrame.join-504"><span class="linenos">504</span></a> <span class="p">]</span>
-</span><span id="DataFrame.join-505"><a href="#DataFrame.join-505"><span class="linenos">505</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.join-506"><a href="#DataFrame.join-506"><span class="linenos">506</span></a> <span class="n">column_name</span>
-</span><span id="DataFrame.join-507"><a href="#DataFrame.join-507"><span class="linenos">507</span></a> <span class="k">for</span> <span class="n">column_name</span> <span class="ow">in</span> <span class="n">select_column_names</span>
-</span><span id="DataFrame.join-508"><a href="#DataFrame.join-508"><span class="linenos">508</span></a> <span class="k">if</span> <span class="n">column_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">join_column_names</span>
-</span><span id="DataFrame.join-509"><a href="#DataFrame.join-509"><span class="linenos">509</span></a> <span class="p">]</span>
-</span><span id="DataFrame.join-510"><a href="#DataFrame.join-510"><span class="linenos">510</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="n">join_column_names</span> <span class="o">+</span> <span class="n">select_column_names</span>
-</span><span id="DataFrame.join-511"><a href="#DataFrame.join-511"><span class="linenos">511</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.join-512"><a href="#DataFrame.join-512"><span class="linenos">512</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame.join-513"><a href="#DataFrame.join-513"><span class="linenos">513</span></a><span class="sd"> Unique characteristics of join on expressions:</span>
-</span><span id="DataFrame.join-514"><a href="#DataFrame.join-514"><span class="linenos">514</span></a><span class="sd"> * There is no deduplication of the results.</span>
-</span><span id="DataFrame.join-515"><a href="#DataFrame.join-515"><span class="linenos">515</span></a><span class="sd"> * The left join dataframe columns go first and right come after. No sort preference is given to join columns</span>
-</span><span id="DataFrame.join-516"><a href="#DataFrame.join-516"><span class="linenos">516</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame.join-517"><a href="#DataFrame.join-517"><span class="linenos">517</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">join_columns</span><span class="p">,</span> <span class="n">join_expression</span><span class="p">)</span>
-</span><span id="DataFrame.join-518"><a href="#DataFrame.join-518"><span class="linenos">518</span></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">join_columns</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
-</span><span id="DataFrame.join-519"><a href="#DataFrame.join-519"><span class="linenos">519</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">join_columns</span><span class="p">)]</span>
-</span><span id="DataFrame.join-520"><a href="#DataFrame.join-520"><span class="linenos">520</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">join_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
-</span><span id="DataFrame.join-521"><a href="#DataFrame.join-521"><span class="linenos">521</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span><span class="p">]</span>
-</span><span id="DataFrame.join-522"><a href="#DataFrame.join-522"><span class="linenos">522</span></a>
-</span><span id="DataFrame.join-523"><a href="#DataFrame.join-523"><span class="linenos">523</span></a> <span class="c1"># Update the on expression with the actual join clause to replace the dummy one from before</span>
-</span><span id="DataFrame.join-524"><a href="#DataFrame.join-524"><span class="linenos">524</span></a> <span class="n">join_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;joins&quot;</span><span class="p">][</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;on&quot;</span><span class="p">,</span> <span class="n">join_clause</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.join-525"><a href="#DataFrame.join-525"><span class="linenos">525</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">join_expression</span><span class="p">)</span>
-</span><span id="DataFrame.join-526"><a href="#DataFrame.join-526"><span class="linenos">526</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">)</span>
-</span><span id="DataFrame.join-527"><a href="#DataFrame.join-527"><span class="linenos">527</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="p">)</span>
-</span><span id="DataFrame.join-528"><a href="#DataFrame.join-528"><span class="linenos">528</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="o">.</span><span class="n">__wrapped__</span><span class="p">(</span><span class="n">new_df</span><span class="p">,</span> <span class="o">*</span><span class="n">select_column_names</span><span class="p">)</span>
-</span><span id="DataFrame.join-529"><a href="#DataFrame.join-529"><span class="linenos">529</span></a> <span class="k">return</span> <span class="n">new_df</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.join-438"><a href="#DataFrame.join-438"><span class="linenos">438</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.join-439"><a href="#DataFrame.join-439"><span class="linenos">439</span></a> <span class="k">def</span> <span class="nf">join</span><span class="p">(</span>
+</span><span id="DataFrame.join-440"><a href="#DataFrame.join-440"><span class="linenos">440</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.join-441"><a href="#DataFrame.join-441"><span class="linenos">441</span></a> <span class="n">other_df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
+</span><span id="DataFrame.join-442"><a href="#DataFrame.join-442"><span class="linenos">442</span></a> <span class="n">on</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">Column</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">Column</span><span class="p">]],</span>
+</span><span id="DataFrame.join-443"><a href="#DataFrame.join-443"><span class="linenos">443</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;inner&quot;</span><span class="p">,</span>
+</span><span id="DataFrame.join-444"><a href="#DataFrame.join-444"><span class="linenos">444</span></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
+</span><span id="DataFrame.join-445"><a href="#DataFrame.join-445"><span class="linenos">445</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.join-446"><a href="#DataFrame.join-446"><span class="linenos">446</span></a> <span class="n">other_df</span> <span class="o">=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span>
+</span><span id="DataFrame.join-447"><a href="#DataFrame.join-447"><span class="linenos">447</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">on</span><span class="p">)</span>
+</span><span id="DataFrame.join-448"><a href="#DataFrame.join-448"><span class="linenos">448</span></a> <span class="c1"># We will determine actual &quot;join on&quot; expression later so we don&#39;t provide it at first</span>
+</span><span id="DataFrame.join-449"><a href="#DataFrame.join-449"><span class="linenos">449</span></a> <span class="n">join_expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
+</span><span id="DataFrame.join-450"><a href="#DataFrame.join-450"><span class="linenos">450</span></a> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">,</span> <span class="n">join_type</span><span class="o">=</span><span class="n">how</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
+</span><span id="DataFrame.join-451"><a href="#DataFrame.join-451"><span class="linenos">451</span></a> <span class="p">)</span>
+</span><span id="DataFrame.join-452"><a href="#DataFrame.join-452"><span class="linenos">452</span></a> <span class="n">join_expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_add_ctes_to_expression</span><span class="p">(</span><span class="n">join_expression</span><span class="p">,</span> <span class="n">other_df</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">ctes</span><span class="p">)</span>
+</span><span id="DataFrame.join-453"><a href="#DataFrame.join-453"><span class="linenos">453</span></a> <span class="n">self_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame.join-454"><a href="#DataFrame.join-454"><span class="linenos">454</span></a> <span class="n">other_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">other_df</span><span class="p">)</span>
+</span><span id="DataFrame.join-455"><a href="#DataFrame.join-455"><span class="linenos">455</span></a> <span class="c1"># Determines the join clause and select columns to be used passed on what type of columns were provided for</span>
+</span><span id="DataFrame.join-456"><a href="#DataFrame.join-456"><span class="linenos">456</span></a> <span class="c1"># the join. The columns returned changes based on how the on expression is provided.</span>
+</span><span id="DataFrame.join-457"><a href="#DataFrame.join-457"><span class="linenos">457</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">join_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
+</span><span id="DataFrame.join-458"><a href="#DataFrame.join-458"><span class="linenos">458</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame.join-459"><a href="#DataFrame.join-459"><span class="linenos">459</span></a><span class="sd"> Unique characteristics of join on column names only:</span>
+</span><span id="DataFrame.join-460"><a href="#DataFrame.join-460"><span class="linenos">460</span></a><span class="sd"> * The column names are put at the front of the select list</span>
+</span><span id="DataFrame.join-461"><a href="#DataFrame.join-461"><span class="linenos">461</span></a><span class="sd"> * The column names are deduplicated across the entire select list and only the column names (other dups are allowed)</span>
+</span><span id="DataFrame.join-462"><a href="#DataFrame.join-462"><span class="linenos">462</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame.join-463"><a href="#DataFrame.join-463"><span class="linenos">463</span></a> <span class="n">table_names</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.join-464"><a href="#DataFrame.join-464"><span class="linenos">464</span></a> <span class="n">table</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame.join-465"><a href="#DataFrame.join-465"><span class="linenos">465</span></a> <span class="k">for</span> <span class="n">table</span> <span class="ow">in</span> <span class="n">get_tables_from_expression_with_join</span><span class="p">(</span><span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame.join-466"><a href="#DataFrame.join-466"><span class="linenos">466</span></a> <span class="p">]</span>
+</span><span id="DataFrame.join-467"><a href="#DataFrame.join-467"><span class="linenos">467</span></a> <span class="n">potential_ctes</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.join-468"><a href="#DataFrame.join-468"><span class="linenos">468</span></a> <span class="n">cte</span>
+</span><span id="DataFrame.join-469"><a href="#DataFrame.join-469"><span class="linenos">469</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">join_expression</span><span class="o">.</span><span class="n">ctes</span>
+</span><span id="DataFrame.join-470"><a href="#DataFrame.join-470"><span class="linenos">470</span></a> <span class="k">if</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">table_names</span>
+</span><span id="DataFrame.join-471"><a href="#DataFrame.join-471"><span class="linenos">471</span></a> <span class="ow">and</span> <span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">!=</span> <span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span>
+</span><span id="DataFrame.join-472"><a href="#DataFrame.join-472"><span class="linenos">472</span></a> <span class="p">]</span>
+</span><span id="DataFrame.join-473"><a href="#DataFrame.join-473"><span class="linenos">473</span></a> <span class="c1"># Determine the table to reference for the left side of the join by checking each of the left side</span>
+</span><span id="DataFrame.join-474"><a href="#DataFrame.join-474"><span class="linenos">474</span></a> <span class="c1"># tables and see if they have the column being referenced.</span>
+</span><span id="DataFrame.join-475"><a href="#DataFrame.join-475"><span class="linenos">475</span></a> <span class="n">join_column_pairs</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="DataFrame.join-476"><a href="#DataFrame.join-476"><span class="linenos">476</span></a> <span class="k">for</span> <span class="n">join_column</span> <span class="ow">in</span> <span class="n">join_columns</span><span class="p">:</span>
+</span><span id="DataFrame.join-477"><a href="#DataFrame.join-477"><span class="linenos">477</span></a> <span class="n">num_matching_ctes</span> <span class="o">=</span> <span class="mi">0</span>
+</span><span id="DataFrame.join-478"><a href="#DataFrame.join-478"><span class="linenos">478</span></a> <span class="k">for</span> <span class="n">cte</span> <span class="ow">in</span> <span class="n">potential_ctes</span><span class="p">:</span>
+</span><span id="DataFrame.join-479"><a href="#DataFrame.join-479"><span class="linenos">479</span></a> <span class="k">if</span> <span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">in</span> <span class="n">cte</span><span class="o">.</span><span class="n">this</span><span class="o">.</span><span class="n">named_selects</span><span class="p">:</span>
+</span><span id="DataFrame.join-480"><a href="#DataFrame.join-480"><span class="linenos">480</span></a> <span class="n">left_column</span> <span class="o">=</span> <span class="n">join_column</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">cte</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
+</span><span id="DataFrame.join-481"><a href="#DataFrame.join-481"><span class="linenos">481</span></a> <span class="n">right_column</span> <span class="o">=</span> <span class="n">join_column</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">set_table_name</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">latest_cte_name</span><span class="p">)</span>
+</span><span id="DataFrame.join-482"><a href="#DataFrame.join-482"><span class="linenos">482</span></a> <span class="n">join_column_pairs</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">left_column</span><span class="p">,</span> <span class="n">right_column</span><span class="p">))</span>
+</span><span id="DataFrame.join-483"><a href="#DataFrame.join-483"><span class="linenos">483</span></a> <span class="n">num_matching_ctes</span> <span class="o">+=</span> <span class="mi">1</span>
+</span><span id="DataFrame.join-484"><a href="#DataFrame.join-484"><span class="linenos">484</span></a> <span class="k">if</span> <span class="n">num_matching_ctes</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
+</span><span id="DataFrame.join-485"><a href="#DataFrame.join-485"><span class="linenos">485</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+</span><span id="DataFrame.join-486"><a href="#DataFrame.join-486"><span class="linenos">486</span></a> <span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="si">}</span><span class="s2"> is ambiguous. Please specify the table name.&quot;</span>
+</span><span id="DataFrame.join-487"><a href="#DataFrame.join-487"><span class="linenos">487</span></a> <span class="p">)</span>
+</span><span id="DataFrame.join-488"><a href="#DataFrame.join-488"><span class="linenos">488</span></a> <span class="k">elif</span> <span class="n">num_matching_ctes</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+</span><span id="DataFrame.join-489"><a href="#DataFrame.join-489"><span class="linenos">489</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+</span><span id="DataFrame.join-490"><a href="#DataFrame.join-490"><span class="linenos">490</span></a> <span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">join_column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="si">}</span><span class="s2"> does not exist in any of the tables.&quot;</span>
+</span><span id="DataFrame.join-491"><a href="#DataFrame.join-491"><span class="linenos">491</span></a> <span class="p">)</span>
+</span><span id="DataFrame.join-492"><a href="#DataFrame.join-492"><span class="linenos">492</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span>
+</span><span id="DataFrame.join-493"><a href="#DataFrame.join-493"><span class="linenos">493</span></a> <span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span>
+</span><span id="DataFrame.join-494"><a href="#DataFrame.join-494"><span class="linenos">494</span></a> <span class="p">[</span><span class="n">left_column</span> <span class="o">==</span> <span class="n">right_column</span> <span class="k">for</span> <span class="n">left_column</span><span class="p">,</span> <span class="n">right_column</span> <span class="ow">in</span> <span class="n">join_column_pairs</span><span class="p">],</span>
+</span><span id="DataFrame.join-495"><a href="#DataFrame.join-495"><span class="linenos">495</span></a> <span class="p">)</span>
+</span><span id="DataFrame.join-496"><a href="#DataFrame.join-496"><span class="linenos">496</span></a> <span class="n">join_column_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">left_col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">left_col</span><span class="p">,</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">join_column_pairs</span><span class="p">]</span>
+</span><span id="DataFrame.join-497"><a href="#DataFrame.join-497"><span class="linenos">497</span></a> <span class="c1"># To match spark behavior only the join clause gets deduplicated and it gets put in the front of the column list</span>
+</span><span id="DataFrame.join-498"><a href="#DataFrame.join-498"><span class="linenos">498</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.join-499"><a href="#DataFrame.join-499"><span class="linenos">499</span></a> <span class="p">(</span>
+</span><span id="DataFrame.join-500"><a href="#DataFrame.join-500"><span class="linenos">500</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame.join-501"><a href="#DataFrame.join-501"><span class="linenos">501</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">this</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Star</span><span class="p">)</span>
+</span><span id="DataFrame.join-502"><a href="#DataFrame.join-502"><span class="linenos">502</span></a> <span class="k">else</span> <span class="n">column</span><span class="o">.</span><span class="n">sql</span><span class="p">()</span>
+</span><span id="DataFrame.join-503"><a href="#DataFrame.join-503"><span class="linenos">503</span></a> <span class="p">)</span>
+</span><span id="DataFrame.join-504"><a href="#DataFrame.join-504"><span class="linenos">504</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span>
+</span><span id="DataFrame.join-505"><a href="#DataFrame.join-505"><span class="linenos">505</span></a> <span class="p">]</span>
+</span><span id="DataFrame.join-506"><a href="#DataFrame.join-506"><span class="linenos">506</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.join-507"><a href="#DataFrame.join-507"><span class="linenos">507</span></a> <span class="n">column_name</span>
+</span><span id="DataFrame.join-508"><a href="#DataFrame.join-508"><span class="linenos">508</span></a> <span class="k">for</span> <span class="n">column_name</span> <span class="ow">in</span> <span class="n">select_column_names</span>
+</span><span id="DataFrame.join-509"><a href="#DataFrame.join-509"><span class="linenos">509</span></a> <span class="k">if</span> <span class="n">column_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">join_column_names</span>
+</span><span id="DataFrame.join-510"><a href="#DataFrame.join-510"><span class="linenos">510</span></a> <span class="p">]</span>
+</span><span id="DataFrame.join-511"><a href="#DataFrame.join-511"><span class="linenos">511</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="n">join_column_names</span> <span class="o">+</span> <span class="n">select_column_names</span>
+</span><span id="DataFrame.join-512"><a href="#DataFrame.join-512"><span class="linenos">512</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.join-513"><a href="#DataFrame.join-513"><span class="linenos">513</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame.join-514"><a href="#DataFrame.join-514"><span class="linenos">514</span></a><span class="sd"> Unique characteristics of join on expressions:</span>
+</span><span id="DataFrame.join-515"><a href="#DataFrame.join-515"><span class="linenos">515</span></a><span class="sd"> * There is no deduplication of the results.</span>
+</span><span id="DataFrame.join-516"><a href="#DataFrame.join-516"><span class="linenos">516</span></a><span class="sd"> * The left join dataframe columns go first and right come after. No sort preference is given to join columns</span>
+</span><span id="DataFrame.join-517"><a href="#DataFrame.join-517"><span class="linenos">517</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame.join-518"><a href="#DataFrame.join-518"><span class="linenos">518</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">join_columns</span><span class="p">,</span> <span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame.join-519"><a href="#DataFrame.join-519"><span class="linenos">519</span></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">join_columns</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
+</span><span id="DataFrame.join-520"><a href="#DataFrame.join-520"><span class="linenos">520</span></a> <span class="n">join_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">join_columns</span><span class="p">)]</span>
+</span><span id="DataFrame.join-521"><a href="#DataFrame.join-521"><span class="linenos">521</span></a> <span class="n">join_clause</span> <span class="o">=</span> <span class="n">join_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+</span><span id="DataFrame.join-522"><a href="#DataFrame.join-522"><span class="linenos">522</span></a> <span class="n">select_column_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">self_columns</span> <span class="o">+</span> <span class="n">other_columns</span><span class="p">]</span>
+</span><span id="DataFrame.join-523"><a href="#DataFrame.join-523"><span class="linenos">523</span></a>
+</span><span id="DataFrame.join-524"><a href="#DataFrame.join-524"><span class="linenos">524</span></a> <span class="c1"># Update the on expression with the actual join clause to replace the dummy one from before</span>
+</span><span id="DataFrame.join-525"><a href="#DataFrame.join-525"><span class="linenos">525</span></a> <span class="n">join_expression</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="s2">&quot;joins&quot;</span><span class="p">][</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;on&quot;</span><span class="p">,</span> <span class="n">join_clause</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.join-526"><a href="#DataFrame.join-526"><span class="linenos">526</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">join_expression</span><span class="p">)</span>
+</span><span id="DataFrame.join-527"><a href="#DataFrame.join-527"><span class="linenos">527</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pending_join_hints</span><span class="p">)</span>
+</span><span id="DataFrame.join-528"><a href="#DataFrame.join-528"><span class="linenos">528</span></a> <span class="n">new_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">other_df</span><span class="o">.</span><span class="n">pending_hints</span><span class="p">)</span>
+</span><span id="DataFrame.join-529"><a href="#DataFrame.join-529"><span class="linenos">529</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="o">.</span><span class="n">__wrapped__</span><span class="p">(</span><span class="n">new_df</span><span class="p">,</span> <span class="o">*</span><span class="n">select_column_names</span><span class="p">)</span>
+</span><span id="DataFrame.join-530"><a href="#DataFrame.join-530"><span class="linenos">530</span></a> <span class="k">return</span> <span class="n">new_df</span>
</span></pre></div>
@@ -2479,39 +2490,39 @@
</div>
<a class="headerlink" href="#DataFrame.orderBy"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.orderBy-531"><a href="#DataFrame.orderBy-531"><span class="linenos">531</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">ORDER_BY</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-532"><a href="#DataFrame.orderBy-532"><span class="linenos">532</span></a> <span class="k">def</span> <span class="nf">orderBy</span><span class="p">(</span>
-</span><span id="DataFrame.orderBy-533"><a href="#DataFrame.orderBy-533"><span class="linenos">533</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.orderBy-534"><a href="#DataFrame.orderBy-534"><span class="linenos">534</span></a> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span>
-</span><span id="DataFrame.orderBy-535"><a href="#DataFrame.orderBy-535"><span class="linenos">535</span></a> <span class="n">ascending</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.orderBy-536"><a href="#DataFrame.orderBy-536"><span class="linenos">536</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.orderBy-537"><a href="#DataFrame.orderBy-537"><span class="linenos">537</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame.orderBy-538"><a href="#DataFrame.orderBy-538"><span class="linenos">538</span></a><span class="sd"> This implementation lets any ordered columns take priority over whatever is provided in `ascending`. Spark</span>
-</span><span id="DataFrame.orderBy-539"><a href="#DataFrame.orderBy-539"><span class="linenos">539</span></a><span class="sd"> has irregular behavior and can result in runtime errors. Users shouldn&#39;t be mixing the two anyways so this</span>
-</span><span id="DataFrame.orderBy-540"><a href="#DataFrame.orderBy-540"><span class="linenos">540</span></a><span class="sd"> is unlikely to come up.</span>
-</span><span id="DataFrame.orderBy-541"><a href="#DataFrame.orderBy-541"><span class="linenos">541</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame.orderBy-542"><a href="#DataFrame.orderBy-542"><span class="linenos">542</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-543"><a href="#DataFrame.orderBy-543"><span class="linenos">543</span></a> <span class="n">pre_ordered_col_indexes</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.orderBy-544"><a href="#DataFrame.orderBy-544"><span class="linenos">544</span></a> <span class="n">i</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-545"><a href="#DataFrame.orderBy-545"><span class="linenos">545</span></a> <span class="p">]</span>
-</span><span id="DataFrame.orderBy-546"><a href="#DataFrame.orderBy-546"><span class="linenos">546</span></a> <span class="k">if</span> <span class="n">ascending</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="DataFrame.orderBy-547"><a href="#DataFrame.orderBy-547"><span class="linenos">547</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="kc">True</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-548"><a href="#DataFrame.orderBy-548"><span class="linenos">548</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ascending</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
-</span><span id="DataFrame.orderBy-549"><a href="#DataFrame.orderBy-549"><span class="linenos">549</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="n">ascending</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-550"><a href="#DataFrame.orderBy-550"><span class="linenos">550</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ascending</span><span class="p">)]</span>
-</span><span id="DataFrame.orderBy-551"><a href="#DataFrame.orderBy-551"><span class="linenos">551</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
-</span><span id="DataFrame.orderBy-552"><a href="#DataFrame.orderBy-552"><span class="linenos">552</span></a> <span class="n">ascending</span>
-</span><span id="DataFrame.orderBy-553"><a href="#DataFrame.orderBy-553"><span class="linenos">553</span></a> <span class="p">),</span> <span class="s2">&quot;The length of items in ascending must equal the number of columns provided&quot;</span>
-</span><span id="DataFrame.orderBy-554"><a href="#DataFrame.orderBy-554"><span class="linenos">554</span></a> <span class="n">col_and_ascending</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">ascending</span><span class="p">))</span>
-</span><span id="DataFrame.orderBy-555"><a href="#DataFrame.orderBy-555"><span class="linenos">555</span></a> <span class="n">order_by_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.orderBy-556"><a href="#DataFrame.orderBy-556"><span class="linenos">556</span></a> <span class="p">(</span>
-</span><span id="DataFrame.orderBy-557"><a href="#DataFrame.orderBy-557"><span class="linenos">557</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="ow">not</span> <span class="n">asc</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-558"><a href="#DataFrame.orderBy-558"><span class="linenos">558</span></a> <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">pre_ordered_col_indexes</span>
-</span><span id="DataFrame.orderBy-559"><a href="#DataFrame.orderBy-559"><span class="linenos">559</span></a> <span class="k">else</span> <span class="n">columns</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">column_expression</span>
-</span><span id="DataFrame.orderBy-560"><a href="#DataFrame.orderBy-560"><span class="linenos">560</span></a> <span class="p">)</span>
-</span><span id="DataFrame.orderBy-561"><a href="#DataFrame.orderBy-561"><span class="linenos">561</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">asc</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">col_and_ascending</span><span class="p">)</span>
-</span><span id="DataFrame.orderBy-562"><a href="#DataFrame.orderBy-562"><span class="linenos">562</span></a> <span class="p">]</span>
-</span><span id="DataFrame.orderBy-563"><a href="#DataFrame.orderBy-563"><span class="linenos">563</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="o">*</span><span class="n">order_by_columns</span><span class="p">))</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.orderBy-532"><a href="#DataFrame.orderBy-532"><span class="linenos">532</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">ORDER_BY</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-533"><a href="#DataFrame.orderBy-533"><span class="linenos">533</span></a> <span class="k">def</span> <span class="nf">orderBy</span><span class="p">(</span>
+</span><span id="DataFrame.orderBy-534"><a href="#DataFrame.orderBy-534"><span class="linenos">534</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.orderBy-535"><a href="#DataFrame.orderBy-535"><span class="linenos">535</span></a> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span>
+</span><span id="DataFrame.orderBy-536"><a href="#DataFrame.orderBy-536"><span class="linenos">536</span></a> <span class="n">ascending</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.orderBy-537"><a href="#DataFrame.orderBy-537"><span class="linenos">537</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.orderBy-538"><a href="#DataFrame.orderBy-538"><span class="linenos">538</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame.orderBy-539"><a href="#DataFrame.orderBy-539"><span class="linenos">539</span></a><span class="sd"> This implementation lets any ordered columns take priority over whatever is provided in `ascending`. Spark</span>
+</span><span id="DataFrame.orderBy-540"><a href="#DataFrame.orderBy-540"><span class="linenos">540</span></a><span class="sd"> has irregular behavior and can result in runtime errors. Users shouldn&#39;t be mixing the two anyways so this</span>
+</span><span id="DataFrame.orderBy-541"><a href="#DataFrame.orderBy-541"><span class="linenos">541</span></a><span class="sd"> is unlikely to come up.</span>
+</span><span id="DataFrame.orderBy-542"><a href="#DataFrame.orderBy-542"><span class="linenos">542</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame.orderBy-543"><a href="#DataFrame.orderBy-543"><span class="linenos">543</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-544"><a href="#DataFrame.orderBy-544"><span class="linenos">544</span></a> <span class="n">pre_ordered_col_indexes</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.orderBy-545"><a href="#DataFrame.orderBy-545"><span class="linenos">545</span></a> <span class="n">i</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-546"><a href="#DataFrame.orderBy-546"><span class="linenos">546</span></a> <span class="p">]</span>
+</span><span id="DataFrame.orderBy-547"><a href="#DataFrame.orderBy-547"><span class="linenos">547</span></a> <span class="k">if</span> <span class="n">ascending</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="DataFrame.orderBy-548"><a href="#DataFrame.orderBy-548"><span class="linenos">548</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="kc">True</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-549"><a href="#DataFrame.orderBy-549"><span class="linenos">549</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ascending</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
+</span><span id="DataFrame.orderBy-550"><a href="#DataFrame.orderBy-550"><span class="linenos">550</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="n">ascending</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-551"><a href="#DataFrame.orderBy-551"><span class="linenos">551</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ascending</span><span class="p">)]</span>
+</span><span id="DataFrame.orderBy-552"><a href="#DataFrame.orderBy-552"><span class="linenos">552</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
+</span><span id="DataFrame.orderBy-553"><a href="#DataFrame.orderBy-553"><span class="linenos">553</span></a> <span class="n">ascending</span>
+</span><span id="DataFrame.orderBy-554"><a href="#DataFrame.orderBy-554"><span class="linenos">554</span></a> <span class="p">),</span> <span class="s2">&quot;The length of items in ascending must equal the number of columns provided&quot;</span>
+</span><span id="DataFrame.orderBy-555"><a href="#DataFrame.orderBy-555"><span class="linenos">555</span></a> <span class="n">col_and_ascending</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">ascending</span><span class="p">))</span>
+</span><span id="DataFrame.orderBy-556"><a href="#DataFrame.orderBy-556"><span class="linenos">556</span></a> <span class="n">order_by_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.orderBy-557"><a href="#DataFrame.orderBy-557"><span class="linenos">557</span></a> <span class="p">(</span>
+</span><span id="DataFrame.orderBy-558"><a href="#DataFrame.orderBy-558"><span class="linenos">558</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="ow">not</span> <span class="n">asc</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-559"><a href="#DataFrame.orderBy-559"><span class="linenos">559</span></a> <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">pre_ordered_col_indexes</span>
+</span><span id="DataFrame.orderBy-560"><a href="#DataFrame.orderBy-560"><span class="linenos">560</span></a> <span class="k">else</span> <span class="n">columns</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">column_expression</span>
+</span><span id="DataFrame.orderBy-561"><a href="#DataFrame.orderBy-561"><span class="linenos">561</span></a> <span class="p">)</span>
+</span><span id="DataFrame.orderBy-562"><a href="#DataFrame.orderBy-562"><span class="linenos">562</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">asc</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">col_and_ascending</span><span class="p">)</span>
+</span><span id="DataFrame.orderBy-563"><a href="#DataFrame.orderBy-563"><span class="linenos">563</span></a> <span class="p">]</span>
+</span><span id="DataFrame.orderBy-564"><a href="#DataFrame.orderBy-564"><span class="linenos">564</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="o">*</span><span class="n">order_by_columns</span><span class="p">))</span>
</span></pre></div>
@@ -2534,39 +2545,39 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.sort"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.sort-531"><a href="#DataFrame.sort-531"><span class="linenos">531</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">ORDER_BY</span><span class="p">)</span>
-</span><span id="DataFrame.sort-532"><a href="#DataFrame.sort-532"><span class="linenos">532</span></a> <span class="k">def</span> <span class="nf">orderBy</span><span class="p">(</span>
-</span><span id="DataFrame.sort-533"><a href="#DataFrame.sort-533"><span class="linenos">533</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.sort-534"><a href="#DataFrame.sort-534"><span class="linenos">534</span></a> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span>
-</span><span id="DataFrame.sort-535"><a href="#DataFrame.sort-535"><span class="linenos">535</span></a> <span class="n">ascending</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.sort-536"><a href="#DataFrame.sort-536"><span class="linenos">536</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.sort-537"><a href="#DataFrame.sort-537"><span class="linenos">537</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame.sort-538"><a href="#DataFrame.sort-538"><span class="linenos">538</span></a><span class="sd"> This implementation lets any ordered columns take priority over whatever is provided in `ascending`. Spark</span>
-</span><span id="DataFrame.sort-539"><a href="#DataFrame.sort-539"><span class="linenos">539</span></a><span class="sd"> has irregular behavior and can result in runtime errors. Users shouldn&#39;t be mixing the two anyways so this</span>
-</span><span id="DataFrame.sort-540"><a href="#DataFrame.sort-540"><span class="linenos">540</span></a><span class="sd"> is unlikely to come up.</span>
-</span><span id="DataFrame.sort-541"><a href="#DataFrame.sort-541"><span class="linenos">541</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame.sort-542"><a href="#DataFrame.sort-542"><span class="linenos">542</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame.sort-543"><a href="#DataFrame.sort-543"><span class="linenos">543</span></a> <span class="n">pre_ordered_col_indexes</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.sort-544"><a href="#DataFrame.sort-544"><span class="linenos">544</span></a> <span class="n">i</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">)</span>
-</span><span id="DataFrame.sort-545"><a href="#DataFrame.sort-545"><span class="linenos">545</span></a> <span class="p">]</span>
-</span><span id="DataFrame.sort-546"><a href="#DataFrame.sort-546"><span class="linenos">546</span></a> <span class="k">if</span> <span class="n">ascending</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="DataFrame.sort-547"><a href="#DataFrame.sort-547"><span class="linenos">547</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="kc">True</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.sort-548"><a href="#DataFrame.sort-548"><span class="linenos">548</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ascending</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
-</span><span id="DataFrame.sort-549"><a href="#DataFrame.sort-549"><span class="linenos">549</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="n">ascending</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.sort-550"><a href="#DataFrame.sort-550"><span class="linenos">550</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ascending</span><span class="p">)]</span>
-</span><span id="DataFrame.sort-551"><a href="#DataFrame.sort-551"><span class="linenos">551</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
-</span><span id="DataFrame.sort-552"><a href="#DataFrame.sort-552"><span class="linenos">552</span></a> <span class="n">ascending</span>
-</span><span id="DataFrame.sort-553"><a href="#DataFrame.sort-553"><span class="linenos">553</span></a> <span class="p">),</span> <span class="s2">&quot;The length of items in ascending must equal the number of columns provided&quot;</span>
-</span><span id="DataFrame.sort-554"><a href="#DataFrame.sort-554"><span class="linenos">554</span></a> <span class="n">col_and_ascending</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">ascending</span><span class="p">))</span>
-</span><span id="DataFrame.sort-555"><a href="#DataFrame.sort-555"><span class="linenos">555</span></a> <span class="n">order_by_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.sort-556"><a href="#DataFrame.sort-556"><span class="linenos">556</span></a> <span class="p">(</span>
-</span><span id="DataFrame.sort-557"><a href="#DataFrame.sort-557"><span class="linenos">557</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="ow">not</span> <span class="n">asc</span><span class="p">)</span>
-</span><span id="DataFrame.sort-558"><a href="#DataFrame.sort-558"><span class="linenos">558</span></a> <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">pre_ordered_col_indexes</span>
-</span><span id="DataFrame.sort-559"><a href="#DataFrame.sort-559"><span class="linenos">559</span></a> <span class="k">else</span> <span class="n">columns</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">column_expression</span>
-</span><span id="DataFrame.sort-560"><a href="#DataFrame.sort-560"><span class="linenos">560</span></a> <span class="p">)</span>
-</span><span id="DataFrame.sort-561"><a href="#DataFrame.sort-561"><span class="linenos">561</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">asc</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">col_and_ascending</span><span class="p">)</span>
-</span><span id="DataFrame.sort-562"><a href="#DataFrame.sort-562"><span class="linenos">562</span></a> <span class="p">]</span>
-</span><span id="DataFrame.sort-563"><a href="#DataFrame.sort-563"><span class="linenos">563</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="o">*</span><span class="n">order_by_columns</span><span class="p">))</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.sort-532"><a href="#DataFrame.sort-532"><span class="linenos">532</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">ORDER_BY</span><span class="p">)</span>
+</span><span id="DataFrame.sort-533"><a href="#DataFrame.sort-533"><span class="linenos">533</span></a> <span class="k">def</span> <span class="nf">orderBy</span><span class="p">(</span>
+</span><span id="DataFrame.sort-534"><a href="#DataFrame.sort-534"><span class="linenos">534</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.sort-535"><a href="#DataFrame.sort-535"><span class="linenos">535</span></a> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span>
+</span><span id="DataFrame.sort-536"><a href="#DataFrame.sort-536"><span class="linenos">536</span></a> <span class="n">ascending</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.sort-537"><a href="#DataFrame.sort-537"><span class="linenos">537</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.sort-538"><a href="#DataFrame.sort-538"><span class="linenos">538</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame.sort-539"><a href="#DataFrame.sort-539"><span class="linenos">539</span></a><span class="sd"> This implementation lets any ordered columns take priority over whatever is provided in `ascending`. Spark</span>
+</span><span id="DataFrame.sort-540"><a href="#DataFrame.sort-540"><span class="linenos">540</span></a><span class="sd"> has irregular behavior and can result in runtime errors. Users shouldn&#39;t be mixing the two anyways so this</span>
+</span><span id="DataFrame.sort-541"><a href="#DataFrame.sort-541"><span class="linenos">541</span></a><span class="sd"> is unlikely to come up.</span>
+</span><span id="DataFrame.sort-542"><a href="#DataFrame.sort-542"><span class="linenos">542</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame.sort-543"><a href="#DataFrame.sort-543"><span class="linenos">543</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame.sort-544"><a href="#DataFrame.sort-544"><span class="linenos">544</span></a> <span class="n">pre_ordered_col_indexes</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.sort-545"><a href="#DataFrame.sort-545"><span class="linenos">545</span></a> <span class="n">i</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">)</span>
+</span><span id="DataFrame.sort-546"><a href="#DataFrame.sort-546"><span class="linenos">546</span></a> <span class="p">]</span>
+</span><span id="DataFrame.sort-547"><a href="#DataFrame.sort-547"><span class="linenos">547</span></a> <span class="k">if</span> <span class="n">ascending</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="DataFrame.sort-548"><a href="#DataFrame.sort-548"><span class="linenos">548</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="kc">True</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.sort-549"><a href="#DataFrame.sort-549"><span class="linenos">549</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ascending</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
+</span><span id="DataFrame.sort-550"><a href="#DataFrame.sort-550"><span class="linenos">550</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="n">ascending</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.sort-551"><a href="#DataFrame.sort-551"><span class="linenos">551</span></a> <span class="n">ascending</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ascending</span><span class="p">)]</span>
+</span><span id="DataFrame.sort-552"><a href="#DataFrame.sort-552"><span class="linenos">552</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
+</span><span id="DataFrame.sort-553"><a href="#DataFrame.sort-553"><span class="linenos">553</span></a> <span class="n">ascending</span>
+</span><span id="DataFrame.sort-554"><a href="#DataFrame.sort-554"><span class="linenos">554</span></a> <span class="p">),</span> <span class="s2">&quot;The length of items in ascending must equal the number of columns provided&quot;</span>
+</span><span id="DataFrame.sort-555"><a href="#DataFrame.sort-555"><span class="linenos">555</span></a> <span class="n">col_and_ascending</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">ascending</span><span class="p">))</span>
+</span><span id="DataFrame.sort-556"><a href="#DataFrame.sort-556"><span class="linenos">556</span></a> <span class="n">order_by_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.sort-557"><a href="#DataFrame.sort-557"><span class="linenos">557</span></a> <span class="p">(</span>
+</span><span id="DataFrame.sort-558"><a href="#DataFrame.sort-558"><span class="linenos">558</span></a> <span class="n">exp</span><span class="o">.</span><span class="n">Ordered</span><span class="p">(</span><span class="n">this</span><span class="o">=</span><span class="n">col</span><span class="o">.</span><span class="n">expression</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="ow">not</span> <span class="n">asc</span><span class="p">)</span>
+</span><span id="DataFrame.sort-559"><a href="#DataFrame.sort-559"><span class="linenos">559</span></a> <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">pre_ordered_col_indexes</span>
+</span><span id="DataFrame.sort-560"><a href="#DataFrame.sort-560"><span class="linenos">560</span></a> <span class="k">else</span> <span class="n">columns</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">column_expression</span>
+</span><span id="DataFrame.sort-561"><a href="#DataFrame.sort-561"><span class="linenos">561</span></a> <span class="p">)</span>
+</span><span id="DataFrame.sort-562"><a href="#DataFrame.sort-562"><span class="linenos">562</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">asc</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">col_and_ascending</span><span class="p">)</span>
+</span><span id="DataFrame.sort-563"><a href="#DataFrame.sort-563"><span class="linenos">563</span></a> <span class="p">]</span>
+</span><span id="DataFrame.sort-564"><a href="#DataFrame.sort-564"><span class="linenos">564</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="o">*</span><span class="n">order_by_columns</span><span class="p">))</span>
</span></pre></div>
@@ -2589,9 +2600,9 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.union"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.union-567"><a href="#DataFrame.union-567"><span class="linenos">567</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.union-568"><a href="#DataFrame.union-568"><span class="linenos">568</span></a> <span class="k">def</span> <span class="nf">union</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.union-569"><a href="#DataFrame.union-569"><span class="linenos">569</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.union-568"><a href="#DataFrame.union-568"><span class="linenos">568</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.union-569"><a href="#DataFrame.union-569"><span class="linenos">569</span></a> <span class="k">def</span> <span class="nf">union</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.union-570"><a href="#DataFrame.union-570"><span class="linenos">570</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span></pre></div>
@@ -2610,9 +2621,9 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.unionAll"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.unionAll-567"><a href="#DataFrame.unionAll-567"><span class="linenos">567</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.unionAll-568"><a href="#DataFrame.unionAll-568"><span class="linenos">568</span></a> <span class="k">def</span> <span class="nf">union</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.unionAll-569"><a href="#DataFrame.unionAll-569"><span class="linenos">569</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.unionAll-568"><a href="#DataFrame.unionAll-568"><span class="linenos">568</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.unionAll-569"><a href="#DataFrame.unionAll-569"><span class="linenos">569</span></a> <span class="k">def</span> <span class="nf">union</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.unionAll-570"><a href="#DataFrame.unionAll-570"><span class="linenos">570</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span></pre></div>
@@ -2631,34 +2642,34 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.unionByName"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.unionByName-573"><a href="#DataFrame.unionByName-573"><span class="linenos">573</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.unionByName-574"><a href="#DataFrame.unionByName-574"><span class="linenos">574</span></a> <span class="k">def</span> <span class="nf">unionByName</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">allowMissingColumns</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
-</span><span id="DataFrame.unionByName-575"><a href="#DataFrame.unionByName-575"><span class="linenos">575</span></a> <span class="n">l_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">columns</span>
-</span><span id="DataFrame.unionByName-576"><a href="#DataFrame.unionByName-576"><span class="linenos">576</span></a> <span class="n">r_columns</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">columns</span>
-</span><span id="DataFrame.unionByName-577"><a href="#DataFrame.unionByName-577"><span class="linenos">577</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-578"><a href="#DataFrame.unionByName-578"><span class="linenos">578</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
-</span><span id="DataFrame.unionByName-579"><a href="#DataFrame.unionByName-579"><span class="linenos">579</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
-</span><span id="DataFrame.unionByName-580"><a href="#DataFrame.unionByName-580"><span class="linenos">580</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-581"><a href="#DataFrame.unionByName-581"><span class="linenos">581</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="p">[]</span>
-</span><span id="DataFrame.unionByName-582"><a href="#DataFrame.unionByName-582"><span class="linenos">582</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="p">[]</span>
-</span><span id="DataFrame.unionByName-583"><a href="#DataFrame.unionByName-583"><span class="linenos">583</span></a> <span class="n">r_columns_unused</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="n">r_columns</span><span class="p">)</span>
-</span><span id="DataFrame.unionByName-584"><a href="#DataFrame.unionByName-584"><span class="linenos">584</span></a> <span class="k">for</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">l_columns</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-585"><a href="#DataFrame.unionByName-585"><span class="linenos">585</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
-</span><span id="DataFrame.unionByName-586"><a href="#DataFrame.unionByName-586"><span class="linenos">586</span></a> <span class="k">if</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">r_columns</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-587"><a href="#DataFrame.unionByName-587"><span class="linenos">587</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
-</span><span id="DataFrame.unionByName-588"><a href="#DataFrame.unionByName-588"><span class="linenos">588</span></a> <span class="n">r_columns_unused</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
-</span><span id="DataFrame.unionByName-589"><a href="#DataFrame.unionByName-589"><span class="linenos">589</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-590"><a href="#DataFrame.unionByName-590"><span class="linenos">590</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">l_column</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
-</span><span id="DataFrame.unionByName-591"><a href="#DataFrame.unionByName-591"><span class="linenos">591</span></a> <span class="k">for</span> <span class="n">r_column</span> <span class="ow">in</span> <span class="n">r_columns_unused</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-592"><a href="#DataFrame.unionByName-592"><span class="linenos">592</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">r_column</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
-</span><span id="DataFrame.unionByName-593"><a href="#DataFrame.unionByName-593"><span class="linenos">593</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">r_column</span><span class="p">)</span>
-</span><span id="DataFrame.unionByName-594"><a href="#DataFrame.unionByName-594"><span class="linenos">594</span></a> <span class="n">r_df</span> <span class="o">=</span> <span class="p">(</span>
-</span><span id="DataFrame.unionByName-595"><a href="#DataFrame.unionByName-595"><span class="linenos">595</span></a> <span class="n">other</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">r_expressions</span><span class="p">))</span>
-</span><span id="DataFrame.unionByName-596"><a href="#DataFrame.unionByName-596"><span class="linenos">596</span></a> <span class="p">)</span>
-</span><span id="DataFrame.unionByName-597"><a href="#DataFrame.unionByName-597"><span class="linenos">597</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.unionByName-598"><a href="#DataFrame.unionByName-598"><span class="linenos">598</span></a> <span class="k">if</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
-</span><span id="DataFrame.unionByName-599"><a href="#DataFrame.unionByName-599"><span class="linenos">599</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">l_expressions</span><span class="p">))</span>
-</span><span id="DataFrame.unionByName-600"><a href="#DataFrame.unionByName-600"><span class="linenos">600</span></a> <span class="k">return</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">r_df</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.unionByName-574"><a href="#DataFrame.unionByName-574"><span class="linenos">574</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.unionByName-575"><a href="#DataFrame.unionByName-575"><span class="linenos">575</span></a> <span class="k">def</span> <span class="nf">unionByName</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">allowMissingColumns</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
+</span><span id="DataFrame.unionByName-576"><a href="#DataFrame.unionByName-576"><span class="linenos">576</span></a> <span class="n">l_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">columns</span>
+</span><span id="DataFrame.unionByName-577"><a href="#DataFrame.unionByName-577"><span class="linenos">577</span></a> <span class="n">r_columns</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">columns</span>
+</span><span id="DataFrame.unionByName-578"><a href="#DataFrame.unionByName-578"><span class="linenos">578</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-579"><a href="#DataFrame.unionByName-579"><span class="linenos">579</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
+</span><span id="DataFrame.unionByName-580"><a href="#DataFrame.unionByName-580"><span class="linenos">580</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="n">l_columns</span>
+</span><span id="DataFrame.unionByName-581"><a href="#DataFrame.unionByName-581"><span class="linenos">581</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-582"><a href="#DataFrame.unionByName-582"><span class="linenos">582</span></a> <span class="n">l_expressions</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="DataFrame.unionByName-583"><a href="#DataFrame.unionByName-583"><span class="linenos">583</span></a> <span class="n">r_expressions</span> <span class="o">=</span> <span class="p">[]</span>
+</span><span id="DataFrame.unionByName-584"><a href="#DataFrame.unionByName-584"><span class="linenos">584</span></a> <span class="n">r_columns_unused</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="n">r_columns</span><span class="p">)</span>
+</span><span id="DataFrame.unionByName-585"><a href="#DataFrame.unionByName-585"><span class="linenos">585</span></a> <span class="k">for</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">l_columns</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-586"><a href="#DataFrame.unionByName-586"><span class="linenos">586</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
+</span><span id="DataFrame.unionByName-587"><a href="#DataFrame.unionByName-587"><span class="linenos">587</span></a> <span class="k">if</span> <span class="n">l_column</span> <span class="ow">in</span> <span class="n">r_columns</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-588"><a href="#DataFrame.unionByName-588"><span class="linenos">588</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
+</span><span id="DataFrame.unionByName-589"><a href="#DataFrame.unionByName-589"><span class="linenos">589</span></a> <span class="n">r_columns_unused</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">l_column</span><span class="p">)</span>
+</span><span id="DataFrame.unionByName-590"><a href="#DataFrame.unionByName-590"><span class="linenos">590</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-591"><a href="#DataFrame.unionByName-591"><span class="linenos">591</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">l_column</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
+</span><span id="DataFrame.unionByName-592"><a href="#DataFrame.unionByName-592"><span class="linenos">592</span></a> <span class="k">for</span> <span class="n">r_column</span> <span class="ow">in</span> <span class="n">r_columns_unused</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-593"><a href="#DataFrame.unionByName-593"><span class="linenos">593</span></a> <span class="n">l_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Null</span><span class="p">(),</span> <span class="n">r_column</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
+</span><span id="DataFrame.unionByName-594"><a href="#DataFrame.unionByName-594"><span class="linenos">594</span></a> <span class="n">r_expressions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">r_column</span><span class="p">)</span>
+</span><span id="DataFrame.unionByName-595"><a href="#DataFrame.unionByName-595"><span class="linenos">595</span></a> <span class="n">r_df</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="DataFrame.unionByName-596"><a href="#DataFrame.unionByName-596"><span class="linenos">596</span></a> <span class="n">other</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">r_expressions</span><span class="p">))</span>
+</span><span id="DataFrame.unionByName-597"><a href="#DataFrame.unionByName-597"><span class="linenos">597</span></a> <span class="p">)</span>
+</span><span id="DataFrame.unionByName-598"><a href="#DataFrame.unionByName-598"><span class="linenos">598</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.unionByName-599"><a href="#DataFrame.unionByName-599"><span class="linenos">599</span></a> <span class="k">if</span> <span class="n">allowMissingColumns</span><span class="p">:</span>
+</span><span id="DataFrame.unionByName-600"><a href="#DataFrame.unionByName-600"><span class="linenos">600</span></a> <span class="n">l_df</span> <span class="o">=</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_convert_leaf_to_cte</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">l_expressions</span><span class="p">))</span>
+</span><span id="DataFrame.unionByName-601"><a href="#DataFrame.unionByName-601"><span class="linenos">601</span></a> <span class="k">return</span> <span class="n">l_df</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Union</span><span class="p">,</span> <span class="n">r_df</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span></pre></div>
@@ -2677,9 +2688,9 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.intersect"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.intersect-602"><a href="#DataFrame.intersect-602"><span class="linenos">602</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.intersect-603"><a href="#DataFrame.intersect-603"><span class="linenos">603</span></a> <span class="k">def</span> <span class="nf">intersect</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.intersect-604"><a href="#DataFrame.intersect-604"><span class="linenos">604</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.intersect-603"><a href="#DataFrame.intersect-603"><span class="linenos">603</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.intersect-604"><a href="#DataFrame.intersect-604"><span class="linenos">604</span></a> <span class="k">def</span> <span class="nf">intersect</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.intersect-605"><a href="#DataFrame.intersect-605"><span class="linenos">605</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
</span></pre></div>
@@ -2698,9 +2709,9 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.intersectAll"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.intersectAll-606"><a href="#DataFrame.intersectAll-606"><span class="linenos">606</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.intersectAll-607"><a href="#DataFrame.intersectAll-607"><span class="linenos">607</span></a> <span class="k">def</span> <span class="nf">intersectAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.intersectAll-608"><a href="#DataFrame.intersectAll-608"><span class="linenos">608</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.intersectAll-607"><a href="#DataFrame.intersectAll-607"><span class="linenos">607</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.intersectAll-608"><a href="#DataFrame.intersectAll-608"><span class="linenos">608</span></a> <span class="k">def</span> <span class="nf">intersectAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.intersectAll-609"><a href="#DataFrame.intersectAll-609"><span class="linenos">609</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Intersect</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span></pre></div>
@@ -2719,9 +2730,9 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.exceptAll"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.exceptAll-610"><a href="#DataFrame.exceptAll-610"><span class="linenos">610</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.exceptAll-611"><a href="#DataFrame.exceptAll-611"><span class="linenos">611</span></a> <span class="k">def</span> <span class="nf">exceptAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.exceptAll-612"><a href="#DataFrame.exceptAll-612"><span class="linenos">612</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Except</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.exceptAll-611"><a href="#DataFrame.exceptAll-611"><span class="linenos">611</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.exceptAll-612"><a href="#DataFrame.exceptAll-612"><span class="linenos">612</span></a> <span class="k">def</span> <span class="nf">exceptAll</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.exceptAll-613"><a href="#DataFrame.exceptAll-613"><span class="linenos">613</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set_operation</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">Except</span><span class="p">,</span> <span class="n">other</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span></pre></div>
@@ -2740,9 +2751,9 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.distinct"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.distinct-614"><a href="#DataFrame.distinct-614"><span class="linenos">614</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.distinct-615"><a href="#DataFrame.distinct-615"><span class="linenos">615</span></a> <span class="k">def</span> <span class="nf">distinct</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.distinct-616"><a href="#DataFrame.distinct-616"><span class="linenos">616</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">distinct</span><span class="p">())</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.distinct-615"><a href="#DataFrame.distinct-615"><span class="linenos">615</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.distinct-616"><a href="#DataFrame.distinct-616"><span class="linenos">616</span></a> <span class="k">def</span> <span class="nf">distinct</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.distinct-617"><a href="#DataFrame.distinct-617"><span class="linenos">617</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">distinct</span><span class="p">())</span>
</span></pre></div>
@@ -2761,18 +2772,18 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.dropDuplicates"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.dropDuplicates-618"><a href="#DataFrame.dropDuplicates-618"><span class="linenos">618</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.dropDuplicates-619"><a href="#DataFrame.dropDuplicates-619"><span class="linenos">619</span></a> <span class="k">def</span> <span class="nf">dropDuplicates</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
-</span><span id="DataFrame.dropDuplicates-620"><a href="#DataFrame.dropDuplicates-620"><span class="linenos">620</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">subset</span><span class="p">:</span>
-</span><span id="DataFrame.dropDuplicates-621"><a href="#DataFrame.dropDuplicates-621"><span class="linenos">621</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
-</span><span id="DataFrame.dropDuplicates-622"><a href="#DataFrame.dropDuplicates-622"><span class="linenos">622</span></a> <span class="n">column_names</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
-</span><span id="DataFrame.dropDuplicates-623"><a href="#DataFrame.dropDuplicates-623"><span class="linenos">623</span></a> <span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span>
-</span><span id="DataFrame.dropDuplicates-624"><a href="#DataFrame.dropDuplicates-624"><span class="linenos">624</span></a> <span class="k">return</span> <span class="p">(</span>
-</span><span id="DataFrame.dropDuplicates-625"><a href="#DataFrame.dropDuplicates-625"><span class="linenos">625</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.dropDuplicates-626"><a href="#DataFrame.dropDuplicates-626"><span class="linenos">626</span></a> <span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span>
-</span><span id="DataFrame.dropDuplicates-627"><a href="#DataFrame.dropDuplicates-627"><span class="linenos">627</span></a> <span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
-</span><span id="DataFrame.dropDuplicates-628"><a href="#DataFrame.dropDuplicates-628"><span class="linenos">628</span></a> <span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span>
-</span><span id="DataFrame.dropDuplicates-629"><a href="#DataFrame.dropDuplicates-629"><span class="linenos">629</span></a> <span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.dropDuplicates-619"><a href="#DataFrame.dropDuplicates-619"><span class="linenos">619</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.dropDuplicates-620"><a href="#DataFrame.dropDuplicates-620"><span class="linenos">620</span></a> <span class="k">def</span> <span class="nf">dropDuplicates</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
+</span><span id="DataFrame.dropDuplicates-621"><a href="#DataFrame.dropDuplicates-621"><span class="linenos">621</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">subset</span><span class="p">:</span>
+</span><span id="DataFrame.dropDuplicates-622"><a href="#DataFrame.dropDuplicates-622"><span class="linenos">622</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
+</span><span id="DataFrame.dropDuplicates-623"><a href="#DataFrame.dropDuplicates-623"><span class="linenos">623</span></a> <span class="n">column_names</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
+</span><span id="DataFrame.dropDuplicates-624"><a href="#DataFrame.dropDuplicates-624"><span class="linenos">624</span></a> <span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="o">*</span><span class="n">column_names</span><span class="p">)</span>
+</span><span id="DataFrame.dropDuplicates-625"><a href="#DataFrame.dropDuplicates-625"><span class="linenos">625</span></a> <span class="k">return</span> <span class="p">(</span>
+</span><span id="DataFrame.dropDuplicates-626"><a href="#DataFrame.dropDuplicates-626"><span class="linenos">626</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.dropDuplicates-627"><a href="#DataFrame.dropDuplicates-627"><span class="linenos">627</span></a> <span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span>
+</span><span id="DataFrame.dropDuplicates-628"><a href="#DataFrame.dropDuplicates-628"><span class="linenos">628</span></a> <span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
+</span><span id="DataFrame.dropDuplicates-629"><a href="#DataFrame.dropDuplicates-629"><span class="linenos">629</span></a> <span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s2">&quot;row_num&quot;</span><span class="p">)</span>
+</span><span id="DataFrame.dropDuplicates-630"><a href="#DataFrame.dropDuplicates-630"><span class="linenos">630</span></a> <span class="p">)</span>
</span></pre></div>
@@ -2791,38 +2802,38 @@ is unlikely to come up.</p>
</div>
<a class="headerlink" href="#DataFrame.dropna"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.dropna-631"><a href="#DataFrame.dropna-631"><span class="linenos">631</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-632"><a href="#DataFrame.dropna-632"><span class="linenos">632</span></a> <span class="k">def</span> <span class="nf">dropna</span><span class="p">(</span>
-</span><span id="DataFrame.dropna-633"><a href="#DataFrame.dropna-633"><span class="linenos">633</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.dropna-634"><a href="#DataFrame.dropna-634"><span class="linenos">634</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
-</span><span id="DataFrame.dropna-635"><a href="#DataFrame.dropna-635"><span class="linenos">635</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.dropna-636"><a href="#DataFrame.dropna-636"><span class="linenos">636</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.dropna-637"><a href="#DataFrame.dropna-637"><span class="linenos">637</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.dropna-638"><a href="#DataFrame.dropna-638"><span class="linenos">638</span></a> <span class="n">minimum_non_null</span> <span class="o">=</span> <span class="n">thresh</span> <span class="ow">or</span> <span class="mi">0</span> <span class="c1"># will be determined later if thresh is null</span>
-</span><span id="DataFrame.dropna-639"><a href="#DataFrame.dropna-639"><span class="linenos">639</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.dropna-640"><a href="#DataFrame.dropna-640"><span class="linenos">640</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-641"><a href="#DataFrame.dropna-641"><span class="linenos">641</span></a> <span class="k">if</span> <span class="n">subset</span><span class="p">:</span>
-</span><span id="DataFrame.dropna-642"><a href="#DataFrame.dropna-642"><span class="linenos">642</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-643"><a href="#DataFrame.dropna-643"><span class="linenos">643</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.dropna-644"><a href="#DataFrame.dropna-644"><span class="linenos">644</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="n">all_columns</span>
-</span><span id="DataFrame.dropna-645"><a href="#DataFrame.dropna-645"><span class="linenos">645</span></a> <span class="k">if</span> <span class="n">thresh</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
-</span><span id="DataFrame.dropna-646"><a href="#DataFrame.dropna-646"><span class="linenos">646</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">how</span> <span class="o">==</span> <span class="s2">&quot;any&quot;</span> <span class="k">else</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-647"><a href="#DataFrame.dropna-647"><span class="linenos">647</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.dropna-648"><a href="#DataFrame.dropna-648"><span class="linenos">648</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span> <span class="o">-</span> <span class="n">minimum_non_null</span> <span class="o">+</span> <span class="mi">1</span>
-</span><span id="DataFrame.dropna-649"><a href="#DataFrame.dropna-649"><span class="linenos">649</span></a> <span class="k">if</span> <span class="n">minimum_num_nulls</span> <span class="o">&gt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">):</span>
-</span><span id="DataFrame.dropna-650"><a href="#DataFrame.dropna-650"><span class="linenos">650</span></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
-</span><span id="DataFrame.dropna-651"><a href="#DataFrame.dropna-651"><span class="linenos">651</span></a> <span class="sa">f</span><span class="s2">&quot;The minimum num nulls for dropna must be less than or equal to the number of columns. &quot;</span>
-</span><span id="DataFrame.dropna-652"><a href="#DataFrame.dropna-652"><span class="linenos">652</span></a> <span class="sa">f</span><span class="s2">&quot;Minimum num nulls: </span><span class="si">{</span><span class="n">minimum_num_nulls</span><span class="si">}</span><span class="s2">, Num Columns: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
-</span><span id="DataFrame.dropna-653"><a href="#DataFrame.dropna-653"><span class="linenos">653</span></a> <span class="p">)</span>
-</span><span id="DataFrame.dropna-654"><a href="#DataFrame.dropna-654"><span class="linenos">654</span></a> <span class="n">if_null_checks</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.dropna-655"><a href="#DataFrame.dropna-655"><span class="linenos">655</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">null_check_columns</span>
-</span><span id="DataFrame.dropna-656"><a href="#DataFrame.dropna-656"><span class="linenos">656</span></a> <span class="p">]</span>
-</span><span id="DataFrame.dropna-657"><a href="#DataFrame.dropna-657"><span class="linenos">657</span></a> <span class="n">nulls_added_together</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">+</span> <span class="n">y</span><span class="p">,</span> <span class="n">if_null_checks</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-658"><a href="#DataFrame.dropna-658"><span class="linenos">658</span></a> <span class="n">num_nulls</span> <span class="o">=</span> <span class="n">nulls_added_together</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-659"><a href="#DataFrame.dropna-659"><span class="linenos">659</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">num_nulls</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-660"><a href="#DataFrame.dropna-660"><span class="linenos">660</span></a> <span class="n">filtered_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">minimum_num_nulls</span><span class="p">))</span>
-</span><span id="DataFrame.dropna-661"><a href="#DataFrame.dropna-661"><span class="linenos">661</span></a> <span class="n">final_df</span> <span class="o">=</span> <span class="n">filtered_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">all_columns</span><span class="p">)</span>
-</span><span id="DataFrame.dropna-662"><a href="#DataFrame.dropna-662"><span class="linenos">662</span></a> <span class="k">return</span> <span class="n">final_df</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.dropna-632"><a href="#DataFrame.dropna-632"><span class="linenos">632</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-633"><a href="#DataFrame.dropna-633"><span class="linenos">633</span></a> <span class="k">def</span> <span class="nf">dropna</span><span class="p">(</span>
+</span><span id="DataFrame.dropna-634"><a href="#DataFrame.dropna-634"><span class="linenos">634</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.dropna-635"><a href="#DataFrame.dropna-635"><span class="linenos">635</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
+</span><span id="DataFrame.dropna-636"><a href="#DataFrame.dropna-636"><span class="linenos">636</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.dropna-637"><a href="#DataFrame.dropna-637"><span class="linenos">637</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.dropna-638"><a href="#DataFrame.dropna-638"><span class="linenos">638</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.dropna-639"><a href="#DataFrame.dropna-639"><span class="linenos">639</span></a> <span class="n">minimum_non_null</span> <span class="o">=</span> <span class="n">thresh</span> <span class="ow">or</span> <span class="mi">0</span> <span class="c1"># will be determined later if thresh is null</span>
+</span><span id="DataFrame.dropna-640"><a href="#DataFrame.dropna-640"><span class="linenos">640</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.dropna-641"><a href="#DataFrame.dropna-641"><span class="linenos">641</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-642"><a href="#DataFrame.dropna-642"><span class="linenos">642</span></a> <span class="k">if</span> <span class="n">subset</span><span class="p">:</span>
+</span><span id="DataFrame.dropna-643"><a href="#DataFrame.dropna-643"><span class="linenos">643</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-644"><a href="#DataFrame.dropna-644"><span class="linenos">644</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.dropna-645"><a href="#DataFrame.dropna-645"><span class="linenos">645</span></a> <span class="n">null_check_columns</span> <span class="o">=</span> <span class="n">all_columns</span>
+</span><span id="DataFrame.dropna-646"><a href="#DataFrame.dropna-646"><span class="linenos">646</span></a> <span class="k">if</span> <span class="n">thresh</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+</span><span id="DataFrame.dropna-647"><a href="#DataFrame.dropna-647"><span class="linenos">647</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">how</span> <span class="o">==</span> <span class="s2">&quot;any&quot;</span> <span class="k">else</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-648"><a href="#DataFrame.dropna-648"><span class="linenos">648</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.dropna-649"><a href="#DataFrame.dropna-649"><span class="linenos">649</span></a> <span class="n">minimum_num_nulls</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span> <span class="o">-</span> <span class="n">minimum_non_null</span> <span class="o">+</span> <span class="mi">1</span>
+</span><span id="DataFrame.dropna-650"><a href="#DataFrame.dropna-650"><span class="linenos">650</span></a> <span class="k">if</span> <span class="n">minimum_num_nulls</span> <span class="o">&gt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">):</span>
+</span><span id="DataFrame.dropna-651"><a href="#DataFrame.dropna-651"><span class="linenos">651</span></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
+</span><span id="DataFrame.dropna-652"><a href="#DataFrame.dropna-652"><span class="linenos">652</span></a> <span class="sa">f</span><span class="s2">&quot;The minimum num nulls for dropna must be less than or equal to the number of columns. &quot;</span>
+</span><span id="DataFrame.dropna-653"><a href="#DataFrame.dropna-653"><span class="linenos">653</span></a> <span class="sa">f</span><span class="s2">&quot;Minimum num nulls: </span><span class="si">{</span><span class="n">minimum_num_nulls</span><span class="si">}</span><span class="s2">, Num Columns: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">null_check_columns</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
+</span><span id="DataFrame.dropna-654"><a href="#DataFrame.dropna-654"><span class="linenos">654</span></a> <span class="p">)</span>
+</span><span id="DataFrame.dropna-655"><a href="#DataFrame.dropna-655"><span class="linenos">655</span></a> <span class="n">if_null_checks</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.dropna-656"><a href="#DataFrame.dropna-656"><span class="linenos">656</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">null_check_columns</span>
+</span><span id="DataFrame.dropna-657"><a href="#DataFrame.dropna-657"><span class="linenos">657</span></a> <span class="p">]</span>
+</span><span id="DataFrame.dropna-658"><a href="#DataFrame.dropna-658"><span class="linenos">658</span></a> <span class="n">nulls_added_together</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">+</span> <span class="n">y</span><span class="p">,</span> <span class="n">if_null_checks</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-659"><a href="#DataFrame.dropna-659"><span class="linenos">659</span></a> <span class="n">num_nulls</span> <span class="o">=</span> <span class="n">nulls_added_together</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-660"><a href="#DataFrame.dropna-660"><span class="linenos">660</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">num_nulls</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-661"><a href="#DataFrame.dropna-661"><span class="linenos">661</span></a> <span class="n">filtered_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;num_nulls&quot;</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">minimum_num_nulls</span><span class="p">))</span>
+</span><span id="DataFrame.dropna-662"><a href="#DataFrame.dropna-662"><span class="linenos">662</span></a> <span class="n">final_df</span> <span class="o">=</span> <span class="n">filtered_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">all_columns</span><span class="p">)</span>
+</span><span id="DataFrame.dropna-663"><a href="#DataFrame.dropna-663"><span class="linenos">663</span></a> <span class="k">return</span> <span class="n">final_df</span>
</span></pre></div>
@@ -2835,55 +2846,55 @@ is unlikely to come up.</p>
<div class="decorator">@operation(Operation.FROM)</div>
<span class="def">def</span>
- <span class="name">fillna</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">value</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791550370512&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">subset</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">NoneType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">DataFrame</a></span>:</span></span>
+ <span class="name">fillna</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">value</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844517633936&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">subset</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">NoneType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">DataFrame</a></span>:</span></span>
<label class="view-source-button" for="DataFrame.fillna-view-source"><span>View Source</span></label>
</div>
<a class="headerlink" href="#DataFrame.fillna"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.fillna-664"><a href="#DataFrame.fillna-664"><span class="linenos">664</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.fillna-665"><a href="#DataFrame.fillna-665"><span class="linenos">665</span></a> <span class="k">def</span> <span class="nf">fillna</span><span class="p">(</span>
-</span><span id="DataFrame.fillna-666"><a href="#DataFrame.fillna-666"><span class="linenos">666</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.fillna-667"><a href="#DataFrame.fillna-667"><span class="linenos">667</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">ColumnLiterals</span><span class="p">],</span>
-</span><span id="DataFrame.fillna-668"><a href="#DataFrame.fillna-668"><span class="linenos">668</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.fillna-669"><a href="#DataFrame.fillna-669"><span class="linenos">669</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.fillna-670"><a href="#DataFrame.fillna-670"><span class="linenos">670</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame.fillna-671"><a href="#DataFrame.fillna-671"><span class="linenos">671</span></a><span class="sd"> Functionality Difference: If you provide a value to replace a null and that type conflicts</span>
-</span><span id="DataFrame.fillna-672"><a href="#DataFrame.fillna-672"><span class="linenos">672</span></a><span class="sd"> with the type of the column then PySpark will just ignore your replacement.</span>
-</span><span id="DataFrame.fillna-673"><a href="#DataFrame.fillna-673"><span class="linenos">673</span></a><span class="sd"> This will try to cast them to be the same in some cases. So they won&#39;t always match.</span>
-</span><span id="DataFrame.fillna-674"><a href="#DataFrame.fillna-674"><span class="linenos">674</span></a><span class="sd"> Best to not mix types so make sure replacement is the same type as the column</span>
-</span><span id="DataFrame.fillna-675"><a href="#DataFrame.fillna-675"><span class="linenos">675</span></a>
-</span><span id="DataFrame.fillna-676"><a href="#DataFrame.fillna-676"><span class="linenos">676</span></a><span class="sd"> Possibility for improvement: Use `typeof` function to get the type of the column</span>
-</span><span id="DataFrame.fillna-677"><a href="#DataFrame.fillna-677"><span class="linenos">677</span></a><span class="sd"> and check if it matches the type of the value provided. If not then make it null.</span>
-</span><span id="DataFrame.fillna-678"><a href="#DataFrame.fillna-678"><span class="linenos">678</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame.fillna-679"><a href="#DataFrame.fillna-679"><span class="linenos">679</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
-</span><span id="DataFrame.fillna-680"><a href="#DataFrame.fillna-680"><span class="linenos">680</span></a>
-</span><span id="DataFrame.fillna-681"><a href="#DataFrame.fillna-681"><span class="linenos">681</span></a> <span class="n">values</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="DataFrame.fillna-682"><a href="#DataFrame.fillna-682"><span class="linenos">682</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="DataFrame.fillna-683"><a href="#DataFrame.fillna-683"><span class="linenos">683</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.fillna-684"><a href="#DataFrame.fillna-684"><span class="linenos">684</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.fillna-685"><a href="#DataFrame.fillna-685"><span class="linenos">685</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
-</span><span id="DataFrame.fillna-686"><a href="#DataFrame.fillna-686"><span class="linenos">686</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
-</span><span id="DataFrame.fillna-687"><a href="#DataFrame.fillna-687"><span class="linenos">687</span></a> <span class="n">values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
-</span><span id="DataFrame.fillna-688"><a href="#DataFrame.fillna-688"><span class="linenos">688</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="p">))</span>
-</span><span id="DataFrame.fillna-689"><a href="#DataFrame.fillna-689"><span class="linenos">689</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">columns</span><span class="p">:</span>
-</span><span id="DataFrame.fillna-690"><a href="#DataFrame.fillna-690"><span class="linenos">690</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
-</span><span id="DataFrame.fillna-691"><a href="#DataFrame.fillna-691"><span class="linenos">691</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">values</span><span class="p">:</span>
-</span><span id="DataFrame.fillna-692"><a href="#DataFrame.fillna-692"><span class="linenos">692</span></a> <span class="n">values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.fillna-693"><a href="#DataFrame.fillna-693"><span class="linenos">693</span></a> <span class="n">value_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">values</span><span class="p">]</span>
-</span><span id="DataFrame.fillna-694"><a href="#DataFrame.fillna-694"><span class="linenos">694</span></a>
-</span><span id="DataFrame.fillna-695"><a href="#DataFrame.fillna-695"><span class="linenos">695</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span>
-</span><span id="DataFrame.fillna-696"><a href="#DataFrame.fillna-696"><span class="linenos">696</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="p">(</span>
-</span><span id="DataFrame.fillna-697"><a href="#DataFrame.fillna-697"><span class="linenos">697</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
-</span><span id="DataFrame.fillna-698"><a href="#DataFrame.fillna-698"><span class="linenos">698</span></a> <span class="p">)</span>
-</span><span id="DataFrame.fillna-699"><a href="#DataFrame.fillna-699"><span class="linenos">699</span></a> <span class="k">for</span> <span class="n">column</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">value_columns</span><span class="p">)</span>
-</span><span id="DataFrame.fillna-700"><a href="#DataFrame.fillna-700"><span class="linenos">700</span></a> <span class="p">}</span>
-</span><span id="DataFrame.fillna-701"><a href="#DataFrame.fillna-701"><span class="linenos">701</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">null_replacement_mapping</span><span class="p">}</span>
-</span><span id="DataFrame.fillna-702"><a href="#DataFrame.fillna-702"><span class="linenos">702</span></a> <span class="n">null_replacement_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.fillna-703"><a href="#DataFrame.fillna-703"><span class="linenos">703</span></a> <span class="n">null_replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span>
-</span><span id="DataFrame.fillna-704"><a href="#DataFrame.fillna-704"><span class="linenos">704</span></a> <span class="p">]</span>
-</span><span id="DataFrame.fillna-705"><a href="#DataFrame.fillna-705"><span class="linenos">705</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">null_replacement_columns</span><span class="p">)</span>
-</span><span id="DataFrame.fillna-706"><a href="#DataFrame.fillna-706"><span class="linenos">706</span></a> <span class="k">return</span> <span class="n">new_df</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.fillna-665"><a href="#DataFrame.fillna-665"><span class="linenos">665</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.fillna-666"><a href="#DataFrame.fillna-666"><span class="linenos">666</span></a> <span class="k">def</span> <span class="nf">fillna</span><span class="p">(</span>
+</span><span id="DataFrame.fillna-667"><a href="#DataFrame.fillna-667"><span class="linenos">667</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.fillna-668"><a href="#DataFrame.fillna-668"><span class="linenos">668</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="n">ColumnLiterals</span><span class="p">],</span>
+</span><span id="DataFrame.fillna-669"><a href="#DataFrame.fillna-669"><span class="linenos">669</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.fillna-670"><a href="#DataFrame.fillna-670"><span class="linenos">670</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.fillna-671"><a href="#DataFrame.fillna-671"><span class="linenos">671</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame.fillna-672"><a href="#DataFrame.fillna-672"><span class="linenos">672</span></a><span class="sd"> Functionality Difference: If you provide a value to replace a null and that type conflicts</span>
+</span><span id="DataFrame.fillna-673"><a href="#DataFrame.fillna-673"><span class="linenos">673</span></a><span class="sd"> with the type of the column then PySpark will just ignore your replacement.</span>
+</span><span id="DataFrame.fillna-674"><a href="#DataFrame.fillna-674"><span class="linenos">674</span></a><span class="sd"> This will try to cast them to be the same in some cases. So they won&#39;t always match.</span>
+</span><span id="DataFrame.fillna-675"><a href="#DataFrame.fillna-675"><span class="linenos">675</span></a><span class="sd"> Best to not mix types so make sure replacement is the same type as the column</span>
+</span><span id="DataFrame.fillna-676"><a href="#DataFrame.fillna-676"><span class="linenos">676</span></a>
+</span><span id="DataFrame.fillna-677"><a href="#DataFrame.fillna-677"><span class="linenos">677</span></a><span class="sd"> Possibility for improvement: Use `typeof` function to get the type of the column</span>
+</span><span id="DataFrame.fillna-678"><a href="#DataFrame.fillna-678"><span class="linenos">678</span></a><span class="sd"> and check if it matches the type of the value provided. If not then make it null.</span>
+</span><span id="DataFrame.fillna-679"><a href="#DataFrame.fillna-679"><span class="linenos">679</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame.fillna-680"><a href="#DataFrame.fillna-680"><span class="linenos">680</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
+</span><span id="DataFrame.fillna-681"><a href="#DataFrame.fillna-681"><span class="linenos">681</span></a>
+</span><span id="DataFrame.fillna-682"><a href="#DataFrame.fillna-682"><span class="linenos">682</span></a> <span class="n">values</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="DataFrame.fillna-683"><a href="#DataFrame.fillna-683"><span class="linenos">683</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="DataFrame.fillna-684"><a href="#DataFrame.fillna-684"><span class="linenos">684</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.fillna-685"><a href="#DataFrame.fillna-685"><span class="linenos">685</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.fillna-686"><a href="#DataFrame.fillna-686"><span class="linenos">686</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
+</span><span id="DataFrame.fillna-687"><a href="#DataFrame.fillna-687"><span class="linenos">687</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+</span><span id="DataFrame.fillna-688"><a href="#DataFrame.fillna-688"><span class="linenos">688</span></a> <span class="n">values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
+</span><span id="DataFrame.fillna-689"><a href="#DataFrame.fillna-689"><span class="linenos">689</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="p">))</span>
+</span><span id="DataFrame.fillna-690"><a href="#DataFrame.fillna-690"><span class="linenos">690</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">columns</span><span class="p">:</span>
+</span><span id="DataFrame.fillna-691"><a href="#DataFrame.fillna-691"><span class="linenos">691</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
+</span><span id="DataFrame.fillna-692"><a href="#DataFrame.fillna-692"><span class="linenos">692</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">values</span><span class="p">:</span>
+</span><span id="DataFrame.fillna-693"><a href="#DataFrame.fillna-693"><span class="linenos">693</span></a> <span class="n">values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.fillna-694"><a href="#DataFrame.fillna-694"><span class="linenos">694</span></a> <span class="n">value_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">values</span><span class="p">]</span>
+</span><span id="DataFrame.fillna-695"><a href="#DataFrame.fillna-695"><span class="linenos">695</span></a>
+</span><span id="DataFrame.fillna-696"><a href="#DataFrame.fillna-696"><span class="linenos">696</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span>
+</span><span id="DataFrame.fillna-697"><a href="#DataFrame.fillna-697"><span class="linenos">697</span></a> <span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="p">(</span>
+</span><span id="DataFrame.fillna-698"><a href="#DataFrame.fillna-698"><span class="linenos">698</span></a> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">)</span>
+</span><span id="DataFrame.fillna-699"><a href="#DataFrame.fillna-699"><span class="linenos">699</span></a> <span class="p">)</span>
+</span><span id="DataFrame.fillna-700"><a href="#DataFrame.fillna-700"><span class="linenos">700</span></a> <span class="k">for</span> <span class="n">column</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">value_columns</span><span class="p">)</span>
+</span><span id="DataFrame.fillna-701"><a href="#DataFrame.fillna-701"><span class="linenos">701</span></a> <span class="p">}</span>
+</span><span id="DataFrame.fillna-702"><a href="#DataFrame.fillna-702"><span class="linenos">702</span></a> <span class="n">null_replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">null_replacement_mapping</span><span class="p">}</span>
+</span><span id="DataFrame.fillna-703"><a href="#DataFrame.fillna-703"><span class="linenos">703</span></a> <span class="n">null_replacement_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.fillna-704"><a href="#DataFrame.fillna-704"><span class="linenos">704</span></a> <span class="n">null_replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span>
+</span><span id="DataFrame.fillna-705"><a href="#DataFrame.fillna-705"><span class="linenos">705</span></a> <span class="p">]</span>
+</span><span id="DataFrame.fillna-706"><a href="#DataFrame.fillna-706"><span class="linenos">706</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">null_replacement_columns</span><span class="p">)</span>
+</span><span id="DataFrame.fillna-707"><a href="#DataFrame.fillna-707"><span class="linenos">707</span></a> <span class="k">return</span> <span class="n">new_df</span>
</span></pre></div>
@@ -2904,59 +2915,59 @@ and check if it matches the type of the value provided. If not then make it null
<div class="decorator">@operation(Operation.FROM)</div>
<span class="def">def</span>
- <span class="name">replace</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">to_replace</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Dict</span><span class="p">]</span>,</span><span class="param"> <span class="n">value</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">subset</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Collection</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791551045616&#39;</span><span class="o">&gt;</span><span class="p">],</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791551045616&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">DataFrame</a></span>:</span></span>
+ <span class="name">replace</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">to_replace</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Dict</span><span class="p">]</span>,</span><span class="param"> <span class="n">value</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>,</span><span class="param"> <span class="n">subset</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Collection</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844517449296&#39;</span><span class="o">&gt;</span><span class="p">],</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844517449296&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">DataFrame</a></span>:</span></span>
<label class="view-source-button" for="DataFrame.replace-view-source"><span>View Source</span></label>
</div>
<a class="headerlink" href="#DataFrame.replace"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.replace-708"><a href="#DataFrame.replace-708"><span class="linenos">708</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
-</span><span id="DataFrame.replace-709"><a href="#DataFrame.replace-709"><span class="linenos">709</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
-</span><span id="DataFrame.replace-710"><a href="#DataFrame.replace-710"><span class="linenos">710</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrame.replace-711"><a href="#DataFrame.replace-711"><span class="linenos">711</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
-</span><span id="DataFrame.replace-712"><a href="#DataFrame.replace-712"><span class="linenos">712</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.replace-713"><a href="#DataFrame.replace-713"><span class="linenos">713</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Collection</span><span class="p">[</span><span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">|</span> <span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrame.replace-714"><a href="#DataFrame.replace-714"><span class="linenos">714</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.replace-715"><a href="#DataFrame.replace-715"><span class="linenos">715</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
-</span><span id="DataFrame.replace-716"><a href="#DataFrame.replace-716"><span class="linenos">716</span></a>
-</span><span id="DataFrame.replace-717"><a href="#DataFrame.replace-717"><span class="linenos">717</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="kc">None</span>
-</span><span id="DataFrame.replace-718"><a href="#DataFrame.replace-718"><span class="linenos">718</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.replace-719"><a href="#DataFrame.replace-719"><span class="linenos">719</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.replace-720"><a href="#DataFrame.replace-720"><span class="linenos">720</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
-</span><span id="DataFrame.replace-721"><a href="#DataFrame.replace-721"><span class="linenos">721</span></a>
-</span><span id="DataFrame.replace-722"><a href="#DataFrame.replace-722"><span class="linenos">722</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
-</span><span id="DataFrame.replace-723"><a href="#DataFrame.replace-723"><span class="linenos">723</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
-</span><span id="DataFrame.replace-724"><a href="#DataFrame.replace-724"><span class="linenos">724</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span>
-</span><span id="DataFrame.replace-725"><a href="#DataFrame.replace-725"><span class="linenos">725</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
-</span><span id="DataFrame.replace-726"><a href="#DataFrame.replace-726"><span class="linenos">726</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="n">old_values</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
-</span><span id="DataFrame.replace-727"><a href="#DataFrame.replace-727"><span class="linenos">727</span></a> <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">list</span><span class="p">),</span> <span class="s2">&quot;value must be a list since the replacements are a list&quot;</span>
-</span><span id="DataFrame.replace-728"><a href="#DataFrame.replace-728"><span class="linenos">728</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
-</span><span id="DataFrame.replace-729"><a href="#DataFrame.replace-729"><span class="linenos">729</span></a> <span class="n">value</span>
-</span><span id="DataFrame.replace-730"><a href="#DataFrame.replace-730"><span class="linenos">730</span></a> <span class="p">),</span> <span class="s2">&quot;the replacements and values must be the same length&quot;</span>
-</span><span id="DataFrame.replace-731"><a href="#DataFrame.replace-731"><span class="linenos">731</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="n">to_replace</span>
-</span><span id="DataFrame.replace-732"><a href="#DataFrame.replace-732"><span class="linenos">732</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="n">value</span>
-</span><span id="DataFrame.replace-733"><a href="#DataFrame.replace-733"><span class="linenos">733</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.replace-734"><a href="#DataFrame.replace-734"><span class="linenos">734</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">to_replace</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.replace-735"><a href="#DataFrame.replace-735"><span class="linenos">735</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
-</span><span id="DataFrame.replace-736"><a href="#DataFrame.replace-736"><span class="linenos">736</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">old_values</span><span class="p">]</span>
-</span><span id="DataFrame.replace-737"><a href="#DataFrame.replace-737"><span class="linenos">737</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">new_values</span><span class="p">]</span>
-</span><span id="DataFrame.replace-738"><a href="#DataFrame.replace-738"><span class="linenos">738</span></a>
-</span><span id="DataFrame.replace-739"><a href="#DataFrame.replace-739"><span class="linenos">739</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{}</span>
-</span><span id="DataFrame.replace-740"><a href="#DataFrame.replace-740"><span class="linenos">740</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span>
-</span><span id="DataFrame.replace-741"><a href="#DataFrame.replace-741"><span class="linenos">741</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
-</span><span id="DataFrame.replace-742"><a href="#DataFrame.replace-742"><span class="linenos">742</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">old_values</span><span class="p">,</span> <span class="n">new_values</span><span class="p">)):</span>
-</span><span id="DataFrame.replace-743"><a href="#DataFrame.replace-743"><span class="linenos">743</span></a> <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
-</span><span id="DataFrame.replace-744"><a href="#DataFrame.replace-744"><span class="linenos">744</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span>
-</span><span id="DataFrame.replace-745"><a href="#DataFrame.replace-745"><span class="linenos">745</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.replace-746"><a href="#DataFrame.replace-746"><span class="linenos">746</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="c1"># type: ignore</span>
-</span><span id="DataFrame.replace-747"><a href="#DataFrame.replace-747"><span class="linenos">747</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span>
-</span><span id="DataFrame.replace-748"><a href="#DataFrame.replace-748"><span class="linenos">748</span></a> <span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span>
-</span><span id="DataFrame.replace-749"><a href="#DataFrame.replace-749"><span class="linenos">749</span></a> <span class="p">)</span>
-</span><span id="DataFrame.replace-750"><a href="#DataFrame.replace-750"><span class="linenos">750</span></a>
-</span><span id="DataFrame.replace-751"><a href="#DataFrame.replace-751"><span class="linenos">751</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">replacement_mapping</span><span class="p">}</span>
-</span><span id="DataFrame.replace-752"><a href="#DataFrame.replace-752"><span class="linenos">752</span></a> <span class="n">replacement_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">]</span>
-</span><span id="DataFrame.replace-753"><a href="#DataFrame.replace-753"><span class="linenos">753</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">replacement_columns</span><span class="p">)</span>
-</span><span id="DataFrame.replace-754"><a href="#DataFrame.replace-754"><span class="linenos">754</span></a> <span class="k">return</span> <span class="n">new_df</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.replace-709"><a href="#DataFrame.replace-709"><span class="linenos">709</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">FROM</span><span class="p">)</span>
+</span><span id="DataFrame.replace-710"><a href="#DataFrame.replace-710"><span class="linenos">710</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
+</span><span id="DataFrame.replace-711"><a href="#DataFrame.replace-711"><span class="linenos">711</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrame.replace-712"><a href="#DataFrame.replace-712"><span class="linenos">712</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
+</span><span id="DataFrame.replace-713"><a href="#DataFrame.replace-713"><span class="linenos">713</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.replace-714"><a href="#DataFrame.replace-714"><span class="linenos">714</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Collection</span><span class="p">[</span><span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">|</span> <span class="n">ColumnOrName</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrame.replace-715"><a href="#DataFrame.replace-715"><span class="linenos">715</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.replace-716"><a href="#DataFrame.replace-716"><span class="linenos">716</span></a> <span class="kn">from</span> <span class="nn">sqlglot.dataframe.sql.functions</span> <span class="kn">import</span> <span class="n">lit</span>
+</span><span id="DataFrame.replace-717"><a href="#DataFrame.replace-717"><span class="linenos">717</span></a>
+</span><span id="DataFrame.replace-718"><a href="#DataFrame.replace-718"><span class="linenos">718</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="kc">None</span>
+</span><span id="DataFrame.replace-719"><a href="#DataFrame.replace-719"><span class="linenos">719</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.replace-720"><a href="#DataFrame.replace-720"><span class="linenos">720</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="n">new_df</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.replace-721"><a href="#DataFrame.replace-721"><span class="linenos">721</span></a> <span class="n">all_column_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">:</span> <span class="n">column</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">}</span>
+</span><span id="DataFrame.replace-722"><a href="#DataFrame.replace-722"><span class="linenos">722</span></a>
+</span><span id="DataFrame.replace-723"><a href="#DataFrame.replace-723"><span class="linenos">723</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">subset</span><span class="p">)</span> <span class="k">if</span> <span class="n">subset</span> <span class="k">else</span> <span class="n">all_columns</span>
+</span><span id="DataFrame.replace-724"><a href="#DataFrame.replace-724"><span class="linenos">724</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+</span><span id="DataFrame.replace-725"><a href="#DataFrame.replace-725"><span class="linenos">725</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span>
+</span><span id="DataFrame.replace-726"><a href="#DataFrame.replace-726"><span class="linenos">726</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
+</span><span id="DataFrame.replace-727"><a href="#DataFrame.replace-727"><span class="linenos">727</span></a> <span class="k">elif</span> <span class="ow">not</span> <span class="n">old_values</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
+</span><span id="DataFrame.replace-728"><a href="#DataFrame.replace-728"><span class="linenos">728</span></a> <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">list</span><span class="p">),</span> <span class="s2">&quot;value must be a list since the replacements are a list&quot;</span>
+</span><span id="DataFrame.replace-729"><a href="#DataFrame.replace-729"><span class="linenos">729</span></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
+</span><span id="DataFrame.replace-730"><a href="#DataFrame.replace-730"><span class="linenos">730</span></a> <span class="n">value</span>
+</span><span id="DataFrame.replace-731"><a href="#DataFrame.replace-731"><span class="linenos">731</span></a> <span class="p">),</span> <span class="s2">&quot;the replacements and values must be the same length&quot;</span>
+</span><span id="DataFrame.replace-732"><a href="#DataFrame.replace-732"><span class="linenos">732</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="n">to_replace</span>
+</span><span id="DataFrame.replace-733"><a href="#DataFrame.replace-733"><span class="linenos">733</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="n">value</span>
+</span><span id="DataFrame.replace-734"><a href="#DataFrame.replace-734"><span class="linenos">734</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.replace-735"><a href="#DataFrame.replace-735"><span class="linenos">735</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">to_replace</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.replace-736"><a href="#DataFrame.replace-736"><span class="linenos">736</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">value</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
+</span><span id="DataFrame.replace-737"><a href="#DataFrame.replace-737"><span class="linenos">737</span></a> <span class="n">old_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">old_values</span><span class="p">]</span>
+</span><span id="DataFrame.replace-738"><a href="#DataFrame.replace-738"><span class="linenos">738</span></a> <span class="n">new_values</span> <span class="o">=</span> <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">new_values</span><span class="p">]</span>
+</span><span id="DataFrame.replace-739"><a href="#DataFrame.replace-739"><span class="linenos">739</span></a>
+</span><span id="DataFrame.replace-740"><a href="#DataFrame.replace-740"><span class="linenos">740</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{}</span>
+</span><span id="DataFrame.replace-741"><a href="#DataFrame.replace-741"><span class="linenos">741</span></a> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span>
+</span><span id="DataFrame.replace-742"><a href="#DataFrame.replace-742"><span class="linenos">742</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
+</span><span id="DataFrame.replace-743"><a href="#DataFrame.replace-743"><span class="linenos">743</span></a> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">old_values</span><span class="p">,</span> <span class="n">new_values</span><span class="p">)):</span>
+</span><span id="DataFrame.replace-744"><a href="#DataFrame.replace-744"><span class="linenos">744</span></a> <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+</span><span id="DataFrame.replace-745"><a href="#DataFrame.replace-745"><span class="linenos">745</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span>
+</span><span id="DataFrame.replace-746"><a href="#DataFrame.replace-746"><span class="linenos">746</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.replace-747"><a href="#DataFrame.replace-747"><span class="linenos">747</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">column</span> <span class="o">==</span> <span class="n">old_value</span><span class="p">,</span> <span class="n">new_value</span><span class="p">)</span> <span class="c1"># type: ignore</span>
+</span><span id="DataFrame.replace-748"><a href="#DataFrame.replace-748"><span class="linenos">748</span></a> <span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">expression</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span>
+</span><span id="DataFrame.replace-749"><a href="#DataFrame.replace-749"><span class="linenos">749</span></a> <span class="n">column</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span>
+</span><span id="DataFrame.replace-750"><a href="#DataFrame.replace-750"><span class="linenos">750</span></a> <span class="p">)</span>
+</span><span id="DataFrame.replace-751"><a href="#DataFrame.replace-751"><span class="linenos">751</span></a>
+</span><span id="DataFrame.replace-752"><a href="#DataFrame.replace-752"><span class="linenos">752</span></a> <span class="n">replacement_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">all_column_mapping</span><span class="p">,</span> <span class="o">**</span><span class="n">replacement_mapping</span><span class="p">}</span>
+</span><span id="DataFrame.replace-753"><a href="#DataFrame.replace-753"><span class="linenos">753</span></a> <span class="n">replacement_columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">replacement_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">alias_or_name</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">all_columns</span><span class="p">]</span>
+</span><span id="DataFrame.replace-754"><a href="#DataFrame.replace-754"><span class="linenos">754</span></a> <span class="n">new_df</span> <span class="o">=</span> <span class="n">new_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">replacement_columns</span><span class="p">)</span>
+</span><span id="DataFrame.replace-755"><a href="#DataFrame.replace-755"><span class="linenos">755</span></a> <span class="k">return</span> <span class="n">new_df</span>
</span></pre></div>
@@ -2975,18 +2986,18 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.withColumn"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.withColumn-756"><a href="#DataFrame.withColumn-756"><span class="linenos">756</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.withColumn-757"><a href="#DataFrame.withColumn-757"><span class="linenos">757</span></a> <span class="k">def</span> <span class="nf">withColumn</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">colName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.withColumn-758"><a href="#DataFrame.withColumn-758"><span class="linenos">758</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
-</span><span id="DataFrame.withColumn-759"><a href="#DataFrame.withColumn-759"><span class="linenos">759</span></a> <span class="n">existing_col_names</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">named_selects</span>
-</span><span id="DataFrame.withColumn-760"><a href="#DataFrame.withColumn-760"><span class="linenos">760</span></a> <span class="n">existing_col_index</span> <span class="o">=</span> <span class="p">(</span>
-</span><span id="DataFrame.withColumn-761"><a href="#DataFrame.withColumn-761"><span class="linenos">761</span></a> <span class="n">existing_col_names</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">colName</span><span class="p">)</span> <span class="k">if</span> <span class="n">colName</span> <span class="ow">in</span> <span class="n">existing_col_names</span> <span class="k">else</span> <span class="kc">None</span>
-</span><span id="DataFrame.withColumn-762"><a href="#DataFrame.withColumn-762"><span class="linenos">762</span></a> <span class="p">)</span>
-</span><span id="DataFrame.withColumn-763"><a href="#DataFrame.withColumn-763"><span class="linenos">763</span></a> <span class="k">if</span> <span class="n">existing_col_index</span><span class="p">:</span>
-</span><span id="DataFrame.withColumn-764"><a href="#DataFrame.withColumn-764"><span class="linenos">764</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.withColumn-765"><a href="#DataFrame.withColumn-765"><span class="linenos">765</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span><span class="p">[</span><span class="n">existing_col_index</span><span class="p">]</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">expression</span>
-</span><span id="DataFrame.withColumn-766"><a href="#DataFrame.withColumn-766"><span class="linenos">766</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.withColumn-767"><a href="#DataFrame.withColumn-767"><span class="linenos">767</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">colName</span><span class="p">),</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.withColumn-757"><a href="#DataFrame.withColumn-757"><span class="linenos">757</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.withColumn-758"><a href="#DataFrame.withColumn-758"><span class="linenos">758</span></a> <span class="k">def</span> <span class="nf">withColumn</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">colName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.withColumn-759"><a href="#DataFrame.withColumn-759"><span class="linenos">759</span></a> <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_col</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
+</span><span id="DataFrame.withColumn-760"><a href="#DataFrame.withColumn-760"><span class="linenos">760</span></a> <span class="n">existing_col_names</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">named_selects</span>
+</span><span id="DataFrame.withColumn-761"><a href="#DataFrame.withColumn-761"><span class="linenos">761</span></a> <span class="n">existing_col_index</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="DataFrame.withColumn-762"><a href="#DataFrame.withColumn-762"><span class="linenos">762</span></a> <span class="n">existing_col_names</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">colName</span><span class="p">)</span> <span class="k">if</span> <span class="n">colName</span> <span class="ow">in</span> <span class="n">existing_col_names</span> <span class="k">else</span> <span class="kc">None</span>
+</span><span id="DataFrame.withColumn-763"><a href="#DataFrame.withColumn-763"><span class="linenos">763</span></a> <span class="p">)</span>
+</span><span id="DataFrame.withColumn-764"><a href="#DataFrame.withColumn-764"><span class="linenos">764</span></a> <span class="k">if</span> <span class="n">existing_col_index</span><span class="p">:</span>
+</span><span id="DataFrame.withColumn-765"><a href="#DataFrame.withColumn-765"><span class="linenos">765</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.withColumn-766"><a href="#DataFrame.withColumn-766"><span class="linenos">766</span></a> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span><span class="p">[</span><span class="n">existing_col_index</span><span class="p">]</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">expression</span>
+</span><span id="DataFrame.withColumn-767"><a href="#DataFrame.withColumn-767"><span class="linenos">767</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.withColumn-768"><a href="#DataFrame.withColumn-768"><span class="linenos">768</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">colName</span><span class="p">),</span> <span class="n">append</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
</span></pre></div>
@@ -3005,22 +3016,22 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.withColumnRenamed"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.withColumnRenamed-769"><a href="#DataFrame.withColumnRenamed-769"><span class="linenos">769</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.withColumnRenamed-770"><a href="#DataFrame.withColumnRenamed-770"><span class="linenos">770</span></a> <span class="k">def</span> <span class="nf">withColumnRenamed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">existing</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">new</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
-</span><span id="DataFrame.withColumnRenamed-771"><a href="#DataFrame.withColumnRenamed-771"><span class="linenos">771</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
-</span><span id="DataFrame.withColumnRenamed-772"><a href="#DataFrame.withColumnRenamed-772"><span class="linenos">772</span></a> <span class="n">existing_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.withColumnRenamed-773"><a href="#DataFrame.withColumnRenamed-773"><span class="linenos">773</span></a> <span class="n">expression</span>
-</span><span id="DataFrame.withColumnRenamed-774"><a href="#DataFrame.withColumnRenamed-774"><span class="linenos">774</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span>
-</span><span id="DataFrame.withColumnRenamed-775"><a href="#DataFrame.withColumnRenamed-775"><span class="linenos">775</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="n">existing</span>
-</span><span id="DataFrame.withColumnRenamed-776"><a href="#DataFrame.withColumnRenamed-776"><span class="linenos">776</span></a> <span class="p">]</span>
-</span><span id="DataFrame.withColumnRenamed-777"><a href="#DataFrame.withColumnRenamed-777"><span class="linenos">777</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">existing_columns</span><span class="p">:</span>
-</span><span id="DataFrame.withColumnRenamed-778"><a href="#DataFrame.withColumnRenamed-778"><span class="linenos">778</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Tried to rename a column that doesn&#39;t exist&quot;</span><span class="p">)</span>
-</span><span id="DataFrame.withColumnRenamed-779"><a href="#DataFrame.withColumnRenamed-779"><span class="linenos">779</span></a> <span class="k">for</span> <span class="n">existing_column</span> <span class="ow">in</span> <span class="n">existing_columns</span><span class="p">:</span>
-</span><span id="DataFrame.withColumnRenamed-780"><a href="#DataFrame.withColumnRenamed-780"><span class="linenos">780</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">existing_column</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
-</span><span id="DataFrame.withColumnRenamed-781"><a href="#DataFrame.withColumnRenamed-781"><span class="linenos">781</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">existing_column</span><span class="p">,</span> <span class="n">new</span><span class="p">))</span>
-</span><span id="DataFrame.withColumnRenamed-782"><a href="#DataFrame.withColumnRenamed-782"><span class="linenos">782</span></a> <span class="k">else</span><span class="p">:</span>
-</span><span id="DataFrame.withColumnRenamed-783"><a href="#DataFrame.withColumnRenamed-783"><span class="linenos">783</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;alias&quot;</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">new</span><span class="p">))</span>
-</span><span id="DataFrame.withColumnRenamed-784"><a href="#DataFrame.withColumnRenamed-784"><span class="linenos">784</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.withColumnRenamed-770"><a href="#DataFrame.withColumnRenamed-770"><span class="linenos">770</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.withColumnRenamed-771"><a href="#DataFrame.withColumnRenamed-771"><span class="linenos">771</span></a> <span class="k">def</span> <span class="nf">withColumnRenamed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">existing</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">new</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
+</span><span id="DataFrame.withColumnRenamed-772"><a href="#DataFrame.withColumnRenamed-772"><span class="linenos">772</span></a> <span class="n">expression</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+</span><span id="DataFrame.withColumnRenamed-773"><a href="#DataFrame.withColumnRenamed-773"><span class="linenos">773</span></a> <span class="n">existing_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.withColumnRenamed-774"><a href="#DataFrame.withColumnRenamed-774"><span class="linenos">774</span></a> <span class="n">expression</span>
+</span><span id="DataFrame.withColumnRenamed-775"><a href="#DataFrame.withColumnRenamed-775"><span class="linenos">775</span></a> <span class="k">for</span> <span class="n">expression</span> <span class="ow">in</span> <span class="n">expression</span><span class="o">.</span><span class="n">expressions</span>
+</span><span id="DataFrame.withColumnRenamed-776"><a href="#DataFrame.withColumnRenamed-776"><span class="linenos">776</span></a> <span class="k">if</span> <span class="n">expression</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="o">==</span> <span class="n">existing</span>
+</span><span id="DataFrame.withColumnRenamed-777"><a href="#DataFrame.withColumnRenamed-777"><span class="linenos">777</span></a> <span class="p">]</span>
+</span><span id="DataFrame.withColumnRenamed-778"><a href="#DataFrame.withColumnRenamed-778"><span class="linenos">778</span></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">existing_columns</span><span class="p">:</span>
+</span><span id="DataFrame.withColumnRenamed-779"><a href="#DataFrame.withColumnRenamed-779"><span class="linenos">779</span></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Tried to rename a column that doesn&#39;t exist&quot;</span><span class="p">)</span>
+</span><span id="DataFrame.withColumnRenamed-780"><a href="#DataFrame.withColumnRenamed-780"><span class="linenos">780</span></a> <span class="k">for</span> <span class="n">existing_column</span> <span class="ow">in</span> <span class="n">existing_columns</span><span class="p">:</span>
+</span><span id="DataFrame.withColumnRenamed-781"><a href="#DataFrame.withColumnRenamed-781"><span class="linenos">781</span></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">existing_column</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">Column</span><span class="p">):</span>
+</span><span id="DataFrame.withColumnRenamed-782"><a href="#DataFrame.withColumnRenamed-782"><span class="linenos">782</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">exp</span><span class="o">.</span><span class="n">alias_</span><span class="p">(</span><span class="n">existing_column</span><span class="p">,</span> <span class="n">new</span><span class="p">))</span>
+</span><span id="DataFrame.withColumnRenamed-783"><a href="#DataFrame.withColumnRenamed-783"><span class="linenos">783</span></a> <span class="k">else</span><span class="p">:</span>
+</span><span id="DataFrame.withColumnRenamed-784"><a href="#DataFrame.withColumnRenamed-784"><span class="linenos">784</span></a> <span class="n">existing_column</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">&quot;alias&quot;</span><span class="p">,</span> <span class="n">exp</span><span class="o">.</span><span class="n">to_identifier</span><span class="p">(</span><span class="n">new</span><span class="p">))</span>
+</span><span id="DataFrame.withColumnRenamed-785"><a href="#DataFrame.withColumnRenamed-785"><span class="linenos">785</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="n">expression</span><span class="p">)</span>
</span></pre></div>
@@ -3039,16 +3050,16 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.drop"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.drop-786"><a href="#DataFrame.drop-786"><span class="linenos">786</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
-</span><span id="DataFrame.drop-787"><a href="#DataFrame.drop-787"><span class="linenos">787</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.drop-788"><a href="#DataFrame.drop-788"><span class="linenos">788</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
-</span><span id="DataFrame.drop-789"><a href="#DataFrame.drop-789"><span class="linenos">789</span></a> <span class="n">drop_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame.drop-790"><a href="#DataFrame.drop-790"><span class="linenos">790</span></a> <span class="n">new_columns</span> <span class="o">=</span> <span class="p">[</span>
-</span><span id="DataFrame.drop-791"><a href="#DataFrame.drop-791"><span class="linenos">791</span></a> <span class="n">col</span>
-</span><span id="DataFrame.drop-792"><a href="#DataFrame.drop-792"><span class="linenos">792</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">all_columns</span>
-</span><span id="DataFrame.drop-793"><a href="#DataFrame.drop-793"><span class="linenos">793</span></a> <span class="k">if</span> <span class="n">col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="n">drop_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">drop_column</span> <span class="ow">in</span> <span class="n">drop_cols</span><span class="p">]</span>
-</span><span id="DataFrame.drop-794"><a href="#DataFrame.drop-794"><span class="linenos">794</span></a> <span class="p">]</span>
-</span><span id="DataFrame.drop-795"><a href="#DataFrame.drop-795"><span class="linenos">795</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">new_columns</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.drop-787"><a href="#DataFrame.drop-787"><span class="linenos">787</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">SELECT</span><span class="p">)</span>
+</span><span id="DataFrame.drop-788"><a href="#DataFrame.drop-788"><span class="linenos">788</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.drop-789"><a href="#DataFrame.drop-789"><span class="linenos">789</span></a> <span class="n">all_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_outer_select_columns</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="p">)</span>
+</span><span id="DataFrame.drop-790"><a href="#DataFrame.drop-790"><span class="linenos">790</span></a> <span class="n">drop_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame.drop-791"><a href="#DataFrame.drop-791"><span class="linenos">791</span></a> <span class="n">new_columns</span> <span class="o">=</span> <span class="p">[</span>
+</span><span id="DataFrame.drop-792"><a href="#DataFrame.drop-792"><span class="linenos">792</span></a> <span class="n">col</span>
+</span><span id="DataFrame.drop-793"><a href="#DataFrame.drop-793"><span class="linenos">793</span></a> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">all_columns</span>
+</span><span id="DataFrame.drop-794"><a href="#DataFrame.drop-794"><span class="linenos">794</span></a> <span class="k">if</span> <span class="n">col</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="n">drop_column</span><span class="o">.</span><span class="n">alias_or_name</span> <span class="k">for</span> <span class="n">drop_column</span> <span class="ow">in</span> <span class="n">drop_cols</span><span class="p">]</span>
+</span><span id="DataFrame.drop-795"><a href="#DataFrame.drop-795"><span class="linenos">795</span></a> <span class="p">]</span>
+</span><span id="DataFrame.drop-796"><a href="#DataFrame.drop-796"><span class="linenos">796</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">new_columns</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
</span></pre></div>
@@ -3067,9 +3078,9 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.limit"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.limit-797"><a href="#DataFrame.limit-797"><span class="linenos">797</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">LIMIT</span><span class="p">)</span>
-</span><span id="DataFrame.limit-798"><a href="#DataFrame.limit-798"><span class="linenos">798</span></a> <span class="k">def</span> <span class="nf">limit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.limit-799"><a href="#DataFrame.limit-799"><span class="linenos">799</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="n">num</span><span class="p">))</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.limit-798"><a href="#DataFrame.limit-798"><span class="linenos">798</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">LIMIT</span><span class="p">)</span>
+</span><span id="DataFrame.limit-799"><a href="#DataFrame.limit-799"><span class="linenos">799</span></a> <span class="k">def</span> <span class="nf">limit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.limit-800"><a href="#DataFrame.limit-800"><span class="linenos">800</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">expression</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">expression</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="n">num</span><span class="p">))</span>
</span></pre></div>
@@ -3088,15 +3099,15 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.hint"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.hint-801"><a href="#DataFrame.hint-801"><span class="linenos">801</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame.hint-802"><a href="#DataFrame.hint-802"><span class="linenos">802</span></a> <span class="k">def</span> <span class="nf">hint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="n">parameters</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.hint-803"><a href="#DataFrame.hint-803"><span class="linenos">803</span></a> <span class="n">parameter_list</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
-</span><span id="DataFrame.hint-804"><a href="#DataFrame.hint-804"><span class="linenos">804</span></a> <span class="n">parameter_columns</span> <span class="o">=</span> <span class="p">(</span>
-</span><span id="DataFrame.hint-805"><a href="#DataFrame.hint-805"><span class="linenos">805</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">parameter_list</span><span class="p">)</span>
-</span><span id="DataFrame.hint-806"><a href="#DataFrame.hint-806"><span class="linenos">806</span></a> <span class="k">if</span> <span class="n">parameters</span>
-</span><span id="DataFrame.hint-807"><a href="#DataFrame.hint-807"><span class="linenos">807</span></a> <span class="k">else</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">])</span>
-</span><span id="DataFrame.hint-808"><a href="#DataFrame.hint-808"><span class="linenos">808</span></a> <span class="p">)</span>
-</span><span id="DataFrame.hint-809"><a href="#DataFrame.hint-809"><span class="linenos">809</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">parameter_columns</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.hint-802"><a href="#DataFrame.hint-802"><span class="linenos">802</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame.hint-803"><a href="#DataFrame.hint-803"><span class="linenos">803</span></a> <span class="k">def</span> <span class="nf">hint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="n">parameters</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.hint-804"><a href="#DataFrame.hint-804"><span class="linenos">804</span></a> <span class="n">parameter_list</span> <span class="o">=</span> <span class="n">ensure_list</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span>
+</span><span id="DataFrame.hint-805"><a href="#DataFrame.hint-805"><span class="linenos">805</span></a> <span class="n">parameter_columns</span> <span class="o">=</span> <span class="p">(</span>
+</span><span id="DataFrame.hint-806"><a href="#DataFrame.hint-806"><span class="linenos">806</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">parameter_list</span><span class="p">)</span>
+</span><span id="DataFrame.hint-807"><a href="#DataFrame.hint-807"><span class="linenos">807</span></a> <span class="k">if</span> <span class="n">parameters</span>
+</span><span id="DataFrame.hint-808"><a href="#DataFrame.hint-808"><span class="linenos">808</span></a> <span class="k">else</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">sequence_id</span><span class="p">])</span>
+</span><span id="DataFrame.hint-809"><a href="#DataFrame.hint-809"><span class="linenos">809</span></a> <span class="p">)</span>
+</span><span id="DataFrame.hint-810"><a href="#DataFrame.hint-810"><span class="linenos">810</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">parameter_columns</span><span class="p">)</span>
</span></pre></div>
@@ -3109,20 +3120,20 @@ and check if it matches the type of the value provided. If not then make it null
<div class="decorator">@operation(Operation.NO_OP)</div>
<span class="def">def</span>
- <span class="name">repartition</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">numPartitions</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791551045616&#39;</span><span class="o">&gt;</span><span class="p">]</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791551045616&#39;</span><span class="o">&gt;</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">DataFrame</a></span>:</span></span>
+ <span class="name">repartition</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">numPartitions</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844517449296&#39;</span><span class="o">&gt;</span><span class="p">]</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844517449296&#39;</span><span class="o">&gt;</span></span><span class="return-annotation">) -> <span class="n"><a href="#DataFrame">DataFrame</a></span>:</span></span>
<label class="view-source-button" for="DataFrame.repartition-view-source"><span>View Source</span></label>
</div>
<a class="headerlink" href="#DataFrame.repartition"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.repartition-811"><a href="#DataFrame.repartition-811"><span class="linenos">811</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame.repartition-812"><a href="#DataFrame.repartition-812"><span class="linenos">812</span></a> <span class="k">def</span> <span class="nf">repartition</span><span class="p">(</span>
-</span><span id="DataFrame.repartition-813"><a href="#DataFrame.repartition-813"><span class="linenos">813</span></a> <span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">ColumnOrName</span><span class="p">],</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">ColumnOrName</span>
-</span><span id="DataFrame.repartition-814"><a href="#DataFrame.repartition-814"><span class="linenos">814</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.repartition-815"><a href="#DataFrame.repartition-815"><span class="linenos">815</span></a> <span class="n">num_partition_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">numPartitions</span><span class="p">)</span>
-</span><span id="DataFrame.repartition-816"><a href="#DataFrame.repartition-816"><span class="linenos">816</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
-</span><span id="DataFrame.repartition-817"><a href="#DataFrame.repartition-817"><span class="linenos">817</span></a> <span class="n">args</span> <span class="o">=</span> <span class="n">num_partition_cols</span> <span class="o">+</span> <span class="n">columns</span>
-</span><span id="DataFrame.repartition-818"><a href="#DataFrame.repartition-818"><span class="linenos">818</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;repartition&quot;</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.repartition-812"><a href="#DataFrame.repartition-812"><span class="linenos">812</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame.repartition-813"><a href="#DataFrame.repartition-813"><span class="linenos">813</span></a> <span class="k">def</span> <span class="nf">repartition</span><span class="p">(</span>
+</span><span id="DataFrame.repartition-814"><a href="#DataFrame.repartition-814"><span class="linenos">814</span></a> <span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">ColumnOrName</span><span class="p">],</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">ColumnOrName</span>
+</span><span id="DataFrame.repartition-815"><a href="#DataFrame.repartition-815"><span class="linenos">815</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.repartition-816"><a href="#DataFrame.repartition-816"><span class="linenos">816</span></a> <span class="n">num_partition_cols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_list_of_columns</span><span class="p">(</span><span class="n">numPartitions</span><span class="p">)</span>
+</span><span id="DataFrame.repartition-817"><a href="#DataFrame.repartition-817"><span class="linenos">817</span></a> <span class="n">columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ensure_and_normalize_cols</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span>
+</span><span id="DataFrame.repartition-818"><a href="#DataFrame.repartition-818"><span class="linenos">818</span></a> <span class="n">args</span> <span class="o">=</span> <span class="n">num_partition_cols</span> <span class="o">+</span> <span class="n">columns</span>
+</span><span id="DataFrame.repartition-819"><a href="#DataFrame.repartition-819"><span class="linenos">819</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;repartition&quot;</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
</span></pre></div>
@@ -3141,10 +3152,10 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.coalesce"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.coalesce-820"><a href="#DataFrame.coalesce-820"><span class="linenos">820</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame.coalesce-821"><a href="#DataFrame.coalesce-821"><span class="linenos">821</span></a> <span class="k">def</span> <span class="nf">coalesce</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.coalesce-822"><a href="#DataFrame.coalesce-822"><span class="linenos">822</span></a> <span class="n">num_partitions</span> <span class="o">=</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="n">numPartitions</span><span class="p">])</span>
-</span><span id="DataFrame.coalesce-823"><a href="#DataFrame.coalesce-823"><span class="linenos">823</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;coalesce&quot;</span><span class="p">,</span> <span class="n">num_partitions</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.coalesce-821"><a href="#DataFrame.coalesce-821"><span class="linenos">821</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame.coalesce-822"><a href="#DataFrame.coalesce-822"><span class="linenos">822</span></a> <span class="k">def</span> <span class="nf">coalesce</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">numPartitions</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.coalesce-823"><a href="#DataFrame.coalesce-823"><span class="linenos">823</span></a> <span class="n">num_partitions</span> <span class="o">=</span> <span class="n">Column</span><span class="o">.</span><span class="n">ensure_cols</span><span class="p">([</span><span class="n">numPartitions</span><span class="p">])</span>
+</span><span id="DataFrame.coalesce-824"><a href="#DataFrame.coalesce-824"><span class="linenos">824</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_hint</span><span class="p">(</span><span class="s2">&quot;coalesce&quot;</span><span class="p">,</span> <span class="n">num_partitions</span><span class="p">)</span>
</span></pre></div>
@@ -3163,9 +3174,9 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.cache"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.cache-825"><a href="#DataFrame.cache-825"><span class="linenos">825</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame.cache-826"><a href="#DataFrame.cache-826"><span class="linenos">826</span></a> <span class="k">def</span> <span class="nf">cache</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.cache-827"><a href="#DataFrame.cache-827"><span class="linenos">827</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storage_level</span><span class="o">=</span><span class="s2">&quot;MEMORY_AND_DISK&quot;</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.cache-826"><a href="#DataFrame.cache-826"><span class="linenos">826</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame.cache-827"><a href="#DataFrame.cache-827"><span class="linenos">827</span></a> <span class="k">def</span> <span class="nf">cache</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.cache-828"><a href="#DataFrame.cache-828"><span class="linenos">828</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storage_level</span><span class="o">=</span><span class="s2">&quot;MEMORY_AND_DISK&quot;</span><span class="p">)</span>
</span></pre></div>
@@ -3184,12 +3195,12 @@ and check if it matches the type of the value provided. If not then make it null
</div>
<a class="headerlink" href="#DataFrame.persist"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.persist-829"><a href="#DataFrame.persist-829"><span class="linenos">829</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
-</span><span id="DataFrame.persist-830"><a href="#DataFrame.persist-830"><span class="linenos">830</span></a> <span class="k">def</span> <span class="nf">persist</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">storageLevel</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;MEMORY_AND_DISK_SER&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrame.persist-831"><a href="#DataFrame.persist-831"><span class="linenos">831</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
-</span><span id="DataFrame.persist-832"><a href="#DataFrame.persist-832"><span class="linenos">832</span></a><span class="sd"> Storage Level Options: https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-aux-cache-cache-table.html</span>
-</span><span id="DataFrame.persist-833"><a href="#DataFrame.persist-833"><span class="linenos">833</span></a><span class="sd"> &quot;&quot;&quot;</span>
-</span><span id="DataFrame.persist-834"><a href="#DataFrame.persist-834"><span class="linenos">834</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storageLevel</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrame.persist-830"><a href="#DataFrame.persist-830"><span class="linenos">830</span></a> <span class="nd">@operation</span><span class="p">(</span><span class="n">Operation</span><span class="o">.</span><span class="n">NO_OP</span><span class="p">)</span>
+</span><span id="DataFrame.persist-831"><a href="#DataFrame.persist-831"><span class="linenos">831</span></a> <span class="k">def</span> <span class="nf">persist</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">storageLevel</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;MEMORY_AND_DISK_SER&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrame.persist-832"><a href="#DataFrame.persist-832"><span class="linenos">832</span></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
+</span><span id="DataFrame.persist-833"><a href="#DataFrame.persist-833"><span class="linenos">833</span></a><span class="sd"> Storage Level Options: https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-aux-cache-cache-table.html</span>
+</span><span id="DataFrame.persist-834"><a href="#DataFrame.persist-834"><span class="linenos">834</span></a><span class="sd"> &quot;&quot;&quot;</span>
+</span><span id="DataFrame.persist-835"><a href="#DataFrame.persist-835"><span class="linenos">835</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cache</span><span class="p">(</span><span class="n">storageLevel</span><span class="p">)</span>
</span></pre></div>
@@ -3829,7 +3840,7 @@ and check if it matches the type of the value provided. If not then make it null
<input id="Column.__init__-view-source" class="view-source-toggle-state" type="checkbox" aria-hidden="true" tabindex="-1">
<div class="attr function">
- <span class="name">Column</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">expression</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552968640&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span></span>)</span>
+ <span class="name">Column</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">expression</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519830208&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span></span>)</span>
<label class="view-source-button" for="Column.__init__-view-source"><span>View Source</span></label>
@@ -3873,7 +3884,7 @@ and check if it matches the type of the value provided. If not then make it null
<div class="decorator">@classmethod</div>
<span class="def">def</span>
- <span class="name">ensure_col</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">value</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552968640&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
+ <span class="name">ensure_col</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">value</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519830208&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">,</span> <span class="n">NoneType</span><span class="p">]</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
<label class="view-source-button" for="Column.ensure_col-view-source"><span>View Source</span></label>
@@ -3894,7 +3905,7 @@ and check if it matches the type of the value provided. If not then make it null
<div class="decorator">@classmethod</div>
<span class="def">def</span>
- <span class="name">ensure_cols</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">args</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552968640&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n">List</span><span class="p">[</span><span class="n"><a href="#Column">Column</a></span><span class="p">]</span>:</span></span>
+ <span class="name">ensure_cols</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">args</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519830208&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n"><a href="../expressions.html#Expression">sqlglot.expressions.Expression</a></span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n">List</span><span class="p">[</span><span class="n"><a href="#Column">Column</a></span><span class="p">]</span>:</span></span>
<label class="view-source-button" for="Column.ensure_cols-view-source"><span>View Source</span></label>
@@ -3915,7 +3926,7 @@ and check if it matches the type of the value provided. If not then make it null
<div class="decorator">@classmethod</div>
<span class="def">def</span>
- <span class="name">invoke_anonymous_function</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">column</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552968640&#39;</span><span class="o">&gt;</span><span class="p">]</span>,</span><span class="param"> <span class="n">func_name</span><span class="p">:</span> <span class="nb">str</span>,</span><span class="param"> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552968640&#39;</span><span class="o">&gt;</span><span class="p">]</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
+ <span class="name">invoke_anonymous_function</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">column</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519830208&#39;</span><span class="o">&gt;</span><span class="p">]</span>,</span><span class="param"> <span class="n">func_name</span><span class="p">:</span> <span class="nb">str</span>,</span><span class="param"> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519830208&#39;</span><span class="o">&gt;</span><span class="p">]</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
<label class="view-source-button" for="Column.invoke_anonymous_function-view-source"><span>View Source</span></label>
@@ -3942,7 +3953,7 @@ and check if it matches the type of the value provided. If not then make it null
<div class="decorator">@classmethod</div>
<span class="def">def</span>
- <span class="name">invoke_expression_over_column</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">column</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552968640&#39;</span><span class="o">&gt;</span><span class="p">]</span>,</span><span class="param"> <span class="n">callable_expression</span><span class="p">:</span> <span class="n">Callable</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
+ <span class="name">invoke_expression_over_column</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="n">column</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519830208&#39;</span><span class="o">&gt;</span><span class="p">]</span>,</span><span class="param"> <span class="n">callable_expression</span><span class="p">:</span> <span class="n">Callable</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
<label class="view-source-button" for="Column.invoke_expression_over_column-view-source"><span>View Source</span></label>
@@ -3981,7 +3992,7 @@ and check if it matches the type of the value provided. If not then make it null
<div class="attr function">
<span class="def">def</span>
- <span class="name">binary_op</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">klass</span><span class="p">:</span> <span class="n">Callable</span>,</span><span class="param"> <span class="n">other</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552968640&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
+ <span class="name">binary_op</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">klass</span><span class="p">:</span> <span class="n">Callable</span>,</span><span class="param"> <span class="n">other</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519830208&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
<label class="view-source-button" for="Column.binary_op-view-source"><span>View Source</span></label>
@@ -4002,7 +4013,7 @@ and check if it matches the type of the value provided. If not then make it null
<div class="attr function">
<span class="def">def</span>
- <span class="name">inverse_binary_op</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">klass</span><span class="p">:</span> <span class="n">Callable</span>,</span><span class="param"> <span class="n">other</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552968640&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
+ <span class="name">inverse_binary_op</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">klass</span><span class="p">:</span> <span class="n">Callable</span>,</span><span class="param"> <span class="n">other</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519830208&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="o">**</span><span class="n">kwargs</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
<label class="view-source-button" for="Column.inverse_binary_op-view-source"><span>View Source</span></label>
@@ -4596,7 +4607,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="attr function">
<span class="def">def</span>
- <span class="name">isin</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552968640&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">Iterable</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552968640&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">):</span></span>
+ <span class="name">isin</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519830208&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">Iterable</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519830208&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">):</span></span>
<label class="view-source-button" for="Column.isin-view-source"><span>View Source</span></label>
@@ -4617,7 +4628,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="attr function">
<span class="def">def</span>
- <span class="name">between</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">lowerBound</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552968640&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">upperBound</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791552968640&#39;</span><span class="o">&gt;</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
+ <span class="name">between</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">lowerBound</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519830208&#39;</span><span class="o">&gt;</span>,</span><span class="param"> <span class="n">upperBound</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844519830208&#39;</span><span class="o">&gt;</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
<label class="view-source-button" for="Column.between-view-source"><span>View Source</span></label>
@@ -4652,7 +4663,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="attr function">
<span class="def">def</span>
- <span class="name">over</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">window</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791549327696&#39;</span><span class="o">&gt;</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
+ <span class="name">over</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="n">window</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844518160288&#39;</span><span class="o">&gt;</span></span><span class="return-annotation">) -> <span class="n"><a href="#Column">Column</a></span>:</span></span>
<label class="view-source-button" for="Column.over-view-source"><span>View Source</span></label>
@@ -4680,32 +4691,32 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameNaFunctions"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions-837"><a href="#DataFrameNaFunctions-837"><span class="linenos">837</span></a><span class="k">class</span> <span class="nc">DataFrameNaFunctions</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions-838"><a href="#DataFrameNaFunctions-838"><span class="linenos">838</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">):</span>
-</span><span id="DataFrameNaFunctions-839"><a href="#DataFrameNaFunctions-839"><span class="linenos">839</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
-</span><span id="DataFrameNaFunctions-840"><a href="#DataFrameNaFunctions-840"><span class="linenos">840</span></a>
-</span><span id="DataFrameNaFunctions-841"><a href="#DataFrameNaFunctions-841"><span class="linenos">841</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span>
-</span><span id="DataFrameNaFunctions-842"><a href="#DataFrameNaFunctions-842"><span class="linenos">842</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-843"><a href="#DataFrameNaFunctions-843"><span class="linenos">843</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-844"><a href="#DataFrameNaFunctions-844"><span class="linenos">844</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-845"><a href="#DataFrameNaFunctions-845"><span class="linenos">845</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-846"><a href="#DataFrameNaFunctions-846"><span class="linenos">846</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions-847"><a href="#DataFrameNaFunctions-847"><span class="linenos">847</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">how</span><span class="o">=</span><span class="n">how</span><span class="p">,</span> <span class="n">thresh</span><span class="o">=</span><span class="n">thresh</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
-</span><span id="DataFrameNaFunctions-848"><a href="#DataFrameNaFunctions-848"><span class="linenos">848</span></a>
-</span><span id="DataFrameNaFunctions-849"><a href="#DataFrameNaFunctions-849"><span class="linenos">849</span></a> <span class="k">def</span> <span class="nf">fill</span><span class="p">(</span>
-</span><span id="DataFrameNaFunctions-850"><a href="#DataFrameNaFunctions-850"><span class="linenos">850</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-851"><a href="#DataFrameNaFunctions-851"><span class="linenos">851</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">bool</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]],</span>
-</span><span id="DataFrameNaFunctions-852"><a href="#DataFrameNaFunctions-852"><span class="linenos">852</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-853"><a href="#DataFrameNaFunctions-853"><span class="linenos">853</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions-854"><a href="#DataFrameNaFunctions-854"><span class="linenos">854</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
-</span><span id="DataFrameNaFunctions-855"><a href="#DataFrameNaFunctions-855"><span class="linenos">855</span></a>
-</span><span id="DataFrameNaFunctions-856"><a href="#DataFrameNaFunctions-856"><span class="linenos">856</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
-</span><span id="DataFrameNaFunctions-857"><a href="#DataFrameNaFunctions-857"><span class="linenos">857</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-858"><a href="#DataFrameNaFunctions-858"><span class="linenos">858</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
-</span><span id="DataFrameNaFunctions-859"><a href="#DataFrameNaFunctions-859"><span class="linenos">859</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-860"><a href="#DataFrameNaFunctions-860"><span class="linenos">860</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions-861"><a href="#DataFrameNaFunctions-861"><span class="linenos">861</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions-862"><a href="#DataFrameNaFunctions-862"><span class="linenos">862</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">to_replace</span><span class="o">=</span><span class="n">to_replace</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions-838"><a href="#DataFrameNaFunctions-838"><span class="linenos">838</span></a><span class="k">class</span> <span class="nc">DataFrameNaFunctions</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions-839"><a href="#DataFrameNaFunctions-839"><span class="linenos">839</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">):</span>
+</span><span id="DataFrameNaFunctions-840"><a href="#DataFrameNaFunctions-840"><span class="linenos">840</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
+</span><span id="DataFrameNaFunctions-841"><a href="#DataFrameNaFunctions-841"><span class="linenos">841</span></a>
+</span><span id="DataFrameNaFunctions-842"><a href="#DataFrameNaFunctions-842"><span class="linenos">842</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span>
+</span><span id="DataFrameNaFunctions-843"><a href="#DataFrameNaFunctions-843"><span class="linenos">843</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-844"><a href="#DataFrameNaFunctions-844"><span class="linenos">844</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-845"><a href="#DataFrameNaFunctions-845"><span class="linenos">845</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-846"><a href="#DataFrameNaFunctions-846"><span class="linenos">846</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-847"><a href="#DataFrameNaFunctions-847"><span class="linenos">847</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions-848"><a href="#DataFrameNaFunctions-848"><span class="linenos">848</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">how</span><span class="o">=</span><span class="n">how</span><span class="p">,</span> <span class="n">thresh</span><span class="o">=</span><span class="n">thresh</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
+</span><span id="DataFrameNaFunctions-849"><a href="#DataFrameNaFunctions-849"><span class="linenos">849</span></a>
+</span><span id="DataFrameNaFunctions-850"><a href="#DataFrameNaFunctions-850"><span class="linenos">850</span></a> <span class="k">def</span> <span class="nf">fill</span><span class="p">(</span>
+</span><span id="DataFrameNaFunctions-851"><a href="#DataFrameNaFunctions-851"><span class="linenos">851</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-852"><a href="#DataFrameNaFunctions-852"><span class="linenos">852</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">bool</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]],</span>
+</span><span id="DataFrameNaFunctions-853"><a href="#DataFrameNaFunctions-853"><span class="linenos">853</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-854"><a href="#DataFrameNaFunctions-854"><span class="linenos">854</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions-855"><a href="#DataFrameNaFunctions-855"><span class="linenos">855</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
+</span><span id="DataFrameNaFunctions-856"><a href="#DataFrameNaFunctions-856"><span class="linenos">856</span></a>
+</span><span id="DataFrameNaFunctions-857"><a href="#DataFrameNaFunctions-857"><span class="linenos">857</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
+</span><span id="DataFrameNaFunctions-858"><a href="#DataFrameNaFunctions-858"><span class="linenos">858</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-859"><a href="#DataFrameNaFunctions-859"><span class="linenos">859</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
+</span><span id="DataFrameNaFunctions-860"><a href="#DataFrameNaFunctions-860"><span class="linenos">860</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-861"><a href="#DataFrameNaFunctions-861"><span class="linenos">861</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions-862"><a href="#DataFrameNaFunctions-862"><span class="linenos">862</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions-863"><a href="#DataFrameNaFunctions-863"><span class="linenos">863</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">to_replace</span><span class="o">=</span><span class="n">to_replace</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
</span></pre></div>
@@ -4721,8 +4732,8 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameNaFunctions.__init__"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.__init__-838"><a href="#DataFrameNaFunctions.__init__-838"><span class="linenos">838</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">):</span>
-</span><span id="DataFrameNaFunctions.__init__-839"><a href="#DataFrameNaFunctions.__init__-839"><span class="linenos">839</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.__init__-839"><a href="#DataFrameNaFunctions.__init__-839"><span class="linenos">839</span></a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">):</span>
+</span><span id="DataFrameNaFunctions.__init__-840"><a href="#DataFrameNaFunctions.__init__-840"><span class="linenos">840</span></a> <span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
</span></pre></div>
@@ -4751,13 +4762,13 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameNaFunctions.drop"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.drop-841"><a href="#DataFrameNaFunctions.drop-841"><span class="linenos">841</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span>
-</span><span id="DataFrameNaFunctions.drop-842"><a href="#DataFrameNaFunctions.drop-842"><span class="linenos">842</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.drop-843"><a href="#DataFrameNaFunctions.drop-843"><span class="linenos">843</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.drop-844"><a href="#DataFrameNaFunctions.drop-844"><span class="linenos">844</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.drop-845"><a href="#DataFrameNaFunctions.drop-845"><span class="linenos">845</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.drop-846"><a href="#DataFrameNaFunctions.drop-846"><span class="linenos">846</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions.drop-847"><a href="#DataFrameNaFunctions.drop-847"><span class="linenos">847</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">how</span><span class="o">=</span><span class="n">how</span><span class="p">,</span> <span class="n">thresh</span><span class="o">=</span><span class="n">thresh</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.drop-842"><a href="#DataFrameNaFunctions.drop-842"><span class="linenos">842</span></a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span>
+</span><span id="DataFrameNaFunctions.drop-843"><a href="#DataFrameNaFunctions.drop-843"><span class="linenos">843</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.drop-844"><a href="#DataFrameNaFunctions.drop-844"><span class="linenos">844</span></a> <span class="n">how</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;any&quot;</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.drop-845"><a href="#DataFrameNaFunctions.drop-845"><span class="linenos">845</span></a> <span class="n">thresh</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.drop-846"><a href="#DataFrameNaFunctions.drop-846"><span class="linenos">846</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.drop-847"><a href="#DataFrameNaFunctions.drop-847"><span class="linenos">847</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions.drop-848"><a href="#DataFrameNaFunctions.drop-848"><span class="linenos">848</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">how</span><span class="o">=</span><span class="n">how</span><span class="p">,</span> <span class="n">thresh</span><span class="o">=</span><span class="n">thresh</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
</span></pre></div>
@@ -4775,12 +4786,12 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameNaFunctions.fill"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.fill-849"><a href="#DataFrameNaFunctions.fill-849"><span class="linenos">849</span></a> <span class="k">def</span> <span class="nf">fill</span><span class="p">(</span>
-</span><span id="DataFrameNaFunctions.fill-850"><a href="#DataFrameNaFunctions.fill-850"><span class="linenos">850</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.fill-851"><a href="#DataFrameNaFunctions.fill-851"><span class="linenos">851</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">bool</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]],</span>
-</span><span id="DataFrameNaFunctions.fill-852"><a href="#DataFrameNaFunctions.fill-852"><span class="linenos">852</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.fill-853"><a href="#DataFrameNaFunctions.fill-853"><span class="linenos">853</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions.fill-854"><a href="#DataFrameNaFunctions.fill-854"><span class="linenos">854</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.fill-850"><a href="#DataFrameNaFunctions.fill-850"><span class="linenos">850</span></a> <span class="k">def</span> <span class="nf">fill</span><span class="p">(</span>
+</span><span id="DataFrameNaFunctions.fill-851"><a href="#DataFrameNaFunctions.fill-851"><span class="linenos">851</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.fill-852"><a href="#DataFrameNaFunctions.fill-852"><span class="linenos">852</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">bool</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]],</span>
+</span><span id="DataFrameNaFunctions.fill-853"><a href="#DataFrameNaFunctions.fill-853"><span class="linenos">853</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.fill-854"><a href="#DataFrameNaFunctions.fill-854"><span class="linenos">854</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions.fill-855"><a href="#DataFrameNaFunctions.fill-855"><span class="linenos">855</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
</span></pre></div>
@@ -4798,13 +4809,13 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
</div>
<a class="headerlink" href="#DataFrameNaFunctions.replace"></a>
- <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.replace-856"><a href="#DataFrameNaFunctions.replace-856"><span class="linenos">856</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
-</span><span id="DataFrameNaFunctions.replace-857"><a href="#DataFrameNaFunctions.replace-857"><span class="linenos">857</span></a> <span class="bp">self</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.replace-858"><a href="#DataFrameNaFunctions.replace-858"><span class="linenos">858</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
-</span><span id="DataFrameNaFunctions.replace-859"><a href="#DataFrameNaFunctions.replace-859"><span class="linenos">859</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.replace-860"><a href="#DataFrameNaFunctions.replace-860"><span class="linenos">860</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-</span><span id="DataFrameNaFunctions.replace-861"><a href="#DataFrameNaFunctions.replace-861"><span class="linenos">861</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
-</span><span id="DataFrameNaFunctions.replace-862"><a href="#DataFrameNaFunctions.replace-862"><span class="linenos">862</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">to_replace</span><span class="o">=</span><span class="n">to_replace</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
+ <div class="pdoc-code codehilite"><pre><span></span><span id="DataFrameNaFunctions.replace-857"><a href="#DataFrameNaFunctions.replace-857"><span class="linenos">857</span></a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
+</span><span id="DataFrameNaFunctions.replace-858"><a href="#DataFrameNaFunctions.replace-858"><span class="linenos">858</span></a> <span class="bp">self</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.replace-859"><a href="#DataFrameNaFunctions.replace-859"><span class="linenos">859</span></a> <span class="n">to_replace</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Dict</span><span class="p">],</span>
+</span><span id="DataFrameNaFunctions.replace-860"><a href="#DataFrameNaFunctions.replace-860"><span class="linenos">860</span></a> <span class="n">value</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.replace-861"><a href="#DataFrameNaFunctions.replace-861"><span class="linenos">861</span></a> <span class="n">subset</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+</span><span id="DataFrameNaFunctions.replace-862"><a href="#DataFrameNaFunctions.replace-862"><span class="linenos">862</span></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
+</span><span id="DataFrameNaFunctions.replace-863"><a href="#DataFrameNaFunctions.replace-863"><span class="linenos">863</span></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">to_replace</span><span class="o">=</span><span class="n">to_replace</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="n">subset</span><span class="p">)</span>
</span></pre></div>
@@ -4897,7 +4908,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="decorator">@classmethod</div>
<span class="def">def</span>
- <span class="name">partitionBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791549852656&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791549852656&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">WindowSpec</a></span>:</span></span>
+ <span class="name">partitionBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844516812000&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844516812000&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">WindowSpec</a></span>:</span></span>
<label class="view-source-button" for="Window.partitionBy-view-source"><span>View Source</span></label>
@@ -4918,7 +4929,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="decorator">@classmethod</div>
<span class="def">def</span>
- <span class="name">orderBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791549852656&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791549852656&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">WindowSpec</a></span>:</span></span>
+ <span class="name">orderBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">cls</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844516812000&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844516812000&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">WindowSpec</a></span>:</span></span>
<label class="view-source-button" for="Window.orderBy-view-source"><span>View Source</span></label>
@@ -5160,7 +5171,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="attr function">
<span class="def">def</span>
- <span class="name">partitionBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791549852656&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791549852656&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">WindowSpec</a></span>:</span></span>
+ <span class="name">partitionBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844516812000&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844516812000&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">WindowSpec</a></span>:</span></span>
<label class="view-source-button" for="WindowSpec.partitionBy-view-source"><span>View Source</span></label>
@@ -5187,7 +5198,7 @@ Sqlglot doesn't currently replicate this class so it only accepts a string</p>
<div class="attr function">
<span class="def">def</span>
- <span class="name">orderBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791549852656&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139791549852656&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">WindowSpec</a></span>:</span></span>
+ <span class="name">orderBy</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="bp">self</span>,</span><span class="param"> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844516812000&#39;</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="o">&lt;</span><span class="n">MagicMock</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;139844516812000&#39;</span><span class="o">&gt;</span><span class="p">]]</span></span><span class="return-annotation">) -> <span class="n"><a href="#WindowSpec">WindowSpec</a></span>:</span></span>
<label class="view-source-button" for="WindowSpec.orderBy-view-source"><span>View Source</span></label>