summaryrefslogtreecommitdiffstats
path: root/www/queryplanner.html
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-05 17:28:19 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-05 17:28:19 +0000
commit18657a960e125336f704ea058e25c27bd3900dcb (patch)
tree17b438b680ed45a996d7b59951e6aa34023783f2 /www/queryplanner.html
parentInitial commit. (diff)
downloadsqlite3-upstream.tar.xz
sqlite3-upstream.zip
Adding upstream version 3.40.1.upstream/3.40.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'www/queryplanner.html')
-rw-r--r--www/queryplanner.html1037
1 files changed, 1037 insertions, 0 deletions
diff --git a/www/queryplanner.html b/www/queryplanner.html
new file mode 100644
index 0000000..3aa80da
--- /dev/null
+++ b/www/queryplanner.html
@@ -0,0 +1,1037 @@
+<!DOCTYPE html>
+<html><head>
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<meta http-equiv="content-type" content="text/html; charset=UTF-8">
+<link href="sqlite.css" rel="stylesheet">
+<title>Query Planning</title>
+<!-- path= -->
+</head>
+<body>
+<div class=nosearch>
+<a href="index.html">
+<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
+</a>
+<div><!-- IE hack to prevent disappearing logo --></div>
+<div class="tagline desktoponly">
+Small. Fast. Reliable.<br>Choose any three.
+</div>
+<div class="menu mainmenu">
+<ul>
+<li><a href="index.html">Home</a>
+<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
+<li class='wideonly'><a href='about.html'>About</a>
+<li class='desktoponly'><a href="docs.html">Documentation</a>
+<li class='desktoponly'><a href="download.html">Download</a>
+<li class='wideonly'><a href='copyright.html'>License</a>
+<li class='desktoponly'><a href="support.html">Support</a>
+<li class='desktoponly'><a href="prosupport.html">Purchase</a>
+<li class='search' id='search_menubutton'>
+<a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
+</ul>
+</div>
+<div class="menu submenu" id="submenu">
+<ul>
+<li><a href='about.html'>About</a>
+<li><a href='docs.html'>Documentation</a>
+<li><a href='download.html'>Download</a>
+<li><a href='support.html'>Support</a>
+<li><a href='prosupport.html'>Purchase</a>
+</ul>
+</div>
+<div class="searchmenu" id="searchmenu">
+<form method="GET" action="search">
+<select name="s" id="searchtype">
+<option value="d">Search Documentation</option>
+<option value="c">Search Changelog</option>
+</select>
+<input type="text" name="q" id="searchbox" value="">
+<input type="submit" value="Go">
+</form>
+</div>
+</div>
+<script>
+function toggle_div(nm) {
+var w = document.getElementById(nm);
+if( w.style.display=="block" ){
+w.style.display = "none";
+}else{
+w.style.display = "block";
+}
+}
+function toggle_search() {
+var w = document.getElementById("searchmenu");
+if( w.style.display=="block" ){
+w.style.display = "none";
+} else {
+w.style.display = "block";
+setTimeout(function(){
+document.getElementById("searchbox").focus()
+}, 30);
+}
+}
+function div_off(nm){document.getElementById(nm).style.display="none";}
+window.onbeforeunload = function(e){div_off("submenu");}
+/* Disable the Search feature if we are not operating from CGI, since */
+/* Search is accomplished using CGI and will not work without it. */
+if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
+document.getElementById("search_menubutton").style.display = "none";
+}
+/* Used by the Hide/Show button beside syntax diagrams, to toggle the */
+function hideorshow(btn,obj){
+var x = document.getElementById(obj);
+var b = document.getElementById(btn);
+if( x.style.display!='none' ){
+x.style.display = 'none';
+b.innerHTML='show';
+}else{
+x.style.display = '';
+b.innerHTML='hide';
+}
+return false;
+}
+var antiRobot = 0;
+function antiRobotGo(){
+if( antiRobot!=3 ) return;
+antiRobot = 7;
+var j = document.getElementById("mtimelink");
+if(j && j.hasAttribute("data-href")) j.href=j.getAttribute("data-href");
+}
+function antiRobotDefense(){
+document.body.onmousedown=function(){
+antiRobot |= 2;
+antiRobotGo();
+document.body.onmousedown=null;
+}
+document.body.onmousemove=function(){
+antiRobot |= 2;
+antiRobotGo();
+document.body.onmousemove=null;
+}
+setTimeout(function(){
+antiRobot |= 1;
+antiRobotGo();
+}, 100)
+antiRobotGo();
+}
+antiRobotDefense();
+</script>
+<div class=fancy>
+<div class=nosearch>
+<div class="fancy_title">
+Query Planning
+</div>
+<div class="fancy_toc">
+<a onclick="toggle_toc()">
+<span class="fancy_toc_mark" id="toc_mk">&#x25ba;</span>
+Table Of Contents
+</a>
+<div id="toc_sub"><div class="fancy-toc1"><a href="#_searching">1. Searching</a></div>
+<div class="fancy-toc2"><a href="#_tables_without_indices">1.1. Tables Without Indices</a></div>
+<div class="fancy-toc2"><a href="#_lookup_by_rowid">1.2. Lookup By Rowid</a></div>
+<div class="fancy-toc2"><a href="#_lookup_by_index">1.3. Lookup By Index</a></div>
+<div class="fancy-toc2"><a href="#_multiple_result_rows">1.4. Multiple Result Rows</a></div>
+<div class="fancy-toc2"><a href="#_multiple_and_connected_where_clause_terms">1.5. Multiple AND-Connected WHERE-Clause Terms</a></div>
+<div class="fancy-toc2"><a href="#_multi_column_indices">1.6. Multi-Column Indices</a></div>
+<div class="fancy-toc2"><a href="#_covering_indexes">1.7. Covering Indexes</a></div>
+<div class="fancy-toc2"><a href="#_or_connected_terms_in_the_where_clause">1.8. OR-Connected Terms In The WHERE Clause</a></div>
+<div class="fancy-toc1"><a href="#_sorting">2. Sorting</a></div>
+<div class="fancy-toc2"><a href="#_sorting_by_rowid">2.1. Sorting By Rowid</a></div>
+<div class="fancy-toc2"><a href="#_sorting_by_index">2.2. Sorting By Index</a></div>
+<div class="fancy-toc2"><a href="#_sorting_by_covering_index">2.3. Sorting By Covering Index</a></div>
+<div class="fancy-toc1"><a href="#_searching_and_sorting_at_the_same_time">3. Searching And Sorting At The Same Time</a></div>
+<div class="fancy-toc2"><a href="#_searching_and_sorting_with_a_multi_column_index">3.1. Searching And Sorting With A Multi-Column Index</a></div>
+<div class="fancy-toc2"><a href="#_searching_and_sorting_with_a_covering_index">3.2. Searching And Sorting With A Covering Index</a></div>
+<div class="fancy-toc2"><a href="#_partial_sorting_using_an_index_a_k_a_block_sorting_">3.3. Partial Sorting Using An Index (a.k.a. Block Sorting)</a></div>
+<div class="fancy-toc1"><a href="#_without_rowid_tables">4. WITHOUT ROWID tables</a></div>
+</div>
+</div>
+<script>
+function toggle_toc(){
+var sub = document.getElementById("toc_sub")
+var mk = document.getElementById("toc_mk")
+if( sub.style.display!="block" ){
+sub.style.display = "block";
+mk.innerHTML = "&#x25bc;";
+} else {
+sub.style.display = "none";
+mk.innerHTML = "&#x25ba;";
+}
+}
+</script>
+</div>
+
+
+
+
+
+
+<h2 style="margin-left:1.0em" notoc="1" id="overview"> Overview</h2>
+
+<p>
+The best feature of SQL (in <u>all</u> its implementations, not just SQLite)
+is that it is a <i>declarative</i> language, not a <i>procedural</i>
+language. When programming in SQL you tell the system <i>what</i> you
+want to compute, not <i>how</i> to compute it. The task of figuring out
+the <i>how</i> is delegated to the <i>query planner</i> subsystem within
+the SQL database engine.</p>
+
+<p>For any given SQL statement, there might be hundreds or thousands or
+even millions of different algorithms of performing the operation. All
+of these algorithms will get the correct answer, though some will run
+faster than others.
+The query planner is an
+<a href="https://en.wikipedia.org/wiki/Artificial_intelligence">AI</a> that
+tries to pick the fastest and most efficient algorithm for each SQL
+statement.
+</p>
+
+<p>
+Most of the time, the query planner in SQLite does a good job.
+However, the query planner needs indices to
+work with.
+These indices must normally be added by programmers.
+Rarely, the query planner AI will make a suboptimal algorithm
+choice.
+In those cases, programmers may want to provide additional
+hints to help the query planner do a better job.
+</p>
+
+<p>
+This document provides background information about how the
+SQLite query planner and query engine work.
+Programmers can use this information to help create better
+indexes, and provide hints to help the query planner when
+needed.
+</p>
+
+<p>
+Additional information is provided in the
+<a href="optoverview.html">SQLite query planner</a> and
+<a href="queryplanner-ng.html">next generation query planner</a> documents.
+</p>
+
+<a name="searching"></a>
+
+<h1 id="_searching"><span>1. </span> Searching</h1>
+
+<h2 id="_tables_without_indices"><span>1.1. </span> Tables Without Indices</h2>
+
+<p>
+Most tables in SQLite consist of zero or more rows with a unique integer
+key (the <a href="lang_createtable.html#rowid">rowid</a> or <a href="lang_createtable.html#rowid">INTEGER PRIMARY KEY</a>) followed by content.
+(The exception is <a href="withoutrowid.html">WITHOUT ROWID</a> tables.)
+The rows
+are logically stored in order of increasing rowid. As an example, this
+article uses a table named "FruitsForSale" which relates various fruits
+to the state
+where they are grown and their unit price at market. The schema is this:
+</p>
+
+<center><table><tr><td><pre>
+CREATE TABLE FruitsForSale(
+ Fruit TEXT,
+ State TEXT,
+ Price REAL
+);
+</pre></table></center>
+
+
+<p>
+With some (arbitrary) data, such a table might be logically stored on disk
+as shown in figure 1:
+</p>
+
+<a name='fig1'></a>
+<p><center>
+<img src="images/qp/tab.gif" alt="figure 1"><br>
+Figure 1: Logical Layout Of Table "FruitsForSale"
+</center></p>
+
+
+<p>
+In this example, the rowids are not
+consecutive but they are ordered. SQLite usually creates rowids beginning
+with one and increasing by one with each added row. But if rows are
+deleted, gaps can appear in the sequence. And the application can control
+the rowid assigned if desired, so that rows are not necessarily inserted
+at the bottom. But regardless of what happens, the rowids are always
+unique and in strictly ascending order.
+</p>
+
+<p>
+Suppose you want to look up the price of peaches. The query would
+be as follows:
+</p>
+
+<center><table><tr><td><pre>
+SELECT price FROM fruitsforsale WHERE fruit='Peach';
+</pre></table></center>
+
+
+<p>
+To satisfy this query, SQLite reads every row out of the
+table, checks to see if the "fruit" column has the value of "Peach" and if
+so, outputs the "price" column from that row. The process is illustrated
+by <a href="#fig2">figure 2</a> below.
+This is algorithm is called a <i>full table scan</i>
+since the entire content of the
+table must be read and examined in order to find the one row of interest.
+With a table of only 7 rows, a full table scan is acceptable,
+but if the table contained 7 million rows, a full table scan might read
+megabytes of content in order to find a single 8-byte number.
+For that reason, one normally tries to avoid full table scans.
+</p>
+
+<a name='fig2'></a>
+<p><center>
+<img src="images/qp/fullscan.gif" alt="figure 2"><br>
+Figure 2: Full Table Scan
+</center></p>
+
+
+<h2 id="_lookup_by_rowid"><span>1.2. </span> Lookup By Rowid</h2>
+
+<p>
+One technique for avoiding a full table scan is to do lookups by
+rowid (or by the equivalent <a href="lang_createtable.html#rowid">INTEGER PRIMARY KEY</a>). To lookup the
+price of peaches, one would query for the entry with a rowid of 4:
+</p>
+
+<center><table><tr><td><pre>
+SELECT price FROM fruitsforsale WHERE rowid=4;
+</pre></table></center>
+
+
+<p>
+Since the information is stored in the table in rowid order, SQLite
+can find the correct row using a binary search.
+If the table contains N elements, the time required to look up the
+desired row is proportional to logN rather than being proportional
+to N as in a full table scan. If the table contains 10 million elements,
+that means the query will be on the order of N/logN or about 1 million
+times faster.
+</p>
+
+<a name='fig3'></a>
+<p><center>
+<img src="images/qp/rowidlu.gif" alt="figure 3"><br>
+Figure 3: Lookup By Rowid
+</center></p>
+
+
+<h2 id="_lookup_by_index"><span>1.3. </span> Lookup By Index</h2>
+<p>
+The problem with looking up information by rowid is that you probably
+do not care what the price of "item 4" is - you want to know the price
+of peaches. And so a rowid lookup is not helpful.
+</p>
+
+<p>
+To make the original query more efficient, we can add an index on the
+"fruit" column of the "fruitsforsale" table like this:
+</p>
+
+<center><table><tr><td><pre>
+CREATE INDEX Idx1 ON fruitsforsale(fruit);
+</pre></table></center>
+
+
+<p>
+An index is another table similar to the original "fruitsforsale" table
+but with the content (the fruit column in this case) stored in front of the
+rowid and with all rows in content order.
+<a href="#fig4">Figure 4</a> gives a logical view of the Idx1 index.
+The "fruit" column is the primary key used to order the elements of the
+table and the "rowid" is the secondary key used to break the tie when
+two or more rows have the same "fruit". In the example, the rowid
+has to be used as a tie-breaker for the "Orange" rows.
+Notice that since the rowid
+is always unique over all elements of the original table, the composite key
+of "fruit" followed by "rowid" will be unique over all elements of the index.
+</p>
+
+<a name='fig4'></a>
+<p><center>
+<img src="images/qp/idx1.gif" alt="figure 4"><br>
+Figure 4: An Index On The Fruit Column
+</center></p>
+
+
+<p>
+This new index can be used to implement a faster algorithm for the
+original "Price of Peaches" query.
+</p>
+
+<center><table><tr><td><pre>
+SELECT price FROM fruitsforsale WHERE fruit='Peach';
+</pre></table></center>
+
+
+<p>
+The query starts by doing a binary search on the Idx1 index for entries
+that have fruit='Peach'. SQLite can do this binary search on the Idx1 index
+but not on the original FruitsForSale table because the rows in Idx1 are sorted
+by the "fruit" column. Having found a row in the Idx1 index that has
+fruit='Peach', the database engine can extract the rowid for that row.
+Then the database engines does a second binary search
+on the original FruitsForSale table to find the
+original row that contains fruit='Peach'.
+From the row in the FruitsForSale table,
+SQLite can then extract the value of the price column.
+This procedure is illustrated by <a href="#fig5">figure 5</a>.
+</p>
+
+<a name='fig5'></a>
+<p><center>
+<img src="images/qp/idx1lu1.gif" alt="figure 5"><br>
+Figure 5: Indexed Lookup For The Price Of Peaches
+</center></p>
+
+
+<p>
+SQLite has to do two binary searches to find the price of peaches using
+the method show above. But for a table with a large number of rows, this
+is still much faster than doing a full table scan.
+</p>
+
+<h2 id="_multiple_result_rows"><span>1.4. </span> Multiple Result Rows</h2>
+
+<p>
+In the previous query the fruit='Peach' constraint narrowed the result
+down to a single row. But the same technique works even if multiple
+rows are obtained. Suppose we looked up the price of Oranges instead of
+Peaches:
+</p>
+
+<center><table><tr><td><pre>
+SELECT price FROM fruitsforsale WHERE fruit='Orange'
+</pre></table></center>
+<a name='fig6'></a>
+<p><center>
+<img src="images/qp/idx1lu2.gif" alt="figure 6"><br>
+Figure 6: Indexed Lookup For The Price Of Oranges
+</center></p>
+
+
+<p>
+In this case, SQLite still does a single binary search to find the first
+entry of the index where fruit='Orange'. Then it extracts the rowid from
+the index and uses that rowid to lookup the original table entry via
+binary search and output the price from the original table. But instead
+of quitting, the database engine then advances to the next row of index
+to repeat the process for next fruit='Orange' entry. Advancing to the
+next row of an index (or table) is much less costly than doing a binary
+search since the next row is often located on the same database page as
+the current row. In fact, the cost of advancing to the next row is so
+cheap in comparison to a binary search that we usually ignore it. So
+our estimate for the total cost of this query is 3 binary searches.
+If the number of rows of output is K and the number of rows in the table
+is N, then in general the cost of doing the query is proportional
+to (K+1)*logN.
+</p>
+
+<h2 id="_multiple_and_connected_where_clause_terms"><span>1.5. </span> Multiple AND-Connected WHERE-Clause Terms</h2>
+
+<p>
+Next, suppose that you want to look up the price of not just any orange,
+but specifically California-grown oranges. The appropriate query would
+be as follows:
+</p>
+
+<center><table><tr><td><pre>
+SELECT price FROM fruitsforsale WHERE fruit='Orange' AND state='CA'
+</pre></table></center>
+<a name='fig7'></a>
+<p><center>
+<img src="images/qp/idx1lu3.gif" alt="figure 7"><br>
+Figure 7: Indexed Lookup Of California Oranges
+</center></p>
+
+
+<p>
+One approach to this query is to use the fruit='Orange' term of the WHERE
+clause to find all rows dealing with oranges, then filter those rows
+by rejecting any that are from states other than California. This
+process is shown by <a href="#fig7">figure 7</a> above. This is a perfectly
+reasonable approach in most cases. Yes, the database engine did have
+to do an extra binary search for the Florida orange row that was
+later rejected, so it was not as efficient as we might hope, though
+for many applications it is efficient enough.
+</p>
+
+<p>
+Suppose that in addition to the index on "fruit" there was also
+an index on "state".
+</p>
+
+<center><table><tr><td><pre>
+CREATE INDEX Idx2 ON fruitsforsale(state);
+</pre></table></center>
+<a name='fig8'></a>
+<p><center>
+<img src="images/qp/idx2.gif" alt="figure 8"><br>
+Figure 8: Index On The State Column
+</center></p>
+
+
+<p>
+The "state" index works just like the "fruit" index in that it is a
+new table with an extra column in front of the rowid and sorted by
+that extra column as the primary key. The only difference is that
+in Idx2, the first column is "state" instead of "fruit" as it is with
+Idx1. In our example data set, there is more redundancy in the "state"
+column and so they are more duplicate entries. The ties are still
+resolved using the rowid.
+</p>
+
+<p>
+Using the new Idx2 index on "state", SQLite has another option for
+lookup up the price of California oranges: it can look up every row
+that contains fruit from California and filter out those rows that
+are not oranges.
+</p>
+
+<a name='fig9'></a>
+<p><center>
+<img src="images/qp/idx2lu1.gif" alt="figure 9"><br>
+Figure 9: Indexed Lookup Of California Oranges
+</center></p>
+
+
+<p>
+Using Idx2 instead of Idx1 causes SQLite to examine a different set of
+rows, but it gets the same answer in the end (which is very important -
+remember that indices should never change the answer, only help SQLite to
+get to the answer more quickly) and it does the same amount of work.
+So the Idx2 index did not help performance in this case.
+</p>
+
+<p>
+The last two queries take the same amount of time, in our example.
+So which index, Idx1 or Idx2, will SQLite choose? If the
+<a href="lang_analyze.html">ANALYZE</a> command has been run on the database, so that SQLite has
+had an opportunity to gather statistics about the available indices,
+then SQLite will know that the Idx1 index usually narrows the search
+down to a single item (our example of fruit='Orange' is the exception
+to this rule) whereas the Idx2 index will normally only narrow the
+search down to two rows. So, if all else is equal, SQLite will
+choose Idx1 with the hope of narrowing the search to as small
+a number of rows as possible. This choice is only possible because
+of the statistics provided by <a href="lang_analyze.html">ANALYZE</a>. If <a href="lang_analyze.html">ANALYZE</a> has not been
+run then the choice of which index to use is arbitrary.
+</p>
+
+<h2 id="_multi_column_indices"><span>1.6. </span> Multi-Column Indices</h2>
+
+<p>
+To get the maximum performance out of a query with multiple AND-connected
+terms in the WHERE clause, you really want a multi-column index with
+columns for each of the AND terms. In this case we create a new index
+on the "fruit" and "state" columns of FruitsForSale:
+</p>
+
+<center><table><tr><td><pre>
+CREATE INDEX Idx3 ON FruitsForSale(fruit, state);
+</pre></table></center>
+<a name='fig10'></a>
+<p><center>
+<img src="images/qp/idx3.gif" alt="figure 1"><br>
+Figure 1: A Two-Column Index
+</center></p>
+
+
+<p>
+A multi-column index follows the same pattern as a single-column index;
+the indexed columns are added in front of the rowid. The only difference
+is that now multiple columns are added. The left-most column is the
+primary key used for ordering the rows in the index. The second column is
+used to break ties in the left-most column. If there were a third column,
+it would be used to break ties for the first two columns. And so forth for
+all columns in the index. Because rowid is guaranteed
+to be unique, every row of the index will be unique even if all of the
+content columns for two rows are the same. That case does not happen
+in our sample data, but there is one case (fruit='Orange') where there
+is a tie on the first column which must be broken by the second column.
+</p>
+
+<p>
+Given the new multi-column Idx3 index, it is now possible for SQLite
+to find the price of California oranges using only 2 binary searches:
+</p>
+
+<center><table><tr><td><pre>
+SELECT price FROM fruitsforsale WHERE fruit='Orange' AND state='CA'
+</pre></table></center>
+<a name='fig11'></a>
+<p><center>
+<img src="images/qp/idx3lu1.gif" alt="figure 11"><br>
+Figure 11: Lookup Using A Two-Column Index
+</center></p>
+
+
+<p>
+With the Idx3 index on both columns that are constrained by the WHERE clause,
+SQLite can do a single binary search against Idx3 to find the one rowid
+for California oranges, then do a single binary search to find the price
+for that item in the original table. There are no dead-ends and no
+wasted binary searches. This is a more efficient query.
+</p>
+
+<p>
+Note that Idx3 contains all the same information as the original
+<a href="#fig3">Idx1</a>. And so if we have Idx3, we do not really need Idx1
+any more. The "price of peaches" query can be satisfied using Idx3
+by simply ignoring the "state" column of Idx3:
+</p>
+
+<center><table><tr><td><pre>
+SELECT price FROM fruitsforsale WHERE fruit='Peach'
+</pre></table></center>
+<a name='fig12'></a>
+<p><center>
+<img src="images/qp/idx3lu2.gif" alt="figure 12"><br>
+Figure 12: Single-Column Lookup On A Multi-Column Index
+</center></p>
+
+
+<p>
+Hence, a good rule of thumb is that your database schema should never
+contain two indices where one index is a prefix of the other. Drop the
+index with fewer columns. SQLite will still be able to do efficient
+lookups with the longer index.
+</p>
+
+<a name="covidx"></a>
+
+<h2 id="_covering_indexes"><span>1.7. </span> Covering Indexes</h2>
+
+<p>
+The "price of California oranges" query was made more efficient through
+the use of a two-column index. But SQLite can do even better with a
+three-column index that also includes the "price" column:
+</p>
+
+<center><table><tr><td><pre>
+CREATE INDEX Idx4 ON FruitsForSale(fruit, state, price);
+</pre></table></center>
+<a name='fig13'></a>
+<p><center>
+<img src="images/qp/idx4.gif" alt="figure 13"><br>
+Figure 13: A Covering Index
+</center></p>
+
+
+<p>
+This new index contains all the columns of the original FruitsForSale table that
+are used by the query - both the search terms and the output. We call
+this a "covering index". Because all of the information needed is in
+the covering index, SQLite never needs to consult the original table
+in order to find the price.
+</p>
+
+<center><table><tr><td><pre>
+SELECT price FROM fruitsforsale WHERE fruit='Orange' AND state='CA';
+</pre></table></center>
+<a name='fig14'></a>
+<p><center>
+<img src="images/qp/idx4lu1.gif" alt="figure 14"><br>
+Figure 14: Query Using A Covering Index
+</center></p>
+
+
+<p>
+Hence, by adding extra "output" columns onto the end of an index, one
+can avoid having to reference the original table and thereby
+cut the number of binary searches for a query in half. This is a
+constant-factor improvement in performance (roughly a doubling of
+the speed). But on the other hand, it is also just a refinement;
+A two-fold performance increase is not nearly as dramatic as the
+one-million-fold increase seen when the table was first indexed.
+And for most queries, the difference between 1 microsecond and
+2 microseconds is unlikely to be noticed.
+</p>
+
+<a name="or_in_where"></a>
+
+<h2 id="_or_connected_terms_in_the_where_clause"><span>1.8. </span> OR-Connected Terms In The WHERE Clause</h2>
+
+<p>
+Multi-column indices only work if the constraint terms in the WHERE
+clause of the query are connected by AND.
+So Idx3 and Idx4 are helpful when the search is for items that
+are both Oranges and grown in California, but neither index would
+be that useful if we wanted all items that were either oranges
+<i>or</i> are grown in California.
+</p>
+
+<center><table><tr><td><pre>
+SELECT price FROM FruitsForSale WHERE fruit='Orange' OR state='CA';
+</pre></table></center>
+
+
+<p>
+When confronted with OR-connected terms in a WHERE clause, SQLite
+examines each OR term separately and tries to use an index to
+find the rowids associated with each term.
+It then takes the union of the resulting rowid sets to find
+the end result. The following figure illustrates this process:
+</p>
+
+<a name='fig15'></a>
+<p><center>
+<img src="images/qp/orquery.gif" alt="figure 15"><br>
+Figure 15: Query With OR Constraints
+</center></p>
+
+
+<p>
+The diagram above implies that SQLite computes all of the rowids first
+and then combines them with a union operation before starting to do
+rowid lookups on the original table. In reality, the rowid lookups
+are interspersed with rowid computations. SQLite uses one index at
+a time to find rowids while remembering which rowids it has seen
+before so as to avoid duplicates. That is just an implementation
+detail, though. The diagram, while not 100% accurate, provides a good
+overview of what is happening.
+</p>
+
+<p>
+In order for the OR-by-UNION technique shown above to be useful, there
+must be an index available that helps resolve every OR-connected term
+in the WHERE clause. If even a single OR-connected term is not indexed,
+then a full table scan would have to be done in order to find the rowids
+generated by the one term, and if SQLite has to do a full table scan, it
+might as well do it on the original table and get all of the results in
+a single pass without having to mess with union operations and follow-on
+binary searches.
+</p>
+
+<p>
+One can see how the OR-by-UNION technique could also be leveraged to
+use multiple indices on queries where the WHERE clause has terms connected
+by AND, by using an intersect operator in place of union. Many SQL
+database engines will do just that. But the performance gain over using
+just a single index is slight and so SQLite does not implement that technique
+at this time. However, a future version SQLite might be enhanced to support
+AND-by-INTERSECT.
+</p>
+
+<a name="sorting"></a>
+
+<h1 id="_sorting"><span>2. </span> Sorting</h1>
+
+<p>
+SQLite (like all other SQL database engines) can also use indices to
+satisfy the ORDER BY clauses in a query, in addition to expediting
+lookup. In other words, indices can be used to speed up sorting as
+well as searching.
+</p>
+
+<p>
+When no appropriate indices are available, a query with an ORDER BY
+clause must be sorted as a separate step. Consider this query:
+</p>
+
+<center><table><tr><td><pre>
+SELECT * FROM fruitsforsale ORDER BY fruit;
+</pre></table></center>
+
+
+<p>
+SQLite processes this by gathering all the output of query and then
+running that output through a sorter.
+</p>
+
+<a name='fig16'></a>
+<p><center>
+<img src="images/qp/obfruitnoidx.gif" alt="figure 16"><br>
+Figure 16: Sorting Without An Index
+</center></p>
+
+
+<p>
+If the number of output rows is K, then the time needed to sort is
+proportional to KlogK. If K is small, the sorting time is usually
+not a factor, but in a query such as the above where K==N, the time
+needed to sort can be much greater than the time needed to do a
+full table scan. Furthermore, the entire output is accumulated in
+temporary storage (which might be either in main memory or on disk,
+depending on various compile-time and run-time settings)
+which can mean that a lot of temporary storage is required to complete
+the query.
+</p>
+
+<h2 id="_sorting_by_rowid"><span>2.1. </span> Sorting By Rowid</h2>
+
+<p>
+Because sorting can be expensive, SQLite works hard to convert ORDER BY
+clauses into no-ops. If SQLite determines that output will
+naturally appear in the order specified, then no sorting is done.
+So, for example, if you request the output in rowid order, no sorting
+will be done:
+</p>
+
+<center><table><tr><td><pre>
+SELECT * FROM fruitsforsale ORDER BY rowid;
+</pre></table></center>
+<a name='fig17'></a>
+<p><center>
+<img src="images/qp/obrowid.gif" alt="figure 17"><br>
+Figure 17: Sorting By Rowid
+</center></p>
+
+
+<p>
+You can also request a reverse-order sort like this:
+</p>
+
+<center><table><tr><td><pre>
+SELECT * FROM fruitsforsale ORDER BY rowid DESC;
+</pre></table></center>
+
+
+<p>
+SQLite will still omit the sorting step. But in order for output to
+appear in the correct order, SQLite will do the table scan starting at
+the end and working toward the beginning, rather than starting at the
+beginning and working toward the end as shown in
+<a href="#fig17">figure 17</a>.
+</p>
+
+<h2 id="_sorting_by_index"><span>2.2. </span> Sorting By Index</h2>
+
+<p>
+Of course, ordering the output of a query by rowid is seldom useful.
+Usually one wants to order the output by some other column.
+</p>
+
+<p>
+If an index is available on the ORDER BY column, that index can be used
+for sorting. Consider the request for all items sorted by "fruit":
+</p>
+
+<center><table><tr><td><pre>
+SELECT * FROM fruitsforsale ORDER BY fruit;
+</pre></table></center>
+
+
+<a name='fig18'></a>
+<p><center>
+<img src="images/qp/obfruitidx1.gif" alt="figure 18"><br>
+Figure 18: Sorting With An Index
+</center></p>
+
+
+<p>
+The Idx1 index is scanned from top to bottom (or from bottom to top if
+"ORDER BY fruit DESC" is used) in order to find the rowids for each item
+in order by fruit. Then for each rowid, a binary search is done to lookup
+and output that row. In this way, the output appears in the requested order
+without the need to gather the entire output and sort it using a separate step.
+</p>
+
+<p>
+But does this really save time? The number of steps in the
+<a href="#fig16">original indexless sort</a> is proportional to NlogN since
+that is how much time it takes to sort N rows. But when we use Idx1 as
+shown here, we have to do N rowid lookups which take logN time each, so
+the total time of NlogN is the same!
+</p>
+
+<p>
+SQLite uses a cost-based query planner. When there are two or more ways
+of solving the same query, SQLite tries to estimate the total amount of
+time needed to run the query using each plan, and then uses the plan with
+the lowest estimated cost. A cost is computed mostly from the estimated
+time, and so this case could go either way depending on the table size and
+what WHERE clause constraints were available, and so forth. But generally
+speaking, the indexed sort would probably be chosen, if for no other
+reason, because it does not need to accumulate the entire result set in
+temporary storage before sorting and thus uses much less temporary storage.
+</p>
+
+<h2 id="_sorting_by_covering_index"><span>2.3. </span> Sorting By Covering Index</h2>
+
+<p>
+If a covering index can be used for a query, then the multiple rowid lookups
+can be avoided and the cost of the query drops dramatically.
+</p>
+
+<a name='fig19'></a>
+<p><center>
+<img src="images/qp/obfruitidx4.gif" alt="figure 19"><br>
+Figure 19: Sorting With A Covering Index
+</center></p>
+
+
+<p>
+With a covering index, SQLite can simply walk the index from one end to the
+other and deliver the output in time proportional to N and without having
+allocate a large buffer to hold the result set.
+</p>
+
+<h1 id="_searching_and_sorting_at_the_same_time"><span>3. </span> Searching And Sorting At The Same Time</h1>
+
+<p>
+The previous discussion has treated searching and sorting as separate
+topics. But in practice, it is often the case that one wants to search
+and sort at the same time. Fortunately, it is possible to do this
+using a single index.
+</p>
+
+<h2 id="_searching_and_sorting_with_a_multi_column_index"><span>3.1. </span> Searching And Sorting With A Multi-Column Index</h2>
+
+<p>
+Suppose we want to find the prices of all kinds of oranges sorted in
+order of the state where they are grown. The query is this:
+</p>
+
+<center><table><tr><td><pre>
+SELECT price FROM fruitforsale WHERE fruit='Orange' ORDER BY state
+</pre></table></center>
+
+
+<p>
+The query contains both a search restriction in the WHERE clause
+and a sort order in the ORDER BY clause. Both the search and the sort
+can be accomplished at the same time using the two-column index Idx3.
+</p>
+
+<a name='fig20'></a>
+<p><center>
+<img src="images/qp/fruitobstate0.gif" alt="figure 20"><br>
+Figure 20: Search And Sort By Multi-Column Index
+</center></p>
+
+
+<p>
+The query does a binary search on the index to find the subset of rows
+that have fruit='Orange'. (Because the fruit column is the left-most column
+of the index and the rows of the index are in sorted order, all such
+rows will be adjacent.) Then it scans the matching index rows from top to
+bottom to get the rowids for the original table, and for each rowid does
+a binary search on the original table to find the price.
+</p>
+
+<p>
+You will notice that there is no "sort" box anywhere in the above diagram.
+The ORDER BY clause of the query has become a no-op. No sorting has to be
+done here because the output order is by the state column and the state
+column also happens to be the first column after the fruit column in the
+index. So, if we scan entries of the index that have the same value for
+the fruit column from top to bottom, those index entries are guaranteed to
+be ordered by the state column.
+</p>
+
+<a name="srchsortcovidx"></a>
+
+<h2 id="_searching_and_sorting_with_a_covering_index"><span>3.2. </span> Searching And Sorting With A Covering Index</h2>
+
+<p>
+A <a href="queryplanner.html#covidx">covering index</a> can also be used to search and sort at the same time.
+Consider the following:
+</p>
+
+<center><table><tr><td><pre>
+SELECT * FROM fruitforsale WHERE fruit='Orange' ORDER BY state
+</pre></table></center>
+<a name='fig21'></a>
+<p><center>
+<img src="images/qp/fruitobstate.gif" alt="figure 21"><br>
+Figure 21: Search And Sort By Covering Index
+</center></p>
+
+
+<p>
+As before, SQLite does single binary search
+for the range of rows in the covering
+index that satisfy the WHERE clause, the scans that range from top to
+bottom to get the desired results.
+The rows that satisfy the WHERE clause are guaranteed to be adjacent
+since the WHERE clause is an equality constraint on the left-most
+column of the index. And by scanning the matching index rows from
+top to bottom, the output is guaranteed to be ordered by state since the
+state column is the very next column to the right of the fruit column.
+And so the resulting query is very efficient.
+</p>
+
+<p>
+SQLite can pull a similar trick for a descending ORDER BY:
+</p>
+
+<center><table><tr><td><pre>
+SELECT * FROM fruitforsale WHERE fruit='Orange' ORDER BY state DESC
+</pre></table></center>
+
+
+<p>
+The same basic algorithm is followed, except this time the matching rows
+of the index are scanned from bottom to top instead of from top to bottom,
+so that the states will appear in descending order.
+</p>
+
+<a name="partialsort"></a>
+
+<h2 id="_partial_sorting_using_an_index_a_k_a_block_sorting_"><span>3.3. </span> Partial Sorting Using An Index (a.k.a. Block Sorting)</h2>
+
+<p>
+Sometimes only part of an ORDER BY clause can be satisfied using indexes.
+Consider, for example, the following query:
+</p>
+
+<center><table><tr><td><pre>
+SELECT * FROM fruitforsale ORDER BY fruit, price
+</pre></table></center>
+
+
+<p>
+If the covering index is used for the scan, the "fruit" column will appear
+naturally in the correct order, but when there are two or more rows with
+the same fruit, the price might be out of order. When this occurs, SQLite
+does many small sorts, one sort for each distinct value of fruit, rather
+than one large sort. Figure 22 below illustrates the concept.
+</p>
+
+<a name='fig22'></a>
+<p><center>
+<img src="images/qp/partial-sort.gif" alt="figure 22"><br>
+Figure 22: Partial Sort By Index
+</center></p>
+
+
+<p>
+In the example, instead of a single sort of 7 elements, there
+are 5 sorts of one-element each and 1 sort of 2 elements for the
+case of fruit=='Orange'.
+
+</p><p>
+The advantages of doing many smaller sorts instead of a single large sort
+are:
+</p><ol>
+<li>Multiple small sorts collectively use fewer CPU cycles than a single
+ large sort.
+</li><li>Each small sort is run independently, meaning that much less information
+ needs to be kept in temporary storage at any one time.
+</li><li>Those columns of the ORDER BY that are already in the correct order
+ due to indexes can be omitted from the sort key, further reducing
+ storage requirements and CPU time.
+</li><li>Output rows can be returned to the application as each small sort
+ completes, and well before the table scan is complete.
+</li><li>If a LIMIT clause is present, it might be possible to avoid scanning
+ the entire table.
+</li></ol>
+
+<p>Because of these advantages, SQLite always tries to do a partial sort using an
+index even if a complete sort by index is not possible.</p>
+
+<h1 id="_without_rowid_tables"><span>4. </span> WITHOUT ROWID tables</h1>
+
+<p>
+The basic principals described above apply to both ordinary rowid tables
+and <a href="withoutrowid.html">WITHOUT ROWID</a> tables.
+The only difference is that the rowid column that serves as the key for
+tables and that appears as the right-most term in indexes is replaced by
+the PRIMARY KEY.
+</p>
+<p align="center"><small><i>This page last modified on <a href="https://sqlite.org/docsrc/honeypot" id="mtimelink" data-href="https://sqlite.org/docsrc/finfo/pages/queryplanner.in?m=baee8e6b0dbeb0173">2022-10-26 13:30:36</a> UTC </small></i></p>
+