summaryrefslogtreecommitdiffstats
path: root/www/optoverview.html
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--www/optoverview.html1623
1 files changed, 1623 insertions, 0 deletions
diff --git a/www/optoverview.html b/www/optoverview.html
new file mode 100644
index 0000000..5fa1146
--- /dev/null
+++ b/www/optoverview.html
@@ -0,0 +1,1623 @@
+<!DOCTYPE html>
+<html><head>
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<meta http-equiv="content-type" content="text/html; charset=UTF-8">
+<link href="sqlite.css" rel="stylesheet">
+<title>The SQLite Query Optimizer Overview</title>
+<!-- path= -->
+</head>
+<body>
+<div class=nosearch>
+<a href="index.html">
+<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
+</a>
+<div><!-- IE hack to prevent disappearing logo --></div>
+<div class="tagline desktoponly">
+Small. Fast. Reliable.<br>Choose any three.
+</div>
+<div class="menu mainmenu">
+<ul>
+<li><a href="index.html">Home</a>
+<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
+<li class='wideonly'><a href='about.html'>About</a>
+<li class='desktoponly'><a href="docs.html">Documentation</a>
+<li class='desktoponly'><a href="download.html">Download</a>
+<li class='wideonly'><a href='copyright.html'>License</a>
+<li class='desktoponly'><a href="support.html">Support</a>
+<li class='desktoponly'><a href="prosupport.html">Purchase</a>
+<li class='search' id='search_menubutton'>
+<a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
+</ul>
+</div>
+<div class="menu submenu" id="submenu">
+<ul>
+<li><a href='about.html'>About</a>
+<li><a href='docs.html'>Documentation</a>
+<li><a href='download.html'>Download</a>
+<li><a href='support.html'>Support</a>
+<li><a href='prosupport.html'>Purchase</a>
+</ul>
+</div>
+<div class="searchmenu" id="searchmenu">
+<form method="GET" action="search">
+<select name="s" id="searchtype">
+<option value="d">Search Documentation</option>
+<option value="c">Search Changelog</option>
+</select>
+<input type="text" name="q" id="searchbox" value="">
+<input type="submit" value="Go">
+</form>
+</div>
+</div>
+<script>
+function toggle_div(nm) {
+var w = document.getElementById(nm);
+if( w.style.display=="block" ){
+w.style.display = "none";
+}else{
+w.style.display = "block";
+}
+}
+function toggle_search() {
+var w = document.getElementById("searchmenu");
+if( w.style.display=="block" ){
+w.style.display = "none";
+} else {
+w.style.display = "block";
+setTimeout(function(){
+document.getElementById("searchbox").focus()
+}, 30);
+}
+}
+function div_off(nm){document.getElementById(nm).style.display="none";}
+window.onbeforeunload = function(e){div_off("submenu");}
+/* Disable the Search feature if we are not operating from CGI, since */
+/* Search is accomplished using CGI and will not work without it. */
+if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
+document.getElementById("search_menubutton").style.display = "none";
+}
+/* Used by the Hide/Show button beside syntax diagrams, to toggle the */
+function hideorshow(btn,obj){
+var x = document.getElementById(obj);
+var b = document.getElementById(btn);
+if( x.style.display!='none' ){
+x.style.display = 'none';
+b.innerHTML='show';
+}else{
+x.style.display = '';
+b.innerHTML='hide';
+}
+return false;
+}
+var antiRobot = 0;
+function antiRobotGo(){
+if( antiRobot!=3 ) return;
+antiRobot = 7;
+var j = document.getElementById("mtimelink");
+if(j && j.hasAttribute("data-href")) j.href=j.getAttribute("data-href");
+}
+function antiRobotDefense(){
+document.body.onmousedown=function(){
+antiRobot |= 2;
+antiRobotGo();
+document.body.onmousedown=null;
+}
+document.body.onmousemove=function(){
+antiRobot |= 2;
+antiRobotGo();
+document.body.onmousemove=null;
+}
+setTimeout(function(){
+antiRobot |= 1;
+antiRobotGo();
+}, 100)
+antiRobotGo();
+}
+antiRobotDefense();
+</script>
+<div class=fancy>
+<div class=nosearch>
+<div class="fancy_title">
+The SQLite Query Optimizer Overview
+</div>
+<div class="fancy_toc">
+<a onclick="toggle_toc()">
+<span class="fancy_toc_mark" id="toc_mk">&#x25ba;</span>
+Table Of Contents
+</a>
+<div id="toc_sub"><div class="fancy-toc1"><a href="#introduction">1. Introduction</a></div>
+<div class="fancy-toc1"><a href="#where_clause_analysis">2. WHERE Clause Analysis</a></div>
+<div class="fancy-toc2"><a href="#index_term_usage_examples">2.1. Index Term Usage Examples</a></div>
+<div class="fancy-toc1"><a href="#the_between_optimization">3. The BETWEEN Optimization</a></div>
+<div class="fancy-toc1"><a href="#or_optimizations">4. OR Optimizations</a></div>
+<div class="fancy-toc1"><a href="#the_like_optimization">5. The LIKE Optimization</a></div>
+<div class="fancy-toc1"><a href="#the_skip_scan_optimization">6. The Skip-Scan Optimization</a></div>
+<div class="fancy-toc1"><a href="#joins">7. Joins</a></div>
+<div class="fancy-toc2"><a href="#order_of_tables_in_a_join">7.1. Order of Tables in a Join</a></div>
+<div class="fancy-toc2"><a href="#manual_control_of_query_plans_using_sqlite_stat_tables">7.2. Manual Control Of Query Plans Using SQLITE_STAT Tables</a></div>
+<div class="fancy-toc2"><a href="#manual_control_of_query_plans_using_cross_join">7.3. Manual Control of Query Plans using CROSS JOIN</a></div>
+<div class="fancy-toc1"><a href="#choosing_between_multiple_indexes">8. Choosing Between Multiple Indexes</a></div>
+<div class="fancy-toc2"><a href="#disqualifying_where_clause_terms_using_unary_">8.1. Disqualifying WHERE Clause Terms using Unary-"+"</a></div>
+<div class="fancy-toc2"><a href="#range_queries">8.2. Range Queries</a></div>
+<div class="fancy-toc1"><a href="#covering_indexes">9. Covering Indexes</a></div>
+<div class="fancy-toc1"><a href="#order_by_optimizations">10. ORDER BY Optimizations</a></div>
+<div class="fancy-toc2"><a href="#partial_order_by_via_index">10.1. Partial ORDER BY via Index</a></div>
+<div class="fancy-toc1"><a href="#subquery_flattening">11. Subquery Flattening</a></div>
+<div class="fancy-toc1"><a href="#subquery_co_routines">12. Subquery Co-routines</a></div>
+<div class="fancy-toc2"><a href="#using_co_routines_to_defer_work_until_after_the_sorting">12.1. Using Co-routines to Defer Work until after the Sorting</a></div>
+<div class="fancy-toc1"><a href="#the_min_max_optimization">13. The MIN/MAX Optimization</a></div>
+<div class="fancy-toc1"><a href="#automatic_indexes">14. Automatic Indexes</a></div>
+<div class="fancy-toc2"><a href="#hash_joins">14.1. Hash Joins</a></div>
+<div class="fancy-toc1"><a href="#the_push_down_optimization">15. The Push-Down Optimization</a></div>
+<div class="fancy-toc1"><a href="#the_left_join_strength_reduction_optimization">16. The LEFT JOIN Strength Reduction Optimization</a></div>
+<div class="fancy-toc1"><a href="#the_omit_left_join_optimization">17. The Omit LEFT JOIN Optimization</a></div>
+<div class="fancy-toc1"><a href="#the_constant_propagation_optimization">18. The Constant Propagation Optimization</a></div>
+</div>
+</div>
+<script>
+function toggle_toc(){
+var sub = document.getElementById("toc_sub")
+var mk = document.getElementById("toc_mk")
+if( sub.style.display!="block" ){
+sub.style.display = "block";
+mk.innerHTML = "&#x25bc;";
+} else {
+sub.style.display = "none";
+mk.innerHTML = "&#x25ba;";
+}
+}
+</script>
+</div>
+
+
+
+
+
+<h1 id="introduction"><span>1. </span>Introduction</h1>
+<p>
+ This document provides an overview of how the query planner and optimizer
+ for SQLite works.
+
+
+</p><p>
+ Given a single SQL statement, there might be dozens, hundreds, or even
+ thousands of ways to implement that statement, depending on the complexity
+ of the statement itself and of the underlying database schema. The
+ task of the query planner is to select the algorithm that minimizes
+ disk I/O and CPU overhead.
+
+
+</p><p>
+ Additional background information is available in the
+ <a href="queryplanner.html">indexing tutorial</a> document.
+
+
+</p><p>
+ With release 3.8.0 (2013-08-26),
+ the SQLite query planner was reimplemented as the
+ <a href="queryplanner-ng.html">Next Generation Query Planner</a> or "NGQP". All of the features, techniques,
+ and algorithms described in this document are applicable to both the
+ pre-3.8.0 legacy query planner and to the NGQP. For further information on
+ how the NGQP differs from the legacy query planner, see the
+ <a href="queryplanner-ng.html">detailed description of the NGQP</a>.
+
+
+
+
+<a name="where_clause"></a>
+
+</p><h1 id="where_clause_analysis"><span>2. </span>WHERE Clause Analysis</h1>
+
+<p>
+ The WHERE clause on a query is broken up into "terms" where each term
+ is separated from the others by an AND operator.
+ If the WHERE clause is composed of constraints separated by the OR
+ operator then the entire clause is considered to be a single "term"
+ to which the <a href="#or_opt">OR-clause optimization</a> is applied.
+
+</p><p>
+ All terms of the WHERE clause are analyzed to see if they can be
+ satisfied using indexes.
+ To be usable by an index a term must usually be of one of the following
+ forms:
+
+<blockquote><pre><b>
+ </b><i>column</i><b> = </b><i>expression</i><b>
+ </b><i>column</i><b> IS </b><i>expression</i><b>
+ </b><i>column</i><b> &gt; </b><i>expression</i><b>
+ </b><i>column</i><b> &gt;= </b><i>expression</i><b>
+ </b><i>column</i><b> &lt; </b><i>expression</i><b>
+ </b><i>column</i><b> &lt;= </b><i>expression</i><b>
+ </b><i>expression</i><b> = </b><i>column</i><b>
+ </b><i>expression</i><b> &gt; </b><i>column</i><b>
+ </b><i>expression</i><b> &gt;= </b><i>column</i><b>
+ </b><i>expression</i><b> &lt; </b><i>column</i><b>
+ </b><i>expression</i><b> &lt;= </b><i>column</i><b>
+ </b><i>column</i><b> IN (</b><i>expression-list</i><b>)
+ </b><i>column</i><b> IN (</b><i>subquery</i><b>)
+ </b><i>column</i><b> IS NULL
+ </b><i>column</i><b> LIKE </b><i>pattern</i><b>
+ </b><i>column</i><b> GLOB </b><i>pattern</i><b>
+</b></pre></blockquote>
+
+</p><p>
+ If an index is created using a statement like this:
+
+</p><div class="codeblock"><pre>CREATE INDEX idx_ex1 ON ex1(a,b,c,d,e,...,y,z);
+</pre></div>
+
+<p>
+ Then the index might be used if the initial columns of the index
+ (columns a, b, and so forth) appear in WHERE clause terms.
+ The initial columns of the index must be used with
+ the <b>=</b> or <b>IN</b> or <b>IS</b> operators.
+ The right-most column that is used can employ inequalities.
+ For the right-most
+ column of an index that is used, there can be up to two inequalities
+ that must sandwich the allowed values of the column between two extremes.
+
+</p><p>
+ It is not necessary for every column of an index to appear in a
+ WHERE clause term in order for that index to be used.
+ However, there cannot be gaps in the columns of the index that are used.
+ Thus for the example index above, if there is no WHERE clause term
+ that constrains column c, then terms that constrain columns a and b can
+ be used with the index but not terms that constrain columns d through z.
+ Similarly, index columns will not normally be used (for indexing purposes)
+ if they are to the right of a
+ column that is constrained only by inequalities.
+ (See the <a href="optoverview.html#skipscan">skip-scan optimization</a> below for the exception.)
+
+</p><p>
+ In the case of <a href="expridx.html">indexes on expressions</a>, whenever the word "column" is
+ used in the foregoing text, one can substitute "indexed expression"
+ (meaning a copy of the expression that appears in the <a href="lang_createindex.html">CREATE INDEX</a>
+ statement) and everything will work the same.
+
+
+<a name="idxexamp"></a>
+
+</p><h2 id="index_term_usage_examples"><span>2.1. </span>Index Term Usage Examples</h2>
+<p>
+ For the index above and WHERE clause like this:
+
+</p><div class="codeblock"><pre>... WHERE a=5 AND b IN (1,2,3) AND c IS NULL AND d='hello'
+</pre></div>
+<p>
+ The first four columns a, b, c, and d of the index would be usable since
+ those four columns form a prefix of the index and are all bound by
+ equality constraints.
+
+</p><p>
+ For the index above and WHERE clause like this:
+
+</p><div class="codeblock"><pre>... WHERE a=5 AND b IN (1,2,3) AND c>12 AND d='hello'
+</pre></div>
+<p>
+ Only columns a, b, and c of the index would be usable. The d column
+ would not be usable because it occurs to the right of c and c is
+ constrained only by inequalities.
+
+</p><p>
+ For the index above and WHERE clause like this:
+
+</p><div class="codeblock"><pre>... WHERE a=5 AND b IN (1,2,3) AND d='hello'
+</pre></div>
+<p>
+ Only columns a and b of the index would be usable. The d column
+ would not be usable because column c is not constrained and there can
+ be no gaps in the set of columns that usable by the index.
+
+</p><p>
+ For the index above and WHERE clause like this:
+
+</p><div class="codeblock"><pre>... WHERE b IN (1,2,3) AND c NOT NULL AND d='hello'
+</pre></div>
+<p>
+ The index is not usable at all because the left-most column of the
+ index (column "a") is not constrained. Assuming there are no other
+ indexes, the query above would result in a full table scan.
+
+</p><p>
+ For the index above and WHERE clause like this:
+
+</p><div class="codeblock"><pre>... WHERE a=5 OR b IN (1,2,3) OR c NOT NULL OR d='hello'
+</pre></div>
+<p>
+ The index is not usable because the WHERE clause terms are connected
+ by OR instead of AND. This query would result in a full table scan.
+ However, if three additional indexes where added that contained columns
+ b, c, and d as their left-most columns, then the
+ <a href="#or_opt">OR-clause optimization</a> might apply.
+
+
+<a name="between_opt"></a>
+
+</p><h1 id="the_between_optimization"><span>3. </span>The BETWEEN Optimization</h1>
+
+<p>
+ If a term of the WHERE clause is of the following form:
+
+<blockquote><pre><b>
+ </b><i>expr1</i><b> BETWEEN </b><i>expr2</i><b> AND </b><i>expr3</i><b>
+</b></pre></blockquote>
+</p><p>
+ Then two "virtual" terms are added as follows:
+
+<blockquote><pre><b>
+ </b><i>expr1</i><b> &gt;= </b><i>expr2</i><b> AND </b><i>expr1</i><b> &lt;= </b><i>expr3</i><b>
+</b></pre></blockquote>
+</p><p>
+ Virtual terms are used for analysis only and do not cause any byte-code
+ to be generated.
+ If both virtual terms end up being used as constraints on an index,
+ then the original BETWEEN term is omitted and the corresponding test
+ is not performed on input rows.
+ Thus if the BETWEEN term ends up being used as an index constraint
+ no tests are ever performed on that term.
+ On the other hand, the
+ virtual terms themselves never causes tests to be performed on
+ input rows.
+ Thus if the BETWEEN term is not used as an index constraint and
+ instead must be used to test input rows, the <i>expr1</i> expression is
+ only evaluated once.
+
+<a name="or_opt"></a>
+
+</p><h1 id="or_optimizations"><span>4. </span>OR Optimizations</h1>
+
+<p>
+ WHERE clause constraints that are connected by OR instead of AND can
+ be handled in two different ways.
+ If a term consists of multiple subterms containing a common column
+ name and separated by OR, like this:
+
+<blockquote><pre><b>
+ </b><i>column</i><b> = </b><i>expr1</i><b> OR </b><i>column</i><b> = </b><i>expr2</i><b> OR </b><i>column</i><b> = </b><i>expr3</i><b> OR ...
+</b></pre></blockquote>
+</p><p>
+ Then that term is rewritten as follows:
+
+<blockquote><pre><b>
+ </b><i>column</i><b> IN (</b><i>expr1</i><b>,</b><i>expr2</i><b>,</b><i>expr3</i><b>,...)
+</b></pre></blockquote>
+</p><p>
+ The rewritten term then might go on to constrain an index using the
+ normal rules for <b>IN</b> operators. Note that <i>column</i> must be
+ the same column in every OR-connected subterm,
+ although the column can occur on either the left or the right side of
+ the <b>=</b> operator.
+
+</p><p>
+ If and only if the previously described conversion of OR to an IN operator
+ does not work, the second OR-clause optimization is attempted.
+ Suppose the OR clause consists of multiple subterms as follows:
+
+<blockquote><pre><b>
+ </b><i>expr1</i><b> OR </b><i>expr2</i><b> OR </b><i>expr3</i><b>
+</b></pre></blockquote>
+</p><p>
+ Individual subterms might be a single comparison expression like
+ <b>a=5</b> or <b>x&gt;y</b> or they can be
+ LIKE or BETWEEN expressions, or a subterm
+ can be a parenthesized list of AND-connected sub-subterms.
+ Each subterm is analyzed as if it were itself the entire WHERE clause
+ in order to see if the subterm is indexable by itself.
+ If <u>every</u> subterm of an OR clause is separately indexable
+ then the OR clause might be coded such that a separate index is used
+ to evaluate each term of the OR clause. One way to think about how
+ SQLite uses separate indexes for each OR clause term is to imagine
+ that the WHERE clause where rewritten as follows:
+
+<blockquote><pre><b>
+ rowid IN (SELECT rowid FROM </b><i>table</i><b> WHERE </b><i>expr1</i><b>
+ UNION SELECT rowid FROM </b><i>table</i><b> WHERE </b><i>expr2</i><b>
+ UNION SELECT rowid FROM </b><i>table</i><b> WHERE </b><i>expr3</i><b>)
+</b></pre></blockquote>
+</p><p>
+ The rewritten expression above is conceptual; WHERE clauses containing
+ OR are not really rewritten this way.
+ The actual implementation of the OR clause uses a mechanism that is
+ more efficient and that works even for <a href="withoutrowid.html">WITHOUT ROWID</a> tables or
+ tables in which the "rowid" is inaccessible. Nevertheless,
+ the essence of the implementation is captured by the statement
+ above: Separate indexes are used to find candidate result rows
+ from each OR clause term and the final result is the union of
+ those rows.
+
+</p><p>
+ Note that in most cases, SQLite will only use a single index for each
+ table in the FROM clause of a query. The second OR-clause optimization
+ described here is the exception to that rule. With an OR-clause,
+ a different index might be used for each subterm in the OR-clause.
+
+</p><p>
+ For any given query, the fact that the OR-clause optimization described
+ here can be used does not guarantee that it will be used.
+ SQLite uses a cost-based query planner that estimates the CPU and
+ disk I/O costs of various competing query plans and chooses the plan
+ that it thinks will be the fastest. If there are many OR terms in
+ the WHERE clause or if some of the indexes on individual OR-clause
+ subterms are not very selective, then SQLite might decide that it is
+ faster to use a different query algorithm, or even a full-table scan.
+ Application developers can use the
+ <a href="lang_explain.html">EXPLAIN QUERY PLAN</a> prefix on a statement to get a
+ high-level overview of the chosen query strategy.
+
+<a name="like_opt"></a>
+
+</p><h1 id="the_like_optimization"><span>5. </span>The LIKE Optimization</h1>
+
+<p>
+ A WHERE-clause term that uses the <a href="lang_expr.html#like">LIKE</a> or <a href="lang_expr.html#glob">GLOB</a> operator
+ can sometimes be used with an index to do a range search,
+ almost as if the LIKE or GLOB were an alternative to a <a href="lang_expr.html#between">BETWEEN</a>
+ operator.
+ There are many conditions on this optimization:
+
+</p><p>
+ </p><ol>
+ <li>The right-hand side of the LIKE or GLOB must be either a string literal
+ or a <a href="lang_expr.html#varparam">parameter</a> bound to a string literal
+ that does not begin with a wildcard character.</li>
+ <li>It must not be possible to make the LIKE or GLOB operator true by
+ having a numeric value (instead of a string or blob) on the
+ left-hand side. This means that either:
+ <ol type="A">
+ <li> the left-hand side of the LIKE or GLOB operator is the name
+ of an indexed column with <a href="datatype3.html#affinity">TEXT affinity</a>, or</li>
+ <li> the right-hand side pattern argument does not begin with a
+ minus sign ("-") or a digit.</li>
+ </ol>
+ This constraint arises from the fact that numbers do not sort in
+ lexicographical order. For example: 9&lt;10 but '9'&gt;'10'.</li>
+ <li>The built-in functions used to implement LIKE and GLOB must not
+ have been overloaded using the <a href="c3ref/create_function.html">sqlite3_create_function()</a> API.</li>
+ <li>For the GLOB operator, the column must be indexed using the
+ built-in BINARY collating sequence.</li>
+ <li>For the LIKE operator, if <a href="pragma.html#pragma_case_sensitive_like">case_sensitive_like</a> mode is enabled then
+ the column must indexed using BINARY collating sequence, or if
+ <a href="pragma.html#pragma_case_sensitive_like">case_sensitive_like</a> mode is disabled then the column must indexed
+ using built-in NOCASE collating sequence.</li>
+ <li>If the ESCAPE option is used, the ESCAPE character must be ASCII,
+ or a single-byte character in UTF-8.
+ </li></ol>
+
+<p>
+ The LIKE operator has two modes that can be set by a
+ <a href="pragma.html#pragma_case_sensitive_like">pragma</a>. The
+ default mode is for LIKE comparisons to be insensitive to differences
+ of case for latin1 characters. Thus, by default, the following
+ expression is true:
+
+</p><div class="codeblock"><pre>'a' LIKE 'A'
+</pre></div>
+<p>
+ If the case_sensitive_like pragma is enabled as follows:
+
+</p><div class="codeblock"><pre>PRAGMA case_sensitive_like=ON;
+</pre></div>
+<p>
+ Then the LIKE operator pays attention to case and the example above would
+ evaluate to false. Note that case insensitivity only applies to
+ latin1 characters - basically the upper and lower case letters of English
+ in the lower 127 byte codes of ASCII. International character sets
+ are case sensitive in SQLite unless an application-defined
+ <a href="datatype3.html#collation">collating sequence</a> and <a href="lang_corefunc.html#like">like() SQL function</a> are provided that
+ take non-ASCII characters into account.
+ If an application-defined collating sequence and/or like() SQL
+ function are provided, the LIKE optimization described here will never
+ be taken.
+
+</p><p>
+ The LIKE operator is case insensitive by default because this is what
+ the SQL standard requires. You can change the default behavior at
+ compile time by using the <a href="compile.html#case_sensitive_like">SQLITE_CASE_SENSITIVE_LIKE</a> command-line option
+ to the compiler.
+
+</p><p>
+ The LIKE optimization might occur if the column named on the left of the
+ operator is indexed using the built-in BINARY collating sequence and
+ case_sensitive_like is turned on. Or the optimization might occur if
+ the column is indexed using the built-in NOCASE collating sequence and the
+ case_sensitive_like mode is off. These are the only two combinations
+ under which LIKE operators will be optimized.
+
+</p><p>
+ The GLOB operator is always case sensitive. The column on the left side
+ of the GLOB operator must always use the built-in BINARY collating sequence
+ or no attempt will be made to optimize that operator with indexes.
+
+</p><p>
+ The LIKE optimization will only be attempted if
+ the right-hand side of the GLOB or LIKE operator is either
+ literal string or a <a href="lang_expr.html#varparam">parameter</a> that has been <a href="c3ref/bind_blob.html">bound</a>
+ to a string literal. The string literal must not
+ begin with a wildcard; if the right-hand side begins with a wildcard
+ character then this optimization is not attempted. If the right-hand side
+ is a <a href="lang_expr.html#varparam">parameter</a> that is bound to a string, then this optimization is
+ only attempted if the <a href="c3ref/stmt.html">prepared statement</a> containing the expression
+ was compiled with <a href="c3ref/prepare.html">sqlite3_prepare_v2()</a> or <a href="c3ref/prepare.html">sqlite3_prepare16_v2()</a>.
+ The LIKE optimization is not attempted if the
+ right-hand side is a <a href="lang_expr.html#varparam">parameter</a> and the statement was prepared using
+ <a href="c3ref/prepare.html">sqlite3_prepare()</a> or <a href="c3ref/prepare.html">sqlite3_prepare16()</a>.
+
+</p><p>
+ Suppose the initial sequence of non-wildcard characters on the right-hand
+ side of the LIKE or GLOB operator is <i>x</i>. We are using a single
+ character to denote this non-wildcard prefix but the reader should
+ understand that the prefix can consist of more than 1 character.
+ Let <i>y</i> be the smallest string that is the same length as /x/ but which
+ compares greater than <i>x</i>. For example, if <i>x</i> is
+ <tt>'hello'</tt> then
+ <i>y</i> would be <tt>'hellp'</tt>.
+ The LIKE and GLOB optimizations consist of adding two virtual terms
+ like this:
+
+<blockquote><pre><b>
+ </b><i>column</i><b> &gt;= </b><i>x</i><b> AND </b><i>column</i><b> &lt; </b><i>y</i><b>
+</b></pre></blockquote>
+</p><p>
+ Under most circumstances, the original LIKE or GLOB operator is still
+ tested against each input row even if the virtual terms are used to
+ constrain an index. This is because we do not know what additional
+ constraints may be imposed by characters to the right
+ of the <i>x</i> prefix. However, if there is only a single
+ global wildcard to the right of <i>x</i>, then the original LIKE or
+ GLOB test is disabled.
+ In other words, if the pattern is like this:
+
+<blockquote><pre><b>
+ </b><i>column</i><b> LIKE </b><i>x</i><b>%
+ </b><i>column</i><b> GLOB </b><i>x</i><b>*
+</b></pre></blockquote>
+</p><p>
+ then the original LIKE or GLOB tests are disabled when the virtual
+ terms constrain an index because in that case we know that all of the
+ rows selected by the index will pass the LIKE or GLOB test.
+
+</p><p>
+ Note that when the right-hand side of a LIKE or GLOB operator is
+ a <a href="lang_expr.html#varparam">parameter</a> and the statement is prepared using <a href="c3ref/prepare.html">sqlite3_prepare_v2()</a>
+ or <a href="c3ref/prepare.html">sqlite3_prepare16_v2()</a> then the statement is automatically reparsed
+ and recompiled on the first <a href="c3ref/step.html">sqlite3_step()</a> call of each run if the binding
+ to the right-hand side parameter has changed since the previous run.
+ This reparse and recompile is essentially the same action that occurs
+ following a schema change. The recompile is necessary so that the query
+ planner can examine the new value bound to the right-hand side of the
+ LIKE or GLOB operator and determine whether or not to employ the
+ optimization described above.
+
+<a name="skipscan"></a>
+
+</p><h1 id="the_skip_scan_optimization"><span>6. </span>The Skip-Scan Optimization</h1>
+
+<p>
+ The general rule is that indexes are only useful if there are
+ WHERE-clause constraints on the left-most columns of the index.
+ However, in some cases,
+ SQLite is able to use an index even if the first few columns of
+ the index are omitted from the WHERE clause but later columns
+ are included.
+
+
+</p><p>
+ Consider a table such as the following:
+
+</p><div class="codeblock"><pre>CREATE TABLE people(
+ name TEXT PRIMARY KEY,
+ role TEXT NOT NULL,
+ height INT NOT NULL, -- in cm
+ CHECK( role IN ('student','teacher') )
+);
+CREATE INDEX people_idx1 ON people(role, height);
+</pre></div>
+
+<p>
+ The people table has one entry for each person in a large
+ organization. Each person is either a "student" or a "teacher",
+ as determined by the "role" field. The table also records the height in
+ centimeters of each person. The role and height are indexed.
+ Notice that the left-most column of the index is not very
+ selective - it only contains two possible values.
+
+
+</p><p>
+ Now consider a query to find the names of everyone in the
+ organization that is 180cm tall or taller:
+
+
+</p><div class="codeblock"><pre>SELECT name FROM people WHERE height>=180;
+</pre></div>
+
+<p>
+ Because the left-most column of the index does not appear in the
+ WHERE clause of the query, one is tempted to conclude that the
+ index is not usable here. However, SQLite is able to use the index.
+ Conceptually, SQLite uses the index as if the query were more
+ like the following:
+
+
+</p><div class="codeblock"><pre>SELECT name FROM people
+ WHERE role IN (SELECT DISTINCT role FROM people)
+ AND height>=180;
+</pre></div>
+
+<p>
+ Or this:
+
+</p><div class="codeblock"><pre>SELECT name FROM people WHERE role='teacher' AND height>=180
+UNION ALL
+SELECT name FROM people WHERE role='student' AND height>=180;
+</pre></div>
+
+<p>
+ The alternative query formulations shown above are conceptual only.
+ SQLite does not really transform the query.
+ The actual query plan is like this:
+ SQLite locates the first possible value for "role", which it
+ can do by rewinding the "people_idx1" index to the beginning and reading
+ the first record. SQLite stores this first "role" value in an
+ internal variable that we will here call "$role". Then SQLite
+ runs a query like: "SELECT name FROM people WHERE role=$role AND height>=180".
+ This query has an equality constraint on the left-most column of the
+ index and so the index can be used to resolve that query. Once
+ that query is finished, SQLite then uses the "people_idx1" index to
+ locate the next value of the "role" column, using code that is logically
+ similar to "SELECT role FROM people WHERE role>$role LIMIT 1".
+ This new "role" value overwrites the $role variable, and the process
+ repeats until all possible values for "role" have been examined.
+
+</p><p>
+ We call this kind of index usage a "skip-scan" because the database
+ engine is basically doing a full scan of the index but it optimizes the
+ scan (making it less than "full") by occasionally skipping ahead to the
+ next candidate value.
+
+</p><p>
+ SQLite might use a skip-scan on an index if it knows that the first
+ one or more columns contain many duplication values.
+ If there are too few duplicates
+ in the left-most columns of the index, then it would
+ be faster to simply step ahead to the next value, and thus do
+ a full table scan, than to do a binary search on an index to locate
+ the next left-column value.
+
+</p><p>
+ The only way that SQLite can know that there are many duplicates
+ in the left-most columns of an index
+ is if the <a href="lang_analyze.html">ANALYZE</a> command has been run
+ on the database.
+ Without the results of ANALYZE, SQLite has to guess at the "shape" of
+ the data in the table, and the default guess is that there are an average
+ of 10 duplicates for every value in the left-most column of the index.
+ Skip-scan only becomes profitable (it only gets to be faster than
+ a full table scan) when the number of duplicates is about 18 or more.
+ Hence, a skip-scan is never used on a database that has not been analyzed.
+
+<a name="joins"></a>
+
+</p><h1 id="joins"><span>7. </span>Joins</h1>
+
+<p>
+ The ON and USING clauses of an inner join are converted into additional
+ terms of the WHERE clause prior to WHERE clause analysis described
+ <a href="#where_clause">above in paragraph 2.0</a>.
+ Thus with SQLite, there is no computational
+ advantage to use the newer SQL92 join syntax
+ over the older SQL89 comma-join syntax. They both end up accomplishing
+ exactly the same thing on inner joins.
+
+</p><p>
+ For an OUTER JOIN the situation is more complex. The following
+ two queries are not equivalent:
+
+</p><div class="codeblock"><pre>SELECT * FROM tab1 LEFT JOIN tab2 ON tab1.x=tab2.y;
+SELECT * FROM tab1 LEFT JOIN tab2 WHERE tab1.x=tab2.y;
+</pre></div>
+<p>
+ For an inner join, the two queries above would be identical. However,
+ special processing applies to the ON and USING clauses of an OUTER join:
+ specifically, the constraints in an ON or USING clause do not apply if
+ the right table of the join is on a null row, but the constraints do apply
+ in the WHERE clause. The net effect is that putting the ON or USING
+ clause expressions for a LEFT JOIN in the WHERE clause effectively converts
+ the query to an
+ ordinary INNER JOIN - albeit an inner join that runs more slowly.
+
+<a name="table_order"></a>
+
+</p><h2 id="order_of_tables_in_a_join"><span>7.1. </span>Order of Tables in a Join</h2>
+
+<p>
+ The current implementation of
+ SQLite uses only loop joins. That is to say, joins are implemented as
+ nested loops.
+
+</p><p>
+ The default order of the nested loops in a join is for the left-most
+ table in the FROM clause to form the outer loop and the right-most
+ table to form the inner loop.
+ However, SQLite will nest the loops in a different order if doing so
+ will help it to select better indexes.
+
+</p><p>
+ Inner joins can be freely reordered. However a left outer join is
+ neither commutative nor associative and hence will not be reordered.
+ Inner joins to the left and right of the outer join might be reordered
+ if the optimizer thinks that is advantageous but the outer joins are
+ always evaluated in the order in which they occur.
+
+</p><p>
+ SQLite <a href="lang_select.html#crossjoin">treats the CROSS JOIN operator specially</a>.
+ The CROSS JOIN operator is commutative, in theory. However, SQLite chooses to
+ never reorder tables in a CROSS JOIN. This provides a mechanism
+ by which the programmer can force SQLite to choose a particular loop nesting
+ order.
+
+</p><p>
+ When selecting the order of tables in a join, SQLite uses an efficient
+ polynomial-time algorithm. Because of this,
+ SQLite is able to plan queries with 50- or 60-way joins in a matter of
+ microseconds
+
+</p><p>
+ Join reordering is automatic and usually works well enough that
+ programmers do not have to think about it, especially if <a href="lang_analyze.html">ANALYZE</a>
+ has been used to gather statistics about the available indexes,
+ though occasionally some hints from the programmer are needed.
+ Consider, for example, the following schema:
+
+</p><div class="codeblock"><pre>CREATE TABLE node(
+ id INTEGER PRIMARY KEY,
+ name TEXT
+);
+CREATE INDEX node_idx ON node(name);
+CREATE TABLE edge(
+ orig INTEGER REFERENCES node,
+ dest INTEGER REFERENCES node,
+ PRIMARY KEY(orig, dest)
+);
+CREATE INDEX edge_idx ON edge(dest,orig);
+</pre></div>
+<p>
+ The schema above defines a directed graph with the ability to store a
+ name at each node. Now consider a query against this schema:
+
+</p><div class="codeblock"><pre>SELECT *
+ FROM edge AS e,
+ node AS n1,
+ node AS n2
+ WHERE n1.name = 'alice'
+ AND n2.name = 'bob'
+ AND e.orig = n1.id
+ AND e.dest = n2.id;
+</pre></div>
+<p>
+ This query asks for is all information about edges that go from
+ nodes labeled "alice" to nodes labeled "bob".
+ The query optimizer in SQLite has basically two choices on how to
+ implement this query. (There are actually six different choices, but
+ we will only consider two of them here.)
+ Pseudocode below demonstrating these two choices.
+
+<a name="option1"></a>
+
+</p><p>Option 1:
+</p><div class="codeblock"><pre>foreach n1 where n1.name='alice' do:
+ foreach n2 where n2.name='bob' do:
+ foreach e where e.orig=n1.id and e.dest=n2.id
+ return n1.*, n2.*, e.*
+ end
+ end
+end
+</pre></div>
+<a name="option2"></a>
+
+<p>Option 2:
+</p><div class="codeblock"><pre>foreach n1 where n1.name='alice' do:
+ foreach e where e.orig=n1.id do:
+ foreach n2 where n2.id=e.dest and n2.name='bob' do:
+ return n1.*, n2.*, e.*
+ end
+ end
+end
+</pre></div>
+<p>
+ The same indexes are used to speed up every loop in both implementation
+ options.
+ The only difference in these two query plans is the order in which
+ the loops are nested.
+
+</p><p>
+ So which query plan is better? It turns out that the answer depends on
+ what kind of data is found in the node and edge tables.
+
+</p><p>
+ Let the number of alice nodes be M and the number of bob nodes be N.
+ Consider two scenarios. In the first scenario, M and N are both 2 but
+ there are thousands of edges on each node. In this case, option 1 is
+ preferred. With option 1, the inner loop checks for the existence of
+ an edge between a pair of nodes and outputs the result if found.
+ Because there are only 2 alice and bob nodes each, the inner loop
+ only has to run four times and the query is very quick. Option 2 would
+ take much longer here. The outer loop of option 2 only executes twice,
+ but because there are a large number of edges leaving each alice node,
+ the middle loop has to iterate many thousands of times. It will be
+ much slower. So in the first scenario, we prefer to use option 1.
+
+</p><p>
+ Now consider the case where M and N are both 3500. Alice nodes are
+ abundant. This time suppose each of these nodes is connected by only one
+ or two edges. Now option 2 is preferred. With option 2,
+ the outer loop still has to run 3500 times, but the middle loop only
+ runs once or twice for each outer loop and the inner loop will only
+ run once for each middle loop, if at all. So the total number of
+ iterations of the inner loop is around 7000. Option 1, on the other
+ hand, has to run both its outer loop and its middle loop 3500 times
+ each, resulting in 12 million iterations of the middle loop.
+ Thus in the second scenario, option 2 is nearly 2000 times faster
+ than option 1.
+
+</p><p>
+ So you can see that depending on how the data is structured in the table,
+ either query plan 1 or query plan 2 might be better. Which plan does
+ SQLite choose by default? As of version 3.6.18, without running <a href="lang_analyze.html">ANALYZE</a>,
+ SQLite will choose option 2.
+ If the <a href="lang_analyze.html">ANALYZE</a> command is run in order to gather statistics,
+ a different choice might be made if the statistics indicate that the
+ alternative is likely to run faster.
+
+<a name="manctrl"></a>
+
+</p><h2 id="manual_control_of_query_plans_using_sqlite_stat_tables"><span>7.2. </span>Manual Control Of Query Plans Using SQLITE_STAT Tables</h2>
+
+<p>
+ SQLite provides the ability for advanced programmers to exercise control
+ over the query plan chosen by the optimizer. One method for doing this
+ is to fudge the <a href="lang_analyze.html">ANALYZE</a> results in the <a href="fileformat2.html#stat1tab">sqlite_stat1</a>,
+ <a href="fileformat2.html#stat3tab">sqlite_stat3</a>, and/or <a href="fileformat2.html#stat4tab">sqlite_stat4</a> tables. This is not
+ recommended for most situations.
+
+<a name="crossjoin"></a>
+
+</p><h2 id="manual_control_of_query_plans_using_cross_join"><span>7.3. </span>Manual Control of Query Plans using CROSS JOIN</h2>
+
+<p>
+ Programmers can force SQLite to use a particular loop nesting order
+ for a join by using the CROSS JOIN operator instead of just JOIN,
+ INNER JOIN, NATURAL JOIN, or a "," join. Though CROSS JOINs are
+ commutative in theory, SQLite chooses to never reorder the tables in
+ a CROSS JOIN. Hence, the left table of a CROSS JOIN will always be
+ in an outer loop relative to the right table.
+
+</p><p>
+ In the following query, the optimizer is free to reorder the
+ tables of FROM clause any way it sees fit:
+
+</p><div class="codeblock"><pre>SELECT *
+ FROM node AS n1,
+ edge AS e,
+ node AS n2
+ WHERE n1.name = 'alice'
+ AND n2.name = 'bob'
+ AND e.orig = n1.id
+ AND e.dest = n2.id;
+</pre></div>
+<p>
+ In the following logically equivalent formulation of the same query,
+ the substitution of "CROSS JOIN" for the "," means that the order
+ of tables must be N1, E, N2.
+
+</p><div class="codeblock"><pre>SELECT *
+ FROM node AS n1 CROSS JOIN
+ edge AS e CROSS JOIN
+ node AS n2
+ WHERE n1.name = 'alice'
+ AND n2.name = 'bob'
+ AND e.orig = n1.id
+ AND e.dest = n2.id;
+</pre></div>
+<p>
+ In the latter query, the query plan must be
+ <a href="#option2">option 2</a>. Note that
+ you must use the keyword "CROSS" in order to disable the table reordering
+ optimization; INNER JOIN, NATURAL JOIN, JOIN, and other similar
+ combinations work just like a comma join in that the optimizer is
+ free to reorder tables as it sees fit. (Table reordering is also
+ disabled on an outer join, but that is because outer joins are not
+ associative or commutative. Reordering tables in OUTER JOIN changes
+ the result.)
+
+</p><p>
+ See "<a href="queryplanner-ng.html#fossilcasestudy">The Fossil NGQP Upgrade Case Study</a>" for another real-world example
+ of using CROSS JOIN to manually control the nesting order of a join.
+ The <a href="queryplanner-ng.html#howtofix">query planner checklist</a> found later in the same document provides
+ further guidance on manual control of the query planner.
+
+<a name="multi_index"></a>
+
+</p><h1 id="choosing_between_multiple_indexes"><span>8. </span>Choosing Between Multiple Indexes</h1>
+
+<p>
+ Each table in the FROM clause of a query can use at most one index
+ (except when the <a href="#or_opt">OR-clause optimization</a> comes into
+ play)
+ and SQLite strives to use at least one index on each table. Sometimes,
+ two or more indexes might be candidates for use on a single table.
+ For example:
+
+</p><div class="codeblock"><pre>CREATE TABLE ex2(x,y,z);
+CREATE INDEX ex2i1 ON ex2(x);
+CREATE INDEX ex2i2 ON ex2(y);
+SELECT z FROM ex2 WHERE x=5 AND y=6;
+</pre></div>
+<p>
+ For the SELECT statement above, the optimizer can use the ex2i1 index
+ to lookup rows of ex2 that contain x=5 and then test each row against
+ the y=6 term. Or it can use the ex2i2 index to lookup rows
+ of ex2 that contain y=6 then test each of those rows against the
+ x=5 term.
+
+</p><p>
+ When faced with a choice of two or more indexes, SQLite tries to estimate
+ the total amount of work needed to perform the query using each option.
+ It then selects the option that gives the least estimated work.
+
+</p><p>
+ To help the optimizer get a more accurate estimate of the work involved
+ in using various indexes, the user may optionally run the <a href="lang_analyze.html">ANALYZE</a> command.
+ The <a href="lang_analyze.html">ANALYZE</a> command scans all indexes of database where there might
+ be a choice between two or more indexes and gathers statistics on the
+ selectiveness of those indexes. The statistics gathered by
+ this scan are stored in special database tables names shows names all
+ begin with "<b>sqlite_stat</b>".
+ The content of these tables is not updated as the database
+ changes so after making significant changes it might be prudent to
+ rerun <a href="lang_analyze.html">ANALYZE</a>.
+ The results of an ANALYZE command are only available to database connections
+ that are opened after the ANALYZE command completes.
+
+</p><p>
+ The various <b>sqlite_stat</b><i>N</i> tables contain information on how
+ selective the various indexes are. For example, the <a href="fileformat2.html#stat1tab">sqlite_stat1</a>
+ table might indicate that an equality constraint on column x reduces the
+ search space to 10 rows on average, whereas an equality constraint on
+ column y reduces the search space to 3 rows on average. In that case,
+ SQLite would prefer to use index ex2i2 since that index is more selective.
+
+<a name="uplus"></a>
+
+</p><h2 id="disqualifying_where_clause_terms_using_unary_"><span>8.1. </span>Disqualifying WHERE Clause Terms using Unary-"+"</h2>
+
+<p>
+ Terms of the WHERE clause can be manually disqualified for use with
+ indexes by prepending a unary <b>+</b> operator to the column name. The
+ unary <b>+</b> is a no-op and will not generate any byte code in the prepared
+ statement.
+ However, the unary <b>+</b> operator will prevent the term from
+ constraining an index.
+ So, in the example above, if the query were rewritten as:
+
+</p><div class="codeblock"><pre>SELECT z FROM ex2 WHERE +x=5 AND y=6;
+</pre></div>
+<p>
+ The <b>+</b> operator on the <b>x</b> column will prevent that term from
+ constraining an index. This would force the use of the ex2i2 index.
+
+</p><p>
+ Note that the unary <b>+</b> operator also removes
+ <a href="datatype3.html#affinity">type affinity</a> from
+ an expression, and in some cases this can cause subtle changes in
+ the meaning of an expression.
+ In the example above,
+ if column <b>x</b> has <a href="datatype3.html#affinity">TEXT affinity</a>
+ then the comparison "x=5" will be done as text. The <b>+</b> operator
+ removes the affinity. So the comparison "<b>+x=5</b>" will compare the text
+ in column <b>x</b> with the numeric value 5 and will always be false.
+
+<a name="rangequery"></a>
+
+</p><h2 id="range_queries"><span>8.2. </span>Range Queries</h2>
+
+<p>
+ Consider a slightly different scenario:
+
+</p><div class="codeblock"><pre>CREATE TABLE ex2(x,y,z);
+CREATE INDEX ex2i1 ON ex2(x);
+CREATE INDEX ex2i2 ON ex2(y);
+SELECT z FROM ex2 WHERE x BETWEEN 1 AND 100 AND y BETWEEN 1 AND 100;
+</pre></div>
+<p>
+ Further suppose that column x contains values spread out
+ between 0 and 1,000,000 and column y contains values
+ that span between 0 and 1,000. In that scenario,
+ the range constraint on column x should reduce the search space by
+ a factor of 10,000 whereas the range constraint on column y should
+ reduce the search space by a factor of only 10. So the ex2i1 index
+ should be preferred.
+
+</p><p>
+ SQLite will make this determination, but only if it has been compiled
+ with <a href="compile.html#enable_stat3">SQLITE_ENABLE_STAT3</a> or <a href="compile.html#enable_stat4">SQLITE_ENABLE_STAT4</a>.
+ The <a href="compile.html#enable_stat3">SQLITE_ENABLE_STAT3</a> and <a href="compile.html#enable_stat4">SQLITE_ENABLE_STAT4</a> options causes
+ the <a href="lang_analyze.html">ANALYZE</a> command to collect a histogram of column content in the
+ <a href="fileformat2.html#stat3tab">sqlite_stat3</a> or <a href="fileformat2.html#stat4tab">sqlite_stat4</a> tables and to use this histogram to
+ make a better guess at the best query to use for range constraints
+ such as the above. The main difference between STAT3 and STAT4 is
+ that STAT3 records histogram data for only the left-most column of
+ an index whereas STAT4 records histogram data for all columns of an
+ index. For single-column indexes, STAT3 and STAT4 work the same.
+
+</p><p>
+ The histogram data is only useful if the right-hand side of the constraint
+ is a simple compile-time constant or <a href="lang_expr.html#varparam">parameter</a> and not an expression.
+
+</p><p>
+ Another limitation of the histogram data is that it only applies to the
+ left-most column on an index. Consider this scenario:
+
+</p><div class="codeblock"><pre>CREATE TABLE ex3(w,x,y,z);
+CREATE INDEX ex3i1 ON ex2(w, x);
+CREATE INDEX ex3i2 ON ex2(w, y);
+SELECT z FROM ex3 WHERE w=5 AND x BETWEEN 1 AND 100 AND y BETWEEN 1 AND 100;
+</pre></div>
+<p>
+ Here the inequalities are on columns x and y which are not the
+ left-most index columns. Hence, the histogram data which is collected no
+ left-most column of indexes is useless in helping to choose between the
+ range constraints on columns x and y.
+
+<a name="covidx"></a>
+
+</p><h1 id="covering_indexes"><span>9. </span>Covering Indexes</h1>
+
+<p>
+ When doing an indexed lookup of a row, the usual procedure is to
+ do a binary search on the index to find the index entry, then extract
+ the <a href="lang_createtable.html#rowid">rowid</a> from the index and use that <a href="lang_createtable.html#rowid">rowid</a> to do a binary search on
+ the original table. Thus a typical indexed lookup involves two
+ binary searches.
+ If, however, all columns that were to be fetched from the table are
+ already available in the index itself, SQLite will use the values
+ contained in the index and will never look up the original table
+ row. This saves one binary search for each row and can make many
+ queries run twice as fast.
+
+</p><p>
+ When an index contains all of the data needed for a query and when the
+ original table never needs to be consulted, we call that index a
+ "covering index".
+
+<a name="order_by"></a>
+
+</p><h1 id="order_by_optimizations"><span>10. </span>ORDER BY Optimizations</h1>
+
+<p>
+ SQLite attempts to use an index to satisfy the ORDER BY clause of a
+ query when possible.
+ When faced with the choice of using an index to satisfy WHERE clause
+ constraints or satisfying an ORDER BY clause, SQLite does the same
+ cost analysis described above
+ and chooses the index that it believes will result in the fastest answer.
+
+</p><p>
+ SQLite will also attempt to use indexes to help satisfy GROUP BY clauses
+ and the DISTINCT keyword. If the nested loops of the join can be arranged
+ such that rows that are equivalent for the GROUP BY or for the DISTINCT are
+ consecutive, then the GROUP BY or DISTINCT logic can determine if the
+ current row is part of the same group or if the current row is distinct
+ simply by comparing the current row to the previous row.
+ This can be much faster than the alternative of comparing each row to
+ all prior rows.
+
+<a name="partsort"></a>
+
+</p><h2 id="partial_order_by_via_index"><span>10.1. </span>Partial ORDER BY via Index</h2>
+
+<p>
+ If a query contains an ORDER BY clause with multiple terms, it might
+ be that SQLite can use indexes to cause rows to come out in the order
+ of some prefix of the terms in the ORDER BY but that later terms in
+ the ORDER BY are not satisfied. In that case, SQLite does block sorting.
+ Suppose the ORDER BY clause has four terms and the natural order of the
+ query results in rows appearing in order of the first two terms. As
+ each row is output by the query engine and enters the sorter, the
+ outputs in the current row corresponding to the first two terms of
+ the ORDER BY are compared against the previous row. If they have
+ changed, the current sort is finished and output and a new sort is
+ started. This results in a slightly faster sort. Event bigger
+ advantages are that many fewer rows need to be held in memory,
+ reducing memory requirements, and outputs can begin to appear before
+ the core query has run to completion.
+
+<a name="flattening"></a>
+
+</p><h1 id="subquery_flattening"><span>11. </span>Subquery Flattening</h1>
+
+<p>
+ When a subquery occurs in the FROM clause of a SELECT, the simplest
+ behavior is to evaluate the subquery into a transient table, then run
+ the outer SELECT against the transient table. Such a plan
+ can be suboptimal since the transient table will not have any indexes
+ and the outer query (which is likely a join) will be forced to do a
+ full table scan on the transient table.
+
+</p><p>
+ To overcome this problem, SQLite attempts to flatten subqueries in
+ the FROM clause of a SELECT.
+ This involves inserting the FROM clause of the subquery into the
+ FROM clause of the outer query and rewriting expressions in
+ the outer query that refer to the result set of the subquery.
+ For example:
+
+</p><div class="codeblock"><pre>SELECT t1.a, t2.b FROM t2, (SELECT x+y AS a FROM t1 WHERE z&lt;100) WHERE a&gt;5
+</pre></div>
+<p>
+ Would be rewritten using query flattening as:
+
+</p><div class="codeblock"><pre>SELECT t1.x+t1.y AS a, t2.b FROM t2, t1 WHERE z&lt;100 AND a&gt;5
+</pre></div>
+<p>
+ There is a long list of conditions that must all be met in order for
+ query flattening to occur. Some of the constraints are marked as
+ obsolete by italic text. These extra constraints are retained in the
+ documentation to preserve the numbering of the other constraints.
+
+</p><p>
+ Casual readers are not expected to understand all of these rules.
+ A key take-away from this section is that the rules for determining
+ if query flatting is safe or unsafe are subtle and
+ complex. There have been multiple bugs over the years caused by
+ over-aggressive query flattening. On the other hand, performance
+ of complex queries and/or queries involving views tends to suffer
+ if query flattening is more conservative.
+
+</p><p>
+ </p><ol>
+ <li value="1"> <i>(Obsolete. Query flattening is no longer
+ attempted for aggregate subqueries.)</i>
+
+ </li><li value="2"> <i>(Obsolete. Query flattening is no longer
+ attempted for aggregate subqueries.)</i>
+
+ </li><li value="3">
+ If the subquery is the right operand of a LEFT JOIN then
+ <ol type="a"><li> the subquery may not be a join, and
+ </li><li> the FROM clause of the subquery may
+ not contain a virtual table, and
+ </li><li> the outer query may not be an aggregate.</li></ol></li>
+
+ <li value="4"> The subquery is not DISTINCT.
+
+ </li><li value="5"> <i>(Subsumed into constraint 4)</i>
+
+ </li><li value="6"> <i>(Obsolete. Query flattening is no longer
+ attempted for aggregate subqueries.)</i>
+
+ </li><li value="7">
+ The subquery has a FROM clause.
+
+ </li><li value="8">
+ The subquery does not use LIMIT or the outer query is not a join.
+
+ </li><li value="9">
+ The subquery does not use LIMIT or the outer query does not use
+ aggregates.
+
+ </li><li value="10"> <i>(Restriction relaxed in 2005)</i>
+
+ </li><li value="11">
+ The subquery and the outer query do not both have ORDER BY clauses.
+
+ </li><li value="12"> <i>(Subsumed into constraint 3)</i>
+
+ </li><li value="13"> The subquery and outer query do not both use LIMIT.
+
+ </li><li value="14"> The subquery does not use OFFSET.
+
+ </li><li value="15">
+ If the outer query is part of a compound select, then the
+ subquery may not have a LIMIT clause.
+
+ </li><li value="16">
+ If the outer query is an aggregate, then the subquery may
+ not contain ORDER BY.
+
+ </li><li value="17">
+ If the sub-query is a compound SELECT, then
+ <ol type="a">
+ <li> all compound operators must be UNION ALL, and
+ </li><li> no terms with the subquery compound may be aggregate
+ or DISTINCT, and
+ </li><li> every term within the subquery must have a FROM clause, and
+ </li><li> the outer query may not be an aggregate, DISTINCT query, or join.
+ </li></ol>
+
+ The parent and sub-query may contain WHERE clauses. Subject to
+ rules (11), (12) and (13), they may also contain ORDER BY,
+ LIMIT and OFFSET clauses.
+
+ </li><li value="18">
+ If the sub-query is a compound select, then all terms of the
+ ORDER by clause of the parent must be simple references to
+ columns of the sub-query.
+
+ </li><li value="19">
+ If the subquery uses LIMIT then the outer query may not
+ have a WHERE clause.
+
+ </li><li value="20">
+ If the sub-query is a compound select, then it must not use
+ an ORDER BY clause.
+
+ </li><li value="21">
+ If the subquery uses LIMIT, then the outer query may not be
+ DISTINCT.
+
+ </li><li value="22"> The subquery may not be a recursive CTE.
+
+ </li><li value="23"> <i>(Subsumed into constraint 17d.)</i>
+
+ </li><li value="24"> <i>(Obsolete. Query flattening is no longer
+ attempted for aggregate subqueries.)</i>
+ </li></ol>
+
+
+<p>
+ Query flattening is an important optimization when views are used as
+ each use of a view is translated into a subquery.
+
+<a name="coroutines"></a>
+
+</p><h1 id="subquery_co_routines"><span>12. </span>Subquery Co-routines</h1>
+
+<p>
+ Prior to SQLite 3.7.15 (2012-12-12),
+ a subquery in the FROM clause would be
+ either flattened into the outer query, or else the subquery would be run
+ to completion
+ before the outer query started, the result set from the subquery
+ would be stored in a transient table,
+ and then the transient table would be used in the outer query. Newer
+ versions of SQLite have a third option, which is to implement the subquery
+ using a co-routine.
+
+</p><p>
+ A co-routine is like a subroutine in that it runs in the same thread
+ as the caller and eventually returns control back to the caller. The
+ difference is that a co-routine also has the ability to return
+ before it has finished, and then resume where it left off the next
+ time it is called.
+
+</p><p>
+ When a subquery is implemented as a co-routine, byte-code is generated
+ to implement the subquery as if it were a standalone query, except
+ instead of returning rows of results back to the application, the
+ co-routine yields control back to the caller after each row is computed.
+ The caller can then use that one computed row as part of its computation,
+ then invoke the co-routine again when it is ready for the next row.
+
+</p><p>
+ Co-routines are better than storing the complete result set of the subquery
+ in a transient table because co-routines use less memory. With a co-routine,
+ only a single row of the result needs to be remembered, whereas all rows of
+ the result must be stored for a transient table. Also, because the
+ co-routine does not need to run to completion before the outer query
+ begins its work, the first rows of output can appear much sooner, and if
+ the overall query is aborted, less work is done overall.
+
+</p><p>
+ On the other hand, if the result of the subquery must be scanned multiple
+ times (because, for example, it is just one table in a join) then it
+ is better to use a transient table to remember the entire result of the
+ subquery, in order to avoid computing the subquery more than once.
+
+<a name="deferred_work"></a>
+
+</p><h2 id="using_co_routines_to_defer_work_until_after_the_sorting"><span>12.1. </span>Using Co-routines to Defer Work until after the Sorting</h2>
+
+<p>
+ As of SQLite version 3.21.0 (2017-10-24), the query planner will
+ always prefer to use a co-routine to implement FROM-clause subqueries
+ that contains an ORDER BY clause and that are not part of a join when
+ the result set of the outer query is "complex". This feature allows
+ applications to shift expensive computations from before the
+ sorter until after the sorter, which can result in faster operation.
+ For example, consider this query:
+
+</p><div class="codeblock"><pre>SELECT expensive_function(a) FROM tab ORDER BY date DESC LIMIT 5;
+</pre></div>
+<p>
+ The goal of this query is to compute some value for the five most
+ recent entries in the table. In the query above, the
+ "expensive_function()" is invoked prior to the sort and thus is
+ invoked on every row of the table, even
+ rows that are ultimately omitted due to the LIMIT clause.
+ A co-routine can be used to work around this:
+
+</p><div class="codeblock"><pre>SELECT expensive_function(a) FROM (
+ SELECT a FROM tab ORDER BY date DESC LIMIT 5
+);
+</pre></div>
+<p>
+ In the revised query, the subquery implemented by a co-routine computes
+ the five most recent values for "a". Those five values are passed from the
+ co-routine up into the outer query where the "expensive_function()" is
+ invoked on only the specific rows that the application cares about.
+
+</p><p>
+ The query planner in future versions of SQLite might grow smart enough
+ to make transformations such as the above automatically, in both directions.
+ That is to say, future versions of SQLite might transform queries of the
+ first form into the second, or queries written the second way into the
+ first. As of SQLite version 3.22.0 (2018-01-22), the query planner
+ will flatten the subquery if the outer query does not make use of any
+ user-defined functions or subqueries in its result set. For the examples
+ shown above, however, SQLite implements each of the queries as
+ written.
+
+<a name="minmax"></a>
+
+</p><h1 id="the_min_max_optimization"><span>13. </span>The MIN/MAX Optimization</h1>
+
+<p>
+ Queries that contain a single MIN() or MAX() aggregate function whose
+ argument is the left-most column of an index might be satisfied
+ by doing a single index lookup rather than by scanning the entire table.
+ Examples:
+
+</p><div class="codeblock"><pre>SELECT MIN(x) FROM table;
+SELECT MAX(x)+1 FROM table;
+</pre></div>
+
+<a name="autoindex"></a>
+
+<h1 id="automatic_indexes"><span>14. </span>Automatic Indexes</h1>
+
+<p>
+ When no indexes are available to aid the evaluation of a query, SQLite
+ might create an automatic index that lasts only for the duration
+ of a single SQL statement.
+ Since the cost of constructing the automatic index is
+ O(NlogN) (where N is the number of entries in the table) and the cost of
+ doing a full table scan is only O(N), an automatic index will
+ only be created if SQLite expects that the lookup will be run more than
+ logN times during the course of the SQL statement. Consider an example:
+
+</p><div class="codeblock"><pre>CREATE TABLE t1(a,b);
+CREATE TABLE t2(c,d);
+-- Insert many rows into both t1 and t2
+SELECT * FROM t1, t2 WHERE a=c;
+</pre></div>
+<p>
+ In the query above, if both t1 and t2 have approximately N rows, then
+ without any indexes the query will require O(N*N) time. On the other
+ hand, creating an index on table t2 requires O(NlogN) time and using
+ that index to evaluate the query requires an additional O(NlogN) time.
+ In the absence of <a href="lang_analyze.html">ANALYZE</a> information, SQLite guesses that N is one
+ million and hence it believes that constructing the automatic index will
+ be the cheaper approach.
+
+</p><p>
+ An automatic index might also be used for a subquery:
+
+</p><div class="codeblock"><pre>CREATE TABLE t1(a,b);
+CREATE TABLE t2(c,d);
+-- Insert many rows into both t1 and t2
+SELECT a, (SELECT d FROM t2 WHERE c=b) FROM t1;
+</pre></div>
+<p>
+ In this example, the t2 table is used in a subquery to translate values
+ of the t1.b column. If each table contains N rows, SQLite expects that
+ the subquery will run N times, and hence it will believe it is faster
+ to construct an automatic, transient index on t2 first and then use
+ that index to satisfy the N instances of the subquery.
+
+</p><p>
+ The automatic indexing capability can be disabled at run-time using
+ the <a href="pragma.html#pragma_automatic_index">automatic_index pragma</a>. Automatic indexing is turned on by
+ default, but this can be changed so that automatic indexing is off
+ by default using the <a href="compile.html#default_automatic_index">SQLITE_DEFAULT_AUTOMATIC_INDEX</a> compile-time option.
+ The ability to create automatic indexes can be completely disabled by
+ compiling with the <a href="compile.html#omit_automatic_index">SQLITE_OMIT_AUTOMATIC_INDEX</a> compile-time option.
+
+</p><p>
+ In SQLite <a href="releaselog/3_8_0.html">version 3.8.0</a> (2013-08-26) and later,
+ an <a href="rescode.html#warning_autoindex">SQLITE_WARNING_AUTOINDEX</a> message is sent
+ to the <a href="errlog.html">error log</a> every time a statement is prepared that uses an
+ automatic index. Application developers can and should use these warnings
+ to identify the need for new persistent indexes in the schema.
+
+</p><p>
+ Do not confuse automatic indexes with the <a href="fileformat2.html#intschema">internal indexes</a> (having names
+ like "sqlite_autoindex_<i>table</i>_<i>N</i>") that are sometimes
+ created to implement a <a href="lang_createtable.html#primkeyconst">PRIMARY KEY constraint</a> or <a href="lang_createtable.html#uniqueconst">UNIQUE constraint</a>.
+ The automatic indexes described here exist only for the duration of a
+ single query, are never persisted to disk, and are only visible to a
+ single database connection. Internal indexes are part of the implementation
+ of PRIMARY KEY and UNIQUE constraints, are long-lasting and persisted
+ to disk, and are visible to all database connections. The term "autoindex"
+ appears in the names of <a href="fileformat2.html#intschema">internal indexes</a> for legacy reasons and does
+ not indicate that internal indexes and automatic indexes are related.
+
+<a name="hashjoin"></a>
+
+</p><h2 id="hash_joins"><span>14.1. </span>Hash Joins</h2>
+
+<p>
+ An automatic index is about the same thing as a
+ <a href="https://en.wikipedia.org/wiki/Hash_join">hash join</a>. The only difference
+ is that a B-Tree is used instead of a hash table. If you are willing to
+ say that the transient B-Tree constructed for an automatic index is
+ really just a fancy hash table, then a query that uses an automatic
+ index is just a hash join.
+
+</p><p>
+ SQLite constructs an transient index instead of a hash table in this
+ instance because it already has a robust and high performance B-Tree
+ implementation at hand, whereas a hash-table would need to be added.
+ Adding a separate hash table implementation to handle this one case
+ would increase the size of the library (which is designed for use on
+ low-memory embedded devices) for minimal performance gain. SQLite might
+ be enhanced with a hash-table implementation someday, but for now it seems
+ better to continue using automatic indexes in cases where client/server
+ database engines might use a hash join.
+
+<a name="pushdown"></a>
+
+</p><h1 id="the_push_down_optimization"><span>15. </span>The Push-Down Optimization</h1>
+
+<p>
+ If a subquery cannot be <a href="optoverview.html#flattening">flattened</a> into the outer query, it might
+ still be possible to enhance performance by "pushing down" WHERE clause
+ terms from the outer query into the subquery. Consider an example:
+
+</p><div class="codeblock"><pre>CREATE TABLE t1(a INT, b INT);
+CREATE TABLE t2(x INT, y INT);
+CREATE VIEW v1(a,b) AS SELECT DISTINCT a, b FROM t1;
+
+SELECT x, y, b
+ FROM t2 JOIN v1 ON (x=a)
+ WHERE b BETWEEN 10 AND 20;
+</pre></div>
+
+<p>
+ The view v1 cannot be <a href="optoverview.html#flattening">flattened</a> because it is DISTINCT. It must
+ instead be run as a subquery with the results being stored in a
+ transient table, then the join is performed between t2 and the
+ transient table. The push-down optimization pushes down the
+ "b BETWEEN 10 AND 20" term into the view. This makes the transient
+ table smaller, and helps the subquery to run faster if there
+ is an index on t1.b. The resulting evaluation is like this:
+
+</p><div class="codeblock"><pre>SELECT x, y, b
+ FROM t2
+ JOIN (SELECT DISTINCT a, b FROM t1 WHERE b BETWEEN 10 AND 20)
+ WHERE b BETWEEN 10 AND 20;
+</pre></div>
+
+<p>
+ The push-down optimization cannot always be used. For example,
+ if the subquery contains a LIMIT, then pushing down any part of
+ the WHERE clause from the outer query could change the result of
+ the inner query. There are other restrictions, explained in a
+ comment in the source code on the pushDownWhereTerms() routine
+ that implements this optimization.
+
+<a name="leftjoinreduction"></a>
+
+</p><h1 id="the_left_join_strength_reduction_optimization"><span>16. </span>The LEFT JOIN Strength Reduction Optimization</h1>
+
+<p>
+ A LEFT JOIN can sometimes be converted into an ordinary JOIN
+ if there are terms in the WHERE clause that guarantee that the
+ two joins will give identical results. In particular, if any
+ column in the right-hand table of the LEFT JOIN must be non-NULL
+ in order for the WHERE clause to be true, then the LEFT JOIN is
+ demoted to an ordinary JOIN.
+
+</p><p>
+ The prover that determines whether any column of the right-hand
+ table of a LEFT JOIN must be non-NULL in the WHERE clause is
+ imperfect. It sometimes returns a false negative. In other words,
+ it sometimes fails to reduce the strength of a LEFT JOIN when doing
+ so was in fact possible. For example, the prover does not know
+ the <a href="lang_datefunc.html">datetime() SQL function</a> will always return NULL if its first
+ argument is NULL, and so it will not recognize that the LEFT JOIN
+ in the following query could be strength-reduced:
+
+</p><div class="codeblock"><pre>SELECT urls.url
+ FROM urls
+ LEFT JOIN
+ (SELECT *
+ FROM (SELECT url_id AS uid, max(retrieval_time) AS rtime
+ FROM lookups GROUP BY 1 ORDER BY 1)
+ WHERE uid IN (358341,358341,358341)
+ ) recent
+ ON u.source_seed_id = recent.xyz OR u.url_id = recent.xyz
+ WHERE
+ DATETIME(recent.rtime) > DATETIME('now', '-5 days');
+</pre></div>
+
+<p>
+ It is possible that future enhancements to the prover might enable it
+ to recognize that NULL inputs to certain built-in functions
+ always result in a NULL answer. However, not all built-in
+ functions have that property (for example <a href="lang_corefunc.html#coalesce">coalesce()</a>) and, of
+ course, the prover will never be able to reason about
+ <a href="appfunc.html">application-defined SQL functions</a>.
+
+
+<a name="omitnoopjoin"></a>
+
+</p><h1 id="the_omit_left_join_optimization"><span>17. </span>The Omit LEFT JOIN Optimization</h1>
+
+<p>
+ Sometimes a LEFT JOIN can be completely omitted from a query without
+ changing the result. This can happen if all of the following are
+ true:
+
+</p><p>
+ </p><ol>
+ <li> The query is not an aggregate
+ </li><li> Either the query is DISTINCT or else the ON or USING clause
+ on the LEFT JOIN constrains the join such that it matches
+ only a single row
+ </li><li> The right-hand table of the LEFT JOIN is not be used anywhere
+ in the query outside of its own USING or ON clause.
+ </li></ol>
+
+<p>
+ LEFT JOIN elimination often comes up when LEFT JOINs are used
+ inside of views, and then the view is used in such as way that
+ none of the columns of the right-hand table of the LEFT JOIN are
+ referenced.
+
+</p><p>
+ Here is a simple example of omitting a LEFT JOIN:
+
+</p><div class="codeblock"><pre>CREATE TABLE t1(ipk INTEGER PRIMARY KEY, v1);
+CREATE TABLE t2(ipk INTEGER PRIMARY KEY, v2);
+CREATE TABLE t3(ipk INTEGER PRIMARY KEY, v3);
+
+SELECT v1, v3 FROM t1
+ LEFT JOIN t2 ON (t1.ipk=t2.ipk)
+ LEFT JOIN t3 ON (t1.ipk=t3.ipk)
+</pre></div>
+
+<p>
+ The t2 table is completely unused in the query above, and so the
+ query planner is able to implement the query as if it were written:
+
+</p><div class="codeblock"><pre>SELECT v1, v3 FROM t1
+ LEFT JOIN t3 ON (t1.ipk=t3.ipk)
+</pre></div>
+
+
+<a name="constprop"></a>
+
+<h1 id="the_constant_propagation_optimization"><span>18. </span>The Constant Propagation Optimization</h1>
+
+<p>
+ When a WHERE clause contains two or more equality constraints connected
+ by the AND operator such that all of the <a href="datatype3.html#affinity">affinities</a> of the various
+ constraints are the same, then SQLite might use the transitive property
+ of equality to construct new "virtual" constraints that can be used to
+ simplify expressions and/or improve performance. This is called the
+ "constant-propagation optimization".
+
+</p><p>
+ For example, consider the following schema and query:
+
+</p><div class="codeblock"><pre>CREATE TABLE t1(a INTEGER PRIMARY KEY, b INT, c INT);
+SELECT * FROM t1 WHERE a=b AND b=5;
+</pre></div>
+
+<p>
+ SQLite looks at the "a=b" and "b=5" constraints and deduces that
+ if those two constraints are true, then it must also be the case
+ that "a=5" is true. This means that the desired row can be looked up
+ quickly using a value of 5 for the INTEGER PRIMARY KEY.
+</p><p align="center"><small><i>This page last modified on <a href="https://sqlite.org/docsrc/honeypot" id="mtimelink" data-href="https://sqlite.org/docsrc/finfo/pages/optoverview.in?m=f697eab2d82b4d002">2022-09-19 12:27:16</a> UTC </small></i></p>
+