1 files changed, 1633 insertions, 0 deletions
diff --git a/www/optoverview.html b/www/optoverview.html
new file mode 100644
index 0000000..3dee3f1
--- /dev/null
+++ b/www/optoverview.html
@@ -0,0 +1,1633 @@
+<!DOCTYPE html>
+<html><head>
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<meta http-equiv="content-type" content="text/html; charset=UTF-8">
+<link href="sqlite.css" rel="stylesheet">
+<title>The SQLite Query Optimizer Overview</title>
+<!-- path= -->
+</head>
+<body>
+<div class=nosearch>
+<a href="index.html">
+<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
+</a>
+<div><!-- IE hack to prevent disappearing logo --></div>
+<div class="tagline desktoponly">
+Small. Fast. Reliable.<br>Choose any three.
+</div>
+<div class="menu mainmenu">
+<ul>
+<li><a href="index.html">Home</a>
+<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
+<li class='wideonly'><a href='about.html'>About</a>
+<li class='desktoponly'><a href="docs.html">Documentation</a>
+<li class='desktoponly'><a href="download.html">Download</a>
+<li class='wideonly'><a href='copyright.html'>License</a>
+<li class='desktoponly'><a href="support.html">Support</a>
+<li class='desktoponly'><a href="prosupport.html">Purchase</a>
+<li class='search' id='search_menubutton'>
+<a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
+</ul>
+</div>
+<div class="menu submenu" id="submenu">
+<ul>
+<li><a href='about.html'>About</a>
+<li><a href='docs.html'>Documentation</a>
+<li><a href='download.html'>Download</a>
+<li><a href='support.html'>Support</a>
+<li><a href='prosupport.html'>Purchase</a>
+</ul>
+</div>
+<div class="searchmenu" id="searchmenu">
+<form method="GET" action="search">
+<select name="s" id="searchtype">
+<option value="d">Search Documentation</option>
+<option value="c">Search Changelog</option>
+</select>
+<input type="text" name="q" id="searchbox" value="">
+<input type="submit" value="Go">
+</form>
+</div>
+</div>
+<script>
+function toggle_div(nm) {
+var w = document.getElementById(nm);
+if( w.style.display=="block" ){
+w.style.display = "none";
+}else{
+w.style.display = "block";
+}
+}
+function toggle_search() {
+var w = document.getElementById("searchmenu");
+if( w.style.display=="block" ){
+w.style.display = "none";
+} else {
+w.style.display = "block";
+setTimeout(function(){
+document.getElementById("searchbox").focus()
+}, 30);
+}
+}
+function div_off(nm){document.getElementById(nm).style.display="none";}
+window.onbeforeunload = function(e){div_off("submenu");}
+/* Disable the Search feature if we are not operating from CGI, since */
+/* Search is accomplished using CGI and will not work without it. */
+if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
+document.getElementById("search_menubutton").style.display = "none";
+}
+/* Used by the Hide/Show button beside syntax diagrams, to toggle the */
+function hideorshow(btn,obj){
+var x = document.getElementById(obj);
+var b = document.getElementById(btn);
+if( x.style.display!='none' ){
+x.style.display = 'none';
+b.innerHTML='show';
+}else{
+x.style.display = '';
+b.innerHTML='hide';
+}
+return false;
+}
+var antiRobot = 0;
+function antiRobotGo(){
+if( antiRobot!=3 ) return;
+antiRobot = 7;
+var j = document.getElementById("mtimelink");
+if(j && j.hasAttribute("data-href")) j.href=j.getAttribute("data-href");
+}
+function antiRobotDefense(){
+document.body.onmousedown=function(){
+antiRobot |= 2;
+antiRobotGo();
+document.body.onmousedown=null;
+}
+document.body.onmousemove=function(){
+antiRobot |= 2;
+antiRobotGo();
+document.body.onmousemove=null;
+}
+setTimeout(function(){
+antiRobot |= 1;
+antiRobotGo();
+}, 100)
+antiRobotGo();
+}
+antiRobotDefense();
+</script>
+<div class=fancy>
+<div class=nosearch>
+<div class="fancy_title">
+The SQLite Query Optimizer Overview
+</div>
+<div class="fancy_toc">
+<a onclick="toggle_toc()">
+<span class="fancy_toc_mark" id="toc_mk">&#x25ba;</span>
+Table Of Contents
+</a>
+<div id="toc_sub"><div class="fancy-toc1"><a href="#introduction">1. Introduction</a></div>
+<div class="fancy-toc1"><a href="#where_clause_analysis">2. WHERE Clause Analysis</a></div>
+<div class="fancy-toc2"><a href="#index_term_usage_examples">2.1. Index Term Usage Examples</a></div>
+<div class="fancy-toc1"><a href="#the_between_optimization">3. The BETWEEN Optimization</a></div>
+<div class="fancy-toc1"><a href="#or_optimizations">4. OR Optimizations</a></div>
+<div class="fancy-toc1"><a href="#the_like_optimization">5. The LIKE Optimization</a></div>
+<div class="fancy-toc1"><a href="#the_skip_scan_optimization">6. The Skip-Scan Optimization</a></div>
+<div class="fancy-toc1"><a href="#joins">7. Joins</a></div>
+<div class="fancy-toc2"><a href="#order_of_tables_in_a_join">7.1. Order of Tables in a Join</a></div>
+<div class="fancy-toc2"><a href="#manual_control_of_query_plans_using_sqlite_stat_tables">7.2. Manual Control Of Query Plans Using SQLITE_STAT Tables</a></div>
+<div class="fancy-toc2"><a href="#manual_control_of_query_plans_using_cross_join">7.3. Manual Control of Query Plans using CROSS JOIN</a></div>
+<div class="fancy-toc1"><a href="#choosing_between_multiple_indexes">8. Choosing Between Multiple Indexes</a></div>
+<div class="fancy-toc2"><a href="#disqualifying_where_clause_terms_using_unary_">8.1. Disqualifying WHERE Clause Terms using Unary-"+"</a></div>
+<div class="fancy-toc2"><a href="#range_queries">8.2. Range Queries</a></div>
+<div class="fancy-toc1"><a href="#covering_indexes">9. Covering Indexes</a></div>
+<div class="fancy-toc1"><a href="#order_by_optimizations">10. ORDER BY Optimizations</a></div>
+<div class="fancy-toc2"><a href="#partial_order_by_via_index">10.1. Partial ORDER BY via Index</a></div>
+<div class="fancy-toc1"><a href="#subquery_flattening">11. Subquery Flattening</a></div>
+<div class="fancy-toc1"><a href="#subquery_co_routines">12. Subquery Co-routines</a></div>
+<div class="fancy-toc2"><a href="#using_co_routines_to_defer_work_until_after_the_sorting">12.1. Using Co-routines to Defer Work until after the Sorting</a></div>
+<div class="fancy-toc1"><a href="#the_min_max_optimization">13. The MIN/MAX Optimization</a></div>
+<div class="fancy-toc1"><a href="#automatic_indexes">14. Automatic Indexes</a></div>
+<div class="fancy-toc2"><a href="#hash_joins">14.1. Hash Joins</a></div>
+<div class="fancy-toc1"><a href="#the_push_down_optimization">15. The Push-Down Optimization</a></div>
+<div class="fancy-toc1"><a href="#the_outer_join_strength_reduction_optimization">16. The OUTER JOIN Strength Reduction Optimization</a></div>
+<div class="fancy-toc1"><a href="#the_omit_outer_join_optimization">17. The Omit OUTER JOIN Optimization</a></div>
+<div class="fancy-toc1"><a href="#the_constant_propagation_optimization">18. The Constant Propagation Optimization</a></div>
+</div>
+</div>
+<script>
+function toggle_toc(){
+var sub = document.getElementById("toc_sub")
+var mk = document.getElementById("toc_mk")
+if( sub.style.display!="block" ){
+sub.style.display = "block";
+mk.innerHTML = "&#x25bc;";
+} else {
+sub.style.display = "none";
+mk.innerHTML = "&#x25ba;";
+}
+}
+</script>
+</div>
+
+
+
+
+
+<h1 id="introduction"><span>1. </span>Introduction</h1>
+<p>
+  This document provides an overview of how the query planner and optimizer
+  for SQLite works.
+
+
+</p><p>
+  Given a single SQL statement, there might be dozens, hundreds, or even
+  thousands of ways to implement that statement, depending on the complexity
+  of the statement itself and of the underlying database schema.  The 
+  task of the query planner is to select the algorithm that minimizes
+  disk I/O and CPU overhead.
+
+
+</p><p>
+  Additional background information is available in the
+  <a href="queryplanner.html">indexing tutorial</a> document.
+
+
+</p><p>
+  With release 3.8.0 (2013-08-26),
+  the SQLite query planner was reimplemented as the
+  <a href="queryplanner-ng.html">Next Generation Query Planner</a> or "NGQP".  All of the features, techniques,
+  and algorithms described in this document are applicable to both the
+  pre-3.8.0 legacy query planner and to the NGQP.  For further information on
+  how the NGQP differs from the legacy query planner, see the 
+  <a href="queryplanner-ng.html">detailed description of the NGQP</a>.
+
+
+
+
+<a name="where_clause"></a>
+
+</p><h1 id="where_clause_analysis"><span>2. </span>WHERE Clause Analysis</h1>
+
+<p>
+  The WHERE clause on a query is broken up into "terms" where each term
+  is separated from the others by an AND operator.
+  If the WHERE clause is composed of constraints separated by the OR
+  operator then the entire clause is considered to be a single "term"
+  to which the <a href="#or_opt">OR-clause optimization</a> is applied.
+
+</p><p>
+  All terms of the WHERE clause are analyzed to see if they can be
+  satisfied using indexes.
+  To be usable by an index a term must usually be of one of the following
+  forms:
+
+<blockquote><pre><b>
+  </b><i>column</i><b> = </b><i>expression</i><b>
+  </b><i>column</i><b> IS </b><i>expression</i><b>
+  </b><i>column</i><b> &gt; </b><i>expression</i><b>
+  </b><i>column</i><b> &gt;= </b><i>expression</i><b>
+  </b><i>column</i><b> &lt; </b><i>expression</i><b>
+  </b><i>column</i><b> &lt;= </b><i>expression</i><b>
+  </b><i>expression</i><b> = </b><i>column</i><b>
+  </b><i>expression</i><b> &gt; </b><i>column</i><b>
+  </b><i>expression</i><b> &gt;= </b><i>column</i><b>
+  </b><i>expression</i><b> &lt; </b><i>column</i><b>
+  </b><i>expression</i><b> &lt;= </b><i>column</i><b>
+  </b><i>column</i><b> IN (</b><i>expression-list</i><b>)
+  </b><i>column</i><b> IN (</b><i>subquery</i><b>)
+  </b><i>column</i><b> IS NULL
+  </b><i>column</i><b> LIKE </b><i>pattern</i><b>
+  </b><i>column</i><b> GLOB </b><i>pattern</i><b>
+</b></pre></blockquote>
+
+</p><p>
+  If an index is created using a statement like this:
+
+</p><div class="codeblock"><pre>CREATE INDEX idx_ex1 ON ex1(a,b,c,d,e,...,y,z);
+</pre></div>
+
+<p>
+  Then the index might be used if the initial columns of the index
+  (columns a, b, and so forth) appear in WHERE clause terms.
+  The initial columns of the index must be used with
+  the <b>=</b> or <b>IN</b> or <b>IS</b> operators.  
+  The right-most column that is used can employ inequalities.  
+  For the right-most
+  column of an index that is used, there can be up to two inequalities
+  that must sandwich the allowed values of the column between two extremes.
+
+</p><p>
+  It is not necessary for every column of an index to appear in a
+  WHERE clause term in order for that index to be used. 
+  However, there cannot be gaps in the columns of the index that are used.
+  Thus for the example index above, if there is no WHERE clause term
+  that constrains column c, then terms that constrain columns a and b can
+  be used with the index but not terms that constrain columns d through z.
+  Similarly, index columns will not normally be used (for indexing purposes)
+  if they are to the right of a 
+  column that is constrained only by inequalities.
+  (See the <a href="optoverview.html#skipscan">skip-scan optimization</a> below for the exception.)
+
+</p><p>
+  In the case of <a href="expridx.html">indexes on expressions</a>, whenever the word "column" is
+  used in the foregoing text, one can substitute "indexed expression"
+  (meaning a copy of the expression that appears in the <a href="lang_createindex.html">CREATE INDEX</a>
+  statement) and everything will work the same.
+  
+
+<a name="idxexamp"></a>
+
+</p><h2 id="index_term_usage_examples"><span>2.1. </span>Index Term Usage Examples</h2>
+<p>
+  For the index above and WHERE clause like this:
+
+</p><div class="codeblock"><pre>... WHERE a=5 AND b IN (1,2,3) AND c IS NULL AND d='hello'
+</pre></div>
+<p>
+  The first four columns a, b, c, and d of the index would be usable since
+  those four columns form a prefix of the index and are all bound by
+  equality constraints.
+
+</p><p>
+  For the index above and WHERE clause like this:
+
+</p><div class="codeblock"><pre>... WHERE a=5 AND b IN (1,2,3) AND c>12 AND d='hello'
+</pre></div>
+<p>
+  Only columns a, b, and c of the index would be usable.  The d column
+  would not be usable because it occurs to the right of c and c is
+  constrained only by inequalities.
+
+</p><p>
+  For the index above and WHERE clause like this:
+
+</p><div class="codeblock"><pre>... WHERE a=5 AND b IN (1,2,3) AND d='hello'
+</pre></div>
+<p>
+  Only columns a and b of the index would be usable.  The d column
+  would not be usable because column c is not constrained and there can
+  be no gaps in the set of columns that usable by the index.
+
+</p><p>
+  For the index above and WHERE clause like this:
+
+</p><div class="codeblock"><pre>... WHERE b IN (1,2,3) AND c NOT NULL AND d='hello'
+</pre></div>
+<p>
+  The index is not usable at all because the left-most column of the
+  index (column "a") is not constrained.  Assuming there are no other
+  indexes, the query above would result in a full table scan.
+
+</p><p>
+  For the index above and WHERE clause like this:
+
+</p><div class="codeblock"><pre>... WHERE a=5 OR b IN (1,2,3) OR c NOT NULL OR d='hello'
+</pre></div>
+<p>
+  The index is not usable because the WHERE clause terms are connected
+  by OR instead of AND. This query would result in a full table scan.
+  However, if three additional indexes where added that contained columns
+  b, c, and d as their left-most columns, then the
+  <a href="#or_opt">OR-clause optimization</a> might apply.
+
+
+<a name="between_opt"></a>
+
+</p><h1 id="the_between_optimization"><span>3. </span>The BETWEEN Optimization</h1>
+
+<p>
+  If a term of the WHERE clause is of the following form:
+
+<blockquote><pre><b>
+  </b><i>expr1</i><b> BETWEEN </b><i>expr2</i><b> AND </b><i>expr3</i><b>
+</b></pre></blockquote>
+</p><p>
+  Then two "virtual" terms are added as follows:
+
+<blockquote><pre><b>
+  </b><i>expr1</i><b> &gt;= </b><i>expr2</i><b> AND </b><i>expr1</i><b> &lt;= </b><i>expr3</i><b>
+</b></pre></blockquote>
+</p><p>
+  Virtual terms are used for analysis only and do not cause any byte-code
+  to be generated.
+  If both virtual terms end up being used as constraints on an index,
+  then the original BETWEEN term is omitted and the corresponding test
+  is not performed on input rows.
+  Thus if the BETWEEN term ends up being used as an index constraint
+  no tests are ever performed on that term.
+  On the other hand, the
+  virtual terms themselves never causes tests to be performed on
+  input rows.
+  Thus if the BETWEEN term is not used as an index constraint and
+  instead must be used to test input rows, the <i>expr1</i> expression is
+  only evaluated once.
+
+<a name="or_opt"></a>
+
+</p><h1 id="or_optimizations"><span>4. </span>OR Optimizations</h1>
+
+<p>
+  WHERE clause constraints that are connected by OR instead of AND can
+  be handled in two different ways.
+  If a term consists of multiple subterms containing a common column
+  name and separated by OR, like this:
+
+<blockquote><pre><b>
+  </b><i>column</i><b> = </b><i>expr1</i><b> OR </b><i>column</i><b> = </b><i>expr2</i><b> OR </b><i>column</i><b> = </b><i>expr3</i><b> OR ...
+</b></pre></blockquote>
+</p><p>
+  Then that term is rewritten as follows:
+
+<blockquote><pre><b>
+  </b><i>column</i><b> IN (</b><i>expr1</i><b>,</b><i>expr2</i><b>,</b><i>expr3</i><b>,...)
+</b></pre></blockquote>
+</p><p>
+  The rewritten term then might go on to constrain an index using the
+  normal rules for <b>IN</b> operators.  Note that <i>column</i> must be
+  the same column in every OR-connected subterm,
+  although the column can occur on either the left or the right side of
+  the <b>=</b> operator.
+
+</p><p>
+  If and only if the previously described conversion of OR to an IN operator
+  does not work, the second OR-clause optimization is attempted.
+  Suppose the OR clause consists of multiple subterms as follows:
+
+<blockquote><pre><b>
+  </b><i>expr1</i><b> OR </b><i>expr2</i><b> OR </b><i>expr3</i><b>
+</b></pre></blockquote>
+</p><p>
+  Individual subterms might be a single comparison expression like
+  <b>a=5</b> or <b>x&gt;y</b> or they can be 
+  LIKE or BETWEEN expressions, or a subterm
+  can be a parenthesized list of AND-connected sub-subterms.
+  Each subterm is analyzed as if it were itself the entire WHERE clause
+  in order to see if the subterm is indexable by itself.
+  If <u>every</u> subterm of an OR clause is separately indexable
+  then the OR clause might be coded such that a separate index is used
+  to evaluate each term of the OR clause.  One way to think about how
+  SQLite uses separate indexes for each OR clause term is to imagine
+  that the WHERE clause where rewritten as follows:
+
+<blockquote><pre><b>
+  rowid IN (SELECT rowid FROM </b><i>table</i><b> WHERE </b><i>expr1</i><b>
+            UNION SELECT rowid FROM </b><i>table</i><b> WHERE </b><i>expr2</i><b>
+            UNION SELECT rowid FROM </b><i>table</i><b> WHERE </b><i>expr3</i><b>)
+</b></pre></blockquote>
+</p><p>
+  The rewritten expression above is conceptual; WHERE clauses containing
+  OR are not really rewritten this way.
+  The actual implementation of the OR clause uses a mechanism that is
+  more efficient and that works even for <a href="withoutrowid.html">WITHOUT ROWID</a> tables or 
+  tables in which the "rowid" is inaccessible.  Nevertheless,
+  the essence of the implementation is captured by the statement
+  above:  Separate indexes are used to find candidate result rows
+  from each OR clause term and the final result is the union of
+  those rows.
+
+</p><p>
+  Note that in most cases, SQLite will only use a single index for each
+  table in the FROM clause of a query.  The second OR-clause optimization
+  described here is the exception to that rule.  With an OR-clause,
+  a different index might be used for each subterm in the OR-clause.
+
+</p><p>
+  For any given query, the fact that the OR-clause optimization described
+  here can be used does not guarantee that it will be used.
+  SQLite uses a cost-based query planner that estimates the CPU and
+  disk I/O costs of various competing query plans and chooses the plan
+  that it thinks will be the fastest.  If there are many OR terms in
+  the WHERE clause or if some of the indexes on individual OR-clause 
+  subterms are not very selective, then SQLite might decide that it is
+  faster to use a different query algorithm, or even a full-table scan.
+  Application developers can use the
+  <a href="lang_explain.html">EXPLAIN QUERY PLAN</a> prefix on a statement to get a
+  high-level overview of the chosen query strategy.
+
+<a name="like_opt"></a>
+
+</p><h1 id="the_like_optimization"><span>5. </span>The LIKE Optimization</h1>
+
+<p>
+  A WHERE-clause term that uses the <a href="lang_expr.html#like">LIKE</a> or <a href="lang_expr.html#glob">GLOB</a> operator
+  can sometimes be used with an index to do a range search, 
+  almost as if the LIKE or GLOB were an alternative to a <a href="lang_expr.html#between">BETWEEN</a>
+  operator.
+  There are many conditions on this optimization:
+
+</p><p>
+  </p><ol>
+  <li>The right-hand side of the LIKE or GLOB must be either a string literal
+      or a <a href="lang_expr.html#varparam">parameter</a> bound to a string literal
+      that does not begin with a wildcard character.</li>
+  <li>It must not be possible to make the LIKE or GLOB operator true by
+      having a numeric value (instead of a string or blob) on the
+      left-hand side. This means that either:
+      <ol type="A">
+      <li> the left-hand side of the LIKE or GLOB operator is the name
+           of an indexed column with <a href="datatype3.html#affinity">TEXT affinity</a>, or</li>
+      <li> the right-hand side pattern argument does not begin with a
+           minus sign ("-") or a digit.</li>
+      </ol>
+      This constraint arises from the fact that numbers do not sort in
+      lexicographical order.  For example: 9&lt;10 but '9'&gt;'10'.</li>
+  <li>The built-in functions used to implement LIKE and GLOB must not
+      have been overloaded using the <a href="c3ref/create_function.html">sqlite3_create_function()</a> API.</li>
+  <li>For the GLOB operator, the column must be indexed using the 
+      built-in BINARY collating sequence.</li>
+  <li>For the LIKE operator, if <a href="pragma.html#pragma_case_sensitive_like">case_sensitive_like</a> mode is enabled then
+      the column must indexed using BINARY collating sequence, or if
+      <a href="pragma.html#pragma_case_sensitive_like">case_sensitive_like</a> mode is disabled then the column must indexed
+      using built-in NOCASE collating sequence.</li>
+  <li>If the ESCAPE option is used, the ESCAPE character must be ASCII,
+      or a single-byte character in UTF-8.
+  </li></ol>
+
+<p>
+  The LIKE operator has two modes that can be set by a
+  <a href="pragma.html#pragma_case_sensitive_like">pragma</a>.  The
+  default mode is for LIKE comparisons to be insensitive to differences
+  of case for latin1 characters.  Thus, by default, the following
+  expression is true:
+
+</p><div class="codeblock"><pre>'a' LIKE 'A'
+</pre></div>
+<p>
+  If the case_sensitive_like pragma is enabled as follows:
+
+</p><div class="codeblock"><pre>PRAGMA case_sensitive_like=ON;
+</pre></div>
+<p>
+  Then the LIKE operator pays attention to case and the example above would
+  evaluate to false.  Note that case insensitivity only applies to
+  latin1 characters - basically the upper and lower case letters of English
+  in the lower 127 byte codes of ASCII.  International character sets
+  are case sensitive in SQLite unless an application-defined
+  <a href="datatype3.html#collation">collating sequence</a> and <a href="lang_corefunc.html#like">like() SQL function</a> are provided that
+  take non-ASCII characters into account.
+  If an application-defined collating sequence and/or like() SQL
+  function are provided, the LIKE optimization described here will never
+  be taken.
+
+</p><p>
+  The LIKE operator is case insensitive by default because this is what
+  the SQL standard requires.  You can change the default behavior at
+  compile time by using the <a href="compile.html#case_sensitive_like">SQLITE_CASE_SENSITIVE_LIKE</a> command-line option
+  to the compiler.
+
+</p><p>
+  The LIKE optimization might occur if the column named on the left of the
+  operator is indexed using the built-in BINARY collating sequence and
+  case_sensitive_like is turned on.  Or the optimization might occur if
+  the column is indexed using the built-in NOCASE collating sequence and the 
+  case_sensitive_like mode is off.  These are the only two combinations
+  under which LIKE operators will be optimized.
+
+</p><p>
+  The GLOB operator is always case sensitive.  The column on the left side
+  of the GLOB operator must always use the built-in BINARY collating sequence
+  or no attempt will be made to optimize that operator with indexes.
+
+</p><p>
+  The LIKE optimization will only be attempted if
+  the right-hand side of the GLOB or LIKE operator is either
+  literal string or a <a href="lang_expr.html#varparam">parameter</a> that has been <a href="c3ref/bind_blob.html">bound</a>
+  to a string literal.  The string literal must not
+  begin with a wildcard; if the right-hand side begins with a wildcard
+  character then this optimization is not attempted.  If the right-hand side 
+  is a <a href="lang_expr.html#varparam">parameter</a> that is bound to a string, then this optimization is
+  only attempted if the <a href="c3ref/stmt.html">prepared statement</a> containing the expression
+  was compiled with <a href="c3ref/prepare.html">sqlite3_prepare_v2()</a> or <a href="c3ref/prepare.html">sqlite3_prepare16_v2()</a>.
+  The LIKE optimization is not attempted if the
+  right-hand side is a <a href="lang_expr.html#varparam">parameter</a> and the statement was prepared using
+  <a href="c3ref/prepare.html">sqlite3_prepare()</a> or <a href="c3ref/prepare.html">sqlite3_prepare16()</a>.
+
+</p><p>
+  Suppose the initial sequence of non-wildcard characters on the right-hand
+  side of the LIKE or GLOB operator is <i>x</i>.  We are using a single 
+  character to denote this non-wildcard prefix but the reader should
+  understand that the prefix can consist of more than 1 character.
+  Let <i>y</i> be the smallest string that is the same length as /x/ but which
+  compares greater than <i>x</i>.  For example, if <i>x</i> is
+  <tt>'hello'</tt> then
+  <i>y</i> would be <tt>'hellp'</tt>.
+  The LIKE and GLOB optimizations consist of adding two virtual terms
+  like this:
+
+<blockquote><pre><b>
+  </b><i>column</i><b> &gt;= </b><i>x</i><b> AND </b><i>column</i><b> &lt; </b><i>y</i><b>
+</b></pre></blockquote>
+</p><p>
+  Under most circumstances, the original LIKE or GLOB operator is still
+  tested against each input row even if the virtual terms are used to
+  constrain an index.  This is because we do not know what additional
+  constraints may be imposed by characters to the right
+  of the <i>x</i> prefix.  However, if there is only a single
+  global wildcard to the right of <i>x</i>, then the original LIKE or 
+  GLOB test is disabled.
+  In other words, if the pattern is like this:
+
+<blockquote><pre><b>
+  </b><i>column</i><b> LIKE </b><i>x</i><b>%
+  </b><i>column</i><b> GLOB </b><i>x</i><b>*
+</b></pre></blockquote>
+</p><p>
+  then the original LIKE or GLOB tests are disabled when the virtual
+  terms constrain an index because in that case we know that all of the
+  rows selected by the index will pass the LIKE or GLOB test.
+
+</p><p>
+  Note that when the right-hand side of a LIKE or GLOB operator is
+  a <a href="lang_expr.html#varparam">parameter</a> and the statement is prepared using <a href="c3ref/prepare.html">sqlite3_prepare_v2()</a>
+  or <a href="c3ref/prepare.html">sqlite3_prepare16_v2()</a> then the statement is automatically reparsed
+  and recompiled on the first <a href="c3ref/step.html">sqlite3_step()</a> call of each run if the binding
+  to the right-hand side parameter has changed since the previous run.
+  This reparse and recompile is essentially the same action that occurs
+  following a schema change.  The recompile is necessary so that the query
+  planner can examine the new value bound to the right-hand side of the
+  LIKE or GLOB operator and determine whether or not to employ the
+  optimization described above.
+
+<a name="skipscan"></a>
+
+</p><h1 id="the_skip_scan_optimization"><span>6. </span>The Skip-Scan Optimization</h1>
+
+<p>
+  The general rule is that indexes are only useful if there are 
+  WHERE-clause constraints on the left-most columns of the index.
+  However, in some cases,
+  SQLite is able to use an index even if the first few columns of
+  the index are omitted from the WHERE clause but later columns 
+  are included.
+
+
+</p><p>
+  Consider a table such as the following:
+
+</p><div class="codeblock"><pre>CREATE TABLE people(
+  name TEXT PRIMARY KEY,
+  role TEXT NOT NULL,
+  height INT NOT NULL, -- in cm
+  CHECK( role IN ('student','teacher') )
+);
+CREATE INDEX people_idx1 ON people(role, height);
+</pre></div>
+
+<p>
+  The people table has one entry for each person in a large
+  organization.  Each person is either a "student" or a "teacher",
+  as determined by the "role" field.  The table also records the height in
+  centimeters of each person.  The role and height are indexed.
+  Notice that the left-most column of the index is not very
+  selective - it only contains two possible values.
+
+
+</p><p>
+  Now consider a query to find the names of everyone in the
+  organization that is 180cm tall or taller:
+
+
+</p><div class="codeblock"><pre>SELECT name FROM people WHERE height>=180;
+</pre></div>
+
+<p>
+  Because the left-most column of the index does not appear in the
+  WHERE clause of the query, one is tempted to conclude that the
+  index is not usable here.  However, SQLite is able to use the index.
+  Conceptually, SQLite uses the index as if the query were more
+  like the following:
+
+
+</p><div class="codeblock"><pre>SELECT name FROM people
+ WHERE role IN (SELECT DISTINCT role FROM people)
+   AND height>=180;
+</pre></div>
+
+<p>
+  Or this:
+
+</p><div class="codeblock"><pre>SELECT name FROM people WHERE role='teacher' AND height>=180
+UNION ALL
+SELECT name FROM people WHERE role='student' AND height>=180;
+</pre></div>
+
+<p>
+  The alternative query formulations shown above are conceptual only.
+  SQLite does not really transform the query. 
+  The actual query plan is like this:
+  SQLite locates the first possible value for "role", which it
+  can do by rewinding the "people_idx1" index to the beginning and reading
+  the first record.  SQLite stores this first "role" value in an
+  internal variable that we will here call "$role".  Then SQLite
+  runs a query like: "SELECT name FROM people WHERE role=$role AND height>=180".
+  This query has an equality constraint on the left-most column of the
+  index and so the index can be used to resolve that query.  Once
+  that query is finished, SQLite then uses the "people_idx1" index to
+  locate the next value of the "role" column, using code that is logically
+  similar to "SELECT role FROM people WHERE role>$role LIMIT 1".
+  This new "role" value overwrites the $role variable, and the process
+  repeats until all possible values for "role" have been examined.
+
+</p><p>
+  We call this kind of index usage a "skip-scan" because the database
+  engine is basically doing a full scan of the index but it optimizes the
+  scan (making it less than "full") by occasionally skipping ahead to the
+  next candidate value.
+
+</p><p>
+  SQLite might use a skip-scan on an index if it knows that the first
+  one or more columns contain many duplication values.
+  If there are too few duplicates
+  in the left-most columns of the index, then it would
+  be faster to simply step ahead to the next value, and thus do
+  a full table scan, than to do a binary search on an index to locate
+  the next left-column value.
+
+</p><p>
+  The only way that SQLite can know that there are many duplicates
+  in the left-most columns of an index
+  is if the <a href="lang_analyze.html">ANALYZE</a> command has been run
+  on the database.
+  Without the results of ANALYZE, SQLite has to guess at the "shape" of
+  the data in the table, and the default guess is that there are an average
+  of 10 duplicates for every value in the left-most column of the index.
+  Skip-scan only becomes profitable (it only gets to be faster than
+  a full table scan) when the number of duplicates is about 18 or more.
+  Hence, a skip-scan is never used on a database that has not been analyzed.
+
+<a name="joins"></a>
+
+</p><h1 id="joins"><span>7. </span>Joins</h1>
+
+<p>
+  The ON and USING clauses of an inner join are converted into additional
+  terms of the WHERE clause prior to WHERE clause analysis described
+  <a href="#where_clause">above in paragraph 2.0</a>.
+  Thus with SQLite, there is no computational
+  advantage to use the newer SQL92 join syntax
+  over the older SQL89 comma-join syntax.  They both end up accomplishing
+  exactly the same thing on inner joins.
+
+</p><p>
+  For an OUTER JOIN the situation is more complex.  The following
+  two queries are not equivalent:
+
+</p><div class="codeblock"><pre>SELECT * FROM tab1 LEFT JOIN tab2 ON tab1.x=tab2.y;
+SELECT * FROM tab1 LEFT JOIN tab2 WHERE tab1.x=tab2.y;
+</pre></div>
+<p>
+  For an inner join, the two queries above would be identical.  However,
+  special processing applies to the ON and USING clauses of an OUTER join:
+  specifically, the constraints in an ON or USING clause do not apply if
+  the right table of the join is on a null row, but the constraints do apply
+  in the WHERE clause.  The net effect is that putting the ON or USING
+  clause expressions for a LEFT JOIN in the WHERE clause effectively converts
+  the query to an
+  ordinary INNER JOIN - albeit an inner join that runs more slowly.
+
+<a name="table_order"></a>
+
+</p><h2 id="order_of_tables_in_a_join"><span>7.1. </span>Order of Tables in a Join</h2>
+
+<p>
+  The current implementation of 
+  SQLite uses only loop joins.  That is to say, joins are implemented as
+  nested loops.
+
+</p><p>
+  The default order of the nested loops in a join is for the left-most
+  table in the FROM clause to form the outer loop and the right-most
+  table to form the inner loop.
+  However, SQLite will nest the loops in a different order if doing so
+  will help it to select better indexes.
+
+</p><p>
+  Inner joins can be freely reordered.  However a left outer join is
+  neither commutative nor associative and hence will not be reordered.
+  Inner joins to the left and right of the outer join might be reordered
+  if the optimizer thinks that is advantageous but the outer joins are
+  always evaluated in the order in which they occur.
+
+</p><p>
+  SQLite <a href="lang_select.html#crossjoin">treats the CROSS JOIN operator specially</a>.
+  The CROSS JOIN operator is commutative, in theory.  However, SQLite chooses to
+  never reorder tables in a CROSS JOIN.  This provides a mechanism
+  by which the programmer can force SQLite to choose a particular loop nesting
+  order.  
+
+</p><p>
+  When selecting the order of tables in a join, SQLite uses an efficient
+  polynomial-time algorithm.  Because of this,
+  SQLite is able to plan queries with 50- or 60-way joins in a matter of
+  microseconds
+
+</p><p>
+  Join reordering is automatic and usually works well enough that
+  programmers do not have to think about it, especially if <a href="lang_analyze.html">ANALYZE</a>
+  has been used to gather statistics about the available indexes,
+  though occasionally some hints from the programmer are needed.
+  Consider, for example, the following schema:
+
+</p><div class="codeblock"><pre>CREATE TABLE node(
+   id INTEGER PRIMARY KEY,
+   name TEXT
+);
+CREATE INDEX node_idx ON node(name);
+CREATE TABLE edge(
+   orig INTEGER REFERENCES node,
+   dest INTEGER REFERENCES node,
+   PRIMARY KEY(orig, dest)
+);
+CREATE INDEX edge_idx ON edge(dest,orig);
+</pre></div>
+<p>
+  The schema above defines a directed graph with the ability to store a
+  name at each node. Now consider a query against this schema:
+
+</p><div class="codeblock"><pre>SELECT *
+  FROM edge AS e,
+       node AS n1,
+       node AS n2
+ WHERE n1.name = 'alice'
+   AND n2.name = 'bob'
+   AND e.orig = n1.id
+   AND e.dest = n2.id;
+</pre></div>
+<p>
+  This query asks for is all information about edges that go from
+  nodes labeled "alice" to nodes labeled "bob".
+  The query optimizer in SQLite has basically two choices on how to
+  implement this query.  (There are actually six different choices, but
+  we will only consider two of them here.)
+  Pseudocode below demonstrating these two choices.
+
+<a name="option1"></a>
+
+</p><p>Option 1:
+</p><div class="codeblock"><pre>foreach n1 where n1.name='alice' do:
+  foreach n2 where n2.name='bob' do:
+    foreach e where e.orig=n1.id and e.dest=n2.id
+      return n1.*, n2.*, e.*
+    end
+  end
+end
+</pre></div>
+<a name="option2"></a>
+
+<p>Option 2:
+</p><div class="codeblock"><pre>foreach n1 where n1.name='alice' do:
+  foreach e where e.orig=n1.id do:
+    foreach n2 where n2.id=e.dest and n2.name='bob' do:
+      return n1.*, n2.*, e.*
+    end
+  end
+end
+</pre></div>
+<p>
+  The same indexes are used to speed up every loop in both implementation
+  options.
+  The only difference in these two query plans is the order in which
+  the loops are nested.
+
+</p><p>
+  So which query plan is better? It turns out that the answer depends on
+  what kind of data is found in the node and edge tables.
+
+</p><p>
+  Let the number of alice nodes be M and the number of bob nodes be N.
+  Consider two scenarios. In the first scenario, M and N are both 2 but
+  there are thousands of edges on each node. In this case, option 1 is
+  preferred. With option 1, the inner loop checks for the existence of
+  an edge between a pair of nodes and outputs the result if found. 
+  Because there are only 2 alice and bob nodes each, the inner loop
+  only has to run four times and the query is very quick. Option 2 would
+  take much longer here. The outer loop of option 2 only executes twice,
+  but because there are a large number of edges leaving each alice node,
+  the middle loop has to iterate many thousands of times. It will be
+  much slower. So in the first scenario, we prefer to use option 1.
+
+</p><p>
+  Now consider the case where M and N are both 3500. Alice nodes are
+  abundant.  This time suppose each of these nodes is connected by only one
+  or two edges. Now option 2 is preferred. With option 2,
+  the outer loop still has to run 3500 times, but the middle loop only
+  runs once or twice for each outer loop and the inner loop will only
+  run once for each middle loop, if at all. So the total number of
+  iterations of the inner loop is around 7000. Option 1, on the other
+  hand, has to run both its outer loop and its middle loop 3500 times
+  each, resulting in 12 million iterations of the middle loop.
+  Thus in the second scenario, option 2 is nearly 2000 times faster
+  than option 1.
+
+</p><p>
+  So you can see that depending on how the data is structured in the table,
+  either query plan 1 or query plan 2 might be better.  Which plan does
+  SQLite choose by default?  As of version 3.6.18, without running <a href="lang_analyze.html">ANALYZE</a>,
+  SQLite will choose option 2.
+  If the <a href="lang_analyze.html">ANALYZE</a> command is run in order to gather statistics,
+  a different choice might be made if the statistics indicate that the
+  alternative is likely to run faster.
+
+<a name="manctrl"></a>
+
+</p><h2 id="manual_control_of_query_plans_using_sqlite_stat_tables"><span>7.2. </span>Manual Control Of Query Plans Using SQLITE_STAT Tables</h2>
+
+<p>
+  SQLite provides the ability for advanced programmers to exercise control
+  over the query plan chosen by the optimizer. One method for doing this
+  is to fudge the <a href="lang_analyze.html">ANALYZE</a> results in the <a href="fileformat2.html#stat1tab">sqlite_stat1</a>, 
+  <a href="fileformat2.html#stat3tab">sqlite_stat3</a>, and/or <a href="fileformat2.html#stat4tab">sqlite_stat4</a> tables.  This is not
+  recommended for most situations.
+
+<a name="crossjoin"></a>
+
+</p><h2 id="manual_control_of_query_plans_using_cross_join"><span>7.3. </span>Manual Control of Query Plans using CROSS JOIN</h2>
+
+<p>
+  Programmers can force SQLite to use a particular loop nesting order
+  for a join by using the CROSS JOIN operator instead of just JOIN, 
+  INNER JOIN, NATURAL JOIN, or a "," join.  Though CROSS JOINs are
+  commutative in theory, SQLite chooses to never reorder the tables in
+  a CROSS JOIN.  Hence, the left table of a CROSS JOIN will always be
+  in an outer loop relative to the right table.
+
+</p><p>
+  In the following query, the optimizer is free to reorder the 
+  tables of FROM clause any way it sees fit:
+
+</p><div class="codeblock"><pre>SELECT *
+  FROM node AS n1,
+       edge AS e,
+       node AS n2
+ WHERE n1.name = 'alice'
+   AND n2.name = 'bob'
+   AND e.orig = n1.id
+   AND e.dest = n2.id;
+</pre></div>
+<p>
+  In the following logically equivalent formulation of the same query,
+  the substitution of "CROSS JOIN" for the "," means that the order
+  of tables must be N1, E, N2.
+
+</p><div class="codeblock"><pre>SELECT *
+  FROM node AS n1 CROSS JOIN
+       edge AS e CROSS JOIN
+       node AS n2
+ WHERE n1.name = 'alice'
+   AND n2.name = 'bob'
+   AND e.orig = n1.id
+   AND e.dest = n2.id;
+</pre></div>
+<p>
+  In the latter query, the query plan must be 
+  <a href="#option2">option 2</a>.  Note that
+  you must use the keyword "CROSS" in order to disable the table reordering
+  optimization; INNER JOIN, NATURAL JOIN, JOIN, and other similar
+  combinations work just like a comma join in that the optimizer is
+  free to reorder tables as it sees fit. (Table reordering is also
+  disabled on an outer join, but that is because outer joins are not
+  associative or commutative. Reordering tables in OUTER JOIN changes
+  the result.)
+
+</p><p>
+  See "<a href="queryplanner-ng.html#fossilcasestudy">The Fossil NGQP Upgrade Case Study</a>" for another real-world example
+  of using CROSS JOIN to manually control the nesting order of a join.
+  The <a href="queryplanner-ng.html#howtofix">query planner checklist</a> found later in the same document provides
+  further guidance on manual control of the query planner.
+
+<a name="multi_index"></a>
+
+</p><h1 id="choosing_between_multiple_indexes"><span>8. </span>Choosing Between Multiple Indexes</h1>
+
+<p>
+  Each table in the FROM clause of a query can use at most one index
+  (except when the <a href="#or_opt">OR-clause optimization</a> comes into
+  play)
+  and SQLite strives to use at least one index on each table.  Sometimes,
+  two or more indexes might be candidates for use on a single table.
+  For example:
+
+</p><div class="codeblock"><pre>CREATE TABLE ex2(x,y,z);
+CREATE INDEX ex2i1 ON ex2(x);
+CREATE INDEX ex2i2 ON ex2(y);
+SELECT z FROM ex2 WHERE x=5 AND y=6;
+</pre></div>
+<p>
+  For the SELECT statement above, the optimizer can use the ex2i1 index
+  to lookup rows of ex2 that contain x=5 and then test each row against
+  the y=6 term.  Or it can use the ex2i2 index to lookup rows
+  of ex2 that contain y=6 then test each of those rows against the
+  x=5 term.
+
+</p><p>
+  When faced with a choice of two or more indexes, SQLite tries to estimate
+  the total amount of work needed to perform the query using each option.
+  It then selects the option that gives the least estimated work.
+
+</p><p>
+  To help the optimizer get a more accurate estimate of the work involved
+  in using various indexes, the user may optionally run the <a href="lang_analyze.html">ANALYZE</a> command.
+  The <a href="lang_analyze.html">ANALYZE</a> command scans all indexes of database where there might
+  be a choice between two or more indexes and gathers statistics on the
+  selectiveness of those indexes.  The statistics gathered by
+  this scan are stored in special database tables names shows names all
+  begin with "<b>sqlite_stat</b>".
+  The content of these tables is not updated as the database
+  changes so after making significant changes it might be prudent to
+  rerun <a href="lang_analyze.html">ANALYZE</a>.
+  The results of an ANALYZE command are only available to database connections
+  that are opened after the ANALYZE command completes.
+
+</p><p>
+  The various <b>sqlite_stat</b><i>N</i> tables contain information on how
+  selective the various indexes are.  For example, the <a href="fileformat2.html#stat1tab">sqlite_stat1</a>
+  table might indicate that an equality constraint on column x reduces the
+  search space to 10 rows on average, whereas an equality constraint on
+  column y reduces the search space to 3 rows on average.  In that case,
+  SQLite would prefer to use index ex2i2 since that index is more selective.
+
+<a name="uplus"></a>
+
+</p><h2 id="disqualifying_where_clause_terms_using_unary_"><span>8.1. </span>Disqualifying WHERE Clause Terms using Unary-"+"</h2>
+
+<p>
+  Terms of the WHERE clause can be manually disqualified for use with
+  indexes by prepending a unary <b>+</b> operator to the column name.  The
+  unary <b>+</b> is a no-op and will not generate any byte code in the prepared
+  statement.
+  However, the unary <b>+</b> operator will prevent the term from
+  constraining an index.
+  So, in the example above, if the query were rewritten as:
+
+</p><div class="codeblock"><pre>SELECT z FROM ex2 WHERE +x=5 AND y=6;
+</pre></div>
+<p>
+  The <b>+</b> operator on the <b>x</b> column will prevent that term from 
+  constraining an index.  This would force the use of the ex2i2 index.
+
+</p><p>
+  Note that the unary <b>+</b> operator also removes 
+  <a href="datatype3.html#affinity">type affinity</a> from
+  an expression, and in some cases this can cause subtle changes in
+  the meaning of an expression.
+  In the example above,
+  if column <b>x</b> has <a href="datatype3.html#affinity">TEXT affinity</a>
+  then the comparison "x=5" will be done as text.  The <b>+</b> operator
+  removes the affinity.  So the comparison "<b>+x=5</b>" will compare the text
+  in column <b>x</b> with the numeric value 5 and will always be false.
+
+<a name="rangequery"></a>
+
+</p><h2 id="range_queries"><span>8.2. </span>Range Queries</h2>
+
+<p>
+  Consider a slightly different scenario:
+
+</p><div class="codeblock"><pre>CREATE TABLE ex2(x,y,z);
+CREATE INDEX ex2i1 ON ex2(x);
+CREATE INDEX ex2i2 ON ex2(y);
+SELECT z FROM ex2 WHERE x BETWEEN 1 AND 100 AND y BETWEEN 1 AND 100;
+</pre></div>
+<p>
+  Further suppose that column x contains values spread out
+  between 0 and 1,000,000 and column y contains values
+  that span between 0 and 1,000.  In that scenario,
+  the range constraint on column x should reduce the search space by
+  a factor of 10,000 whereas the range constraint on column y should
+  reduce the search space by a factor of only 10.  So the ex2i1 index
+  should be preferred.
+
+</p><p>
+  SQLite will make this determination, but only if it has been compiled
+  with <a href="compile.html#enable_stat3">SQLITE_ENABLE_STAT3</a> or <a href="compile.html#enable_stat4">SQLITE_ENABLE_STAT4</a>.
+  The <a href="compile.html#enable_stat3">SQLITE_ENABLE_STAT3</a> and <a href="compile.html#enable_stat4">SQLITE_ENABLE_STAT4</a> options causes
+  the <a href="lang_analyze.html">ANALYZE</a> command to collect a histogram of column content in the
+  <a href="fileformat2.html#stat3tab">sqlite_stat3</a> or <a href="fileformat2.html#stat4tab">sqlite_stat4</a> tables and to use this histogram to 
+  make a better guess at the best query to use for range constraints
+  such as the above.  The main difference between STAT3 and STAT4 is
+  that STAT3 records histogram data for only the left-most column of
+  an index whereas STAT4 records histogram data for all columns of an
+  index.  For single-column indexes, STAT3 and STAT4 work the same.
+
+</p><p>
+  The histogram data is only useful if the right-hand side of the constraint
+  is a simple compile-time constant or <a href="lang_expr.html#varparam">parameter</a> and not an expression.
+
+</p><p>
+  Another limitation of the histogram data is that it only applies to the
+  left-most column on an index.  Consider this scenario:
+
+</p><div class="codeblock"><pre>CREATE TABLE ex3(w,x,y,z);
+CREATE INDEX ex3i1 ON ex2(w, x);
+CREATE INDEX ex3i2 ON ex2(w, y);
+SELECT z FROM ex3 WHERE w=5 AND x BETWEEN 1 AND 100 AND y BETWEEN 1 AND 100;
+</pre></div>
+<p>
+  Here the inequalities are on columns x and y which are not the
+  left-most index columns.  Hence, the histogram data which is collected no
+  left-most column of indexes is useless in helping to choose between the
+  range constraints on columns x and y.
+
+<a name="covidx"></a>
+
+</p><h1 id="covering_indexes"><span>9. </span>Covering Indexes</h1>
+
+<p>
+  When doing an indexed lookup of a row, the usual procedure is to
+  do a binary search on the index to find the index entry, then extract
+  the <a href="lang_createtable.html#rowid">rowid</a> from the index and use that <a href="lang_createtable.html#rowid">rowid</a> to do a binary search on
+  the original table.  Thus a typical indexed lookup involves two
+  binary searches.
+  If, however, all columns that were to be fetched from the table are
+  already available in the index itself, SQLite will use the values
+  contained in the index and will never look up the original table
+  row.  This saves one binary search for each row and can make many
+  queries run twice as fast.
+
+</p><p>
+  When an index contains all of the data needed for a query and when the
+  original table never needs to be consulted, we call that index a
+  "covering index".
+
+<a name="order_by"></a>
+
+</p><h1 id="order_by_optimizations"><span>10. </span>ORDER BY Optimizations</h1>
+
+<p>
+  SQLite attempts to use an index to satisfy the ORDER BY clause of a
+  query when possible.
+  When faced with the choice of using an index to satisfy WHERE clause
+  constraints or satisfying an ORDER BY clause, SQLite does the same
+  cost analysis described above
+  and chooses the index that it believes will result in the fastest answer.
+
+</p><p>
+  SQLite will also attempt to use indexes to help satisfy GROUP BY clauses
+  and the DISTINCT keyword.  If the nested loops of the join can be arranged
+  such that rows that are equivalent for the GROUP BY or for the DISTINCT are
+  consecutive, then the GROUP BY or DISTINCT logic can determine if the 
+  current row is part of the same group or if the current row is distinct
+  simply by comparing the current row to the previous row.
+  This can be much faster than the alternative of comparing each row to
+  all prior rows.
+
+<a name="partsort"></a>
+
+</p><h2 id="partial_order_by_via_index"><span>10.1. </span>Partial ORDER BY via Index</h2>
+
+<p>
+  If a query contains an ORDER BY clause with multiple terms, it might
+  be that SQLite can use indexes to cause rows to come out in the order
+  of some prefix of the terms in the ORDER BY but that later terms in
+  the ORDER BY are not satisfied.  In that case, SQLite does block sorting.
+  Suppose the ORDER BY clause has four terms and the natural order of the
+  query results in rows appearing in order of the first two terms.  As
+  each row is output by the query engine and enters the sorter, the 
+  outputs in the current row corresponding to the first two terms of 
+  the ORDER BY are compared against the previous row.  If they have
+  changed, the current sort is finished and output and a new sort is
+  started.  This results in a slightly faster sort.  Even bigger
+  advantages are that many fewer rows need to be held in memory,
+  reducing memory requirements, and outputs can begin to appear before
+  the core query has run to completion.
+
+<a name="flattening"></a>
+
+</p><h1 id="subquery_flattening"><span>11. </span>Subquery Flattening</h1>
+
+<p>
+  When a subquery occurs in the FROM clause of a SELECT, the simplest
+  behavior is to evaluate the subquery into a transient table, then run
+  the outer SELECT against the transient table.  Such a plan
+  can be suboptimal since the transient table will not have any indexes
+  and the outer query (which is likely a join) will be forced to do a
+  full table scan on the transient table.
+
+</p><p>
+  To overcome this problem, SQLite attempts to flatten subqueries in
+  the FROM clause of a SELECT.
+  This involves inserting the FROM clause of the subquery into the
+  FROM clause of the outer query and rewriting expressions in
+  the outer query that refer to the result set of the subquery.
+  For example:
+
+</p><div class="codeblock"><pre>SELECT t1.a, t2.b FROM t2, (SELECT x+y AS a FROM t1 WHERE z&lt;100) WHERE a&gt;5
+</pre></div>
+<p>
+  Would be rewritten using query flattening as:
+
+</p><div class="codeblock"><pre>SELECT t1.x+t1.y AS a, t2.b FROM t2, t1 WHERE z&lt;100 AND a&gt;5
+</pre></div>
+<p>
+  There is a long list of conditions that must all be met in order for
+  query flattening to occur.  Some of the constraints are marked as 
+  obsolete by italic text.  These extra constraints are retained in the
+  documentation to preserve the numbering of the other constraints.
+
+</p><p>
+  Casual readers are not expected to understand all of these rules.
+  A key take-away from this section is that the rules for determining
+  if query flatting is safe or unsafe are subtle and
+  complex.  There have been multiple bugs over the years caused by
+  over-aggressive query flattening.  On the other hand, performance
+  of complex queries and/or queries involving views tends to suffer
+  if query flattening is more conservative.
+
+</p><p>
+  </p><ol>
+  <li value="1">  <i>(Obsolete.  Query flattening is no longer
+                      attempted for aggregate subqueries.)</i>
+
+  </li><li value="2">  <i>(Obsolete.  Query flattening is no longer
+                      attempted for aggregate subqueries.)</i>
+
+  </li><li value="3">
+  If the subquery is the right operand of a LEFT JOIN then
+   <ol type="a"><li> the subquery may not be a join, and
+   </li><li> the FROM clause of the subquery may
+   not contain a virtual table, and
+   </li><li> the outer query may not be an aggregate.</li></ol></li>
+
+  <li value="4">  The subquery is not DISTINCT.
+
+  </li><li value="5"> <i>(Subsumed into constraint 4)</i>
+
+  </li><li value="6"> <i>(Obsolete.  Query flattening is no longer
+                      attempted for aggregate subqueries.)</i>
+
+  </li><li value="7">
+  The subquery has a FROM clause.
+
+  </li><li value="8">
+  The subquery does not use LIMIT or the outer query is not a join.
+
+  </li><li value="9">
+  The subquery does not use LIMIT or the outer query does not use
+  aggregates.
+
+  </li><li value="10"> <i>(Restriction relaxed in 2005)</i>
+
+  </li><li value="11">
+  The subquery and the outer query do not both have ORDER BY clauses.
+
+  </li><li value="12"> <i>(Subsumed into constraint 3)</i>
+
+  </li><li value="13">  The subquery and outer query do not both use LIMIT.
+
+  </li><li value="14">  The subquery does not use OFFSET.
+
+  </li><li value="15">
+  If the outer query is part of a compound select, then the
+  subquery may not have a LIMIT clause.
+
+  </li><li value="16">
+  If the outer query is an aggregate, then the subquery may
+  not contain ORDER BY. 
+
+  </li><li value="17">
+  If the sub-query is a compound SELECT, then
+  <ol type="a">
+  <li> all compound operators must be UNION ALL, and
+  </li><li> no terms with the subquery compound may be aggregate
+       or DISTINCT, and
+  </li><li> every term within the subquery must have a FROM clause, and
+  </li><li> the outer query may not be an aggregate, DISTINCT query, or join.
+  </li></ol>
+
+  The parent and sub-query may contain WHERE clauses. Subject to
+  rules (11), (12) and (13), they may also contain ORDER BY,
+  LIMIT and OFFSET clauses.
+
+  </li><li value="18">
+  If the sub-query is a compound select, then all terms of the
+  ORDER by clause of the parent must be simple references to 
+  columns of the sub-query.
+
+  </li><li value="19">
+  If the subquery uses LIMIT then the outer query may not
+  have a WHERE clause.
+
+  </li><li value="20">
+  If the sub-query is a compound select, then it must not use
+   an ORDER BY clause.
+
+  </li><li value="21">
+  If the subquery uses LIMIT, then the outer query may not be
+  DISTINCT.
+
+  </li><li value="22"> The subquery may not be a recursive CTE.
+
+  </li><li value="23"> <i>(Subsumed into constraint 17d.)</i>
+
+  </li><li value="24"> <i>(Obsolete. Query flattening is no longer
+                      attempted for aggregate subqueries.)</i>
+  </li></ol>
+
+
+<p>
+  Query flattening is an important optimization when views are used as
+  each use of a view is translated into a subquery.
+
+<a name="coroutines"></a>
+
+</p><h1 id="subquery_co_routines"><span>12. </span>Subquery Co-routines</h1>
+
+<p>
+  Prior to SQLite 3.7.15 (2012-12-12),
+  a subquery in the FROM clause would be
+  either flattened into the outer query, or else the subquery would be run
+  to completion
+  before the outer query started, the result set from the subquery
+  would be stored in a transient table,
+  and then the transient table would be used in the outer query.  Newer
+  versions of SQLite have a third option, which is to implement the subquery
+  using a co-routine.
+
+</p><p>
+  A co-routine is like a subroutine in that it runs in the same thread
+  as the caller and eventually returns control back to the caller.  The
+  difference is that a co-routine also has the ability to return
+  before it has finished, and then resume where it left off the next
+  time it is called.
+
+</p><p>
+  When a subquery is implemented as a co-routine, byte-code is generated
+  to implement the subquery as if it were a standalone query, except
+  instead of returning rows of results back to the application, the
+  co-routine yields control back to the caller after each row is computed.
+  The caller can then use that one computed row as part of its computation,
+  then invoke the co-routine again when it is ready for the next row.
+
+</p><p>
+  Co-routines are better than storing the complete result set of the subquery
+  in a transient table because co-routines use less memory.  With a co-routine,
+  only a single row of the result needs to be remembered, whereas all rows of
+  the result must be stored for a transient table.  Also, because the
+  co-routine does not need to run to completion before the outer query
+  begins its work, the first rows of output can appear much sooner, and if
+  the overall query is abandoned before it has finished, less work is done
+  overall.
+
+</p><p>
+  On the other hand, if the result of the subquery must be scanned multiple
+  times (because, for example, it is just one table in a join) then it
+  is better to use a transient table to remember the entire result of the
+  subquery, in order to avoid computing the subquery more than once.
+
+<a name="deferred_work"></a>
+
+</p><h2 id="using_co_routines_to_defer_work_until_after_the_sorting"><span>12.1. </span>Using Co-routines to Defer Work until after the Sorting</h2>
+
+<p>
+  As of SQLite version 3.21.0 (2017-10-24), the query planner will
+  always prefer to use a co-routine to implement FROM-clause subqueries 
+  that contains an ORDER BY clause and that are not part of a join when
+  the result set of the outer query is "complex".  This feature allows
+  applications to shift expensive computations from before the
+  sorter until after the sorter, which can result in faster operation.
+  For example, consider this query:
+
+</p><div class="codeblock"><pre>SELECT expensive_function(a) FROM tab ORDER BY date DESC LIMIT 5;
+</pre></div>
+<p>
+  The goal of this query is to compute some value for the five most
+  recent entries in the table.  In the query above, the
+  "expensive_function()" is invoked prior to the sort and thus is
+  invoked on every row of the table, even
+  rows that are ultimately omitted due to the LIMIT clause.
+  A co-routine can be used to work around this:
+
+</p><div class="codeblock"><pre>SELECT expensive_function(a) FROM (
+  SELECT a FROM tab ORDER BY date DESC LIMIT 5
+);
+</pre></div>
+<p>
+  In the revised query, the subquery implemented by a co-routine computes
+  the five most recent values for "a".  Those five values are passed from the
+  co-routine up into the outer query where the "expensive_function()" is
+  invoked on only the specific rows that the application cares about.
+
+</p><p>
+  The query planner in future versions of SQLite might grow smart enough
+  to make transformations such as the above automatically, in both directions.
+  That is to say, future versions of SQLite might transform queries of the
+  first form into the second, or queries written the second way into the
+  first.  As of SQLite version 3.22.0 (2018-01-22), the query planner
+  will flatten the subquery if the outer query does not make use of any
+  user-defined functions or subqueries in its result set.  For the examples
+  shown above, however, SQLite implements each of the queries as
+  written.
+
+<a name="minmax"></a>
+
+</p><h1 id="the_min_max_optimization"><span>13. </span>The MIN/MAX Optimization</h1>
+
+<p>
+  Queries that contain a single MIN() or MAX() aggregate function whose
+  argument is the left-most column of an index might be satisfied
+  by doing a single index lookup rather than by scanning the entire table.
+  Examples:
+
+</p><div class="codeblock"><pre>SELECT MIN(x) FROM table;
+SELECT MAX(x)+1 FROM table;
+</pre></div>
+
+<a name="autoindex"></a>
+
+<h1 id="automatic_indexes"><span>14. </span>Automatic Indexes</h1>
+
+<p>
+  When no indexes are available to aid the evaluation of a query, SQLite
+  might create an automatic index that lasts only for the duration
+  of a single SQL statement.
+  Since the cost of constructing the automatic index is
+  O(NlogN) (where N is the number of entries in the table) and the cost of
+  doing a full table scan is only O(N), an automatic index will
+  only be created if SQLite expects that the lookup will be run more than
+  logN times during the course of the SQL statement. Consider an example:
+
+</p><div class="codeblock"><pre>CREATE TABLE t1(a,b);
+CREATE TABLE t2(c,d);
+-- Insert many rows into both t1 and t2
+SELECT * FROM t1, t2 WHERE a=c;
+</pre></div>
+<p>
+  In the query above, if both t1 and t2 have approximately N rows, then
+  without any indexes the query will require O(N*N) time.  On the other
+  hand, creating an index on table t2 requires O(NlogN) time and using
+  that index to evaluate the query requires an additional O(NlogN) time.
+  In the absence of <a href="lang_analyze.html">ANALYZE</a> information, SQLite guesses that N is one
+  million and hence it believes that constructing the automatic index will
+  be the cheaper approach.
+
+</p><p>
+  An automatic index might also be used for a subquery:
+
+</p><div class="codeblock"><pre>CREATE TABLE t1(a,b);
+CREATE TABLE t2(c,d);
+-- Insert many rows into both t1 and t2
+SELECT a, (SELECT d FROM t2 WHERE c=b) FROM t1;
+</pre></div>
+<p>
+  In this example, the t2 table is used in a subquery to translate values
+  of the t1.b column.  If each table contains N rows, SQLite expects that
+  the subquery will run N times, and hence it will believe it is faster
+  to construct an automatic, transient index on t2 first and then use
+  that index to satisfy the N instances of the subquery.
+
+</p><p>
+  The automatic indexing capability can be disabled at run-time using
+  the <a href="pragma.html#pragma_automatic_index">automatic_index pragma</a>.  Automatic indexing is turned on by
+  default, but this can be changed so that automatic indexing is off
+  by default using the <a href="compile.html#default_automatic_index">SQLITE_DEFAULT_AUTOMATIC_INDEX</a> compile-time option.
+  The ability to create automatic indexes can be completely disabled by
+  compiling with the <a href="compile.html#omit_automatic_index">SQLITE_OMIT_AUTOMATIC_INDEX</a> compile-time option.
+
+</p><p>
+  In SQLite <a href="releaselog/3_8_0.html">version 3.8.0</a> (2013-08-26) and later, 
+  an <a href="rescode.html#warning_autoindex">SQLITE_WARNING_AUTOINDEX</a> message is sent
+  to the <a href="errlog.html">error log</a> every time a statement is prepared that uses an
+  automatic index.  Application developers can and should use these warnings
+  to identify the need for new persistent indexes in the schema.
+
+</p><p>
+  Do not confuse automatic indexes with the <a href="fileformat2.html#intschema">internal indexes</a> (having names
+  like "sqlite_autoindex_<i>table</i>_<i>N</i>") that are sometimes
+  created to implement a <a href="lang_createtable.html#primkeyconst">PRIMARY KEY constraint</a> or <a href="lang_createtable.html#uniqueconst">UNIQUE constraint</a>.
+  The automatic indexes described here exist only for the duration of a
+  single query, are never persisted to disk, and are only visible to a
+  single database connection.  Internal indexes are part of the implementation
+  of PRIMARY KEY and UNIQUE constraints, are long-lasting and persisted
+  to disk, and are visible to all database connections.  The term "autoindex"
+  appears in the names of <a href="fileformat2.html#intschema">internal indexes</a> for legacy reasons and does
+  not indicate that internal indexes and automatic indexes are related.
+
+<a name="hashjoin"></a>
+
+</p><h2 id="hash_joins"><span>14.1. </span>Hash Joins</h2>
+
+<p>
+ An automatic index is about the same thing as a
+ <a href="https://en.wikipedia.org/wiki/Hash_join">hash join</a>.  The only difference
+ is that a B-Tree is used instead of a hash table.  If you are willing to
+ say that the transient B-Tree constructed for an automatic index is
+ really just a fancy hash table, then a query that uses an automatic
+ index is just a hash join.
+
+</p><p>
+ SQLite constructs a transient index instead of a hash table in this
+ instance because it already has a robust and high performance B-Tree
+ implementation at hand, whereas a hash-table would need to be added.
+ Adding a separate hash table implementation to handle this one case
+ would increase the size of the library (which is designed for use on
+ low-memory embedded devices) for minimal performance gain.  SQLite might
+ be enhanced with a hash-table implementation someday, but for now it seems
+ better to continue using automatic indexes in cases where client/server
+ database engines might use a hash join.
+
+<a name="pushdown"></a>
+
+</p><h1 id="the_push_down_optimization"><span>15. </span>The Push-Down Optimization</h1>
+
+<p>
+  If a subquery cannot be <a href="optoverview.html#flattening">flattened</a> into the outer query, it might
+  still be possible to enhance performance by "pushing down" WHERE clause
+  terms from the outer query into the subquery.  Consider an example:
+
+</p><div class="codeblock"><pre>CREATE TABLE t1(a INT, b INT);
+CREATE TABLE t2(x INT, y INT);
+CREATE VIEW v1(a,b) AS SELECT DISTINCT a, b FROM t1;
+
+SELECT x, y, b
+  FROM t2 JOIN v1 ON (x=a)
+ WHERE b BETWEEN 10 AND 20;
+</pre></div>
+
+<p>
+  The view v1 cannot be <a href="optoverview.html#flattening">flattened</a> because it is DISTINCT.  It must
+  instead be run as a subquery with the results being stored in a
+  transient table, then the join is performed between t2 and the
+  transient table.  The push-down optimization pushes down the
+  "b BETWEEN 10 AND 20" term into the view.  This makes the transient
+  table smaller, and helps the subquery to run faster if there
+  is an index on t1.b.  The resulting evaluation is like this:
+
+</p><div class="codeblock"><pre>SELECT x, y, b
+  FROM t2
+  JOIN (SELECT DISTINCT a, b FROM t1 WHERE b BETWEEN 10 AND 20)
+ WHERE b BETWEEN 10 AND 20;
+</pre></div>
+
+<p>
+  The push-down optimization cannot always be used.  For example,
+  if the subquery contains a LIMIT, then pushing down any part of
+  the WHERE clause from the outer query could change the result of
+  the inner query.  There are other restrictions, explained in a
+  comment in the source code on the pushDownWhereTerms() routine
+  that implements this optimization.
+
+<a name="leftjoinreduction"></a>
+
+</p><h1 id="the_outer_join_strength_reduction_optimization"><span>16. </span>The OUTER JOIN Strength Reduction Optimization</h1>
+
+<p>
+  An OUTER JOIN (either a LEFT JOIN, a RIGHT JOIN, or a FULL JOIN)
+  can sometimes be simplified.  A LEFT or RIGHT JOIN can be converted
+  into an ordinary (INNER) JOIN, or a FULL JOIN might be converted into
+  either a LEFT or a RIGHT JOIN.  This can happen if there are terms
+  in the WHERE clause that guarantee the same result after simplification.
+  For example, if any
+  column in the right-hand table of the LEFT JOIN must be non-NULL
+  in order for the WHERE clause to be true, then the LEFT JOIN is
+  demoted to an ordinary JOIN.
+
+</p><p>
+  The theorem prover that determines whether a join can be simplified is
+  imperfect.  It sometimes returns a false negative.  In other words,
+  it sometimes fails to prove that reducing the strength of an OUTER JOIN
+  is safe when in fact it is safe.
+  For example, the prover does not know
+  the <a href="lang_datefunc.html#dttm">datetime() SQL function</a> will always return NULL if its first
+  argument is NULL, and so it will not recognize that the LEFT JOIN
+  in the following query could be strength-reduced:
+
+</p><div class="codeblock"><pre>SELECT urls.url
+  FROM urls
+  LEFT JOIN
+    (SELECT *
+      FROM (SELECT url_id AS uid, max(retrieval_time) AS rtime
+              FROM lookups GROUP BY 1 ORDER BY 1)
+      WHERE uid IN (358341,358341,358341)
+    ) recent
+    ON u.source_seed_id = recent.xyz OR u.url_id = recent.xyz
+ WHERE
+     DATETIME(recent.rtime) > DATETIME('now', '-5 days');
+</pre></div>
+
+<p>
+  It is possible that future enhancements to the prover might enable it
+  to recognize that NULL inputs to certain built-in functions
+  always result in a NULL answer.  However, not all built-in
+  functions have that property (for example <a href="lang_corefunc.html#coalesce">coalesce()</a>) and, of
+  course, the prover will never be able to reason about
+  <a href="appfunc.html">application-defined SQL functions</a>.
+
+<a name="omitnoopjoin"></a>
+
+</p><h1 id="the_omit_outer_join_optimization"><span>17. </span>The Omit OUTER JOIN Optimization</h1>
+
+<p>
+  Sometimes a LEFT or RIGHT JOIN can be completely omitted from a query without
+  changing the result.  This can happen if all of the following are
+  true:
+
+</p><p>
+  </p><ol>
+  <li> The query is not an aggregate
+  </li><li> Either the query is DISTINCT or else the ON or USING clause
+       on the OUTER JOIN constrains the join such that it matches
+       only a single row
+  </li><li> The right-hand table of the LEFT JOIN or the left-hand table of
+       a RIGHT JOIN is not be used anywhere
+       in the query outside of its own USING or ON clause.
+  </li></ol>
+
+<p>
+  OUTER JOIN elimination often comes up when OUTER JOINs are used
+  inside of views, and then the view is used in such as way that
+  none of the columns on the right-hand table of the LEFT JOIN or
+  on the left-hand table of a RIGHT JOIN are referenced.
+
+</p><p>
+  Here is a simple example of omitting a LEFT JOIN:
+
+</p><div class="codeblock"><pre>CREATE TABLE t1(ipk INTEGER PRIMARY KEY, v1);
+CREATE TABLE t2(ipk INTEGER PRIMARY KEY, v2);
+CREATE TABLE t3(ipk INTEGER PRIMARY KEY, v3);
+
+SELECT v1, v3 FROM t1 
+  LEFT JOIN t2 ON (t1.ipk=t2.ipk)
+  LEFT JOIN t3 ON (t1.ipk=t3.ipk)
+</pre></div>
+
+<p>
+  The t2 table is completely unused in the query above, and so the
+  query planner is able to implement the query as if it were written:
+
+</p><div class="codeblock"><pre>SELECT v1, v3 FROM t1 
+  LEFT JOIN t3 ON (t1.ipk=t3.ipk)
+</pre></div>
+
+<p>
+  As of this writing, only LEFT JOINs are eliminated.  This optimize
+  has not yet been generalized to work with RIGHT JOINs as RIGHT JOIN
+  is a relatively new addition to SQLite.  That asymmetry will probably
+  be corrected in a future release.
+
+
+<a name="constprop"></a>
+
+</p><h1 id="the_constant_propagation_optimization"><span>18. </span>The Constant Propagation Optimization</h1>
+
+<p>
+  When a WHERE clause contains two or more equality constraints connected
+  by the AND operator such that all of the <a href="datatype3.html#affinity">affinities</a> of the various
+  constraints are the same, then SQLite might use the transitive property
+  of equality to construct new "virtual" constraints that can be used to
+  simplify expressions and/or improve performance.  This is called the
+  "constant-propagation optimization".
+
+</p><p>
+  For example, consider the following schema and query:
+
+</p><div class="codeblock"><pre>CREATE TABLE t1(a INTEGER PRIMARY KEY, b INT, c INT);
+SELECT * FROM t1 WHERE a=b AND b=5;
+</pre></div>
+
+<p>
+  SQLite looks at the "a=b" and "b=5" constraints and deduces that
+  if those two constraints are true, then it must also be the case
+  that "a=5" is true.  This means that the desired row can be looked up
+  quickly using a value of 5 for the INTEGER PRIMARY KEY.
+</p><p align="center"><small><i>This page last modified on  <a href="https://sqlite.org/docsrc/honeypot" id="mtimelink"  data-href="https://sqlite.org/docsrc/finfo/pages/optoverview.in?m=be9cc8690c">2024-01-13 19:04:50</a> UTC </small></i></p>
+