summaryrefslogtreecommitdiffstats
path: root/www/howitworks.html
diff options
context:
space:
mode:
Diffstat (limited to 'www/howitworks.html')
-rw-r--r--www/howitworks.html387
1 files changed, 387 insertions, 0 deletions
diff --git a/www/howitworks.html b/www/howitworks.html
new file mode 100644
index 0000000..a313691
--- /dev/null
+++ b/www/howitworks.html
@@ -0,0 +1,387 @@
+<!DOCTYPE html>
+<html><head>
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<meta http-equiv="content-type" content="text/html; charset=UTF-8">
+<link href="sqlite.css" rel="stylesheet">
+<title>How SQLite Works</title>
+<!-- path= -->
+</head>
+<body>
+<div class=nosearch>
+<a href="index.html">
+<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
+</a>
+<div><!-- IE hack to prevent disappearing logo --></div>
+<div class="tagline desktoponly">
+Small. Fast. Reliable.<br>Choose any three.
+</div>
+<div class="menu mainmenu">
+<ul>
+<li><a href="index.html">Home</a>
+<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
+<li class='wideonly'><a href='about.html'>About</a>
+<li class='desktoponly'><a href="docs.html">Documentation</a>
+<li class='desktoponly'><a href="download.html">Download</a>
+<li class='wideonly'><a href='copyright.html'>License</a>
+<li class='desktoponly'><a href="support.html">Support</a>
+<li class='desktoponly'><a href="prosupport.html">Purchase</a>
+<li class='search' id='search_menubutton'>
+<a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
+</ul>
+</div>
+<div class="menu submenu" id="submenu">
+<ul>
+<li><a href='about.html'>About</a>
+<li><a href='docs.html'>Documentation</a>
+<li><a href='download.html'>Download</a>
+<li><a href='support.html'>Support</a>
+<li><a href='prosupport.html'>Purchase</a>
+</ul>
+</div>
+<div class="searchmenu" id="searchmenu">
+<form method="GET" action="search">
+<select name="s" id="searchtype">
+<option value="d">Search Documentation</option>
+<option value="c">Search Changelog</option>
+</select>
+<input type="text" name="q" id="searchbox" value="">
+<input type="submit" value="Go">
+</form>
+</div>
+</div>
+<script>
+function toggle_div(nm) {
+var w = document.getElementById(nm);
+if( w.style.display=="block" ){
+w.style.display = "none";
+}else{
+w.style.display = "block";
+}
+}
+function toggle_search() {
+var w = document.getElementById("searchmenu");
+if( w.style.display=="block" ){
+w.style.display = "none";
+} else {
+w.style.display = "block";
+setTimeout(function(){
+document.getElementById("searchbox").focus()
+}, 30);
+}
+}
+function div_off(nm){document.getElementById(nm).style.display="none";}
+window.onbeforeunload = function(e){div_off("submenu");}
+/* Disable the Search feature if we are not operating from CGI, since */
+/* Search is accomplished using CGI and will not work without it. */
+if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
+document.getElementById("search_menubutton").style.display = "none";
+}
+/* Used by the Hide/Show button beside syntax diagrams, to toggle the */
+function hideorshow(btn,obj){
+var x = document.getElementById(obj);
+var b = document.getElementById(btn);
+if( x.style.display!='none' ){
+x.style.display = 'none';
+b.innerHTML='show';
+}else{
+x.style.display = '';
+b.innerHTML='hide';
+}
+return false;
+}
+var antiRobot = 0;
+function antiRobotGo(){
+if( antiRobot!=3 ) return;
+antiRobot = 7;
+var j = document.getElementById("mtimelink");
+if(j && j.hasAttribute("data-href")) j.href=j.getAttribute("data-href");
+}
+function antiRobotDefense(){
+document.body.onmousedown=function(){
+antiRobot |= 2;
+antiRobotGo();
+document.body.onmousedown=null;
+}
+document.body.onmousemove=function(){
+antiRobot |= 2;
+antiRobotGo();
+document.body.onmousemove=null;
+}
+setTimeout(function(){
+antiRobot |= 1;
+antiRobotGo();
+}, 100)
+antiRobotGo();
+}
+antiRobotDefense();
+</script>
+<div class=fancy>
+<div class=nosearch>
+<div class="fancy_title">
+How SQLite Works
+</div>
+<div class="fancy_toc">
+<a onclick="toggle_toc()">
+<span class="fancy_toc_mark" id="toc_mk">&#x25ba;</span>
+Table Of Contents
+</a>
+<div id="toc_sub"><div class="fancy-toc1"><a href="#background">1. Background</a></div>
+<div class="fancy-toc2"><a href="#sqlite_is_different_from_most_other_sql_databases">1.1. SQLite Is Different From Most Other SQL Databases</a></div>
+<div class="fancy-toc1"><a href="#sql_is_a_programming_language">2. SQL Is A Programming Language</a></div>
+<div class="fancy-toc2"><a href="#programming_language_processing_steps">2.1. Programming Language Processing Steps</a></div>
+<div class="fancy-toc2"><a href="#compiling_sqlite_programs">2.2. Compiling SQLite Programs</a></div>
+<div class="fancy-toc1"><a href="#further_reading">3. Further Reading</a></div>
+</div>
+</div>
+<script>
+function toggle_toc(){
+var sub = document.getElementById("toc_sub")
+var mk = document.getElementById("toc_mk")
+if( sub.style.display!="block" ){
+sub.style.display = "block";
+mk.innerHTML = "&#x25bc;";
+} else {
+sub.style.display = "none";
+mk.innerHTML = "&#x25ba;";
+}
+}
+</script>
+</div>
+
+
+
+
+
+<h1 id="background"><span>1. </span>Background</h1>
+
+<p>
+SQLite is a <a href="https://en.wikipedia.org/wiki/Library_(computing)">software library</a>
+that translates high-level disk I/O requests generated by an application
+into low-level I/O operations that
+can be carried out by the operating system. The application constructs
+high-level I/O requests using the
+<a href="https://en.wikipedia.org/wiki/SQL">SQL language</a>.
+SQLite translates each high-level SQL statement into a sequence of
+many low-level I/O requests (open a file, read a few bytes from a file,
+write a few bytes into a file, etc.) that do the work requested by
+the SQL.
+
+
+</p><p>
+An application program could do all its disk I/O by direct calls to
+operating system I/O routines
+or by using a key/value storage engine like
+<a href="https://en.wikipedia.org/wiki/Berkeley_DB">Berkeley DB</a> or
+<a href="https://en.wikipedia.org/wiki/RocksDB">RocksDB</a> (to name but two).
+But there are advantages to using a higher-level interface based
+on the SQL language.
+
+</p><ol>
+<li><p>
+SQL is a very high-level language.
+A few lines of SQL can replace hundreds or thousands of lines of
+procedural code. SQL thus reduces
+the amount of work needed to develop and maintain the application,
+and thereby helps to reduce the number of bugs in the application.
+
+</p></li><li><p>
+SQL and SQLite are
+<a href="https://en.wikipedia.org/wiki/Database_transaction">transactional</a>.
+The use of a transactional storage system makes it much easier to
+reason about the behavior of the application, and to write applications
+that are robust, even in the face of software bugs, hardware faults,
+or power losses.
+
+</p></li><li><p>
+SQLite is often
+<a href="fasterthanfs.html">faster than direct low-level I/O</a>.
+This is counterintuitive. One would expect that a
+high-level interface such as SQLite would impose a run-time penalty.
+And, theoretically, that is correct. But in practice, SQL-based
+systems such as SQLite do so many behind-the-scenes optimizations
+that an application developer would never have time to create and
+maintain, that the SQL-based systems end up providing
+a net performance gain.
+</p></li></ol>
+
+<h2 id="sqlite_is_different_from_most_other_sql_databases"><span>1.1. </span>SQLite Is Different From Most Other SQL Databases</h2>
+
+<p>
+There are many SQL-based database management systems available, besides
+SQLite. Common options include MySQL, PostgreSQL, and SQL-Server.
+All these systems use the SQL langauge to communicate with the
+application, just like SQLite. But these other systems different
+from SQLite in important respects.
+
+</p><ol>
+<li><p>
+SQLite is a <a href="serverless.html">serverless</a> software library, whereas the other
+systems are client-server based. With MySQL, PostgreSQL, SQL-Server,
+and others, the application sends a message containing some SQL
+over to a separate server thread or process. That separate
+thread or process performs the requested I/O, then send the results
+back to the application. But there is no separate thread or
+process with SQLite. SQLite runs in the same address space as
+the application, using the same program counter and heap storage.
+SQLite does no interprocess communication (IPC). When an application
+sends an SQL statement into SQLite (by invoking a the appropriate
+SQLite library subroutine), SQLite interprets the SQL
+in the same thread as the caller. When an SQLite API routine
+returns, it does not leave behind any background tasks that
+run separately from the application.
+
+</p></li><li><p>
+An SQLite database is a single ordinary file on disk
+(with a <a href="fileformat2.html">well-defined file format</a>). With other
+systems, a "database" is usually a large number of separate
+files hidden away in obscure directories of the filesystem, or
+even spread across multiple machines. But with SQLite, a
+complete database is just an ordinary disk file.
+</p></li></ol>
+
+<h1 id="sql_is_a_programming_language"><span>2. </span>SQL Is A Programming Language</h1>
+
+<p>
+The best way to understand how SQL database engines work is to
+think of SQL as a programming language, not as a "query language".
+Each SQL statement is a separate program. Applications construct
+SQL program source files and send them to the database engine.
+The database engine compiles the SQL source code into executable
+form, runs that executable, then sends the result back to the
+application.
+
+</p><p>
+While SQL is a programming language, it is different from other
+programming languages like C, Javascript, Python, or Go in that
+SQL is a
+<a href="https://en.wikipedia.org/wiki/Declarative_programming">declarative language</a>
+where the others are
+<a href="https://en.wikipedia.org/wiki/Imperative_programming">imperative languages</a>.
+This is an important difference that has implications for the
+design of the compiler used to translate program source text into an
+executable format. However, those details
+should not detract from the fact that
+SQL is really just another programming language.
+
+</p><h2 id="programming_language_processing_steps"><span>2.1. </span>Programming Language Processing Steps</h2>
+
+<p>
+All programming languages are processed in two steps:
+
+</p><ol>
+<li><p>
+Translate the program source text into an executable format.
+</p></li><li><p>
+Run the executable generated in the previous step in order to
+carry out the desired action.
+</p></li></ol>
+
+<p>
+All programming languages uses those two basic steps.
+The main difference is in the executable format.
+
+</p><p>
+"Compiled" languages like C++ and Rust translate
+the source text into machine code that can be directly executed
+by the underlying hardware. There exist SQL database systems
+that do the same with SQL - they translate each SQL statement
+directly into machine code. But that approach is uncommon and
+is not the approach taken by SQLite.
+
+</p><p>
+Other languages like Java, Perl, Python, and TCL
+typically translate the program source text into bytecode.
+This bytecode is then run through an interpreter that
+reads the bytecode and carries out the desired operations.
+SQLite uses this bytecode approach. If you preceed
+any SQL statement with the "<a href="lang_explain.html">EXPLAIN</a>" keyword in SQLite,
+it will show you the bytecode that is generated rather
+than run the bytecode.
+
+</p><p>
+Another approach is to translate the program source text
+into a tree of objects in memory. This tree is the "executable".
+An interpret runs the executable by walking the tree.
+This is the technique used by MySQL, PostgreSQL, and
+SQL-Server.
+
+</p><p>
+Of course, not every language fits neatly into one of the
+above categories. This applies to both SQL database engines
+and more familiar imperative programming languages. Javascript
+is famous for using a hybrid execution model, where the code
+is initially compiled into a tree of objects, but might be
+further translating (using just-in-time compilation) down into
+more efficient bytecode or machine code, as a means of boosting
+performance.
+
+</p><p>
+The executable format really ends up being just an implementation
+detail. The key point is that all languages have a compiler
+step which translates programs into an executable format and
+a run step that executes the compiled program.
+
+
+</p><h2 id="compiling_sqlite_programs"><span>2.2. </span>Compiling SQLite Programs</h2>
+
+<p>
+When an SQL program is submitted to SQLite, the first step is
+to split the source text into "tokens". A token might be:
+
+</p><ul>
+<li>A language keyword like "SELECT" or "UPDATE".
+</li><li>An identifier for a table or column or variable.
+</li><li>Punctuation characters like "," or "==" or ";".
+</li><li>Literal values: numeric or string constants.
+</li><li>Whitespace or comments.
+</li></ul>
+
+<p>
+Whitespace and comment tokens are discarded. All other tokens
+are fed into an
+<a href="https://en.wikipedia.org/wiki/LALR_parser">LALR(1) Parser</a>
+that analyzes the structure of the input program and generates an
+<a href="https://en.wikipedia.org/wiki/Abstract_syntax_tree">Abstract Syntax Tree (AST)</a>
+for the input program.
+
+</p><p>
+The parser forwards the AST on to the code generator.
+The code generator is the heart of SQLite, and is where most of
+the magic happens.
+The code generator resolves symbolic names in the AST - matching
+the names of columns and tables in the input SQL into actual
+columns and tables of the database. The code generator also does
+various transformations on the AST to "optimize" it. Finally
+the code generator chooses appropriate algorithms to implement
+the operations requested by the AST and constructs bytecode to
+carry out those operations.
+
+</p><p>
+The bytecode generated by the code generator is called a
+"<a href="c3ref/stmt.html">prepared statement</a>". Translating SQL source text into a
+prepared statement is analogous to converting a C++ program
+into machine code by invoking gcc or clang. Human-readable
+source text (SQL or C++) goes in, and a machine readable executable
+(bytecode or machine code) comes out.
+
+</p><h1 id="further_reading"><span>3. </span>Further Reading</h1>
+
+<ul>
+<li><p>
+The <a href="atomiccommit.html">Atomic Commit</a> document describes how SQLite implements
+transactions.
+
+</p></li><li><p>
+The <a href="opcode.html">bytecode engine</a> document has more information
+on the bytecode format used by SQLite, and how to view and interpret
+an SQLite prepared statement.
+
+</p></li><li><p>
+The <a href="optoverview.html">SQLite query planner</a> and
+<a href="queryplanner-ng.html">Next Generation Query Planner</a> documents have further details
+on the algorithms SQLite uses to implement SQL statements and
+how it goes above choosing an appropriate algorithm for each
+individual SQL statement.
+</p></li></ul>
+
+