summaryrefslogtreecommitdiffstats
path: root/www/howitworks.html
blob: a3136910b19f69030a497d7036c5a815f3952a71 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
<!DOCTYPE html>
<html><head>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<link href="sqlite.css" rel="stylesheet">
<title>How SQLite Works</title>
<!-- path= -->
</head>
<body>
<div class=nosearch>
<a href="index.html">
<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
</a>
<div><!-- IE hack to prevent disappearing logo --></div>
<div class="tagline desktoponly">
Small. Fast. Reliable.<br>Choose any three.
</div>
<div class="menu mainmenu">
<ul>
<li><a href="index.html">Home</a>
<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
<li class='wideonly'><a href='about.html'>About</a>
<li class='desktoponly'><a href="docs.html">Documentation</a>
<li class='desktoponly'><a href="download.html">Download</a>
<li class='wideonly'><a href='copyright.html'>License</a>
<li class='desktoponly'><a href="support.html">Support</a>
<li class='desktoponly'><a href="prosupport.html">Purchase</a>
<li class='search' id='search_menubutton'>
<a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
</ul>
</div>
<div class="menu submenu" id="submenu">
<ul>
<li><a href='about.html'>About</a>
<li><a href='docs.html'>Documentation</a>
<li><a href='download.html'>Download</a>
<li><a href='support.html'>Support</a>
<li><a href='prosupport.html'>Purchase</a>
</ul>
</div>
<div class="searchmenu" id="searchmenu">
<form method="GET" action="search">
<select name="s" id="searchtype">
<option value="d">Search Documentation</option>
<option value="c">Search Changelog</option>
</select>
<input type="text" name="q" id="searchbox" value="">
<input type="submit" value="Go">
</form>
</div>
</div>
<script>
function toggle_div(nm) {
var w = document.getElementById(nm);
if( w.style.display=="block" ){
w.style.display = "none";
}else{
w.style.display = "block";
}
}
function toggle_search() {
var w = document.getElementById("searchmenu");
if( w.style.display=="block" ){
w.style.display = "none";
} else {
w.style.display = "block";
setTimeout(function(){
document.getElementById("searchbox").focus()
}, 30);
}
}
function div_off(nm){document.getElementById(nm).style.display="none";}
window.onbeforeunload = function(e){div_off("submenu");}
/* Disable the Search feature if we are not operating from CGI, since */
/* Search is accomplished using CGI and will not work without it. */
if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
document.getElementById("search_menubutton").style.display = "none";
}
/* Used by the Hide/Show button beside syntax diagrams, to toggle the */
function hideorshow(btn,obj){
var x = document.getElementById(obj);
var b = document.getElementById(btn);
if( x.style.display!='none' ){
x.style.display = 'none';
b.innerHTML='show';
}else{
x.style.display = '';
b.innerHTML='hide';
}
return false;
}
var antiRobot = 0;
function antiRobotGo(){
if( antiRobot!=3 ) return;
antiRobot = 7;
var j = document.getElementById("mtimelink");
if(j && j.hasAttribute("data-href")) j.href=j.getAttribute("data-href");
}
function antiRobotDefense(){
document.body.onmousedown=function(){
antiRobot |= 2;
antiRobotGo();
document.body.onmousedown=null;
}
document.body.onmousemove=function(){
antiRobot |= 2;
antiRobotGo();
document.body.onmousemove=null;
}
setTimeout(function(){
antiRobot |= 1;
antiRobotGo();
}, 100)
antiRobotGo();
}
antiRobotDefense();
</script>
<div class=fancy>
<div class=nosearch>
<div class="fancy_title">
How SQLite Works
</div>
<div class="fancy_toc">
<a onclick="toggle_toc()">
<span class="fancy_toc_mark" id="toc_mk">&#x25ba;</span>
Table Of Contents
</a>
<div id="toc_sub"><div class="fancy-toc1"><a href="#background">1. Background</a></div>
<div class="fancy-toc2"><a href="#sqlite_is_different_from_most_other_sql_databases">1.1. SQLite Is Different From Most Other SQL Databases</a></div>
<div class="fancy-toc1"><a href="#sql_is_a_programming_language">2. SQL Is A Programming Language</a></div>
<div class="fancy-toc2"><a href="#programming_language_processing_steps">2.1. Programming Language Processing Steps</a></div>
<div class="fancy-toc2"><a href="#compiling_sqlite_programs">2.2. Compiling SQLite Programs</a></div>
<div class="fancy-toc1"><a href="#further_reading">3. Further Reading</a></div>
</div>
</div>
<script>
function toggle_toc(){
var sub = document.getElementById("toc_sub")
var mk = document.getElementById("toc_mk")
if( sub.style.display!="block" ){
sub.style.display = "block";
mk.innerHTML = "&#x25bc;";
} else {
sub.style.display = "none";
mk.innerHTML = "&#x25ba;";
}
}
</script>
</div>





<h1 id="background"><span>1. </span>Background</h1>

<p>
SQLite is a <a href="https://en.wikipedia.org/wiki/Library_(computing)">software library</a>
that translates high-level disk I/O requests generated by an application
into low-level I/O operations that
can be carried out by the operating system.  The application constructs 
high-level I/O requests using the
<a href="https://en.wikipedia.org/wiki/SQL">SQL language</a>.
SQLite translates each high-level SQL statement into a sequence of
many low-level I/O requests (open a file, read a few bytes from a file,
write a few bytes into a file, etc.) that do the work requested by
the SQL.


</p><p>
An application program could do all its disk I/O by direct calls to
operating system I/O routines
or by using a key/value storage engine like 
<a href="https://en.wikipedia.org/wiki/Berkeley_DB">Berkeley DB</a> or
<a href="https://en.wikipedia.org/wiki/RocksDB">RocksDB</a> (to name but two).
But there are advantages to using a higher-level interface based
on the SQL language.

</p><ol>
<li><p>
SQL is a very high-level language.
A few lines of SQL can replace hundreds or thousands of lines of
procedural code.  SQL thus reduces
the amount of work needed to develop and maintain the application,
and thereby helps to reduce the number of bugs in the application.

</p></li><li><p>
SQL and SQLite are 
<a href="https://en.wikipedia.org/wiki/Database_transaction">transactional</a>.
The use of a transactional storage system makes it much easier to
reason about the behavior of the application, and to write applications
that are robust, even in the face of software bugs, hardware faults,
or power losses.

</p></li><li><p>
SQLite is often
<a href="fasterthanfs.html">faster than direct low-level I/O</a>.
This is counterintuitive.  One would expect that a
high-level interface such as SQLite would impose a run-time penalty.
And, theoretically, that is correct.  But in practice, SQL-based
systems such as SQLite do so many behind-the-scenes optimizations
that an application developer would never have time to create and
maintain, that the SQL-based systems end up providing
a net performance gain.
</p></li></ol>

<h2 id="sqlite_is_different_from_most_other_sql_databases"><span>1.1. </span>SQLite Is Different From Most Other SQL Databases</h2>

<p>
There are many SQL-based database management systems available, besides
SQLite.  Common options include MySQL, PostgreSQL, and SQL-Server.
All these systems use the SQL langauge to communicate with the 
application, just like SQLite.  But these other systems different
from SQLite in important respects.

</p><ol>
<li><p>
SQLite is a <a href="serverless.html">serverless</a> software library, whereas the other
systems are client-server based.  With MySQL, PostgreSQL, SQL-Server,
and others, the application sends a message containing some SQL
over to a separate server thread or process. That separate
thread or process performs the requested I/O, then send the results
back to the application.  But there is no separate thread or
process with SQLite.  SQLite runs in the same address space as
the application, using the same program counter and heap storage.
SQLite does no interprocess communication (IPC).  When an application
sends an SQL statement into SQLite (by invoking a the appropriate
SQLite library subroutine), SQLite interprets the SQL
in the same thread as the caller.  When an SQLite API routine
returns, it does not leave behind any background tasks that
run separately from the application.

</p></li><li><p>
An SQLite database is a single ordinary file on disk
(with a <a href="fileformat2.html">well-defined file format</a>).  With other
systems, a "database" is usually a large number of separate
files hidden away in obscure directories of the filesystem, or
even spread across multiple machines.  But with SQLite, a
complete database is just an ordinary disk file.
</p></li></ol>

<h1 id="sql_is_a_programming_language"><span>2. </span>SQL Is A Programming Language</h1>

<p>
The best way to understand how SQL database engines work is to
think of SQL as a programming language, not as a "query language".
Each SQL statement is a separate program.  Applications construct
SQL program source files and send them to the database engine.
The database engine compiles the SQL source code into executable
form, runs that executable, then sends the result back to the
application.

</p><p>
While SQL is a programming language, it is different from other
programming languages like C, Javascript, Python, or Go in that
SQL is a 
<a href="https://en.wikipedia.org/wiki/Declarative_programming">declarative language</a>
where the others are
<a href="https://en.wikipedia.org/wiki/Imperative_programming">imperative languages</a>.
This is an important difference that has implications for the
design of the compiler used to translate program source text into an
executable format. However, those details 
should not detract from the fact that
SQL is really just another programming language.

</p><h2 id="programming_language_processing_steps"><span>2.1. </span>Programming Language Processing Steps</h2>

<p>
All programming languages are processed in two steps:

</p><ol>
<li><p>
Translate the program source text into an executable format.
</p></li><li><p>
Run the executable generated in the previous step in order to
carry out the desired action.
</p></li></ol>

<p>
All programming languages uses those two basic steps.
The main difference is in the executable format.

</p><p>
"Compiled" languages like C++ and Rust translate
the source text into machine code that can be directly executed
by the underlying hardware.  There exist SQL database systems
that do the same with SQL - they translate each SQL statement
directly into machine code.  But that approach is uncommon and
is not the approach taken by SQLite.

</p><p>
Other languages like Java, Perl, Python, and TCL
typically translate the program source text into bytecode.
This bytecode is then run through an interpreter that
reads the bytecode and carries out the desired operations.
SQLite uses this bytecode approach.  If you preceed
any SQL statement with the "<a href="lang_explain.html">EXPLAIN</a>" keyword in SQLite,
it will show you the bytecode that is generated rather
than run the bytecode.

</p><p>
Another approach is to translate the program source text
into a tree of objects in memory.  This tree is the "executable".
An interpret runs the executable by walking the tree.
This is the technique used by MySQL, PostgreSQL, and
SQL-Server.

</p><p>
Of course, not every language fits neatly into one of the
above categories.  This applies to both SQL database engines
and more familiar imperative programming languages.  Javascript
is famous for using a hybrid execution model, where the code
is initially compiled into a tree of objects, but might be
further translating (using just-in-time compilation) down into
more efficient bytecode or machine code, as a means of boosting
performance.

</p><p>
The executable format really ends up being just an implementation
detail.  The key point is that all languages have a compiler
step which translates programs into an executable format and
a run step that executes the compiled program.


</p><h2 id="compiling_sqlite_programs"><span>2.2. </span>Compiling SQLite Programs</h2>

<p>
When an SQL program is submitted to SQLite, the first step is
to split the source text into "tokens".  A token might be:

</p><ul>
<li>A language keyword like "SELECT" or "UPDATE".
</li><li>An identifier for a table or column or variable.
</li><li>Punctuation characters like "," or "==" or ";".
</li><li>Literal values: numeric or string constants.
</li><li>Whitespace or comments.
</li></ul>

<p>
Whitespace and comment tokens are discarded.  All other tokens
are fed into an
<a href="https://en.wikipedia.org/wiki/LALR_parser">LALR(1) Parser</a>
that analyzes the structure of the input program and generates an
<a href="https://en.wikipedia.org/wiki/Abstract_syntax_tree">Abstract Syntax Tree (AST)</a>
for the input program.

</p><p>
The parser forwards the AST on to the code generator.
The code generator is the heart of SQLite, and is where most of
the magic happens.
The code generator resolves symbolic names in the AST - matching
the names of columns and tables in the input SQL into actual
columns and tables of the database.  The code generator also does
various transformations on the AST to "optimize" it.  Finally
the code generator chooses appropriate algorithms to implement
the operations requested by the AST and constructs bytecode to
carry out those operations.

</p><p>
The bytecode generated by the code generator is called a
"<a href="c3ref/stmt.html">prepared statement</a>".  Translating SQL source text into a
prepared statement is analogous to converting a C++ program
into machine code by invoking gcc or clang.  Human-readable
source text (SQL or C++) goes in, and a machine readable executable
(bytecode or machine code) comes out.

</p><h1 id="further_reading"><span>3. </span>Further Reading</h1>

<ul>
<li><p>
The <a href="atomiccommit.html">Atomic Commit</a> document describes how SQLite implements
transactions.

</p></li><li><p>
The <a href="opcode.html">bytecode engine</a> document has more information
on the bytecode format used by SQLite, and how to view and interpret
an SQLite prepared statement.

</p></li><li><p>
The <a href="optoverview.html">SQLite query planner</a> and
<a href="queryplanner-ng.html">Next Generation Query Planner</a> documents have further details
on the algorithms SQLite uses to implement SQL statements and
how it goes above choosing an appropriate algorithm for each
individual SQL statement.
</p></li></ul>