summaryrefslogtreecommitdiffstats
path: root/www/floatingpoint.html
blob: 06909b28eba4637f03b3f8a29b9e745597f1ac6e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
<!DOCTYPE html>
<html><head>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<link href="sqlite.css" rel="stylesheet">
<title>Floating Point Numbers</title>
<!-- path= -->
</head>
<body>
<div class=nosearch>
<a href="index.html">
<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
</a>
<div><!-- IE hack to prevent disappearing logo --></div>
<div class="tagline desktoponly">
Small. Fast. Reliable.<br>Choose any three.
</div>
<div class="menu mainmenu">
<ul>
<li><a href="index.html">Home</a>
<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
<li class='wideonly'><a href='about.html'>About</a>
<li class='desktoponly'><a href="docs.html">Documentation</a>
<li class='desktoponly'><a href="download.html">Download</a>
<li class='wideonly'><a href='copyright.html'>License</a>
<li class='desktoponly'><a href="support.html">Support</a>
<li class='desktoponly'><a href="prosupport.html">Purchase</a>
<li class='search' id='search_menubutton'>
<a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
</ul>
</div>
<div class="menu submenu" id="submenu">
<ul>
<li><a href='about.html'>About</a>
<li><a href='docs.html'>Documentation</a>
<li><a href='download.html'>Download</a>
<li><a href='support.html'>Support</a>
<li><a href='prosupport.html'>Purchase</a>
</ul>
</div>
<div class="searchmenu" id="searchmenu">
<form method="GET" action="search">
<select name="s" id="searchtype">
<option value="d">Search Documentation</option>
<option value="c">Search Changelog</option>
</select>
<input type="text" name="q" id="searchbox" value="">
<input type="submit" value="Go">
</form>
</div>
</div>
<script>
function toggle_div(nm) {
var w = document.getElementById(nm);
if( w.style.display=="block" ){
w.style.display = "none";
}else{
w.style.display = "block";
}
}
function toggle_search() {
var w = document.getElementById("searchmenu");
if( w.style.display=="block" ){
w.style.display = "none";
} else {
w.style.display = "block";
setTimeout(function(){
document.getElementById("searchbox").focus()
}, 30);
}
}
function div_off(nm){document.getElementById(nm).style.display="none";}
window.onbeforeunload = function(e){div_off("submenu");}
/* Disable the Search feature if we are not operating from CGI, since */
/* Search is accomplished using CGI and will not work without it. */
if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
document.getElementById("search_menubutton").style.display = "none";
}
/* Used by the Hide/Show button beside syntax diagrams, to toggle the */
function hideorshow(btn,obj){
var x = document.getElementById(obj);
var b = document.getElementById(btn);
if( x.style.display!='none' ){
x.style.display = 'none';
b.innerHTML='show';
}else{
x.style.display = '';
b.innerHTML='hide';
}
return false;
}
var antiRobot = 0;
function antiRobotGo(){
if( antiRobot!=3 ) return;
antiRobot = 7;
var j = document.getElementById("mtimelink");
if(j && j.hasAttribute("data-href")) j.href=j.getAttribute("data-href");
}
function antiRobotDefense(){
document.body.onmousedown=function(){
antiRobot |= 2;
antiRobotGo();
document.body.onmousedown=null;
}
document.body.onmousemove=function(){
antiRobot |= 2;
antiRobotGo();
document.body.onmousemove=null;
}
setTimeout(function(){
antiRobot |= 1;
antiRobotGo();
}, 100)
antiRobotGo();
}
antiRobotDefense();
</script>
<div class=fancy>
<div class=nosearch>
<div class="fancy_title">
Floating Point Numbers
</div>
<div class="fancy_toc">
<a onclick="toggle_toc()">
<span class="fancy_toc_mark" id="toc_mk">&#x25ba;</span>
Table Of Contents
</a>
<div id="toc_sub"><div class="fancy-toc1"><a href="#how_sqlite_stores_numbers">1. How SQLite Stores Numbers</a></div>
<div class="fancy-toc2"><a href="#floating_point_accuracy">1.1. Floating-Point Accuracy</a></div>
<div class="fancy-toc2"><a href="#floating_point_numbers">1.2. Floating Point Numbers</a></div>
<div class="fancy-toc3"><a href="#unrepresentable_numbers">1.2.1. Unrepresentable numbers</a></div>
<div class="fancy-toc3"><a href="#is_it_close_enough_">1.2.2. Is it close enough?</a></div>
<div class="fancy-toc1"><a href="#extensions_for_dealing_with_floating_point_numbers">2. Extensions For Dealing With Floating Point Numbers</a></div>
<div class="fancy-toc2"><a href="#the_ieee754_c_extension">2.1. The ieee754.c Extension</a></div>
<div class="fancy-toc3"><a href="#the_ieee754_function">2.1.1. The ieee754() function</a></div>
<div class="fancy-toc3"><a href="#the_ieee754_mantissa_and_ieee754_exponent_functions">2.1.2. The ieee754_mantissa() and ieee754_exponent() functions</a></div>
<div class="fancy-toc3"><a href="#the_ieee754_from_blob_and_ieee754_to_blob_functions">2.1.3. The ieee754_from_blob() and ieee754_to_blob() functions</a></div>
<div class="fancy-toc2"><a href="#the_decimal_c_extension">2.2. The decimal.c Extension</a></div>
</div>
</div>
<script>
function toggle_toc(){
var sub = document.getElementById("toc_sub")
var mk = document.getElementById("toc_mk")
if( sub.style.display!="block" ){
sub.style.display = "block";
mk.innerHTML = "&#x25bc;";
} else {
sub.style.display = "none";
mk.innerHTML = "&#x25ba;";
}
}
</script>
</div>




<h1 id="how_sqlite_stores_numbers"><span>1. </span>How SQLite Stores Numbers</h1>

<p>
SQLite stores integer values in the 64-bit 
<a href="https://en.wikipedia.org/wiki/Two%27s_complement">twos-complement</a>
format&sup1.
This gives a storage range of -9223372036854775808 to +9223372036854775807,
inclusive.  Integers within this range are exact.

<a name="fpapprox"></a>

</p><p>
So-called "REAL" or floating point values are stored in the
<a href="https://en.wikipedia.org/wiki/IEEE_754">IEEE 754</a>
<a href="https://en.wikipedia.org/wiki/Double-precision_floating-point_format">Binary-64</a>
format&sup1.
This gives a range of positive values between approximately
1.7976931348623157e+308 and 4.9406564584124654e-324 with an equivalent
range of negative values.  A binary64 can also be 0.0 (and -0.0), positive
and negative infinity and "NaN" or "Not-a-Number".  Floating point
values are approximate.

</p><p>
Pay close attention to the last sentence in the previous paragraph:
</p><blockquote><b>
Floating point values are approximate.
</b></blockquote>

<p>
If you need an exact answer, you should not use binary64 floating-point
values, in SQLite or in any other product.  This is not an SQLite limitation.
It is a mathematical limitation inherent in the design of floating-point numbers.

</p><p>&mdash;<br>&sup1;
Exception:  The <a href="rtree.html">R-Tree extension</a> stores information as 32-bit floating
point or integer values.

</p><h2 id="floating_point_accuracy"><span>1.1. </span>Floating-Point Accuracy</h2>

<p>
SQLite promises to preserve the 15 most significant digits of a floating
point value.  However, it makes no guarantees about the accuracy of
computations on floating point values, as no such guarantees are possible.
Performing math on floating-point values introduces error.
For example, consider what happens if you attempt to subtract two floating-point
numbers of similar magnitude:

</p><blockquote>
<table border="0" cellpadding="0" cellspacing="0">
<tr><td align="right">1152693165.1106291898</td></tr>
<tr><td align="right">-1152693165.1106280772</td></tr>
<tr><td><hr>
</td></tr><tr><td align="right">0.0000011126
</td></tr></table>
</blockquote>

<p>The result shown above (0.0000011126) is the correct answer.  But if you
do this computation using binary64 floating-point, the answer you get is
0.00000095367431640625 - an error of about 14%.  If you do many similar
computations as part of your program, the errors add up so that your final
result might be completely meaningless.

</p><p>The error arises because only about the first 15 significant digits of
each number are stored accurately, and the first difference between the two numbers
being subtracted is in the 16th digit.  

</p><h2 id="floating_point_numbers"><span>1.2. </span>Floating Point Numbers</h2>

<p>
The binary64 floating-point format uses 64 bits per number.  Hence there
are 1.845e+19 different possible floating point values.  On the other hand
there are infinitely many real numbers in the range of 
1.7977e+308 and 4.9407e-324.  It follows then that binary64 cannot possibly
represent all possible real numbers within that range.  Approximations are
required.

</p><p>
An IEEE 754 floating-point value is an integer multiplied by a power
of two:

</p><blockquote>
<big>M &times 2<sup><small>E</small></sup></big>
</blockquote>

<p>The M value is the "mantissa" and E is the "exponent".  Both
M and E are integers.

</p><p>For Binary64, M is a 53-bit integer and E is an 11-bit integer that is
offset so that represents a range of values between -1074 and +972, inclusive.

</p><p><i>(NB:  The usual description of IEEE 754 is more complex, and it is important
to understand the added complexity if you really want to appreciate the details,
merits, and limitations of IEEE 754.  However, the integer description shown
here, while not exactly right, is easier to understand and is sufficient for
the purposes of this article.)</i></p>

<h3 id="unrepresentable_numbers"><span>1.2.1. </span>Unrepresentable numbers</h3>

<p>Not every decimal number with fewer than 16 significant digits can be
represented exactly as a binary64 number.  In fact, most decimal numbers
with digits to the right of the decimal point lack an exact binary64
equivalent.  For example, if you have a database column that is intended
to hold an item price in dollars and cents, the only cents value that
can be exactly represented are 0.00, 0.25, 0.50, and 0.75.  Any other
numbers to the right of the decimal point result in an approximation.
If you provide a "price" value of 47.49, that number will be represented
in binary64 as:

</p><blockquote>
6683623321994527 &times; 2<sup><small>-47</small></sup>
</blockquote>

<p>Which works out to be:

</p><blockquote>
47.49000000000000198951966012828052043914794921875
</blockquote>

<p>That number is very close to 47.49, but it is not exact.  It is a little
too big.  If we reduce M by one to 6683623321994526 so that we have the
next smaller possible binary64 value, we get:

</p><blockquote>
47.4899999999999948840923025272786617279052734375
</blockquote>


<p>
This second number is too small.
The first number is closer to the desired value of 47.49, so that is the
one that gets used.  But it is not exact.  Most decimal values work this
way in IEEE 754.  Remember the key point we made above:

</p><blockquote><b>
Floating point values are approximate.
</b></blockquote>

<p>If you remember nothing else about floating-point values, 
please don't forget this one key idea.

</p><h3 id="is_it_close_enough_"><span>1.2.2. </span>Is it close enough?</h3>

<p>The precision provided by IEEE 754 Binary64 is sufficient for most computations.
For example, if "47.49" represents a price and inflation is running
at 2% per year, then the price is going up by about 0.0000000301 dollars per
second.  The error in the recorded value of 47.49 represents about 66 nanoseconds
worth of inflation.  So if the 47.49 price is exact
when you enter it, then the effects of inflation will cause the true value to
exactly equal the value actually stored
(47.4900000000000019895196601282805204391479492187) in less than 
one ten-millionth of a second.
Surely that level of precision is sufficient for most purposes?

</p><h1 id="extensions_for_dealing_with_floating_point_numbers"><span>2. </span>Extensions For Dealing With Floating Point Numbers</h1>

<a name="ieee754ext"></a>

<h2 id="the_ieee754_c_extension"><span>2.1. </span>The ieee754.c Extension</h2>

<p>The ieee754 extension converts a floating point number between its
binary64 representation and the M&times;2<sup><small>E</small></sup> format.
In other words in the expression:

</p><blockquote>
<big>F = M &times 2<sup><small>E</small></sup></big>
</blockquote>

<p>The ieee754 extension converts between F and (M,E) and back again.

</p><p>The ieee754 extension is not part of the <a href="amalgamation.html">amalgamation</a>, but it is included
by default in the <a href="cli.html">CLI</a>.  If you want to include the ieee754 extension in your
application, you will need to compile and load it separately.

<a name="ieee754"></a>

</p><h3 id="the_ieee754_function"><span>2.1.1. </span>The ieee754() function</h3>

<p>The ieee754(F) SQL function takes a single floating-point argument
as its input and returns a string that looks like this:

</p><blockquote>
'ieee754(M,E)'
</blockquote>

<p>Except that the M and E are replaced by the mantissa and exponent of the
floating point number.  For example:

</p><div class="codeblock"><pre>sqlite> .mode box
sqlite> SELECT ieee754(47.49) AS x;
┌───────────────────────────────┐
│               x               │
├───────────────────────────────┤
│ ieee754(6683623321994527,-47) │
└───────────────────────────────┘
</pre></div>

<p>
Going in the other direction, the 2-argument version of ieee754() takes
the M and E values and converts them into the corresponding F value:

</p><div class="codeblock"><pre>sqlite> select ieee754(6683623321994527,-47) as x;
┌───────┐
│   x   │
├───────┤
│ 47.49 │
└───────┘
</pre></div>

<a name="ieee754m"></a>

<h3 id="the_ieee754_mantissa_and_ieee754_exponent_functions"><span>2.1.2. </span>The ieee754_mantissa() and ieee754_exponent() functions</h3>

<p>The text output of the one-argument form of ieee754() is great for human
readability, but it is awkward to use as part of a larger expression.  Hence
the ieee754_mantissa() and ieee754_exponent() routines were added to return
the M and E values corresponding to their single argument F
value.
For example:

</p><div class="codeblock"><pre>sqlite> .mode box
sqlite> SELECT ieee754_mantissa(47.49) AS M, ieee754_exponent(47.49) AS E;
┌──────────────────┬─────┐
│        M         │  E  │
├──────────────────┼─────┤
│ 6683623321994527 │ -47 │
└──────────────────┴─────┘
</pre></div>

<a name="ieee754b"></a>

<h3 id="the_ieee754_from_blob_and_ieee754_to_blob_functions"><span>2.1.3. </span>The ieee754_from_blob() and ieee754_to_blob() functions</h3>

<p>The ieee754_to_blob(F) SQL function converts the floating point number F
into an 8-byte BLOB that is the big-endian binary64 encoding of that number.
The ieee754_from_blob(B) function goes the other way, converting an 8-byte
blob into the floating-point value that the binary64 encoding represents.

</p><p>So, for example, if you read
<a href="https://en.wikipedia.org/wiki/Double-precision_floating-point_format">on
Wikipedia</a> that the encoding for the minimum positive binary64 value is
0x0000000000000001, then you can find the corresponding floating point value
like this:

</p><div class="codeblock"><pre>sqlite> .mode box
sqlite> SELECT ieee754_from_blob(x'0000000000000001') AS F;
┌───────────────────────┐
│           F           │
├───────────────────────┤
│ 4.94065645841247e-324 │
└───────────────────────┘
</pre></div>

<p>Or go the other way:

</p><div class="codeblock"><pre>sqlite> .mode box
sqlite> SELECT quote(ieee754_to_blob(4.94065645841247e-324)) AS binary64;
┌─────────────────────┐
│      binary64       │
├─────────────────────┤
│ X'0000000000000001' │
└─────────────────────┘
</pre></div>

<a name="decext"></a>

<h2 id="the_decimal_c_extension"><span>2.2. </span>The decimal.c Extension</h2>

<p>The decimal extension provides arbitrary-precision decimal arithmetic on
numbers stored as text strings.  Because the numbers are stored to arbitrary
precision and as text, no approximations are needed.  Computations can be
done exactly.

</p><p>The decimal extension is not (currently) part of the SQLite <a href="amalgamation.html">amalgamation</a>.
However, it is included in the <a href="cli.html">CLI</a>.

</p><p>There are three math functions available:

</p><p>
</p><ul>
<li> decimal_add(A,B)
</li><li> decimal_sub(A,B)
</li><li> decimal_mul(A,B)
</li><li> decimal_pow2(N)
</li></ul>


<p>The first three functions respectively add, subtract, and multiply their arguments
and return a new text string that is the decimal representation of the result. The
argument is interpreted as text.
There is no division operator because decimal division often does not generate
a finite decimal result.

</p><p>The decimal_pow2(N) function returns 2.0 raised to the N-th power where N is an
integer between -20000 and +20000.

</p><p>Use the decimal_cmp(A,B) to compare two decimal values.  The result will
be negative, zero, or positive if A is less than, equal to, or greater than B,
respectively.

</p><p>The decimal_sum(X) function is an aggregate, like the built-in
<a href="lang_aggfunc.html#sumunc">sum() aggregate function</a>, except that decimal_sum() computes its result
to arbitrary precision and is therefore precise.

</p><p>The decimal extension provides the "decimal" collating sequences
that compares decimal text strings in numeric order.

</p><p>The decimal(X) and decimal_exp(X) generate a decimal representation for input X.
The decimal_exp(X) function returns the result in exponential notation (with a "e+NN"
at the end) and decimal(X) returns a pure decimal (without the "e+NN").  If the input
X is a floating point value, it is expanded to its exact decimal equivalent.  For
example:

</p><div class="codeblock"><pre>sqlite> .mode qbox
sqlite> select decimal(47.49);
┌──────────────────────────────────────────────────────┐
│                    decimal(47.49)                    │
├──────────────────────────────────────────────────────┤
│ '47.49000000000000198951966012828052043914794921875' │
└──────────────────────────────────────────────────────┘
</pre></div>