diff options
Diffstat (limited to 'www/floatingpoint.html')
-rw-r--r-- | www/floatingpoint.html | 480 |
1 files changed, 480 insertions, 0 deletions
diff --git a/www/floatingpoint.html b/www/floatingpoint.html new file mode 100644 index 0000000..06909b2 --- /dev/null +++ b/www/floatingpoint.html @@ -0,0 +1,480 @@ +<!DOCTYPE html> +<html><head> +<meta name="viewport" content="width=device-width, initial-scale=1.0"> +<meta http-equiv="content-type" content="text/html; charset=UTF-8"> +<link href="sqlite.css" rel="stylesheet"> +<title>Floating Point Numbers</title> +<!-- path= --> +</head> +<body> +<div class=nosearch> +<a href="index.html"> +<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0"> +</a> +<div><!-- IE hack to prevent disappearing logo --></div> +<div class="tagline desktoponly"> +Small. Fast. Reliable.<br>Choose any three. +</div> +<div class="menu mainmenu"> +<ul> +<li><a href="index.html">Home</a> +<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a> +<li class='wideonly'><a href='about.html'>About</a> +<li class='desktoponly'><a href="docs.html">Documentation</a> +<li class='desktoponly'><a href="download.html">Download</a> +<li class='wideonly'><a href='copyright.html'>License</a> +<li class='desktoponly'><a href="support.html">Support</a> +<li class='desktoponly'><a href="prosupport.html">Purchase</a> +<li class='search' id='search_menubutton'> +<a href="javascript:void(0)" onclick='toggle_search()'>Search</a> +</ul> +</div> +<div class="menu submenu" id="submenu"> +<ul> +<li><a href='about.html'>About</a> +<li><a href='docs.html'>Documentation</a> +<li><a href='download.html'>Download</a> +<li><a href='support.html'>Support</a> +<li><a href='prosupport.html'>Purchase</a> +</ul> +</div> +<div class="searchmenu" id="searchmenu"> +<form method="GET" action="search"> +<select name="s" id="searchtype"> +<option value="d">Search Documentation</option> +<option value="c">Search Changelog</option> +</select> +<input type="text" name="q" id="searchbox" value=""> +<input type="submit" value="Go"> +</form> +</div> +</div> +<script> +function toggle_div(nm) { +var w = document.getElementById(nm); +if( w.style.display=="block" ){ +w.style.display = "none"; +}else{ +w.style.display = "block"; +} +} +function toggle_search() { +var w = document.getElementById("searchmenu"); +if( w.style.display=="block" ){ +w.style.display = "none"; +} else { +w.style.display = "block"; +setTimeout(function(){ +document.getElementById("searchbox").focus() +}, 30); +} +} +function div_off(nm){document.getElementById(nm).style.display="none";} +window.onbeforeunload = function(e){div_off("submenu");} +/* Disable the Search feature if we are not operating from CGI, since */ +/* Search is accomplished using CGI and will not work without it. */ +if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){ +document.getElementById("search_menubutton").style.display = "none"; +} +/* Used by the Hide/Show button beside syntax diagrams, to toggle the */ +function hideorshow(btn,obj){ +var x = document.getElementById(obj); +var b = document.getElementById(btn); +if( x.style.display!='none' ){ +x.style.display = 'none'; +b.innerHTML='show'; +}else{ +x.style.display = ''; +b.innerHTML='hide'; +} +return false; +} +var antiRobot = 0; +function antiRobotGo(){ +if( antiRobot!=3 ) return; +antiRobot = 7; +var j = document.getElementById("mtimelink"); +if(j && j.hasAttribute("data-href")) j.href=j.getAttribute("data-href"); +} +function antiRobotDefense(){ +document.body.onmousedown=function(){ +antiRobot |= 2; +antiRobotGo(); +document.body.onmousedown=null; +} +document.body.onmousemove=function(){ +antiRobot |= 2; +antiRobotGo(); +document.body.onmousemove=null; +} +setTimeout(function(){ +antiRobot |= 1; +antiRobotGo(); +}, 100) +antiRobotGo(); +} +antiRobotDefense(); +</script> +<div class=fancy> +<div class=nosearch> +<div class="fancy_title"> +Floating Point Numbers +</div> +<div class="fancy_toc"> +<a onclick="toggle_toc()"> +<span class="fancy_toc_mark" id="toc_mk">►</span> +Table Of Contents +</a> +<div id="toc_sub"><div class="fancy-toc1"><a href="#how_sqlite_stores_numbers">1. How SQLite Stores Numbers</a></div> +<div class="fancy-toc2"><a href="#floating_point_accuracy">1.1. Floating-Point Accuracy</a></div> +<div class="fancy-toc2"><a href="#floating_point_numbers">1.2. Floating Point Numbers</a></div> +<div class="fancy-toc3"><a href="#unrepresentable_numbers">1.2.1. Unrepresentable numbers</a></div> +<div class="fancy-toc3"><a href="#is_it_close_enough_">1.2.2. Is it close enough?</a></div> +<div class="fancy-toc1"><a href="#extensions_for_dealing_with_floating_point_numbers">2. Extensions For Dealing With Floating Point Numbers</a></div> +<div class="fancy-toc2"><a href="#the_ieee754_c_extension">2.1. The ieee754.c Extension</a></div> +<div class="fancy-toc3"><a href="#the_ieee754_function">2.1.1. The ieee754() function</a></div> +<div class="fancy-toc3"><a href="#the_ieee754_mantissa_and_ieee754_exponent_functions">2.1.2. The ieee754_mantissa() and ieee754_exponent() functions</a></div> +<div class="fancy-toc3"><a href="#the_ieee754_from_blob_and_ieee754_to_blob_functions">2.1.3. The ieee754_from_blob() and ieee754_to_blob() functions</a></div> +<div class="fancy-toc2"><a href="#the_decimal_c_extension">2.2. The decimal.c Extension</a></div> +</div> +</div> +<script> +function toggle_toc(){ +var sub = document.getElementById("toc_sub") +var mk = document.getElementById("toc_mk") +if( sub.style.display!="block" ){ +sub.style.display = "block"; +mk.innerHTML = "▼"; +} else { +sub.style.display = "none"; +mk.innerHTML = "►"; +} +} +</script> +</div> + + + + +<h1 id="how_sqlite_stores_numbers"><span>1. </span>How SQLite Stores Numbers</h1> + +<p> +SQLite stores integer values in the 64-bit +<a href="https://en.wikipedia.org/wiki/Two%27s_complement">twos-complement</a> +format¹. +This gives a storage range of -9223372036854775808 to +9223372036854775807, +inclusive. Integers within this range are exact. + +<a name="fpapprox"></a> + +</p><p> +So-called "REAL" or floating point values are stored in the +<a href="https://en.wikipedia.org/wiki/IEEE_754">IEEE 754</a> +<a href="https://en.wikipedia.org/wiki/Double-precision_floating-point_format">Binary-64</a> +format¹. +This gives a range of positive values between approximately +1.7976931348623157e+308 and 4.9406564584124654e-324 with an equivalent +range of negative values. A binary64 can also be 0.0 (and -0.0), positive +and negative infinity and "NaN" or "Not-a-Number". Floating point +values are approximate. + +</p><p> +Pay close attention to the last sentence in the previous paragraph: +</p><blockquote><b> +Floating point values are approximate. +</b></blockquote> + +<p> +If you need an exact answer, you should not use binary64 floating-point +values, in SQLite or in any other product. This is not an SQLite limitation. +It is a mathematical limitation inherent in the design of floating-point numbers. + +</p><p>—<br>¹ +Exception: The <a href="rtree.html">R-Tree extension</a> stores information as 32-bit floating +point or integer values. + +</p><h2 id="floating_point_accuracy"><span>1.1. </span>Floating-Point Accuracy</h2> + +<p> +SQLite promises to preserve the 15 most significant digits of a floating +point value. However, it makes no guarantees about the accuracy of +computations on floating point values, as no such guarantees are possible. +Performing math on floating-point values introduces error. +For example, consider what happens if you attempt to subtract two floating-point +numbers of similar magnitude: + +</p><blockquote> +<table border="0" cellpadding="0" cellspacing="0"> +<tr><td align="right">1152693165.1106291898</td></tr> +<tr><td align="right">-1152693165.1106280772</td></tr> +<tr><td><hr> +</td></tr><tr><td align="right">0.0000011126 +</td></tr></table> +</blockquote> + +<p>The result shown above (0.0000011126) is the correct answer. But if you +do this computation using binary64 floating-point, the answer you get is +0.00000095367431640625 - an error of about 14%. If you do many similar +computations as part of your program, the errors add up so that your final +result might be completely meaningless. + +</p><p>The error arises because only about the first 15 significant digits of +each number are stored accurately, and the first difference between the two numbers +being subtracted is in the 16th digit. + +</p><h2 id="floating_point_numbers"><span>1.2. </span>Floating Point Numbers</h2> + +<p> +The binary64 floating-point format uses 64 bits per number. Hence there +are 1.845e+19 different possible floating point values. On the other hand +there are infinitely many real numbers in the range of +1.7977e+308 and 4.9407e-324. It follows then that binary64 cannot possibly +represent all possible real numbers within that range. Approximations are +required. + +</p><p> +An IEEE 754 floating-point value is an integer multiplied by a power +of two: + +</p><blockquote> +<big>M × 2<sup><small>E</small></sup></big> +</blockquote> + +<p>The M value is the "mantissa" and E is the "exponent". Both +M and E are integers. + +</p><p>For Binary64, M is a 53-bit integer and E is an 11-bit integer that is +offset so that represents a range of values between -1074 and +972, inclusive. + +</p><p><i>(NB: The usual description of IEEE 754 is more complex, and it is important +to understand the added complexity if you really want to appreciate the details, +merits, and limitations of IEEE 754. However, the integer description shown +here, while not exactly right, is easier to understand and is sufficient for +the purposes of this article.)</i></p> + +<h3 id="unrepresentable_numbers"><span>1.2.1. </span>Unrepresentable numbers</h3> + +<p>Not every decimal number with fewer than 16 significant digits can be +represented exactly as a binary64 number. In fact, most decimal numbers +with digits to the right of the decimal point lack an exact binary64 +equivalent. For example, if you have a database column that is intended +to hold an item price in dollars and cents, the only cents value that +can be exactly represented are 0.00, 0.25, 0.50, and 0.75. Any other +numbers to the right of the decimal point result in an approximation. +If you provide a "price" value of 47.49, that number will be represented +in binary64 as: + +</p><blockquote> +6683623321994527 × 2<sup><small>-47</small></sup> +</blockquote> + +<p>Which works out to be: + +</p><blockquote> +47.49000000000000198951966012828052043914794921875 +</blockquote> + +<p>That number is very close to 47.49, but it is not exact. It is a little +too big. If we reduce M by one to 6683623321994526 so that we have the +next smaller possible binary64 value, we get: + +</p><blockquote> +47.4899999999999948840923025272786617279052734375 +</blockquote> + + +<p> +This second number is too small. +The first number is closer to the desired value of 47.49, so that is the +one that gets used. But it is not exact. Most decimal values work this +way in IEEE 754. Remember the key point we made above: + +</p><blockquote><b> +Floating point values are approximate. +</b></blockquote> + +<p>If you remember nothing else about floating-point values, +please don't forget this one key idea. + +</p><h3 id="is_it_close_enough_"><span>1.2.2. </span>Is it close enough?</h3> + +<p>The precision provided by IEEE 754 Binary64 is sufficient for most computations. +For example, if "47.49" represents a price and inflation is running +at 2% per year, then the price is going up by about 0.0000000301 dollars per +second. The error in the recorded value of 47.49 represents about 66 nanoseconds +worth of inflation. So if the 47.49 price is exact +when you enter it, then the effects of inflation will cause the true value to +exactly equal the value actually stored +(47.4900000000000019895196601282805204391479492187) in less than +one ten-millionth of a second. +Surely that level of precision is sufficient for most purposes? + +</p><h1 id="extensions_for_dealing_with_floating_point_numbers"><span>2. </span>Extensions For Dealing With Floating Point Numbers</h1> + +<a name="ieee754ext"></a> + +<h2 id="the_ieee754_c_extension"><span>2.1. </span>The ieee754.c Extension</h2> + +<p>The ieee754 extension converts a floating point number between its +binary64 representation and the M×2<sup><small>E</small></sup> format. +In other words in the expression: + +</p><blockquote> +<big>F = M × 2<sup><small>E</small></sup></big> +</blockquote> + +<p>The ieee754 extension converts between F and (M,E) and back again. + +</p><p>The ieee754 extension is not part of the <a href="amalgamation.html">amalgamation</a>, but it is included +by default in the <a href="cli.html">CLI</a>. If you want to include the ieee754 extension in your +application, you will need to compile and load it separately. + +<a name="ieee754"></a> + +</p><h3 id="the_ieee754_function"><span>2.1.1. </span>The ieee754() function</h3> + +<p>The ieee754(F) SQL function takes a single floating-point argument +as its input and returns a string that looks like this: + +</p><blockquote> +'ieee754(M,E)' +</blockquote> + +<p>Except that the M and E are replaced by the mantissa and exponent of the +floating point number. For example: + +</p><div class="codeblock"><pre>sqlite> .mode box +sqlite> SELECT ieee754(47.49) AS x; +┌───────────────────────────────┐ +│ x │ +├───────────────────────────────┤ +│ ieee754(6683623321994527,-47) │ +└───────────────────────────────┘ +</pre></div> + +<p> +Going in the other direction, the 2-argument version of ieee754() takes +the M and E values and converts them into the corresponding F value: + +</p><div class="codeblock"><pre>sqlite> select ieee754(6683623321994527,-47) as x; +┌───────┐ +│ x │ +├───────┤ +│ 47.49 │ +└───────┘ +</pre></div> + +<a name="ieee754m"></a> + +<h3 id="the_ieee754_mantissa_and_ieee754_exponent_functions"><span>2.1.2. </span>The ieee754_mantissa() and ieee754_exponent() functions</h3> + +<p>The text output of the one-argument form of ieee754() is great for human +readability, but it is awkward to use as part of a larger expression. Hence +the ieee754_mantissa() and ieee754_exponent() routines were added to return +the M and E values corresponding to their single argument F +value. +For example: + +</p><div class="codeblock"><pre>sqlite> .mode box +sqlite> SELECT ieee754_mantissa(47.49) AS M, ieee754_exponent(47.49) AS E; +┌──────────────────┬─────┐ +│ M │ E │ +├──────────────────┼─────┤ +│ 6683623321994527 │ -47 │ +└──────────────────┴─────┘ +</pre></div> + +<a name="ieee754b"></a> + +<h3 id="the_ieee754_from_blob_and_ieee754_to_blob_functions"><span>2.1.3. </span>The ieee754_from_blob() and ieee754_to_blob() functions</h3> + +<p>The ieee754_to_blob(F) SQL function converts the floating point number F +into an 8-byte BLOB that is the big-endian binary64 encoding of that number. +The ieee754_from_blob(B) function goes the other way, converting an 8-byte +blob into the floating-point value that the binary64 encoding represents. + +</p><p>So, for example, if you read +<a href="https://en.wikipedia.org/wiki/Double-precision_floating-point_format">on +Wikipedia</a> that the encoding for the minimum positive binary64 value is +0x0000000000000001, then you can find the corresponding floating point value +like this: + +</p><div class="codeblock"><pre>sqlite> .mode box +sqlite> SELECT ieee754_from_blob(x'0000000000000001') AS F; +┌───────────────────────┐ +│ F │ +├───────────────────────┤ +│ 4.94065645841247e-324 │ +└───────────────────────┘ +</pre></div> + +<p>Or go the other way: + +</p><div class="codeblock"><pre>sqlite> .mode box +sqlite> SELECT quote(ieee754_to_blob(4.94065645841247e-324)) AS binary64; +┌─────────────────────┐ +│ binary64 │ +├─────────────────────┤ +│ X'0000000000000001' │ +└─────────────────────┘ +</pre></div> + +<a name="decext"></a> + +<h2 id="the_decimal_c_extension"><span>2.2. </span>The decimal.c Extension</h2> + +<p>The decimal extension provides arbitrary-precision decimal arithmetic on +numbers stored as text strings. Because the numbers are stored to arbitrary +precision and as text, no approximations are needed. Computations can be +done exactly. + +</p><p>The decimal extension is not (currently) part of the SQLite <a href="amalgamation.html">amalgamation</a>. +However, it is included in the <a href="cli.html">CLI</a>. + +</p><p>There are three math functions available: + +</p><p> +</p><ul> +<li> decimal_add(A,B) +</li><li> decimal_sub(A,B) +</li><li> decimal_mul(A,B) +</li><li> decimal_pow2(N) +</li></ul> + + +<p>The first three functions respectively add, subtract, and multiply their arguments +and return a new text string that is the decimal representation of the result. The +argument is interpreted as text. +There is no division operator because decimal division often does not generate +a finite decimal result. + +</p><p>The decimal_pow2(N) function returns 2.0 raised to the N-th power where N is an +integer between -20000 and +20000. + +</p><p>Use the decimal_cmp(A,B) to compare two decimal values. The result will +be negative, zero, or positive if A is less than, equal to, or greater than B, +respectively. + +</p><p>The decimal_sum(X) function is an aggregate, like the built-in +<a href="lang_aggfunc.html#sumunc">sum() aggregate function</a>, except that decimal_sum() computes its result +to arbitrary precision and is therefore precise. + +</p><p>The decimal extension provides the "decimal" collating sequences +that compares decimal text strings in numeric order. + +</p><p>The decimal(X) and decimal_exp(X) generate a decimal representation for input X. +The decimal_exp(X) function returns the result in exponential notation (with a "e+NN" +at the end) and decimal(X) returns a pure decimal (without the "e+NN"). If the input +X is a floating point value, it is expanded to its exact decimal equivalent. For +example: + +</p><div class="codeblock"><pre>sqlite> .mode qbox +sqlite> select decimal(47.49); +┌──────────────────────────────────────────────────────┐ +│ decimal(47.49) │ +├──────────────────────────────────────────────────────┤ +│ '47.49000000000000198951966012828052043914794921875' │ +└──────────────────────────────────────────────────────┘ +</pre></div> + + |