summaryrefslogtreecommitdiffstats
path: root/www/cpu.html
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--www/cpu.html377
1 files changed, 377 insertions, 0 deletions
diff --git a/www/cpu.html b/www/cpu.html
new file mode 100644
index 0000000..192cbaf
--- /dev/null
+++ b/www/cpu.html
@@ -0,0 +1,377 @@
+<!DOCTYPE html>
+<html><head>
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<meta http-equiv="content-type" content="text/html; charset=UTF-8">
+<link href="sqlite.css" rel="stylesheet">
+<title>Measuring and Reducing CPU Usage in SQLite</title>
+<!-- path= -->
+</head>
+<body>
+<div class=nosearch>
+<a href="index.html">
+<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
+</a>
+<div><!-- IE hack to prevent disappearing logo --></div>
+<div class="tagline desktoponly">
+Small. Fast. Reliable.<br>Choose any three.
+</div>
+<div class="menu mainmenu">
+<ul>
+<li><a href="index.html">Home</a>
+<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
+<li class='wideonly'><a href='about.html'>About</a>
+<li class='desktoponly'><a href="docs.html">Documentation</a>
+<li class='desktoponly'><a href="download.html">Download</a>
+<li class='wideonly'><a href='copyright.html'>License</a>
+<li class='desktoponly'><a href="support.html">Support</a>
+<li class='desktoponly'><a href="prosupport.html">Purchase</a>
+<li class='search' id='search_menubutton'>
+<a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
+</ul>
+</div>
+<div class="menu submenu" id="submenu">
+<ul>
+<li><a href='about.html'>About</a>
+<li><a href='docs.html'>Documentation</a>
+<li><a href='download.html'>Download</a>
+<li><a href='support.html'>Support</a>
+<li><a href='prosupport.html'>Purchase</a>
+</ul>
+</div>
+<div class="searchmenu" id="searchmenu">
+<form method="GET" action="search">
+<select name="s" id="searchtype">
+<option value="d">Search Documentation</option>
+<option value="c">Search Changelog</option>
+</select>
+<input type="text" name="q" id="searchbox" value="">
+<input type="submit" value="Go">
+</form>
+</div>
+</div>
+<script>
+function toggle_div(nm) {
+var w = document.getElementById(nm);
+if( w.style.display=="block" ){
+w.style.display = "none";
+}else{
+w.style.display = "block";
+}
+}
+function toggle_search() {
+var w = document.getElementById("searchmenu");
+if( w.style.display=="block" ){
+w.style.display = "none";
+} else {
+w.style.display = "block";
+setTimeout(function(){
+document.getElementById("searchbox").focus()
+}, 30);
+}
+}
+function div_off(nm){document.getElementById(nm).style.display="none";}
+window.onbeforeunload = function(e){div_off("submenu");}
+/* Disable the Search feature if we are not operating from CGI, since */
+/* Search is accomplished using CGI and will not work without it. */
+if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
+document.getElementById("search_menubutton").style.display = "none";
+}
+/* Used by the Hide/Show button beside syntax diagrams, to toggle the */
+function hideorshow(btn,obj){
+var x = document.getElementById(obj);
+var b = document.getElementById(btn);
+if( x.style.display!='none' ){
+x.style.display = 'none';
+b.innerHTML='show';
+}else{
+x.style.display = '';
+b.innerHTML='hide';
+}
+return false;
+}
+var antiRobot = 0;
+function antiRobotGo(){
+if( antiRobot!=3 ) return;
+antiRobot = 7;
+var j = document.getElementById("mtimelink");
+if(j && j.hasAttribute("data-href")) j.href=j.getAttribute("data-href");
+}
+function antiRobotDefense(){
+document.body.onmousedown=function(){
+antiRobot |= 2;
+antiRobotGo();
+document.body.onmousedown=null;
+}
+document.body.onmousemove=function(){
+antiRobot |= 2;
+antiRobotGo();
+document.body.onmousemove=null;
+}
+setTimeout(function(){
+antiRobot |= 1;
+antiRobotGo();
+}, 100)
+antiRobotGo();
+}
+antiRobotDefense();
+</script>
+<div class=fancy>
+<div class=nosearch>
+<div class="fancy_title">
+Measuring and Reducing CPU Usage in SQLite
+</div>
+<div class="fancy_toc">
+<a onclick="toggle_toc()">
+<span class="fancy_toc_mark" id="toc_mk">&#x25ba;</span>
+Table Of Contents
+</a>
+<div id="toc_sub"><div class="fancy-toc1"><a href="#overview">1. Overview</a></div>
+<div class="fancy-toc1"><a href="#measuring_performance">2. Measuring Performance</a></div>
+<div class="fancy-toc2"><a href="#compile_options">2.1. Compile Options</a></div>
+<div class="fancy-toc2"><a href="#workload">2.2. Workload</a></div>
+<div class="fancy-toc2"><a href="#performance_measurement">2.3. Performance Measurement</a></div>
+<div class="fancy-toc2"><a href="#microoptimizations">2.4. Microoptimizations</a></div>
+<div class="fancy-toc1"><a href="#performance_measurement_workflow">3. Performance Measurement Workflow</a></div>
+<div class="fancy-toc1"><a href="#limitations">4. Limitations</a></div>
+</div>
+</div>
+<script>
+function toggle_toc(){
+var sub = document.getElementById("toc_sub")
+var mk = document.getElementById("toc_mk")
+if( sub.style.display!="block" ){
+sub.style.display = "block";
+mk.innerHTML = "&#x25bc;";
+} else {
+sub.style.display = "none";
+mk.innerHTML = "&#x25ba;";
+}
+}
+</script>
+</div>
+
+
+
+
+
+<h1 id="overview"><span>1. </span>Overview</h1>
+
+<p>The graph below shows the number of CPU cycles used by SQLite on a
+standard workload, for versions of SQLite going back about 10 years.
+Recent versions of SQLite use about one third as many the CPU cycles
+compared to older versions.
+
+</p><p>
+This article describes how the SQLite developers measure CPU usage,
+what those measurements actually mean, and the techniques used by
+SQLite developers on their continuing quest to further reduce the
+CPU usage of the SQLite library.
+</p>
+
+<center>
+<hr>
+<div class="imgcontainer">
+<img src="./images/cpu-usage.jpg"></div><br>
+Measured using cachegrind on Ubuntu 16.04 on x64 with gcc 5.4.0 and -Os.<br>
+<hr>
+</center>
+
+<h1 id="measuring_performance"><span>2. </span>Measuring Performance</h1>
+
+<p>In brief, the CPU performance of SQLite is measured as follows:
+
+</p><p></p><ol>
+<li> Compile SQLite in an as-delivered configuration, without any special
+ telemetry or debugging options.
+</li><li> Link SQLite against a test program that runs approximately 30,000
+ SQL statements representing a typical workload.
+</li><li> Count the number of CPU cycles consumed using
+ <a href="http://valgrind.org/docs/manual/cg-manual.html">cachegrind</a>.
+</li></ol>
+
+<h2 id="compile_options"><span>2.1. </span>Compile Options</h2>
+
+<p>For performance measurement, SQLite is compiled in approximately the same
+way as it would be for use in production systems. The compile-time configuration
+is "approximate" in the sense that every production use of SQLite is
+different. Compile-time options used by one system are not necessarily
+the same as those used by others. The key point is that options that
+significantly impact the generated machine code are avoided. For example,
+the -DSQLITE_DEBUG option is omitted because that option inserts thousands
+of assert() statements in the middle of performance critical sections of the
+SQLite library. The -pg option (on GCC) is omitted because it causes the
+compiler to emit extra probabilistic performance measuring code which interferes
+with actual performance measurements.
+
+</p><p>
+For performance measurements,
+the -Os option is used (optimize for size) rather than -O2 because the
+-O2 option creates so much code movement that it is difficult to associate
+specific CPU instructions to C source code lines.
+
+</p><h2 id="workload"><span>2.2. </span>Workload</h2>
+
+<p>
+The "typical" workload is generated by the
+<a href="https://sqlite.org/src/file/test/speedtest1.c">speedtest1.c</a>
+program in the canonical SQLite source tree. This program strives to
+exercise the SQLite library in a way that is typical of real-world
+applications. Of course, every application is different, and so
+no test program can exactly mirror the behavior of all applications.
+
+</p><p>
+The speedtest1.c program is updated from time to time as the SQLite
+developers' understanding of what constitutes "typical" usage evolves.
+
+</p><p>
+The
+<a href="https://sqlite.org/src/file/tool/speed-check.sh">speed-check.sh</a> shell
+script, also in the canonical source tree, is used to run the speedtest1.c
+program. To replicate the performance measurements, collect the following
+files into a single directory:
+</p><ul>
+<li> the "speed-check.sh" script,
+</li><li> the "speedtest1.c" test program, and
+</li><li> the <a href="amalgamation.html">SQLite amalgamation</a> source files "sqlite3.c" and
+ "sqlite3.h"
+</li></ul>
+<p>
+Then run "sh speed-check.sh trunk".
+
+
+</p><h2 id="performance_measurement"><span>2.3. </span>Performance Measurement</h2>
+
+<p>
+<a href="http://valgrind.org/docs/manual/cg-manual.html">Cachegrind</a> is used to
+measure performance because it gives answers that are repeatable to
+7 or more significant digits. In comparison, actual (wall-clock)
+run times are scarcely repeatable beyond one significant digit.
+
+<a name="microopt"></a>
+
+</p><h2 id="microoptimizations"><span>2.4. </span>Microoptimizations</h2>
+
+<p>
+The high repeatability of cachegrind allows the SQLite developers to
+implement and measure "microoptimizations". A microoptimization is
+a change to the code that results in a very small performance increase.
+Typical micro-optimizations reduce the number of CPU cycles by 0.1% or
+0.05% or even less. Such improvements are impossible to measure with
+real-world timings. But hundreds or thousands of microoptimizations
+add up, resulting in measurable real-world performance gains.
+
+</p><h1 id="performance_measurement_workflow"><span>3. </span>Performance Measurement Workflow</h1>
+
+<p>
+As SQLite developers edit the SQLite source code, they run the
+<a href="https://sqlite.org/src/file/tool/speed-check.sh">speed-check.sh</a>
+shell script to track the performance impact of changes. This
+script compiles the speedtest1.c program, runs it under cachegrind,
+processes the cachegrind output using the
+<a href="https://sqlite.org/src/file/tool/cg_anno.tcl">cg_anno.tcl</a> TCL
+script, then saves the results in a series of text files.
+Typical output from the speed-check.sh script looks like this:
+
+</p><blockquote><pre>
+==8683==
+==8683== I refs: <font color="red">1,060,925,768</font>
+==8683== I1 misses: 23,731,246
+==8683== LLi misses: 5,176
+==8683== I1 miss rate: 2.24%
+==8683== LLi miss rate: 0.00%
+==8683==
+==8683== D refs: 557,686,925 (361,828,925 rd + 195,858,000 wr)
+==8683== D1 misses: 5,067,063 ( 3,544,278 rd + 1,522,785 wr)
+==8683== LLd misses: 57,958 ( 16,067 rd + 41,891 wr)
+==8683== D1 miss rate: 0.9% ( 1.0% + 0.8% )
+==8683== LLd miss rate: 0.0% ( 0.0% + 0.0% )
+==8683==
+==8683== LL refs: 28,798,309 ( 27,275,524 rd + 1,522,785 wr)
+==8683== LL misses: 63,134 ( 21,243 rd + 41,891 wr)
+==8683== LL miss rate: 0.0% ( 0.0% + 0.0% )
+ text data bss dec hex filename
+ 523044 8240 1976 <font color="red">533260</font> 8230c sqlite3.o
+ 220507 1007870 7769352 sqlite3.c
+</pre></blockquote>
+
+<p>The important parts of the output (the parts that the developers pay
+the most attention to) are shown in red.
+Basically, the developers want to know the size of the compiled SQLite
+library and how many CPU cycles were needed to run the performance test.
+
+</p><p>The output from the
+<a href="https://sqlite.org/src/file/tool/cg_anno.tcl">cg_anno.tcl</a> script
+shows the number of CPU cycles spent on each line of code.
+The report is approximately 80,000 lines long. The following is a brief
+snippet taken from the middle of the report to show what it looks like:
+
+</p><blockquote><pre>
+ . SQLITE_PRIVATE int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
+ . MemPage *pPage;
+ . assert( cursorOwnsBtShared(pCur) );
+ . assert( pRes!=0 );
+ . assert( *pRes==0 || *pRes==1 );
+ . assert( pCur-&gt;skipNext==0 || pCur-&gt;eState!=CURSOR_VALID );
+ 369,648 pCur-&gt;info.nSize = 0;
+ 369,648 pCur-&gt;curFlags &amp;= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
+ 369,648 *pRes = 0;
+ 739,296 if( pCur-&gt;eState!=CURSOR_VALID ) return btreeNext(pCur, pRes);
+ 1,473,580 pPage = pCur-&gt;apPage&#91;pCur-&gt;iPage&#93;;
+ 1,841,975 if( (++pCur-&gt;aiIdx&#91;pCur-&gt;iPage&#93;)&gt;=pPage-&gt;nCell ){
+ 4,340 pCur-&gt;aiIdx&#91;pCur-&gt;iPage&#93;--;
+ 5,593 return btreeNext(pCur, pRes);
+ . }
+ 728,110 if( pPage-&gt;leaf ){
+ . return SQLITE_OK;
+ . }else{
+ 3,117 return moveToLeftmost(pCur);
+ . }
+ 721,876 }
+</pre></blockquote>
+
+<p>
+The numbers on the left are the CPU cycle counts for that line of code,
+of course.
+
+</p><p>
+The cg_anno.tcl script removes extraneous details from the default
+cachegrind annotation
+output so that before-and-after reports can be compared using a
+side-by-side diff to view specific details of how a
+micro-optimization attempt affected performance.
+
+
+</p><h1 id="limitations"><span>4. </span>Limitations</h1>
+
+<p>The use of the standardized speedtest1.c workload and cachegrind has
+enabled significant performance improvement.
+However, it is important to recognize the limitations of this approach:
+
+</p><ul>
+<li><p>
+Performance measurements are done with a single compiler (gcc 5.4.0),
+optimization setting (-Os), and
+on a single platform (Ubuntu 16.04 LTS on x64). The performance of
+other compilers and processors may vary.
+
+</p></li><li><p>
+The speedtest1.c workload that is being measured tries to be representative
+of a wide range of typical uses of SQLite. But every application is
+different. The speedtest1.c workload might not be a good proxy for the
+kinds of activities performed by some applications. The SQLite developers
+are constantly working to improve the speedtest1.c program, to make it
+a better proxy for actual SQLite usage. Community feedback is welcomed.
+
+</p></li><li><p>
+The cycle counts provided by cachegrind are a good proxy for actual
+performance, but they are not 100% accurate.
+
+</p></li><li><p>
+Only CPU cycle counts are being measured here.
+CPU cycle counts are a good proxy for energy consumption,
+but do not necessary correlate well with real-world timings.
+Time spent doing I/O is not reflected in the CPU cycle counts,
+and I/O time predominates in many SQLite usage scenarios.
+</p></li></ul>
+<p align="center"><small><i>This page last modified on <a href="https://sqlite.org/docsrc/honeypot" id="mtimelink" data-href="https://sqlite.org/docsrc/finfo/pages/cpu.in?m=30de04a5bf">2022-01-08 05:02:57</a> UTC </small></i></p>
+