diff options
Diffstat (limited to 'www/dbstat.html')
-rw-r--r-- | www/dbstat.html | 360 |
1 files changed, 360 insertions, 0 deletions
diff --git a/www/dbstat.html b/www/dbstat.html new file mode 100644 index 0000000..1157e4c --- /dev/null +++ b/www/dbstat.html @@ -0,0 +1,360 @@ +<!DOCTYPE html> +<html><head> +<meta name="viewport" content="width=device-width, initial-scale=1.0"> +<meta http-equiv="content-type" content="text/html; charset=UTF-8"> +<link href="sqlite.css" rel="stylesheet"> +<title>The DBSTAT Virtual Table</title> +<!-- path= --> +</head> +<body> +<div class=nosearch> +<a href="index.html"> +<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0"> +</a> +<div><!-- IE hack to prevent disappearing logo --></div> +<div class="tagline desktoponly"> +Small. Fast. Reliable.<br>Choose any three. +</div> +<div class="menu mainmenu"> +<ul> +<li><a href="index.html">Home</a> +<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a> +<li class='wideonly'><a href='about.html'>About</a> +<li class='desktoponly'><a href="docs.html">Documentation</a> +<li class='desktoponly'><a href="download.html">Download</a> +<li class='wideonly'><a href='copyright.html'>License</a> +<li class='desktoponly'><a href="support.html">Support</a> +<li class='desktoponly'><a href="prosupport.html">Purchase</a> +<li class='search' id='search_menubutton'> +<a href="javascript:void(0)" onclick='toggle_search()'>Search</a> +</ul> +</div> +<div class="menu submenu" id="submenu"> +<ul> +<li><a href='about.html'>About</a> +<li><a href='docs.html'>Documentation</a> +<li><a href='download.html'>Download</a> +<li><a href='support.html'>Support</a> +<li><a href='prosupport.html'>Purchase</a> +</ul> +</div> +<div class="searchmenu" id="searchmenu"> +<form method="GET" action="search"> +<select name="s" id="searchtype"> +<option value="d">Search Documentation</option> +<option value="c">Search Changelog</option> +</select> +<input type="text" name="q" id="searchbox" value=""> +<input type="submit" value="Go"> +</form> +</div> +</div> +<script> +function toggle_div(nm) { +var w = document.getElementById(nm); +if( w.style.display=="block" ){ +w.style.display = "none"; +}else{ +w.style.display = "block"; +} +} +function toggle_search() { +var w = document.getElementById("searchmenu"); +if( w.style.display=="block" ){ +w.style.display = "none"; +} else { +w.style.display = "block"; +setTimeout(function(){ +document.getElementById("searchbox").focus() +}, 30); +} +} +function div_off(nm){document.getElementById(nm).style.display="none";} +window.onbeforeunload = function(e){div_off("submenu");} +/* Disable the Search feature if we are not operating from CGI, since */ +/* Search is accomplished using CGI and will not work without it. */ +if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){ +document.getElementById("search_menubutton").style.display = "none"; +} +/* Used by the Hide/Show button beside syntax diagrams, to toggle the */ +function hideorshow(btn,obj){ +var x = document.getElementById(obj); +var b = document.getElementById(btn); +if( x.style.display!='none' ){ +x.style.display = 'none'; +b.innerHTML='show'; +}else{ +x.style.display = ''; +b.innerHTML='hide'; +} +return false; +} +var antiRobot = 0; +function antiRobotGo(){ +if( antiRobot!=3 ) return; +antiRobot = 7; +var j = document.getElementById("mtimelink"); +if(j && j.hasAttribute("data-href")) j.href=j.getAttribute("data-href"); +} +function antiRobotDefense(){ +document.body.onmousedown=function(){ +antiRobot |= 2; +antiRobotGo(); +document.body.onmousedown=null; +} +document.body.onmousemove=function(){ +antiRobot |= 2; +antiRobotGo(); +document.body.onmousemove=null; +} +setTimeout(function(){ +antiRobot |= 1; +antiRobotGo(); +}, 100) +antiRobotGo(); +} +antiRobotDefense(); +</script> +<div class=fancy> +<div class=nosearch> +<div class="fancy_title"> +The DBSTAT Virtual Table +</div> +</div> + + + + + +<h1 id="overview"><span>1. </span>Overview</h1> + +<p> +The DBSTAT virtual table is a read-only <a href="vtab.html#epovtab">eponymous virtual table</a> that returns +information about the amount of disk space used to store the content +of an SQLite database. +Example use cases for the +DBSTAT virtual table include the <a href="sqlanalyze.html">sqlite3_analyzer.exe</a> +utility program and the +<a href="https://www.sqlite.org/src/repo-tabsize">table size pie-chart</a> in +the <a href="https://www.fossil-scm.org/">Fossil-implemented</a> version control system +for SQLite. +</p> + +<p> +The DBSTAT virtual table is available on all +<a href="c3ref/sqlite3.html">database connections</a> when SQLite is built using the +<a href="compile.html#enable_dbstat_vtab">SQLITE_ENABLE_DBSTAT_VTAB</a> compile-time option. + +</p><p> +The DBSTAT virtual table is an <a href="vtab.html#epovtab">eponymous virtual table</a>, meaning +that is not necessary to run <a href="lang_createvtab.html">CREATE VIRTUAL TABLE</a> to create an +instance of the dbstat virtual table before using it. The "dbstat" +module name can be used as if it were a table name to query the +dbstat virtual table directly. For example: + +</p><div class="codeblock"><pre>SELECT * FROM dbstat; +</pre></div> + +<p> +If a named virtual table that uses the dbstat module is desired, +then the recommended way to create an instance of the dbstat +virtual table is as follows: + +</p><div class="codeblock"><pre>CREATE VIRTUAL TABLE temp.stat USING dbstat(main); +</pre></div> + +<p> +Note the "temp." qualifier before the virtual table name ("stat"). This +qualifier causes the virtual table to be temporary - to only exist for +the duration of the current database connection. This is the +recommended approach. + +</p><p> +The "main" argument to dbstat is default schema +for which information is to be provided. The default is "main", and +so the use of "main" in the example above is redundant. For any +particular query, the schema can be changed by specifying the +alternative schema as a function argument to the virtual table +name in the FROM clause of the query. (See further discussion of +<a href="lang_select.html#tabfunc1">table-valued functions in the FROM clause</a> for more details.) + +</p><p> +The schema for the DBSTAT virtual table looks like this: +</p><div class="codeblock"><pre>CREATE TABLE dbstat( + name TEXT, -- Name of table or index + path TEXT, -- Path to page from root + pageno INTEGER, -- Page number, or page count + pagetype TEXT, -- 'internal', 'leaf', 'overflow', or NULL + ncell INTEGER, -- Cells on page (0 for overflow pages) + payload INTEGER, -- Bytes of payload on this page or btree + unused INTEGER, -- Bytes of unused space on this page or btree + mx_payload INTEGER, -- Largest payload size of all cells on this row + pgoffset INTEGER, -- Byte offset of the page in the database file + pgsize INTEGER, -- Size of the page, in bytes + schema TEXT HIDDEN, -- Database schema being analyzed + aggregate BOOL HIDDEN -- True to enable aggregate mode +); +</pre></div> + +<p> +The DBSTAT table only reports on the content of btrees within the database file. +Freelist pages, pointer-map pages, and the lock page are omitted from +the analysis. + +</p><p> +By default, there is a single row in the DBSTAT table for each +btree page the database file. Each row provides +information about the space utilization of that one page of the +database. However, if the hidden column "aggregate" is TRUE, then +results are aggregated and there is a single row in the DBSTAT table +for each btree in the database, providing information about space +utilization across the entire btree. + +<a name="dbstatpath"></a> +</p><h1 id="the_path_column_of_the_dbstat_virtual_table"><span>2. </span>The "path" column of the dbstat virtual table</h1> + +<p> +The "path" column describes the path taken from the +root node of the btree structure to each page. The +"path" of the root node itself is '/'. +The "path" is NULL when "aggregate" is TRUE. + +The "path" for the left-most child page of the root of +a btree page is '/000/'. (Btrees store content ordered from left to right +so the pages to the left have smaller keys than the pages to the right.) +The next to left-most child of the root page is '/001', and so on, +each sibling page identified by a 3-digit hex value. +The children of the 451st left-most sibling have paths such +as '/1c2/000/, '/1c2/001/' etc. + +Overflow pages are specified by appending a '+' character and a +six-digit hexadecimal value to the path to the cell they are linked +from. For example, the three overflow pages in a chain linked from +the left-most cell of the 450th child of the root page are identified +by the paths: + +</p><div class="codeblock"><pre>'/1c2/000+000000' // First page in overflow chain +'/1c2/000+000001' // Second page in overflow chain +'/1c2/000+000002' // Third page in overflow chain +</pre></div> + +<p> +If the paths are sorted using the BINARY collation sequence, then +the overflow pages associated with a cell will appear earlier in the +sort-order than its child page: + +</p><div class="codeblock"><pre>'/1c2/000/' // Left-most child of 451st child of root +</pre></div> + +<a name="dbstatagg"></a> + +<h1 id="aggregated_data"><span>3. </span>Aggregated Data</h1> + +<p> +Beginning with SQLite version 3.31.0 (2020-01-22), the DBSTAT table +has a new <a href="vtab.html#hiddencol">hidden column</a> named "aggregate", which if constrained to be +TRUE will cause DBSTAT to generate one row per btree in the database, +rather than one row per page. When running in aggregated mode, the +"path", "pagetype", and "pgoffset" columns are always NULL and the +"pageno" column holds the number of pages in the entire btree, rather +than the number of the page that corresponds to the row. + +</p><p> +The following table shows the meanings of the (non-hidden) columns of +DBSTAT in both normal and aggregated mode: + +</p><center><blockquote> +<table border="1" cellpadding="5" cellspacing="0"> +<tr> +<th>Column +</th><th>Normal meaning +</th><th>Aggregate-mode meaning +</th></tr> +<tr> +<th>name +</th><td colspan="2"> +The name of the table or index that is implemented by +the btree of the current row +</td></tr><tr> +<th>path +</th><td>See <a href="#dbstatpath">description above</a> +</td><td>Always NULL +</td></tr><tr> +<th>pageno +</th><td>The page number of the database page for the current row +</td><td>The total number of pages in the btree for the current row +</td></tr><tr> +<th>pagetype +</th><td>'leaf' or 'interior' +</td><td>Always NULL +</td></tr><tr> +<th>ncell +</th><td colspan="2">Number of cells on the current page or btree +</td></tr><tr> +<th>payload +</th><td colspan="2">Bytes of useful payload on the current page or btree +</td></tr><tr> +<th>unused +</th><td colspan="2">Unused bytes of on the current page or btree +</td></tr><tr> +<th>mx_payload +</th><td colspan="2">The largest payload found anywhere in the current page +or btree. +</td></tr><tr> +<th>pgoffset +</th><td>Byte offset to the start of the page +</td><td>Always NULL +</td></tr><tr> +<th>pgsize +</th><td colspan="2">Total storage space used by the current page or btree. +</td></tr></table> +</blockquote></center> + +<h1 id="example_uses_of_the_dbstat_virtual_table"><span>4. </span>Example uses of the dbstat virtual table</h1> + +<p> +To find the total number of pages used to store table "xyz" in schema "aux1", +use either of the following two queries (the first is the traditional way, +and the second shows the use of the aggregated feature): + +</p><div class="codeblock"><pre>SELECT count(*) FROM dbstat('aux1') WHERE name='xyz'; +SELECT pageno FROM dbstat('aux1',1) WHERE name='xyz'; +</pre></div> + +<p> +To see how efficiently the content of a table is stored on disk, +compute the amount of space used to hold actual content divided +by the total amount of disk space used. The closer this number +is to 100%, the more efficient the packing. (In this example, the +'xyz' table is assumed to be in the 'main' schema. Again, there +are two different versions that show the use of DBSTAT both without +and with the new aggregated feature, respectively.) + +</p><div class="codeblock"><pre>SELECT sum(pgsize-unused)*100.0/sum(pgsize) FROM dbstat WHERE name='xyz'; +SELECT (pgsize-unused)*100.0/pgsize FROM dbstat + WHERE name='xyz' AND aggregate=TRUE; +</pre></div> + +<p> +To find the average fan-out for a table, run: + +</p><div class="codeblock"><pre>SELECT avg(ncell) FROM dbstat WHERE name='xyz' AND pagetype='internal'; +</pre></div> + + +<p> +Modern filesystems operate faster when disk accesses are sequential. +Hence, SQLite will run faster if the content of the database file +is on sequential pages. To find out what fraction of the pages in +a database are sequential (and thus obtain a measurement that might +be useful in determining when to <a href="lang_vacuum.html">VACUUM</a>), run a query like the following: + +</p><div class="codeblock"><pre>CREATE TEMP TABLE s(rowid INTEGER PRIMARY KEY, pageno INT); +INSERT INTO s(pageno) SELECT pageno FROM dbstat ORDER BY path; +SELECT sum(s1.pageno+1==s2.pageno)*1.0/count(*) + FROM s AS s1, s AS s2 + WHERE s1.rowid+1=s2.rowid; +DROP TABLE s; +</pre></div> +<p align="center"><small><i>This page last modified on <a href="https://sqlite.org/docsrc/honeypot" id="mtimelink" data-href="https://sqlite.org/docsrc/finfo/pages/dbstat.in?m=47144226efc96ca2e">2020-01-22 17:49:40</a> UTC </small></i></p> + |