author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-13 14:07:11 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-13 14:07:11 +0000
commit     63847496f14c813a5d80efd5b7de0f1294ffe1e3 (patch)
tree       01c7571c7c762ceee70638549a99834fdd7c411b /tool/spaceanal.tcl
parent     Initial commit. (diff)
Adding upstream version 3.45.1. (tag: upstream/3.45.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tool/spaceanal.tcl')
-rw-r--r--  tool/spaceanal.tcl  895
1 file changed, 895 insertions(+), 0 deletions(-)
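The script's own usage text (visible in the diff below) shows how it is meant
to be run; typical invocations, with a hypothetical database file name, look
like:

    tclsh spaceanal.tcl mydb.sqlite              (full space-utilization report)
    tclsh spaceanal.tcl --pageinfo mydb.sqlite   (per-page usage map)
    tclsh spaceanal.tcl --stats mydb.sqlite      (raw dbstat rows as SQL)

The interpreter must be linked with SQLite and built with the dbstat virtual
table (-DSQLITE_ENABLE_DBSTAT_VTAB); the script itself checks for this.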
diff --git a/tool/spaceanal.tcl b/tool/spaceanal.tcl
new file mode 100644
index 0000000..8fe72b9
--- /dev/null
+++ b/tool/spaceanal.tcl
@@ -0,0 +1,895 @@
+# Run this TCL script using an SQLite-enabled TCL interpreter to get a report
+# on how much disk space a particular database file actually uses to store
+# data versus how much space is unused.
+#
+# The dbstat virtual table is required.
+#
+
+if {[catch {
+
+# Argument $tname is the name of a table within the database opened by
+# database handle [db]. Return true if it is a WITHOUT ROWID table, or
+# false otherwise.
+#
+proc is_without_rowid {tname} {
+  set t [string map {' ''} $tname]
+  db eval "PRAGMA index_list = '$t'" o {
+    if {$o(origin) == "pk"} {
+      set n $o(name)
+      if {0==[db one { SELECT count(*) FROM sqlite_schema WHERE name=$n }]} {
+        return 1
+      }
+    }
+  }
+  return 0
+}
+
+# Read and run TCL commands from standard input. Used to implement
+# the --tclsh option.
+#
+proc tclsh {} {
+  set line {}
+  while {![eof stdin]} {
+    if {$line!=""} {
+      puts -nonewline "> "
+    } else {
+      puts -nonewline "% "
+    }
+    flush stdout
+    append line [gets stdin]
+    if {[info complete $line]} {
+      if {[catch {uplevel #0 $line} result]} {
+        puts stderr "Error: $result"
+      } elseif {$result!=""} {
+        puts $result
+      }
+      set line {}
+    } else {
+      append line \n
+    }
+  }
+}
+
+
+# Get the name of the database to analyze
+#
+proc usage {} {
+  set argv0 [file rootname [file tail [info nameofexecutable]]]
+  puts stderr "Usage: $argv0 ?--pageinfo? ?--stats? database-filename"
+  puts stderr {
+Analyze the SQLite3 database file specified by the "database-filename"
+argument and output a report detailing size and storage efficiency
+information for the database and its constituent tables and indexes.
+
+Options:
+
+   --pageinfo  Show how each page of the database-file is used
+
+   --stats     Output SQL text that creates a new database containing
+               statistics about the database that was analyzed
+
+   --tclsh     Run the built-in TCL interpreter interactively (for debugging)
+
+   --version   Show the version number of SQLite
+}
+  exit 1
+}
+set file_to_analyze {}
+set flags(-pageinfo) 0
+set flags(-stats) 0
+set flags(-debug) 0
+append argv {}
+foreach arg $argv {
+  if {[regexp {^-+pageinfo$} $arg]} {
+    set flags(-pageinfo) 1
+  } elseif {[regexp {^-+stats$} $arg]} {
+    set flags(-stats) 1
+  } elseif {[regexp {^-+debug$} $arg]} {
+    set flags(-debug) 1
+  } elseif {[regexp {^-+tclsh$} $arg]} {
+    tclsh
+    exit 0
+  } elseif {[regexp {^-+version$} $arg]} {
+    sqlite3 mem :memory:
+    puts [mem one {SELECT sqlite_version()||' '||sqlite_source_id()}]
+    mem close
+    exit 0
+  } elseif {[regexp {^-} $arg]} {
+    puts stderr "Unknown option: $arg"
+    usage
+  } elseif {$file_to_analyze!=""} {
+    usage
+  } else {
+    set file_to_analyze $arg
+  }
+}
+if {$file_to_analyze==""} usage
+set root_filename $file_to_analyze
+regexp {^file:(//)?([^?]*)} $file_to_analyze all x1 root_filename
+if {![file exists $root_filename]} {
+  puts stderr "No such file: $root_filename"
+  exit 1
+}
+if {![file readable $root_filename]} {
+  puts stderr "File is not readable: $root_filename"
+  exit 1
+}
+set true_file_size [file size $root_filename]
+if {$true_file_size<512} {
+  puts stderr "Empty or malformed database: $root_filename"
+  exit 1
+}
+
+# Compute the total file size assuming test_multiplexor is being used.
+# Assume that SQLITE_ENABLE_8_3_NAMES might be enabled
+#
+set extension [file extension $root_filename]
+set pattern $root_filename
+append pattern {[0-3][0-9][0-9]}
+foreach f [glob -nocomplain $pattern] {
+  incr true_file_size [file size $f]
+  set extension {}
+}
+if {[string length $extension]>=2 && [string length $extension]<=4} {
+  set pattern [file rootname $root_filename]
+  append pattern {.[0-3][0-9][0-9]}
+  foreach f [glob -nocomplain $pattern] {
+    incr true_file_size [file size $f]
+  }
+}
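+# As a concrete illustration (file names hypothetical): for a root file
+# "test.db", the first pattern above matches multiplexor overflow files
+# "test.db001" through "test.db399", while the 8.3-names pattern matches
+# "test.001" through "test.399" when the extension is 2 to 4 characters
+# long and no multiplexor files were found.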
+
+# Open the database
+#
+if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} {
+  puts stderr "error trying to open $file_to_analyze: $msg"
+  exit 1
+}
+if {$flags(-debug)} {
+  proc dbtrace {txt} {puts $txt; flush stdout;}
+  db trace ::dbtrace
+}
+
+# Make sure all required compile-time options are available
+#
+if {![db exists {SELECT 1 FROM pragma_compile_options
+                 WHERE compile_options='ENABLE_DBSTAT_VTAB'}]} {
+  puts "The SQLite database engine linked with this application\
+        lacks required capabilities. Recompile using the\
+        -DSQLITE_ENABLE_DBSTAT_VTAB compile-time option to fix\
+        this problem."
+  exit 1
+}
+
+db eval {SELECT count(*) FROM sqlite_schema}
+set pageSize [expr {wide([db one {PRAGMA page_size}])}]
+
+if {$flags(-pageinfo)} {
+  db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
+  db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} {
+    puts "$pageno $name $path"
+  }
+  exit 0
+}
+if {$flags(-stats)} {
+  db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
+  puts "BEGIN;"
+  puts "CREATE TABLE stats("
+  puts "  name       STRING,    /* Name of table or index */"
+  puts "  path       INTEGER,   /* Path to page from root */"
+  puts "  pageno     INTEGER,   /* Page number */"
+  puts "  pagetype   STRING,    /* 'internal', 'leaf' or 'overflow' */"
+  puts "  ncell      INTEGER,   /* Cells on page (0 for overflow) */"
+  puts "  payload    INTEGER,   /* Bytes of payload on this page */"
+  puts "  unused     INTEGER,   /* Bytes of unused space on this page */"
+  puts "  mx_payload INTEGER,   /* Largest payload size of all cells */"
+  puts "  pgoffset   INTEGER,   /* Offset of page in file */"
+  puts "  pgsize     INTEGER    /* Size of the page */"
+  puts ");"
+  db eval {SELECT quote(name)       || ',' ||
+                  quote(path)       || ',' ||
+                  quote(pageno)     || ',' ||
+                  quote(pagetype)   || ',' ||
+                  quote(ncell)      || ',' ||
+                  quote(payload)    || ',' ||
+                  quote(unused)     || ',' ||
+                  quote(mx_payload) || ',' ||
+                  quote(pgoffset)   || ',' ||
+                  quote(pgsize) AS x FROM stat} {
+    puts "INSERT INTO stats VALUES($x);"
+  }
+  puts "COMMIT;"
+  exit 0
+}
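+# For illustration, one row emitted by the --stats branch might look like
+# this (all values hypothetical):
+#
+#   INSERT INTO stats VALUES('sqlite_schema','/',1,'leaf',3,240,3612,120,0,4096);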
+
+
+# In-memory database for collecting statistics. This script loops through
+# the tables and indices in the database being analyzed, adding a row for each
+# to an in-memory database (for which the schema is shown below). It then
+# queries the in-memory db to produce the space-analysis report.
+#
+sqlite3 mem :memory:
+if {$flags(-debug)} {
+  proc dbtrace {txt} {puts $txt; flush stdout;}
+  mem trace ::dbtrace
+}
+set tabledef {CREATE TABLE space_used(
+   name clob,         -- Name of a table or index in the database file
+   tblname clob,      -- Name of associated table
+   is_index boolean,  -- TRUE if it is an index, false for a table
+   is_without_rowid boolean, -- TRUE if WITHOUT ROWID table
+   nentry int,        -- Number of entries in the BTree
+   leaf_entries int,  -- Number of leaf entries
+   depth int,         -- Depth of the b-tree
+   payload int,       -- Total amount of data stored in this table or index
+   ovfl_payload int,  -- Total amount of data stored on overflow pages
+   ovfl_cnt int,      -- Number of entries that use overflow
+   mx_payload int,    -- Maximum payload size
+   int_pages int,     -- Number of interior pages used
+   leaf_pages int,    -- Number of leaf pages used
+   ovfl_pages int,    -- Number of overflow pages used
+   int_unused int,    -- Number of unused bytes on interior pages
+   leaf_unused int,   -- Number of unused bytes on primary pages
+   ovfl_unused int,   -- Number of unused bytes on overflow pages
+   gap_cnt int,       -- Number of gaps in the page layout
+   compressed_size int  -- Total bytes stored on disk
+);}
+mem eval $tabledef
+
+# Create a temporary "dbstat" virtual table.
+#
+db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
+db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat
+         ORDER BY name, path}
+db eval {DROP TABLE temp.stat}
+
+set isCompressed 0
+set compressOverhead 0
+set depth 0
+set sql { SELECT name, tbl_name FROM sqlite_schema WHERE rootpage>0 }
+foreach {name tblname} [concat sqlite_schema sqlite_schema [db eval $sql]] {
+
+  set is_index [expr {$name!=$tblname}]
+  set is_without_rowid [is_without_rowid $name]
+  db eval {
+    SELECT
+      sum(ncell) AS nentry,
+      sum((pagetype=='leaf')*ncell) AS leaf_entries,
+      sum(payload) AS payload,
+      sum((pagetype=='overflow') * payload) AS ovfl_payload,
+      sum(path LIKE '%+000000') AS ovfl_cnt,
+      max(mx_payload) AS mx_payload,
+      sum(pagetype=='internal') AS int_pages,
+      sum(pagetype=='leaf') AS leaf_pages,
+      sum(pagetype=='overflow') AS ovfl_pages,
+      sum((pagetype=='internal') * unused) AS int_unused,
+      sum((pagetype=='leaf') * unused) AS leaf_unused,
+      sum((pagetype=='overflow') * unused) AS ovfl_unused,
+      sum(pgsize) AS compressed_size,
+      max((length(CASE WHEN path LIKE '%+%' THEN '' ELSE path END)+3)/4)
+        AS depth
+    FROM temp.dbstat WHERE name = $name
+  } break
+
+  set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
+  set storage [expr {$total_pages*$pageSize}]
+  if {!$isCompressed && $storage>$compressed_size} {
+    set isCompressed 1
+    set compressOverhead 14
+  }
+
+  # Column 'gap_cnt' is set to the number of non-contiguous entries in the
+  # list of pages visited if the b-tree structure is traversed in a top-down
+  # fashion (each node visited before its child-tree is passed). Any overflow
+  # chains present are traversed from start to finish before any child-tree
+  # is.
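+  # For example (hypothetical page numbers): if the pages of a btree are
+  # visited in the order 2, 3, 4, 10, 11 and page 10 is a leaf, the jump
+  # from page 4 to page 10 counts as one gap.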
+  #
+  set gap_cnt 0
+  set prev 0
+  db eval {
+    SELECT pageno, pagetype FROM temp.dbstat
+     WHERE name=$name
+     ORDER BY pageno
+  } {
+    if {$prev>0 && $pagetype=="leaf" && $pageno!=$prev+1} {
+      incr gap_cnt
+    }
+    set prev $pageno
+  }
+  mem eval {
+    INSERT INTO space_used VALUES(
+      $name,
+      $tblname,
+      $is_index,
+      $is_without_rowid,
+      $nentry,
+      $leaf_entries,
+      $depth,
+      $payload,
+      $ovfl_payload,
+      $ovfl_cnt,
+      $mx_payload,
+      $int_pages,
+      $leaf_pages,
+      $ovfl_pages,
+      $int_unused,
+      $leaf_unused,
+      $ovfl_unused,
+      $gap_cnt,
+      $compressed_size
+    );
+  }
+}
+
+proc integerify {real} {
+  if {[string is double -strict $real]} {
+    return [expr {wide($real)}]
+  } else {
+    return 0
+  }
+}
+mem function int integerify
+
+# Escape single quotes in a string for use in an SQL query. Note that the
+# caller must supply the surrounding quote characters. Examples:
+#
+#     [quote {hello world}]   == {hello world}
+#     [quote {hello world's}] == {hello world''s}
+#
+proc quote {txt} {
+  return [string map {' ''} $txt]
+}
+
+# Output a title line
+#
+proc titleline {title} {
+  if {$title==""} {
+    puts [string repeat * 79]
+  } else {
+    set len [string length $title]
+    set stars [string repeat * [expr 79-$len-5]]
+    puts "*** $title $stars"
+  }
+}
+
+# Generate a single line of output in the statistics section of the
+# report.
+#
+proc statline {title value {extra {}}} {
+  set len [string length $title]
+  set dots [string repeat . [expr 50-$len]]
+  set len [string length $value]
+  set sp2 [string range {                } $len end]
+  if {$extra ne ""} {
+    set extra " $extra"
+  }
+  puts "$title$dots $value$sp2$extra"
+}
+
+# Generate a formatted percentage value for $num/$denom
+#
+proc percent {num denom {of {}}} {
+  if {$denom==0.0} {return ""}
+  set v [expr {$num*100.0/$denom}]
+  set of {}
+  if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} {
+    return [format {%5.1f%% %s} $v $of]
+  } elseif {$v<0.1 || $v>99.9} {
+    return [format {%7.3f%% %s} $v $of]
+  } else {
+    return [format {%6.2f%% %s} $v $of]
+  }
+}
+
+proc divide {num denom} {
+  if {$denom==0} {return 0.0}
+  return [format %.2f [expr double($num)/double($denom)]]
+}
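+
+# For example (hypothetical values): [percent 500 2048] yields a string
+# formatted roughly as " 24.4% ", and [divide 500 2048] returns "0.24".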
+
+# Generate a subreport that covers some subset of the database.
+# The $where clause determines which subset to analyze.
+#
+proc subreport {title where showFrag} {
+  global pageSize file_pgcnt compressOverhead
+
+  # Query the in-memory database for the sum of various statistics
+  # for the subset of tables/indices identified by the WHERE clause in
+  # $where. Note that even if the WHERE clause matches no rows, the
+  # following query returns exactly one row (because it is an aggregate).
+  #
+  # The results of the query are stored directly by SQLite into local
+  # variables (i.e. $nentry, $payload etc.).
+  #
+  mem eval "
+    SELECT
+      int(sum(
+        CASE WHEN (is_without_rowid OR is_index) THEN nentry
+             ELSE leaf_entries
+        END
+      )) AS nentry,
+      int(sum(payload)) AS payload,
+      int(sum(ovfl_payload)) AS ovfl_payload,
+      max(mx_payload) AS mx_payload,
+      int(sum(ovfl_cnt)) as ovfl_cnt,
+      int(sum(leaf_pages)) AS leaf_pages,
+      int(sum(int_pages)) AS int_pages,
+      int(sum(ovfl_pages)) AS ovfl_pages,
+      int(sum(leaf_unused)) AS leaf_unused,
+      int(sum(int_unused)) AS int_unused,
+      int(sum(ovfl_unused)) AS ovfl_unused,
+      int(sum(gap_cnt)) AS gap_cnt,
+      int(sum(compressed_size)) AS compressed_size,
+      int(max(depth)) AS depth,
+      count(*) AS cnt
+    FROM space_used WHERE $where" {} {}
+
+  # Output the sub-report title, nicely decorated with * characters.
+  #
+  puts ""
+  titleline $title
+  puts ""
+
+  # Calculate statistics and store the results in TCL variables, as follows:
+  #
+  # total_pages: Database pages consumed.
+  # total_pages_percent: Pages consumed as a percentage of the file.
+  # storage: Bytes consumed.
+  # payload_percent: Payload bytes used as a percentage of $storage.
+  # total_unused: Unused bytes on pages.
+  # avg_payload: Average payload per btree entry.
+  # avg_fanout: Average fanout for internal pages.
+  # avg_unused: Average unused bytes per btree entry.
+  # avg_meta: Average metadata overhead per entry.
+  # ovfl_cnt_percent: Percentage of btree entries that use overflow pages.
+  #
+  set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
+  set total_pages_percent [percent $total_pages $file_pgcnt]
+  set storage [expr {$total_pages*$pageSize}]
+  set payload_percent [percent $payload $storage {of storage consumed}]
+  set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
+  set avg_payload [divide $payload $nentry]
+  set avg_unused [divide $total_unused $nentry]
+  set total_meta [expr {$storage - $payload - $total_unused}]
+  set total_meta [expr {$total_meta + 4*($ovfl_pages - $ovfl_cnt)}]
+  set meta_percent [percent $total_meta $storage {of metadata}]
+  set avg_meta [divide $total_meta $nentry]
+  if {$int_pages>0} {
+    # TODO: Is this formula correct?
+    set nTab [mem eval "
+      SELECT count(*) FROM (
+          SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0
+      )
+    "]
+    set avg_fanout [mem eval "
+      SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used
+          WHERE $where
+    "]
+    set avg_fanout [format %.2f $avg_fanout]
+  }
+  set ovfl_cnt_percent [percent $ovfl_cnt $nentry {of all entries}]
+
+  # Print out the sub-report statistics.
+  #
+  statline {Percentage of total database} $total_pages_percent
+  statline {Number of entries} $nentry
+  statline {Bytes of storage consumed} $storage
+  if {$compressed_size!=$storage} {
+    set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}]
+    set pct [expr {$compressed_size*100.0/$storage}]
+    set pct [format {%5.1f%%} $pct]
+    statline {Bytes used after compression} $compressed_size $pct
+  }
+  statline {Bytes of payload} $payload $payload_percent
+  statline {Bytes of metadata} $total_meta $meta_percent
+  if {$cnt==1} {statline {B-tree depth} $depth}
+  statline {Average payload per entry} $avg_payload
+  statline {Average unused bytes per entry} $avg_unused
+  statline {Average metadata per entry} $avg_meta
+  if {[info exists avg_fanout]} {
+    statline {Average fanout} $avg_fanout
+  }
+  if {$showFrag && $total_pages>1} {
+    set fragmentation [percent $gap_cnt [expr {$total_pages-1}]]
+    statline {Non-sequential pages} $gap_cnt $fragmentation
+  }
+  statline {Maximum payload per entry} $mx_payload
+  statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
+  if {$int_pages>0} {
+    statline {Index pages used} $int_pages
+  }
+  statline {Primary pages used} $leaf_pages
+  statline {Overflow pages used} $ovfl_pages
+  statline {Total pages used} $total_pages
+  if {$int_unused>0} {
+    set int_unused_percent [
+        percent $int_unused [expr {$int_pages*$pageSize}] {of index space}]
+    statline "Unused bytes on index pages" $int_unused $int_unused_percent
+  }
+  statline "Unused bytes on primary pages" $leaf_unused [
+     percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}]
+  statline "Unused bytes on overflow pages" $ovfl_unused [
+     percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}]
+  statline "Unused bytes on all pages" $total_unused [
+     percent $total_unused $storage {of all space}]
+  return 1
+}
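+
+# As a hedged illustration of the metadata arithmetic above (all numbers
+# hypothetical): with storage=40960 (ten 4096-byte pages), payload=30000,
+# total_unused=6000, ovfl_pages=2 and ovfl_cnt=1, the metadata comes to
+# 40960 - 30000 - 6000 + 4*(2-1) = 4964 bytes.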
all pages" $total_unused [ + percent $total_unused $storage {of all space}] + return 1 +} + +# Calculate the overhead in pages caused by auto-vacuum. +# +# This procedure calculates and returns the number of pages used by the +# auto-vacuum 'pointer-map'. If the database does not support auto-vacuum, +# then 0 is returned. The two arguments are the size of the database file in +# pages and the page size used by the database (in bytes). +proc autovacuum_overhead {filePages pageSize} { + + # Set $autovacuum to non-zero for databases that support auto-vacuum. + set autovacuum [db one {PRAGMA auto_vacuum}] + + # If the database is not an auto-vacuum database or the file consists + # of one page only then there is no overhead for auto-vacuum. Return zero. + if {0==$autovacuum || $filePages==1} { + return 0 + } + + # The number of entries on each pointer map page. The layout of the + # database file is one pointer-map page, followed by $ptrsPerPage other + # pages, followed by a pointer-map page etc. The first pointer-map page + # is the second page of the file overall. + set ptrsPerPage [expr double($pageSize/5)] + + # Return the number of pointer map pages in the database. + return [expr wide(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))] +} + + +# Calculate the summary statistics for the database and store the results +# in TCL variables. They are output below. Variables are as follows: +# +# pageSize: Size of each page in bytes. +# file_bytes: File size in bytes. +# file_pgcnt: Number of pages in the file. +# file_pgcnt2: Number of pages in the file (calculated). +# av_pgcnt: Pages consumed by the auto-vacuum pointer-map. +# av_percent: Percentage of the file consumed by auto-vacuum pointer-map. +# inuse_pgcnt: Data pages in the file. +# inuse_percent: Percentage of pages used to store data. +# free_pgcnt: Free pages calculated as (<total pages> - <in-use pages>) +# free_pgcnt2: Free pages in the file according to the file header. +# free_percent: Percentage of file consumed by free pages (calculated). +# free_percent2: Percentage of file consumed by free pages (header). +# ntable: Number of tables in the db. +# nindex: Number of indices in the db. +# nautoindex: Number of indices created automatically. +# nmanindex: Number of indices created manually. +# user_payload: Number of bytes of payload in table btrees +# (not including sqlite_schema) +# user_percent: $user_payload as a percentage of total file size. + +### The following, setting $file_bytes based on the actual size of the file +### on disk, causes this tool to choke on zipvfs databases. So set it based +### on the return of [PRAGMA page_count] instead. 
+
+
+# Calculate the summary statistics for the database and store the results
+# in TCL variables. They are output below. Variables are as follows:
+#
+# pageSize:      Size of each page in bytes.
+# file_bytes:    File size in bytes.
+# file_pgcnt:    Number of pages in the file.
+# file_pgcnt2:   Number of pages in the file (calculated).
+# av_pgcnt:      Pages consumed by the auto-vacuum pointer-map.
+# av_percent:    Percentage of the file consumed by auto-vacuum pointer-map.
+# inuse_pgcnt:   Data pages in the file.
+# inuse_percent: Percentage of pages used to store data.
+# free_pgcnt:    Free pages calculated as (<total pages> - <in-use pages>)
+# free_pgcnt2:   Free pages in the file according to the file header.
+# free_percent:  Percentage of file consumed by free pages (calculated).
+# free_percent2: Percentage of file consumed by free pages (header).
+# ntable:        Number of tables in the db.
+# nindex:        Number of indices in the db.
+# nautoindex:    Number of indices created automatically.
+# nmanindex:     Number of indices created manually.
+# user_payload:  Number of bytes of payload in table btrees
+#                (not including sqlite_schema)
+# user_percent:  $user_payload as a percentage of total file size.
+
+### The following, setting $file_bytes based on the actual size of the file
+### on disk, causes this tool to choke on zipvfs databases. So set it based
+### on the return of [PRAGMA page_count] instead.
+if 0 {
+  set file_bytes  [file size $file_to_analyze]
+  set file_pgcnt  [expr {$file_bytes/$pageSize}]
+}
+set file_pgcnt  [db one {PRAGMA page_count}]
+set file_bytes  [expr {$file_pgcnt * $pageSize}]
+
+set av_pgcnt    [autovacuum_overhead $file_pgcnt $pageSize]
+set av_percent  [percent $av_pgcnt $file_pgcnt]
+
+set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used}
+set inuse_pgcnt   [expr wide([mem eval $sql])]
+set inuse_percent [percent $inuse_pgcnt $file_pgcnt]
+
+set free_pgcnt    [expr {$file_pgcnt-$inuse_pgcnt-$av_pgcnt}]
+if {$file_bytes>1073741824 && $free_pgcnt>0} {incr free_pgcnt -1}
+set free_percent  [percent $free_pgcnt $file_pgcnt]
+set free_pgcnt2   [db one {PRAGMA freelist_count}]
+set free_percent2 [percent $free_pgcnt2 $file_pgcnt]
+
+set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}]
+
+# Account for the lockbyte page
+if {$file_pgcnt2*$pageSize>1073742335} {incr file_pgcnt2}
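+
+# Illustrative check of the accounting above (hypothetical numbers): if
+# PRAGMA page_count reports 10000 pages and the script finds 9500 in-use
+# pages, 487 freelist pages and 13 pointer-map pages, then
+# file_pgcnt2 = 9500+487+13 = 10000 and the calculated count matches the
+# measured one.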
+
+set ntable [db eval {SELECT count(*)+1 FROM sqlite_schema WHERE type='table'}]
+set nindex [db eval {SELECT count(*) FROM sqlite_schema WHERE type='index'}]
+set sql {SELECT count(*) FROM sqlite_schema WHERE name LIKE 'sqlite_autoindex%'}
+set nautoindex [db eval $sql]
+set nmanindex [expr {$nindex-$nautoindex}]
+
+# set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
+set user_payload [mem one {SELECT int(sum(payload)) FROM space_used
+     WHERE NOT is_index AND name NOT LIKE 'sqlite_schema'}]
+set user_percent [percent $user_payload $file_bytes]
+
+# Output the summary statistics calculated above.
+#
+puts "/** Disk-Space Utilization Report For $root_filename"
+puts ""
+statline {Page size in bytes} $pageSize
+statline {Pages in the whole file (measured)} $file_pgcnt
+statline {Pages in the whole file (calculated)} $file_pgcnt2
+statline {Pages that store data} $inuse_pgcnt $inuse_percent
+statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2
+statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent
+statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent
+statline {Number of tables in the database} $ntable
+statline {Number of indices} $nindex
+statline {Number of defined indices} $nmanindex
+statline {Number of implied indices} $nautoindex
+if {$isCompressed} {
+  statline {Size of uncompressed content in bytes} $file_bytes
+  set efficiency [percent $true_file_size $file_bytes]
+  statline {Size of compressed file on disk} $true_file_size $efficiency
+} else {
+  statline {Size of the file in bytes} $file_bytes
+}
+statline {Bytes of user payload stored} $user_payload $user_percent
+
+# Output table rankings
+#
+puts ""
+titleline "Page counts for all tables with their indices"
+puts ""
+mem eval {SELECT tblname, count(*) AS cnt,
+              int(sum(int_pages+leaf_pages+ovfl_pages)) AS size
+          FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} {
+  statline [string toupper $tblname] $size [percent $size $file_pgcnt]
+}
+puts ""
+titleline "Page counts for all tables and indices separately"
+puts ""
+mem eval {
+  SELECT
+      upper(name) AS nm,
+      int(int_pages+leaf_pages+ovfl_pages) AS size
+  FROM space_used
+  ORDER BY size+0 DESC, name} {} {
+  statline $nm $size [percent $size $file_pgcnt]
+}
+if {$isCompressed} {
+  puts ""
+  titleline "Bytes of disk space used after compression"
+  puts ""
+  set csum 0
+  mem eval {SELECT tblname,
+                  int(sum(compressed_size)) +
+                         $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages)
+                         AS csize
+          FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname} {} {
+    incr csum $csize
+    statline [string toupper $tblname] $csize [percent $csize $true_file_size]
+  }
+  set overhead [expr {$true_file_size - $csum}]
+  if {$overhead>0} {
+    statline {Header and free space} $overhead [percent $overhead $true_file_size]
+  }
+}
+
+# Output subreports
+#
+if {$nindex>0} {
+  subreport {All tables and indices} 1 0
+}
+subreport {All tables} {NOT is_index} 0
+if {$nindex>0} {
+  subreport {All indices} {is_index} 0
+}
+foreach tbl [mem eval {SELECT DISTINCT tblname name FROM space_used
+                       ORDER BY name}] {
+  set qn [quote $tbl]
+  set name [string toupper $tbl]
+  set n [mem eval {SELECT count(*) FROM space_used WHERE tblname=$tbl}]
+  if {$n>1} {
+    set idxlist [mem eval "SELECT name FROM space_used
+                            WHERE tblname='$qn' AND is_index
+                            ORDER BY 1"]
+    subreport "Table $name and all its indices" "tblname='$qn'" 0
+    subreport "Table $name w/o any indices" "name='$qn'" 1
+    if {[llength $idxlist]>1} {
+      subreport "Indices of table $name" "tblname='$qn' AND is_index" 0
+    }
+    foreach idx $idxlist {
+      set qidx [quote $idx]
+      subreport "Index [string toupper $idx] of table $name" "name='$qidx'" 1
+    }
+  } else {
+    subreport "Table $name" "name='$qn'" 1
+  }
+}
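+
+# For a hypothetical table "t1" with two indices "i1" and "i2", the loop
+# above emits: "Table T1 and all its indices", "Table T1 w/o any indices",
+# "Indices of table T1", "Index I1 of table T1", and "Index I2 of table T1".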
+
+# Output instructions on what the numbers above mean.
+#
+puts ""
+titleline Definitions
+puts {
+Page size in bytes
+
+    The number of bytes in a single page of the database file.
+    Usually 4096.
+
+Number of pages in the whole file
+}
+puts "    The number of $pageSize-byte pages that go into forming the complete
+    database"
+puts {
+Pages that store data
+
+    The number of pages that store data, either as primary B*Tree pages or
+    as overflow pages.  The number at the right is the data pages divided by
+    the total number of pages in the file.
+
+Pages on the freelist
+
+    The number of pages that are not currently in use but are reserved for
+    future use.  The percentage at the right is the number of freelist pages
+    divided by the total number of pages in the file.
+
+Pages of auto-vacuum overhead
+
+    The number of pages that store data used by the database to facilitate
+    auto-vacuum.  This is zero for databases that do not support auto-vacuum.
+
+Number of tables in the database
+
+    The number of tables in the database, including the SQLITE_SCHEMA table
+    used to store schema information.
+
+Number of indices
+
+    The total number of indices in the database.
+
+Number of defined indices
+
+    The number of indices created using an explicit CREATE INDEX statement.
+
+Number of implied indices
+
+    The number of indices used to implement PRIMARY KEY or UNIQUE constraints
+    on tables.
+
+Size of the file in bytes
+
+    The total amount of disk space used by the entire database file.
+
+Bytes of user payload stored
+
+    The total number of bytes of user payload stored in the database. The
+    schema information in the SQLITE_SCHEMA table is not counted when
+    computing this number.  The percentage at the right shows the payload
+    divided by the total file size.
+
+Percentage of total database
+
+    The amount of the complete database file that is devoted to storing
+    information described by this category.
+
+Number of entries
+
+    The total number of B-Tree key/value pairs stored under this category.
+
+Bytes of storage consumed
+
+    The total amount of disk space required to store all B-Tree entries
+    under this category.  This is the total number of pages used times
+    the page size.
+
+Bytes of payload
+
+    The amount of payload stored under this category.  Payload is the data
+    part of table entries and the key part of index entries.  The percentage
+    at the right is the bytes of payload divided by the bytes of storage
+    consumed.
+
+Bytes of metadata
+
+    The amount of formatting and structural information stored in the
+    table or index.  Metadata includes the btree page header, the cell pointer
+    array, the size field for each cell, the left child pointer for non-leaf
+    cells, the overflow pointers for overflow cells, and the rowid value for
+    rowid table cells.  In other words, metadata is everything that is neither
+    unused space nor content.  The record header in the payload is counted as
+    content, not metadata.
+
+Average payload per entry
+
+    The average amount of payload on each entry.  This is just the bytes of
+    payload divided by the number of entries.
+
+Average unused bytes per entry
+
+    The average amount of free space remaining on all pages under this
+    category on a per-entry basis.  This is the number of unused bytes on
+    all pages divided by the number of entries.
+
+Non-sequential pages
+
+    The number of pages in the table or index that are out of sequence.
+    Many filesystems are optimized for sequential file access so a small
+    number of non-sequential pages might result in faster queries,
+    especially for larger database files that do not fit in the disk cache.
+    Note that after running VACUUM, the root page of each table or index is
+    at the beginning of the database file and all other pages are in a
+    separate part of the database file, resulting in a single non-
+    sequential page.
+
+Maximum payload per entry
+
+    The largest payload size of any entry.
+
+Entries that use overflow
+
+    The number of entries that use one or more overflow pages.
+
+Total pages used
+
+    This is the number of pages used to hold all information in the current
+    category.  This is the sum of index, primary, and overflow pages.
+
+Index pages used
+
+    This is the number of pages in a table B-tree that hold only key (rowid)
+    information and no data.
+
+Primary pages used
+
+    This is the number of B-tree pages that hold both key and data.
+
+Overflow pages used
+
+    The total number of overflow pages used for this category.
+
+Unused bytes on index pages
+
+    The total number of bytes of unused space on all index pages.  The
+    percentage at the right is the number of unused bytes divided by the
+    total number of bytes on index pages.
+
+Unused bytes on primary pages
+
+    The total number of bytes of unused space on all primary pages.  The
+    percentage at the right is the number of unused bytes divided by the
+    total number of bytes on primary pages.
+
+Unused bytes on overflow pages
+
+    The total number of bytes of unused space on all overflow pages.  The
+    percentage at the right is the number of unused bytes divided by the
+    total number of bytes on overflow pages.
+
+Unused bytes on all pages
+
+    The total number of bytes of unused space on all index, primary, and
+    overflow pages.  The percentage at the right is the number of unused
+    bytes divided by the total number of bytes.
+}
+puts "The data used to generate this report follows:" +puts "*/" +puts "BEGIN;" +puts $tabledef +unset -nocomplain x +mem eval {SELECT * FROM space_used} x { + puts -nonewline "INSERT INTO space_used VALUES" + set sep ( + foreach col $x(*) { + set v $x($col) + if {$v=="" || ![string is double $v]} {set v '[quote $v]'} + puts -nonewline $sep$v + set sep , + } + puts ");" +} +puts "COMMIT;" + +} err]} { + puts "ERROR: $err" + puts $errorInfo + exit 1 +} |