/* * dbsize.c * Database object size functions, and related inquiries * * Copyright (c) 2002-2022, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/dbsize.c * */ #include "postgres.h" #include #include "access/htup_details.h" #include "access/relation.h" #include "catalog/catalog.h" #include "catalog/namespace.h" #include "catalog/pg_authid.h" #include "catalog/pg_tablespace.h" #include "commands/dbcommands.h" #include "commands/tablespace.h" #include "miscadmin.h" #include "storage/fd.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/numeric.h" #include "utils/rel.h" #include "utils/relfilenodemap.h" #include "utils/relmapper.h" #include "utils/syscache.h" /* Divide by two and round away from zero */ #define half_rounded(x) (((x) + ((x) < 0 ? -1 : 1)) / 2) /* Units used in pg_size_pretty functions. All units must be powers of 2 */ struct size_pretty_unit { const char *name; /* bytes, kB, MB, GB etc */ uint32 limit; /* upper limit, prior to half rounding after * converting to this unit. */ bool round; /* do half rounding for this unit */ uint8 unitbits; /* (1 << unitbits) bytes to make 1 of this * unit */ }; /* When adding units here also update the error message in pg_size_bytes */ static const struct size_pretty_unit size_pretty_units[] = { {"bytes", 10 * 1024, false, 0}, {"kB", 20 * 1024 - 1, true, 10}, {"MB", 20 * 1024 - 1, true, 20}, {"GB", 20 * 1024 - 1, true, 30}, {"TB", 20 * 1024 - 1, true, 40}, {"PB", 20 * 1024 - 1, true, 50}, {NULL, 0, false, 0} }; /* Return physical size of directory contents, or 0 if dir doesn't exist */ static int64 db_dir_size(const char *path) { int64 dirsize = 0; struct dirent *direntry; DIR *dirdesc; char filename[MAXPGPATH * 2]; dirdesc = AllocateDir(path); if (!dirdesc) return 0; while ((direntry = ReadDir(dirdesc, path)) != NULL) { struct stat fst; CHECK_FOR_INTERRUPTS(); if (strcmp(direntry->d_name, ".") == 0 || strcmp(direntry->d_name, "..") == 0) continue; snprintf(filename, sizeof(filename), "%s/%s", path, direntry->d_name); if (stat(filename, &fst) < 0) { if (errno == ENOENT) continue; else ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", filename))); } dirsize += fst.st_size; } FreeDir(dirdesc); return dirsize; } /* * calculate size of database in all tablespaces */ static int64 calculate_database_size(Oid dbOid) { int64 totalsize; DIR *dirdesc; struct dirent *direntry; char dirpath[MAXPGPATH]; char pathname[MAXPGPATH + 21 + sizeof(TABLESPACE_VERSION_DIRECTORY)]; AclResult aclresult; /* * User must have connect privilege for target database or have privileges * of pg_read_all_stats */ aclresult = pg_database_aclcheck(dbOid, GetUserId(), ACL_CONNECT); if (aclresult != ACLCHECK_OK && !has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS)) { aclcheck_error(aclresult, OBJECT_DATABASE, get_database_name(dbOid)); } /* Shared storage in pg_global is not counted */ /* Include pg_default storage */ snprintf(pathname, sizeof(pathname), "base/%u", dbOid); totalsize = db_dir_size(pathname); /* Scan the non-default tablespaces */ snprintf(dirpath, MAXPGPATH, "pg_tblspc"); dirdesc = AllocateDir(dirpath); while ((direntry = ReadDir(dirdesc, dirpath)) != NULL) { CHECK_FOR_INTERRUPTS(); if (strcmp(direntry->d_name, ".") == 0 || strcmp(direntry->d_name, "..") == 0) continue; snprintf(pathname, sizeof(pathname), "pg_tblspc/%s/%s/%u", direntry->d_name, TABLESPACE_VERSION_DIRECTORY, dbOid); totalsize += db_dir_size(pathname); } FreeDir(dirdesc); return totalsize; } Datum pg_database_size_oid(PG_FUNCTION_ARGS) { Oid dbOid = PG_GETARG_OID(0); int64 size; size = calculate_database_size(dbOid); if (size == 0) PG_RETURN_NULL(); PG_RETURN_INT64(size); } Datum pg_database_size_name(PG_FUNCTION_ARGS) { Name dbName = PG_GETARG_NAME(0); Oid dbOid = get_database_oid(NameStr(*dbName), false); int64 size; size = calculate_database_size(dbOid); if (size == 0) PG_RETURN_NULL(); PG_RETURN_INT64(size); } /* * Calculate total size of tablespace. Returns -1 if the tablespace directory * cannot be found. */ static int64 calculate_tablespace_size(Oid tblspcOid) { char tblspcPath[MAXPGPATH]; char pathname[MAXPGPATH * 2]; int64 totalsize = 0; DIR *dirdesc; struct dirent *direntry; AclResult aclresult; /* * User must have privileges of pg_read_all_stats or have CREATE privilege * for target tablespace, either explicitly granted or implicitly because * it is default for current database. */ if (tblspcOid != MyDatabaseTableSpace && !has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS)) { aclresult = pg_tablespace_aclcheck(tblspcOid, GetUserId(), ACL_CREATE); if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, OBJECT_TABLESPACE, get_tablespace_name(tblspcOid)); } if (tblspcOid == DEFAULTTABLESPACE_OID) snprintf(tblspcPath, MAXPGPATH, "base"); else if (tblspcOid == GLOBALTABLESPACE_OID) snprintf(tblspcPath, MAXPGPATH, "global"); else snprintf(tblspcPath, MAXPGPATH, "pg_tblspc/%u/%s", tblspcOid, TABLESPACE_VERSION_DIRECTORY); dirdesc = AllocateDir(tblspcPath); if (!dirdesc) return -1; while ((direntry = ReadDir(dirdesc, tblspcPath)) != NULL) { struct stat fst; CHECK_FOR_INTERRUPTS(); if (strcmp(direntry->d_name, ".") == 0 || strcmp(direntry->d_name, "..") == 0) continue; snprintf(pathname, sizeof(pathname), "%s/%s", tblspcPath, direntry->d_name); if (stat(pathname, &fst) < 0) { if (errno == ENOENT) continue; else ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", pathname))); } if (S_ISDIR(fst.st_mode)) totalsize += db_dir_size(pathname); totalsize += fst.st_size; } FreeDir(dirdesc); return totalsize; } Datum pg_tablespace_size_oid(PG_FUNCTION_ARGS) { Oid tblspcOid = PG_GETARG_OID(0); int64 size; size = calculate_tablespace_size(tblspcOid); if (size < 0) PG_RETURN_NULL(); PG_RETURN_INT64(size); } Datum pg_tablespace_size_name(PG_FUNCTION_ARGS) { Name tblspcName = PG_GETARG_NAME(0); Oid tblspcOid = get_tablespace_oid(NameStr(*tblspcName), false); int64 size; size = calculate_tablespace_size(tblspcOid); if (size < 0) PG_RETURN_NULL(); PG_RETURN_INT64(size); } /* * calculate size of (one fork of) a relation * * Note: we can safely apply this to temp tables of other sessions, so there * is no check here or at the call sites for that. */ static int64 calculate_relation_size(RelFileNode *rfn, BackendId backend, ForkNumber forknum) { int64 totalsize = 0; char *relationpath; char pathname[MAXPGPATH]; unsigned int segcount = 0; relationpath = relpathbackend(*rfn, backend, forknum); for (segcount = 0;; segcount++) { struct stat fst; CHECK_FOR_INTERRUPTS(); if (segcount == 0) snprintf(pathname, MAXPGPATH, "%s", relationpath); else snprintf(pathname, MAXPGPATH, "%s.%u", relationpath, segcount); if (stat(pathname, &fst) < 0) { if (errno == ENOENT) break; else ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", pathname))); } totalsize += fst.st_size; } return totalsize; } Datum pg_relation_size(PG_FUNCTION_ARGS) { Oid relOid = PG_GETARG_OID(0); text *forkName = PG_GETARG_TEXT_PP(1); Relation rel; int64 size; rel = try_relation_open(relOid, AccessShareLock); /* * Before 9.2, we used to throw an error if the relation didn't exist, but * that makes queries like "SELECT pg_relation_size(oid) FROM pg_class" * less robust, because while we scan pg_class with an MVCC snapshot, * someone else might drop the table. It's better to return NULL for * already-dropped tables than throw an error and abort the whole query. */ if (rel == NULL) PG_RETURN_NULL(); size = calculate_relation_size(&(rel->rd_node), rel->rd_backend, forkname_to_number(text_to_cstring(forkName))); relation_close(rel, AccessShareLock); PG_RETURN_INT64(size); } /* * Calculate total on-disk size of a TOAST relation, including its indexes. * Must not be applied to non-TOAST relations. */ static int64 calculate_toast_table_size(Oid toastrelid) { int64 size = 0; Relation toastRel; ForkNumber forkNum; ListCell *lc; List *indexlist; toastRel = relation_open(toastrelid, AccessShareLock); /* toast heap size, including FSM and VM size */ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) size += calculate_relation_size(&(toastRel->rd_node), toastRel->rd_backend, forkNum); /* toast index size, including FSM and VM size */ indexlist = RelationGetIndexList(toastRel); /* Size is calculated using all the indexes available */ foreach(lc, indexlist) { Relation toastIdxRel; toastIdxRel = relation_open(lfirst_oid(lc), AccessShareLock); for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) size += calculate_relation_size(&(toastIdxRel->rd_node), toastIdxRel->rd_backend, forkNum); relation_close(toastIdxRel, AccessShareLock); } list_free(indexlist); relation_close(toastRel, AccessShareLock); return size; } /* * Calculate total on-disk size of a given table, * including FSM and VM, plus TOAST table if any. * Indexes other than the TOAST table's index are not included. * * Note that this also behaves sanely if applied to an index or toast table; * those won't have attached toast tables, but they can have multiple forks. */ static int64 calculate_table_size(Relation rel) { int64 size = 0; ForkNumber forkNum; /* * heap size, including FSM and VM */ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) size += calculate_relation_size(&(rel->rd_node), rel->rd_backend, forkNum); /* * Size of toast relation */ if (OidIsValid(rel->rd_rel->reltoastrelid)) size += calculate_toast_table_size(rel->rd_rel->reltoastrelid); return size; } /* * Calculate total on-disk size of all indexes attached to the given table. * * Can be applied safely to an index, but you'll just get zero. */ static int64 calculate_indexes_size(Relation rel) { int64 size = 0; /* * Aggregate all indexes on the given relation */ if (rel->rd_rel->relhasindex) { List *index_oids = RelationGetIndexList(rel); ListCell *cell; foreach(cell, index_oids) { Oid idxOid = lfirst_oid(cell); Relation idxRel; ForkNumber forkNum; idxRel = relation_open(idxOid, AccessShareLock); for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) size += calculate_relation_size(&(idxRel->rd_node), idxRel->rd_backend, forkNum); relation_close(idxRel, AccessShareLock); } list_free(index_oids); } return size; } Datum pg_table_size(PG_FUNCTION_ARGS) { Oid relOid = PG_GETARG_OID(0); Relation rel; int64 size; rel = try_relation_open(relOid, AccessShareLock); if (rel == NULL) PG_RETURN_NULL(); size = calculate_table_size(rel); relation_close(rel, AccessShareLock); PG_RETURN_INT64(size); } Datum pg_indexes_size(PG_FUNCTION_ARGS) { Oid relOid = PG_GETARG_OID(0); Relation rel; int64 size; rel = try_relation_open(relOid, AccessShareLock); if (rel == NULL) PG_RETURN_NULL(); size = calculate_indexes_size(rel); relation_close(rel, AccessShareLock); PG_RETURN_INT64(size); } /* * Compute the on-disk size of all files for the relation, * including heap data, index data, toast data, FSM, VM. */ static int64 calculate_total_relation_size(Relation rel) { int64 size; /* * Aggregate the table size, this includes size of the heap, toast and * toast index with free space and visibility map */ size = calculate_table_size(rel); /* * Add size of all attached indexes as well */ size += calculate_indexes_size(rel); return size; } Datum pg_total_relation_size(PG_FUNCTION_ARGS) { Oid relOid = PG_GETARG_OID(0); Relation rel; int64 size; rel = try_relation_open(relOid, AccessShareLock); if (rel == NULL) PG_RETURN_NULL(); size = calculate_total_relation_size(rel); relation_close(rel, AccessShareLock); PG_RETURN_INT64(size); } /* * formatting with size units */ Datum pg_size_pretty(PG_FUNCTION_ARGS) { int64 size = PG_GETARG_INT64(0); char buf[64]; const struct size_pretty_unit *unit; for (unit = size_pretty_units; unit->name != NULL; unit++) { uint8 bits; /* use this unit if there are no more units or we're below the limit */ if (unit[1].name == NULL || Abs(size) < unit->limit) { if (unit->round) size = half_rounded(size); snprintf(buf, sizeof(buf), INT64_FORMAT " %s", size, unit->name); break; } /* * Determine the number of bits to use to build the divisor. We may * need to use 1 bit less than the difference between this and the * next unit if the next unit uses half rounding. Or we may need to * shift an extra bit if this unit uses half rounding and the next one * does not. We use division rather than shifting right by this * number of bits to ensure positive and negative values are rounded * in the same way. */ bits = (unit[1].unitbits - unit->unitbits - (unit[1].round == true) + (unit->round == true)); size /= ((int64) 1) << bits; } PG_RETURN_TEXT_P(cstring_to_text(buf)); } static char * numeric_to_cstring(Numeric n) { Datum d = NumericGetDatum(n); return DatumGetCString(DirectFunctionCall1(numeric_out, d)); } static bool numeric_is_less(Numeric a, Numeric b) { Datum da = NumericGetDatum(a); Datum db = NumericGetDatum(b); return DatumGetBool(DirectFunctionCall2(numeric_lt, da, db)); } static Numeric numeric_absolute(Numeric n) { Datum d = NumericGetDatum(n); Datum result; result = DirectFunctionCall1(numeric_abs, d); return DatumGetNumeric(result); } static Numeric numeric_half_rounded(Numeric n) { Datum d = NumericGetDatum(n); Datum zero; Datum one; Datum two; Datum result; zero = NumericGetDatum(int64_to_numeric(0)); one = NumericGetDatum(int64_to_numeric(1)); two = NumericGetDatum(int64_to_numeric(2)); if (DatumGetBool(DirectFunctionCall2(numeric_ge, d, zero))) d = DirectFunctionCall2(numeric_add, d, one); else d = DirectFunctionCall2(numeric_sub, d, one); result = DirectFunctionCall2(numeric_div_trunc, d, two); return DatumGetNumeric(result); } static Numeric numeric_truncated_divide(Numeric n, int64 divisor) { Datum d = NumericGetDatum(n); Datum divisor_numeric; Datum result; divisor_numeric = NumericGetDatum(int64_to_numeric(divisor)); result = DirectFunctionCall2(numeric_div_trunc, d, divisor_numeric); return DatumGetNumeric(result); } Datum pg_size_pretty_numeric(PG_FUNCTION_ARGS) { Numeric size = PG_GETARG_NUMERIC(0); char *result = NULL; const struct size_pretty_unit *unit; for (unit = size_pretty_units; unit->name != NULL; unit++) { unsigned int shiftby; /* use this unit if there are no more units or we're below the limit */ if (unit[1].name == NULL || numeric_is_less(numeric_absolute(size), int64_to_numeric(unit->limit))) { if (unit->round) size = numeric_half_rounded(size); result = psprintf("%s %s", numeric_to_cstring(size), unit->name); break; } /* * Determine the number of bits to use to build the divisor. We may * need to use 1 bit less than the difference between this and the * next unit if the next unit uses half rounding. Or we may need to * shift an extra bit if this unit uses half rounding and the next one * does not. */ shiftby = (unit[1].unitbits - unit->unitbits - (unit[1].round == true) + (unit->round == true)); size = numeric_truncated_divide(size, ((int64) 1) << shiftby); } PG_RETURN_TEXT_P(cstring_to_text(result)); } /* * Convert a human-readable size to a size in bytes */ Datum pg_size_bytes(PG_FUNCTION_ARGS) { text *arg = PG_GETARG_TEXT_PP(0); char *str, *strptr, *endptr; char saved_char; Numeric num; int64 result; bool have_digits = false; str = text_to_cstring(arg); /* Skip leading whitespace */ strptr = str; while (isspace((unsigned char) *strptr)) strptr++; /* Check that we have a valid number and determine where it ends */ endptr = strptr; /* Part (1): sign */ if (*endptr == '-' || *endptr == '+') endptr++; /* Part (2): main digit string */ if (isdigit((unsigned char) *endptr)) { have_digits = true; do endptr++; while (isdigit((unsigned char) *endptr)); } /* Part (3): optional decimal point and fractional digits */ if (*endptr == '.') { endptr++; if (isdigit((unsigned char) *endptr)) { have_digits = true; do endptr++; while (isdigit((unsigned char) *endptr)); } } /* Complain if we don't have a valid number at this point */ if (!have_digits) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid size: \"%s\"", str))); /* Part (4): optional exponent */ if (*endptr == 'e' || *endptr == 'E') { long exponent; char *cp; /* * Note we might one day support EB units, so if what follows 'E' * isn't a number, just treat it all as a unit to be parsed. */ exponent = strtol(endptr + 1, &cp, 10); (void) exponent; /* Silence -Wunused-result warnings */ if (cp > endptr + 1) endptr = cp; } /* * Parse the number, saving the next character, which may be the first * character of the unit string. */ saved_char = *endptr; *endptr = '\0'; num = DatumGetNumeric(DirectFunctionCall3(numeric_in, CStringGetDatum(strptr), ObjectIdGetDatum(InvalidOid), Int32GetDatum(-1))); *endptr = saved_char; /* Skip whitespace between number and unit */ strptr = endptr; while (isspace((unsigned char) *strptr)) strptr++; /* Handle possible unit */ if (*strptr != '\0') { const struct size_pretty_unit *unit; int64 multiplier = 0; /* Trim any trailing whitespace */ endptr = str + VARSIZE_ANY_EXHDR(arg) - 1; while (isspace((unsigned char) *endptr)) endptr--; endptr++; *endptr = '\0'; for (unit = size_pretty_units; unit->name != NULL; unit++) { /* Parse the unit case-insensitively */ if (pg_strcasecmp(strptr, unit->name) == 0) { multiplier = ((int64) 1) << unit->unitbits; break; } } /* Verify we found a valid unit in the loop above */ if (unit->name == NULL) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid size: \"%s\"", text_to_cstring(arg)), errdetail("Invalid size unit: \"%s\".", strptr), errhint("Valid units are \"bytes\", \"kB\", \"MB\", \"GB\", \"TB\", and \"PB\"."))); if (multiplier > 1) { Numeric mul_num; mul_num = int64_to_numeric(multiplier); num = DatumGetNumeric(DirectFunctionCall2(numeric_mul, NumericGetDatum(mul_num), NumericGetDatum(num))); } } result = DatumGetInt64(DirectFunctionCall1(numeric_int8, NumericGetDatum(num))); PG_RETURN_INT64(result); } /* * Get the filenode of a relation * * This is expected to be used in queries like * SELECT pg_relation_filenode(oid) FROM pg_class; * That leads to a couple of choices. We work from the pg_class row alone * rather than actually opening each relation, for efficiency. We don't * fail if we can't find the relation --- some rows might be visible in * the query's MVCC snapshot even though the relations have been dropped. * (Note: we could avoid using the catcache, but there's little point * because the relation mapper also works "in the now".) We also don't * fail if the relation doesn't have storage. In all these cases it * seems better to quietly return NULL. */ Datum pg_relation_filenode(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); Oid result; HeapTuple tuple; Form_pg_class relform; tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); if (!HeapTupleIsValid(tuple)) PG_RETURN_NULL(); relform = (Form_pg_class) GETSTRUCT(tuple); if (RELKIND_HAS_STORAGE(relform->relkind)) { if (relform->relfilenode) result = relform->relfilenode; else /* Consult the relation mapper */ result = RelationMapOidToFilenode(relid, relform->relisshared); } else { /* no storage, return NULL */ result = InvalidOid; } ReleaseSysCache(tuple); if (!OidIsValid(result)) PG_RETURN_NULL(); PG_RETURN_OID(result); } /* * Get the relation via (reltablespace, relfilenode) * * This is expected to be used when somebody wants to match an individual file * on the filesystem back to its table. That's not trivially possible via * pg_class, because that doesn't contain the relfilenodes of shared and nailed * tables. * * We don't fail but return NULL if we cannot find a mapping. * * InvalidOid can be passed instead of the current database's default * tablespace. */ Datum pg_filenode_relation(PG_FUNCTION_ARGS) { Oid reltablespace = PG_GETARG_OID(0); Oid relfilenode = PG_GETARG_OID(1); Oid heaprel; /* test needed so RelidByRelfilenode doesn't misbehave */ if (!OidIsValid(relfilenode)) PG_RETURN_NULL(); heaprel = RelidByRelfilenode(reltablespace, relfilenode); if (!OidIsValid(heaprel)) PG_RETURN_NULL(); else PG_RETURN_OID(heaprel); } /* * Get the pathname (relative to $PGDATA) of a relation * * See comments for pg_relation_filenode. */ Datum pg_relation_filepath(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); HeapTuple tuple; Form_pg_class relform; RelFileNode rnode; BackendId backend; char *path; tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); if (!HeapTupleIsValid(tuple)) PG_RETURN_NULL(); relform = (Form_pg_class) GETSTRUCT(tuple); if (RELKIND_HAS_STORAGE(relform->relkind)) { /* This logic should match RelationInitPhysicalAddr */ if (relform->reltablespace) rnode.spcNode = relform->reltablespace; else rnode.spcNode = MyDatabaseTableSpace; if (rnode.spcNode == GLOBALTABLESPACE_OID) rnode.dbNode = InvalidOid; else rnode.dbNode = MyDatabaseId; if (relform->relfilenode) rnode.relNode = relform->relfilenode; else /* Consult the relation mapper */ rnode.relNode = RelationMapOidToFilenode(relid, relform->relisshared); } else { /* no storage, return NULL */ rnode.relNode = InvalidOid; /* some compilers generate warnings without these next two lines */ rnode.dbNode = InvalidOid; rnode.spcNode = InvalidOid; } if (!OidIsValid(rnode.relNode)) { ReleaseSysCache(tuple); PG_RETURN_NULL(); } /* Determine owning backend. */ switch (relform->relpersistence) { case RELPERSISTENCE_UNLOGGED: case RELPERSISTENCE_PERMANENT: backend = InvalidBackendId; break; case RELPERSISTENCE_TEMP: if (isTempOrTempToastNamespace(relform->relnamespace)) backend = BackendIdForTempRelations(); else { /* Do it the hard way. */ backend = GetTempNamespaceBackendId(relform->relnamespace); Assert(backend != InvalidBackendId); } break; default: elog(ERROR, "invalid relpersistence: %c", relform->relpersistence); backend = InvalidBackendId; /* placate compiler */ break; } ReleaseSysCache(tuple); path = relpathbackend(rnode, backend, MAIN_FORKNUM); PG_RETURN_TEXT_P(cstring_to_text(path)); }