summaryrefslogtreecommitdiffstats
path: root/contrib/sepgsql/uavc.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/sepgsql/uavc.c')
-rw-r--r--contrib/sepgsql/uavc.c521
1 files changed, 521 insertions, 0 deletions
diff --git a/contrib/sepgsql/uavc.c b/contrib/sepgsql/uavc.c
new file mode 100644
index 0000000..6e3a892
--- /dev/null
+++ b/contrib/sepgsql/uavc.c
@@ -0,0 +1,521 @@
+/* -------------------------------------------------------------------------
+ *
+ * contrib/sepgsql/uavc.c
+ *
+ * Implementation of the userspace access vector cache, which caches recently
+ * used access control decisions and thereby reduces the number of kernel
+ * invocations, avoiding an unnecessary performance hit.
+ *
+ * Copyright (c) 2011-2023, PostgreSQL Global Development Group
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_proc.h"
+#include "commands/seclabel.h"
+#include "common/hashfn.h"
+#include "sepgsql.h"
+#include "storage/ipc.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+
+/*
+ * avc_cache
+ *
+ * One cached access control decision for a particular (subject label, target
+ * label, object class) triple, kept in userspace.  For the db_procedure class
+ * it also records the label to switch to when running a trusted procedure.
+ */
+typedef struct
+{
+ uint32 hash; /* sepgsql_avc_hash() of the three key fields */
+ char *scontext; /* security context of the subject (key) */
+ char *tcontext; /* security context of the target (key) */
+ uint16 tclass; /* object class of the target (key) */
+
+ uint32 allowed; /* permissions allowed; may be widened in permissive mode */
+ uint32 auditallow; /* permissions to be audited when they are allowed */
+ uint32 auditdeny; /* permissions to be audited when they are denied */
+
+ bool permissive; /* true, if SELinux flagged the decision as permissive */
+ bool hot_cache; /* true, if referenced since the last reclaim sweep */
+ bool tcontext_is_valid;
+ /* false, if the decision used the 'unlabeled' label instead */
+ char *ncontext; /* label to switch to while executing a trusted
+ * procedure, or NULL if there is none */
+} avc_cache;
+
+/*
+ * Tunables and file-local state of the userspace AVC
+ */
+#define AVC_NUM_SLOTS 512 /* number of hash buckets */
+#define AVC_NUM_RECLAIM 16 /* reclaim runs until count < threshold - this */
+#define AVC_DEF_THRESHOLD 384 /* initial value of avc_threshold */
+
+static MemoryContext avc_mem_cxt; /* holds all entries and avc_unlabeled */
+static List *avc_slots[AVC_NUM_SLOTS]; /* hash buckets, lists of avc_cache */
+static int avc_num_caches; /* number of cache entries currently in use */
+static int avc_lru_hint; /* index of the bucket to be reclaimed next */
+static int avc_threshold; /* entry count above which reclaim is launched */
+static char *avc_unlabeled; /* system 'unlabeled' label, NULL until fetched */
+
+/* Fold both security contexts and the object class into one hash value. */
+static uint32
+sepgsql_avc_hash(const char *scontext, const char *tcontext, uint16 tclass)
+{
+	const unsigned char *subject = (const unsigned char *) scontext;
+	const unsigned char *target = (const unsigned char *) tcontext;
+
+	return hash_any(subject, strlen(scontext))
+		^ hash_any(target, strlen(tcontext)) ^ tclass;
+}
+
+/*
+ * Throw away every cached entry and return the AVC to its empty state
+ */
+static void
+sepgsql_avc_reset(void)
+{
+	/* releases all entries, the bucket lists and the 'unlabeled' label */
+	MemoryContextReset(avc_mem_cxt);
+
+	avc_unlabeled = NULL;
+	avc_num_caches = avc_lru_hint = 0;
+	memset(avc_slots, 0, sizeof(avc_slots));
+}
+
+/*
+ * Evict cache entries that were not referenced since the previous sweep
+ */
+static void
+sepgsql_avc_reclaim(void)
+{
+	const int	target = avc_threshold - AVC_NUM_RECLAIM;
+	ListCell   *lc;
+
+	/* sweep one bucket per pass until the population drops below target */
+	while (avc_num_caches >= target)
+	{
+		int			bucket = avc_lru_hint;
+
+		foreach(lc, avc_slots[bucket])
+		{
+			avc_cache  *entry = lfirst(lc);
+
+			/* a recently used entry survives this sweep but loses its mark */
+			if (entry->hot_cache)
+			{
+				entry->hot_cache = false;
+				continue;
+			}
+
+			avc_slots[bucket] = foreach_delete_current(avc_slots[bucket], lc);
+
+			pfree(entry->scontext);
+			pfree(entry->tcontext);
+			if (entry->ncontext)
+				pfree(entry->ncontext);
+			pfree(entry);
+			avc_num_caches--;
+		}
+
+		avc_lru_hint = (bucket + 1) % AVC_NUM_SLOTS;
+	}
+}
+
+/* -------------------------------------------------------------------------
+ *
+ * sepgsql_avc_check_valid
+ *
+ * Reports whether the cached entries may still be trusted.  If the SELinux
+ * status has been updated since the last check (the security policy was
+ * reloaded, or some other event requiring a reset of userspace caches
+ * occurred), the whole cache is flushed and false is returned.
+ *
+ * An access control decision must be atomic, yet making one may take
+ * several system calls.  Code that references the access vector cache
+ * therefore has to repeat its work until one pass completes without an
+ * intervening flush; needing even a single retry should be very rare in
+ * practice.  Callers are expected to follow this pattern:
+ *
+ *		sepgsql_avc_check_valid();
+ *		do {
+ *			:
+ *			<reference to uavc>
+ *			:
+ *		} while (!sepgsql_avc_check_valid());
+ *
+ * -------------------------------------------------------------------------
+ */
+static bool
+sepgsql_avc_check_valid(void)
+{
+	bool		unchanged = (selinux_status_updated() <= 0);
+
+	/* a status change invalidates everything we have cached */
+	if (!unchanged)
+		sepgsql_avc_reset();
+
+	return unchanged;
+}
+
+/*
+ * sepgsql_avc_unlabeled
+ *
+ * Returns the label to fall back on when an object has no label or an
+ * invalid one.  It is cached in avc_mem_cxt until the next cache reset.
+ */
+static char *
+sepgsql_avc_unlabeled(void)
+{
+	char	   *raw_label;
+
+	if (avc_unlabeled)
+		return avc_unlabeled;
+
+	if (security_get_initial_context_raw("unlabeled", &raw_label) < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errmsg("SELinux: failed to get initial security label: %m")));
+	PG_TRY();
+	{
+		avc_unlabeled = MemoryContextStrdup(avc_mem_cxt, raw_label);
+	}
+	PG_FINALLY();
+	{
+		freecon(raw_label);		/* released even if the copy above failed */
+	}
+	PG_END_TRY();
+	return avc_unlabeled;
+}
+
+/*
+ * sepgsql_avc_compute
+ *
+ * Cache-miss path.  Asks SELinux for its access control decision on the
+ * supplied pair of security contexts and object class, stores the answer as
+ * a new entry allocated in avc_mem_cxt, and returns that entry.
+ */
+static avc_cache *
+sepgsql_avc_compute(const char *scontext, const char *tcontext, uint16 tclass)
+{
+	const char *econtext;		/* label actually evaluated by SELinux */
+	char	   *ncontext = NULL;
+	bool		tcontext_is_valid;
+	MemoryContext oldctx;
+	avc_cache  *cache;
+	uint32		hash;
+	int			index;
+	struct av_decision avd;
+
+	hash = sepgsql_avc_hash(scontext, tcontext, tclass);
+	index = hash % AVC_NUM_SLOTS;
+
+	/*
+	 * Validating a security context always costs a system call, so it is
+	 * done only here, on a cache miss, and the outcome is remembered in the
+	 * entry.  An invalid target label is evaluated as if the object carried
+	 * the system 'unlabeled' label.
+	 */
+	tcontext_is_valid = (security_check_context_raw(tcontext) == 0);
+	econtext = tcontext_is_valid ? tcontext : sepgsql_avc_unlabeled();
+
+	/*
+	 * Ask SELinux its access control decision
+	 */
+	sepgsql_compute_avd(scontext, econtext, tclass, &avd);
+
+	/*
+	 * For procedures, also cache the security label to be switched to when a
+	 * client labeled as 'scontext' executes a procedure labeled as
+	 * 'tcontext'.  That label is determined uniquely by the pair, so caching
+	 * it avoids repeated system calls.  A result identical to 'scontext'
+	 * means there is nothing to switch to, which is recorded as NULL; see
+	 * sepgsql_avc_trusted_proc() for the consumer.
+	 */
+	if (tclass == SEPG_CLASS_DB_PROCEDURE)
+	{
+		ncontext = sepgsql_compute_create(scontext, econtext,
+										  SEPG_CLASS_PROCESS, NULL);
+		if (strcmp(scontext, ncontext) == 0)
+		{
+			pfree(ncontext);
+			ncontext = NULL;
+		}
+	}
+
+	/*
+	 * Set up an avc_cache object.  Everything reachable from the entry,
+	 * including the list cell added by lcons(), must live in avc_mem_cxt.
+	 */
+	oldctx = MemoryContextSwitchTo(avc_mem_cxt);
+
+	cache = palloc0(sizeof(avc_cache));
+
+	cache->hash = hash;
+	cache->scontext = pstrdup(scontext);
+	cache->tcontext = pstrdup(tcontext);
+	cache->tclass = tclass;
+
+	cache->allowed = avd.allowed;
+	cache->auditallow = avd.auditallow;
+	cache->auditdeny = avd.auditdeny;
+	cache->hot_cache = true;
+	cache->permissive = (avd.flags & SELINUX_AVD_FLAGS_PERMISSIVE) != 0;
+	cache->tcontext_is_valid = tcontext_is_valid;
+	if (ncontext)
+	{
+		cache->ncontext = pstrdup(ncontext);
+		/* the entry has its own copy now; do not leak the working one */
+		pfree(ncontext);
+	}
+
+	avc_num_caches++;
+
+	/*
+	 * Reclaim before linking the new entry in, so that the sweep cannot
+	 * evict the entry we are about to return.
+	 */
+	if (avc_num_caches > avc_threshold)
+		sepgsql_avc_reclaim();
+
+	avc_slots[index] = lcons(cache, avc_slots[index]);
+
+	MemoryContextSwitchTo(oldctx);
+
+	return cache;
+}
+
+/*
+ * sepgsql_avc_lookup
+ *
+ * Returns the cache entry for the given security contexts and object class,
+ * marking it as recently used.  On a miss, a new entry is computed instead.
+ */
+static avc_cache *
+sepgsql_avc_lookup(const char *scontext, const char *tcontext, uint16 tclass)
+{
+	uint32		key = sepgsql_avc_hash(scontext, tcontext, tclass);
+	int			bucket = key % AVC_NUM_SLOTS;
+	ListCell   *lc;
+
+	foreach(lc, avc_slots[bucket])
+	{
+		avc_cache  *entry = lfirst(lc);
+
+		/* cheap integer comparisons first, string comparisons last */
+		if (entry->hash != key || entry->tclass != tclass)
+			continue;
+		if (strcmp(entry->tcontext, tcontext) != 0)
+			continue;
+		if (strcmp(entry->scontext, scontext) != 0)
+			continue;
+
+		/* hit: flag it so that the next reclaim sweep spares it */
+		entry->hot_cache = true;
+		return entry;
+	}
+
+	/* not found, so insert a new cache */
+	return sepgsql_avc_compute(scontext, tcontext, tclass);
+}
+
+/*
+ * sepgsql_avc_check_perms(_label)
+ *
+ * Returns true if the security policy allows all of the 'required'
+ * permissions (or the violation is tolerated in permissive mode).  Otherwise
+ * it returns false, or raises an error if 'abort_on_violation' is true.
+ * 'tcontext' (or the label of 'tobject') and 'tclass' identify the target
+ * being referenced; a NULL 'tcontext' is checked as the 'unlabeled' label.
+ * 'required' is a bitmask of permissions (SEPG_*__*) defined per object class.
+ * 'audit_name' is the object name (optional); passing SEPGSQL_AVC_NOAUDIT
+ * suppresses all the audit messages.
+ */
+bool
+sepgsql_avc_check_perms_label(const char *tcontext,
+ uint16 tclass, uint32 required,
+ const char *audit_name,
+ bool abort_on_violation)
+{
+ char *scontext = sepgsql_get_client_label();
+ avc_cache *cache;
+ uint32 denied;
+ uint32 audited;
+ bool result;
+
+ sepgsql_avc_check_valid(); /* flush first if the status already changed */
+ do
+ {
+ result = true;
+
+ /*
+ * If the target object is unlabeled, we perform the check using the
+ * label supplied by sepgsql_avc_unlabeled().
+ */
+ if (tcontext)
+ cache = sepgsql_avc_lookup(scontext, tcontext, tclass);
+ else
+ cache = sepgsql_avc_lookup(scontext,
+ sepgsql_avc_unlabeled(), tclass);
+
+ denied = required & ~cache->allowed; /* requested but not allowed */
+
+ /*
+ * Compute permissions to be audited; debug audit ignores the audit masks
+ */
+ if (sepgsql_get_debug_audit())
+ audited = (denied ? (denied & ~0) : (required & ~0));
+ else
+ audited = denied ? (denied & cache->auditdeny)
+ : (required & cache->auditallow);
+
+ if (denied)
+ {
+ /*
+ * In permissive mode or in a permissive domain, violated permissions
+ * are audited to the log files once and then implicitly allowed,
+ * which avoids a flood of access denied logs.  The purpose of
+ * permissive mode/domain is to collect a violation log that makes
+ * it possible to fix up the security policy.
+ */
+ if (!sepgsql_getenforce() || cache->permissive)
+ cache->allowed |= required;
+ else
+ result = false;
+ }
+ } while (!sepgsql_avc_check_valid()); /* redo if the cache was flushed */
+
+ /*
+ * If there is anything to audit, sepgsql_audit_log is called with the
+ * text representation of the security labels of both the subject and
+ * the object (the 'unlabeled' label stands in for an invalid target
+ * label).  It records this access, so that the DBA will be able to find
+ * out unexpected security problems later.
+ */
+ if (audited != 0 &&
+ audit_name != SEPGSQL_AVC_NOAUDIT &&
+ sepgsql_get_mode() != SEPGSQL_MODE_INTERNAL)
+ {
+ sepgsql_audit_log(denied != 0,
+ (sepgsql_getenforce() && !cache->permissive),
+ cache->scontext,
+ cache->tcontext_is_valid ?
+ cache->tcontext : sepgsql_avc_unlabeled(),
+ cache->tclass,
+ audited,
+ audit_name);
+ }
+
+ if (abort_on_violation && !result)
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("SELinux: security policy violation")));
+
+ return result;
+}
+
+bool
+sepgsql_avc_check_perms(const ObjectAddress *tobject,
+						uint16 tclass, uint32 required,
+						const char *audit_name,
+						bool abort_on_violation)
+{
+	char	   *label = GetSecurityLabel(tobject, SEPGSQL_LABEL_TAG);
+	bool		allowed;
+
+	/* a NULL label is passed through; the callee checks it as 'unlabeled' */
+	allowed = sepgsql_avc_check_perms_label(label, tclass, required,
+											audit_name, abort_on_violation);
+	if (label != NULL)
+		pfree(label);
+
+	return allowed;
+}
+
+/*
+ * sepgsql_avc_trusted_proc
+ *
+ * If the supplied function OID is configured as a trusted procedure, returns
+ * the security label to be used during the execution of that function.
+ * Otherwise returns NULL.  The result is owned by the cache; do not pfree it.
+ */
+char *
+sepgsql_avc_trusted_proc(Oid functionId)
+{
+	char	   *scontext = sepgsql_get_client_label();
+	char	   *tcontext;
+	ObjectAddress tobject;
+	avc_cache  *cache;
+
+	tobject.classId = ProcedureRelationId;
+	tobject.objectId = functionId;
+	tobject.objectSubId = 0;
+	tcontext = GetSecurityLabel(&tobject, SEPGSQL_LABEL_TAG);
+
+	sepgsql_avc_check_valid();
+	do
+	{
+		cache = sepgsql_avc_lookup(scontext,
+								   tcontext ? tcontext : sepgsql_avc_unlabeled(),
+								   SEPG_CLASS_DB_PROCEDURE);
+	} while (!sepgsql_avc_check_valid());
+
+	if (tcontext)
+		pfree(tcontext);	/* the cache entry keeps its own copy of the label */
+
+	return cache->ncontext;
+}
+
+/*
+ * sepgsql_avc_exit
+ *
+ * on_proc_exit callback that closes the SELinux status page at process exit.
+ */
+static void
+sepgsql_avc_exit(int code, Datum arg)
+{
+ selinux_status_close(); /* 'code' and 'arg' are not used */
+}
+
+/*
+ * sepgsql_avc_init
+ *
+ * Sets up the userspace AVC.  Meant to be called once, from _PG_init.
+ */
+void
+sepgsql_avc_init(void)
+{
+	int			status;
+
+	/* Every later AVC allocation is made in this dedicated context. */
+	avc_mem_cxt = AllocSetContextCreate(TopMemoryContext,
+										"userspace access vector cache",
+										ALLOCSET_DEFAULT_SIZES);
+
+	/* Start from an empty cache with the default reclaim threshold. */
+	memset(avc_slots, 0, sizeof(avc_slots));
+	avc_threshold = AVC_DEF_THRESHOLD;
+	avc_num_caches = avc_lru_hint = 0;
+
+	/*
+	 * SELinux lets userspace mmap(2) its kernel status page in read-only
+	 * mode, so that status updates (such as a policy reload) can be noticed
+	 * without invoking a system call.  This needs Linux 2.6.38 or later;
+	 * otherwise libselinux falls back to netlink sockets, which it reports
+	 * by returning a positive value.
+	 */
+	status = selinux_status_open(1);
+	if (status < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errmsg("SELinux: could not open selinux status : %m")));
+	if (status > 0)
+		ereport(LOG,
+				(errmsg("SELinux: kernel status page uses fallback mode")));
+
+	/* Make sure the status page is closed again when the process exits. */
+	on_proc_exit(sepgsql_avc_exit, 0);
+}