/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <errno.h>
#include <stdlib.h>
#include <unistd.h>

#include "alloc-util.h"
#include "dev-setup.h"
#include "fd-util.h"
#include "fs-util.h"
#include "label-util.h"
#include "lock-util.h"
#include "log.h"
#include "mkdir-label.h"
#include "nulstr-util.h"
#include "path-util.h"
#include "terminal-util.h"
#include "umask-util.h"
#include "user-util.h"

int lock_dev_console(void) {
        _cleanup_close_ int fd = -EBADF;
        int r;

        /* NB: We do not use O_NOFOLLOW here, because some container managers might place a symlink to some
         * pty in /dev/console, in which case it should be fine to lock the target TTY. */
        fd = open_terminal("/dev/console", O_RDONLY|O_CLOEXEC|O_NOCTTY);
        if (fd < 0)
                return fd;

        r = lock_generic(fd, LOCK_BSD, LOCK_EX);
        if (r < 0)
                return r;

        return TAKE_FD(fd);
}

int dev_setup(const char *prefix, uid_t uid, gid_t gid) {
        static const char symlinks[] =
                "-/proc/kcore\0"     "/dev/core\0"
                "/proc/self/fd\0"    "/dev/fd\0"
                "/proc/self/fd/0\0"  "/dev/stdin\0"
                "/proc/self/fd/1\0"  "/dev/stdout\0"
                "/proc/self/fd/2\0"  "/dev/stderr\0";

        int r;

        NULSTR_FOREACH_PAIR(j, k, symlinks) {
                _cleanup_free_ char *link_name = NULL;
                const char *n;

                if (j[0] == '-') {
                        j++;

                        if (access(j, F_OK) < 0)
                                continue;
                }

                if (prefix) {
                        link_name = path_join(prefix, k);
                        if (!link_name)
                                return -ENOMEM;

                        n = link_name;
                } else
                        n = k;

                r = symlink_label(j, n);
                if (r < 0)
                        log_debug_errno(r, "Failed to symlink %s to %s: %m", j, n);

                if (uid != UID_INVALID || gid != GID_INVALID)
                        if (lchown(n, uid, gid) < 0)
                                log_debug_errno(errno, "Failed to chown %s: %m", n);
        }

        return 0;
}

int make_inaccessible_nodes(
                const char *parent_dir,
                uid_t uid,
                gid_t gid) {

        static const mode_t table[] = {
                S_IFREG,
                S_IFDIR,
                S_IFIFO,
                S_IFSOCK,

                /* The following two are likely to fail if we lack the privs for it (for example in an userns
                 * environment, if CAP_SYS_MKNOD is missing, or if a device node policy prohibits creation of
                 * device nodes with a major/minor of 0). But that's entirely fine. Consumers of these files
                 * should implement falling back to use a different node then, for example
                 * <root>/inaccessible/sock, which is close enough in behaviour and semantics for most uses.
                 */
                S_IFCHR,
                S_IFBLK,

                /* NB: S_IFLNK is not listed here, as there is no such thing as an inaccessible symlink */
        };

        _cleanup_close_ int parent_fd = -EBADF, inaccessible_fd = -EBADF;
        int r;

        if (!parent_dir)
                parent_dir = "/run/systemd";

        BLOCK_WITH_UMASK(0000);

        parent_fd = open(parent_dir, O_DIRECTORY|O_CLOEXEC|O_PATH, 0);
        if (parent_fd < 0)
                return -errno;

        inaccessible_fd = open_mkdir_at_full(parent_fd, "inaccessible", O_CLOEXEC, XO_LABEL, 0755);
        if (inaccessible_fd < 0)
                return inaccessible_fd;

        /* Set up inaccessible (and empty) file nodes of all types. This are used to as mount sources for over-mounting
         * ("masking") file nodes that shall become inaccessible and empty for specific containers or services. We try
         * to lock down these nodes as much as we can, but otherwise try to match them as closely as possible with the
         * underlying file, i.e. in the best case we offer the same node type as the underlying node. */

        FOREACH_ELEMENT(m, table) {
                _cleanup_free_ char *path = NULL;
                mode_t inode_type = *m;
                const char *fn;

                fn = inode_type_to_string(inode_type);
                path = path_join(parent_dir, fn);
                if (!path)
                        return log_oom();

                if (S_ISDIR(inode_type))
                        r = mkdirat_label(inaccessible_fd, fn, 0000);
                else
                        r = mknodat_label(inaccessible_fd, fn, inode_type | 0000, makedev(0, 0));
                if (r == -EEXIST) {
                        if (fchmodat(inaccessible_fd, fn, 0000, AT_SYMLINK_NOFOLLOW) < 0)
                                log_debug_errno(errno, "Failed to adjust access mode of existing inode '%s', ignoring: %m", path);
                } else if (r < 0) {
                        log_debug_errno(r, "Failed to create '%s', ignoring: %m", path);
                        continue;
                }

                if (uid_is_valid(uid) || gid_is_valid(gid))
                        if (fchownat(inaccessible_fd, fn, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
                                log_debug_errno(errno, "Failed to chown '%s', ignoring: %m", path);
        }

        if (fchmod(inaccessible_fd, 0555) < 0)
                log_debug_errno(errno, "Failed to mark inaccessible directory read-only, ignoring: %m");

        return 0;
}