summaryrefslogtreecommitdiffstats
path: root/src/mount
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/mount
parentInitial commit. (diff)
downloadceph-upstream/18.2.2.tar.xz
ceph-upstream/18.2.2.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/mount')
-rw-r--r--src/mount/CMakeLists.txt10
-rw-r--r--src/mount/canonicalize.c203
-rw-r--r--src/mount/conf.cc110
-rw-r--r--src/mount/mount.ceph.c1017
-rw-r--r--src/mount/mount.ceph.h44
-rw-r--r--src/mount/mtab.c294
6 files changed, 1678 insertions, 0 deletions
diff --git a/src/mount/CMakeLists.txt b/src/mount/CMakeLists.txt
new file mode 100644
index 000000000..605ca2f67
--- /dev/null
+++ b/src/mount/CMakeLists.txt
@@ -0,0 +1,10 @@
+find_package(PkgConfig QUIET REQUIRED)
+pkg_check_modules(CAPNG REQUIRED libcap-ng)
+
+set(mount_ceph_srcs
+ mount.ceph.c conf.cc)
+add_executable(mount.ceph ${mount_ceph_srcs}
+ $<TARGET_OBJECTS:parse_secret_objs>
+ $<TARGET_OBJECTS:common_mountcephfs_objs>)
+target_link_libraries(mount.ceph keyutils::keyutils ${CAPNG_LIBRARIES} global ceph-common)
+install(TARGETS mount.ceph DESTINATION ${CMAKE_INSTALL_SBINDIR})
diff --git a/src/mount/canonicalize.c b/src/mount/canonicalize.c
new file mode 100644
index 000000000..02efbbedf
--- /dev/null
+++ b/src/mount/canonicalize.c
@@ -0,0 +1,203 @@
+/*
+ * canonicalize.c -- canonicalize pathname by removing symlinks
+ * Copyright (C) 1993 Rick Sladkey <jrs@world.std.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library Public License for more details.
+ *
+ */
+
+/*
+ * This routine is part of libc. We include it nevertheless,
+ * since the libc version has some security flaws.
+ *
+ * TODO: use canonicalize_file_name() once it exists in glibc
+ */
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#ifndef MAXSYMLINKS
+# define MAXSYMLINKS 256
+#endif
+
+static char *
+myrealpath(const char *path, char *resolved_path, int maxreslth) {
+ int readlinks = 0;
+ char *npath;
+ char *link_path;
+ int n;
+ char *buf = NULL;
+
+ npath = resolved_path;
+
+ /* If it's a relative pathname use getcwd for starters. */
+ if (*path != '/') {
+ if (!getcwd(npath, maxreslth-2))
+ return NULL;
+ npath += strlen(npath);
+ if (npath[-1] != '/')
+ *npath++ = '/';
+ } else {
+ *npath++ = '/';
+ path++;
+ }
+
+ /* Expand each slash-separated pathname component. */
+ link_path = malloc(PATH_MAX+1);
+ if (!link_path)
+ return NULL;
+ while (*path != '\0') {
+ /* Ignore stray "/" */
+ if (*path == '/') {
+ path++;
+ continue;
+ }
+ if (*path == '.' && (path[1] == '\0' || path[1] == '/')) {
+ /* Ignore "." */
+ path++;
+ continue;
+ }
+ if (*path == '.' && path[1] == '.' &&
+ (path[2] == '\0' || path[2] == '/')) {
+ /* Backup for ".." */
+ path += 2;
+ while (npath > resolved_path+1 &&
+ (--npath)[-1] != '/')
+ ;
+ continue;
+ }
+ /* Safely copy the next pathname component. */
+ while (*path != '\0' && *path != '/') {
+ if (npath-resolved_path > maxreslth-2) {
+ errno = ENAMETOOLONG;
+ goto err;
+ }
+ *npath++ = *path++;
+ }
+
+ /* Protect against infinite loops. */
+ if (readlinks++ > MAXSYMLINKS) {
+ errno = ELOOP;
+ goto err;
+ }
+
+ /* See if last pathname component is a symlink. */
+ *npath = '\0';
+
+ n = readlink(resolved_path, link_path, PATH_MAX);
+ if (n < 0) {
+ /* EINVAL means the file exists but isn't a symlink. */
+ if (errno != EINVAL)
+ goto err;
+ } else {
+ int m;
+ char *newbuf;
+
+ /* Note: readlink doesn't add the null byte. */
+ link_path[n] = '\0';
+ if (*link_path == '/')
+ /* Start over for an absolute symlink. */
+ npath = resolved_path;
+ else
+ /* Otherwise back up over this component. */
+ while (*(--npath) != '/')
+ ;
+
+ /* Insert symlink contents into path. */
+ m = strlen(path);
+ newbuf = malloc(m + n + 1);
+ if (!newbuf)
+ goto err;
+ memcpy(newbuf, link_path, n);
+ memcpy(newbuf + n, path, m + 1);
+ free(buf);
+ path = buf = newbuf;
+ }
+ *npath++ = '/';
+ }
+ /* Delete trailing slash but don't whomp a lone slash. */
+ if (npath != resolved_path+1 && npath[-1] == '/')
+ npath--;
+ /* Make sure it's null terminated. */
+ *npath = '\0';
+
+ free(link_path);
+ free(buf);
+ return resolved_path;
+
+ err:
+ free(link_path);
+ free(buf);
+ return NULL;
+}
+
+/*
+ * Converts private "dm-N" names to "/dev/mapper/<name>"
+ *
+ * Since 2.6.29 (patch 784aae735d9b0bba3f8b9faef4c8b30df3bf0128) kernel sysfs
+ * provides the real DM device names in /sys/block/<ptname>/dm/name
+ *
+ * Returns a malloc()ed "/dev/mapper/<name>" string that the caller must
+ * free(), or NULL if the sysfs file cannot be opened or read, the name is
+ * empty, or memory allocation fails.
+ */
+char *
+canonicalize_dm_name(const char *ptname)
+{
+	FILE *f;
+	int n;
+	char path[268], name[256], *res = NULL;
+
+	/* refuse to open a truncated (and therefore different) sysfs path */
+	n = snprintf(path, sizeof(path), "/sys/block/%s/dm/name", ptname);
+	if (n < 0 || (size_t)n >= sizeof(path))
+		return NULL;
+	if (!(f = fopen(path, "r")))
+		return NULL;
+
+	/* read "<name>\n" from sysfs */
+	if (fgets(name, sizeof(name), f)) {
+		/*
+		 * Strip the newline only if one is there, so a name that
+		 * arrives without it does not lose its last character.
+		 */
+		name[strcspn(name, "\n")] = '\0';
+		if (name[0] != '\0') {
+			snprintf(path, sizeof(path), "/dev/mapper/%s", name);
+			res = strdup(path);
+		}
+	}
+	fclose(f);
+	return res;
+}
+
+/*
+ * Return a malloc()ed canonical form of @path; the caller must free() it.
+ *
+ * If myrealpath() cannot resolve @path, a plain copy of @path is returned
+ * instead. If the last component of the resolved path looks like
+ * "dm-<digit>", the name from canonicalize_dm_name() is returned when that
+ * lookup succeeds. Returns NULL if @path is NULL or an allocation fails.
+ */
+char *
+canonicalize_path(const char *path)
+{
+	char *canonical;
+	char *p;
+
+	if (path == NULL)
+		return NULL;
+
+	canonical = malloc(PATH_MAX+2);
+	if (!canonical)
+		return NULL;
+	if (!myrealpath(path, canonical, PATH_MAX+1)) {
+		free(canonical);
+		return strdup(path);
+	}
+
+	p = strrchr(canonical, '/');
+	/* <ctype.h> functions need an unsigned char value; plain char may be negative */
+	if (p && strncmp(p, "/dm-", 4) == 0 && isdigit((unsigned char)p[4])) {
+		p = canonicalize_dm_name(p+1);
+		if (p) {
+			free(canonical);
+			return p;
+		}
+	}
+
+	return canonical;
+}
+
+
diff --git a/src/mount/conf.cc b/src/mount/conf.cc
new file mode 100644
index 000000000..67fc9324e
--- /dev/null
+++ b/src/mount/conf.cc
@@ -0,0 +1,110 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string>
+#include <vector>
+#include <cstring>
+#include <map>
+
+#include "common/async/context_pool.h"
+#include "common/ceph_context.h"
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "global/global_init.h"
+
+#include "auth/KeyRing.h"
+#include "mon/MonClient.h"
+
+#include "mount.ceph.h"
+
+using namespace std;
+
+extern "C" void mount_ceph_get_config_info(const char *config_file,
+ const char *name,
+ bool v2_addrs,
+ struct ceph_config_info *cci)
+{
+ int err;
+ KeyRing keyring;
+ CryptoKey secret;
+ std::string secret_str;
+ std::string monaddrs;
+ vector<const char *> args = { "--name", name };
+ bool first = true;
+
+ if (config_file) {
+ args.push_back("--conf");
+ args.push_back(config_file);
+ }
+
+ /* Create CephContext */
+ auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_UTILITY,
+ CINIT_FLAG_NO_DAEMON_ACTIONS|CINIT_FLAG_NO_MON_CONFIG);
+ auto& conf = cct->_conf;
+
+ conf.parse_env(cct->get_module_type()); // environment variables override
+ conf.apply_changes(nullptr);
+
+ auto fsid = conf.get_val<uuid_d>("fsid");
+ fsid.print(cci->cci_fsid);
+
+ ceph::async::io_context_pool ioc(1);
+ MonClient monc = MonClient(cct.get(), ioc);
+ err = monc.build_initial_monmap();
+ if (err)
+ goto scrape_keyring;
+
+ for (const auto& mon : monc.monmap.addr_mons) {
+ auto& eaddr = mon.first;
+
+ /*
+ * Filter v1 addrs if we're running in ms_mode=legacy. Filter
+ * v2 addrs for any other ms_mode.
+ */
+ if (v2_addrs) {
+ if (!eaddr.is_msgr2())
+ continue;
+ } else {
+ if (!eaddr.is_legacy())
+ continue;
+ }
+
+ std::string addr = eaddr.ip_n_port_to_str();
+ /* If this will overrun cci_mons, stop here */
+ if (monaddrs.length() + 1 + addr.length() + 1 > sizeof(cci->cci_mons))
+ break;
+
+ if (first)
+ first = false;
+ else
+ monaddrs += ",";
+
+ monaddrs += addr;
+ }
+
+ if (monaddrs.length())
+ strcpy(cci->cci_mons, monaddrs.c_str());
+ else
+ mount_ceph_debug("Could not discover monitor addresses\n");
+
+scrape_keyring:
+ err = keyring.from_ceph_context(cct.get());
+ if (err) {
+ mount_ceph_debug("keyring.from_ceph_context failed: %d\n", err);
+ return;
+ }
+
+ if (!keyring.get_secret(conf->name, secret)) {
+ mount_ceph_debug("keyring.get_secret failed\n");
+ return;
+ }
+
+ secret.encode_base64(secret_str);
+
+ if (secret_str.length() + 1 > sizeof(cci->cci_secret)) {
+ mount_ceph_debug("secret is too long\n");
+ return;
+ }
+ strcpy(cci->cci_secret, secret_str.c_str());
+}
diff --git a/src/mount/mount.ceph.c b/src/mount/mount.ceph.c
new file mode 100644
index 000000000..631b0b3fc
--- /dev/null
+++ b/src/mount/mount.ceph.c
@@ -0,0 +1,1017 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/mount.h>
+#include <stdbool.h>
+#include <sys/mman.h>
+#include <wait.h>
+#include <cap-ng.h>
+#include <getopt.h>
+
+#include "common/module.h"
+#include "common/secret.h"
+#include "include/addr_parsing.h"
+#include "mount.ceph.h"
+
+#ifndef MS_RELATIME
+# define MS_RELATIME (1<<21)
+#endif
+
+bool verboseflag = false;
+bool fakeflag = false;
+bool skip_mtab_flag = false;
+bool v2_addrs = true;
+bool no_fallback = false;
+bool ms_mode_specified = false;
+bool mon_addr_specified = false;
+static const char * const EMPTY_STRING = "";
+
+/* TODO duplicates logic from kernel */
+#define CEPH_AUTH_NAME_DEFAULT "guest"
+
+/* path to sysfs for ceph */
+#define CEPH_SYS_FS_PATH "/sys/module/ceph/"
+#define CEPH_SYS_FS_PARAM_PATH CEPH_SYS_FS_PATH"/parameters"
+
+/*
+ * mount support hint from kernel -- we only need to check
+ * v2 support for catching bugs.
+ */
+#define CEPH_V2_MOUNT_SUPPORT_PATH CEPH_SYS_FS_PARAM_PATH"/mount_syntax_v2"
+
+#define CEPH_DEFAULT_V2_MS_MODE "prefer-crc"
+
+#include "mtab.c"
+
+enum mount_dev_format {
+ MOUNT_DEV_FORMAT_OLD = 0,
+ MOUNT_DEV_FORMAT_NEW = 1,
+};
+
+struct ceph_mount_info {
+ unsigned long cmi_flags;
+ char *cmi_name;
+ char *cmi_fsname;
+ char *cmi_fsid;
+ char *cmi_path;
+ char *cmi_mons;
+ char *cmi_conf;
+ char *cmi_opts;
+ int cmi_opts_len;
+ char cmi_secret[SECRET_BUFSIZE];
+
+ /* mount dev syntax format */
+ enum mount_dev_format format;
+};
+
+static void mon_addr_as_resolve_param(char *mon_addr)
+{
+ for (; *mon_addr; ++mon_addr)
+ if (*mon_addr == '/')
+ *mon_addr = ',';
+}
+
+static void resolved_mon_addr_as_mount_opt(char *mon_addr)
+{
+ for (; *mon_addr; ++mon_addr)
+ if (*mon_addr == ',')
+ *mon_addr = '/';
+}
+
+static void resolved_mon_addr_as_mount_dev(char *mon_addr)
+{
+ for (; *mon_addr; ++mon_addr)
+ if (*mon_addr == '/')
+ *mon_addr = ',';
+}
+
+static void block_signals (int how)
+{
+ sigset_t sigs;
+
+ sigfillset (&sigs);
+ sigdelset(&sigs, SIGTRAP);
+ sigdelset(&sigs, SIGSEGV);
+ sigprocmask (how, &sigs, (sigset_t *) 0);
+}
+
+void mount_ceph_debug(const char *fmt, ...)
+{
+ if (verboseflag) {
+ va_list args;
+
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+ }
+}
+
+/*
+ * append a key value pair option to option string.
+ */
+static void append_opt(const char *key, const char *value,
+ struct ceph_mount_info *cmi, int *pos)
+{
+ if (*pos != 0)
+ *pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, *pos, ",");
+
+ if (value) {
+ *pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, *pos, key);
+ *pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, *pos, "=");
+ *pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, *pos, value);
+ } else {
+ *pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, *pos, key);
+ }
+}
+
+/*
+ * remove a key value pair from option string. caller should ensure that the
+ * key value pair is separated by "=".
+ *
+ * Only a "key=" that starts an option (at the beginning of the string or
+ * right after a ",") is matched, so the key text appearing inside another
+ * option's name or value is skipped.
+ *
+ * If @value is non-NULL and the option has a non-empty value, *value is set
+ * to a malloc()ed copy that the caller must free(); otherwise *value is left
+ * untouched.
+ *
+ * Returns 0 on success, -ENOENT if no such option exists and -ENOMEM if the
+ * value could not be copied (the option string is not modified in that case).
+ */
+static int remove_opt(struct ceph_mount_info *cmi, const char *key, char **value)
+{
+	size_t key_len = strlen(key);
+	char *key_start = cmi->cmi_opts;
+	char *val_start;
+	char *value_end;
+
+	if (!key_start)
+		return -ENOENT;
+
+	/* look for "key=" sitting on an option boundary */
+	for (;;) {
+		key_start = strstr(key_start, key);
+		if (!key_start)
+			return -ENOENT;
+		if ((key_start == cmi->cmi_opts || key_start[-1] == ',') &&
+		    key_start[key_len] == '=')
+			break;
+		++key_start;
+	}
+
+	val_start = key_start + key_len + 1;
+	value_end = strchr(val_start, ',');
+	if (!value_end)
+		value_end = val_start + strlen(val_start);
+
+	if (value_end != val_start && value) {
+		*value = strndup(val_start, value_end - val_start);
+		if (!*value)
+			return -ENOMEM;
+	}
+
+	/* purge it */
+	if (*value_end == ',') {
+		/* slide the remaining options (and the NUL) over the pair */
+		memmove(key_start, value_end + 1, strlen(value_end + 1) + 1);
+	} else {
+		/*
+		 * last kv pair - swallow the comma in front of it, but never
+		 * look before the start of the option string.
+		 */
+		if (key_start > cmi->cmi_opts)
+			--key_start;
+		*key_start = '\0';
+	}
+
+	return 0;
+}
+
+static void record_name(const char *name, struct ceph_mount_info *cmi)
+{
+ int name_pos = 0;
+ int name_len = 0;
+
+ name_pos = safe_cat(&cmi->cmi_name, &name_len, name_pos, name);
+}
+
+/*
+ * parse old device string of format: <mon_addr>:/<path>
+ */
+static int parse_old_dev(const char *dev_str, struct ceph_mount_info *cmi,
+ int *opt_pos)
+{
+ size_t len;
+ char *mount_path;
+
+ mount_path = strstr(dev_str, ":/");
+ if (!mount_path) {
+ fprintf(stderr, "source mount path was not specified\n");
+ return -EINVAL;
+ }
+
+ len = mount_path - dev_str;
+ if (len != 0) {
+ free(cmi->cmi_mons);
+ /* overrides mon_addr passed via mount option (if any) */
+ cmi->cmi_mons = strndup(dev_str, len);
+ if (!cmi->cmi_mons)
+ return -ENOMEM;
+ mon_addr_specified = true;
+ } else {
+ /* reset mon_addr=<> mount option */
+ mon_addr_specified = false;
+ }
+
+ mount_path++;
+ cmi->cmi_path = strdup(mount_path);
+ if (!cmi->cmi_path)
+ return -ENOMEM;
+ if (!cmi->cmi_name)
+ record_name(CEPH_AUTH_NAME_DEFAULT, cmi);
+
+ cmi->format = MOUNT_DEV_FORMAT_OLD;
+ return 0;
+}
+
+/*
+ * parse new device string of format: name@<fsid>.fs_name=/path
+ */
+static int parse_new_dev(const char *dev_str, struct ceph_mount_info *cmi,
+ int *opt_pos)
+{
+ size_t len;
+ char *name;
+ char *name_end;
+ char *dot;
+ char *fs_name;
+
+ name_end = strstr(dev_str, "@");
+ if (!name_end) {
+ mount_ceph_debug("invalid new device string format\n");
+ return -ENODEV;
+ }
+
+ len = name_end - dev_str;
+ if (!len) {
+ fprintf(stderr, "missing <name> in device\n");
+ return -EINVAL;
+ }
+
+ name = (char *)alloca(len+1);
+ memcpy(name, dev_str, len);
+ name[len] = '\0';
+
+ if (cmi->cmi_name && strcmp(cmi->cmi_name, name)) {
+ fprintf(stderr, "mismatching ceph user in mount option and device string\n");
+ return -EINVAL;
+ }
+
+ /* record name and store in option string */
+ if (!cmi->cmi_name) {
+ record_name(name, cmi);
+ append_opt("name", name, cmi, opt_pos);
+ }
+
+ ++name_end;
+ /* check if an fsid is included in the device string */
+ dot = strstr(name_end, ".");
+ if (!dot) {
+ fprintf(stderr, "invalid device string format\n");
+ return -EINVAL;
+ }
+ len = dot - name_end;
+ if (len) {
+ /* check if this _looks_ like a UUID */
+ if (len != CLUSTER_FSID_LEN - 1) {
+ fprintf(stderr, "invalid device string format\n");
+ return -EINVAL;
+ }
+
+ cmi->cmi_fsid = strndup(name_end, len);
+ if (!cmi->cmi_fsid)
+ return -ENOMEM;
+ }
+
+ ++dot;
+ fs_name = strstr(dot, "=");
+ if (!fs_name) {
+ fprintf(stderr, "invalid device string format\n");
+ return -EINVAL;
+ }
+ len = fs_name - dot;
+ if (!len) {
+ fprintf(stderr, "missing <fs_name> in device\n");
+ return -EINVAL;
+ }
+ cmi->cmi_fsname = strndup(dot, len);
+ if (!cmi->cmi_fsname)
+ return -ENOMEM;
+
+ ++fs_name;
+ if (strlen(fs_name)) {
+ cmi->cmi_path = strdup(fs_name);
+ if (!cmi->cmi_path)
+ return -ENOMEM;
+ }
+
+ /* new-style dev - force using v2 addrs first */
+ if (!ms_mode_specified && !mon_addr_specified)
+ append_opt("ms_mode", CEPH_DEFAULT_V2_MS_MODE, cmi,
+ opt_pos);
+
+ cmi->format = MOUNT_DEV_FORMAT_NEW;
+ return 0;
+}
+
+static int parse_dev(const char *dev_str, struct ceph_mount_info *cmi,
+ int *opt_pos)
+{
+ int ret;
+
+ ret = parse_new_dev(dev_str, cmi, opt_pos);
+ if (ret < 0 && ret != -ENODEV)
+ return -EINVAL;
+ if (ret)
+ ret = parse_old_dev(dev_str, cmi, opt_pos);
+ if (ret < 0)
+ fprintf(stderr, "error parsing device string\n");
+ return ret;
+}
+
+/* resolve monitor host and optionally record in option string.
+ * use opt_pos to determine if the caller wants to record the
+ * resolved address in mount option (c.f., mount_old_device_format).
+ *
+ * When opt_pos is NULL and resolved_addr is non-NULL, *resolved_addr
+ * receives a malloc()ed copy of the resolved address that the caller
+ * must free().
+ *
+ * Returns 0 on success and -1 if the address cannot be resolved or
+ * memory cannot be allocated.
+ */
+static int finalize_src(struct ceph_mount_info *cmi, int *opt_pos,
+			char **resolved_addr)
+{
+	int ret = 0;
+	char *src;
+	char *addr;
+
+	/*
+	 * Work on a heap copy: the monitor list length is not bounded
+	 * here, so keep it off the stack.
+	 */
+	addr = strdup(cmi->cmi_mons);
+	if (!addr)
+		return -1;
+	mon_addr_as_resolve_param(addr);
+
+	src = resolve_addrs(addr);
+	if (!src) {
+		free(addr);
+		return -1;
+	}
+
+	mount_ceph_debug("mount.ceph: resolved to: \"%s\"\n", src);
+	if (opt_pos) {
+		resolved_mon_addr_as_mount_opt(src);
+		append_opt("mon_addr", src, cmi, opt_pos);
+	} else if (resolved_addr) {
+		*resolved_addr = strdup(src);
+		/* report the failure instead of handing back NULL as success */
+		if (!*resolved_addr)
+			ret = -1;
+	}
+	free(src);
+	free(addr);
+	return ret;
+}
+
+static int
+drop_capabilities()
+{
+ capng_setpid(getpid());
+ capng_clear(CAPNG_SELECT_BOTH);
+ if (capng_update(CAPNG_ADD, CAPNG_PERMITTED, CAP_DAC_READ_SEARCH)) {
+ fprintf(stderr, "Unable to update permitted capability set.\n");
+ return EX_SYSERR;
+ }
+ if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, CAP_DAC_READ_SEARCH)) {
+ fprintf(stderr, "Unable to update effective capability set.\n");
+ return EX_SYSERR;
+ }
+ if (capng_apply(CAPNG_SELECT_BOTH)) {
+ fprintf(stderr, "Unable to apply new capability set.\n");
+ return EX_SYSERR;
+ }
+ return 0;
+}
+
+/*
+ * Attempt to fetch info from the local config file, if one is present. Since
+ * this involves activity that may be dangerous for a privileged task, we
+ * fork(), have the child drop privileges and do the processing and then hand
+ * back the results via memory shared with the parent.
+ *
+ * Only fields of @cmi that are still unset (secret, monitor list, fsid) are
+ * filled in. Returns 0 on success and EX_SYSERR on failure.
+ */
+static int fetch_config_info(struct ceph_mount_info *cmi)
+{
+	int ret = 0;
+	int status = 0;
+	size_t len;
+	pid_t pid;
+	pid_t waited;
+	struct ceph_config_info *cci;
+
+	/* Don't do anything if we already have requisite info */
+	if (cmi->cmi_secret[0] && cmi->cmi_mons && cmi->cmi_fsid)
+		return 0;
+
+	cci = mmap((void *)0, sizeof(*cci), PROT_READ | PROT_WRITE,
+		   MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+	if (cci == MAP_FAILED) {
+		mount_ceph_debug("Unable to allocate memory: %s\n",
+				 strerror(errno));
+		return EX_SYSERR;
+	}
+
+	/*
+	 * Flush stdio first: the child leaves via exit(), which would
+	 * otherwise write out its copy of anything still buffered here.
+	 */
+	fflush(NULL);
+
+	pid = fork();
+	if (pid < 0) {
+		mount_ceph_debug("fork() failure: %s\n", strerror(errno));
+		ret = EX_SYSERR;
+		goto out;
+	}
+
+	if (pid == 0) {
+		char *entity_name = NULL;
+		int name_pos = 0;
+		int name_len = 0;
+
+		/* child */
+		ret = drop_capabilities();
+		if (ret)
+			exit(1);
+
+		name_pos = safe_cat(&entity_name, &name_len, name_pos, "client.");
+		name_pos = safe_cat(&entity_name, &name_len, name_pos, cmi->cmi_name);
+		mount_ceph_get_config_info(cmi->cmi_conf, entity_name, v2_addrs, cci);
+		free(entity_name);
+		exit(0);
+	}
+
+	/* parent: reap exactly the child we forked, retrying if interrupted */
+	do {
+		waited = waitpid(pid, &status, 0);
+	} while (waited < 0 && errno == EINTR);
+	if (waited < 0) {
+		mount_ceph_debug("waitpid() failure: %s\n", strerror(errno));
+		ret = EX_SYSERR;
+		goto out;
+	}
+	if (!WIFEXITED(status)) {
+		mount_ceph_debug("Child process terminated abnormally.\n");
+		ret = EX_SYSERR;
+		goto out;
+	}
+	ret = WEXITSTATUS(status);
+	if (ret) {
+		mount_ceph_debug("Child exited with status %d\n", ret);
+		ret = EX_SYSERR;
+		goto out;
+	}
+
+	/*
+	 * Copy values from MAP_SHARED buffer to cmi if we didn't
+	 * already find anything and we got something from the child.
+	 */
+	if (!cmi->cmi_secret[0] && cci->cci_secret[0]) {
+		len = strnlen(cci->cci_secret, SECRET_BUFSIZE);
+		if (len < SECRET_BUFSIZE) {
+			memcpy(cmi->cmi_secret, cci->cci_secret, len + 1);
+		} else {
+			/* SECRET_BUFSIZE is an int expression; %zu needs a size_t */
+			mount_ceph_debug("secret is too long (len=%zu max=%zu)!\n",
+					 len, (size_t)SECRET_BUFSIZE);
+		}
+	}
+	if (!cmi->cmi_mons && cci->cci_mons[0]) {
+		len = strnlen(cci->cci_mons, MON_LIST_BUFSIZE);
+		if (len < MON_LIST_BUFSIZE)
+			cmi->cmi_mons = strndup(cci->cci_mons, len + 1);
+	}
+	if (!cmi->cmi_fsid) {
+		len = strnlen(cci->cci_fsid, CLUSTER_FSID_LEN);
+		if (len < CLUSTER_FSID_LEN)
+			cmi->cmi_fsid = strndup(cci->cci_fsid, len + 1);
+	}
+out:
+	munmap(cci, sizeof(*cci));
+	return ret;
+}
+
+/*
+ * this one is partially based on parse_options() from cifs.mount.c
+ */
+static int parse_options(const char *data, struct ceph_mount_info *cmi,
+ int *opt_pos)
+{
+ char * next_keyword = NULL;
+ char *name = NULL;
+
+ if (data == EMPTY_STRING)
+ goto out;
+
+ mount_ceph_debug("parsing options: %s\n", data);
+
+ do {
+ char * value = NULL;
+ bool skip = true;
+
+ /* check if ends with trailing comma */
+ if(*data == 0)
+ break;
+ next_keyword = strchr(data,',');
+
+ /* temporarily null terminate end of keyword=value pair */
+ if(next_keyword)
+ *next_keyword++ = 0;
+
+ /* temporarily null terminate keyword to make keyword and value distinct */
+ if ((value = strchr(data, '=')) != NULL) {
+ *value = '\0';
+ value++;
+ }
+
+ if (strcmp(data, "ro") == 0) {
+ cmi->cmi_flags |= MS_RDONLY;
+ } else if (strcmp(data, "rw") == 0) {
+ cmi->cmi_flags &= ~MS_RDONLY;
+ } else if (strcmp(data, "nosuid") == 0) {
+ cmi->cmi_flags |= MS_NOSUID;
+ } else if (strcmp(data, "suid") == 0) {
+ cmi->cmi_flags &= ~MS_NOSUID;
+ } else if (strcmp(data, "dev") == 0) {
+ cmi->cmi_flags &= ~MS_NODEV;
+ } else if (strcmp(data, "nodev") == 0) {
+ cmi->cmi_flags |= MS_NODEV;
+ } else if (strcmp(data, "noexec") == 0) {
+ cmi->cmi_flags |= MS_NOEXEC;
+ } else if (strcmp(data, "exec") == 0) {
+ cmi->cmi_flags &= ~MS_NOEXEC;
+ } else if (strcmp(data, "sync") == 0) {
+ cmi->cmi_flags |= MS_SYNCHRONOUS;
+ } else if (strcmp(data, "remount") == 0) {
+ cmi->cmi_flags |= MS_REMOUNT;
+ } else if (strcmp(data, "mandlock") == 0) {
+ cmi->cmi_flags |= MS_MANDLOCK;
+ } else if ((strcmp(data, "nobrl") == 0) ||
+ (strcmp(data, "nolock") == 0)) {
+ cmi->cmi_flags &= ~MS_MANDLOCK;
+ } else if (strcmp(data, "noatime") == 0) {
+ cmi->cmi_flags |= MS_NOATIME;
+ } else if (strcmp(data, "nodiratime") == 0) {
+ cmi->cmi_flags |= MS_NODIRATIME;
+ } else if (strcmp(data, "relatime") == 0) {
+ cmi->cmi_flags |= MS_RELATIME;
+ } else if (strcmp(data, "strictatime") == 0) {
+ cmi->cmi_flags |= MS_STRICTATIME;
+ } else if (strcmp(data, "noauto") == 0) {
+ /* ignore */
+ } else if (strcmp(data, "_netdev") == 0) {
+ /* ignore */
+ } else if (strcmp(data, "nofail") == 0) {
+ /* ignore */
+ } else if (strcmp(data, "fs") == 0) {
+ if (!value || !*value) {
+ fprintf(stderr, "mount option fs requires a value.\n");
+ return -EINVAL;
+ }
+ data = "mds_namespace";
+ skip = false;
+ } else if (strcmp(data, "nofallback") == 0) {
+ no_fallback = true;
+ } else if (strcmp(data, "secretfile") == 0) {
+ int ret;
+
+ if (!value || !*value) {
+ fprintf(stderr, "keyword secretfile found, but no secret file specified\n");
+ return -EINVAL;
+ }
+ ret = read_secret_from_file(value, cmi->cmi_secret, sizeof(cmi->cmi_secret));
+ if (ret < 0) {
+ fprintf(stderr, "error reading secret file: %d\n", ret);
+ return ret;
+ }
+ } else if (strcmp(data, "secret") == 0) {
+ size_t len;
+
+ if (!value || !*value) {
+ fprintf(stderr, "mount option secret requires a value.\n");
+ return -EINVAL;
+ }
+
+ len = strnlen(value, sizeof(cmi->cmi_secret)) + 1;
+ if (len <= sizeof(cmi->cmi_secret))
+ memcpy(cmi->cmi_secret, value, len);
+ } else if (strcmp(data, "conf") == 0) {
+ if (!value || !*value) {
+ fprintf(stderr, "mount option conf requires a value.\n");
+ return -EINVAL;
+ }
+ /* keep pointer to value */
+ cmi->cmi_conf = strdup(value);
+ if (!cmi->cmi_conf)
+ return -ENOMEM;
+ } else if (strcmp(data, "name") == 0) {
+ if (!value || !*value) {
+ fprintf(stderr, "mount option name requires a value.\n");
+ return -EINVAL;
+ }
+ /* keep pointer to value */
+ name = value;
+ skip = false;
+ } else if (strcmp(data, "ms_mode") == 0) {
+ if (!value || !*value) {
+ fprintf(stderr, "mount option ms_mode requires a value.\n");
+ return -EINVAL;
+ }
+ /* Only legacy ms_mode needs v1 addrs */
+ v2_addrs = strcmp(value, "legacy");
+ skip = false;
+ ms_mode_specified = true;
+ } else if (strcmp(data, "mon_addr") == 0) {
+ /* monitor address to use for mounting */
+ if (!value || !*value) {
+ fprintf(stderr, "mount option mon_addr requires a value.\n");
+ return -EINVAL;
+ }
+ cmi->cmi_mons = strdup(value);
+ if (!cmi->cmi_mons)
+ return -ENOMEM;
+ mon_addr_specified = true;
+ } else {
+ /* unrecognized mount options, passing to kernel */
+ skip = false;
+ }
+
+ /* Copy (possibly modified) option to out */
+ if (!skip)
+ append_opt(data, value, cmi, opt_pos);
+ data = next_keyword;
+ } while (data);
+
+out:
+ /*
+ * set ->cmi_name conditionally -- this gets checked when parsing new
+ * device format. for old device format, ->cmi_name is set to default
+ * user name when name option is not passed in.
+ */
+ if (name)
+ record_name(name, cmi);
+ if (cmi->cmi_opts)
+ mount_ceph_debug("mount.ceph: options \"%s\".\n", cmi->cmi_opts);
+
+ if (!cmi->cmi_opts) {
+ cmi->cmi_opts = strdup(EMPTY_STRING);
+ if (!cmi->cmi_opts)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+
+static int parse_arguments(int argc, char *const *const argv,
+ const char **src, const char **node, const char **opts)
+{
+ int opt = 0;
+ static struct option long_options[] = {
+ { "help", no_argument, 0, 'h' },
+ { "no-mtab", no_argument, 0, 'n' },
+ { "verbose", no_argument, 0, 'v' },
+ { "fake", no_argument, 0, 'f' },
+ { "options", required_argument, 0, 'o' },
+ { 0, 0, 0, 0 }
+ };
+
+ if (argc < 2) {
+ // There were no arguments. Just show the usage.
+ return 1;
+ }
+ if ((!strcmp(argv[1], "-h")) || (!strcmp(argv[1], "--help"))) {
+ // The user asked for help.
+ return 1;
+ }
+
+ // The first two arguments are positional
+ if (argc < 3)
+ return -EINVAL;
+ *src = argv[1];
+ *node = argv[2];
+
+ // Parse the remaining options
+ *opts = EMPTY_STRING;
+ while ((opt = getopt_long(argc, argv, "hnvfo:",
+ long_options, NULL)) != -1) {
+ switch (opt) {
+ case 'h' : // -h or --help
+ return 1;
+ case 'n' : // -n or --no-mtab
+ skip_mtab_flag = true;
+ break;
+ case 'v' : // -v or --verbose
+ verboseflag = true;
+ break;
+ case 'f' : // -f or --fake
+ fakeflag = true;
+ break;
+ case 'o' : // -o or --options
+ *opts = optarg;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+/* modprobe failing doesn't necessarily prevent the mount from working,
+ so this returns void */
+static void modprobe(void)
+{
+ int r;
+
+ r = module_load("ceph", NULL);
+ if (r)
+ printf("failed to load ceph kernel module (%d)\n", r);
+}
+
+static void usage(const char *prog_name)
+{
+ printf("usage: %s [src] [mount-point] [-n] [-v] [-o ceph-options]\n",
+ prog_name);
+ printf("options:\n");
+ printf("\t-h, --help\tPrint this help\n");
+ printf("\t-n, --no-mtab\tDo not update /etc/mtab\n");
+ printf("\t-v, --verbose\tVerbose\n");
+ printf("\t-f, --fake\tFake mount, do not actually mount\n");
+ printf("ceph-options: refer to mount.ceph(8)\n");
+ printf("\n");
+}
+
+/*
+ * The structure itself lives on the stack, so don't free it. Just the
+ * pointers inside.
+ */
+static void ceph_mount_info_free(struct ceph_mount_info *cmi)
+{
+ free(cmi->cmi_opts);
+ free(cmi->cmi_name);
+ free(cmi->cmi_fsname);
+ free(cmi->cmi_fsid);
+ free(cmi->cmi_path);
+ free(cmi->cmi_mons);
+ free(cmi->cmi_conf);
+}
+
+static int call_mount_system_call(const char *rsrc, const char *node, struct ceph_mount_info *cmi)
+{
+ int r = 0;
+ if (!fakeflag) {
+ r = mount(rsrc, node, "ceph", cmi->cmi_flags, cmi->cmi_opts);
+ }
+ return r;
+}
+
+/*
+ * Mount using the new device syntax: <name>@<fsid>.<fs_name>=<path>.
+ *
+ * Returns 0 on success, -EINVAL if no fsid is known, or -errno from the
+ * failed mount(2) call.
+ */
+static int mount_new_device_format(const char *node, struct ceph_mount_info *cmi)
+{
+	int r;
+	char *rsrc = NULL;
+	int pos = 0;
+	int len = 0;
+
+	if (!cmi->cmi_fsid) {
+		/* terminate the line so the next message starts on its own */
+		fprintf(stderr, "missing ceph cluster-id\n");
+		return -EINVAL;
+	}
+
+	pos = safe_cat(&rsrc, &len, pos, cmi->cmi_name);
+	pos = safe_cat(&rsrc, &len, pos, "@");
+	pos = safe_cat(&rsrc, &len, pos, cmi->cmi_fsid);
+	pos = safe_cat(&rsrc, &len, pos, ".");
+	pos = safe_cat(&rsrc, &len, pos, cmi->cmi_fsname);
+	pos = safe_cat(&rsrc, &len, pos, "=");
+	if (cmi->cmi_path)
+		safe_cat(&rsrc, &len, pos, cmi->cmi_path);
+
+	mount_ceph_debug("mount.ceph: trying mount with new device syntax: %s\n",
+			 rsrc);
+	if (cmi->cmi_opts)
+		mount_ceph_debug("mount.ceph: options \"%s\" will pass to kernel\n",
+				 cmi->cmi_opts);
+	r = call_mount_system_call(rsrc, node, cmi);
+	/* capture errno before free() gets a chance to touch it */
+	if (r)
+		r = -errno;
+	free(rsrc);
+	return r;
+}
+
+/*
+ * Mount using the old device syntax: <mon_addr>:<path>.
+ *
+ * The "mon_addr" option is taken out of the option string and used to build
+ * the device string; the fs name and fsid, when set, are appended as the
+ * "mds_namespace" and "fsid" options.
+ *
+ * Returns 0 on success, -EINVAL if the device string cannot be built, or the
+ * non-zero result of mount(2) with errno left as that call set it.
+ */
+static int mount_old_device_format(const char *node, struct ceph_mount_info *cmi)
+{
+	int r;
+	int saved_errno;
+	int len = 0;
+	int pos = 0;
+	char *mon_addr = NULL;
+	char *rsrc = NULL;
+
+	r = remove_opt(cmi, "mon_addr", &mon_addr);
+	if (r) {
+		fprintf(stderr, "failed to switch using old device format\n");
+		return -EINVAL;
+	}
+
+	/* if we reach here and still have a v2 addr, we need to
+	 * refresh with v1 addrs, since we will not be passing ms_mode
+	 * with the old syntax.
+	 */
+	if (v2_addrs && !ms_mode_specified && !mon_addr_specified) {
+		mount_ceph_debug("mount.ceph: switching to using v1 address with old syntax\n");
+		v2_addrs = false;
+		free(mon_addr);
+		free(cmi->cmi_mons);
+		mon_addr = NULL;
+		cmi->cmi_mons = NULL;
+		fetch_config_info(cmi);
+		if (!cmi->cmi_mons) {
+			fprintf(stderr, "unable to determine (v1) mon addresses\n");
+			return -EINVAL;
+		}
+		r = finalize_src(cmi, NULL, &mon_addr);
+		if (r) {
+			fprintf(stderr, "failed to resolve (v1) mon addresses\n");
+			return -EINVAL;
+		}
+		remove_opt(cmi, "ms_mode", NULL);
+	}
+
+	/*
+	 * mon_addr stays NULL when the option had an empty value or the
+	 * copy of the resolved address could not be allocated; it must
+	 * not be dereferenced below.
+	 */
+	if (!mon_addr) {
+		fprintf(stderr, "failed to switch using old device format\n");
+		return -EINVAL;
+	}
+
+	pos = strlen(cmi->cmi_opts);
+	if (cmi->cmi_fsname)
+		append_opt("mds_namespace", cmi->cmi_fsname, cmi, &pos);
+	if (cmi->cmi_fsid)
+		append_opt("fsid", cmi->cmi_fsid, cmi, &pos);
+
+	pos = 0;
+	resolved_mon_addr_as_mount_dev(mon_addr);
+	pos = safe_cat(&rsrc, &len, pos, mon_addr);
+	pos = safe_cat(&rsrc, &len, pos, ":");
+	if (cmi->cmi_path)
+		safe_cat(&rsrc, &len, pos, cmi->cmi_path);
+
+	mount_ceph_debug("mount.ceph: trying mount with old device syntax: %s\n",
+			 rsrc);
+	if (cmi->cmi_opts)
+		mount_ceph_debug("mount.ceph: options \"%s\" will pass to kernel\n",
+				 cmi->cmi_opts);
+
+	r = call_mount_system_call(rsrc, node, cmi);
+	/* do_mount() reports errno on failure; keep free() from disturbing it */
+	saved_errno = errno;
+	free(mon_addr);
+	free(rsrc);
+	errno = saved_errno;
+
+	return r;
+}
+
+/*
+ * check whether to fall-back to using old-style mount syntax (called
+ * when new-style mount syntax fails). this is mostly to catch any
+ * new-style (v2) implementation bugs in the kernel and is primarily
+ * used in teuthology tests.
+ */
+static bool should_fallback()
+{
+ int ret;
+ struct stat stbuf;
+
+ if (!no_fallback)
+ return true;
+
+ ret = stat(CEPH_V2_MOUNT_SUPPORT_PATH, &stbuf);
+ if (ret) {
+ mount_ceph_debug("mount.ceph: v2 mount support check returned %d\n",
+ errno);
+ if (errno == ENOENT)
+ mount_ceph_debug("mount.ceph: kernel does not support v2"
+ " syntax\n");
+ /* fallback on *all* errors */
+ return true;
+ }
+
+ fprintf(stderr, "mount.ceph: kernel BUG!\n");
+ return false;
+}
+
+/*
+ * Resolve the monitor addresses and perform the mount.
+ *
+ * A new-style device is tried with the new syntax first; the old syntax is
+ * used for old-style devices and as a fallback when the new syntax fails
+ * with -EINVAL and fallback is permitted. On success the mtab entry is
+ * updated unless that was disabled.
+ *
+ * Returns 0 on success, -EINVAL if the monitor addresses cannot be
+ * determined or resolved, and EX_FAIL if mounting fails.
+ */
+static int do_mount(const char *dev, const char *node,
+		    struct ceph_mount_info *cmi) {
+	int pos = 0;
+	int retval= -EINVAL;
+	bool fallback = true;
+
+	/* no v2 addresses available via config - try v1 addresses */
+	if (v2_addrs &&
+	    !cmi->cmi_mons &&
+	    !ms_mode_specified &&
+	    !mon_addr_specified) {
+		mount_ceph_debug("mount.ceph: switching to using v1 address\n");
+		v2_addrs = false;
+		fetch_config_info(cmi);
+		remove_opt(cmi, "ms_mode", NULL);
+	}
+
+	if (!cmi->cmi_mons) {
+		fprintf(stderr, "unable to determine mon addresses\n");
+		return -EINVAL;
+	}
+
+	pos = strlen(cmi->cmi_opts);
+	retval = finalize_src(cmi, &pos, NULL);
+	if (retval) {
+		fprintf(stderr, "failed to resolve source\n");
+		return -EINVAL;
+	}
+
+	retval = -1;
+	if (cmi->format == MOUNT_DEV_FORMAT_NEW) {
+		retval = mount_new_device_format(node, cmi);
+		if (retval) {
+			int mount_err = -retval;
+
+			fallback = (should_fallback() && retval == -EINVAL && cmi->cmi_fsid);
+			/*
+			 * should_fallback() calls stat() and prints, either of
+			 * which may change errno; make sure the error reported
+			 * below is the one from the mount attempt.
+			 */
+			errno = mount_err;
+		}
+	}
+
+	/* pass-through or fallback to old-style mount device */
+	if (retval && fallback)
+		retval = mount_old_device_format(node, cmi);
+	if (retval) {
+		retval = EX_FAIL;
+		switch (errno) {
+		case ENODEV:
+			fprintf(stderr, "mount error: ceph filesystem not supported by the system\n");
+			break;
+		case EHOSTUNREACH:
+			fprintf(stderr, "mount error: no mds server is up or the cluster is laggy\n");
+			break;
+		default:
+			fprintf(stderr, "mount error %d = %s\n", errno, strerror(errno));
+		}
+	}
+
+	if (!retval && !skip_mtab_flag) {
+		update_mtab_entry(dev, node, "ceph", cmi->cmi_opts, cmi->cmi_flags, 0, 0);
+	}
+
+	return retval;
+}
+
+static int append_key_or_secret_option(struct ceph_mount_info *cmi)
+{
+ int pos = strlen(cmi->cmi_opts);
+
+ if (!cmi->cmi_secret[0] && !is_kernel_secret(cmi->cmi_name))
+ return 0;
+
+ if (pos)
+ pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, pos, ",");
+
+ /* when parsing kernel options (-o remount) we get '<hidden>' as the secret */
+ if (cmi->cmi_secret[0] && (strcmp(cmi->cmi_secret, "<hidden>") != 0)) {
+ int ret = set_kernel_secret(cmi->cmi_secret, cmi->cmi_name);
+ if (ret < 0) {
+ if (ret == -ENODEV || ret == -ENOSYS) {
+ /* old kernel; fall back to secret= in options */
+ pos = safe_cat(&cmi->cmi_opts,
+ &cmi->cmi_opts_len, pos,
+ "secret=");
+ pos = safe_cat(&cmi->cmi_opts,
+ &cmi->cmi_opts_len, pos,
+ cmi->cmi_secret);
+ return 0;
+ }
+ fprintf(stderr, "adding ceph secret key to kernel failed: %s\n",
+ strerror(-ret));
+ return ret;
+ }
+ }
+
+ pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, pos, "key=");
+ pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, pos, cmi->cmi_name);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int opt_pos = 0;
+ const char *dev, *node, *opts;
+ int retval;
+ struct ceph_mount_info cmi = { 0 };
+
+ retval = parse_arguments(argc, argv, &dev, &node, &opts);
+ if (retval) {
+ usage(argv[0]);
+ retval = (retval > 0) ? 0 : EX_USAGE;
+ goto out;
+ }
+
+ retval = parse_options(opts, &cmi, &opt_pos);
+ if (retval) {
+ fprintf(stderr, "failed to parse ceph_options: %d\n", retval);
+ retval = EX_USAGE;
+ goto out;
+ }
+
+ retval = parse_dev(dev, &cmi, &opt_pos);
+ if (retval) {
+ fprintf(stderr, "unable to parse mount device string: %d\n", retval);
+ retval = EX_USAGE;
+ goto out;
+ }
+
+ /*
+ * We don't care if this errors out, since this is best-effort.
+ * note that this fetches v1 or v2 addr depending on @v2_addrs
+ * flag.
+ */
+ fetch_config_info(&cmi);
+
+ /* Ensure the ceph key_type is available */
+ modprobe();
+
+ retval = append_key_or_secret_option(&cmi);
+ if (retval) {
+ fprintf(stderr, "couldn't append secret option: %d\n", retval);
+ retval = EX_USAGE;
+ goto out;
+ }
+
+ block_signals(SIG_BLOCK);
+ retval = do_mount(dev, node, &cmi);
+ block_signals(SIG_UNBLOCK);
+out:
+ ceph_mount_info_free(&cmi);
+ return retval;
+}
+
diff --git a/src/mount/mount.ceph.h b/src/mount/mount.ceph.h
new file mode 100644
index 000000000..9bd6bbfcc
--- /dev/null
+++ b/src/mount/mount.ceph.h
@@ -0,0 +1,44 @@
+#ifndef _SRC_MOUNT_MOUNT_CEPH_H
+#define _SRC_MOUNT_MOUNT_CEPH_H
+
+/*
+ * Declarations shared between the C and C++ parts of mount.ceph.
+ *
+ * The prototype of mount_ceph_get_config_info() below uses `bool`, which
+ * is only a keyword in C++; C translation units need <stdbool.h>.
+ * Include it here so this header is self-contained.
+ */
+#ifndef __cplusplus
+#include <stdbool.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * See class CryptoKey
+ *
+ * 2 (for the type of secret) +
+ * 8 (for the timestamp) +
+ * 2 (for the length of secret) +
+ * 16 (for an AES-128 key)
+ */
+#define MAX_RAW_SECRET_LEN (2 + 8 + 2 + 16)
+
+/* Max length of base64 encoded secret. 4/3 original size (rounded up) */
+#define MAX_SECRET_LEN ((MAX_RAW_SECRET_LEN + (3 - 1)) * 4 / 3)
+
+/* Max Including null terminator */
+#define SECRET_BUFSIZE (MAX_SECRET_LEN + 1)
+
+/* 2k should be enough for anyone? */
+#define MON_LIST_BUFSIZE 2048
+
+/* Size of the buffer that holds the cluster fsid string */
+#define CLUSTER_FSID_LEN 37
+
+/* printf-style debug output helper */
+void mount_ceph_debug(const char *fmt, ...);
+
+/* Fixed-size buffers filled in by mount_ceph_get_config_info() */
+struct ceph_config_info {
+	char cci_secret[SECRET_BUFSIZE];	// auth secret
+	char cci_mons[MON_LIST_BUFSIZE];	// monitor addrs
+	char cci_fsid[CLUSTER_FSID_LEN];	// cluster fsid
+};
+
+/*
+ * Fill @cci for the entity @name using @config_file; @v2_addrs selects
+ * which monitor address flavour is requested.
+ */
+void mount_ceph_get_config_info(const char *config_file, const char *name,
+				bool v2_addrs, struct ceph_config_info *cci);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SRC_MOUNT_MOUNT_CEPH_H */
diff --git a/src/mount/mtab.c b/src/mount/mtab.c
new file mode 100644
index 000000000..3ce368fd3
--- /dev/null
+++ b/src/mount/mtab.c
@@ -0,0 +1,294 @@
+
+/*
+ * this code lifted from util-linux-ng, licensed GPLv2+,
+ *
+ * git://git.kernel.org/pub/scm/utils/util-linux-ng/util-linux-ng.git
+ *
+ * whoever decided that each special mount program is responsible
+ * for updating /etc/mtab should be spanked.
+ *
+ * <sage@newdream.net>
+ */
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <time.h>
+#include <mntent.h>
+#include <stdarg.h>
+
+#include "mount/canonicalize.c"
+
+
+/* Updating mtab ----------------------------------------------*/
+
+/* Flag for already existing lock file. */
+static int we_created_lockfile = 0;
+static int lockfile_fd = -1;
+
+/* Flag to indicate that signals have been set up. */
+static int signals_have_been_setup = 0;
+
+/* Ensure that the lock is released if we are interrupted. */
+extern char *strsignal(int sig); /* not always in <string.h> */
+
+/*
+ * SIGALRM handler installed by lock_mtab().  It is intentionally empty:
+ * its only job is to let the alarm interrupt the blocking
+ * fcntl(F_SETLKW), which then fails on its own.
+ */
+static void
+setlkw_timeout (int sig) {
+	(void) sig;	/* nothing to do, fcntl will fail anyway */
+}
+
+#define _PATH_MOUNTED "/etc/mtab"
+#define _PATH_MOUNTED_LOCK "/etc/mtab~"
+#define PROC_SUPER_MAGIC 0x9fa0
+
+/* exit status - bits below are ORed */
+#define EX_USAGE 1 /* incorrect invocation or permission */
+#define EX_SYSERR 2 /* out of memory, cannot fork, ... */
+#define EX_SOFTWARE 4 /* internal mount bug or wrong version */
+#define EX_USER 8 /* user interrupt */
+#define EX_FILEIO 16 /* problems writing, locking, ... mtab/fstab */
+#define EX_FAIL 32 /* mount failure */
+#define EX_SOMEOK 64 /* some mount succeeded */
+
+/*
+ * Report a fatal error and terminate.
+ *
+ * Writes the printf-style message described by @fmt, followed by a
+ * newline, to stderr and then exits the process with status @err.
+ * Control never returns to the caller.
+ */
+int die(int err, const char *fmt, ...) {
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fputc('\n', stderr);
+
+	exit(err);
+}
+
+/* Defined further down in this file; needed by handler() below. */
+void unlock_mtab (void);
+
+/*
+ * Signal handler installed by lock_mtab() for every signal except
+ * SIGALRM.
+ *
+ * Releases the mtab lock if this process holds it, so that an
+ * interrupted mount does not leave a stale lock file behind, then
+ * exits with EX_USER after printing the signal's description.
+ * unlock_mtab() does nothing when the lock is not held.
+ */
+static void
+handler (int sig) {
+	unlock_mtab();
+	die(EX_USER, "%s", strsignal(sig));
+}
+
+/*
+ * Release the mtab lock.
+ *
+ * Does nothing unless this process created the lock file; otherwise
+ * closes the lock descriptor, removes _PATH_MOUNTED_LOCK and resets the
+ * bookkeeping so that a later call is a no-op.
+ */
+void
+unlock_mtab (void) {
+	if (!we_created_lockfile)
+		return;
+
+	close(lockfile_fd);
+	lockfile_fd = -1;
+	unlink (_PATH_MOUNTED_LOCK);
+	we_created_lockfile = 0;
+}
+
+/* Create the lock file.
+ The lock file will be removed if we catch a signal or when we exit. */
+/* The old code here used flock on a lock file /etc/mtab~ and deleted
+ this lock file afterwards. However, as rgooch remarks, that has a
+ race: a second mount may be waiting on the lock and proceed as
+ soon as the lock file is deleted by the first mount, and immediately
+ afterwards a third mount comes, creates a new /etc/mtab~, applies
+ flock to that, and also proceeds, so that the second and third mount
+ now both are scribbling in /etc/mtab.
+ The new code uses a link() instead of a creat(), where we proceed
+ only if it was us that created the lock, and hence we always have
+ to delete the lock afterwards. Now the use of flock() is in principle
+ superfluous, but avoids an arbitrary sleep(). */
+
+/* Where does the link point to? Obvious choices are mtab and mtab~~.
+ HJLu points out that the latter leads to races. Right now we use
+ mtab~.<pid> instead. Use 20 as upper bound for the length of %d. */
+#define MOUNTLOCK_LINKTARGET _PATH_MOUNTED_LOCK "%d"
+#define MOUNTLOCK_LINKTARGET_LTH (sizeof(_PATH_MOUNTED_LOCK)+20)
+
+/*
+ * The original mount locking code has used sleep(1) between attempts and
+ * maximal number of attempts has been 5.
+ *
+ * There was a very small number of attempts and an extremely long wait (1s),
+ * which is useless on machines with a large number of concurrent mount processes.
+ *
+ * Now we wait few thousand microseconds between attempts and we have global
+ * time limit (30s) rather than limit for number of attempts. The advantage
+ * is that this method also counts time which we spend in fcntl(F_SETLKW) and
+ * number of attempts is not so much restricted.
+ *
+ * -- kzak@redhat.com [2007-Mar-2007]
+ */
+
+/* maximum seconds between first and last attempt */
+#define MOUNTLOCK_MAXTIME 30
+
+/* sleep time (in microseconds, max=999999) between attempts */
+#define MOUNTLOCK_WAITTIME 5000
+
+/*
+ * Acquire the mtab lock (_PATH_MOUNTED_LOCK).
+ *
+ * On first use, installs signal handlers for signals 1 up to (but not
+ * including) SIGCHLD: SIGALRM gets the empty setlkw_timeout() handler,
+ * everything else gets handler().
+ *
+ * The lock is taken by creating a per-process file and hard-linking it
+ * to _PATH_MOUNTED_LOCK; only the process whose link() succeeds owns
+ * the lock.  While another process owns it we wait on an fcntl() write
+ * lock, bounded by an alarm, and retry until MOUNTLOCK_MAXTIME seconds
+ * have passed since the first attempt.
+ *
+ * Returns with we_created_lockfile set and lockfile_fd open on
+ * success; every failure path ends in die(EX_FILEIO, ...).
+ */
+void
+lock_mtab (void) {
+	int i;
+	struct timespec waittime;
+	struct timeval maxtime;
+	char linktargetfile[MOUNTLOCK_LINKTARGET_LTH];
+
+	if (!signals_have_been_setup) {
+		int sig = 0;
+		struct sigaction sa;
+
+		/* do not hand uninitialised fields to sigaction() */
+		memset(&sa, 0, sizeof(sa));
+		sa.sa_handler = handler;
+		sa.sa_flags = 0;
+		sigfillset (&sa.sa_mask);
+
+		while (sigismember (&sa.sa_mask, ++sig) != -1
+		       && sig != SIGCHLD) {
+			if (sig == SIGALRM)
+				sa.sa_handler = setlkw_timeout;
+			else
+				sa.sa_handler = handler;
+			sigaction (sig, &sa, (struct sigaction *) 0);
+		}
+		signals_have_been_setup = 1;
+	}
+
+	snprintf(linktargetfile, sizeof(linktargetfile), MOUNTLOCK_LINKTARGET,
+		 getpid ());
+
+	i = open (linktargetfile, O_WRONLY|O_CREAT, S_IRUSR|S_IWUSR);
+	if (i < 0) {
+		int errsv = errno;
+		/* linktargetfile does not exist (as a file)
+		   and we cannot create it. Read-only filesystem?
+		   Too many files open in the system?
+		   Filesystem full? */
+		die (EX_FILEIO, "can't create lock file %s: %s "
+		     "(use -n flag to override)",
+		     linktargetfile, strerror (errsv));
+	}
+	close(i);
+
+	gettimeofday(&maxtime, NULL);
+	maxtime.tv_sec += MOUNTLOCK_MAXTIME;
+
+	waittime.tv_sec = 0;
+	waittime.tv_nsec = (1000 * MOUNTLOCK_WAITTIME);
+
+	/* Repeat until it was us who made the link */
+	while (!we_created_lockfile) {
+		struct timeval now;
+		struct flock flock;
+		int errsv, j;
+
+		j = link(linktargetfile, _PATH_MOUNTED_LOCK);
+		errsv = errno;
+
+		if (j == 0)
+			we_created_lockfile = 1;
+
+		if (j < 0 && errsv != EEXIST) {
+			(void) unlink(linktargetfile);
+			die (EX_FILEIO, "can't link lock file %s: %s "
+			     "(use -n flag to override)",
+			     _PATH_MOUNTED_LOCK, strerror (errsv));
+		}
+
+		lockfile_fd = open (_PATH_MOUNTED_LOCK, O_WRONLY);
+
+		if (lockfile_fd < 0) {
+			/* Strange... Maybe the file was just deleted? */
+			/* save errno first: gettimeofday() may overwrite it */
+			errsv = errno;
+			gettimeofday(&now, NULL);
+			if (errsv == ENOENT && now.tv_sec < maxtime.tv_sec) {
+				we_created_lockfile = 0;
+				continue;
+			}
+			(void) unlink(linktargetfile);
+			die (EX_FILEIO, "can't open lock file %s: %s "
+			     "(use -n flag to override)",
+			     _PATH_MOUNTED_LOCK, strerror (errsv));
+		}
+
+		flock.l_type = F_WRLCK;
+		flock.l_whence = SEEK_SET;
+		flock.l_start = 0;
+		flock.l_len = 0;
+
+		if (j == 0) {
+			/* We made the link. Now claim the lock. */
+			if (fcntl (lockfile_fd, F_SETLK, &flock) == -1) {
+				/* proceed, since it was us who created the lockfile anyway */
+			}
+			(void) unlink(linktargetfile);
+		} else {
+			/* Someone else made the link. Wait. */
+			gettimeofday(&now, NULL);
+			if (now.tv_sec < maxtime.tv_sec) {
+				/* SIGALRM interrupts the blocking fcntl() below */
+				alarm(maxtime.tv_sec - now.tv_sec);
+				if (fcntl (lockfile_fd, F_SETLKW, &flock) == -1) {
+					/* save errno first: unlink() may overwrite it */
+					errsv = errno;
+					(void) unlink(linktargetfile);
+					die (EX_FILEIO, "can't lock lock file %s: %s",
+					     _PATH_MOUNTED_LOCK, (errsv == EINTR) ?
+					     "timed out" : strerror (errsv));
+				}
+				alarm(0);
+
+				nanosleep(&waittime, NULL);
+			} else {
+				(void) unlink(linktargetfile);
+				die (EX_FILEIO, "Cannot create link %s\n"
+				     "Perhaps there is a stale lock file?\n",
+				     _PATH_MOUNTED_LOCK);
+			}
+			/* not our lock: drop the descriptor and try link() again */
+			close(lockfile_fd);
+		}
+	}
+}
+
+/*
+ * Append an entry for a freshly mounted filesystem to _PATH_MOUNTED.
+ *
+ * @spec:  device/source string recorded as mnt_fsname
+ * @node:  mount point; stored in canonicalized form
+ * @type:  filesystem type
+ * @opts:  mount options, "rw" is recorded when NULL
+ * @flags: unused
+ * @freq:  value for mnt_freq
+ * @pass:  value for mnt_passno
+ *
+ * Nothing is written when _PATH_MOUNTED lives on procfs.  Failures are
+ * reported on stdout and otherwise ignored; the function never aborts
+ * the mount that has already happened.
+ */
+static void
+update_mtab_entry(const char *spec, const char *node, const char *type,
+		  const char *opts, int flags, int freq, int pass) {
+	struct statfs buf;
+	struct mntent mnt;
+	FILE *fp;
+
+	(void) flags;
+
+	if (statfs(_PATH_MOUNTED, &buf) != 0) {
+		/* statfs() returns -1; the reason is in errno */
+		int errsv = errno;
+		printf("mount: can't statfs %s: %s\n", _PATH_MOUNTED,
+		       strerror (errsv));
+		return;
+	}
+	/* /etc/mtab is symbol link to /proc/self/mounts? */
+	if (buf.f_type == PROC_SUPER_MAGIC)
+		return;
+
+	if (!opts)
+		opts = "rw";
+
+	mnt.mnt_fsname = strdup(spec);
+	mnt.mnt_dir = canonicalize_path(node);
+	mnt.mnt_type = strdup(type);
+	mnt.mnt_opts = strdup(opts);
+	mnt.mnt_freq = freq;
+	mnt.mnt_passno = pass;
+
+	/* never hand a NULL string to addmntent() */
+	if (!mnt.mnt_fsname || !mnt.mnt_dir || !mnt.mnt_type || !mnt.mnt_opts) {
+		printf("mount: can't build entry for %s\n", _PATH_MOUNTED);
+		goto out_free;
+	}
+
+	lock_mtab();
+	fp = setmntent(_PATH_MOUNTED, "a+");
+	if (fp == NULL) {
+		int errsv = errno;
+		printf("mount: can't open %s: %s\n", _PATH_MOUNTED,
+		       strerror (errsv));
+	} else {
+		/* addmntent() returns 1 on failure, 0 on success */
+		if ((addmntent (fp, &mnt)) == 1) {
+			int errsv = errno;
+			printf("mount: error writing %s: %s\n",
+			       _PATH_MOUNTED, strerror (errsv));
+		}
+		/* only close a stream that was actually opened */
+		endmntent(fp);
+	}
+	unlock_mtab();
+
+out_free:
+	free(mnt.mnt_fsname);
+	free(mnt.mnt_dir);
+	free(mnt.mnt_type);
+	free(mnt.mnt_opts);
+}