summaryrefslogtreecommitdiffstats
path: root/src/mount
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/mount
parentInitial commit. (diff)
downloadceph-upstream/16.2.11+ds.tar.xz
ceph-upstream/16.2.11+ds.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/mount')
-rw-r--r--src/mount/CMakeLists.txt10
-rw-r--r--src/mount/canonicalize.c203
-rw-r--r--src/mount/conf.cc107
-rw-r--r--src/mount/mount.ceph.c566
-rw-r--r--src/mount/mount.ceph.h41
-rw-r--r--src/mount/mtab.c294
6 files changed, 1221 insertions, 0 deletions
diff --git a/src/mount/CMakeLists.txt b/src/mount/CMakeLists.txt
new file mode 100644
index 000000000..605ca2f67
--- /dev/null
+++ b/src/mount/CMakeLists.txt
@@ -0,0 +1,10 @@
+# libcap-ng is located through pkg-config; both are hard requirements.
+find_package(PkgConfig QUIET REQUIRED)
+pkg_check_modules(CAPNG REQUIRED libcap-ng)
+
+# Sources compiled directly into the mount.ceph executable.
+set(mount_ceph_srcs
+ mount.ceph.c conf.cc)
+# The two object libraries below are linked in as pre-built objects.
+add_executable(mount.ceph ${mount_ceph_srcs}
+ $<TARGET_OBJECTS:parse_secret_objs>
+ $<TARGET_OBJECTS:common_mountcephfs_objs>)
+target_link_libraries(mount.ceph keyutils::keyutils ${CAPNG_LIBRARIES} global ceph-common)
+# Installed into the system-binary directory.
+install(TARGETS mount.ceph DESTINATION ${CMAKE_INSTALL_SBINDIR})
diff --git a/src/mount/canonicalize.c b/src/mount/canonicalize.c
new file mode 100644
index 000000000..02efbbedf
--- /dev/null
+++ b/src/mount/canonicalize.c
@@ -0,0 +1,203 @@
+/*
+ * canonicalize.c -- canonicalize pathname by removing symlinks
+ * Copyright (C) 1993 Rick Sladkey <jrs@world.std.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library Public License for more details.
+ *
+ */
+
+/*
+ * This routine is part of libc. We include it nevertheless,
+ * since the libc version has some security flaws.
+ *
+ * TODO: use canonicalize_file_name() once it exists in glibc
+ */
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#ifndef MAXSYMLINKS
+# define MAXSYMLINKS 256
+#endif
+
+/*
+ * Resolve 'path' to an absolute pathname, expanding symbolic links and
+ * collapsing ".", ".." and repeated '/' components.
+ *
+ * path:          pathname to resolve; if it does not start with '/', the
+ *                result is built on top of getcwd().
+ * resolved_path: caller-provided output buffer.
+ * maxreslth:     size limit used for the bounds checks on resolved_path.
+ *
+ * Returns resolved_path on success. Returns NULL if getcwd() or malloc()
+ * fails, if the result would not fit (errno = ENAMETOOLONG), if too many
+ * components were processed (errno = ELOOP), or if readlink() fails with
+ * anything other than EINVAL.
+ */
+static char *
+myrealpath(const char *path, char *resolved_path, int maxreslth) {
+ /* Counts every component that is looked up, not only actual symlinks. */
+ int readlinks = 0;
+ /* Write cursor inside resolved_path. */
+ char *npath;
+ char *link_path;
+ int n;
+ /* Heap copy of "symlink target + remaining path" once a link was expanded. */
+ char *buf = NULL;
+
+ npath = resolved_path;
+
+ /* If it's a relative pathname use getcwd for starters. */
+ if (*path != '/') {
+ if (!getcwd(npath, maxreslth-2))
+ return NULL;
+ npath += strlen(npath);
+ if (npath[-1] != '/')
+ *npath++ = '/';
+ } else {
+ *npath++ = '/';
+ path++;
+ }
+
+ /* Expand each slash-separated pathname component. */
+ link_path = malloc(PATH_MAX+1);
+ if (!link_path)
+ return NULL;
+ while (*path != '\0') {
+ /* Ignore stray "/" */
+ if (*path == '/') {
+ path++;
+ continue;
+ }
+ if (*path == '.' && (path[1] == '\0' || path[1] == '/')) {
+ /* Ignore "." */
+ path++;
+ continue;
+ }
+ if (*path == '.' && path[1] == '.' &&
+ (path[2] == '\0' || path[2] == '/')) {
+ /* Backup for ".." */
+ path += 2;
+ /* Drop the previous component, but never go above the leading '/'. */
+ while (npath > resolved_path+1 &&
+ (--npath)[-1] != '/')
+ ;
+ continue;
+ }
+ /* Safely copy the next pathname component. */
+ while (*path != '\0' && *path != '/') {
+ if (npath-resolved_path > maxreslth-2) {
+ errno = ENAMETOOLONG;
+ goto err;
+ }
+ *npath++ = *path++;
+ }
+
+ /* Protect against infinite loops. */
+ if (readlinks++ > MAXSYMLINKS) {
+ errno = ELOOP;
+ goto err;
+ }
+
+ /* See if last pathname component is a symlink. */
+ *npath = '\0';
+
+ n = readlink(resolved_path, link_path, PATH_MAX);
+ if (n < 0) {
+ /* EINVAL means the file exists but isn't a symlink. */
+ if (errno != EINVAL)
+ goto err;
+ } else {
+ int m;
+ char *newbuf;
+
+ /* Note: readlink doesn't add the null byte. */
+ link_path[n] = '\0';
+ if (*link_path == '/')
+ /* Start over for an absolute symlink. */
+ npath = resolved_path;
+ else
+ /* Otherwise back up over this component. */
+ while (*(--npath) != '/')
+ ;
+
+ /* Insert symlink contents into path. */
+ m = strlen(path);
+ newbuf = malloc(m + n + 1);
+ if (!newbuf)
+ goto err;
+ memcpy(newbuf, link_path, n);
+ memcpy(newbuf + n, path, m + 1);
+ /* 'path' may point into the old buf, so free it only after the copy. */
+ free(buf);
+ path = buf = newbuf;
+ }
+ *npath++ = '/';
+ }
+ /* Delete trailing slash but don't whomp a lone slash. */
+ if (npath != resolved_path+1 && npath[-1] == '/')
+ npath--;
+ /* Make sure it's null terminated. */
+ *npath = '\0';
+
+ free(link_path);
+ free(buf);
+ return resolved_path;
+
+ err:
+ free(link_path);
+ free(buf);
+ return NULL;
+}
+
+/*
+ * Converts private "dm-N" names to "/dev/mapper/<name>"
+ *
+ * Since 2.6.29 (patch 784aae735d9b0bba3f8b9faef4c8b30df3bf0128) kernel sysfs
+ * provides the real DM device names in /sys/block/<ptname>/dm/name
+ *
+ * ptname: kernel block device name, e.g. "dm-3".
+ *
+ * Returns a newly allocated "/dev/mapper/<name>" string that the caller must
+ * free(), or NULL if the sysfs attribute cannot be read, is empty, or the
+ * sysfs path for 'ptname' does not fit the local buffer.
+ */
+char *
+canonicalize_dm_name(const char *ptname)
+{
+	FILE *f;
+	size_t sz;
+	int n;
+	char path[268], name[256], *res = NULL;
+
+	n = snprintf(path, sizeof(path), "/sys/block/%s/dm/name", ptname);
+	/* A truncated path would refer to some other sysfs node; refuse it. */
+	if (n < 0 || (size_t)n >= sizeof(path))
+		return NULL;
+
+	if (!(f = fopen(path, "r")))
+		return NULL;
+
+	/* read "<name>\n" from sysfs */
+	if (fgets(name, sizeof(name), f)) {
+		sz = strlen(name);
+		/*
+		 * Only strip a real newline: a name that fills the buffer or
+		 * lacks the trailing '\n' must not lose its last character.
+		 */
+		if (sz > 0 && name[sz - 1] == '\n')
+			name[--sz] = '\0';
+		if (sz > 0) {
+			snprintf(path, sizeof(path), "/dev/mapper/%s", name);
+			res = strdup(path);
+		}
+	}
+	fclose(f);
+	return res;
+}
+
+/*
+ * Return a canonical form of 'path' in a newly allocated string that the
+ * caller must free().
+ *
+ * The path is resolved with myrealpath(); if that fails, a plain copy of
+ * 'path' is returned instead.  When the last component of the resolved path
+ * looks like "dm-<digit>...", it is translated to "/dev/mapper/<name>" via
+ * canonicalize_dm_name() if possible.
+ *
+ * Returns NULL if 'path' is NULL or memory cannot be allocated.
+ */
+char *
+canonicalize_path(const char *path)
+{
+	char *canonical;
+	char *p;
+
+	if (path == NULL)
+		return NULL;
+
+	/* One byte more than the limit handed to myrealpath(). */
+	canonical = malloc(PATH_MAX+2);
+	if (!canonical)
+		return NULL;
+	if (!myrealpath(path, canonical, PATH_MAX+1)) {
+		free(canonical);
+		return strdup(path);
+	}
+
+	p = strrchr(canonical, '/');
+	/* <ctype.h> functions need an unsigned char value; plain char may be negative. */
+	if (p && strncmp(p, "/dm-", 4) == 0 && isdigit((unsigned char)p[4])) {
+		p = canonicalize_dm_name(p+1);
+		if (p) {
+			free(canonical);
+			return p;
+		}
+	}
+
+	return canonical;
+}
+
+
diff --git a/src/mount/conf.cc b/src/mount/conf.cc
new file mode 100644
index 000000000..05dc09c04
--- /dev/null
+++ b/src/mount/conf.cc
@@ -0,0 +1,107 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string>
+#include <vector>
+#include <cstring>
+#include <map>
+
+#include "common/async/context_pool.h"
+#include "common/ceph_context.h"
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "global/global_init.h"
+
+#include "auth/KeyRing.h"
+#include "mon/MonClient.h"
+
+#include "mount.ceph.h"
+
+
+/*
+ * Look up monitor addresses and the auth secret for entity 'name' using the
+ * local ceph configuration, and store them in 'cci'.
+ *
+ * config_file: path passed as "--conf", or NULL to not pass one.
+ * name:        entity name passed as "--name".
+ * v2_addrs:    true to collect only msgr2 addresses, false to collect only
+ *              legacy ones.
+ * cci:         output; cci_mons receives a comma-separated address list and
+ *              cci_secret the base64-encoded key.  A field is only written
+ *              when a value was found and fits, so callers presumably hand
+ *              in a zeroed structure -- TODO confirm against callers.
+ *
+ * Failures are reported through mount_ceph_debug() only; nothing is returned.
+ */
+extern "C" void mount_ceph_get_config_info(const char *config_file,
+ const char *name,
+ bool v2_addrs,
+ struct ceph_config_info *cci)
+{
+ int err;
+ KeyRing keyring;
+ CryptoKey secret;
+ std::string secret_str;
+ std::string monaddrs;
+ vector<const char *> args = { "--name", name };
+ bool first = true;
+
+ if (config_file) {
+ args.push_back("--conf");
+ args.push_back(config_file);
+ }
+
+ /* Create CephContext */
+ auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_UTILITY,
+ CINIT_FLAG_NO_DAEMON_ACTIONS|CINIT_FLAG_NO_MON_CONFIG);
+ auto& conf = cct->_conf;
+
+ conf.parse_env(cct->get_module_type()); // environment variables override
+ conf.apply_changes(nullptr);
+
+ ceph::async::io_context_pool ioc(1);
+ MonClient monc = MonClient(cct.get(), ioc);
+ err = monc.build_initial_monmap();
+ /* Without a monmap we can still try to find the secret. */
+ if (err)
+ goto scrape_keyring;
+
+ for (const auto& mon : monc.monmap.addr_mons) {
+ auto& eaddr = mon.first;
+
+ /*
+ * Filter v1 addrs if we're running in ms_mode=legacy. Filter
+ * v2 addrs for any other ms_mode.
+ */
+ if (v2_addrs) {
+ if (!eaddr.is_msgr2())
+ continue;
+ } else {
+ if (!eaddr.is_legacy())
+ continue;
+ }
+
+ std::string addr;
+ addr += eaddr.ip_n_port_to_str();
+ /* If this will overrun cci_mons, stop here */
+ if (monaddrs.length() + 1 + addr.length() + 1 > sizeof(cci->cci_mons))
+ break;
+
+ /* Entries are separated by ',' with no leading separator. */
+ if (first)
+ first = false;
+ else
+ monaddrs += ",";
+
+ monaddrs += addr;
+ }
+
+ /* The length check in the loop keeps this copy within cci_mons. */
+ if (monaddrs.length())
+ strcpy(cci->cci_mons, monaddrs.c_str());
+ else
+ mount_ceph_debug("Could not discover monitor addresses\n");
+
+scrape_keyring:
+ err = keyring.from_ceph_context(cct.get());
+ if (err) {
+ mount_ceph_debug("keyring.from_ceph_context failed: %d\n", err);
+ return;
+ }
+
+ if (!keyring.get_secret(conf->name, secret)) {
+ mount_ceph_debug("keyring.get_secret failed\n");
+ return;
+ }
+
+ secret.encode_base64(secret_str);
+
+ /* Refuse a secret that would not fit, including its terminator. */
+ if (secret_str.length() + 1 > sizeof(cci->cci_secret)) {
+ mount_ceph_debug("secret is too long\n");
+ return;
+ }
+ strcpy(cci->cci_secret, secret_str.c_str());
+}
diff --git a/src/mount/mount.ceph.c b/src/mount/mount.ceph.c
new file mode 100644
index 000000000..af3e4389f
--- /dev/null
+++ b/src/mount/mount.ceph.c
@@ -0,0 +1,566 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/mount.h>
+#include <stdbool.h>
+#include <sys/mman.h>
+#include <wait.h>
+#include <cap-ng.h>
+
+#include "common/module.h"
+#include "common/secret.h"
+#include "include/addr_parsing.h"
+#include "mount.ceph.h"
+
+#ifndef MS_RELATIME
+# define MS_RELATIME (1<<21)
+#endif
+
+bool verboseflag = false;
+bool skip_mtab_flag = false;
+bool v2_addrs = false;
+static const char * const EMPTY_STRING = "";
+
+/* TODO duplicates logic from kernel */
+#define CEPH_AUTH_NAME_DEFAULT "guest"
+
+#include "mtab.c"
+
+/*
+ * Everything collected from the command line (and, where missing, from the
+ * local config) that is needed to issue the mount.
+ */
+struct ceph_mount_info {
+ unsigned long cmi_flags; /* MS_* flags handed to mount() */
+ char *cmi_name; /* "client.<name>" entity name (heap) */
+ char *cmi_path; /* part of the source starting at the '/' after ':' (heap) */
+ char *cmi_mons; /* monitor address list, NULL until known (heap) */
+ char *cmi_conf; /* value of the conf= option, if given (heap) */
+ char *cmi_opts; /* option string handed to mount() as data (heap) */
+ int cmi_opts_len; /* length bookkeeping for cmi_opts, maintained by safe_cat() */
+ char cmi_secret[SECRET_BUFSIZE]; /* auth secret, empty string when unset */
+};
+
+/*
+ * Apply 'how' (as understood by sigprocmask()) to every signal except
+ * SIGTRAP and SIGSEGV.
+ */
+static void block_signals (int how)
+{
+	sigset_t mask;
+
+	sigfillset(&mask);
+	sigdelset(&mask, SIGSEGV);
+	sigdelset(&mask, SIGTRAP);
+	sigprocmask(how, &mask, NULL);
+}
+
+/*
+ * printf-style message to stdout, emitted only when verboseflag is set.
+ */
+void mount_ceph_debug(const char *fmt, ...)
+{
+	va_list ap;
+
+	if (!verboseflag)
+		return;
+
+	va_start(ap, fmt);
+	vprintf(fmt, ap);
+	va_end(ap);
+}
+
+/*
+ * Split a mount source of the form "[mons]:/path" into cmi->cmi_mons (left
+ * unset when the monitor part is empty) and cmi->cmi_path.
+ *
+ * Returns 0 on success, -EINVAL when no ":/" is present, -ENOMEM when a copy
+ * cannot be allocated.
+ */
+static int parse_src(const char *orig_str, struct ceph_mount_info *cmi)
+{
+	const char *sep = strstr(orig_str, ":/");
+	size_t mons_len;
+
+	if (sep == NULL) {
+		fprintf(stderr, "source mount path was not specified\n");
+		return -EINVAL;
+	}
+
+	mons_len = (size_t)(sep - orig_str);
+	if (mons_len > 0) {
+		cmi->cmi_mons = strndup(orig_str, mons_len);
+		if (cmi->cmi_mons == NULL)
+			return -ENOMEM;
+	}
+
+	/* Skip the ':' so the stored path starts at '/'. */
+	cmi->cmi_path = strdup(sep + 1);
+	return cmi->cmi_path != NULL ? 0 : -ENOMEM;
+}
+
+/*
+ * Build the source string handed to mount(): the result of resolve_addrs()
+ * on the monitor list, followed by ':' and the path.
+ *
+ * Returns a heap string, or NULL when resolve_addrs() fails.
+ */
+static char *finalize_src(struct ceph_mount_info *cmi)
+{
+	char *resolved = resolve_addrs(cmi->cmi_mons);
+	int cap, end;
+
+	if (resolved == NULL)
+		return NULL;
+
+	cap = strlen(resolved);
+	end = safe_cat(&resolved, &cap, cap, ":");
+	safe_cat(&resolved, &cap, end, cmi->cmi_path);
+
+	return resolved;
+}
+
+/*
+ * Reduce this process to CAP_DAC_READ_SEARCH only (permitted and effective).
+ *
+ * Returns 0 on success or EX_SYSERR if libcap-ng reports a failure.
+ */
+static int
+drop_capabilities()
+{
+	capng_setpid(getpid());
+	capng_clear(CAPNG_SELECT_BOTH);
+
+	if (capng_update(CAPNG_ADD, CAPNG_PERMITTED, CAP_DAC_READ_SEARCH) != 0) {
+		fprintf(stderr, "Unable to update permitted capability set.\n");
+		return EX_SYSERR;
+	}
+
+	if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, CAP_DAC_READ_SEARCH) != 0) {
+		fprintf(stderr, "Unable to update effective capability set.\n");
+		return EX_SYSERR;
+	}
+
+	if (capng_apply(CAPNG_SELECT_BOTH) != 0) {
+		fprintf(stderr, "Unable to apply new capability set.\n");
+		return EX_SYSERR;
+	}
+
+	return 0;
+}
+
+/*
+ * Attempt to fetch info from the local config file, if one is present. Since
+ * this involves activity that may be dangerous for a privileged task, we
+ * fork(), have the child drop privileges and do the processing and then hand
+ * back the results via memory shared with the parent.
+ *
+ * Only fields of 'cmi' that are still unset (secret, monitor list) are filled
+ * in from what the child found.
+ *
+ * Returns 0 on success (including "nothing to do"), EX_SYSERR when the shared
+ * mapping, fork() or waiting for the child fails, or when the child does not
+ * exit cleanly with status 0.
+ */
+static int fetch_config_info(struct ceph_mount_info *cmi)
+{
+	int ret = 0;
+	int status = 0;
+	pid_t pid, waited;
+	size_t len;
+	struct ceph_config_info *cci;
+
+	/* Don't do anything if we already have requisite info */
+	if (cmi->cmi_secret[0] && cmi->cmi_mons)
+		return 0;
+
+	/* Anonymous shared mapping: zero-filled and visible to the child. */
+	cci = mmap(NULL, sizeof(*cci), PROT_READ | PROT_WRITE,
+		   MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+	if (cci == MAP_FAILED) {
+		mount_ceph_debug("Unable to allocate memory: %s\n",
+				 strerror(errno));
+		return EX_SYSERR;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		mount_ceph_debug("fork() failure: %s\n", strerror(errno));
+		ret = EX_SYSERR;
+		goto out;
+	}
+
+	if (pid == 0) {
+		/* child */
+		ret = drop_capabilities();
+		if (ret)
+			exit(1);
+		mount_ceph_get_config_info(cmi->cmi_conf, cmi->cmi_name, v2_addrs, cci);
+		exit(0);
+	}
+
+	/*
+	 * parent: wait for exactly this child and retry when interrupted, so
+	 * the shared buffer is never read before the child has finished.
+	 */
+	do {
+		waited = waitpid(pid, &status, 0);
+	} while (waited < 0 && errno == EINTR);
+
+	if (waited < 0) {
+		mount_ceph_debug("waitpid() failure: %s\n", strerror(errno));
+		ret = EX_SYSERR;
+		goto out;
+	}
+	if (!WIFEXITED(status)) {
+		mount_ceph_debug("Child process terminated abnormally.\n");
+		ret = EX_SYSERR;
+		goto out;
+	}
+	ret = WEXITSTATUS(status);
+	if (ret) {
+		mount_ceph_debug("Child exited with status %d\n", ret);
+		ret = EX_SYSERR;
+		goto out;
+	}
+
+	/*
+	 * Copy values from MAP_SHARED buffer to cmi if we didn't
+	 * already find anything and we got something from the child.
+	 */
+	if (!cmi->cmi_secret[0] && cci->cci_secret[0]) {
+		len = strnlen(cci->cci_secret, SECRET_BUFSIZE);
+		if (len < SECRET_BUFSIZE) {
+			memcpy(cmi->cmi_secret, cci->cci_secret, len + 1);
+		} else {
+			/* SECRET_BUFSIZE is an int expression; %zu needs size_t. */
+			mount_ceph_debug("secret is too long (len=%zu max=%zu)!\n",
+					 len, (size_t)SECRET_BUFSIZE);
+		}
+	}
+	if (!cmi->cmi_mons && cci->cci_mons[0]) {
+		len = strnlen(cci->cci_mons, MON_LIST_BUFSIZE);
+		/* A list without terminator inside the buffer is ignored. */
+		if (len < MON_LIST_BUFSIZE)
+			cmi->cmi_mons = strndup(cci->cci_mons, len);
+	}
+out:
+	munmap(cci, sizeof(*cci));
+	return ret;
+}
+
+/* A generic mount flag option that is consumed here, not passed to the kernel. */
+struct ceph_flag_opt {
+	const char *name;
+	unsigned long flag;
+	bool set;	/* true: OR the flag in; false: clear it */
+};
+
+static const struct ceph_flag_opt ceph_flag_opts[] = {
+	{ "ro",		MS_RDONLY,	true  },
+	{ "rw",		MS_RDONLY,	false },
+	{ "nosuid",	MS_NOSUID,	true  },
+	{ "suid",	MS_NOSUID,	false },
+	{ "dev",	MS_NODEV,	false },
+	{ "nodev",	MS_NODEV,	true  },
+	{ "noexec",	MS_NOEXEC,	true  },
+	{ "exec",	MS_NOEXEC,	false },
+	{ "sync",	MS_SYNCHRONOUS,	true  },
+	{ "remount",	MS_REMOUNT,	true  },
+	{ "mandlock",	MS_MANDLOCK,	true  },
+	{ "nobrl",	MS_MANDLOCK,	false },
+	{ "nolock",	MS_MANDLOCK,	false },
+	{ "noatime",	MS_NOATIME,	true  },
+	{ "nodiratime",	MS_NODIRATIME,	true  },
+	{ "relatime",	MS_RELATIME,	true  },
+	{ "strictatime", MS_STRICTATIME, true  },
+};
+
+/*
+ * If 'opt' is one of the generic flag options, update *flags accordingly.
+ * Returns true when the option was recognised.
+ */
+static bool apply_flag_option(const char *opt, unsigned long *flags)
+{
+	size_t i;
+
+	for (i = 0; i < sizeof(ceph_flag_opts) / sizeof(ceph_flag_opts[0]); i++) {
+		if (strcmp(opt, ceph_flag_opts[i].name) != 0)
+			continue;
+		if (ceph_flag_opts[i].set)
+			*flags |= ceph_flag_opts[i].flag;
+		else
+			*flags &= ~ceph_flag_opts[i].flag;
+		return true;
+	}
+	return false;
+}
+
+/*
+ * this one is partially based on parse_options() from cifs.mount.c
+ *
+ * Parse the comma-separated option string 'data' into 'cmi':
+ *  - generic flag options update cmi->cmi_flags;
+ *  - secret=, secretfile= and conf= are stored in 'cmi' and not forwarded;
+ *  - fs= is forwarded as mds_namespace=;
+ *  - name= and ms_mode= are recorded and forwarded;
+ *  - anything unrecognised is forwarded unchanged in cmi->cmi_opts.
+ *
+ * cmi->cmi_name is always set to "client.<name>" (default name when none was
+ * given) and cmi->cmi_opts is never left NULL on success.
+ *
+ * NOTE: although 'data' is const-qualified, the string is modified in place
+ * (separators are overwritten with NUL) unless it is EMPTY_STRING.
+ *
+ * Returns 0 on success, -EINVAL for an option that lacks its required value,
+ * -ENOMEM on allocation failure, or the negative error from
+ * read_secret_from_file().
+ */
+static int parse_options(const char *data, struct ceph_mount_info *cmi)
+{
+	char * next_keyword = NULL;
+	int pos = 0;
+	char *name = NULL;
+	int name_len = 0;
+	int name_pos = 0;
+
+	/* Pointer comparison on purpose: this is the "no -o given" sentinel. */
+	if (data == EMPTY_STRING)
+		goto out;
+
+	mount_ceph_debug("parsing options: %s\n", data);
+
+	do {
+		char * value = NULL;
+		bool skip = true;
+
+		/* check if ends with trailing comma */
+		if(*data == 0)
+			break;
+		next_keyword = strchr(data,',');
+
+		/* null terminate end of keyword=value pair */
+		if(next_keyword)
+			*next_keyword++ = 0;
+
+		/* null terminate keyword to make keyword and value distinct */
+		if ((value = strchr(data, '=')) != NULL) {
+			*value = '\0';
+			value++;
+		}
+
+		if (apply_flag_option(data, &cmi->cmi_flags)) {
+			/* handled via cmi_flags, not forwarded */
+		} else if (strcmp(data, "noauto") == 0 ||
+			   strcmp(data, "_netdev") == 0 ||
+			   strcmp(data, "nofail") == 0) {
+			/* ignore */
+		} else if (strcmp(data, "fs") == 0) {
+			if (!value || !*value) {
+				fprintf(stderr, "mount option fs requires a value.\n");
+				return -EINVAL;
+			}
+			data = "mds_namespace";
+			skip = false;
+		} else if (strcmp(data, "secretfile") == 0) {
+			int ret;
+
+			if (!value || !*value) {
+				fprintf(stderr, "keyword secretfile found, but no secret file specified\n");
+				return -EINVAL;
+			}
+			ret = read_secret_from_file(value, cmi->cmi_secret, sizeof(cmi->cmi_secret));
+			if (ret < 0) {
+				fprintf(stderr, "error reading secret file: %d\n", ret);
+				return ret;
+			}
+		} else if (strcmp(data, "secret") == 0) {
+			size_t len;
+
+			if (!value || !*value) {
+				fprintf(stderr, "mount option secret requires a value.\n");
+				return -EINVAL;
+			}
+
+			len = strnlen(value, sizeof(cmi->cmi_secret)) + 1;
+			if (len <= sizeof(cmi->cmi_secret))
+				memcpy(cmi->cmi_secret, value, len);
+			else
+				/* Say so instead of dropping the secret silently. */
+				fprintf(stderr, "mount option secret is too long, ignoring it.\n");
+		} else if (strcmp(data, "conf") == 0) {
+			if (!value || !*value) {
+				fprintf(stderr, "mount option conf requires a value.\n");
+				return -EINVAL;
+			}
+			/* keep a private copy; a repeated conf= replaces the earlier one */
+			free(cmi->cmi_conf);
+			cmi->cmi_conf = strdup(value);
+			if (!cmi->cmi_conf)
+				return -ENOMEM;
+		} else if (strcmp(data, "name") == 0) {
+			if (!value || !*value) {
+				fprintf(stderr, "mount option name requires a value.\n");
+				return -EINVAL;
+			}
+			/* keep pointer to value (points into the caller's string) */
+			name = value;
+			skip = false;
+		} else if (strcmp(data, "ms_mode") == 0) {
+			if (!value || !*value) {
+				fprintf(stderr, "mount option ms_mode requires a value.\n");
+				return -EINVAL;
+			}
+			/* Only legacy ms_mode needs v1 addrs */
+			v2_addrs = strcmp(value, "legacy");
+			skip = false;
+		} else {
+			/* unrecognized mount options, passing to kernel */
+			skip = false;
+		}
+
+		/* Copy (possibly modified) option to out */
+		if (!skip) {
+			if (pos)
+				pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, pos, ",");
+
+			if (value) {
+				pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, pos, data);
+				pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, pos, "=");
+				pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, pos, value);
+			} else {
+				pos = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, pos, data);
+			}
+		}
+		data = next_keyword;
+	} while (data);
+
+out:
+	name_pos = safe_cat(&cmi->cmi_name, &name_len, name_pos, "client.");
+	name_pos = safe_cat(&cmi->cmi_name, &name_len, name_pos,
+			    name ? name : CEPH_AUTH_NAME_DEFAULT);
+
+	if (cmi->cmi_opts)
+		mount_ceph_debug("mount.ceph: options \"%s\" will pass to kernel.\n",
+				 cmi->cmi_opts);
+
+	if (!cmi->cmi_opts) {
+		cmi->cmi_opts = strdup(EMPTY_STRING);
+		if (!cmi->cmi_opts)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+
+/*
+ * Pick the mount source, mount point and "-o" string out of argv and set the
+ * -n / -v flags.
+ *
+ * Returns 0 on success, 1 when usage should simply be shown (no arguments or
+ * help requested), -EINVAL for an invalid command line.
+ */
+static int parse_arguments(int argc, char *const *const argv,
+		const char **src, const char **node, const char **opts)
+{
+	int idx;
+
+	// No arguments at all: just show the usage.
+	if (argc < 2)
+		return 1;
+
+	// Explicit request for help.
+	if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)
+		return 1;
+
+	// Source and mount point are positional and mandatory.
+	if (argc < 3)
+		return -EINVAL;
+	*src = argv[1];
+	*node = argv[2];
+
+	// Everything after that is a switch.
+	*opts = EMPTY_STRING;
+	for (idx = 3; idx < argc; idx++) {
+		const char *arg = argv[idx];
+
+		if (strcmp(arg, "-h") == 0)
+			return 1;
+
+		if (strcmp(arg, "-n") == 0) {
+			skip_mtab_flag = true;
+			continue;
+		}
+
+		if (strcmp(arg, "-v") == 0) {
+			verboseflag = true;
+			continue;
+		}
+
+		if (strcmp(arg, "-o") != 0) {
+			fprintf(stderr, "Can't understand option: '%s'\n\n", arg);
+			return -EINVAL;
+		}
+
+		// "-o" consumes the following argument.
+		if (++idx >= argc) {
+			fprintf(stderr, "Option -o requires an argument.\n\n");
+			return -EINVAL;
+		}
+		*opts = argv[idx];
+	}
+
+	return 0;
+}
+
+/*
+ * Try to load the "ceph" kernel module.  A failure is only reported, not
+ * propagated, because it does not necessarily prevent the mount from working.
+ */
+static void modprobe(void)
+{
+	int err = module_load("ceph", NULL);
+
+	if (err != 0)
+		printf("failed to load ceph kernel module (%d)\n", err);
+}
+
+/* Print the command line synopsis and the list of switches to stdout. */
+static void usage(const char *prog_name)
+{
+	static const char *const help_lines[] = {
+		"options:\n",
+		"\t-h: Print this help\n",
+		"\t-n: Do not update /etc/mtab\n",
+		"\t-v: Verbose\n",
+		"\tceph-options: refer to mount.ceph(8)\n",
+		"\n",
+	};
+	size_t i;
+
+	printf("usage: %s [src] [mount-point] [-n] [-v] [-o ceph-options]\n",
+	       prog_name);
+	for (i = 0; i < sizeof(help_lines) / sizeof(help_lines[0]); i++)
+		fputs(help_lines[i], stdout);
+}
+
+/*
+ * Release the heap strings owned by 'cmi'.  The structure itself is not
+ * freed: it lives on the caller's stack.
+ */
+static void ceph_mount_info_free(struct ceph_mount_info *cmi)
+{
+	free(cmi->cmi_name);
+	free(cmi->cmi_path);
+	free(cmi->cmi_mons);
+	free(cmi->cmi_conf);
+	free(cmi->cmi_opts);
+}
+
+/*
+ * Append the authentication option to cmi->cmi_opts.
+ *
+ * Nothing is appended when there is neither a secret nor a kernel key for
+ * cmi->cmi_name.  Otherwise a real secret is handed to the kernel with
+ * set_kernel_secret() and "key=<name>" is appended; if that call reports
+ * -ENODEV or -ENOSYS, "secret=<secret>" is appended instead.
+ *
+ * Returns 0 on success or the negative error from set_kernel_secret().
+ */
+static int append_key_or_secret_option(struct ceph_mount_info *cmi)
+{
+	int end = strlen(cmi->cmi_opts);
+	bool have_secret = cmi->cmi_secret[0] != '\0';
+
+	if (!have_secret && !is_kernel_secret(cmi->cmi_name))
+		return 0;
+
+	if (end != 0)
+		end = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, end, ",");
+
+	/* when parsing kernel options (-o remount) we get '<hidden>' as the secret */
+	if (have_secret && strcmp(cmi->cmi_secret, "<hidden>") != 0) {
+		int err = set_kernel_secret(cmi->cmi_secret, cmi->cmi_name);
+
+		if (err == -ENODEV || err == -ENOSYS) {
+			/* old kernel; fall back to secret= in options */
+			end = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len,
+				       end, "secret=");
+			end = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len,
+				       end, cmi->cmi_secret);
+			return 0;
+		}
+		if (err < 0) {
+			fprintf(stderr, "adding ceph secret key to kernel failed: %s\n",
+				strerror(-err));
+			return err;
+		}
+	}
+
+	end = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, end, "key=");
+	end = safe_cat(&cmi->cmi_opts, &cmi->cmi_opts_len, end, cmi->cmi_name);
+
+	return 0;
+}
+
+/*
+ * Entry point of the mount.ceph helper.
+ *
+ * Parses the command line and options, fills in missing monitor addresses
+ * and secret from the local config on a best-effort basis, resolves the
+ * source, performs the mount and, unless -n was given, records it in mtab.
+ *
+ * Returns 0 on success (also after printing usage on request), EX_USAGE for
+ * failures before the mount call, or EX_FAIL when mount() itself fails.
+ */
+int main(int argc, char *argv[])
+{
+ const char *src, *node, *opts;
+ char *rsrc = NULL;
+ int retval;
+ /* Zeroed so that ceph_mount_info_free() is safe on every exit path. */
+ struct ceph_mount_info cmi = { 0 };
+
+ retval = parse_arguments(argc, argv, &src, &node, &opts);
+ if (retval) {
+ usage(argv[0]);
+ /* A positive result means usage was requested, which is not an error. */
+ retval = (retval > 0) ? 0 : EX_USAGE;
+ goto out;
+ }
+
+ retval = parse_options(opts, &cmi);
+ if (retval) {
+ fprintf(stderr, "failed to parse ceph_options: %d\n", retval);
+ retval = EX_USAGE;
+ goto out;
+ }
+
+ retval = parse_src(src, &cmi);
+ if (retval) {
+ fprintf(stderr, "unable to parse mount source: %d\n", retval);
+ retval = EX_USAGE;
+ goto out;
+ }
+
+ /* We don't care if this errors out, since this is best-effort */
+ fetch_config_info(&cmi);
+
+ /* Neither the source string nor the config lookup supplied monitors. */
+ if (!cmi.cmi_mons) {
+ fprintf(stderr, "unable to determine mon addresses\n");
+ retval = EX_USAGE;
+ goto out;
+ }
+
+ rsrc = finalize_src(&cmi);
+ if (!rsrc) {
+ fprintf(stderr, "failed to resolve source\n");
+ retval = EX_USAGE;
+ goto out;
+ }
+
+ /* Ensure the ceph key_type is available */
+ modprobe();
+
+ retval = append_key_or_secret_option(&cmi);
+ if (retval) {
+ fprintf(stderr, "couldn't append secret option: %d\n", retval);
+ retval = EX_USAGE;
+ goto out;
+ }
+
+ /* Signals stay blocked across the mount call and the mtab update. */
+ block_signals(SIG_BLOCK);
+
+ if (mount(rsrc, node, "ceph", cmi.cmi_flags, cmi.cmi_opts)) {
+ retval = EX_FAIL;
+ switch (errno) {
+ case ENODEV:
+ fprintf(stderr, "mount error: ceph filesystem not supported by the system\n");
+ break;
+ case EHOSTUNREACH:
+ fprintf(stderr, "mount error: no mds server is up or the cluster is laggy\n");
+ break;
+ default:
+ fprintf(stderr, "mount error %d = %s\n",errno,strerror(errno));
+ }
+ } else {
+ if (!skip_mtab_flag) {
+ update_mtab_entry(rsrc, node, "ceph", cmi.cmi_opts, cmi.cmi_flags, 0, 0);
+ }
+ }
+
+ block_signals(SIG_UNBLOCK);
+out:
+ ceph_mount_info_free(&cmi);
+ free(rsrc);
+ return retval;
+}
+
diff --git a/src/mount/mount.ceph.h b/src/mount/mount.ceph.h
new file mode 100644
index 000000000..673175db5
--- /dev/null
+++ b/src/mount/mount.ceph.h
@@ -0,0 +1,41 @@
+#ifndef _SRC_MOUNT_MOUNT_CEPH_H
+#define _SRC_MOUNT_MOUNT_CEPH_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * See class CryptoKey
+ *
+ * 2 (for the type of secret) +
+ * 8 (for the timestamp) +
+ * 2 (for the length of secret) +
+ * 16 (for an AES-128 key)
+ */
+#define MAX_RAW_SECRET_LEN (2 + 8 + 2 + 16)
+
+/* Max length of base64 encoded secret. 4/3 original size (rounded up) */
+#define MAX_SECRET_LEN ((MAX_RAW_SECRET_LEN + (3 - 1)) * 4 / 3)
+
+/* Max length including the null terminator */
+#define SECRET_BUFSIZE (MAX_SECRET_LEN + 1)
+
+/* 2k should be enough for anyone? */
+#define MON_LIST_BUFSIZE 2048
+
+/* printf-style debug message helper shared by the C and C++ parts. */
+void mount_ceph_debug(const char *fmt, ...);
+
+/*
+ * Results handed back by mount_ceph_get_config_info().  Both members are
+ * NUL-terminated strings; an empty string means "nothing found".
+ */
+struct ceph_config_info {
+ char cci_secret[SECRET_BUFSIZE]; // auth secret
+ char cci_mons[MON_LIST_BUFSIZE]; // monitor addrs
+};
+
+/*
+ * Fill 'cci' from the local ceph configuration for entity 'name'.
+ * 'config_file' may be NULL; 'v2_addrs' selects msgr2 (true) or legacy
+ * (false) monitor addresses.
+ */
+void mount_ceph_get_config_info(const char *config_file, const char *name,
+ bool v2_addrs, struct ceph_config_info *cci);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SRC_MOUNT_MOUNT_CEPH_H */
diff --git a/src/mount/mtab.c b/src/mount/mtab.c
new file mode 100644
index 000000000..3ce368fd3
--- /dev/null
+++ b/src/mount/mtab.c
@@ -0,0 +1,294 @@
+
+/*
+ * this code lifted from util-linux-ng, licensed GPLv2+,
+ *
+ * git://git.kernel.org/pub/scm/utils/util-linux-ng/util-linux-ng.git
+ *
+ * whoever decided that each special mount program is responsible
+ * for updating /etc/mtab should be spanked.
+ *
+ * <sage@newdream.net>
+ */
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <time.h>
+#include <mntent.h>
+#include <stdarg.h>
+
+#include "mount/canonicalize.c"
+
+
+/* Updating mtab ----------------------------------------------*/
+
+/* Flag for already existing lock file. */
+static int we_created_lockfile = 0;
+static int lockfile_fd = -1;
+
+/* Flag to indicate that signals have been set up. */
+static int signals_have_been_setup = 0;
+
+/* Ensure that the lock is released if we are interrupted. */
+extern char *strsignal(int sig); /* not always in <string.h> */
+
+/*
+ * SIGALRM handler used while waiting in fcntl(F_SETLKW): it does nothing on
+ * purpose, the interrupted fcntl() call fails on its own.
+ */
+static void
+setlkw_timeout (int sig) {
+	(void) sig;
+}
+
+#define _PATH_MOUNTED "/etc/mtab"
+#define _PATH_MOUNTED_LOCK "/etc/mtab~"
+#define PROC_SUPER_MAGIC 0x9fa0
+
+/* exit status - bits below are ORed */
+#define EX_USAGE 1 /* incorrect invocation or permission */
+#define EX_SYSERR 2 /* out of memory, cannot fork, ... */
+#define EX_SOFTWARE 4 /* internal mount bug or wrong version */
+#define EX_USER 8 /* user interrupt */
+#define EX_FILEIO 16 /* problems writing, locking, ... mtab/fstab */
+#define EX_FAIL 32 /* mount failure */
+#define EX_SOMEOK 64 /* some mount succeeded */
+
+/* Defined further down; declared here so die() can release the mtab lock. */
+void unlock_mtab (void);
+
+/*
+ * Print a printf-style message followed by a newline to stderr, release the
+ * mtab lock file if this process created it, and terminate with exit status
+ * 'err'.  Does not return; the int return type is kept for existing callers.
+ */
+int die(int err, const char *fmt, ...) {
+	va_list args;
+
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	fprintf(stderr, "\n");
+	va_end(args);
+
+	/*
+	 * Without this, dying while holding the lock (e.g. from the signal
+	 * handler) leaves a stale /etc/mtab~ behind.  unlock_mtab() is a
+	 * no-op when the lock is not ours.
+	 */
+	unlock_mtab();
+
+	exit(err);
+}
+
+/* Signal handler: report which signal arrived and exit with EX_USER. */
+static void
+handler (int sig) {
+	const char *desc = strsignal(sig);
+
+	die(EX_USER, "%s", desc);
+}
+
+/* Remove the lock file, but only if this process is the one that created it. */
+void
+unlock_mtab (void) {
+	if (!we_created_lockfile)
+		return;
+
+	close(lockfile_fd);
+	lockfile_fd = -1;
+	unlink(_PATH_MOUNTED_LOCK);
+	we_created_lockfile = 0;
+}
+
+/* Create the lock file.
+ The lock file will be removed if we catch a signal or when we exit. */
+/* The old code here used flock on a lock file /etc/mtab~ and deleted
+ this lock file afterwards. However, as rgooch remarks, that has a
+ race: a second mount may be waiting on the lock and proceed as
+ soon as the lock file is deleted by the first mount, and immediately
+ afterwards a third mount comes, creates a new /etc/mtab~, applies
+ flock to that, and also proceeds, so that the second and third mount
+ now both are scribbling in /etc/mtab.
+ The new code uses a link() instead of a creat(), where we proceed
+ only if it was us that created the lock, and hence we always have
+ to delete the lock afterwards. Now the use of flock() is in principle
+ superfluous, but avoids an arbitrary sleep(). */
+
+/* Where does the link point to? Obvious choices are mtab and mtab~~.
+ HJLu points out that the latter leads to races. Right now we use
+ mtab~.<pid> instead. Use 20 as upper bound for the length of %d. */
+#define MOUNTLOCK_LINKTARGET _PATH_MOUNTED_LOCK "%d"
+#define MOUNTLOCK_LINKTARGET_LTH (sizeof(_PATH_MOUNTED_LOCK)+20)
+
+/*
+ * The original mount locking code used sleep(1) between attempts, and the
+ * maximum number of attempts was 5.
+ *
+ * That was a very small number of attempts with an extremely long wait (1s),
+ * which is useless on machines with a large number of concurrent mount
+ * processes.
+ *
+ * Now we wait a few thousand microseconds between attempts and have a global
+ * time limit (30s) rather than a limit on the number of attempts. The
+ * advantage is that this method also counts the time we spend in
+ * fcntl(F_SETLKW), and the number of attempts is far less restricted.
+ *
+ * -- kzak@redhat.com [2007-Mar-2007]
+ */
+
+/* maximum seconds between first and last attempt */
+#define MOUNTLOCK_MAXTIME 30
+
+/* sleep time (in microseconds, max=999999) between attempts */
+#define MOUNTLOCK_WAITTIME 5000
+
+/*
+ * Take the mtab lock (_PATH_MOUNTED_LOCK), waiting up to MOUNTLOCK_MAXTIME
+ * seconds for other holders.
+ *
+ * On the first call, signal handlers are installed for the signals numbered
+ * below SIGCHLD (SIGALRM gets the no-op setlkw_timeout, the others get
+ * 'handler').  A per-process file is then created and link()ed to the lock
+ * name; whoever succeeds with link() owns the lock.
+ *
+ * Returns only once the lock is held (we_created_lockfile set, lockfile_fd
+ * open); every failure ends the process through die(EX_FILEIO, ...).
+ */
+void
+lock_mtab (void) {
+	int i;
+	struct timespec waittime;
+	struct timeval maxtime;
+	char linktargetfile[MOUNTLOCK_LINKTARGET_LTH];
+
+	if (!signals_have_been_setup) {
+		int sig = 0;
+		struct sigaction sa;
+
+		sa.sa_handler = handler;
+		sa.sa_flags = 0;
+		sigfillset (&sa.sa_mask);
+
+		while (sigismember (&sa.sa_mask, ++sig) != -1
+		       && sig != SIGCHLD) {
+			if (sig == SIGALRM)
+				sa.sa_handler = setlkw_timeout;
+			else
+				sa.sa_handler = handler;
+			sigaction (sig, &sa, (struct sigaction *) 0);
+		}
+		signals_have_been_setup = 1;
+	}
+
+	snprintf(linktargetfile, sizeof(linktargetfile), MOUNTLOCK_LINKTARGET,
+		 getpid ());
+
+	i = open (linktargetfile, O_WRONLY|O_CREAT, S_IRUSR|S_IWUSR);
+	if (i < 0) {
+		int errsv = errno;
+		/* linktargetfile does not exist (as a file)
+		   and we cannot create it. Read-only filesystem?
+		   Too many files open in the system?
+		   Filesystem full? */
+		die (EX_FILEIO, "can't create lock file %s: %s "
+		     "(use -n flag to override)",
+		     linktargetfile, strerror (errsv));
+	}
+	close(i);
+
+	gettimeofday(&maxtime, NULL);
+	maxtime.tv_sec += MOUNTLOCK_MAXTIME;
+
+	waittime.tv_sec = 0;
+	waittime.tv_nsec = (1000 * MOUNTLOCK_WAITTIME);
+
+	/* Repeat until it was us who made the link */
+	while (!we_created_lockfile) {
+		struct timeval now;
+		struct flock flock;
+		int errsv, j;
+
+		j = link(linktargetfile, _PATH_MOUNTED_LOCK);
+		errsv = errno;
+
+		if (j == 0)
+			we_created_lockfile = 1;
+
+		if (j < 0 && errsv != EEXIST) {
+			(void) unlink(linktargetfile);
+			die (EX_FILEIO, "can't link lock file %s: %s "
+			     "(use -n flag to override)",
+			     _PATH_MOUNTED_LOCK, strerror (errsv));
+		}
+
+		lockfile_fd = open (_PATH_MOUNTED_LOCK, O_WRONLY);
+
+		if (lockfile_fd < 0) {
+			/* Strange... Maybe the file was just deleted? */
+			/* Save errno first: gettimeofday() below may change it. */
+			errsv = errno;
+			gettimeofday(&now, NULL);
+			if (errsv == ENOENT && now.tv_sec < maxtime.tv_sec) {
+				we_created_lockfile = 0;
+				continue;
+			}
+			(void) unlink(linktargetfile);
+			die (EX_FILEIO, "can't open lock file %s: %s "
+			     "(use -n flag to override)",
+			     _PATH_MOUNTED_LOCK, strerror (errsv));
+		}
+
+		flock.l_type = F_WRLCK;
+		flock.l_whence = SEEK_SET;
+		flock.l_start = 0;
+		flock.l_len = 0;
+
+		if (j == 0) {
+			/* We made the link. Now claim the lock. */
+			if (fcntl (lockfile_fd, F_SETLK, &flock) == -1) {
+				/* proceed, since it was us who created the lockfile anyway */
+			}
+			(void) unlink(linktargetfile);
+		} else {
+			/* Someone else made the link. Wait. */
+			gettimeofday(&now, NULL);
+			if (now.tv_sec < maxtime.tv_sec) {
+				/* SIGALRM interrupts the blocking fcntl() at the deadline. */
+				alarm(maxtime.tv_sec - now.tv_sec);
+				if (fcntl (lockfile_fd, F_SETLKW, &flock) == -1) {
+					/* Decide on the saved value: unlink() may change errno. */
+					errsv = errno;
+					(void) unlink(linktargetfile);
+					die (EX_FILEIO, "can't lock lock file %s: %s",
+					     _PATH_MOUNTED_LOCK, (errsv == EINTR) ?
+					     "timed out" : strerror (errsv));
+				}
+				alarm(0);
+
+				nanosleep(&waittime, NULL);
+			} else {
+				(void) unlink(linktargetfile);
+				die (EX_FILEIO, "Cannot create link %s\n"
+				     "Perhaps there is a stale lock file?\n",
+				     _PATH_MOUNTED_LOCK);
+			}
+			/* Not our lock file: drop the descriptor and try link() again. */
+			close(lockfile_fd);
+			lockfile_fd = -1;
+		}
+	}
+}
+
+/*
+ * Append an entry for a freshly mounted filesystem to _PATH_MOUNTED.
+ *
+ * spec:  mounted source, node: mount point (canonicalized before writing),
+ * type:  filesystem type, opts: option string ("rw" is used when NULL),
+ * flags: currently unused, freq/pass: stored in mnt_freq / mnt_passno.
+ *
+ * Nothing is written when _PATH_MOUNTED lives on procfs.  Errors are
+ * reported on stdout and otherwise ignored; the mtab lock is taken around
+ * the update and always released again.
+ */
+static void
+update_mtab_entry(const char *spec, const char *node, const char *type,
+		  const char *opts, int flags, int freq, int pass) {
+	struct statfs buf;
+	struct mntent mnt;
+	FILE *fp;
+
+	if (statfs(_PATH_MOUNTED, &buf)) {
+		/* statfs() returns -1; the reason is in errno. */
+		int errsv = errno;
+		printf("mount: can't statfs %s: %s\n", _PATH_MOUNTED,
+		       strerror (errsv));
+		return;
+	}
+	/* /etc/mtab is a symbolic link to /proc/self/mounts? */
+	if (buf.f_type == PROC_SUPER_MAGIC)
+		return;
+
+	if (!opts)
+		opts = "rw";
+
+	mnt.mnt_fsname = strdup(spec);
+	mnt.mnt_dir = canonicalize_path(node);
+	mnt.mnt_type = strdup(type);
+	mnt.mnt_opts = strdup(opts);
+	mnt.mnt_freq = freq;
+	mnt.mnt_passno = pass;
+
+	/* addmntent() must not be handed NULL strings. */
+	if (!mnt.mnt_fsname || !mnt.mnt_dir || !mnt.mnt_type || !mnt.mnt_opts) {
+		printf("mount: can't update %s: %s\n", _PATH_MOUNTED,
+		       strerror (ENOMEM));
+		goto out;
+	}
+
+	lock_mtab();
+	fp = setmntent(_PATH_MOUNTED, "a+");
+	if (fp == NULL) {
+		int errsv = errno;
+		printf("mount: can't open %s: %s\n", _PATH_MOUNTED,
+		       strerror (errsv));
+	} else {
+		/* addmntent() returns 1 on failure. */
+		if ((addmntent (fp, &mnt)) == 1) {
+			int errsv = errno;
+			printf("mount: error writing %s: %s\n",
+			       _PATH_MOUNTED, strerror (errsv));
+		}
+		/* Only close a stream that was actually opened. */
+		endmntent(fp);
+	}
+	unlock_mtab();
+
+out:
+	free(mnt.mnt_fsname);
+	free(mnt.mnt_dir);
+	free(mnt.mnt_type);
+	free(mnt.mnt_opts);
+}