summaryrefslogtreecommitdiffstats
path: root/support/export/cache.c
diff options
context:
space:
mode:
Diffstat (limited to 'support/export/cache.c')
-rw-r--r--support/export/cache.c1934
1 files changed, 1934 insertions, 0 deletions
diff --git a/support/export/cache.c b/support/export/cache.c
new file mode 100644
index 0000000..6c0a44a
--- /dev/null
+++ b/support/export/cache.c
@@ -0,0 +1,1934 @@
+/*
+ * Handle communication with knfsd internal cache
+ *
+ * We open /proc/net/rpc/{auth.unix.ip,nfsd.export,nfsd.fh}/channel
+ * and listen for requests (using my_svc_run)
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <ctype.h>
+#include <pwd.h>
+#include <grp.h>
+#include <mntent.h>
+#include "misc.h"
+#include "nfsd_path.h"
+#include "nfslib.h"
+#include "exportfs.h"
+#include "export.h"
+#include "pseudoflavors.h"
+#include "xcommon.h"
+#include "reexport.h"
+
+#ifdef HAVE_JUNCTION_SUPPORT
+#include "fsloc.h"
+#endif
+
+#ifdef USE_BLKID
+#include "blkid/blkid.h"
+#endif
+
+enum nfsd_fsid {	/* fsid layouts decoded by parse_fsid() */
+ FSID_DEV = 0,	/* 8 bytes: 2 major, 2 minor, 4 inode */
+ FSID_NUM,	/* 4 bytes: numeric fsid */
+ FSID_MAJOR_MINOR,	/* 12 bytes: 4 major, 4 minor, 4 inode */
+ FSID_ENCODE_DEV,	/* 8 bytes: 4 packed device number, 4 inode */
+ FSID_UUID4_INUM,	/* 8 bytes: 4 inode, 4 uuid */
+ FSID_UUID8,	/* 8 byte uuid */
+ FSID_UUID16,	/* 16 byte uuid */
+ FSID_UUID16_INUM,	/* 24 bytes: 8 inode, 16 uuid */
+};
+
+#undef is_mountpoint
+/*
+ * Ask check_is_mountpoint() about 'path', using nfsd_path_lstat as the
+ * lstat routine, and hand its answer back unchanged.
+ */
+static int is_mountpoint(const char *path)
+{
+	int answer;
+
+	answer = check_is_mountpoint(path, nfsd_path_lstat);
+	return answer;
+}
+
+/*
+ * Read up to 'len' bytes from channel 'fd' into 'buf' by way of
+ * nfsd_path_read(); the result of that call is returned as is.
+ */
+static ssize_t cache_read(int fd, char *buf, size_t len)
+{
+	ssize_t got;
+
+	got = nfsd_path_read(fd, buf, len);
+	return got;
+}
+
+/*
+ * Write 'len' bytes from 'buf' to channel 'fd' by way of
+ * nfsd_path_write(); the result of that call is returned as is.
+ */
+static ssize_t cache_write(int fd, const char *buf, size_t len)
+{
+	ssize_t put;
+
+	put = nfsd_path_write(fd, buf, len);
+	return put;
+}
+
+/*
+ * Tell whether 'err' is one of the errno values this file treats as
+ * "the path could not be looked up": ELOOP, ENAMETOOLONG, ENOENT,
+ * ENOTDIR or EACCES.  Any other value yields false.
+ */
+static bool path_lookup_error(int err)
+{
+	if (err == ELOOP || err == ENAMETOOLONG)
+		return true;
+	if (err == ENOENT || err == ENOTDIR)
+		return true;
+	return err == EACCES;
+}
+
+/*
+ * Support routines for text-based upcalls.
+ * Fields are separated by spaces.
+ * Fields are either mangled to quote space, tab, newline and slosh with a
+ * slosh, or are hexified with a leading \x.
+ * Record is terminated with newline.
+ *
+ */
+
+#define INITIAL_MANAGED_GROUPS 100
+
+extern int use_ipaddr;
+
+static void auth_unix_ip(int f)
+{
+ /* Answer one request read from channel fd 'f'.
+ *
+ * requests are
+ * class IP-ADDR
+ * Ignore if class != "nfsd"
+ * Otherwise find domainname and write back:
+ *
+ * "nfsd" IP-ADDR expiry domainname
+ *
+ * expiry is time(0) + default_ttl.  The domainname field is left
+ * out when no client string could be composed for the address.
+ * When use_ipaddr is set the domainname is the address itself
+ * with a '$' in front; an empty client string is sent as "DEFAULT".
+ */
+ char class[20];
+ char ipaddr[INET6_ADDRSTRLEN + 1];
+ char *client = NULL;
+ struct addrinfo *ai = NULL;
+ struct addrinfo *tmp = NULL;
+ char buf[RPC_CHAN_BUF_SIZE], *bp;
+ int blen;
+
+ blen = read(f, buf, sizeof(buf));
+ if (blen <= 0 || buf[blen-1] != '\n') return; /* need one newline-terminated record */
+ buf[blen-1] = 0;
+
+ xlog(D_CALL, "auth_unix_ip: inbuf '%s'", buf);
+
+ bp = buf;
+
+ if (qword_get(&bp, class, 20) <= 0 ||
+ strcmp(class, "nfsd") != 0)
+ return;
+
+ if (qword_get(&bp, ipaddr, sizeof(ipaddr) - 1) <= 0) /* presumably keeps a byte spare for the '$' added below */
+ return;
+
+ tmp = host_pton(ipaddr);
+ if (tmp == NULL)
+ return;
+
+ auth_reload();
+
+ /* addr is a valid address, find the domain name... */
+ ai = client_resolve(tmp->ai_addr);
+ if (ai) {
+ client = client_compose(ai);
+ nfs_freeaddrinfo(ai);
+ }
+ if (!client)
+ xlog(D_AUTH, "failed authentication for IP %s", ipaddr);
+ else if (!use_ipaddr)
+ xlog(D_AUTH, "successful authentication for IP %s as %s",
+ ipaddr, *client ? client : "DEFAULT");
+ else
+ xlog(D_AUTH, "successful authentication for IP %s",
+ ipaddr);
+
+ bp = buf; blen = sizeof(buf); /* the request buffer is reused for the reply */
+ qword_add(&bp, &blen, "nfsd");
+ qword_add(&bp, &blen, ipaddr);
+ qword_adduint(&bp, &blen, time(0) + default_ttl);
+ if (use_ipaddr && client) {
+ memmove(ipaddr + 1, ipaddr, strlen(ipaddr) + 1); /* shift right by one, terminator included */
+ ipaddr[0] = '$';
+ qword_add(&bp, &blen, ipaddr);
+ } else if (client)
+ qword_add(&bp, &blen, *client?client:"DEFAULT");
+ qword_addeol(&bp, &blen);
+ if (blen <= 0 || write(f, buf, bp - buf) != bp - buf)
+ xlog(L_ERROR, "auth_unix_ip: error writing reply");
+
+ xlog(D_CALL, "auth_unix_ip: client %p '%s'", client, client?client: "DEFAULT");
+
+ free(client);
+ nfs_freeaddrinfo(tmp);
+
+}
+
+static void auth_unix_gid(int f)
+{
+ /* Answer one request read from channel fd 'f'.
+ *
+ * Request are
+ * uid
+ * reply is
+ * uid expiry count list of group ids
+ *
+ * expiry is time(0) + default_ttl.  When the uid has no passwd
+ * entry, or the group list cannot be obtained, a count of 0 is
+ * sent.  The group buffer is static, so it is kept (and only
+ * ever grown) across calls.
+ */
+ uid_t uid;
+ struct passwd *pw;
+ static gid_t *groups = NULL;
+ static int groups_len = 0;
+ gid_t *more_groups;
+ int ngroups;
+ int rv, i;
+ char buf[RPC_CHAN_BUF_SIZE], *bp;
+ int blen;
+
+ if (groups_len == 0) { /* first call: allocate the initial buffer */
+ groups = malloc(sizeof(gid_t) * INITIAL_MANAGED_GROUPS);
+ if (!groups)
+ return;
+
+ groups_len = INITIAL_MANAGED_GROUPS;
+ }
+
+ ngroups = groups_len;
+
+ blen = read(f, buf, sizeof(buf));
+ if (blen <= 0 || buf[blen-1] != '\n') return; /* need one newline-terminated record */
+ buf[blen-1] = 0;
+
+ bp = buf;
+ if (qword_get_uint(&bp, &uid) != 0)
+ return;
+
+ pw = getpwuid(uid);
+ if (!pw)
+ rv = -1;
+ else {
+ rv = getgrouplist(pw->pw_name, pw->pw_gid, groups, &ngroups);
+ if (rv == -1 && ngroups >= groups_len) { /* buffer too small: ngroups now holds the needed size */
+ more_groups = realloc(groups, sizeof(gid_t)*ngroups);
+ if (!more_groups)
+ rv = -1;
+ else {
+ groups = more_groups;
+ groups_len = ngroups;
+ rv = getgrouplist(pw->pw_name, pw->pw_gid,
+ groups, &ngroups);
+ }
+ }
+ }
+
+ bp = buf; blen = sizeof(buf); /* the request buffer is reused for the reply */
+ qword_adduint(&bp, &blen, uid);
+ qword_adduint(&bp, &blen, time(0) + default_ttl);
+ if (rv >= 0) {
+ qword_adduint(&bp, &blen, ngroups);
+ for (i=0; i<ngroups; i++)
+ qword_adduint(&bp, &blen, groups[i]);
+ } else
+ qword_adduint(&bp, &blen, 0);
+ qword_addeol(&bp, &blen);
+ if (blen <= 0 || write(f, buf, bp - buf) != bp - buf)
+ xlog(L_ERROR, "auth_unix_gid: error writing reply");
+}
+
+/*
+ * Look up the fsid number recorded for 'path' through
+ * reexpdb_fsidnum_by_path() (third argument 0) and compare it with
+ * 'parsed_fsidnum'.  A lookup result of 0 is treated as "no match".
+ * Returns 1 when the numbers agree, 0 otherwise.
+ */
+static int match_crossmnt_fsidnum(uint32_t parsed_fsidnum, char *path)
+{
+	uint32_t stored;
+	int looked_up;
+
+	looked_up = reexpdb_fsidnum_by_path(path, &stored, 0);
+	if (looked_up != 0)
+		return stored == parsed_fsidnum;
+
+	return 0;
+}
+
+#ifdef USE_BLKID
+static const char *get_uuid_blkdev(char *path)
+{
+ /* Return the value of the libblkid "UUID" tag for the device
+ * that 'path' lives on (st_dev of the path), or NULL when the
+ * path cannot be stat'ed, no device name or blkid entry is
+ * found, or the device carries a TYPE tag of "btrfs".
+ * The blkid cache handle is static and so is fetched only once.
+ * The returned string is the tag value handed out by the tag
+ * iterator; presumably it is owned by the blkid cache and must
+ * not be freed by the caller - TODO confirm.
+ * Without USE_BLKID this is a macro that always yields NULL.
+ */
+ static blkid_cache cache = NULL;
+ struct stat stb;
+ char *devname;
+ blkid_tag_iterate iter;
+ blkid_dev dev;
+ const char *type;
+ const char *val, *uuid = NULL;
+
+ if (cache == NULL)
+ blkid_get_cache(&cache, NULL);
+
+ if (nfsd_path_stat(path, &stb) != 0)
+ return NULL;
+ devname = blkid_devno_to_devname(stb.st_dev);
+ if (!devname)
+ return NULL;
+ dev = blkid_get_dev(cache, devname, BLKID_DEV_NORMAL);
+ free(devname);
+ if (!dev)
+ return NULL;
+ iter = blkid_tag_iterate_begin(dev);
+ if (!iter)
+ return NULL;
+ while (blkid_tag_next(iter, &type, &val) == 0) {
+ if (strcmp(type, "UUID") == 0)
+ uuid = val;
+ if (strcmp(type, "TYPE") == 0 &&
+ strcmp(val, "btrfs") == 0) { /* btrfs: discard any UUID seen so far and stop */
+ uuid = NULL;
+ break;
+ }
+ }
+ blkid_tag_iterate_end(iter);
+ return uuid;
+}
+#else
+#define get_uuid_blkdev(path) (NULL)
+#endif
+
+/*
+ * Extract the hex digits from 'val' and compose a uuid of 'uuidlen'
+ * bytes (callers use at most 16) in 'u'.  Characters that are not hex
+ * digits are skipped.  When 'val' holds more than 2*uuidlen digits the
+ * extra digits wrap around and are xor-ed into the bytes already
+ * written, folding a long uuid into a smaller one.
+ * Always returns 1.
+ */
+static int get_uuid(const char *val, size_t uuidlen, char *u)
+{
+	size_t i = 0;
+
+	/* A zero-length uuid has no byte to xor into. */
+	if (uuidlen == 0)
+		return 1;
+
+	memset(u, 0, uuidlen);
+	for ( ; *val ; val++) {
+		/* The <ctype.h> functions require an unsigned char value;
+		 * a plain char may be negative, which is undefined there.
+		 */
+		int c = (unsigned char)*val;
+
+		if (!isxdigit(c))
+			continue;
+		if (isdigit(c))
+			c -= '0';
+		else
+			c = tolower(c) - 'a' + 10;
+		/* even digit index: high nibble, odd: low nibble */
+		if ((i & 1) == 0)
+			c <<= 4;
+		u[i / 2] ^= (char)c;
+		if (++i == uuidlen * 2)
+			i = 0;
+	}
+	return 1;
+}
+
+
+/*
+ * Don't ask libblkid for these filesystems. Note that BTRFS is ignored, because
+ * we generate the identifier from statfs->f_fsid. The rest are network or
+ * pseudo filesystems. (See <linux/magic.h> for the basic IDs.)
+ * The table is terminated by a 0 entry.
+ */
+static const unsigned long nonblkid_filesystems[] = {
+ 0x2fc12fc1, /* ZFS_SUPER_MAGIC */
+ 0x9123683E, /* BTRFS_SUPER_MAGIC */
+ 0xFF534D42, /* CIFS_MAGIC_NUMBER */
+ 0x1373, /* DEVFS_SUPER_MAGIC */
+ 0x73757245, /* CODA_SUPER_MAGIC */
+ 0x564C, /* NCP_SUPER_MAGIC */
+ 0x6969, /* NFS_SUPER_MAGIC */
+ 0x9FA0, /* PROC_SUPER_MAGIC */
+ 0x62656572, /* SYSFS_MAGIC */
+ 0x517B, /* SMB_SUPER_MAGIC */
+ 0x01021994, /* TMPFS_SUPER_MAGIC */
+ 0 /* last */
+};
+
+static int uuid_by_path(char *path, int type, size_t uuidlen, char *uuid)
+{
+ /* get a uuid for the filesystem found at 'path'.
+ * There are several possible ways of generating the
+ * uuids (types).
+ * Type 0 is used for new filehandles, while other types
+ * may be used to interpret old filehandle - to ensure smooth
+ * forward migration.
+ * We return 1 if a uuid was found (and it might be worth
+ * trying the next type) or 0 if no more uuid types can be
+ * extracted.
+ * On success the uuid is written to 'uuid' as 'uuidlen' bytes
+ * by get_uuid().
+ */
+
+ /* Possible sources of uuid are
+ * - blkid uuid
+ * - statfs uuid
+ *
+ * On some filesystems (e.g. vfat) the statfs uuid is simply an
+ * encoding of the device that the filesystem is mounted from, so
+ * it would be very bad to use that (as device numbers change). blkid
+ * must be preferred.
+ * On other filesystems (e.g. btrfs) the statfs uuid contains
+ * important info that the blkid uuid cannot contain: This happens
+ * when multiple subvolumes are exported (they have the same
+ * blkid uuid but different statfs uuids).
+ * We rely on get_uuid_blkdev *knowing* which is which and not returning
+ * a uuid for filesystems where the statfs uuid is better.
+ *
+ */
+ struct statfs st;
+ char fsid_val[17];
+ const char *blkid_val = NULL;
+ const char *val;
+ int rc;
+
+ rc = nfsd_path_statfs(path, &st);
+
+ if (type == 0 && rc == 0) {
+ const unsigned long *bad;
+ for (bad = nonblkid_filesystems; *bad; bad++) {
+ if (*bad == (unsigned long)st.f_type)
+ break;
+ }
+ if (*bad == 0) /* reached the terminator: f_type is not in the skip list */
+ blkid_val = get_uuid_blkdev(path);
+ }
+
+ if (rc == 0 &&
+ (st.f_fsid.__val[0] || st.f_fsid.__val[1]))
+ snprintf(fsid_val, 17, "%08x%08x",
+ st.f_fsid.__val[0], st.f_fsid.__val[1]);
+ else
+ fsid_val[0] = 0; /* an empty string marks "no statfs uuid" */
+
+ /* Each available source consumes one 'type' value via type--.
+ * NOTE(review): blkid_val is only looked up when type == 0, so
+ * for type >= 1 this chain appears to always end in return 0 -
+ * confirm that is intended.
+ */
+ if (blkid_val && (type--) == 0)
+ val = blkid_val;
+ else if (fsid_val[0] && (type--) == 0)
+ val = fsid_val;
+ else
+ return 0;
+
+ get_uuid(val, uuidlen, uuid);
+ return 1;
+}
+
+/* Iterate through /etc/mtab, finding mountpoints below a given path.
+ *
+ * '*v' is the iteration cursor: pass a pointer to a NULL 'void *' to
+ * start; it then holds the open mtab stream between calls and is reset
+ * to NULL when the iteration ends.  Each call returns the next mount
+ * directory (after nfsd_path_strip_root()) that lies strictly below
+ * 'p'; when 'p' is "/" every entry is returned.  NULL is returned when
+ * there are no more entries, or when /etc/mtab cannot be opened.
+ * The returned string presumably points into storage owned by
+ * getmntent()/nfsd_path_strip_root(); callers in this file strdup() it
+ * when they need to keep it.
+ */
+static char *next_mnt(void **v, char *p)
+{
+	FILE *f;
+	struct mntent *me;
+	size_t l = strlen(p);
+
+	if (*v == NULL) {
+		f = setmntent("/etc/mtab", "r");
+		/* setmntent() returns NULL on failure; getmntent() and
+		 * endmntent() must not be handed a NULL stream.
+		 */
+		if (f == NULL)
+			return NULL;
+		*v = f;
+	} else
+		f = *v;
+	while ((me = getmntent(f)) != NULL && l >= 1) {
+		char *mnt_dir = nfsd_path_strip_root(me->mnt_dir);
+
+		if (!mnt_dir)
+			continue;
+
+		/* Everything below "/" is a proper sub-mount */
+		if (strcmp(p, "/") == 0)
+			return mnt_dir;
+
+		/* 'p' must be a whole-component prefix of the mount dir */
+		if (strncmp(mnt_dir, p, l) == 0 && mnt_dir[l] == '/')
+			return mnt_dir;
+	}
+	endmntent(f);
+	*v = NULL;
+	return NULL;
+}
+
+/* same_path() checks if two paths refer to the same directory.
+ * We don't rely on 'strcmp()' as some filesystems support case-insensitive
+ * names and we might have two different names for the one directory.
+ * Theoretically the lengths of the names could be different, but the
+ * number of components must be the same.
+ * So if the paths have the same number of components (but aren't identical)
+ * we ask the kernel if they are the same thing.
+ * By preference we use name_to_handle_at(), as the mntid it returns
+ * will distinguish between bind-mount points. If that isn't available
+ * we fall back on lstat, which is usually good enough.
+ */
+/* Number of '/' characters in the NUL-terminated string 'p'. */
+static inline int count_slashes(char *p)
+{
+	int slashes = 0;
+
+	for (; *p != '\0'; p++)
+		slashes += (*p == '/');
+	return slashes;
+}
+
+#if defined(HAVE_STRUCT_FILE_HANDLE)
+/*
+ * Compare 'child' and 'parent' by the file handle and mount id that
+ * nfsd_name_to_handle_at() reports for each.
+ * Returns 1 when mount id, handle size, handle type and handle bytes
+ * all agree, 0 when they differ (or when only the parent fails with a
+ * path lookup error), and -1 when no answer could be obtained.
+ */
+static int check_same_path_by_handle(const char *child, const char *parent)
+{
+	struct {
+		struct file_handle fh;
+		unsigned char handle[128];
+	} kid, par;
+	int kid_mnt, par_mnt;
+
+	kid.fh.handle_bytes = sizeof(kid.handle);
+	par.fh.handle_bytes = sizeof(par.handle);
+
+	/* This process should have the CAP_DAC_READ_SEARCH capability */
+	if (nfsd_name_to_handle_at(AT_FDCWD, child, &kid.fh, &kid_mnt, 0) < 0)
+		return -1;
+
+	/* If the child resolved, but the parent did not, they differ;
+	 * for any other error we just don't know.
+	 */
+	if (nfsd_name_to_handle_at(AT_FDCWD, parent, &par.fh, &par_mnt, 0) < 0)
+		return path_lookup_error(errno) ? 0 : -1;
+
+	if (kid_mnt != par_mnt)
+		return 0;
+	if (kid.fh.handle_bytes != par.fh.handle_bytes)
+		return 0;
+	if (kid.fh.handle_type != par.fh.handle_type)
+		return 0;
+
+	return memcmp(kid.handle, par.handle, kid.fh.handle_bytes) == 0;
+}
+#else
+/*
+ * Without struct file_handle there is nothing to compare: set errno
+ * to ENOSYS and return -1 ("don't know").
+ */
+static int check_same_path_by_handle(const char *child, const char *parent)
+{
+	errno = ENOSYS;
+
+	return -1;
+}
+#endif
+
+/*
+ * Compare 'child' and 'parent' by device and inode number as reported
+ * by nfsd_path_lstat().  Returns 1 when both agree, 0 otherwise
+ * (including when either lstat fails).
+ */
+static int check_same_path_by_inode(const char *child, const char *parent)
+{
+	struct stat kid, par;
+
+	/* This is nearly good enough. However if a directory is
+	 * bind-mounted in two places and both are exported, it
+	 * could give a false positive
+	 */
+	if (nfsd_path_lstat(child, &kid) != 0 ||
+	    nfsd_path_lstat(parent, &par) != 0)
+		return 0;
+
+	return kid.st_dev == par.st_dev && kid.st_ino == par.st_ino;
+}
+
+/*
+ * Decide whether the first 'len' bytes of 'child' name the same
+ * directory as 'parent'.  A 'len' of 0 or less means "all of child".
+ * Returns 1 for the same directory, 0 otherwise.
+ * The prefix is copied into a static PATH_MAX buffer, so the function
+ * is not reentrant.
+ */
+static int same_path(char *child, char *parent, int len)
+{
+	static char p[PATH_MAX];
+	int err;
+
+	if (len <= 0)
+		len = strlen(child);
+	/* The prefix plus its terminator must fit in p[]; a longer name
+	 * cannot be copied without overrunning the buffer, so report
+	 * "different" instead.
+	 */
+	if ((size_t)len >= sizeof(p))
+		return 0;
+	strncpy(p, child, len);
+	p[len] = 0;
+	if (strcmp(p, parent) == 0)
+		return 1;
+
+	/* If number of '/' are different, they must be different */
+	if (count_slashes(p) != count_slashes(parent))
+		return 0;
+
+	/* Try to use filehandle approach before falling back to stat() */
+	err = check_same_path_by_handle(p, parent);
+	if (err != -1)
+		return err;
+	return check_same_path_by_inode(p, parent);
+}
+
+/*
+ * Check if child is strictly a subdirectory of parent or a more
+ * distant descendant.  Returns 1 if so, 0 otherwise.
+ */
+static int is_subdirectory(char *child, char *parent)
+{
+	size_t l = strlen(parent);
+
+	/* Anything longer than one character is below "/".  child[0] is
+	 * tested first so that child[1] is never read from an empty string.
+	 */
+	if (strcmp(parent, "/") == 0 && child[0] != 0 && child[1] != 0)
+		return 1;
+
+	/* A descendant needs a '/' at child[l], so child must be longer
+	 * than parent; this also keeps child[l] inside the string.
+	 */
+	if (strlen(child) <= l)
+		return 0;
+
+	return (same_path(child, parent, l) && child[l] == '/');
+}
+
+/*
+ * Does the path match the export?  It does when it is the export's
+ * own path, or when the export has NFSEXP_CROSSMOUNT set and the path
+ * is a descendant of the export's path.  Returns 1 or 0.
+ */
+static int path_matches(nfs_export *exp, char *path)
+{
+	char *export_path = exp->m_export.e_path;
+
+	if (same_path(path, export_path, 0))
+		return 1;
+	if (!(exp->m_export.e_flags & NFSEXP_CROSSMOUNT))
+		return 0;
+
+	return is_subdirectory(path, export_path) != 0;
+}
+
+/*
+ * An export matches when path_matches() accepts the path and
+ * client_matches() accepts the domain/address.  Returns 1 or 0.
+ */
+static int
+export_matches(nfs_export *exp, char *dom, char *path, struct addrinfo *ai)
+{
+	if (!path_matches(exp, path))
+		return 0;
+
+	return client_matches(exp, dom, ai) != 0;
+}
+
+/* True iff e2 has crossmnt set and e1's path is a child (or a more
+ * distant descendant) of e2's path.
+ */
+static bool subexport(struct exportent *e1, struct exportent *e2)
+{
+	if (!(e2->e_flags & NFSEXP_CROSSMOUNT))
+		return false;
+
+	return is_subdirectory(e1->e_path, e2->e_path) != 0;
+}
+
+struct parsed_fsid {	/* decoded fsid; filled in by parse_fsid(), unused fields stay 0 */
+ int fsidtype;	/* the fsid type passed to parse_fsid() */
+ /* We could use a union for this, but it would be more
+ * complicated; why bother? */
+ uint64_t inode;	/* inode number, for the types that carry one */
+ unsigned int minor;	/* device minor, for the device based types */
+ unsigned int major;	/* device major, for the device based types */
+ uint32_t fsidnum;	/* numeric fsid (FSID_NUM) */
+ size_t uuidlen;	/* 4, 8 or 16 for the uuid based types */
+ char *fhuuid;	/* points into the fsid buffer given to parse_fsid(); not owned */
+};
+
+/*
+ * Decode the 'fsidlen' raw bytes at 'fsid' according to 'fsidtype'
+ * (one of enum nfsd_fsid) into '*parsed'.
+ * '*parsed' is zeroed first, so fields the type does not carry are 0.
+ * For the uuid based types parsed->fhuuid points into 'fsid'; the
+ * caller must keep that buffer alive while '*parsed' is in use.
+ * Returns 0 on success, -1 when 'fsidlen' does not fit the type or the
+ * type is not one of the known layouts.
+ */
+static int parse_fsid(int fsidtype, int fsidlen, char *fsid,
+		struct parsed_fsid *parsed)
+{
+	uint32_t dev;
+	uint32_t inode32;
+
+	memset(parsed, 0, sizeof(*parsed));
+	parsed->fsidtype = fsidtype;
+	switch(fsidtype) {
+	case FSID_DEV: /* 8 bytes: 2 major, 2 minor, 4 inode */
+		if (fsidlen != 8)
+			return -1;
+		memcpy(&dev, fsid, 4);
+		memcpy(&inode32, fsid+4, 4);
+		parsed->inode = inode32;
+		parsed->major = ntohl(dev)>>16;
+		parsed->minor = ntohl(dev) & 0xFFFF;
+		break;
+
+	case FSID_NUM: /* 4 bytes - fsid */
+		if (fsidlen != 4)
+			return -1;
+		memcpy(&parsed->fsidnum, fsid, 4);
+		break;
+
+	case FSID_MAJOR_MINOR: /* 12 bytes: 4 major, 4 minor, 4 inode
+				* This format is never actually used but was
+				* an historical accident
+				*/
+		if (fsidlen != 12)
+			return -1;
+		memcpy(&dev, fsid, 4);
+		parsed->major = ntohl(dev);
+		memcpy(&dev, fsid+4, 4);
+		parsed->minor = ntohl(dev);
+		memcpy(&inode32, fsid+8, 4);
+		parsed->inode = inode32;
+		break;
+
+	case FSID_ENCODE_DEV: /* 8 bytes: 4 byte packed device number, 4 inode */
+		/* This is *host* endian, not net-byte-order, because
+		 * no-one outside this host has any business interpreting it
+		 */
+		if (fsidlen != 8)
+			return -1;
+		memcpy(&dev, fsid, 4);
+		memcpy(&inode32, fsid+4, 4);
+		parsed->inode = inode32;
+		parsed->major = (dev & 0xfff00) >> 8;
+		parsed->minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
+		break;
+
+	case FSID_UUID4_INUM: /* 4 byte inode number and 4 byte uuid */
+		if (fsidlen != 8)
+			return -1;
+		memcpy(&inode32, fsid, 4);
+		parsed->inode = inode32;
+		parsed->uuidlen = 4;
+		parsed->fhuuid = fsid+4;
+		break;
+	case FSID_UUID8: /* 8 byte uuid */
+		if (fsidlen != 8)
+			return -1;
+		parsed->uuidlen = 8;
+		parsed->fhuuid = fsid;
+		break;
+	case FSID_UUID16: /* 16 byte uuid */
+		if (fsidlen != 16)
+			return -1;
+		parsed->uuidlen = 16;
+		parsed->fhuuid = fsid;
+		break;
+	case FSID_UUID16_INUM: /* 8 byte inode number and 16 byte uuid */
+		if (fsidlen != 24)
+			return -1;
+		memcpy(&parsed->inode, fsid, 8);
+		parsed->uuidlen = 16;
+		parsed->fhuuid = fsid+8;
+		break;
+	default:
+		/* Unknown layout: nothing was decoded, so do not
+		 * report success with an all-zero result.
+		 */
+		return -1;
+	}
+	return 0;
+}
+
+static int match_fsid(struct parsed_fsid *parsed, nfs_export *exp, char *path)
+{	/* Decide whether the decoded fsid 'parsed' identifies 'path'
+ * when exported through 'exp'.
+ * Returns 1 on a match and 0 on no match (also when the path
+ * fails with one of the path_lookup_error() errnos).  Returns
+ * -1 when the stat or mountpoint check failed with any other
+ * errno, i.e. no definite answer could be given.
+ * Only directories and regular files can match.
+ */
+ struct stat stb;
+ int type;
+ char u[16];
+
+ if (nfsd_path_stat(path, &stb) != 0)
+ goto path_error;
+ if (!S_ISDIR(stb.st_mode) && !S_ISREG(stb.st_mode))
+ goto nomatch;
+
+ switch (parsed->fsidtype) {
+ case FSID_DEV:
+ case FSID_MAJOR_MINOR:
+ case FSID_ENCODE_DEV: /* device based: inode and major/minor must all agree */
+ if (stb.st_ino != parsed->inode)
+ goto nomatch;
+ if (parsed->major != major(stb.st_dev) ||
+ parsed->minor != minor(stb.st_dev))
+ goto nomatch;
+ goto match;
+ case FSID_NUM: /* numeric: the export's fsid=, or a crossmnt re-export lookup */
+ if (((exp->m_export.e_flags & NFSEXP_FSID) == 0 ||
+ exp->m_export.e_fsid != parsed->fsidnum)) {
+ if ((exp->m_export.e_flags & NFSEXP_CROSSMOUNT) && exp->m_export.e_reexport != REEXP_NONE &&
+ match_crossmnt_fsidnum(parsed->fsidnum, path))
+ goto match;
+
+ goto nomatch;
+ }
+ goto match;
+ case FSID_UUID4_INUM:
+ case FSID_UUID16_INUM:
+ if (stb.st_ino != parsed->inode)
+ goto nomatch;
+ goto check_uuid;
+ case FSID_UUID8:
+ case FSID_UUID16: /* no inode in the fsid: the path itself must be a mountpoint */
+ errno = 0; /* lets a plain "not a mountpoint" be told apart from an error */
+ if (!is_mountpoint(path)) {
+ if (!errno)
+ goto nomatch;
+ goto path_error;
+ }
+ check_uuid:
+ if (exp->m_export.e_uuid) { /* an explicit export uuid is the only candidate */
+ get_uuid(exp->m_export.e_uuid, parsed->uuidlen, u);
+ if (memcmp(u, parsed->fhuuid, parsed->uuidlen) == 0)
+ goto match;
+ }
+ else
+ for (type = 0;
+ uuid_by_path(path, type, parsed->uuidlen, u);
+ type++)
+ if (memcmp(u, parsed->fhuuid, parsed->uuidlen) == 0)
+ goto match;
+ }
+nomatch:
+ return 0;
+match:
+ return 1;
+path_error:
+ if (path_lookup_error(errno))
+ goto nomatch;
+ return -1;
+}
+
+/*
+ * Turn a "$"-prefixed address string 'dom' into the result of
+ * client_resolve() for that address.  Returns NULL when the text after
+ * the "$" is not accepted by host_pton(); otherwise returns whatever
+ * client_resolve() produced.  The caller in this file releases the
+ * result with nfs_freeaddrinfo().
+ */
+static struct addrinfo *lookup_client_addr(char *dom)
+{
+	struct addrinfo *parsed;
+	struct addrinfo *resolved;
+
+	/* dom[0] is the initial "$"; the address starts right after it */
+	parsed = host_pton(dom + 1);
+	if (parsed == NULL)
+		return NULL;
+
+	resolved = client_resolve(parsed->ai_addr);
+	nfs_freeaddrinfo(parsed);
+
+	return resolved;
+}
+
+#define RETRY_SEC 120 /* presumably the retry interval in seconds; its use is not visible here */
+struct delayed {	/* one nfsd.fh request that could not be answered yet */
+ char *message;	/* heap copy of the request text */
+ time_t last_attempt;	/* time() of the most recent attempt */
+ int f;	/* channel fd the request was read from */
+ struct delayed *next;
+} *delayed;	/* head of the singly linked queue; entries are appended at the tail */
+
+static int nfsd_handle_fh(int f, char *bp, int blen)
+{
+ /* request are:
+ * domain fsidtype fsid
+ * interpret fsid, find export point and options, and write:
+ * domain fsidtype fsid expiry path
+ *
+ * 'bp' is the request text and 'blen' the size used for the
+ * domain buffer; the reply goes to channel fd 'f'.
+ * The path field is left out when no export was found.
+ * Returns 0 when the request is finished with (answered, or
+ * dropped as malformed) and 1 when no definite answer can be
+ * given yet, so that the caller should retry later.
+ */
+ char *dom;
+ int fsidtype;
+ int fsidlen;
+ char fsid[32];
+ struct parsed_fsid parsed;
+ struct exportent *found = NULL;
+ struct addrinfo *ai = NULL;
+ char *found_path = NULL;
+ nfs_export *exp;
+ int i;
+ int dev_missing = 0;
+ char buf[RPC_CHAN_BUF_SIZE];
+ int did_uncover = 0;
+ int ret = 0;
+
+ dom = malloc(blen);
+ if (dom == NULL)
+ return ret;
+ if (qword_get(&bp, dom, blen) <= 0)
+ goto out;
+ if (qword_get_int(&bp, &fsidtype) != 0)
+ goto out;
+ if (fsidtype < 0 || fsidtype > 7)
+ goto out; /* unknown type */
+ if ((fsidlen = qword_get(&bp, fsid, 32)) <= 0)
+ goto out;
+ if (parse_fsid(fsidtype, fsidlen, fsid, &parsed))
+ goto out;
+
+ auth_reload();
+
+ if (is_ipaddr_client(dom)) {
+ ai = lookup_client_addr(dom);
+ if (!ai)
+ goto out;
+ }
+
+ /* Now determine export point for this fsid/domain */
+ for (i=0 ; i < MCL_MAXTYPES; i++) {
+ nfs_export *next_exp;
+ for (exp = exportlist[i].p_head; exp; exp = next_exp) {
+ char *path;
+
+ if (!did_uncover && parsed.fsidnum && parsed.fsidtype == FSID_NUM && exp->m_export.e_reexport != REEXP_NONE) {
+ reexpdb_uncover_subvolume(parsed.fsidnum);
+ did_uncover = 1; /* at most once per request */
+ }
+
+ if (exp->m_export.e_flags & NFSEXP_CROSSMOUNT) {
+ /* A crossmnt export is visited repeatedly: first with its
+ * own path, then once per sub-mount reported by next_mnt().
+ * 'prev' and 'mnt' are static and carry that state from one
+ * loop pass to the next.
+ */
+ static nfs_export *prev = NULL;
+ static void *mnt = NULL;
+
+ if (prev == exp) {
+ /* try a submount */
+ path = next_mnt(&mnt, exp->m_export.e_path);
+ if (!path) {
+ next_exp = exp->m_next;
+ prev = NULL;
+ continue;
+ }
+ next_exp = exp;
+ } else {
+ prev = exp;
+ mnt = NULL;
+ path = exp->m_export.e_path;
+ next_exp = exp;
+ }
+ } else {
+ path = exp->m_export.e_path;
+ next_exp = exp->m_next;
+ }
+
+ if (!is_ipaddr_client(dom)
+ && !namelist_client_matches(exp, dom))
+ continue;
+ if (exp->m_export.e_mountpoint &&
+ !is_mountpoint(exp->m_export.e_mountpoint[0]?
+ exp->m_export.e_mountpoint:
+ exp->m_export.e_path))
+ dev_missing ++;
+
+ switch(match_fsid(&parsed, exp, path)) {
+ case 0:
+ continue;
+ case -1: /* could not tell: remember that something was unreachable */
+ dev_missing ++;
+ continue;
+ }
+ if (is_ipaddr_client(dom)
+ && !ipaddr_client_matches(exp, ai))
+ continue;
+ if (!found || subexport(&exp->m_export, found)) {
+ found = &exp->m_export;
+ free(found_path);
+ found_path = strdup(path);
+ if (found_path == NULL)
+ goto out;
+ } else if (strcmp(found->e_path, exp->m_export.e_path) != 0
+ && !subexport(found, &exp->m_export))
+ {
+ xlog(L_WARNING, "%s and %s have same filehandle for %s, using first",
+ found_path, path, dom);
+ } else {
+ /* same path, if one is V4ROOT, choose the other */
+ if (found->e_flags & NFSEXP_V4ROOT) {
+ found = &exp->m_export;
+ free(found_path);
+ found_path = strdup(path);
+ if (found_path == NULL)
+ goto out;
+ }
+ }
+ }
+ }
+
+ if (!found) {
+ /* The missing dev could be what we want, so just be
+ * quiet rather than returning stale yet
+ */
+ if (dev_missing) {
+ ret = 1;
+ goto out;
+ }
+ } else if (found->e_mountpoint &&
+ !is_mountpoint(found->e_mountpoint[0]?
+ found->e_mountpoint:
+ found->e_path)) {
+ /* Cannot export this yet
+ * should log a warning, but need to rate limit
+ xlog(L_WARNING, "%s not exported as %d not a mountpoint",
+ found->e_path, found->e_mountpoint);
+ */
+ ret = 1;
+ goto out;
+ }
+
+ bp = buf; blen = sizeof(buf);
+ qword_add(&bp, &blen, dom);
+ qword_addint(&bp, &blen, fsidtype);
+ qword_addhex(&bp, &blen, fsid, fsidlen);
+ /* The fsid -> path lookup can be quite expensive as it
+ * potentially stats and reads lots of devices, and some of those
+ * might have spun-down. The Answer is not likely to
+ * change underneath us, and an 'exportfs -f' can always
+ * remove this from the kernel, so use a really long
+ * timeout. Maybe this should be configurable on the command
+ * line.
+ */
+ qword_addint(&bp, &blen, 0x7fffffff);
+ if (found)
+ qword_add(&bp, &blen, found_path);
+ qword_addeol(&bp, &blen);
+ if (blen <= 0 || cache_write(f, buf, bp - buf) != bp - buf)
+ xlog(L_ERROR, "nfsd_fh: error writing reply");
+ if (!found)
+ xlog(D_AUTH, "denied access to %s", *dom == '$' ? dom+1 : dom);
+out:
+ if (found_path)
+ free(found_path);
+ nfs_freeaddrinfo(ai);
+ free(dom);
+ if (!ret)
+ xlog(D_CALL, "nfsd_fh: found %p path %s",
+ found, found ? found->e_path : NULL); /* NOTE(review): passes NULL for %s when nothing was found */
+ return ret;
+}
+
+/*
+ * Read one request from channel fd 'f' and pass it to
+ * nfsd_handle_fh().  When that reports that no definite answer is
+ * available yet, a copy of the request is appended to the 'delayed'
+ * queue so that it can be retried later.
+ */
+static void nfsd_fh(int f)
+{
+	char inbuf[RPC_CHAN_BUF_SIZE];
+	struct delayed *entry;
+	struct delayed **tail;
+	int blen;
+
+	blen = cache_read(f, inbuf, sizeof(inbuf));
+	if (blen <= 0 || inbuf[blen-1] != '\n')
+		return;
+	inbuf[blen-1] = 0;
+
+	xlog(D_CALL, "nfsd_fh: inbuf '%s'", inbuf);
+
+	if (nfsd_handle_fh(f, inbuf, blen) == 0)
+		return;
+
+	/* We don't have a definitive answer to give the kernel.
+	 * This is because an export marked "mountpoint" isn't a
+	 * mountpoint, or because a stat of a mountpoint fails with
+	 * a strange error like ETIMEDOUT as is possible with an
+	 * NFS mount marked "softerr" which is being re-exported.
+	 *
+	 * We cannot tell the kernel to retry, so we have to
+	 * retry ourselves.
+	 */
+	entry = malloc(sizeof(*entry));
+	if (entry == NULL)
+		return;
+
+	entry->message = strndup(inbuf, blen);
+	if (entry->message == NULL) {
+		free(entry);
+		return;
+	}
+	entry->f = f;
+	entry->last_attempt = time(NULL);
+	entry->next = NULL;
+
+	/* walk to the end of the queue and hook the new entry on */
+	for (tail = &delayed; *tail != NULL; tail = &(*tail)->next)
+		;
+	*tail = entry;
+}
+
+/*
+ * Try a queued request again.  When nfsd_handle_fh() is finished with
+ * it the entry and its message are freed; otherwise the attempt time
+ * is refreshed and the entry is appended to the tail of the 'delayed'
+ * queue.  'd' is presumably already unlinked from the queue by the
+ * caller - TODO confirm.
+ */
+static void nfsd_retry_fh(struct delayed *d)
+{
+	struct delayed **tail;
+	int pending;
+
+	pending = nfsd_handle_fh(d->f, d->message, strlen(d->message)+1);
+	if (pending == 0) {
+		free(d->message);
+		free(d);
+		return;
+	}
+
+	d->last_attempt = time(NULL);
+	d->next = NULL;
+	for (tail = &delayed; *tail != NULL; tail = &(*tail)->next)
+		;
+	*tail = d;
+}
+
+#ifdef HAVE_JUNCTION_SUPPORT
+/*
+ * Append the "fsloc" fields for export 'ep' to the reply being built
+ * at '*bp' ('*blen' is the space accounting used by the qword_*
+ * helpers): the word "fsloc", the number of servers, a host/path pair
+ * per server, and the referral value.  Nothing is written when the
+ * export has no fsloc method or replicas_lookup() returns nothing.
+ */
+static void write_fsloc(char **bp, int *blen, struct exportent *ep)
+{
+	struct servers *list;
+	int idx;
+
+	if (ep->e_fslocmethod == FSLOC_NONE)
+		return;
+
+	list = replicas_lookup(ep->e_fslocmethod, ep->e_fslocdata);
+	if (list == NULL)
+		return;
+
+	qword_add(bp, blen, "fsloc");
+	qword_addint(bp, blen, list->h_num);
+	/* a negative h_num simply yields no pairs */
+	for (idx = 0; idx < list->h_num; idx++) {
+		qword_add(bp, blen, list->h_mp[idx]->h_host);
+		qword_add(bp, blen, list->h_mp[idx]->h_path);
+	}
+	qword_addint(bp, blen, list->h_referral);
+	release_replicas(list);
+}
+#endif
+
+/*
+ * Append the "secinfo" fields for export 'ep' to the reply at '*bp':
+ * the word "secinfo", the number of flavors, then per flavor its
+ * number and its flags.  Each flags value is (flags | extra_flag)
+ * masked with 'flag_mask'.  Nothing is written when the e_secinfo list
+ * is empty, i.e. there was no sec= option.
+ */
+static void write_secinfo(char **bp, int *blen, struct exportent *ep, int flag_mask, int extra_flag)
+{
+	struct sec_entry *entry;
+	struct sec_entry *end = ep->e_secinfo;
+
+	/* the list ends at the first entry without a flavor */
+	while (end->flav)
+		end++;
+	if (end == ep->e_secinfo)
+		return;
+
+	fix_pseudoflavor_flags(ep);
+	qword_add(bp, blen, "secinfo");
+	qword_addint(bp, blen, end - ep->e_secinfo);
+	for (entry = ep->e_secinfo; entry->flav; entry++) {
+		int flags = (entry->flags | extra_flag) & flag_mask;
+
+		qword_addint(bp, blen, entry->flav->fnum);
+		qword_addint(bp, blen, flags);
+	}
+}
+
+/*
+ * Append the "xprtsec" fields for export 'ep' to the reply at '*bp':
+ * the word "xprtsec", the number of entries, then the number of each
+ * entry.  Nothing is written when the e_xprtsec list is empty.
+ */
+static void write_xprtsec(char **bp, int *blen, struct exportent *ep)
+{
+	struct xprtsec_entry *entry;
+	struct xprtsec_entry *end = ep->e_xprtsec;
+
+	/* the list ends at the first entry without info */
+	while (end->info)
+		end++;
+	if (end == ep->e_xprtsec)
+		return;
+
+	qword_add(bp, blen, "xprtsec");
+	qword_addint(bp, blen, end - ep->e_xprtsec);
+	for (entry = ep->e_xprtsec; entry->info; entry++)
+		qword_addint(bp, blen, entry->info->number);
+}
+
+/*
+ * Returns 1 when 'st' describes an NFS filesystem and the export uses
+ * one of the fsid-number re-export modes (predefined or automatic),
+ * 0 otherwise.
+ */
+static int can_reexport_via_fsidnum(struct exportent *exp, struct statfs *st)
+{
+	int on_nfs = (st->f_type == 0x6969 /* NFS_SUPER_MAGIC */);
+
+	if (!on_nfs)
+		return 0;
+	if (exp->e_reexport == REEXP_PREDEFINED_FSIDNUM)
+		return 1;
+
+	return exp->e_reexport == REEXP_AUTO_FSIDNUM;
+}
+
+static int dump_to_cache(int f, char *buf, int blen, char *domain,
+ char *path, struct exportent *exp, int ttl)
+{	/* Build one reply for 'domain' and 'path' in 'buf' ('blen' bytes)
+ * and write it to channel fd 'f'.
+ * With an export 'exp' the entry carries: expiry (now + e_ttl),
+ * flags, anonuid, anongid, fsid number, then the optional fsloc,
+ * secinfo, uuid and xprtsec fields.  With exp == NULL only the
+ * expiry (now + ttl) is written, i.e. access is denied.  A 'ttl'
+ * of 1 or less is replaced by default_ttl.
+ * Returns 0 on success.  On failure returns -1 with errno set
+ * to EINVAL, ENOBUFS or ENOSPC, or the negative result of
+ * cache_write().
+ */
+ char *bp = buf;
+ time_t now = time(0);
+ size_t buflen;
+ ssize_t err;
+
+ if (ttl <= 1)
+ ttl = default_ttl;
+
+ qword_add(&bp, &blen, domain);
+ qword_add(&bp, &blen, path);
+ if (exp) {
+ int different_fs = strcmp(path, exp->e_path) != 0; /* path is not the export's own path */
+ int flag_mask = different_fs ? ~NFSEXP_FSID : ~0; /* hide the fsid flag for other paths */
+ int rc, do_fsidnum = 0;
+ uint32_t fsidnum = exp->e_fsid;
+
+ if (different_fs) {
+ struct statfs st;
+
+ rc = nfsd_path_statfs(path, &st);
+ if (rc) {
+ xlog(L_WARNING, "unable to statfs %s", path);
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (can_reexport_via_fsidnum(exp, &st)) {
+ do_fsidnum = 1;
+ flag_mask = ~0;
+ }
+ }
+
+ qword_adduint(&bp, &blen, now + exp->e_ttl);
+
+ if (do_fsidnum) {
+ uint32_t search_fsidnum = 0;
+ if (exp->e_reexport != REEXP_NONE && reexpdb_fsidnum_by_path(path, &search_fsidnum,
+ exp->e_reexport == REEXP_AUTO_FSIDNUM) == 0) {
+ errno = EINVAL;
+ return -1;
+ }
+ fsidnum = search_fsidnum; /* the looked-up number replaces the export's fsid */
+ qword_addint(&bp, &blen, exp->e_flags | NFSEXP_FSID);
+ } else {
+ qword_addint(&bp, &blen, exp->e_flags & flag_mask);
+ }
+
+ qword_addint(&bp, &blen, exp->e_anonuid);
+ qword_addint(&bp, &blen, exp->e_anongid);
+ qword_addint(&bp, &blen, fsidnum);
+
+#ifdef HAVE_JUNCTION_SUPPORT
+ write_fsloc(&bp, &blen, exp);
+#endif
+ write_secinfo(&bp, &blen, exp, flag_mask, do_fsidnum ? NFSEXP_FSID : 0);
+ if (exp->e_uuid == NULL || different_fs) {
+ char u[16];
+ if ((exp->e_flags & flag_mask & NFSEXP_FSID) == 0 &&
+ uuid_by_path(path, 0, 16, u)) { /* no usable fsid flag: derive a uuid from the path */
+ qword_add(&bp, &blen, "uuid");
+ qword_addhex(&bp, &blen, u, 16);
+ }
+ } else {
+ char u[16];
+ get_uuid(exp->e_uuid, 16, u);
+ qword_add(&bp, &blen, "uuid");
+ qword_addhex(&bp, &blen, u, 16);
+ }
+ write_xprtsec(&bp, &blen, exp);
+ xlog(D_AUTH, "granted access to %s for %s",
+ path, *domain == '$' ? domain+1 : domain);
+ } else {
+ qword_adduint(&bp, &blen, now + ttl);
+ xlog(D_AUTH, "denied access to %s for %s",
+ path, *domain == '$' ? domain+1 : domain);
+ }
+ qword_addeol(&bp, &blen);
+ if (blen <= 0) { /* the reply did not fit into 'buf' */
+ errno = ENOBUFS;
+ return -1;
+ }
+ buflen = bp - buf;
+ err = cache_write(f, buf, buflen);
+ if (err < 0)
+ return err;
+ if ((size_t)err != buflen) { /* short write */
+ errno = ENOSPC;
+ return -1;
+ }
+ return 0;
+}
+
+static nfs_export *
+lookup_export(char *dom, char *path, struct addrinfo *ai)
+{	/* Scan every export list for entries that export_matches()
+ * accepts for 'dom', 'path' and 'ai', and return the preferred
+ * one, or NULL when none matches.
+ * Preference: a non-V4ROOT export beats a V4ROOT one; when a
+ * crossmnt export is involved and the path lengths differ, the
+ * longer export path wins; otherwise the first match is kept,
+ * with a one-time warning when both come from the same list.
+ * The result points into exportlist and is not a copy.
+ */
+ nfs_export *exp;
+ nfs_export *found = NULL;
+ int found_type = 0;
+ int i;
+
+ for (i=0 ; i < MCL_MAXTYPES; i++) {
+ for (exp = exportlist[i].p_head; exp; exp = exp->m_next) {
+ if (!export_matches(exp, dom, path, ai))
+ continue;
+ if (!found) {
+ found = exp;
+ found_type = i;
+ continue;
+ }
+ /* Always prefer non-V4ROOT exports */
+ if (exp->m_export.e_flags & NFSEXP_V4ROOT)
+ continue;
+ if (found->m_export.e_flags & NFSEXP_V4ROOT) {
+ found = exp;
+ found_type = i;
+ continue;
+ }
+
+ /* If one is a CROSSMOUNT, then prefer the longest path */
+ if (((found->m_export.e_flags & NFSEXP_CROSSMOUNT) ||
+ (exp->m_export.e_flags & NFSEXP_CROSSMOUNT)) &&
+ strlen(found->m_export.e_path) !=
+ strlen(exp->m_export.e_path)) {
+
+ if (strlen(exp->m_export.e_path) >
+ strlen(found->m_export.e_path)) {
+ found = exp;
+ found_type = i;
+ }
+ continue;
+
+ } else if (found_type == i && found->m_warned == 0) {
+ xlog(L_WARNING, "%s exported to both %s and %s, "
+ "arbitrarily choosing options from first",
+ path, found->m_client->m_hostname, exp->m_client->m_hostname);
+ found->m_warned = 1; /* warn only once per export */
+ }
+ }
+ }
+ return found;
+}
+
+#ifdef HAVE_JUNCTION_SUPPORT
+
+#include <libxml/parser.h>
+#include "junction.h"
+
+struct nfs_fsloc_set {	/* a list of fs locations plus an iteration cursor */
+ int ns_ttl;	/* ttl handed out with every location by get_next_location() */
+ struct nfs_fsloc *ns_current;	/* next location to hand out; NULL once the list is exhausted */
+ struct nfs_fsloc *ns_list;	/* presumably the head of the list; not used in the code visible here */
+};
+
+/*
+ * Find the export entry for the parent of "pathname".
+ * The path is shortened one component at a time until lookup_export()
+ * finds an export; when only "/" is left the root export is tried.
+ * If nothing is found a default entry built by
+ * mkexportent("*", "/", "insecure") is returned instead.
+ * Caller must not free returned exportent.
+ */
+static struct exportent *lookup_parent_export(char *dom,
+		const char *pathname, struct addrinfo *ai)
+{
+	nfs_export *result = NULL;
+	char *parent;
+
+	parent = strdup(pathname);
+	if (parent == NULL) {
+		xlog(D_GENERAL, "%s: failed to allocate parent path buffer",
+			__func__);
+		goto out_default;
+	}
+	xlog(D_CALL, "%s: pathname = '%s'", __func__, pathname);
+
+	while (result == NULL) {
+		/* shorten pathname by one component */
+		char *slash = strrchr(parent, '/');
+
+		if (slash == NULL) {
+			xlog(D_GENERAL, "%s: no slash found in pathname",
+				__func__);
+			goto out_default;
+		}
+		*slash = '\0';
+
+		if (parent[0] == '\0') {
+			/* nothing left but the root */
+			result = lookup_export(dom, "/", ai);
+			if (result == NULL) {
+				xlog(L_ERROR, "%s: no root export found.", __func__);
+				goto out_default;
+			}
+			break;
+		}
+
+		result = lookup_export(dom, parent, ai);
+		if (result == NULL)
+			xlog(D_GENERAL, "%s: lookup_export(%s) found nothing",
+				__func__, parent);
+	}
+
+	xlog(D_CALL, "%s: found export for %s", __func__, parent);
+	free(parent);
+	return &result->m_export;
+
+out_default:
+	free(parent);
+	return mkexportent("*", "/", "insecure");
+}
+
+/*
+ * Hand out the location at the cursor of 'locset' and advance the
+ * cursor.  On success (return 0) '*hostname' and '*export_path' are
+ * newly allocated strings owned by the caller and '*ttl' is the set's
+ * ttl.  Returns ENOENT when the list is exhausted, ENOMEM when the
+ * hostname cannot be duplicated and EINVAL when the root path cannot
+ * be converted; the outputs and the cursor are untouched in those
+ * cases.
+ */
+static int get_next_location(struct nfs_fsloc_set *locset,
+		char **hostname, char **export_path, int *ttl)
+{
+	struct nfs_fsloc *cur = locset->ns_current;
+	char *host_copy;
+	char *posix_path;
+
+	if (cur == NULL)
+		return ENOENT;
+
+	host_copy = strdup(cur->nfl_hostname);
+	if (host_copy == NULL)
+		return ENOMEM;
+
+	if (nsdb_path_array_to_posix(cur->nfl_rootpath, &posix_path) != 0) {
+		free(host_copy);
+		return EINVAL;
+	}
+
+	*hostname = host_copy;
+	*export_path = posix_path;
+	*ttl = locset->ns_ttl;
+	locset->ns_current = cur->nfl_next;
+	return 0;
+}
+
+/*
+ * Walk through a set of FS locations and build an e_fslocdata string
+ * in 'fslocdata' ('remaining' bytes).  Locations are written as
+ * "path@server"; further paths are separated by ':' and a server that
+ * repeats the previous path is appended as "+server".
+ * '*ttl' is set to 0 and then to the ttl reported for each location.
+ * Returns true if all went to plan and at least one location was
+ * written; otherwise, false.
+ */
+static bool locations_to_fslocdata(struct nfs_fsloc_set *locations,
+		char *fslocdata, size_t remaining, int *ttl)
+{
+	char *server, *last_path, *rootpath, *ptr;
+	_Bool seen = false;
+
+	last_path = NULL;
+	rootpath = NULL;
+	server = NULL;
+	ptr = fslocdata;
+	*ttl = 0;
+
+	for (;;) {
+		int len, status;
+
+		status = get_next_location(locations, &server,
+						&rootpath, ttl);
+		if (status == ENOENT)
+			break;
+		if (status) {
+			xlog(D_GENERAL, "%s: failed to parse location: %s",
+				__func__, strerror(status));
+			goto out_false;
+		}
+		xlog(D_GENERAL, "%s: Location: %s:%s",
+			__func__, server, rootpath);
+
+		if (last_path && strcmp(rootpath, last_path) == 0) {
+			len = snprintf(ptr, remaining, "+%s", server);
+			if (len < 0) {
+				xlog(D_GENERAL, "%s: snprintf: %m", __func__);
+				goto out_false;
+			}
+			if ((size_t)len >= remaining) {
+				xlog(D_GENERAL, "%s: fslocdata buffer overflow", __func__);
+				goto out_false;
+			}
+			remaining -= (size_t)len;
+			ptr += len;
+			free(rootpath);
+		} else {
+			if (last_path == NULL)
+				len = snprintf(ptr, remaining, "%s@%s",
+							rootpath, server);
+			else
+				len = snprintf(ptr, remaining, ":%s@%s",
+							rootpath, server);
+			if (len < 0) {
+				xlog(D_GENERAL, "%s: snprintf: %m", __func__);
+				goto out_false;
+			}
+			if ((size_t)len >= remaining) {
+				xlog(D_GENERAL, "%s: fslocdata buffer overflow",
+					__func__);
+				goto out_false;
+			}
+			remaining -= (size_t)len;
+			ptr += len;
+			/* last_path takes over ownership of rootpath: it is
+			 * compared against in the next iteration, so it must
+			 * not be freed here.
+			 */
+			free(last_path);
+			last_path = rootpath;
+		}
+
+		seen = true;
+		free(server);
+		/* get_next_location() leaves its outputs untouched on
+		 * failure; clear them so that out_false cannot free a
+		 * pointer a second time.
+		 */
+		rootpath = NULL;
+		server = NULL;
+	}
+
+	xlog(D_CALL, "%s: fslocdata='%s', ttl=%d",
+		__func__, fslocdata, *ttl);
+	free(last_path);
+	return seen;
+
+out_false:
+	free(last_path);
+	free(rootpath);
+	free(server);
+	return false;
+}
+
+/*
+ * Duplicate the junction's parent's export options and graft in
+ * the fslocdata we constructed from the locations list.
+ *
+ * @parent:    export whose options are inherited
+ * @junction:  path of the junction; becomes the new entry's e_path
+ * @fslocdata: location string to attach (copied)
+ * @ttl:       TTL to record in the new entry
+ *
+ * Returns a heap-allocated exportent that the caller must hand to
+ * exportent_release() and then free(), or NULL if the junction path
+ * does not fit in e_path or memory runs out.
+ */
+static struct exportent *create_junction_exportent(struct exportent *parent,
+		const char *junction, const char *fslocdata, int ttl)
+{
+	struct exportent *eep;
+
+	/* e_path is copied into in place, so bound the copy explicitly */
+	if (strlen(junction) >= sizeof(eep->e_path)) {
+		xlog(L_ERROR, "%s: junction path too long", __func__);
+		return NULL;
+	}
+
+	eep = malloc(sizeof(*eep));
+	if (eep == NULL)
+		goto out_nomem;
+
+	dupexportent(eep, parent);
+	strcpy(eep->e_path, junction);
+	eep->e_hostname = strdup(parent->e_hostname);
+	if (eep->e_hostname == NULL)
+		goto out_release;
+
+	/* A referral does not inherit the parent's uuid */
+	free(eep->e_uuid);
+	eep->e_uuid = NULL;
+	eep->e_ttl = (unsigned int)ttl;
+
+	free(eep->e_fslocdata);
+	eep->e_fslocdata = strdup(fslocdata);
+	if (eep->e_fslocdata == NULL)
+		goto out_release;
+	eep->e_fslocmethod = FSLOC_REFER;
+	return eep;
+
+out_release:
+	/*
+	 * Release everything dupexportent() duplicated, the same way the
+	 * success path's owner does, rather than freeing only the struct.
+	 */
+	exportent_release(eep);
+	free(eep);
+out_nomem:
+	xlog(L_ERROR, "%s: No memory", __func__);
+	return NULL;
+}
+
+/*
+ * Convert a junction's location set into an exportent.
+ *
+ * The location string is assembled in a function-local static buffer,
+ * then copied into a new exportent derived from @parent.  Returns that
+ * exportent, or NULL when no location string could be built or the
+ * exportent could not be created.
+ */
+static struct exportent *locations_to_export(struct nfs_fsloc_set *locations,
+		const char *junction, struct exportent *parent)
+{
+	static char fslocdata[BUFSIZ];
+	bool built;
+	int ttl;
+
+	fslocdata[0] = '\0';
+	built = locations_to_fslocdata(locations, fslocdata,
+					sizeof(fslocdata), &ttl);
+	if (built)
+		return create_junction_exportent(parent, junction,
+						 fslocdata, ttl);
+	return NULL;
+}
+
+/*
+ * Load the locations stored in the junction at @junct_path into a newly
+ * allocated location set.
+ *
+ * On success returns 0 and stores the set in *locset with its cursor at
+ * the first location and a TTL of 300; the caller frees the list and the
+ * set.  Returns ENOMEM if the set cannot be allocated, or EINVAL if the
+ * locations cannot be retrieved.
+ */
+static int
+nfs_get_basic_junction(const char *junct_path, struct nfs_fsloc_set **locset)
+{
+	struct nfs_fsloc_set *set = calloc(1, sizeof(*set));
+
+	if (set == NULL)
+		return ENOMEM;
+
+	if (nfs_get_locations(junct_path, &set->ns_list)) {
+		nfs_free_locations(set->ns_list);
+		free(set);
+		return EINVAL;
+	}
+
+	set->ns_ttl = 300;
+	set->ns_current = set->ns_list;
+	*locset = set;
+	return 0;
+}
+
+/*
+ * Build an exportent for @pathname if it is a junction.
+ *
+ * The export options come from the nearest enclosing export found for
+ * client domain @dom / address @ai; the locations come from the junction
+ * itself.  Returns a heap-allocated exportent, or NULL if @pathname is
+ * not a junction, its locations cannot be read, or the exportent cannot
+ * be built.
+ */
+static struct exportent *lookup_junction(char *dom, const char *pathname,
+ struct addrinfo *ai)
+{
+ struct exportent *parent, *exp = NULL;
+ struct nfs_fsloc_set *locations;
+ int status;
+
+ /* Paired with xmlCleanupParser() at "out" on every exit path */
+ xmlInitParser();
+
+ /* A non-zero result is treated as "not a junction" */
+ if (nfs_is_junction(pathname)) {
+ xlog(D_GENERAL, "%s: %s is not a junction",
+ __func__, pathname);
+ goto out;
+ }
+ status = nfs_get_basic_junction(pathname, &locations);
+ if (status) {
+ xlog(L_WARNING, "Dangling junction %s: %s",
+ pathname, strerror(status));
+ goto out;
+ }
+
+ parent = lookup_parent_export(dom, pathname, ai);
+ if (parent == NULL)
+ goto free_locations;
+
+ exp = locations_to_export(locations, pathname, parent);
+
+free_locations:
+ /* The location set is only needed while building the exportent */
+ nfs_free_locations(locations->ns_list);
+ free(locations);
+
+out:
+ xmlCleanupParser();
+ return exp;
+}
+
+/*
+ * Answer an export upcall for a path that matched no configured export.
+ * If the path is a junction, the kernel is given a referral entry built
+ * from it; otherwise dump_to_cache() is called with a NULL export.
+ */
+static void lookup_nonexport(int f, char *buf, int buflen, char *dom, char *path,
+		struct addrinfo *ai)
+{
+	struct exportent *junction = lookup_junction(dom, path, ai);
+
+	dump_to_cache(f, buf, buflen, dom, path, junction, 0);
+	if (junction != NULL) {
+		/* The junction entry was allocated just for this reply */
+		exportent_release(junction);
+		free(junction);
+	}
+}
+
+#else /* !HAVE_JUNCTION_SUPPORT */
+
+/*
+ * Built without junction support: a path that matched no export is
+ * reported by calling dump_to_cache() with a NULL export.
+ */
+static void lookup_nonexport(int f, char *buf, int buflen, char *dom, char *path,
+ struct addrinfo *UNUSED(ai))
+{
+ dump_to_cache(f, buf, buflen, dom, path, NULL, 0);
+}
+
+#endif /* !HAVE_JUNCTION_SUPPORT */
+
+/*
+ * Handle one upcall on the nfsd.export channel open on descriptor @f.
+ *
+ * Reads a single "domain path" request, looks up the matching export and
+ * writes the answer back through dump_to_cache().  Malformed or
+ * unreadable requests are dropped silently.
+ */
+static void nfsd_export(int f)
+{
+	/* requests are:
+	 *  domain path
+	 * determine export options and return:
+	 *  domain path expiry flags anonuid anongid fsid
+	 */
+
+	char *dom, *path;
+	nfs_export *found = NULL;
+	struct addrinfo *ai = NULL;
+	char buf[RPC_CHAN_BUF_SIZE], *bp;
+	int blen;
+
+	blen = cache_read(f, buf, sizeof(buf));
+	if (blen <= 0 || buf[blen-1] != '\n')
+		return;
+	buf[blen-1] = 0;
+
+	xlog(D_CALL, "nfsd_export: inbuf '%s'", buf);
+
+	bp = buf;
+	/*
+	 * Zero-filled so that "path" is a valid (empty) string when an
+	 * early parse failure jumps to the log statement at "out".
+	 */
+	dom = calloc(1, blen);
+	path = calloc(1, blen);
+
+	if (!dom || !path)
+		goto out;
+
+	if (qword_get(&bp, dom, blen) <= 0)
+		goto out;
+	if (qword_get(&bp, path, blen) <= 0)
+		goto out;
+
+	auth_reload();
+
+	if (is_ipaddr_client(dom)) {
+		ai = lookup_client_addr(dom);
+		if (!ai)
+			goto out;
+	}
+
+	found = lookup_export(dom, path, ai);
+
+	if (found) {
+		char *mp = found->m_export.e_mountpoint;
+
+		/* An empty mountpoint option means "the export path itself" */
+		if (mp && !*mp)
+			mp = found->m_export.e_path;
+		errno = 0;
+		if (mp && !is_mountpoint(mp)) {
+			/*
+			 * Errors other than ordinary path lookup failures
+			 * are not answered at all.
+			 */
+			if (errno != 0 && !path_lookup_error(errno))
+				goto out;
+			/* Exportpoint is not mounted, so tell kernel it is
+			 * not available.
+			 * This will cause it not to appear in the V4 Pseudo-root
+			 * and so a "mount" of this path will fail, just like with
+			 * V3.
+			 * Any filehandle for this mountpoint from an earlier
+			 * mount will block in nfsd.fh lookup.
+			 */
+			xlog(L_WARNING,
+			     "Cannot export path '%s': not a mountpoint",
+			     path);
+			dump_to_cache(f, buf, sizeof(buf), dom, path,
+				      NULL, 60);
+		} else if (dump_to_cache(f, buf, sizeof(buf), dom, path,
+					 &found->m_export, 0) < 0) {
+			xlog(L_WARNING,
+			     "Cannot export %s, possibly unsupported filesystem"
+			     " or fsid= required", path);
+			dump_to_cache(f, buf, sizeof(buf), dom, path, NULL, 0);
+		}
+	} else
+		lookup_nonexport(f, buf, sizeof(buf), dom, path, ai);
+
+ out:
+	/* "path" may be NULL here if its allocation failed */
+	xlog(D_CALL, "nfsd_export: found %p path %s", (void *)found,
+	     path ? path : "(null)");
+	free(dom);
+	free(path);
+	nfs_freeaddrinfo(ai);
+}
+
+
+/*
+ * Table of the kernel RPC cache channels handled in this file.  The
+ * entry with a NULL cache_name terminates the table.
+ */
+struct {
+ /* Name substituted into /proc/net/rpc/<name>/channel */
+ char *cache_name;
+ /* Handler invoked with the channel descriptor when it is readable */
+ void (*cache_handle)(int f);
+ /* Channel descriptor; starts at -1 and is set by cache_open() */
+ int f;
+} cachelist[] = {
+ { "auth.unix.ip", auth_unix_ip, -1 },
+ { "auth.unix.gid", auth_unix_gid, -1 },
+ { "nfsd.export", nfsd_export, -1 },
+ { "nfsd.fh", nfsd_fh, -1 },
+ { NULL, NULL, -1 }
+};
+
+extern int manage_gids;
+
+/**
+ * cache_open - prepare communications channels with kernel RPC caches
+ *
+ * Opens /proc/net/rpc/<name>/channel read-write for every entry in
+ * cachelist and records the descriptor in the entry.  The auth.unix.gid
+ * channel is skipped unless manage_gids is set.  A failed open() leaves
+ * a negative descriptor in the entry, which the other routines in this
+ * file treat as "channel not available".
+ */
+void cache_open(void)
+{
+	int i;
+
+	for (i = 0; cachelist[i].cache_name; i++) {
+		char path[100];
+		int len;
+
+		if (!manage_gids && cachelist[i].cache_handle == auth_unix_gid)
+			continue;
+
+		/* Bounded formatting: never write past the end of path[] */
+		len = snprintf(path, sizeof(path), "/proc/net/rpc/%s/channel",
+			       cachelist[i].cache_name);
+		if (len < 0 || (size_t)len >= sizeof(path)) {
+			xlog(L_WARNING, "%s: channel path for %s too long",
+			     __func__, cachelist[i].cache_name);
+			continue;
+		}
+		cachelist[i].f = open(path, O_RDWR);
+	}
+}
+
+/**
+ * cache_set_fds - prepare cache file descriptors for one iteration of the service loop
+ * @fdset: pointer to fd_set to prepare
+ *
+ * Adds the descriptor of every open cache channel to @fdset; channels
+ * whose descriptor is negative are skipped.
+ */
+void cache_set_fds(fd_set *fdset)
+{
+	int slot = 0;
+
+	while (cachelist[slot].cache_name != NULL) {
+		if (cachelist[slot].f >= 0)
+			FD_SET(cachelist[slot].f, fdset);
+		slot++;
+	}
+}
+
+/**
+ * cache_process_req - process any active cache file descriptors during service loop iteration
+ * @readfds: pointer to fd_set to examine for activity
+ *
+ * Runs the handler of every open cache channel whose descriptor is set
+ * in @readfds, then clears that descriptor from the set.
+ *
+ * Returns the number of channels that were handled.
+ */
+int cache_process_req(fd_set *readfds)
+{
+	int handled = 0;
+	int slot;
+
+	for (slot = 0; cachelist[slot].cache_name != NULL; slot++) {
+		if (cachelist[slot].f < 0)
+			continue;
+		if (!FD_ISSET(cachelist[slot].f, readfds))
+			continue;
+
+		handled++;
+		cachelist[slot].cache_handle(cachelist[slot].f);
+		FD_CLR(cachelist[slot].f, readfds);
+	}
+	return handled;
+}
+
+/**
+ * cache_process - process incoming upcalls
+ * @readfds: fd_set already holding the caller's descriptors, or NULL to
+ *           wait on the cache and v4clients descriptors only
+ *
+ * Waits in select() for activity, retries the oldest delayed request
+ * once its retry interval has elapsed, then dispatches the cache and
+ * v4clients descriptors that became readable.
+ *
+ * Returns -ve on error, or number of fds in svc_fds
+ * that might need processing.
+ */
+int cache_process(fd_set *readfds)
+{
+ fd_set fdset;
+ int selret;
+ /* Default wait when nothing is queued for retry: 24 hours */
+ struct timeval tv = { 24*3600, 0 };
+
+ if (!readfds) {
+ FD_ZERO(&fdset);
+ readfds = &fdset;
+ }
+ cache_set_fds(readfds);
+ v4clients_set_fds(readfds);
+
+ if (delayed) {
+ /* Shorten the wait so the head of the delayed list is retried on time */
+ time_t now = time(NULL);
+ time_t delay;
+ if (delayed->last_attempt > now)
+ /* Clock updated - retry immediately */
+ delayed->last_attempt = now - RETRY_SEC;
+ delay = delayed->last_attempt + RETRY_SEC - now;
+ if (delay < 0)
+ delay = 0;
+ tv.tv_sec = delay;
+ }
+ selret = select(FD_SETSIZE, readfds, NULL, NULL, &tv);
+
+ if (delayed) {
+ time_t now = time(NULL);
+ struct delayed *d = delayed;
+
+ /* Unlink the head entry before retrying it */
+ if (d->last_attempt + RETRY_SEC <= now) {
+ delayed = d->next;
+ d->next = NULL;
+ nfsd_retry_fh(d);
+ }
+ }
+
+ switch (selret) {
+ case -1:
+ /* These errno values are reported as "nothing to do", not as failure */
+ if (errno == EINTR || errno == ECONNREFUSED
+ || errno == ENETUNREACH || errno == EHOSTUNREACH)
+ return 0;
+ return -1;
+
+ default:
+ /* Subtract the descriptors handled here; the rest belong to the caller */
+ selret -= cache_process_req(readfds);
+ selret -= v4clients_process(readfds);
+ if (selret < 0)
+ selret = 0;
+ }
+ return selret;
+}
+
+/*
+ * Give IP->domain and domain+path->options to kernel
+ * % echo nfsd $IP $[now+DEFAULT_TTL] $domain > /proc/net/rpc/auth.unix.ip/channel
+ * % echo $domain $path $[now+DEFAULT_TTL] $options $anonuid $anongid $fsid > /proc/net/rpc/nfsd.export/channel
+ */
+
+/*
+ * Write the export entry for @exp (client domain @domain) to the
+ * nfsd.export channel, using @buf/@buflen as scratch space.
+ *
+ * If that succeeds, the export has NFSEXP_CROSSMOUNT set and @path lies
+ * below exp->e_path, every prefix of @path at which the device number
+ * changes is written as well, with the same options.  @path is modified
+ * temporarily while being walked and restored before return.
+ *
+ * Returns -1 if the channel cannot be opened, otherwise the result of
+ * the first dump_to_cache() call.
+ */
+static int cache_export_ent(char *buf, int buflen, char *domain, struct exportent *exp, char *path)
+{
+ int f, err;
+
+ f = open("/proc/net/rpc/nfsd.export/channel", O_WRONLY);
+ if (f < 0) return -1;
+
+ err = dump_to_cache(f, buf, buflen, domain, exp->e_path, exp, 0);
+ if (err) {
+ xlog(L_WARNING,
+ "Cannot export %s, possibly unsupported filesystem or"
+ " fsid= required", exp->e_path);
+ }
+
+ while (err == 0 && (exp->e_flags & NFSEXP_CROSSMOUNT) && path) {
+ /* really an 'if', but we can break out of
+ * a 'while' more easily */
+ /* Look along 'path' for other filesystems
+ * and export them with the same options
+ */
+ struct stat stb;
+ size_t l = strlen(exp->e_path);
+ dev_t dev;
+
+ /* Only proceed when path is strictly below the export path */
+ if (strlen(path) <= l || path[l] != '/' ||
+ strncmp(exp->e_path, path, l) != 0)
+ break;
+ if (nfsd_path_stat(exp->e_path, &stb) != 0)
+ break;
+ dev = stb.st_dev;
+ while(path[l] == '/') {
+ char c;
+ /* errors for submount should fail whole filesystem */
+ int err2;
+
+ /* Advance l to the end of the next path component */
+ l++;
+ while (path[l] != '/' && path[l])
+ l++;
+ /* Temporarily terminate path at l to examine this prefix */
+ c = path[l];
+ path[l] = 0;
+ err2 = nfsd_path_lstat(path, &stb);
+ path[l] = c;
+ if (err2 < 0)
+ break;
+ /* Same device as the previous component: no new filesystem here */
+ if (stb.st_dev == dev)
+ continue;
+ dev = stb.st_dev;
+ path[l] = 0;
+ /* NOTE(review): the result of this call is not checked */
+ dump_to_cache(f, buf, buflen, domain, path, exp, 0);
+ path[l] = c;
+ }
+ break;
+ }
+
+ close(f);
+ return err;
+}
+
+/**
+ * cache_export - Inform kernel of a new nfs_export
+ * @exp: target nfs_export
+ * @path: NUL-terminated C string containing export path
+ *
+ * First writes an "nfsd <address> <expiry> <domain>" line to the
+ * auth.unix.ip channel, then passes the export itself on to
+ * cache_export_ent().
+ *
+ * Returns -1 if the auth.unix.ip channel cannot be opened or written in
+ * full; otherwise the result of cache_export_ent().
+ */
+int cache_export(nfs_export *exp, char *path)
+{
+	char ip[INET6_ADDRSTRLEN];
+	char buf[RPC_CHAN_BUF_SIZE], *bp;
+	int blen, f;
+	int written;
+
+	f = open("/proc/net/rpc/auth.unix.ip/channel", O_WRONLY);
+	if (f < 0)
+		return -1;
+
+	bp = buf;
+	blen = sizeof(buf);
+	qword_add(&bp, &blen, "nfsd");
+	qword_add(&bp, &blen, host_ntop(get_addrlist(exp->m_client, 0), ip, sizeof(ip)));
+	qword_adduint(&bp, &blen, time(0) + exp->m_export.e_ttl);
+	qword_add(&bp, &blen, exp->m_client->m_hostname);
+	qword_addeol(&bp, &blen);
+
+	/* Only attempt the write when the whole line fitted into buf */
+	written = blen > 0 && cache_write(f, buf, bp - buf) == bp - buf;
+	close(f);
+	if (!written)
+		return -1;
+
+	return cache_export_ent(buf, sizeof(buf), exp->m_client->m_hostname, &exp->m_export, path);
+}
+
+/**
+ * cache_get_filehandle - given an nfs_export, get its root filehandle
+ * @exp: target nfs_export
+ * @len: length of requested file handle
+ * @p: NUL-terminated C string containing export path
+ *
+ * Returns pointer to NFS file handle of root directory of export,
+ * or NULL if the filehandle file cannot be opened, the request cannot
+ * be written in full, or the reply is empty or not newline-terminated.
+ * The result lives in static storage and is overwritten by the next call.
+ *
+ * {
+ * echo $domain $path $length
+ * read filehandle <&0
+ * } <> /proc/fs/nfsd/filehandle
+ */
+struct nfs_fh_len *
+cache_get_filehandle(nfs_export *exp, int len, char *p)
+{
+ static struct nfs_fh_len fh;
+ char buf[RPC_CHAN_BUF_SIZE], *bp;
+ int blen, f;
+
+ /* Try the nfsd location first, then fall back to the second path */
+ f = open("/proc/fs/nfsd/filehandle", O_RDWR);
+ if (f < 0) {
+ f = open("/proc/fs/nfs/filehandle", O_RDWR);
+ if (f < 0) return NULL;
+ }
+
+ bp = buf, blen = sizeof(buf);
+ qword_add(&bp, &blen, exp->m_client->m_hostname);
+ qword_add(&bp, &blen, p);
+ qword_addint(&bp, &blen, len);
+ qword_addeol(&bp, &blen);
+ if (blen <= 0 || cache_write(f, buf, bp - buf) != bp - buf) {
+ close(f);
+ return NULL;
+ }
+ /* Reuse buf for the reply, read back from the same descriptor */
+ bp = buf;
+ blen = cache_read(f, buf, sizeof(buf));
+ close(f);
+
+ if (blen <= 0 || buf[blen-1] != '\n')
+ return NULL;
+ buf[blen-1] = 0;
+
+ memset(fh.fh_handle, 0, sizeof(fh.fh_handle));
+ /* NOTE(review): a negative qword_get() result would be stored in fh_size unchecked - confirm callers cope */
+ fh.fh_size = qword_get(&bp, (char *)fh.fh_handle, NFS3_FHSIZE);
+ return &fh;
+}
+
+/*
+ * Block until every worker child has exited, reaping each as it dies.
+ * @prog is used only as a prefix for log messages.
+ */
+void
+cache_wait_for_workers(char *prog)
+{
+	int wstatus;
+	pid_t child;
+
+	while (1) {
+		child = waitpid(0, &wstatus, 0);
+
+		if (child < 0) {
+			/* ECHILD means there is nothing left to wait for */
+			if (errno == ECHILD)
+				return;
+			xlog(L_FATAL, "%s: can't wait: %s\n", prog,
+					strerror(errno));
+		}
+
+		/* Note: because we SIG_IGN'd SIGCHLD earlier, this
+		 * does not happen on 2.6 kernels, and waitpid() blocks
+		 * until all the children are dead then returns with
+		 * -ECHILD.  But, we don't need to do anything on the
+		 * death of individual workers, so we don't care. */
+		xlog(L_NOTICE, "%s: reaped child %d, status %d\n",
+				prog, (int)child, wstatus);
+	}
+}
+
+/*
+ * Fork num_threads worker children and wait for them.
+ *
+ * @prog:        program name used as a prefix in log messages
+ * @num_threads: number of worker processes to create
+ *
+ * Returns 1 when the calling process should go on to run the service
+ * loop itself: either no workers were requested (num_threads <= 1) or
+ * this is one of the forked children.  Returns 0 in the parent once all
+ * workers have been reaped.
+ */
+int
+cache_fork_workers(char *prog, int num_threads)
+{
+ int i;
+ pid_t pid;
+
+ /* A single "thread" means the caller serves requests itself */
+ if (num_threads <= 1)
+ return 1;
+
+ xlog(L_NOTICE, "%s: starting %d threads\n", prog, num_threads);
+
+ for (i = 0 ; i < num_threads ; i++) {
+ pid = fork();
+ if (pid < 0) {
+ /* NOTE(review): presumably L_FATAL does not return - confirm in xlog */
+ xlog(L_FATAL, "%s: cannot fork: %s\n", prog,
+ strerror(errno));
+ }
+ if (pid == 0) {
+ /* worker child */
+
+ /* Re-enable the default action on SIGTERM et al
+ * so that workers die naturally when sent them.
+ * Only the parent unregisters with pmap and
+ * hence needs to do special SIGTERM handling. */
+ struct sigaction sa;
+ sa.sa_handler = SIG_DFL;
+ sa.sa_flags = 0;
+ sigemptyset(&sa.sa_mask);
+ sigaction(SIGHUP, &sa, NULL);
+ sigaction(SIGINT, &sa, NULL);
+ sigaction(SIGTERM, &sa, NULL);
+
+ /* fall into my_svc_run in caller */
+ return 1;
+ }
+ }
+
+ /* in parent */
+ cache_wait_for_workers(prog);
+ return 0;
+}