summaryrefslogtreecommitdiffstats
path: root/src/os/fs
diff options
context:
space:
mode:
Diffstat (limited to 'src/os/fs')
-rw-r--r--src/os/fs/FS.cc186
-rw-r--r--src/os/fs/FS.h50
-rw-r--r--src/os/fs/XFS.cc55
-rw-r--r--src/os/fs/XFS.h31
-rw-r--r--src/os/fs/ZFS.cc83
-rw-r--r--src/os/fs/ZFS.h39
-rw-r--r--src/os/fs/btrfs_ioctl.h201
7 files changed, 645 insertions, 0 deletions
diff --git a/src/os/fs/FS.cc b/src/os/fs/FS.cc
new file mode 100644
index 000000000..a7d085402
--- /dev/null
+++ b/src/os/fs/FS.cc
@@ -0,0 +1,186 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#ifdef __linux__
+#include <linux/falloc.h>
+#endif
+
+#include "FS.h"
+
+#include "acconfig.h"
+
+#ifdef HAVE_LIBXFS
+#include "XFS.h"
+#endif
+
+#if defined(__APPLE__) || defined(__FreeBSD__)
+#include <sys/mount.h>
+#else
+#include <sys/vfs.h>
+#endif
+#include "include/compat.h"
+
+// ---------------
+
+FS *FS::create(uint64_t f_type)
+{
+ switch (f_type) {
+#ifdef HAVE_LIBXFS
+ case XFS_SUPER_MAGIC:
+ return new XFS;
+#endif
+ default:
+ return new FS;
+ }
+}
+
+FS *FS::create_by_fd(int fd)
+{
+ struct statfs st;
+ ::fstatfs(fd, &st);
+ return create(st.f_type);
+}
+
+// ---------------
+
+int FS::set_alloc_hint(int fd, uint64_t hint)
+{
+ return 0; // no-op
+}
+
+#ifdef HAVE_NAME_TO_HANDLE_AT
+int FS::get_handle(int fd, std::string *h)
+{
+ char buf[sizeof(struct file_handle) + MAX_HANDLE_SZ];
+ struct file_handle *fh = (struct file_handle *)buf;
+ int mount_id;
+
+ fh->handle_bytes = MAX_HANDLE_SZ;
+ int r = name_to_handle_at(fd, "", fh, &mount_id, AT_EMPTY_PATH);
+ if (r < 0) {
+ return -errno;
+ }
+ *h = std::string(buf, fh->handle_bytes + sizeof(struct file_handle));
+ return 0;
+}
+
+int FS::open_handle(int mount_fd, const std::string& h, int flags)
+{
+ if (h.length() < sizeof(struct file_handle)) {
+ return -EINVAL;
+ }
+ struct file_handle *fh = (struct file_handle *)h.data();
+ if (fh->handle_bytes > h.length()) {
+ return -ERANGE;
+ }
+ int fd = open_by_handle_at(mount_fd, fh, flags);
+ if (fd < 0)
+ return -errno;
+ return fd;
+}
+
+#else // HAVE_NAME_TO_HANDLE_AT
+
+int FS::get_handle(int fd, std::string *h)
+{
+ return -EOPNOTSUPP;
+}
+
+int FS::open_handle(int mount_fd, const std::string& h, int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+#endif // HAVE_NAME_TO_HANDLE_AT
+
+int FS::copy_file_range(int to_fd, uint64_t to_offset,
+ int from_fd,
+ uint64_t from_offset, uint64_t from_len)
+{
+ ceph_abort_msg("write me");
+}
+
+int FS::zero(int fd, uint64_t offset, uint64_t length)
+{
+ int r;
+
+ /*
+
+ From the fallocate(2) man page:
+
+ Specifying the FALLOC_FL_PUNCH_HOLE flag (available since Linux 2.6.38)
+ in mode deallocates space (i.e., creates a hole) in the byte range
+ starting at offset and continuing for len bytes. Within the specified
+ range, partial filesystem blocks are zeroed, and whole filesystem
+ blocks are removed from the file. After a successful call, subsequent
+ reads from this range will return zeroes.
+
+ The FALLOC_FL_PUNCH_HOLE flag must be ORed with FALLOC_FL_KEEP_SIZE in
+ mode; in other words, even when punching off the end of the file, the
+ file size (as reported by stat(2)) does not change.
+
+ Not all filesystems support FALLOC_FL_PUNCH_HOLE; if a filesystem
+ doesn't support the operation, an error is returned. The operation is
+ supported on at least the following filesystems:
+
+ * XFS (since Linux 2.6.38)
+
+ * ext4 (since Linux 3.0)
+
+ * Btrfs (since Linux 3.7)
+
+ * tmpfs (since Linux 3.5)
+
+ So: we only do this is PUNCH_HOLE *and* KEEP_SIZE are defined.
+
+ */
+#if !defined(__APPLE__) && !defined(__FreeBSD__)
+# ifdef CEPH_HAVE_FALLOCATE
+# ifdef FALLOC_FL_KEEP_SIZE
+ // first try fallocate
+ r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, length);
+ if (r < 0) {
+ r = -errno;
+ }
+ if (r != -EOPNOTSUPP) {
+ goto out; // a real error
+ }
+ // if that failed (-EOPNOTSUPP), fall back to writing zeros.
+# endif
+# endif
+#endif
+
+ {
+ // fall back to writing zeros
+ ceph::bufferlist bl;
+ bl.append_zero(length);
+ r = ::lseek64(fd, offset, SEEK_SET);
+ if (r < 0) {
+ r = -errno;
+ goto out;
+ }
+ r = bl.write_fd(fd);
+ }
+
+ out:
+ return r;
+}
+
+// ---------------
+
diff --git a/src/os/fs/FS.h b/src/os/fs/FS.h
new file mode 100644
index 000000000..a1852f49f
--- /dev/null
+++ b/src/os/fs/FS.h
@@ -0,0 +1,50 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_OS_FS_H
+#define CEPH_OS_FS_H
+
+#include <errno.h>
+#include <time.h>
+
+#include <string>
+
+#include "include/types.h"
+#include "common/Cond.h"
+
+class FS {
+public:
+ virtual ~FS() { }
+
+ static FS *create(uint64_t f_type);
+ static FS *create_by_fd(int fd);
+
+ virtual const char *get_name() {
+ return "generic";
+ }
+
+ virtual int set_alloc_hint(int fd, uint64_t hint);
+
+ virtual int get_handle(int fd, std::string *h);
+ virtual int open_handle(int mount_fd, const std::string& h, int flags);
+
+ virtual int copy_file_range(int to_fd, uint64_t to_offset,
+ int from_fd,
+ uint64_t from_offset, uint64_t from_len);
+ virtual int zero(int fd, uint64_t offset, uint64_t length);
+
+ // -- aio --
+};
+
+#endif
diff --git a/src/os/fs/XFS.cc b/src/os/fs/XFS.cc
new file mode 100644
index 000000000..c72ee1a08
--- /dev/null
+++ b/src/os/fs/XFS.cc
@@ -0,0 +1,55 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "XFS.h"
+
+#include <xfs/xfs.h>
+
+int XFS::set_alloc_hint(int fd, uint64_t val)
+{
+ struct fsxattr fsx;
+ struct stat sb;
+ int ret;
+
+ if (fstat(fd, &sb) < 0) {
+ ret = -errno;
+ return ret;
+ }
+ if (!S_ISREG(sb.st_mode)) {
+ return -EINVAL;
+ }
+
+ if (ioctl(fd, XFS_IOC_FSGETXATTR, &fsx) < 0) {
+ ret = -errno;
+ return ret;
+ }
+
+ // already set?
+ if ((fsx.fsx_xflags & XFS_XFLAG_EXTSIZE) && fsx.fsx_extsize == val)
+ return 0;
+
+ // xfs won't change extent size if any extents are allocated
+ if (fsx.fsx_nextents != 0)
+ return 0;
+
+ fsx.fsx_xflags |= XFS_XFLAG_EXTSIZE;
+ fsx.fsx_extsize = val;
+
+ if (ioctl(fd, XFS_IOC_FSSETXATTR, &fsx) < 0) {
+ ret = -errno;
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/src/os/fs/XFS.h b/src/os/fs/XFS.h
new file mode 100644
index 000000000..f0ea717e3
--- /dev/null
+++ b/src/os/fs/XFS.h
@@ -0,0 +1,31 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_OS_XFS_H
+#define CEPH_OS_XFS_H
+
+#include "FS.h"
+
+# ifndef XFS_SUPER_MAGIC
+#define XFS_SUPER_MAGIC 0x58465342
+# endif
+
+class XFS : public FS {
+ const char *get_name() override {
+ return "xfs";
+ }
+ int set_alloc_hint(int fd, uint64_t hint) override;
+};
+
+#endif
diff --git a/src/os/fs/ZFS.cc b/src/os/fs/ZFS.cc
new file mode 100644
index 000000000..02520796c
--- /dev/null
+++ b/src/os/fs/ZFS.cc
@@ -0,0 +1,83 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#define HAVE_IOCTL_IN_SYS_IOCTL_H
+#include <libzfs.h>
+#include "ZFS.h"
+
+const int ZFS::TYPE_FILESYSTEM = ZFS_TYPE_FILESYSTEM;
+const int ZFS::TYPE_SNAPSHOT = ZFS_TYPE_SNAPSHOT;
+const int ZFS::TYPE_VOLUME = ZFS_TYPE_VOLUME;
+const int ZFS::TYPE_DATASET = ZFS_TYPE_DATASET;
+
+ZFS::~ZFS()
+{
+ if (g_zfs)
+ ::libzfs_fini((libzfs_handle_t*)g_zfs);
+}
+
+int ZFS::init()
+{
+ g_zfs = ::libzfs_init();
+ return g_zfs ? 0 : -EINVAL;
+}
+
+ZFS::Handle *ZFS::open(const char *n, int t)
+{
+ return (ZFS::Handle*)::zfs_open((libzfs_handle_t*)g_zfs, n, (zfs_type_t)t);
+}
+
+void ZFS::close(ZFS::Handle *h)
+{
+ ::zfs_close((zfs_handle_t*)h);
+}
+
+const char *ZFS::get_name(ZFS::Handle *h)
+{
+ return ::zfs_get_name((zfs_handle_t*)h);
+}
+
+ZFS::Handle *ZFS::path_to_zhandle(const char *p, int t)
+{
+ return ::zfs_path_to_zhandle((libzfs_handle_t*)g_zfs, (char *)p, (zfs_type_t)t);
+}
+
+int ZFS::create(const char *n, int t)
+{
+ return ::zfs_create((libzfs_handle_t*)g_zfs, n, (zfs_type_t)t, NULL);
+}
+
+int ZFS::snapshot(const char *n, bool r)
+{
+ return ::zfs_snapshot((libzfs_handle_t*)g_zfs, n, (boolean_t)r, NULL);
+}
+
+int ZFS::rollback(ZFS::Handle *h, ZFS::Handle *snap, bool f)
+{
+ return ::zfs_rollback((zfs_handle_t*)h, (zfs_handle_t*)snap, (boolean_t)f);
+}
+
+int ZFS::destroy_snaps(ZFS::Handle *h, const char *n, bool d)
+{
+ return ::zfs_destroy_snaps((zfs_handle_t*)h, (char *)n, (boolean_t)d);
+}
+
+bool ZFS::is_mounted(ZFS::Handle *h, char **p)
+{
+ return (bool)::zfs_is_mounted((zfs_handle_t*)h, p);
+}
+
+int ZFS::mount(ZFS::Handle *h, const char *o, int f)
+{
+ return ::zfs_mount((zfs_handle_t*)h, o, f);
+}
+
+int ZFS::umount(ZFS::Handle *h, const char *o, int f)
+{
+ return ::zfs_unmount((zfs_handle_t*)h, o, f);
+}
+
+int ZFS::iter_snapshots_sorted(ZFS::Handle *h, ZFS::iter_func f, void *d)
+{
+ return ::zfs_iter_snapshots_sorted((zfs_handle_t*)h, (zfs_iter_f)f, d);
+}
diff --git a/src/os/fs/ZFS.h b/src/os/fs/ZFS.h
new file mode 100644
index 000000000..3ebe11107
--- /dev/null
+++ b/src/os/fs/ZFS.h
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_ZFS_H
+#define CEPH_ZFS_H
+
+// Simple wrapper to hide libzfs.h. (it conflicts with standard linux headers)
+class ZFS {
+ void *g_zfs;
+public:
+
+ static const int TYPE_FILESYSTEM;
+ static const int TYPE_SNAPSHOT;
+ static const int TYPE_VOLUME;
+ static const int TYPE_POOL;
+ static const int TYPE_DATASET;
+
+ typedef void Handle;
+ typedef int (*iter_func)(Handle *, void *);
+
+ static const char *get_name(Handle *);
+
+ ZFS() : g_zfs(NULL) {}
+ ~ZFS();
+ int init();
+ Handle *open(const char *, int);
+ void close(Handle *);
+ Handle *path_to_zhandle(const char *, int);
+ int create(const char *, int);
+ int snapshot(const char *, bool);
+ int rollback(Handle *, Handle *, bool);
+ int destroy_snaps(Handle *, const char *, bool);
+ int iter_snapshots_sorted(Handle *, iter_func, void *);
+ int mount(Handle *, const char *, int);
+ int umount(Handle *, const char *, int);
+ bool is_mounted(Handle *, char **);
+};
+
+#endif
diff --git a/src/os/fs/btrfs_ioctl.h b/src/os/fs/btrfs_ioctl.h
new file mode 100644
index 000000000..277498ca8
--- /dev/null
+++ b/src/os/fs/btrfs_ioctl.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __IOCTL_
+#define __IOCTL_
+
+#if defined(__linux__)
+#include <linux/ioctl.h>
+#elif defined(__FreeBSD__)
+#include <sys/ioctl.h>
+#endif
+
+#define BTRFS_IOCTL_MAGIC 0x94
+#define BTRFS_VOL_NAME_MAX 255
+
+/* this should be 4k */
+#define BTRFS_PATH_NAME_MAX 4087
+struct btrfs_ioctl_vol_args {
+ __s64 fd;
+ char name[BTRFS_PATH_NAME_MAX + 1];
+};
+
+#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
+
+#define BTRFS_SUBVOL_NAME_MAX 4039
+struct btrfs_ioctl_vol_args_v2 {
+ __s64 fd;
+ __u64 transid;
+ __u64 flags;
+ __u64 unused[4];
+ char name[BTRFS_SUBVOL_NAME_MAX + 1];
+};
+
+#define BTRFS_INO_LOOKUP_PATH_MAX 4080
+struct btrfs_ioctl_ino_lookup_args {
+ __u64 treeid;
+ __u64 objectid;
+ char name[BTRFS_INO_LOOKUP_PATH_MAX];
+};
+
+struct btrfs_ioctl_search_key {
+ /* which root are we searching. 0 is the tree of tree roots */
+ __u64 tree_id;
+
+ /* keys returned will be >= min and <= max */
+ __u64 min_objectid;
+ __u64 max_objectid;
+
+ /* keys returned will be >= min and <= max */
+ __u64 min_offset;
+ __u64 max_offset;
+
+ /* max and min transids to search for */
+ __u64 min_transid;
+ __u64 max_transid;
+
+ /* keys returned will be >= min and <= max */
+ __u32 min_type;
+ __u32 max_type;
+
+ /*
+ * how many items did userland ask for, and how many are we
+ * returning
+ */
+ __u32 nr_items;
+
+ /* align to 64 bits */
+ __u32 unused;
+
+ /* some extra for later */
+ __u64 unused1;
+ __u64 unused2;
+ __u64 unused3;
+ __u64 unused4;
+};
+
+struct btrfs_ioctl_search_header {
+ __u64 transid;
+ __u64 objectid;
+ __u64 offset;
+ __u32 type;
+ __u32 len;
+};
+
+#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key))
+/*
+ * the buf is an array of search headers where
+ * each header is followed by the actual item
+ * the type field is expanded to 32 bits for alignment
+ */
+struct btrfs_ioctl_search_args {
+ struct btrfs_ioctl_search_key key;
+ char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
+};
+
+struct btrfs_ioctl_clone_range_args {
+ __s64 src_fd;
+ __u64 src_offset, src_length;
+ __u64 dest_offset;
+};
+
+/* flags for the defrag range ioctl */
+#define BTRFS_DEFRAG_RANGE_COMPRESS 1
+#define BTRFS_DEFRAG_RANGE_START_IO 2
+
+struct btrfs_ioctl_defrag_range_args {
+ /* start of the defrag operation */
+ __u64 start;
+
+ /* number of bytes to defrag, use (u64)-1 to say all */
+ __u64 len;
+
+ /*
+ * flags for the operation, which can include turning
+ * on compression for this one defrag
+ */
+ __u64 flags;
+
+ /*
+ * any extent bigger than this will be considered
+ * already defragged. Use 0 to take the kernel default
+ * Use 1 to say every single extent must be rewritten
+ */
+ __u32 extent_thresh;
+
+ /* spare for later */
+ __u32 unused[5];
+};
+
+struct btrfs_ioctl_space_info {
+ __u64 flags;
+ __u64 total_bytes;
+ __u64 used_bytes;
+};
+
+struct btrfs_ioctl_space_args {
+ __u64 space_slots;
+ __u64 total_spaces;
+ struct btrfs_ioctl_space_info spaces[0];
+};
+
+#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
+ struct btrfs_ioctl_vol_args)
+/* trans start and trans end are dangerous, and only for
+ * use by applications that know how to avoid the
+ * resulting deadlocks
+ */
+#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6)
+#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7)
+#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8)
+
+#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
+#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
+ struct btrfs_ioctl_vol_args)
+
+#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \
+ struct btrfs_ioctl_clone_range_args)
+
+#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \
+ struct btrfs_ioctl_defrag_range_args)
+#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
+ struct btrfs_ioctl_search_args)
+#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
+ struct btrfs_ioctl_ino_lookup_args)
+#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
+#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
+ struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
+#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
+#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
+ struct btrfs_ioctl_vol_args_v2)
+#endif