diff options
Diffstat (limited to 'src/os/fs')
-rw-r--r-- | src/os/fs/FS.cc | 206 | ||||
-rw-r--r-- | src/os/fs/FS.h | 56 | ||||
-rw-r--r-- | src/os/fs/XFS.cc | 54 | ||||
-rw-r--r-- | src/os/fs/XFS.h | 33 | ||||
-rw-r--r-- | src/os/fs/ZFS.cc | 89 | ||||
-rw-r--r-- | src/os/fs/ZFS.h | 42 | ||||
-rw-r--r-- | src/os/fs/btrfs_ioctl.h | 203 |
7 files changed, 683 insertions, 0 deletions
diff --git a/src/os/fs/FS.cc b/src/os/fs/FS.cc
new file mode 100644
index 000000000..a7d085402
--- /dev/null
+++ b/src/os/fs/FS.cc
@@ -0,0 +1,206 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#ifdef __linux__
+#include <linux/falloc.h>
+#endif
+
+#include "FS.h"
+
+#include "acconfig.h"
+
+#ifdef HAVE_LIBXFS
+#include "XFS.h"
+#endif
+
+#if defined(__APPLE__) || defined(__FreeBSD__)
+#include <sys/mount.h>
+#else
+#include <sys/vfs.h>
+#endif
+#include "include/compat.h"
+
+// ---------------
+
+// Return a newly allocated helper for the given statfs f_type: an XFS
+// instance for XFS_SUPER_MAGIC (when built with libxfs), otherwise the
+// generic FS.  The caller owns the returned object.
+FS *FS::create(uint64_t f_type)
+{
+  switch (f_type) {
+#ifdef HAVE_LIBXFS
+  case XFS_SUPER_MAGIC:
+    return new XFS;
+#endif
+  default:
+    return new FS;
+  }
+}
+
+// Like create(), but detect the filesystem type of the open descriptor fd.
+FS *FS::create_by_fd(int fd)
+{
+  struct statfs st;
+  if (::fstatfs(fd, &st) < 0) {
+    // st is uninitialized on failure; do not dispatch on garbage, fall
+    // back to the generic implementation instead.
+    return new FS;
+  }
+  return create(st.f_type);
+}
+
+// ---------------
+
+// Generic filesystems have no allocation hint; always succeeds.
+int FS::set_alloc_hint(int fd, uint64_t hint)
+{
+  return 0;  // no-op
+}
+
+#ifdef HAVE_NAME_TO_HANDLE_AT
+// Store the opaque handle for fd (a struct file_handle header followed by
+// handle_bytes of payload) in *h.  Returns 0 on success or -errno.
+int FS::get_handle(int fd, std::string *h)
+{
+  char buf[sizeof(struct file_handle) + MAX_HANDLE_SZ];
+  struct file_handle *fh = (struct file_handle *)buf;
+  int mount_id;
+
+  fh->handle_bytes = MAX_HANDLE_SZ;
+  int r = name_to_handle_at(fd, "", fh, &mount_id, AT_EMPTY_PATH);
+  if (r < 0) {
+    return -errno;
+  }
+  *h = std::string(buf, fh->handle_bytes + sizeof(struct file_handle));
+  return 0;
+}
+
+// Open the file named by a handle produced by get_handle().  Returns the
+// new descriptor, -EINVAL/-ERANGE for a malformed handle, or -errno.
+int FS::open_handle(int mount_fd, const std::string& h, int flags)
+{
+  if (h.length() < sizeof(struct file_handle)) {
+    return -EINVAL;
+  }
+  struct file_handle *fh = (struct file_handle *)h.data();
+  // handle_bytes counts only the payload that follows the header, so the
+  // header size must be excluded before comparing against the string.
+  if (fh->handle_bytes > h.length() - sizeof(struct file_handle)) {
+    return -ERANGE;
+  }
+  int fd = open_by_handle_at(mount_fd, fh, flags);
+  if (fd < 0)
+    return -errno;
+  return fd;
+}
+
+#else  // HAVE_NAME_TO_HANDLE_AT
+
+int FS::get_handle(int fd, std::string *h)
+{
+  return -EOPNOTSUPP;
+}
+
+int FS::open_handle(int mount_fd, const std::string& h, int flags)
+{
+  return -EOPNOTSUPP;
+}
+
+#endif // HAVE_NAME_TO_HANDLE_AT
+
+// Not implemented yet: aborts via ceph_abort_msg() if called.
+int FS::copy_file_range(int to_fd, uint64_t to_offset,
+                        int from_fd,
+                        uint64_t from_offset, uint64_t from_len)
+{
+  ceph_abort_msg("write me");
+}
+
+// Make the range [offset, offset+length) of fd read back as zeros.  Tries
+// to punch a hole with fallocate() where available, otherwise writes
+// explicit zeros.  Returns 0 on success or -errno.
+int FS::zero(int fd, uint64_t offset, uint64_t length)
+{
+  int r;
+
+  /*
+
+    From the fallocate(2) man page:
+
+      Specifying the FALLOC_FL_PUNCH_HOLE flag (available since Linux 2.6.38)
+      in mode deallocates space (i.e., creates a hole) in the byte range
+      starting at offset and continuing for len bytes. Within the specified
+      range, partial filesystem blocks are zeroed, and whole filesystem
+      blocks are removed from the file. After a successful call, subsequent
+      reads from this range will return zeroes.
+
+      The FALLOC_FL_PUNCH_HOLE flag must be ORed with FALLOC_FL_KEEP_SIZE in
+      mode; in other words, even when punching off the end of the file, the
+      file size (as reported by stat(2)) does not change.
+
+      Not all filesystems support FALLOC_FL_PUNCH_HOLE; if a filesystem
+      doesn't support the operation, an error is returned. The operation is
+      supported on at least the following filesystems:
+
+       * XFS (since Linux 2.6.38)
+
+       * ext4 (since Linux 3.0)
+
+       * Btrfs (since Linux 3.7)
+
+       * tmpfs (since Linux 3.5)
+
+    So: we only do this if PUNCH_HOLE *and* KEEP_SIZE are defined.
+
+  */
+#if !defined(__APPLE__) && !defined(__FreeBSD__)
+# ifdef CEPH_HAVE_FALLOCATE
+#  if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
+  // first try fallocate
+  r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, length);
+  if (r < 0) {
+    r = -errno;
+  }
+  if (r != -EOPNOTSUPP) {
+    goto out;  // success, or a real error
+  }
+  // if that failed (-EOPNOTSUPP), fall back to writing zeros.
+#  endif
+# endif
+#endif
+
+  {
+    // fall back to writing zeros
+    ceph::bufferlist bl;
+    bl.append_zero(length);
+    // lseek64() returns a 64-bit offset: test it directly rather than
+    // truncating it into the int r, which misreports offsets >= 2 GiB.
+    if (::lseek64(fd, offset, SEEK_SET) < 0) {
+      r = -errno;
+      goto out;
+    }
+    r = bl.write_fd(fd);
+  }
+
+ out:
+  return r;
+}
+
+// ---------------
+
diff --git a/src/os/fs/FS.h b/src/os/fs/FS.h
new file mode 100644
index 000000000..a1852f49f
--- /dev/null
+++ b/src/os/fs/FS.h
@@ -0,0 +1,56 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_OS_FS_H
+#define CEPH_OS_FS_H
+
+#include <errno.h>
+#include <time.h>
+
+#include <string>
+
+#include "include/types.h"
+#include "common/Cond.h"
+
+// Generic filesystem helper.  Subclasses (e.g. XFS) override the
+// operations that have a filesystem-specific implementation.
+class FS {
+public:
+  virtual ~FS() { }
+
+  // factories; the caller owns the returned object
+  static FS *create(uint64_t f_type);
+  static FS *create_by_fd(int fd);
+
+  // short name identifying the implementation
+  virtual const char *get_name() {
+    return "generic";
+  }
+
+  // per-file allocation hint; the generic implementation is a no-op
+  virtual int set_alloc_hint(int fd, uint64_t hint);
+
+  // opaque file handles; -EOPNOTSUPP without name_to_handle_at support
+  virtual int get_handle(int fd, std::string *h);
+  virtual int open_handle(int mount_fd, const std::string& h, int flags);
+
+  virtual int copy_file_range(int to_fd, uint64_t to_offset,
+                              int from_fd,
+                              uint64_t from_offset, uint64_t from_len);
+  virtual int zero(int fd, uint64_t offset, uint64_t length);
+
+  // -- aio --
+};
+
+#endif
diff --git a/src/os/fs/XFS.cc b/src/os/fs/XFS.cc
new file mode 100644
index 000000000..c72ee1a08
--- /dev/null
+++ b/src/os/fs/XFS.cc
@@ -0,0 +1,54 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "XFS.h"
+
+#include <xfs/xfs.h>
+
+// Set the XFS extent size hint of the regular file fd to val.  Returns 0
+// without changing anything if the hint is already val or the file already
+// has extents; -EINVAL if fd is not a regular file; -errno on failure.
+int XFS::set_alloc_hint(int fd, uint64_t val)
+{
+  struct fsxattr fsx;
+  struct stat sb;
+
+  if (fstat(fd, &sb) < 0) {
+    return -errno;
+  }
+  if (!S_ISREG(sb.st_mode)) {
+    return -EINVAL;
+  }
+
+  if (ioctl(fd, XFS_IOC_FSGETXATTR, &fsx) < 0) {
+    return -errno;
+  }
+
+  // already set?
+  if ((fsx.fsx_xflags & XFS_XFLAG_EXTSIZE) && fsx.fsx_extsize == val)
+    return 0;
+
+  // xfs won't change extent size if any extents are allocated
+  if (fsx.fsx_nextents != 0)
+    return 0;
+
+  fsx.fsx_xflags |= XFS_XFLAG_EXTSIZE;
+  fsx.fsx_extsize = val;
+
+  if (ioctl(fd, XFS_IOC_FSSETXATTR, &fsx) < 0) {
+    return -errno;
+  }
+
+  return 0;
+}
diff --git a/src/os/fs/XFS.h b/src/os/fs/XFS.h
new file mode 100644
index 000000000..f0ea717e3
--- /dev/null
+++ b/src/os/fs/XFS.h
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_OS_XFS_H
+#define CEPH_OS_XFS_H
+
+#include "FS.h"
+
+# ifndef XFS_SUPER_MAGIC
+#define XFS_SUPER_MAGIC 0x58465342 /* "XFSB" */
+# endif
+
+// FS specialization providing an XFS-specific set_alloc_hint().
+class XFS : public FS {
+public:
+  const char *get_name() override {
+    return "xfs";
+  }
+  int set_alloc_hint(int fd, uint64_t hint) override;
+};
+
+#endif
diff --git a/src/os/fs/ZFS.cc b/src/os/fs/ZFS.cc
new file mode 100644
index 000000000..02520796c
--- /dev/null
+++ b/src/os/fs/ZFS.cc
@@ -0,0 +1,89 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#define HAVE_IOCTL_IN_SYS_IOCTL_H
+#include <libzfs.h>
+#include "ZFS.h"
+
+// Mirror the libzfs zfs_type_t values so that users of ZFS.h do not need
+// to include libzfs.h themselves.
+const int ZFS::TYPE_FILESYSTEM = ZFS_TYPE_FILESYSTEM;
+const int ZFS::TYPE_SNAPSHOT = ZFS_TYPE_SNAPSHOT;
+const int ZFS::TYPE_VOLUME = ZFS_TYPE_VOLUME;
+const int ZFS::TYPE_POOL = ZFS_TYPE_POOL;
+const int ZFS::TYPE_DATASET = ZFS_TYPE_DATASET;
+
+// Release the libzfs handle acquired by init(), if any.
+ZFS::~ZFS()
+{
+  if (g_zfs)
+    ::libzfs_fini((libzfs_handle_t*)g_zfs);
+}
+
+// Acquire the libzfs handle.  Returns 0 on success, -EINVAL on failure.
+int ZFS::init()
+{
+  g_zfs = ::libzfs_init();
+  return g_zfs ? 0 : -EINVAL;
+}
+
+ZFS::Handle *ZFS::open(const char *n, int t)
+{
+  return (ZFS::Handle*)::zfs_open((libzfs_handle_t*)g_zfs, n, (zfs_type_t)t);
+}
+
+void ZFS::close(ZFS::Handle *h)
+{
+  ::zfs_close((zfs_handle_t*)h);
+}
+
+const char *ZFS::get_name(ZFS::Handle *h)
+{
+  return ::zfs_get_name((zfs_handle_t*)h);
+}
+
+ZFS::Handle *ZFS::path_to_zhandle(const char *p, int t)
+{
+  return ::zfs_path_to_zhandle((libzfs_handle_t*)g_zfs, (char *)p, (zfs_type_t)t);
+}
+
+int ZFS::create(const char *n, int t)
+{
+  return ::zfs_create((libzfs_handle_t*)g_zfs, n, (zfs_type_t)t, NULL);
+}
+
+int ZFS::snapshot(const char *n, bool r)
+{
+  return ::zfs_snapshot((libzfs_handle_t*)g_zfs, n, (boolean_t)r, NULL);
+}
+
+int ZFS::rollback(ZFS::Handle *h, ZFS::Handle *snap, bool f)
+{
+  return ::zfs_rollback((zfs_handle_t*)h, (zfs_handle_t*)snap, (boolean_t)f);
+}
+
+int ZFS::destroy_snaps(ZFS::Handle *h, const char *n, bool d)
+{
+  return ::zfs_destroy_snaps((zfs_handle_t*)h, (char *)n, (boolean_t)d);
+}
+
+bool ZFS::is_mounted(ZFS::Handle *h, char **p)
+{
+  return (bool)::zfs_is_mounted((zfs_handle_t*)h, p);
+}
+
+int ZFS::mount(ZFS::Handle *h, const char *o, int f)
+{
+  return ::zfs_mount((zfs_handle_t*)h, o, f);
+}
+
+int ZFS::umount(ZFS::Handle *h, const char *o, int f)
+{
+  return ::zfs_unmount((zfs_handle_t*)h, o, f);
+}
+
+int ZFS::iter_snapshots_sorted(ZFS::Handle *h, ZFS::iter_func f, void *d)
+{
+  return ::zfs_iter_snapshots_sorted((zfs_handle_t*)h, (zfs_iter_f)f, d);
+}
diff --git a/src/os/fs/ZFS.h b/src/os/fs/ZFS.h
new file mode 100644
index 000000000..3ebe11107
--- /dev/null
+++ b/src/os/fs/ZFS.h
@@ -0,0 +1,42 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_ZFS_H
+#define CEPH_ZFS_H
+
+// Simple wrapper to hide libzfs.h. (it conflicts with standard linux headers)
+class ZFS {
+  // opaque libzfs library handle; NULL until init() succeeds
+  void *g_zfs;
+public:
+
+  // dataset type selectors (libzfs zfs_type_t values)
+  static const int TYPE_FILESYSTEM;
+  static const int TYPE_SNAPSHOT;
+  static const int TYPE_VOLUME;
+  static const int TYPE_POOL;
+  static const int TYPE_DATASET;
+
+  // opaque dataset handle (a libzfs zfs_handle_t)
+  typedef void Handle;
+  typedef int (*iter_func)(Handle *, void *);
+
+  static const char *get_name(Handle *);
+
+  ZFS() : g_zfs(NULL) {}
+  ~ZFS();
+  int init();
+  Handle *open(const char *, int);
+  void close(Handle *);
+  Handle *path_to_zhandle(const char *, int);
+  int create(const char *, int);
+  int snapshot(const char *, bool);
+  int rollback(Handle *, Handle *, bool);
+  int destroy_snaps(Handle *, const char *, bool);
+  int iter_snapshots_sorted(Handle *, iter_func, void *);
+  int mount(Handle *, const char *, int);
+  int umount(Handle *, const char *, int);
+  bool is_mounted(Handle *, char **);
+};
+
+#endif
diff --git a/src/os/fs/btrfs_ioctl.h b/src/os/fs/btrfs_ioctl.h
new file mode 100644
index 000000000..277498ca8
--- /dev/null
+++ b/src/os/fs/btrfs_ioctl.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __IOCTL_
+#define __IOCTL_
+
+#if defined(__linux__)
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#elif defined(__FreeBSD__)
+#include <sys/ioctl.h>
+#endif
+
+#define BTRFS_IOCTL_MAGIC 0x94
+#define BTRFS_VOL_NAME_MAX 255
+
+/* this should be 4k */
+#define BTRFS_PATH_NAME_MAX 4087
+struct btrfs_ioctl_vol_args {
+        __s64 fd;
+        char name[BTRFS_PATH_NAME_MAX + 1];
+};
+
+#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
+
+#define BTRFS_SUBVOL_NAME_MAX 4039
+struct btrfs_ioctl_vol_args_v2 {
+        __s64 fd;
+        __u64 transid;
+        __u64 flags;
+        __u64 unused[4];
+        char name[BTRFS_SUBVOL_NAME_MAX + 1];
+};
+
+#define BTRFS_INO_LOOKUP_PATH_MAX 4080
+struct btrfs_ioctl_ino_lookup_args {
+        __u64 treeid;
+        __u64 objectid;
+        char name[BTRFS_INO_LOOKUP_PATH_MAX];
+};
+
+struct btrfs_ioctl_search_key {
+        /* which root are we searching. 0 is the tree of tree roots */
+        __u64 tree_id;
+
+        /* keys returned will be >= min and <= max */
+        __u64 min_objectid;
+        __u64 max_objectid;
+
+        /* keys returned will be >= min and <= max */
+        __u64 min_offset;
+        __u64 max_offset;
+
+        /* max and min transids to search for */
+        __u64 min_transid;
+        __u64 max_transid;
+
+        /* keys returned will be >= min and <= max */
+        __u32 min_type;
+        __u32 max_type;
+
+        /*
+         * how many items did userland ask for, and how many are we
+         * returning
+         */
+        __u32 nr_items;
+
+        /* align to 64 bits */
+        __u32 unused;
+
+        /* some extra for later */
+        __u64 unused1;
+        __u64 unused2;
+        __u64 unused3;
+        __u64 unused4;
+};
+
+struct btrfs_ioctl_search_header {
+        __u64 transid;
+        __u64 objectid;
+        __u64 offset;
+        __u32 type;
+        __u32 len;
+};
+
+#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key))
+/*
+ * the buf is an array of search headers where
+ * each header is followed by the actual item
+ * the type field is expanded to 32 bits for alignment
+ */
+struct btrfs_ioctl_search_args {
+        struct btrfs_ioctl_search_key key;
+        char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
+};
+
+struct btrfs_ioctl_clone_range_args {
+        __s64 src_fd;
+        __u64 src_offset, src_length;
+        __u64 dest_offset;
+};
+
+/* flags for the defrag range ioctl */
+#define BTRFS_DEFRAG_RANGE_COMPRESS 1
+#define BTRFS_DEFRAG_RANGE_START_IO 2
+
+struct btrfs_ioctl_defrag_range_args {
+        /* start of the defrag operation */
+        __u64 start;
+
+        /* number of bytes to defrag, use (u64)-1 to say all */
+        __u64 len;
+
+        /*
+         * flags for the operation, which can include turning
+         * on compression for this one defrag
+         */
+        __u64 flags;
+
+        /*
+         * any extent bigger than this will be considered
+         * already defragged. Use 0 to take the kernel default
+         * Use 1 to say every single extent must be rewritten
+         */
+        __u32 extent_thresh;
+
+        /* spare for later */
+        __u32 unused[5];
+};
+
+struct btrfs_ioctl_space_info {
+        __u64 flags;
+        __u64 total_bytes;
+        __u64 used_bytes;
+};
+
+struct btrfs_ioctl_space_args {
+        __u64 space_slots;
+        __u64 total_spaces;
+        struct btrfs_ioctl_space_info spaces[0];
+};
+
+#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
+                                   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
+                                   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \
+                                   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
+                                   struct btrfs_ioctl_vol_args)
+/* trans start and trans end are dangerous, and only for
+ * use by applications that know how to avoid the
+ * resulting deadlocks
+ */
+#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6)
+#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7)
+#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8)
+
+#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
+#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
+                                   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \
+                                   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
+                                   struct btrfs_ioctl_vol_args)
+
+#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \
+                                   struct btrfs_ioctl_clone_range_args)
+
+#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
+                                   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
+                                   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \
+                                   struct btrfs_ioctl_defrag_range_args)
+#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
+                                   struct btrfs_ioctl_search_args)
+#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
+                                   struct btrfs_ioctl_ino_lookup_args)
+/* __u64 rather than the kernel-internal u64, which userspace lacks */
+#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64)
+#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
+                                   struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
+#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
+#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
+                                   struct btrfs_ioctl_vol_args_v2)
+#endif