diff options
Diffstat (limited to 'src/extent-scan.c')
-rw-r--r-- | src/extent-scan.c | 228 |
1 files changed, 228 insertions, 0 deletions
diff --git a/src/extent-scan.c b/src/extent-scan.c new file mode 100644 index 0000000..15a22a9 --- /dev/null +++ b/src/extent-scan.c @@ -0,0 +1,228 @@ +/* extent-scan.c -- core functions for scanning extents + Copyright (C) 2010-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + + Written by Jie Liu (jeff.liu@oracle.com). */ + +#include <config.h> +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/utsname.h> +#include <assert.h> + +#include "system.h" +#include "extent-scan.h" +#include "fiemap.h" +#include "xstrtol.h" + + +/* Work around Linux kernel issues on BTRFS and EXT4. */ +static bool +extent_need_sync (void) +{ + /* For now always return true, to be on the safe side. + If/when FIEMAP semantics are well defined (before SEEK_HOLE support + is usable) and kernels implementing them are in use, we may relax + this once again. */ + return true; + +#if FIEMAP_BEHAVIOR_IS_DEFINED_AND_USABLE + static int need_sync = -1; + + if (need_sync == -1) + { + struct utsname name; + need_sync = 0; /* No workaround by default. */ + +# ifdef __linux__ + if (uname (&name) != -1 && STRNCMP_LIT (name.release, "2.6.") == 0) + { + unsigned long val; + if (xstrtoul (name.release + 4, NULL, 10, &val, NULL) == LONGINT_OK) + { + if (val < 39) + need_sync = 1; + } + } +# endif + } + + return need_sync; +#endif +} + +/* Allocate space for struct extent_scan, initialize the entries if + necessary and return it as the input argument of extent_scan_read(). */ +extern void +extent_scan_init (int src_fd, struct extent_scan *scan) +{ + scan->fd = src_fd; + scan->ei_count = 0; + scan->ext_info = NULL; + scan->scan_start = 0; + scan->initial_scan_failed = false; + scan->hit_final_extent = false; + scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0; +} + +#ifdef __linux__ +# ifndef FS_IOC_FIEMAP +# define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap) +# endif +/* Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to + obtain a map of file extents excluding holes. */ +extern bool +extent_scan_read (struct extent_scan *scan) +{ + unsigned int si = 0; + struct extent_info *last_ei = scan->ext_info; + + while (true) + { + union { struct fiemap f; char c[4096]; } fiemap_buf; + struct fiemap *fiemap = &fiemap_buf.f; + struct fiemap_extent *fm_extents = &fiemap->fm_extents[0]; + enum { headersize = offsetof (struct fiemap, fm_extents) }; + enum { count = (sizeof fiemap_buf - headersize) / sizeof *fm_extents }; + verify (count > 1); + + /* This is required at least to initialize fiemap->fm_start, + but also serves (in mid 2010) to appease valgrind, which + appears not to know the semantics of the FIEMAP ioctl. */ + memset (&fiemap_buf, 0, sizeof fiemap_buf); + + fiemap->fm_start = scan->scan_start; + fiemap->fm_flags = scan->fm_flags; + fiemap->fm_extent_count = count; + fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start; + + /* Fall back to the standard copy if call ioctl(2) failed for + the first time. */ + if (ioctl (scan->fd, FS_IOC_FIEMAP, fiemap) < 0) + { + if (scan->scan_start == 0) + scan->initial_scan_failed = true; + return false; + } + + /* If 0 extents are returned, then no more scans are needed. */ + if (fiemap->fm_mapped_extents == 0) + { + scan->hit_final_extent = true; + return scan->scan_start != 0; + } + + assert (scan->ei_count <= SIZE_MAX - fiemap->fm_mapped_extents); + scan->ei_count += fiemap->fm_mapped_extents; + { + /* last_ei points into a buffer that may be freed via xnrealloc. + Record its offset and adjust after allocation. */ + size_t prev_idx = last_ei - scan->ext_info; + scan->ext_info = xnrealloc (scan->ext_info, scan->ei_count, + sizeof (struct extent_info)); + last_ei = scan->ext_info + prev_idx; + } + + unsigned int i = 0; + for (i = 0; i < fiemap->fm_mapped_extents; i++) + { + assert (fm_extents[i].fe_logical + <= OFF_T_MAX - fm_extents[i].fe_length); + + verify (sizeof last_ei->ext_flags >= sizeof fm_extents->fe_flags); + + if (si && last_ei->ext_flags + == (fm_extents[i].fe_flags & ~FIEMAP_EXTENT_LAST) + && (last_ei->ext_logical + last_ei->ext_length + == fm_extents[i].fe_logical)) + { + /* Merge previous with last. */ + last_ei->ext_length += fm_extents[i].fe_length; + /* Copy flags in case different. */ + last_ei->ext_flags = fm_extents[i].fe_flags; + } + else if ((si == 0 && scan->scan_start > fm_extents[i].fe_logical) + || (si && (last_ei->ext_logical + last_ei->ext_length + > fm_extents[i].fe_logical))) + { + /* BTRFS before 2.6.38 could return overlapping extents + for sparse files. We adjust the returned extents + rather than failing, as otherwise it would be inefficient + to detect this on the initial scan. */ + uint64_t new_logical; + uint64_t length_adjust; + if (si == 0) + new_logical = scan->scan_start; + else + { + /* We could return here if scan->scan_start == 0 + but don't so as to minimize special cases. */ + new_logical = last_ei->ext_logical + last_ei->ext_length; + } + length_adjust = new_logical - fm_extents[i].fe_logical; + /* If an extent is contained within the previous one, fail. */ + if (length_adjust < fm_extents[i].fe_length) + { + if (scan->scan_start == 0) + scan->initial_scan_failed = true; + return false; + } + fm_extents[i].fe_logical = new_logical; + fm_extents[i].fe_length -= length_adjust; + /* Process the adjusted extent again. */ + i--; + continue; + } + else + { + last_ei = scan->ext_info + si; + last_ei->ext_logical = fm_extents[i].fe_logical; + last_ei->ext_length = fm_extents[i].fe_length; + last_ei->ext_flags = fm_extents[i].fe_flags; + si++; + } + } + + if (last_ei->ext_flags & FIEMAP_EXTENT_LAST) + scan->hit_final_extent = true; + + /* If we have enough extents, discard the last as it might + be merged with one from the next scan. */ + if (si > count && !scan->hit_final_extent) + last_ei = scan->ext_info + --si - 1; + + /* We don't bother reallocating any trailing slots. */ + scan->ei_count = si; + + if (scan->hit_final_extent) + break; + else + scan->scan_start = last_ei->ext_logical + last_ei->ext_length; + + if (si >= count) + break; + } + + return true; +} +#else +extern bool +extent_scan_read (struct extent_scan *scan _GL_UNUSED) +{ + scan->initial_scan_failed = true; + errno = ENOTSUP; + return false; +} +#endif |