summaryrefslogtreecommitdiffstats
path: root/src/extent-scan.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/extent-scan.c')
-rw-r--r--src/extent-scan.c228
1 files changed, 228 insertions, 0 deletions
diff --git a/src/extent-scan.c b/src/extent-scan.c
new file mode 100644
index 0000000..15a22a9
--- /dev/null
+++ b/src/extent-scan.c
@@ -0,0 +1,228 @@
+/* extent-scan.c -- core functions for scanning extents
+ Copyright (C) 2010-2020 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ Written by Jie Liu (jeff.liu@oracle.com). */
+
+#include <config.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/utsname.h>
+#include <assert.h>
+
+#include "system.h"
+#include "extent-scan.h"
+#include "fiemap.h"
+#include "xstrtol.h"
+
+
+/* Work around Linux kernel issues on BTRFS and EXT4. */
+static bool
+extent_need_sync (void)
+{
+ /* For now always return true, to be on the safe side.
+ If/when FIEMAP semantics are well defined (before SEEK_HOLE support
+ is usable) and kernels implementing them are in use, we may relax
+ this once again. */
+ return true;
+
+#if FIEMAP_BEHAVIOR_IS_DEFINED_AND_USABLE
+ static int need_sync = -1;
+
+ if (need_sync == -1)
+ {
+ struct utsname name;
+ need_sync = 0; /* No workaround by default. */
+
+# ifdef __linux__
+ if (uname (&name) != -1 && STRNCMP_LIT (name.release, "2.6.") == 0)
+ {
+ unsigned long val;
+ if (xstrtoul (name.release + 4, NULL, 10, &val, NULL) == LONGINT_OK)
+ {
+ if (val < 39)
+ need_sync = 1;
+ }
+ }
+# endif
+ }
+
+ return need_sync;
+#endif
+}
+
+/* Allocate space for struct extent_scan, initialize the entries if
+ necessary and return it as the input argument of extent_scan_read(). */
+extern void
+extent_scan_init (int src_fd, struct extent_scan *scan)
+{
+ scan->fd = src_fd;
+ scan->ei_count = 0;
+ scan->ext_info = NULL;
+ scan->scan_start = 0;
+ scan->initial_scan_failed = false;
+ scan->hit_final_extent = false;
+ scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0;
+}
+
+#ifdef __linux__
+# ifndef FS_IOC_FIEMAP
+# define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap)
+# endif
+/* Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to
+ obtain a map of file extents excluding holes. */
+extern bool
+extent_scan_read (struct extent_scan *scan)
+{
+ unsigned int si = 0;
+ struct extent_info *last_ei = scan->ext_info;
+
+ while (true)
+ {
+ union { struct fiemap f; char c[4096]; } fiemap_buf;
+ struct fiemap *fiemap = &fiemap_buf.f;
+ struct fiemap_extent *fm_extents = &fiemap->fm_extents[0];
+ enum { headersize = offsetof (struct fiemap, fm_extents) };
+ enum { count = (sizeof fiemap_buf - headersize) / sizeof *fm_extents };
+ verify (count > 1);
+
+ /* This is required at least to initialize fiemap->fm_start,
+ but also serves (in mid 2010) to appease valgrind, which
+ appears not to know the semantics of the FIEMAP ioctl. */
+ memset (&fiemap_buf, 0, sizeof fiemap_buf);
+
+ fiemap->fm_start = scan->scan_start;
+ fiemap->fm_flags = scan->fm_flags;
+ fiemap->fm_extent_count = count;
+ fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start;
+
+ /* Fall back to the standard copy if call ioctl(2) failed for
+ the first time. */
+ if (ioctl (scan->fd, FS_IOC_FIEMAP, fiemap) < 0)
+ {
+ if (scan->scan_start == 0)
+ scan->initial_scan_failed = true;
+ return false;
+ }
+
+ /* If 0 extents are returned, then no more scans are needed. */
+ if (fiemap->fm_mapped_extents == 0)
+ {
+ scan->hit_final_extent = true;
+ return scan->scan_start != 0;
+ }
+
+ assert (scan->ei_count <= SIZE_MAX - fiemap->fm_mapped_extents);
+ scan->ei_count += fiemap->fm_mapped_extents;
+ {
+ /* last_ei points into a buffer that may be freed via xnrealloc.
+ Record its offset and adjust after allocation. */
+ size_t prev_idx = last_ei - scan->ext_info;
+ scan->ext_info = xnrealloc (scan->ext_info, scan->ei_count,
+ sizeof (struct extent_info));
+ last_ei = scan->ext_info + prev_idx;
+ }
+
+ unsigned int i = 0;
+ for (i = 0; i < fiemap->fm_mapped_extents; i++)
+ {
+ assert (fm_extents[i].fe_logical
+ <= OFF_T_MAX - fm_extents[i].fe_length);
+
+ verify (sizeof last_ei->ext_flags >= sizeof fm_extents->fe_flags);
+
+ if (si && last_ei->ext_flags
+ == (fm_extents[i].fe_flags & ~FIEMAP_EXTENT_LAST)
+ && (last_ei->ext_logical + last_ei->ext_length
+ == fm_extents[i].fe_logical))
+ {
+ /* Merge previous with last. */
+ last_ei->ext_length += fm_extents[i].fe_length;
+ /* Copy flags in case different. */
+ last_ei->ext_flags = fm_extents[i].fe_flags;
+ }
+ else if ((si == 0 && scan->scan_start > fm_extents[i].fe_logical)
+ || (si && (last_ei->ext_logical + last_ei->ext_length
+ > fm_extents[i].fe_logical)))
+ {
+ /* BTRFS before 2.6.38 could return overlapping extents
+ for sparse files. We adjust the returned extents
+ rather than failing, as otherwise it would be inefficient
+ to detect this on the initial scan. */
+ uint64_t new_logical;
+ uint64_t length_adjust;
+ if (si == 0)
+ new_logical = scan->scan_start;
+ else
+ {
+ /* We could return here if scan->scan_start == 0
+ but don't so as to minimize special cases. */
+ new_logical = last_ei->ext_logical + last_ei->ext_length;
+ }
+ length_adjust = new_logical - fm_extents[i].fe_logical;
+ /* If an extent is contained within the previous one, fail. */
+ if (length_adjust < fm_extents[i].fe_length)
+ {
+ if (scan->scan_start == 0)
+ scan->initial_scan_failed = true;
+ return false;
+ }
+ fm_extents[i].fe_logical = new_logical;
+ fm_extents[i].fe_length -= length_adjust;
+ /* Process the adjusted extent again. */
+ i--;
+ continue;
+ }
+ else
+ {
+ last_ei = scan->ext_info + si;
+ last_ei->ext_logical = fm_extents[i].fe_logical;
+ last_ei->ext_length = fm_extents[i].fe_length;
+ last_ei->ext_flags = fm_extents[i].fe_flags;
+ si++;
+ }
+ }
+
+ if (last_ei->ext_flags & FIEMAP_EXTENT_LAST)
+ scan->hit_final_extent = true;
+
+ /* If we have enough extents, discard the last as it might
+ be merged with one from the next scan. */
+ if (si > count && !scan->hit_final_extent)
+ last_ei = scan->ext_info + --si - 1;
+
+ /* We don't bother reallocating any trailing slots. */
+ scan->ei_count = si;
+
+ if (scan->hit_final_extent)
+ break;
+ else
+ scan->scan_start = last_ei->ext_logical + last_ei->ext_length;
+
+ if (si >= count)
+ break;
+ }
+
+ return true;
+}
+#else
+extern bool
+extent_scan_read (struct extent_scan *scan _GL_UNUSED)
+{
+ scan->initial_scan_failed = true;
+ errno = ENOTSUP;
+ return false;
+}
+#endif