From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/librbd/migration/QCOW.h | 466 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 466 insertions(+) create mode 100644 src/librbd/migration/QCOW.h (limited to 'src/librbd/migration/QCOW.h') diff --git a/src/librbd/migration/QCOW.h b/src/librbd/migration/QCOW.h new file mode 100644 index 000000000..23401e515 --- /dev/null +++ b/src/librbd/migration/QCOW.h @@ -0,0 +1,466 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* Based on QEMU block/qcow.cc and block/qcow2.h, which has this license: */ + +/* + * Block driver for the QCOW version 2 format + * + * Copyright (c) 2004-2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef CEPH_LIBRBD_MIGRATION_QCOW2_H +#define CEPH_LIBRBD_MIGRATION_QCOW2_H + +#include "include/ceph_assert.h" +#include "include/int_types.h" +#include "librbd/migration/QCOW.h" + +#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) + +#define QCOW_CRYPT_NONE 0 +#define QCOW_CRYPT_AES 1 +#define QCOW_CRYPT_LUKS 2 + +#define QCOW_MAX_CRYPT_CLUSTERS 32 +#define QCOW_MAX_SNAPSHOTS 65536 + +/* Field widths in qcow2 mean normal cluster offsets cannot reach + * 64PB; depending on cluster size, compressed clusters can have a + * smaller limit (64PB for up to 16k clusters, then ramps down to + * 512TB for 2M clusters). */ +#define QCOW_MAX_CLUSTER_OFFSET ((1ULL << 56) - 1) + +/* 8 MB refcount table is enough for 2 PB images at 64k cluster size + * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */ +#define QCOW_MAX_REFTABLE_SIZE (1ULL << 23) + +/* 32 MB L1 table is enough for 2 PB images at 64k cluster size + * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */ +#define QCOW_MAX_L1_SIZE (1ULL << 25) + +/* Allow for an average of 1k per snapshot table entry, should be plenty of + * space for snapshot names and IDs */ +#define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS) + +/* Maximum amount of extra data per snapshot table entry to accept */ +#define QCOW_MAX_SNAPSHOT_EXTRA_DATA 1024 + +/* Bitmap header extension constraints */ +#define QCOW2_MAX_BITMAPS 65535 +#define QCOW2_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW2_MAX_BITMAPS) + +/* Maximum of parallel sub-request per guest request */ +#define QCOW2_MAX_WORKERS 8 + +/* indicate that the refcount of the referenced cluster is exactly one. */ +#define QCOW_OFLAG_COPIED (1ULL << 63) +/* indicate that the cluster is compressed (they never have the copied flag) */ +#define QCOW_OFLAG_COMPRESSED (1ULL << 62) +/* The cluster reads as all zeros */ +#define QCOW_OFLAG_ZERO (1ULL << 0) + +#define QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER 32 + +/* The subcluster X [0..31] is allocated */ +#define QCOW_OFLAG_SUB_ALLOC(X) (1ULL << (X)) +/* The subcluster X [0..31] reads as zeroes */ +#define QCOW_OFLAG_SUB_ZERO(X) (QCOW_OFLAG_SUB_ALLOC(X) << 32) +/* Subclusters [X, Y) (0 <= X <= Y <= 32) are allocated */ +#define QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) \ + (QCOW_OFLAG_SUB_ALLOC(Y) - QCOW_OFLAG_SUB_ALLOC(X)) +/* Subclusters [X, Y) (0 <= X <= Y <= 32) read as zeroes */ +#define QCOW_OFLAG_SUB_ZERO_RANGE(X, Y) \ + (QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) << 32) +/* L2 entry bitmap with all allocation bits set */ +#define QCOW_L2_BITMAP_ALL_ALLOC (QCOW_OFLAG_SUB_ALLOC_RANGE(0, 32)) +/* L2 entry bitmap with all "read as zeroes" bits set */ +#define QCOW_L2_BITMAP_ALL_ZEROES (QCOW_OFLAG_SUB_ZERO_RANGE(0, 32)) + +/* Size of normal and extended L2 entries */ +#define QCOW_L2E_SIZE_NORMAL (sizeof(uint64_t)) +#define QCOW_L2E_SIZE_EXTENDED (sizeof(uint64_t) * 2) + +/* Size of L1 table entries */ +#define QCOW_L1E_SIZE (sizeof(uint64_t)) + +/* Size of reftable entries */ +#define QCOW_REFTABLE_ENTRY_SIZE (sizeof(uint64_t)) + +#define QCOW_MIN_CLUSTER_BITS 9 +#define QCOW_MAX_CLUSTER_BITS 21 + +/* Defined in the qcow2 spec (compressed cluster descriptor) */ +#define QCOW2_COMPRESSED_SECTOR_SIZE 512U +#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1ULL)) + +#define QCOW_L2_CACHE_SIZE 16 + +/* Must be at least 2 to cover COW */ +#define QCOW_MIN_L2_CACHE_SIZE 2 /* cache entries */ + +/* Must be at least 4 to cover all cases of refcount table growth */ +#define QCOW_MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */ + +#define QCOW_DEFAULT_L2_CACHE_MAX_SIZE (1ULL << 25) +#define QCOW_DEFAULT_CACHE_CLEAN_INTERVAL 600 /* seconds */ + +#define QCOW_DEFAULT_CLUSTER_SIZE 65536 + +#define QCOW2_OPT_DATA_FILE "data-file" +#define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts" +#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request" +#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot" +#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other" +#define QCOW2_OPT_OVERLAP "overlap-check" +#define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template" +#define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header" +#define QCOW2_OPT_OVERLAP_ACTIVE_L1 "overlap-check.active-l1" +#define QCOW2_OPT_OVERLAP_ACTIVE_L2 "overlap-check.active-l2" +#define QCOW2_OPT_OVERLAP_REFCOUNT_TABLE "overlap-check.refcount-table" +#define QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK "overlap-check.refcount-block" +#define QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE "overlap-check.snapshot-table" +#define QCOW2_OPT_OVERLAP_INACTIVE_L1 "overlap-check.inactive-l1" +#define QCOW2_OPT_OVERLAP_INACTIVE_L2 "overlap-check.inactive-l2" +#define QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY "overlap-check.bitmap-directory" +#define QCOW2_OPT_CACHE_SIZE "cache-size" +#define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size" +#define QCOW2_OPT_L2_CACHE_ENTRY_SIZE "l2-cache-entry-size" +#define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size" +#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval" + +typedef struct QCowHeaderProbe { + uint32_t magic; + uint32_t version; +} __attribute__((__packed__)) QCowHeaderProbe; + +typedef struct QCowHeaderV1 +{ + uint32_t magic; + uint32_t version; + uint64_t backing_file_offset; + uint32_t backing_file_size; + uint32_t mtime; + uint64_t size; /* in bytes */ + uint8_t cluster_bits; + uint8_t l2_bits; + uint16_t padding; + uint32_t crypt_method; + uint64_t l1_table_offset; +} __attribute__((__packed__)) QCowHeaderV1; + +typedef struct QCowHeader { + uint32_t magic; + uint32_t version; + uint64_t backing_file_offset; + uint32_t backing_file_size; + uint32_t cluster_bits; + uint64_t size; /* in bytes */ + uint32_t crypt_method; + uint32_t l1_size; /* XXX: save number of clusters instead ? */ + uint64_t l1_table_offset; + uint64_t refcount_table_offset; + uint32_t refcount_table_clusters; + uint32_t nb_snapshots; + uint64_t snapshots_offset; + + /* The following fields are only valid for version >= 3 */ + uint64_t incompatible_features; + uint64_t compatible_features; + uint64_t autoclear_features; + + uint32_t refcount_order; + uint32_t header_length; + + /* Additional fields */ + uint8_t compression_type; + + /* header must be a multiple of 8 */ + uint8_t padding[7]; +} __attribute__((__packed__)) QCowHeader; + +typedef struct QCowSnapshotHeader { + /* header is 8 byte aligned */ + uint64_t l1_table_offset; + + uint32_t l1_size; + uint16_t id_str_size; + uint16_t name_size; + + uint32_t date_sec; + uint32_t date_nsec; + + uint64_t vm_clock_nsec; + + uint32_t vm_state_size; + uint32_t extra_data_size; /* for extension */ + /* extra data follows */ + /* id_str follows */ + /* name follows */ +} __attribute__((__packed__)) QCowSnapshotHeader; + +typedef struct QCowSnapshotExtraData { + uint64_t vm_state_size_large; + uint64_t disk_size; + uint64_t icount; +} __attribute__((__packed__)) QCowSnapshotExtraData; + + +typedef struct QCowSnapshot { + uint64_t l1_table_offset; + uint32_t l1_size; + char *id_str; + char *name; + uint64_t disk_size; + uint64_t vm_state_size; + uint32_t date_sec; + uint32_t date_nsec; + uint64_t vm_clock_nsec; + /* icount value for the moment when snapshot was taken */ + uint64_t icount; + /* Size of all extra data, including QCowSnapshotExtraData if available */ + uint32_t extra_data_size; + /* Data beyond QCowSnapshotExtraData, if any */ + void *unknown_extra_data; +} QCowSnapshot; + +typedef struct Qcow2CryptoHeaderExtension { + uint64_t offset; + uint64_t length; +} __attribute__((__packed__)) Qcow2CryptoHeaderExtension; + +typedef struct Qcow2UnknownHeaderExtension { + uint32_t magic; + uint32_t len; + uint8_t data[]; +} Qcow2UnknownHeaderExtension; + +enum { + QCOW2_FEAT_TYPE_INCOMPATIBLE = 0, + QCOW2_FEAT_TYPE_COMPATIBLE = 1, + QCOW2_FEAT_TYPE_AUTOCLEAR = 2, +}; + +/* Incompatible feature bits */ +enum { + QCOW2_INCOMPAT_DIRTY_BITNR = 0, + QCOW2_INCOMPAT_CORRUPT_BITNR = 1, + QCOW2_INCOMPAT_DATA_FILE_BITNR = 2, + QCOW2_INCOMPAT_COMPRESSION_BITNR = 3, + QCOW2_INCOMPAT_EXTL2_BITNR = 4, + QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR, + QCOW2_INCOMPAT_CORRUPT = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR, + QCOW2_INCOMPAT_DATA_FILE = 1 << QCOW2_INCOMPAT_DATA_FILE_BITNR, + QCOW2_INCOMPAT_COMPRESSION = 1 << QCOW2_INCOMPAT_COMPRESSION_BITNR, + QCOW2_INCOMPAT_EXTL2 = 1 << QCOW2_INCOMPAT_EXTL2_BITNR, + + QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY + | QCOW2_INCOMPAT_CORRUPT + | QCOW2_INCOMPAT_DATA_FILE + | QCOW2_INCOMPAT_COMPRESSION + | QCOW2_INCOMPAT_EXTL2, +}; + +/* Compatible feature bits */ +enum { + QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0, + QCOW2_COMPAT_LAZY_REFCOUNTS = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, + + QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS, +}; + +/* Autoclear feature bits */ +enum { + QCOW2_AUTOCLEAR_BITMAPS_BITNR = 0, + QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR = 1, + QCOW2_AUTOCLEAR_BITMAPS = 1 << QCOW2_AUTOCLEAR_BITMAPS_BITNR, + QCOW2_AUTOCLEAR_DATA_FILE_RAW = 1 << QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR, + + QCOW2_AUTOCLEAR_MASK = QCOW2_AUTOCLEAR_BITMAPS + | QCOW2_AUTOCLEAR_DATA_FILE_RAW, +}; + +enum qcow2_discard_type { + QCOW2_DISCARD_NEVER = 0, + QCOW2_DISCARD_ALWAYS, + QCOW2_DISCARD_REQUEST, + QCOW2_DISCARD_SNAPSHOT, + QCOW2_DISCARD_OTHER, + QCOW2_DISCARD_MAX +}; + +typedef struct Qcow2Feature { + uint8_t type; + uint8_t bit; + char name[46]; +} __attribute__((__packed__)) Qcow2Feature; + +typedef struct Qcow2DiscardRegion { + uint64_t offset; + uint64_t bytes; +} Qcow2DiscardRegion; + +typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array, + uint64_t index); +typedef void Qcow2SetRefcountFunc(void *refcount_array, + uint64_t index, uint64_t value); + +typedef struct Qcow2BitmapHeaderExt { + uint32_t nb_bitmaps; + uint32_t reserved32; + uint64_t bitmap_directory_size; + uint64_t bitmap_directory_offset; +} __attribute__((__packed__)) Qcow2BitmapHeaderExt; + +#define QCOW_RC_CACHE_SIZE QCOW_L2_CACHE_SIZE; + +typedef struct Qcow2COWRegion { + /** + * Offset of the COW region in bytes from the start of the first cluster + * touched by the request. + */ + unsigned offset; + + /** Number of bytes to copy */ + unsigned nb_bytes; +} Qcow2COWRegion; + +/** + * Describes an in-flight (part of a) write request that writes to clusters + * that are not referenced in their L2 table yet. + */ +typedef struct QCowL2Meta +{ + /** Guest offset of the first newly allocated cluster */ + uint64_t offset; + + /** Host offset of the first newly allocated cluster */ + uint64_t alloc_offset; + + /** Number of newly allocated clusters */ + int nb_clusters; + + /** Do not free the old clusters */ + bool keep_old_clusters; + + /** + * The COW Region between the start of the first allocated cluster and the + * area the guest actually writes to. + */ + Qcow2COWRegion cow_start; + + /** + * The COW Region between the area the guest actually writes to and the + * end of the last allocated cluster. + */ + Qcow2COWRegion cow_end; + + /* + * Indicates that COW regions are already handled and do not require + * any more processing. + */ + bool skip_cow; + + /** + * Indicates that this is not a normal write request but a preallocation. + * If the image has extended L2 entries this means that no new individual + * subclusters will be marked as allocated in the L2 bitmap (but any + * existing contents of that bitmap will be kept). + */ + bool prealloc; + + /** Pointer to next L2Meta of the same write request */ + struct QCowL2Meta *next; +} QCowL2Meta; + +typedef enum QCow2ClusterType { + QCOW2_CLUSTER_UNALLOCATED, + QCOW2_CLUSTER_ZERO_PLAIN, + QCOW2_CLUSTER_ZERO_ALLOC, + QCOW2_CLUSTER_NORMAL, + QCOW2_CLUSTER_COMPRESSED, +} QCow2ClusterType; + +typedef enum QCow2MetadataOverlap { + QCOW2_OL_MAIN_HEADER_BITNR = 0, + QCOW2_OL_ACTIVE_L1_BITNR = 1, + QCOW2_OL_ACTIVE_L2_BITNR = 2, + QCOW2_OL_REFCOUNT_TABLE_BITNR = 3, + QCOW2_OL_REFCOUNT_BLOCK_BITNR = 4, + QCOW2_OL_SNAPSHOT_TABLE_BITNR = 5, + QCOW2_OL_INACTIVE_L1_BITNR = 6, + QCOW2_OL_INACTIVE_L2_BITNR = 7, + QCOW2_OL_BITMAP_DIRECTORY_BITNR = 8, + + QCOW2_OL_MAX_BITNR = 9, + + QCOW2_OL_NONE = 0, + QCOW2_OL_MAIN_HEADER = (1 << QCOW2_OL_MAIN_HEADER_BITNR), + QCOW2_OL_ACTIVE_L1 = (1 << QCOW2_OL_ACTIVE_L1_BITNR), + QCOW2_OL_ACTIVE_L2 = (1 << QCOW2_OL_ACTIVE_L2_BITNR), + QCOW2_OL_REFCOUNT_TABLE = (1 << QCOW2_OL_REFCOUNT_TABLE_BITNR), + QCOW2_OL_REFCOUNT_BLOCK = (1 << QCOW2_OL_REFCOUNT_BLOCK_BITNR), + QCOW2_OL_SNAPSHOT_TABLE = (1 << QCOW2_OL_SNAPSHOT_TABLE_BITNR), + QCOW2_OL_INACTIVE_L1 = (1 << QCOW2_OL_INACTIVE_L1_BITNR), + /* NOTE: Checking overlaps with inactive L2 tables will result in bdrv + * reads. */ + QCOW2_OL_INACTIVE_L2 = (1 << QCOW2_OL_INACTIVE_L2_BITNR), + QCOW2_OL_BITMAP_DIRECTORY = (1 << QCOW2_OL_BITMAP_DIRECTORY_BITNR), +} QCow2MetadataOverlap; + +/* Perform all overlap checks which can be done in constant time */ +#define QCOW2_OL_CONSTANT \ + (QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_REFCOUNT_TABLE | \ + QCOW2_OL_SNAPSHOT_TABLE | QCOW2_OL_BITMAP_DIRECTORY) + +/* Perform all overlap checks which don't require disk access */ +#define QCOW2_OL_CACHED \ + (QCOW2_OL_CONSTANT | QCOW2_OL_ACTIVE_L2 | QCOW2_OL_REFCOUNT_BLOCK | \ + QCOW2_OL_INACTIVE_L1) + +/* Perform all overlap checks */ +#define QCOW2_OL_ALL \ + (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2) + +#define QCOW_L1E_OFFSET_MASK 0x00fffffffffffe00ULL +#define QCOW_L2E_OFFSET_MASK 0x00fffffffffffe00ULL +#define QCOW_L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL + +#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL + +#define INV_OFFSET (-1ULL) + +static inline uint64_t l2meta_cow_start(QCowL2Meta *m) +{ + return m->offset + m->cow_start.offset; +} + +static inline uint64_t l2meta_cow_end(QCowL2Meta *m) +{ + return m->offset + m->cow_end.offset + m->cow_end.nb_bytes; +} + +static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2) +{ + return r1 > r2 ? r1 - r2 : r2 - r1; +} + +#endif // CEPH_LIBRBD_MIGRATION_QCOW2_H -- cgit v1.2.3