summaryrefslogtreecommitdiffstats
path: root/restripe.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--restripe.c1038
1 files changed, 1038 insertions, 0 deletions
diff --git a/restripe.c b/restripe.c
new file mode 100644
index 0000000..a7a7229
--- /dev/null
+++ b/restripe.c
@@ -0,0 +1,1038 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include <stdint.h>
+
+/* To restripe, we read from old geometry to a buffer, and
+ * read from buffer to new geometry.
+ * When reading, we might have missing devices and so could need
+ * to reconstruct.
+ * When writing, we need to create correct parity and Q.
+ *
+ */
+
+int geo_map(int block, unsigned long long stripe, int raid_disks,
+ int level, int layout)
+{
+ /* On the given stripe, find which disk in the array will have
+ * block numbered 'block'.
+ * '-1' means the parity block.
+ * '-2' means the Q syndrome.
+ */
+ int pd;
+
+ /* layout is not relevant for raid0 and raid4 */
+ if ((level == 0) ||
+ (level == 4))
+ layout = 0;
+
+ switch(level*100 + layout) {
+ case 000:
+ case 400:
+ case 500 + ALGORITHM_PARITY_N:
+ /* raid 4 isn't messed around by parity blocks */
+ if (block == -1)
+ return raid_disks-1; /* parity block */
+ return block;
+ case 500 + ALGORITHM_LEFT_ASYMMETRIC:
+ pd = (raid_disks-1) - stripe % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block >= pd)
+ block++;
+ return block;
+
+ case 500 + ALGORITHM_RIGHT_ASYMMETRIC:
+ pd = stripe % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block >= pd)
+ block++;
+ return block;
+
+ case 500 + ALGORITHM_LEFT_SYMMETRIC:
+ pd = (raid_disks - 1) - stripe % raid_disks;
+ if (block == -1)
+ return pd;
+ return (pd + 1 + block) % raid_disks;
+
+ case 500 + ALGORITHM_RIGHT_SYMMETRIC:
+ pd = stripe % raid_disks;
+ if (block == -1)
+ return pd;
+ return (pd + 1 + block) % raid_disks;
+
+ case 500 + ALGORITHM_PARITY_0:
+ return block + 1;
+
+ case 600 + ALGORITHM_PARITY_N_6:
+ if (block == -2)
+ return raid_disks - 1;
+ if (block == -1)
+ return raid_disks - 2; /* parity block */
+ return block;
+ case 600 + ALGORITHM_LEFT_ASYMMETRIC_6:
+ if (block == -2)
+ return raid_disks - 1;
+ raid_disks--;
+ pd = (raid_disks-1) - stripe % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block >= pd)
+ block++;
+ return block;
+
+ case 600 + ALGORITHM_RIGHT_ASYMMETRIC_6:
+ if (block == -2)
+ return raid_disks - 1;
+ raid_disks--;
+ pd = stripe % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block >= pd)
+ block++;
+ return block;
+
+ case 600 + ALGORITHM_LEFT_SYMMETRIC_6:
+ if (block == -2)
+ return raid_disks - 1;
+ raid_disks--;
+ pd = (raid_disks - 1) - stripe % raid_disks;
+ if (block == -1)
+ return pd;
+ return (pd + 1 + block) % raid_disks;
+
+ case 600 + ALGORITHM_RIGHT_SYMMETRIC_6:
+ if (block == -2)
+ return raid_disks - 1;
+ raid_disks--;
+ pd = stripe % raid_disks;
+ if (block == -1)
+ return pd;
+ return (pd + 1 + block) % raid_disks;
+
+ case 600 + ALGORITHM_PARITY_0_6:
+ if (block == -2)
+ return raid_disks - 1;
+ return block + 1;
+
+ case 600 + ALGORITHM_PARITY_0:
+ if (block == -1)
+ return 0;
+ if (block == -2)
+ return 1;
+ return block + 2;
+
+ case 600 + ALGORITHM_LEFT_ASYMMETRIC:
+ pd = raid_disks - 1 - (stripe % raid_disks);
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
+ if (pd == raid_disks - 1)
+ return block+1;
+ if (block >= pd)
+ return block+2;
+ return block;
+
+ case 600 + ALGORITHM_ROTATING_ZERO_RESTART:
+ /* Different order for calculating Q, otherwize same as ... */
+ case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
+ pd = stripe % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
+ if (pd == raid_disks - 1)
+ return block+1;
+ if (block >= pd)
+ return block+2;
+ return block;
+
+ case 600 + ALGORITHM_LEFT_SYMMETRIC:
+ pd = raid_disks - 1 - (stripe % raid_disks);
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
+ return (pd + 2 + block) % raid_disks;
+
+ case 600 + ALGORITHM_RIGHT_SYMMETRIC:
+ pd = stripe % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
+ return (pd + 2 + block) % raid_disks;
+
+ case 600 + ALGORITHM_ROTATING_N_RESTART:
+ /* Same a left_asymmetric, by first stripe is
+ * D D D P Q rather than
+ * Q D D D P
+ */
+ pd = raid_disks - 1 - ((stripe + 1) % raid_disks);
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
+ if (pd == raid_disks - 1)
+ return block+1;
+ if (block >= pd)
+ return block+2;
+ return block;
+
+ case 600 + ALGORITHM_ROTATING_N_CONTINUE:
+ /* Same as left_symmetric but Q is before P */
+ pd = raid_disks - 1 - (stripe % raid_disks);
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+raid_disks-1) % raid_disks;
+ return (pd + 1 + block) % raid_disks;
+ }
+ return -1;
+}
+
+int is_ddf(int layout)
+{
+ switch (layout)
+ {
+ default:
+ return 0;
+ case ALGORITHM_ROTATING_N_CONTINUE:
+ case ALGORITHM_ROTATING_N_RESTART:
+ case ALGORITHM_ROTATING_ZERO_RESTART:
+ return 1;
+ }
+}
+
+void xor_blocks(char *target, char **sources, int disks, int size)
+{
+ int i, j;
+ /* Amazingly inefficient... */
+ for (i=0; i<size; i++) {
+ char c = 0;
+ for (j=0 ; j<disks; j++)
+ c ^= sources[j][i];
+ target[i] = c;
+ }
+}
+
+void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size)
+{
+ int d, z;
+ uint8_t wq0, wp0, wd0, w10, w20;
+ for ( d = 0; d < size; d++) {
+ wq0 = wp0 = sources[disks-1][d];
+ for ( z = disks-2 ; z >= 0 ; z-- ) {
+ wd0 = sources[z][d];
+ wp0 ^= wd0;
+ w20 = (wq0&0x80) ? 0xff : 0x00;
+ w10 = (wq0 << 1) & 0xff;
+ w20 &= 0x1d;
+ w10 ^= w20;
+ wq0 = w10 ^ wd0;
+ }
+ p[d] = wp0;
+ q[d] = wq0;
+ }
+}
+
+/*
+ * The following was taken from linux/drivers/md/mktables.c, and modified
+ * to create in-memory tables rather than C code
+ */
+static uint8_t gfmul(uint8_t a, uint8_t b)
+{
+ uint8_t v = 0;
+
+ while (b) {
+ if (b & 1)
+ v ^= a;
+ a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
+ b >>= 1;
+ }
+
+ return v;
+}
+
+static uint8_t gfpow(uint8_t a, int b)
+{
+ uint8_t v = 1;
+
+ b %= 255;
+ if (b < 0)
+ b += 255;
+
+ while (b) {
+ if (b & 1)
+ v = gfmul(v, a);
+ a = gfmul(a, a);
+ b >>= 1;
+ }
+
+ return v;
+}
+
+int tables_ready = 0;
+uint8_t raid6_gfmul[256][256];
+uint8_t raid6_gfexp[256];
+uint8_t raid6_gfinv[256];
+uint8_t raid6_gfexi[256];
+uint8_t raid6_gflog[256];
+uint8_t raid6_gfilog[256];
+void make_tables(void)
+{
+ int i, j;
+ uint8_t v;
+ uint32_t b, log;
+
+ /* Compute multiplication table */
+ for (i = 0; i < 256; i++)
+ for (j = 0; j < 256; j++)
+ raid6_gfmul[i][j] = gfmul(i, j);
+
+ /* Compute power-of-2 table (exponent) */
+ v = 1;
+ for (i = 0; i < 256; i++) {
+ raid6_gfexp[i] = v;
+ v = gfmul(v, 2);
+ if (v == 1)
+ v = 0; /* For entry 255, not a real entry */
+ }
+
+ /* Compute inverse table x^-1 == x^254 */
+ for (i = 0; i < 256; i++)
+ raid6_gfinv[i] = gfpow(i, 254);
+
+ /* Compute inv(2^x + 1) (exponent-xor-inverse) table */
+ for (i = 0; i < 256; i ++)
+ raid6_gfexi[i] = raid6_gfinv[raid6_gfexp[i] ^ 1];
+
+ /* Compute log and inverse log */
+ /* Modified code from:
+ * https://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html
+ */
+ b = 1;
+ raid6_gflog[0] = 0;
+ raid6_gfilog[255] = 0;
+
+ for (log = 0; log < 255; log++) {
+ raid6_gflog[b] = (uint8_t) log;
+ raid6_gfilog[log] = (uint8_t) b;
+ b = b << 1;
+ if (b & 256) b = b ^ 0435;
+ }
+
+ tables_ready = 1;
+}
+
+uint8_t *zero;
+int zero_size;
+
+void ensure_zero_has_size(int chunk_size)
+{
+ if (zero == NULL || chunk_size > zero_size) {
+ if (zero)
+ free(zero);
+ zero = xcalloc(1, chunk_size);
+ zero_size = chunk_size;
+ }
+}
+
+/* Following was taken from linux/drivers/md/raid6recov.c */
+
+/* Recover two failed data blocks. */
+
+void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+ uint8_t **ptrs, int neg_offset)
+{
+ uint8_t *p, *q, *dp, *dq;
+ uint8_t px, qx, db;
+ const uint8_t *pbmul; /* P multiplier table for B data */
+ const uint8_t *qmul; /* Q multiplier table (for both) */
+
+ if (faila > failb) {
+ int t = faila;
+ faila = failb;
+ failb = t;
+ }
+
+ if (neg_offset) {
+ p = ptrs[-1];
+ q = ptrs[-2];
+ } else {
+ p = ptrs[disks-2];
+ q = ptrs[disks-1];
+ }
+
+ /* Compute syndrome with zero for the missing data pages
+ Use the dead data pages as temporary storage for
+ delta p and delta q */
+ dp = ptrs[faila];
+ ptrs[faila] = zero;
+ dq = ptrs[failb];
+ ptrs[failb] = zero;
+
+ qsyndrome(dp, dq, ptrs, disks-2, bytes);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
+ qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
+
+ /* Now do it... */
+ while ( bytes-- ) {
+ px = *p ^ *dp;
+ qx = qmul[*q ^ *dq];
+ *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
+ *dp++ = db ^ px; /* Reconstructed A */
+ p++; q++;
+ }
+}
+
+/* Recover failure of one data block plus the P block */
+void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs,
+ int neg_offset)
+{
+ uint8_t *p, *q, *dq;
+ const uint8_t *qmul; /* Q multiplier table */
+
+ if (neg_offset) {
+ p = ptrs[-1];
+ q = ptrs[-2];
+ } else {
+ p = ptrs[disks-2];
+ q = ptrs[disks-1];
+ }
+
+ /* Compute syndrome with zero for the missing data page
+ Use the dead data page as temporary storage for delta q */
+ dq = ptrs[faila];
+ ptrs[faila] = zero;
+
+ qsyndrome(p, dq, ptrs, disks-2, bytes);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ /* Now do it... */
+ while ( bytes-- ) {
+ *p++ ^= *dq = qmul[*q ^ *dq];
+ q++; dq++;
+ }
+}
+
+/* Try to find out if a specific disk has a problem */
+int raid6_check_disks(int data_disks, int start, int chunk_size,
+ int level, int layout, int diskP, int diskQ,
+ uint8_t *p, uint8_t *q, char **stripes)
+{
+ int i;
+ int data_id, diskD;
+ uint8_t Px, Qx;
+ int curr_broken_disk = -1;
+ int prev_broken_disk = -1;
+ int broken_status = 0;
+
+ for(i = 0; i < chunk_size; i++) {
+ Px = (uint8_t)stripes[diskP][i] ^ (uint8_t)p[i];
+ Qx = (uint8_t)stripes[diskQ][i] ^ (uint8_t)q[i];
+
+ if((Px != 0) && (Qx == 0))
+ curr_broken_disk = diskP;
+
+ if((Px == 0) && (Qx != 0))
+ curr_broken_disk = diskQ;
+
+ if((Px != 0) && (Qx != 0)) {
+ data_id = (raid6_gflog[Qx] - raid6_gflog[Px]);
+ if(data_id < 0) data_id += 255;
+ diskD = geo_map(data_id, start/chunk_size,
+ data_disks + 2, level, layout);
+ curr_broken_disk = diskD;
+ }
+
+ if((Px == 0) && (Qx == 0))
+ curr_broken_disk = prev_broken_disk;
+
+ if(curr_broken_disk >= data_disks + 2)
+ broken_status = 2;
+
+ switch(broken_status) {
+ case 0:
+ if(curr_broken_disk != -1) {
+ prev_broken_disk = curr_broken_disk;
+ broken_status = 1;
+ }
+ break;
+
+ case 1:
+ if(curr_broken_disk != prev_broken_disk)
+ broken_status = 2;
+ break;
+
+ case 2:
+ default:
+ curr_broken_disk = prev_broken_disk = -2;
+ break;
+ }
+ }
+
+ return curr_broken_disk;
+}
+
+/*******************************************************************************
+ * Function: save_stripes
+ * Description:
+ * Function reads data (only data without P and Q) from array and writes
+ * it to buf and opcjonaly to backup files
+ * Parameters:
+ * source : A list of 'fds' of the active disks.
+ * Some may be absent
+ * offsets : A list of offsets on disk belonging
+ * to the array [bytes]
+ * raid_disks : geometry: number of disks in the array
+ * chunk_size : geometry: chunk size [bytes]
+ * level : geometry: RAID level
+ * layout : geometry: layout
+ * nwrites : number of backup files
+ * dest : A list of 'fds' for mirrored targets
+ * (e.g. backup files). They are already seeked to right
+ * (write) location. If NULL, data will be wrote
+ * to the buf only
+ * start : start address of data to read (must be stripe-aligned)
+ * [bytes]
+ * length - : length of data to read (must be stripe-aligned)
+ * [bytes]
+ * buf : buffer for data. It is large enough to hold
+ * one stripe. It is stripe aligned
+ * Returns:
+ * 0 : success
+ * -1 : fail
+ ******************************************************************************/
+int save_stripes(int *source, unsigned long long *offsets,
+ int raid_disks, int chunk_size, int level, int layout,
+ int nwrites, int *dest,
+ unsigned long long start, unsigned long long length,
+ char *buf)
+{
+ int len;
+ int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
+ int disk;
+ int i;
+ unsigned long long length_test;
+
+ if (!tables_ready)
+ make_tables();
+ ensure_zero_has_size(chunk_size);
+
+ len = data_disks * chunk_size;
+ length_test = length / len;
+ length_test *= len;
+
+ if (length != length_test) {
+ dprintf("Error: save_stripes(): Data are not alligned. EXIT\n");
+ dprintf("\tArea for saving stripes (length) = %llu\n", length);
+ dprintf("\tWork step (len) = %i\n", len);
+ dprintf("\tExpected save area (length_test) = %llu\n",
+ length_test);
+ abort();
+ }
+
+ while (length > 0) {
+ int failed = 0;
+ int fdisk[3], fblock[3];
+ for (disk = 0; disk < raid_disks ; disk++) {
+ unsigned long long offset;
+ int dnum;
+
+ offset = (start/chunk_size/data_disks)*chunk_size;
+ dnum = geo_map(disk < data_disks ? disk : data_disks - disk - 1,
+ start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ if (dnum < 0) abort();
+ if (source[dnum] < 0 ||
+ lseek64(source[dnum],
+ offsets[dnum] + offset, 0) < 0 ||
+ read(source[dnum], buf+disk * chunk_size,
+ chunk_size) != chunk_size) {
+ if (failed <= 2) {
+ fdisk[failed] = dnum;
+ fblock[failed] = disk;
+ failed++;
+ }
+ }
+ }
+ if (failed == 0 || fblock[0] >= data_disks)
+ /* all data disks are good */
+ ;
+ else if (failed == 1 || fblock[1] >= data_disks+1) {
+ /* one failed data disk and good parity */
+ char *bufs[data_disks];
+ for (i=0; i < data_disks; i++)
+ if (fblock[0] == i)
+ bufs[i] = buf + data_disks*chunk_size;
+ else
+ bufs[i] = buf + i*chunk_size;
+
+ xor_blocks(buf + fblock[0]*chunk_size,
+ bufs, data_disks, chunk_size);
+ } else if (failed > 2 || level != 6)
+ /* too much failure */
+ return -1;
+ else {
+ /* RAID6 computations needed. */
+ uint8_t *bufs[data_disks+4];
+ int qdisk;
+ int syndrome_disks;
+ disk = geo_map(-1, start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ qdisk = geo_map(-2, start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ if (is_ddf(layout)) {
+ /* q over 'raid_disks' blocks, in device order.
+ * 'p' and 'q' get to be all zero
+ */
+ for (i = 0; i < raid_disks; i++)
+ bufs[i] = zero;
+ for (i = 0; i < data_disks; i++) {
+ int dnum = geo_map(i,
+ start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ int snum;
+ /* i is the logical block number, so is index to 'buf'.
+ * dnum is physical disk number
+ * and thus the syndrome number.
+ */
+ snum = dnum;
+ bufs[snum] = (uint8_t*)buf + chunk_size * i;
+ }
+ syndrome_disks = raid_disks;
+ } else {
+ /* for md, q is over 'data_disks' blocks,
+ * starting immediately after 'q'
+ * Note that for the '_6' variety, the p block
+ * makes a hole that we need to be careful of.
+ */
+ int j;
+ int snum = 0;
+ for (j = 0; j < raid_disks; j++) {
+ int dnum = (qdisk + 1 + j) % raid_disks;
+ if (dnum == disk || dnum == qdisk)
+ continue;
+ for (i = 0; i < data_disks; i++)
+ if (geo_map(i,
+ start/chunk_size/data_disks,
+ raid_disks, level, layout) == dnum)
+ break;
+ /* i is the logical block number, so is index to 'buf'.
+ * dnum is physical disk number
+ * snum is syndrome disk for which 0 is immediately after Q
+ */
+ bufs[snum] = (uint8_t*)buf + chunk_size * i;
+
+ if (fblock[0] == i)
+ fdisk[0] = snum;
+ if (fblock[1] == i)
+ fdisk[1] = snum;
+ snum++;
+ }
+
+ syndrome_disks = data_disks;
+ }
+
+ /* Place P and Q blocks at end of bufs */
+ bufs[syndrome_disks] = (uint8_t*)buf + chunk_size * data_disks;
+ bufs[syndrome_disks+1] = (uint8_t*)buf + chunk_size * (data_disks+1);
+
+ if (fblock[1] == data_disks)
+ /* One data failed, and parity failed */
+ raid6_datap_recov(syndrome_disks+2, chunk_size,
+ fdisk[0], bufs, 0);
+ else {
+ /* Two data blocks failed, P,Q OK */
+ raid6_2data_recov(syndrome_disks+2, chunk_size,
+ fdisk[0], fdisk[1], bufs, 0);
+ }
+ }
+ if (dest) {
+ for (i = 0; i < nwrites; i++)
+ if (write(dest[i], buf, len) != len)
+ return -1;
+ } else {
+ /* build next stripe in buffer */
+ buf += len;
+ }
+ length -= len;
+ start += len;
+ }
+ return 0;
+}
+
+/* Restore data:
+ * We are given:
+ * A list of 'fds' of the active disks. Some may be '-1' for not-available.
+ * A geometry: raid_disks, chunk_size, level, layout
+ * An 'fd' to read from. It is already seeked to the right (Read) location.
+ * A start and length.
+ * The length must be a multiple of the stripe size.
+ *
+ * We build a full stripe in memory and then write it out.
+ * We assume that there are enough working devices.
+ */
+int restore_stripes(int *dest, unsigned long long *offsets,
+ int raid_disks, int chunk_size, int level, int layout,
+ int source, unsigned long long read_offset,
+ unsigned long long start, unsigned long long length,
+ char *src_buf)
+{
+ char *stripe_buf;
+ char **stripes = xmalloc(raid_disks * sizeof(char*));
+ char **blocks = xmalloc(raid_disks * sizeof(char*));
+ int i;
+ int rv;
+
+ int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
+
+ if (posix_memalign((void**)&stripe_buf, 4096, raid_disks * chunk_size))
+ stripe_buf = NULL;
+
+ if (zero == NULL || chunk_size > zero_size) {
+ if (zero)
+ free(zero);
+ zero = xcalloc(1, chunk_size);
+ zero_size = chunk_size;
+ }
+
+ if (stripe_buf == NULL || stripes == NULL || blocks == NULL ||
+ zero == NULL) {
+ rv = -2;
+ goto abort;
+ }
+ for (i = 0; i < raid_disks; i++)
+ stripes[i] = stripe_buf + i * chunk_size;
+ while (length > 0) {
+ unsigned int len = data_disks * chunk_size;
+ unsigned long long offset;
+ int disk, qdisk;
+ int syndrome_disks;
+ if (length < len) {
+ rv = -3;
+ goto abort;
+ }
+ for (i = 0; i < data_disks; i++) {
+ int disk = geo_map(i, start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ if (src_buf == NULL) {
+ /* read from file */
+ if (lseek64(source, read_offset, 0) !=
+ (off64_t)read_offset) {
+ rv = -1;
+ goto abort;
+ }
+ if (read(source,
+ stripes[disk],
+ chunk_size) != chunk_size) {
+ rv = -1;
+ goto abort;
+ }
+ } else {
+ /* read from input buffer */
+ memcpy(stripes[disk],
+ src_buf + read_offset,
+ chunk_size);
+ }
+ read_offset += chunk_size;
+ }
+ /* We have the data, now do the parity */
+ offset = (start/chunk_size/data_disks) * chunk_size;
+ switch (level) {
+ case 4:
+ case 5:
+ disk = geo_map(-1, start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ for (i = 0; i < data_disks; i++)
+ blocks[i] = stripes[(disk+1+i) % raid_disks];
+ xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
+ break;
+ case 6:
+ disk = geo_map(-1, start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ qdisk = geo_map(-2, start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ if (is_ddf(layout)) {
+ /* q over 'raid_disks' blocks, in device order.
+ * 'p' and 'q' get to be all zero
+ */
+ for (i = 0; i < raid_disks; i++)
+ if (i == disk || i == qdisk)
+ blocks[i] = (char*)zero;
+ else
+ blocks[i] = stripes[i];
+ syndrome_disks = raid_disks;
+ } else {
+ /* for md, q is over 'data_disks' blocks,
+ * starting immediately after 'q'
+ */
+ for (i = 0; i < data_disks; i++)
+ blocks[i] = stripes[(qdisk+1+i) % raid_disks];
+
+ syndrome_disks = data_disks;
+ }
+ qsyndrome((uint8_t*)stripes[disk],
+ (uint8_t*)stripes[qdisk],
+ (uint8_t**)blocks,
+ syndrome_disks, chunk_size);
+ break;
+ }
+ for (i=0; i < raid_disks ; i++)
+ if (dest[i] >= 0) {
+ if (lseek64(dest[i],
+ offsets[i]+offset, 0) < 0) {
+ rv = -1;
+ goto abort;
+ }
+ if (write(dest[i], stripes[i],
+ chunk_size) != chunk_size) {
+ rv = -1;
+ goto abort;
+ }
+ }
+ length -= len;
+ start += len;
+ }
+ rv = 0;
+
+abort:
+ free(stripe_buf);
+ free(stripes);
+ free(blocks);
+ return rv;
+}
+
+#ifdef MAIN
+
+int test_stripes(int *source, unsigned long long *offsets,
+ int raid_disks, int chunk_size, int level, int layout,
+ unsigned long long start, unsigned long long length)
+{
+ /* ready the data and p (and q) blocks, and check we got them right */
+ char *stripe_buf = xmalloc(raid_disks * chunk_size);
+ char **stripes = xmalloc(raid_disks * sizeof(char*));
+ char **blocks = xmalloc(raid_disks * sizeof(char*));
+ uint8_t *p = xmalloc(chunk_size);
+ uint8_t *q = xmalloc(chunk_size);
+
+ int i;
+ int diskP, diskQ;
+ int data_disks = raid_disks - (level == 5 ? 1: 2);
+
+ if (!tables_ready)
+ make_tables();
+
+ for ( i = 0 ; i < raid_disks ; i++)
+ stripes[i] = stripe_buf + i * chunk_size;
+
+ while (length > 0) {
+ int disk;
+
+ for (i = 0 ; i < raid_disks ; i++) {
+ if ((lseek64(source[i], offsets[i]+start, 0) < 0) ||
+ (read(source[i], stripes[i], chunk_size) !=
+ chunk_size)) {
+ free(q);
+ free(p);
+ free(blocks);
+ free(stripes);
+ free(stripe_buf);
+ return -1;
+ }
+ }
+ for (i = 0 ; i < data_disks ; i++) {
+ int disk = geo_map(i, start/chunk_size, raid_disks,
+ level, layout);
+ blocks[i] = stripes[disk];
+ printf("%d->%d\n", i, disk);
+ }
+ switch(level) {
+ case 6:
+ qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
+ diskP = geo_map(-1, start/chunk_size, raid_disks,
+ level, layout);
+ if (memcmp(p, stripes[diskP], chunk_size) != 0) {
+ printf("P(%d) wrong at %llu\n", diskP,
+ start / chunk_size);
+ }
+ diskQ = geo_map(-2, start/chunk_size, raid_disks,
+ level, layout);
+ if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
+ printf("Q(%d) wrong at %llu\n", diskQ,
+ start / chunk_size);
+ }
+ disk = raid6_check_disks(data_disks, start, chunk_size,
+ level, layout, diskP, diskQ,
+ p, q, stripes);
+ if(disk >= 0) {
+ printf("Possible failed disk: %d\n", disk);
+ }
+ if(disk == -2) {
+ printf("Failure detected, but disk unknown\n");
+ }
+ break;
+ }
+ length -= chunk_size;
+ start += chunk_size;
+ }
+ return 0;
+}
+
+unsigned long long getnum(char *str, char **err)
+{
+ char *e;
+ unsigned long long rv = strtoull(str, &e, 10);
+ if (e==str || *e) {
+ *err = str;
+ return 0;
+ }
+ return rv;
+}
+
+char const Name[] = "test_restripe";
+int main(int argc, char *argv[])
+{
+ /* save/restore file raid_disks chunk_size level layout start length devices...
+ */
+ int save;
+ int *fds;
+ char *file;
+ char *buf;
+ int storefd;
+ unsigned long long *offsets;
+ int raid_disks, chunk_size, level, layout;
+ unsigned long long start, length;
+ int i;
+
+ char *err = NULL;
+ if (argc < 10) {
+ fprintf(stderr, "Usage: test_stripe save/restore file raid_disks chunk_size level layout start length devices...\n");
+ exit(1);
+ }
+ if (strcmp(argv[1], "save")==0)
+ save = 1;
+ else if (strcmp(argv[1], "restore") == 0)
+ save = 0;
+ else if (strcmp(argv[1], "test") == 0)
+ save = 2;
+ else {
+ fprintf(stderr, "test_stripe: must give 'save' or 'restore'.\n");
+ exit(2);
+ }
+
+ file = argv[2];
+ raid_disks = getnum(argv[3], &err);
+ chunk_size = getnum(argv[4], &err);
+ level = getnum(argv[5], &err);
+ layout = getnum(argv[6], &err);
+ start = getnum(argv[7], &err);
+ length = getnum(argv[8], &err);
+ if (err) {
+ fprintf(stderr, "test_stripe: Bad number: %s\n", err);
+ exit(2);
+ }
+ if (argc != raid_disks + 9) {
+ fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n",
+ raid_disks, argc-9);
+ exit(2);
+ }
+ fds = xmalloc(raid_disks * sizeof(*fds));
+ offsets = xcalloc(raid_disks, sizeof(*offsets));
+
+ storefd = open(file, O_RDWR);
+ if (storefd < 0) {
+ perror(file);
+ fprintf(stderr, "test_stripe: could not open %s.\n", file);
+ exit(3);
+ }
+ for (i=0; i<raid_disks; i++) {
+ char *p;
+ p = strchr(argv[9+i], ':');
+
+ if(p != NULL) {
+ *p++ = '\0';
+ offsets[i] = atoll(p) * 512;
+ }
+
+ fds[i] = open(argv[9+i], O_RDWR);
+ if (fds[i] < 0) {
+ perror(argv[9+i]);
+ fprintf(stderr,"test_stripe: cannot open %s.\n", argv[9+i]);
+ exit(3);
+ }
+ }
+
+ buf = xmalloc(raid_disks * chunk_size);
+
+ if (save == 1) {
+ int rv = save_stripes(fds, offsets,
+ raid_disks, chunk_size, level, layout,
+ 1, &storefd,
+ start, length, buf);
+ if (rv != 0) {
+ fprintf(stderr,
+ "test_stripe: save_stripes returned %d\n", rv);
+ exit(1);
+ }
+ } else if (save == 2) {
+ int rv = test_stripes(fds, offsets,
+ raid_disks, chunk_size, level, layout,
+ start, length);
+ if (rv != 0) {
+ fprintf(stderr,
+ "test_stripe: test_stripes returned %d\n", rv);
+ exit(1);
+ }
+ } else {
+ int rv = restore_stripes(fds, offsets,
+ raid_disks, chunk_size, level, layout,
+ storefd, 0ULL,
+ start, length, NULL);
+ if (rv != 0) {
+ fprintf(stderr,
+ "test_stripe: restore_stripes returned %d\n",
+ rv);
+ exit(1);
+ }
+ }
+ exit(0);
+}
+
+#endif /* MAIN */