summaryrefslogtreecommitdiffstats
path: root/storage/maria/ma_control_file.c
blob: 65b8b0922aa924faa16ba40a1f44164bb77eadbf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
/* Copyright (C) 2007 MySQL AB & Guilhem Bichot & Michael Widenius

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */

/*
  WL#3234 Maria control file
  First version written by Guilhem Bichot on 2006-04-27.
*/

#ifndef EXTRACT_DEFINITIONS
#include "maria_def.h"
#include "ma_checkpoint.h"
#endif

/*
  A control file contains the following objects:

Start of create time variables (at start of file):
  - Magic string (including version number of Maria control file)
  - Uuid
  - Size of create time part
  - Size of dynamic part
  - Maria block size
.....  Here we can add new variables without changing format
  - Checksum of create time part (last of block)

Start of changeable part:
  - Checksum of changeable part
  - LSN of last checkpoint
  - Number of last log file
  - Max trid in control file (since Maria 1.5 May 2008)
  - Number of consecutive recovery failures (since Maria 1.5 May 2008)
.....  Here we can add new variables without changing format

The idea is that one can add new variables to the control file and still
use it with old program versions. If one needs to do an incompatible change
one should increment the control file version number.
*/

/*
  Upper bound on the whole control file. It is kept within one 512-byte
  disk sector so that a write is as atomic as possible; a file larger than
  this is rejected when it is opened.
*/
#define CF_MAX_SIZE 512
/*
  Smallest acceptable file: the create-time fields up to and including the
  block size, the two checksums (one per part), the checkpoint LSN and the
  log file number. This equals
  CF_MIN_CREATE_TIME_TOTAL_SIZE + CF_MIN_CHANGEABLE_TOTAL_SIZE.
*/
#define CF_MIN_SIZE (CF_BLOCKSIZE_OFFSET + CF_BLOCKSIZE_SIZE + \
                     CF_CHECKSUM_SIZE * 2 + CF_LSN_SIZE + CF_FILENO_SIZE)

/*
  Create time variables.
  Offsets in this group are relative to the start of the file.
*/
#define CF_MAGIC_STRING "\xfe\xfe\xc"
#define CF_MAGIC_STRING_OFFSET 0
/* sizeof() counts the terminating NUL, which is not stored in the file */
#define CF_MAGIC_STRING_SIZE   (sizeof(CF_MAGIC_STRING)-1)
#define CF_VERSION_OFFSET      (CF_MAGIC_STRING_OFFSET + CF_MAGIC_STRING_SIZE)
#define CF_VERSION_SIZE        1
#define CF_UUID_OFFSET         (CF_VERSION_OFFSET + CF_VERSION_SIZE)
#define CF_UUID_SIZE           MY_UUID_SIZE
/* Two 2-byte fields: size of create-time part, then size of changeable part */
#define CF_CREATE_TIME_SIZE_OFFSET  (CF_UUID_OFFSET + CF_UUID_SIZE)
#define CF_SIZE_SIZE           2
#define CF_CHANGEABLE_SIZE_OFFSET   (CF_CREATE_TIME_SIZE_OFFSET + CF_SIZE_SIZE)
#define CF_BLOCKSIZE_OFFSET    (CF_CHANGEABLE_SIZE_OFFSET + CF_SIZE_SIZE)
#define CF_BLOCKSIZE_SIZE      2

/* Size of the create-time part as written by this version (checksum last) */
#define CF_CREATE_TIME_TOTAL_SIZE (CF_BLOCKSIZE_OFFSET + CF_BLOCKSIZE_SIZE + \
                                   CF_CHECKSUM_SIZE)

/*
  Start of the part that changes during execution.
  Offsets in this group are relative to the start of the changeable part,
  which directly follows the create-time part, i.e. it begins at file offset
  uint2korr(file + CF_CREATE_TIME_SIZE_OFFSET).
*/
#define CF_CHECKSUM_OFFSET 0
#define CF_CHECKSUM_SIZE 4
#define CF_LSN_OFFSET (CF_CHECKSUM_OFFSET + CF_CHECKSUM_SIZE)
#define CF_LSN_SIZE LSN_STORE_SIZE
#define CF_FILENO_OFFSET (CF_LSN_OFFSET + CF_LSN_SIZE)
#define CF_FILENO_SIZE 4
#define CF_MAX_TRID_OFFSET (CF_FILENO_OFFSET + CF_FILENO_SIZE)
#define CF_MAX_TRID_SIZE TRANSID_SIZE
#define CF_RECOV_FAIL_OFFSET (CF_MAX_TRID_OFFSET + CF_MAX_TRID_SIZE)
#define CF_RECOV_FAIL_SIZE 1
/* Size of the changeable part as written by this version */
#define CF_CHANGEABLE_TOTAL_SIZE (CF_RECOV_FAIL_OFFSET + CF_RECOV_FAIL_SIZE)

/*
  The following values should not be changed, except when changing version
  number of the maria control file. These are the minimum sizes of the
  parts the code can handle: a file whose header announces smaller parts is
  rejected as inconsistent when it is opened.
*/

#define CF_MIN_CREATE_TIME_TOTAL_SIZE \
(CF_BLOCKSIZE_OFFSET + CF_BLOCKSIZE_SIZE + CF_CHECKSUM_SIZE)
#define CF_MIN_CHANGEABLE_TOTAL_SIZE \
(CF_FILENO_OFFSET + CF_FILENO_SIZE)

#ifndef EXTRACT_DEFINITIONS

/*
  This module owns the following variables. They mirror the values stored in
  the changeable part of the control file and are reset by
  ma_control_file_end().
*/
/**
   This LSN serves for the two-checkpoint rule, and also to find the
   checkpoint record when doing a recovery.
*/
LSN    last_checkpoint_lsn= LSN_IMPOSSIBLE;
/** Number of the last log file, as stored in the control file. */
uint32 last_logno=          FILENO_IMPOSSIBLE;
/**
   The maximum transaction id given to a transaction. It is only updated at
   clean shutdown (in case of crash, logs have better information).
*/
TrID   max_trid_in_control_file= 0;

/**
  Number of consecutive log or recovery failures. Reset to 0 after recovery's
  success.
*/
uint8 recovery_failures= 0;

/**
   @brief If log's lock should be asserted when writing to control file.

   Can be re-used by any function which needs to be thread-safe except when
   it is called at startup.
*/
my_bool maria_multi_threaded= FALSE;
/** @brief if currently doing a recovery */
my_bool maria_in_recovery= FALSE;

/**
  Descriptor of the open control file, or -1 when the file is not open.
  The control file is no larger than 512 bytes (a disk sector),
  to be as atomic as possible.
*/
static int control_file_fd= -1;

/*
  Sizes of the two parts of the currently open control file. They are set
  when the file is created or opened; cf_create_time_size is also the file
  offset at which the changeable part is written.
*/
static uint cf_create_time_size;
static uint cf_changeable_size;

/**
   @brief Create Maria control file
*/

static CONTROL_FILE_ERROR create_control_file(const char *name,
                                              int open_flags)
{
  uint32 sum;
  uchar buffer[CF_CREATE_TIME_TOTAL_SIZE];
  ulong rnd1,rnd2;

  DBUG_ENTER("maria_create_control_file");

  if ((control_file_fd= mysql_file_create(key_file_control, name, 0,
                                  open_flags, MYF(MY_SYNC_DIR | MY_WME))) < 0)
    DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);

  /* Reset variables, as we are creating the file */
  cf_create_time_size= CF_CREATE_TIME_TOTAL_SIZE;
  cf_changeable_size=  CF_CHANGEABLE_TOTAL_SIZE;

  /* Create unique uuid for the control file */
  my_random_bytes((uchar *)&rnd1, sizeof (rnd1));
  my_random_bytes((uchar *)&rnd2, sizeof (rnd2));
  my_uuid_init(rnd1, rnd2);
  my_uuid(maria_uuid);

  /* Prepare and write the file header */
  memcpy(buffer, CF_MAGIC_STRING, CF_MAGIC_STRING_SIZE);
  buffer[CF_VERSION_OFFSET]= CONTROL_FILE_VERSION;
  memcpy(buffer + CF_UUID_OFFSET, maria_uuid, CF_UUID_SIZE);
  int2store(buffer + CF_CREATE_TIME_SIZE_OFFSET, cf_create_time_size);
  int2store(buffer + CF_CHANGEABLE_SIZE_OFFSET, cf_changeable_size);

  /* Write create time variables */
  int2store(buffer + CF_BLOCKSIZE_OFFSET, maria_block_size);

  /* Store checksum for create time parts */
  sum= (uint32) my_checksum(0, buffer, cf_create_time_size -
                            CF_CHECKSUM_SIZE);
  int4store(buffer + cf_create_time_size - CF_CHECKSUM_SIZE, sum);

  if (my_pwrite(control_file_fd, buffer, cf_create_time_size,
                0, MYF(MY_FNABP |  MY_WME)))
    DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);

  /*
    To be safer we should make sure that there are no logs or data/index
    files around (indeed it could be that the control file alone was deleted
    or not restored, and we should not go on with life at this point).

    Things should still be relatively safe as if someone tries to use
    an old table with a new control file the different uuid:s between
    the files will cause ma_open() to generate an HA_ERR_OLD_FILE
    error. When used from mysqld this will cause the table to be open
    in repair mode which will remove all dependencies between the
    table and the old control file.

    We could have a tool which can rebuild the control file, by reading the
    directory of logs, finding the newest log, reading it to find last
    checkpoint... Slow but can save your db. For this to be possible, we
    must always write to the control file right after writing the checkpoint
    log record, and do nothing in between (i.e. the checkpoint must be
    usable as soon as it has been written to the log).
  */

  /* init the file with these "undefined" values */
  DBUG_RETURN(ma_control_file_write_and_force(LSN_IMPOSSIBLE,
                                              FILENO_IMPOSSIBLE, 0, 0));
}


/**
  Locks control file exclusively. This is kept for the duration of the engine
  process, to prevent another Maria instance to write to our logs or control
  file.

  @param name      file name, only used in the error message
  @param do_retry  if true, retry once per second for up to
                   MARIA_MAX_CONTROL_FILE_LOCK_RETRY times; if false, make a
                   single attempt

  @return 0 if the lock was obtained (always 0 when __WIN__ is defined, as
          no locking is done there), 1 if it could not be obtained
*/

static int lock_control_file(const char *name, my_bool do_retry)
{
  /*
    On Windows, my_lock() uses locking() which is mandatory locking and so
    prevents maria-recovery.test from copying the control file. And in case of
    crash, it may take a while for Windows to unlock file, causing downtime.
  */
  /**
    @todo BUG We should explore my_sopen(_SH_DENYWRD) to open or create the
    file under Windows.
  */
#ifndef __WIN__
  uint max_retries= do_retry ? MARIA_MAX_CONTROL_FILE_LOCK_RETRY : 0;
  uint failed_attempts;

  /*
    We can't here use the automatic wait in my_lock() as the alarm thread
    may not yet exists. So we poll: one non-blocking attempt per iteration.
  */
  for (failed_attempts= 0; ; failed_attempts++)
  {
    if (!my_lock(control_file_fd, F_WRLCK, 0L, F_TO_EOF,
                 MYF(MY_SEEK_NOT_DONE | MY_FORCE_LOCK | MY_NO_WAIT)))
      break;                                    /* got the lock */

    /* Report the problem once, on the first failed attempt only */
    if (failed_attempts == 0)
      my_printf_error(HA_ERR_INITIALIZATION,
                      "Can't lock aria control file '%s' for exclusive use, "
                      "error: %d. Will retry for %d seconds", 0,
                      name, my_errno, max_retries);

    /* Give up once the initial attempt and all retries have failed */
    if (failed_attempts >= max_retries)
      return 1;
    sleep(1);
  }
#endif
  return 0;
}


/**
  @brief Initialize control file subsystem

  Looks for the control file. If none and creation is requested, creates file.
  If present, reads it to find out last checkpoint's LSN and last log, updates
  the last_checkpoint_lsn and last_logno global variables (and, when the file
  contains them, max_trid_in_control_file and recovery_failures).
  Called at engine's start.

  The file is validated before any global is updated from it: size limits,
  magic string, version, stored part sizes, block size and the checksums of
  both parts. In particular maria_block_size is only taken from the file once
  both checksums have been verified, so a corrupted file cannot leave a
  garbage block size behind.

  @note
    The format of the control file is defined in the comments and defines
    at the start of this file.

  @param create_if_missing create file if not found
  @param print_error       if true, report a failure with my_printf_error()
  @param wait_for_lock     if true, retry for a while when the file is locked
                           by someone else (see lock_control_file())

  @return Operation status
    @retval 0      OK (also returned at once if the file is already open)
    @retval != 0   A CONTROL_FILE_ERROR code describing the failure (in which
                   case the file is left closed)
*/

CONTROL_FILE_ERROR ma_control_file_open(my_bool create_if_missing,
                                        my_bool print_error,
                                        my_bool wait_for_lock)
{
  uchar buffer[CF_MAX_SIZE];
  char name[FN_REFLEN], errmsg_buff[256];
  const char *errmsg, *lock_failed_errmsg= "Could not get an exclusive lock;"
    " file is probably in use by another process";
  uint new_cf_create_time_size, new_cf_changeable_size, new_block_size;
  my_off_t file_size;
  int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR | O_CLOEXEC;
  int error= CONTROL_FILE_UNKNOWN_ERROR;
  DBUG_ENTER("ma_control_file_open");

  /*
    If you change sizes in the #defines, you at least have to change the
    "*store" and "*korr" calls in this file, and can even create backward
    compatibility problems. Beware!
  */
  DBUG_ASSERT(CF_LSN_SIZE == (3+4));
  DBUG_ASSERT(CF_FILENO_SIZE == 4);

  if (control_file_fd >= 0) /* already open */
    DBUG_RETURN(0);

  if (fn_format(name, CONTROL_FILE_BASE_NAME,
                maria_data_root, "", MYF(MY_WME)) == NullS)
    DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);

  if (my_access(name,F_OK))
  {
    /* File does not exist (or is not accessible): create it if allowed */
    CONTROL_FILE_ERROR create_error;
    if (!create_if_missing)
    {
      error= CONTROL_FILE_MISSING;
      errmsg= "Can't find file";
      goto err;
    }
    if ((create_error= create_control_file(name, open_flags)))
    {
      error= create_error;
      errmsg= "Can't create file";
      goto err;
    }
    if (lock_control_file(name, wait_for_lock))
    {
      error= CONTROL_FILE_LOCKED;
      errmsg= lock_failed_errmsg;
      goto err;
    }
    /* A file we just wrote ourselves needs no validation */
    goto ok;
  }

  /* Otherwise, file exists */
  if ((control_file_fd= mysql_file_open(key_file_control, name,
                                        open_flags, MYF(MY_WME))) < 0)
  {
    errmsg= "Can't open file";
    goto err;
  }

  /* lock it before reading content */
  if (lock_control_file(name, wait_for_lock))
  {
    error= CONTROL_FILE_LOCKED;
    errmsg= lock_failed_errmsg;
    goto err;
  }

  file_size= mysql_file_seek(control_file_fd, 0, SEEK_END, MYF(MY_WME));
  if (file_size == MY_FILEPOS_ERROR)
  {
    errmsg= "Can't read size";
    goto err;
  }
  if (file_size < CF_MIN_SIZE)
  {
    /*
      Given that normally we write only a sector and it's atomic, the only
      possibility for a file to be of too short size is if we crashed at the
      very first startup, between file creation and file write. Quite unlikely
      (and can be made even more unlikely by doing this: create a temp file,
      write it, and then rename it to be the control file).
      What's more likely is if someone forgot to restore the control file,
      just did a "touch control" to try to get Maria to start, or if the
      disk/filesystem has a problem.
      So let's be rigid.
    */
    error= CONTROL_FILE_TOO_SMALL;
    errmsg= "Size of control file is smaller than expected";
    goto err;
  }

  /* Check if control file is unexpectedly big (it must fit in buffer) */
  if (file_size > CF_MAX_SIZE)
  {
    error= CONTROL_FILE_TOO_BIG;
    errmsg= "File size bigger than expected";
    goto err;
  }

  if (mysql_file_pread(control_file_fd, buffer, (size_t)file_size, 0, MYF(MY_FNABP)))
  {
    errmsg= "Can't read file";
    goto err;
  }

  if (memcmp(buffer + CF_MAGIC_STRING_OFFSET,
             CF_MAGIC_STRING, CF_MAGIC_STRING_SIZE))
  {
    error= CONTROL_FILE_BAD_MAGIC_STRING;
    errmsg= "Missing valid id at start of file. File is not a valid aria control file";
    goto err;
  }

  if (buffer[CF_VERSION_OFFSET] > CONTROL_FILE_VERSION)
  {
    error= CONTROL_FILE_BAD_VERSION;
    my_snprintf(errmsg_buff, sizeof(errmsg_buff),
                "File is from a future aria system: %d. Current version is: %d",
                (int) buffer[CF_VERSION_OFFSET], CONTROL_FILE_VERSION);
    errmsg= errmsg_buff;
    goto err;
  }

  new_cf_create_time_size= uint2korr(buffer + CF_CREATE_TIME_SIZE_OFFSET);
  new_cf_changeable_size=  uint2korr(buffer + CF_CHANGEABLE_SIZE_OFFSET);

  /*
    The two stored sizes must add up to the file size; together with the
    CF_MAX_SIZE check above this guarantees that all accesses below stay
    inside the bytes read into buffer.
  */
  if (new_cf_create_time_size < CF_MIN_CREATE_TIME_TOTAL_SIZE ||
      new_cf_changeable_size <  CF_MIN_CHANGEABLE_TOTAL_SIZE ||
      new_cf_create_time_size + new_cf_changeable_size != file_size)
  {
    error= CONTROL_FILE_INCONSISTENT_INFORMATION;
    errmsg= "Sizes stored in control file are inconsistent";
    goto err;
  }

  /* A maria_block_size of 0 means "not given": accept the one in the file */
  new_block_size= uint2korr(buffer + CF_BLOCKSIZE_OFFSET);
  if (new_block_size != maria_block_size && maria_block_size)
  {
    error= CONTROL_FILE_WRONG_BLOCKSIZE;
    my_snprintf(errmsg_buff, sizeof(errmsg_buff),
                "Block size in control file (%u) is different than given aria_block_size: %u",
                new_block_size, (uint) maria_block_size);
    errmsg= errmsg_buff;
    goto err;
  }

  /* Checksum of the create-time part is stored in its last bytes */
  if (my_checksum(0, buffer, new_cf_create_time_size - CF_CHECKSUM_SIZE) !=
      uint4korr(buffer + new_cf_create_time_size - CF_CHECKSUM_SIZE))
  {
    error= CONTROL_FILE_BAD_HEAD_CHECKSUM;
    errmsg= "Fixed part checksum mismatch";
    goto err;
  }

  /* Checksum of the changeable part is stored in its first bytes */
  if (my_checksum(0, buffer + new_cf_create_time_size + CF_CHECKSUM_SIZE,
                  new_cf_changeable_size - CF_CHECKSUM_SIZE) !=
      uint4korr(buffer + new_cf_create_time_size))
  {
    error= CONTROL_FILE_BAD_CHECKSUM;
    errmsg= "Changeable part (end of control file) checksum mismatch";
    goto err;
  }

  /* The file is valid: only now publish its content in the globals */
  maria_block_size= new_block_size;
  memcpy(maria_uuid, buffer + CF_UUID_OFFSET, CF_UUID_SIZE);
  cf_create_time_size= new_cf_create_time_size;
  cf_changeable_size=  new_cf_changeable_size;
  last_checkpoint_lsn= lsn_korr(buffer + new_cf_create_time_size +
                                CF_LSN_OFFSET);
  last_logno= uint4korr(buffer + new_cf_create_time_size + CF_FILENO_OFFSET);
  /* The next two fields are absent from files with a short changeable part */
  if (new_cf_changeable_size >= (CF_MAX_TRID_OFFSET + CF_MAX_TRID_SIZE))
    max_trid_in_control_file=
      transid_korr(buffer + new_cf_create_time_size + CF_MAX_TRID_OFFSET);
  if (new_cf_changeable_size >= (CF_RECOV_FAIL_OFFSET + CF_RECOV_FAIL_SIZE))
    recovery_failures=
      (buffer + new_cf_create_time_size + CF_RECOV_FAIL_OFFSET)[0];

ok:
  DBUG_RETURN(0);

err:
  if (print_error)
    my_printf_error(HA_ERR_INITIALIZATION,
                    "Got error '%s' when trying to use aria control file "
                    "'%s'", 0, errmsg, name);
  ma_control_file_end(); /* will unlock file if needed */
  DBUG_RETURN(error);
}


/*
  Write information durably to the control file; stores this information into
  the last_checkpoint_lsn, last_logno, max_trid_in_control_file,
  recovery_failures global variables.
  Called when we have created a new log (after syncing this log's creation),
  when we have written a checkpoint (after syncing this log record), at
  shutdown (for storing trid in case logs are soon removed by user), and
  before and after recovery (to store recovery_failures).
  Variables last_checkpoint_lsn and last_logno must be protected by caller
  using log's lock, unless this function is called at startup.

  SYNOPSIS
    ma_control_file_write_and_force()
    last_checkpoint_lsn_arg LSN of last checkpoint
    last_logno_arg          last log file number
    max_trid_arg            maximum transaction longid
    recovery_failures_arg   consecutive recovery failures

  NOTE
    We always want to do one single my_pwrite() here to be as atomic as
    possible.
    Only the changeable part is written; it goes to file offset
    cf_create_time_size. The globals are updated only after the write (and
    the sync, when one is needed) succeeded.

  RETURN
    0 - OK
    1 - Error (control file not open, or write/sync failed)
*/

int ma_control_file_write_and_force(LSN last_checkpoint_lsn_arg,
                                    uint32 last_logno_arg,
                                    TrID max_trid_arg,
                                    uint8 recovery_failures_arg)
{
  uchar buffer[CF_MAX_SIZE];
  uint32 sum;
  my_bool no_need_sync;
  DBUG_ENTER("ma_control_file_write_and_force");

  if (control_file_fd < 0)
    DBUG_RETURN(1);

  /*
    We don't need to sync if this is just an increase of
    recovery_failures: it's even good if that counter is not increased on disk
    in case of power or hardware failure (less false positives when removing
    logs).
  */
  no_need_sync= ((last_checkpoint_lsn == last_checkpoint_lsn_arg) &&
                 (last_logno == last_logno_arg) &&
                 (max_trid_in_control_file == max_trid_arg) &&
                 (recovery_failures_arg > 0));

#ifndef DBUG_OFF
  if (maria_multi_threaded)
    translog_lock_handler_assert_owner();
#endif

  /* Build the changeable part; the checksum slot at the start is set last */
  lsn_store(buffer + CF_LSN_OFFSET, last_checkpoint_lsn_arg);
  int4store(buffer + CF_FILENO_OFFSET, last_logno_arg);
  transid_store(buffer + CF_MAX_TRID_OFFSET, max_trid_arg);
  (buffer + CF_RECOV_FAIL_OFFSET)[0]= recovery_failures_arg;

  if (cf_changeable_size > CF_CHANGEABLE_TOTAL_SIZE)
  {
    /*
      More room than needed for us. Must be a newer version. Clear part which
      we cannot maintain, so that any future version notices we didn't
      maintain its extra data.
    */
    uint zeroed= cf_changeable_size - CF_CHANGEABLE_TOTAL_SIZE;
    char msg[150];
    bzero(buffer + CF_CHANGEABLE_TOTAL_SIZE, zeroed);
    /*
      The buffer is written at file offset cf_create_time_size, so the
      zeroed bytes start at cf_create_time_size + CF_CHANGEABLE_TOTAL_SIZE
      (and not at the end of the file).
    */
    my_snprintf(msg, sizeof(msg),
                "Control file must be from a newer version; zero-ing out %u"
                " unknown bytes in control file at offset %u", zeroed,
                (uint) (cf_create_time_size + CF_CHANGEABLE_TOTAL_SIZE));
    ma_message_no_user(ME_WARNING, msg);
  }
  else
  {
    /* not enough room for what we need to store: enlarge */
    cf_changeable_size= CF_CHANGEABLE_TOTAL_SIZE;
  }
  /* Note that the create-time portion is not touched */

  /* Checksum is stored first and covers the rest of the changeable part */
  compile_time_assert(CF_CHECKSUM_OFFSET == 0);
  sum= my_checksum(0, buffer + CF_CHECKSUM_SIZE,
                   cf_changeable_size - CF_CHECKSUM_SIZE);
  int4store(buffer, sum);

  if (my_pwrite(control_file_fd, buffer, cf_changeable_size,
                cf_create_time_size, MYF(MY_FNABP |  MY_WME)) ||
      (!no_need_sync && mysql_file_sync(control_file_fd, MYF(MY_WME))))
    DBUG_RETURN(1);

  last_checkpoint_lsn= last_checkpoint_lsn_arg;
  last_logno= last_logno_arg;
  max_trid_in_control_file= max_trid_arg;
  recovery_failures= recovery_failures_arg;

  /*
    NOTE(review): from here on, later calls write and checksum only
    CF_CHANGEABLE_TOTAL_SIZE bytes, while the changeable size recorded in the
    create-time part of the file is left as it was. Confirm that
    ma_control_file_open() still accepts the file after a second write when
    the two sizes differ.
  */
  cf_changeable_size= CF_CHANGEABLE_TOTAL_SIZE; /* no more warning */
  DBUG_RETURN(0);
}


/*
  Free resources taken by control file subsystem

  SYNOPSIS
    ma_control_file_end()

  NOTE
    Unlocks (except when __WIN__ is defined) and closes the control file and
    resets the module's global variables. Does nothing if the file is not
    open.

  RETURN
    0     - OK, or the file was already closed
    other - error returned by mysql_file_close()
*/

int ma_control_file_end(void)
{
  int result= 0;
  DBUG_ENTER("ma_control_file_end");

  if (control_file_fd >= 0)
  {
#ifndef __WIN__
    /* Release the exclusive lock; a failure here is deliberately ignored */
    (void) my_lock(control_file_fd, F_UNLCK, 0L, F_TO_EOF,
                   MYF(MY_SEEK_NOT_DONE | MY_FORCE_LOCK));
#endif

    result= mysql_file_close(control_file_fd, MYF(MY_WME));

    /*
      As mysql_file_close() frees structures even if close() fails, we do the
      same, i.e. we mark the file as closed in all cases.
    */
    control_file_fd= -1;

    /*
      As this module owns these variables, closing the module forbids access
      to them (just a safety):
    */
    last_checkpoint_lsn= LSN_IMPOSSIBLE;
    last_logno= FILENO_IMPOSSIBLE;
    recovery_failures= 0;
    max_trid_in_control_file= 0;
  }

  DBUG_RETURN(result);
}


/**
  Tells if control file is initialized.

  @return non-zero if the control file is currently open (its descriptor is
          valid), 0 otherwise
*/

my_bool ma_control_file_inited(void)
{
  /* A negative descriptor is this module's marker for "not open" */
  return !(control_file_fd < 0);
}

/**
   Print content of aria_log_control file

   Opens the control file on its own descriptor (the module's
   control_file_fd is not used and no lock is taken), checks its size, magic
   string and stored part sizes, and prints the fields to stdout. Checksums
   are not verified here. The descriptor is closed before returning, on
   success as well as on error.

   @return 0 on success; otherwise a non-zero error code (a
           CONTROL_FILE_ERROR value), after an error message has been issued
           with my_printf_error() (no message if the file name could not be
           built)
*/

my_bool print_aria_log_control()
{
  uchar buffer[CF_MAX_SIZE];
  char name[FN_REFLEN], uuid_str[MY_UUID_STRING_LENGTH+1];
  const char *errmsg;
  uint new_cf_create_time_size, new_cf_changeable_size;
  my_off_t file_size;
  ulong logno;
  ulonglong trid,checkpoint_lsn;
  int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR | O_CLOEXEC;
  int error= CONTROL_FILE_UNKNOWN_ERROR;
  uint recovery_fails;
  File file= -1;                         /* -1: nothing to close at 'err' */
  DBUG_ENTER("print_aria_log_control");

  if (fn_format(name, CONTROL_FILE_BASE_NAME,
                maria_data_root, "", MYF(MY_WME)) == NullS)
    DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);

  if ((file= mysql_file_open(key_file_control, name,
                             open_flags, MYF(MY_WME))) < 0)
  {
    errmsg= "Can't open file";
    goto err;
  }

  file_size= mysql_file_seek(file, 0, SEEK_END, MYF(MY_WME));
  if (file_size == MY_FILEPOS_ERROR)
  {
    errmsg= "Can't read size";
    goto err;
  }
  if (file_size < CF_MIN_SIZE)
  {
    /*
      Given that normally we write only a sector and it's atomic, the only
      possibility for a file to be of too short size is if we crashed at the
      very first startup, between file creation and file write. Quite unlikely
      (and can be made even more unlikely by doing this: create a temp file,
      write it, and then rename it to be the control file).
      What's more likely is if someone forgot to restore the control file,
      just did a "touch control" to try to get Maria to start, or if the
      disk/filesystem has a problem.
      So let's be rigid.
    */
    error= CONTROL_FILE_TOO_SMALL;
    errmsg= "Size of control file is smaller than expected";
    goto err;
  }

  /* Check if control file is unexpectedly big (it must fit in buffer) */
  if (file_size > CF_MAX_SIZE)
  {
    error= CONTROL_FILE_TOO_BIG;
    errmsg= "File size bigger than expected";
    goto err;
  }

  if (mysql_file_pread(file, buffer, (size_t)file_size, 0, MYF(MY_FNABP)))
  {
    errmsg= "Can't read file";
    goto err;
  }

  if (memcmp(buffer + CF_MAGIC_STRING_OFFSET,
             CF_MAGIC_STRING, CF_MAGIC_STRING_SIZE))
  {
    error= CONTROL_FILE_BAD_MAGIC_STRING;
    errmsg= "Missing valid id at start of file. File is not a valid aria control file";
    goto err;
  }

  printf("Aria file version:   %u\n", buffer[CF_VERSION_OFFSET]);

  new_cf_create_time_size= uint2korr(buffer + CF_CREATE_TIME_SIZE_OFFSET);
  new_cf_changeable_size=  uint2korr(buffer + CF_CHANGEABLE_SIZE_OFFSET);

  /*
    The two stored sizes must add up to the file size, which keeps all the
    reads below inside the bytes that were read into buffer.
  */
  if (new_cf_create_time_size < CF_MIN_CREATE_TIME_TOTAL_SIZE ||
      new_cf_changeable_size <  CF_MIN_CHANGEABLE_TOTAL_SIZE ||
      new_cf_create_time_size + new_cf_changeable_size != file_size)
  {
    error= CONTROL_FILE_INCONSISTENT_INFORMATION;
    errmsg= "Sizes stored in control file are inconsistent";
    goto err;
  }
  checkpoint_lsn= lsn_korr(buffer + new_cf_create_time_size +
                           CF_LSN_OFFSET);
  logno= uint4korr(buffer + new_cf_create_time_size + CF_FILENO_OFFSET);
  my_uuid2str(buffer + CF_UUID_OFFSET, uuid_str);
  uuid_str[MY_UUID_STRING_LENGTH]= 0;

  printf("Block size:          %u\n", uint2korr(buffer + CF_BLOCKSIZE_OFFSET));
  printf("maria_uuid:          %s\n", uuid_str);
  printf("last_checkpoint_lsn: " LSN_FMT "\n", LSN_IN_PARTS(checkpoint_lsn));
  printf("last_log_number:     %lu\n", (ulong) logno);
  /* The next two fields are absent from files with a short changeable part */
  if (new_cf_changeable_size >= (CF_MAX_TRID_OFFSET + CF_MAX_TRID_SIZE))
  {
    trid= transid_korr(buffer + new_cf_create_time_size + CF_MAX_TRID_OFFSET);
    printf("trid:                %llu\n", (ulonglong) trid);
  }
  if (new_cf_changeable_size >= (CF_RECOV_FAIL_OFFSET + CF_RECOV_FAIL_SIZE))
  {
    recovery_fails=
      (buffer + new_cf_create_time_size + CF_RECOV_FAIL_OFFSET)[0];
    printf("recovery_failures:   %u\n", recovery_fails);
  }

  /* Everything was read into buffer; a close error cannot affect the output */
  (void) mysql_file_close(file, MYF(0));
  DBUG_RETURN(0);

err:
  my_printf_error(HA_ERR_INITIALIZATION,
                  "Got error '%s' when trying to use aria control file "
                  "'%s'", 0, errmsg, name);
  /* Do not leak the descriptor when failing after a successful open */
  if (file >= 0)
    (void) mysql_file_close(file, MYF(0));
  DBUG_RETURN(error);
}

#endif /* EXTRACT_DEFINITIONS */