diff options
Diffstat (limited to 'src/bin/pg_test_fsync/pg_test_fsync.c')
-rw-r--r-- | src/bin/pg_test_fsync/pg_test_fsync.c | 652 |
1 files changed, 652 insertions, 0 deletions
diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c new file mode 100644 index 0000000..f7bc199 --- /dev/null +++ b/src/bin/pg_test_fsync/pg_test_fsync.c @@ -0,0 +1,652 @@ +/* + * pg_test_fsync.c + * tests all supported fsync() methods + */ + +#include "postgres_fe.h" + +#include <limits.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <fcntl.h> +#include <time.h> +#include <unistd.h> +#include <signal.h> + +#include "access/xlogdefs.h" +#include "common/logging.h" +#include "common/pg_prng.h" +#include "getopt_long.h" + +/* + * put the temp files in the local directory + * unless the user specifies otherwise + */ +#define FSYNC_FILENAME "./pg_test_fsync.out" + +#define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024) + +#define LABEL_FORMAT " %-30s" +#define NA_FORMAT "%21s\n" +/* translator: maintain alignment with NA_FORMAT */ +#define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n") +#define USECS_SEC 1000000 + +/* These are macros to avoid timing the function call overhead. */ +#ifndef WIN32 +#define START_TIMER \ +do { \ + alarm_triggered = false; \ + alarm(secs_per_test); \ + gettimeofday(&start_t, NULL); \ +} while (0) +#else +/* WIN32 doesn't support alarm, so we create a thread and sleep there */ +#define START_TIMER \ +do { \ + alarm_triggered = false; \ + if (CreateThread(NULL, 0, process_alarm, NULL, 0, NULL) == \ + INVALID_HANDLE_VALUE) \ + pg_fatal("could not create thread for alarm"); \ + gettimeofday(&start_t, NULL); \ +} while (0) +#endif + +#define STOP_TIMER \ +do { \ + gettimeofday(&stop_t, NULL); \ + print_elapse(start_t, stop_t, ops); \ +} while (0) + + +static const char *progname; + +static unsigned int secs_per_test = 5; +static int needs_unlink = 0; +static char full_buf[DEFAULT_XLOG_SEG_SIZE], + *buf, + *filename = FSYNC_FILENAME; +static struct timeval start_t, + stop_t; +static bool alarm_triggered = false; + + +static void handle_args(int argc, char *argv[]); +static void prepare_buf(void); +static void test_open(void); +static void test_non_sync(void); +static void test_sync(int writes_per_op); +static void test_open_syncs(void); +static void test_open_sync(const char *msg, int writes_size); +static void test_file_descriptor_sync(void); + +#ifndef WIN32 +static void process_alarm(int sig); +#else +static DWORD WINAPI process_alarm(LPVOID param); +#endif +static void signal_cleanup(int sig); + +#ifdef HAVE_FSYNC_WRITETHROUGH +static int pg_fsync_writethrough(int fd); +#endif +static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops); + +#define die(msg) pg_fatal("%s: %m", _(msg)) + + +int +main(int argc, char *argv[]) +{ + pg_logging_init(argv[0]); + set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync")); + progname = get_progname(argv[0]); + + handle_args(argc, argv); + + /* Prevent leaving behind the test file */ + pqsignal(SIGINT, signal_cleanup); + pqsignal(SIGTERM, signal_cleanup); +#ifndef WIN32 + pqsignal(SIGALRM, process_alarm); +#endif +#ifdef SIGHUP + /* Not defined on win32 */ + pqsignal(SIGHUP, signal_cleanup); +#endif + + pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL)); + + prepare_buf(); + + test_open(); + + /* Test using 1 XLOG_BLCKSZ write */ + test_sync(1); + + /* Test using 2 XLOG_BLCKSZ writes */ + test_sync(2); + + test_open_syncs(); + + test_file_descriptor_sync(); + + test_non_sync(); + + unlink(filename); + + return 0; +} + +static void +handle_args(int argc, char *argv[]) +{ + static struct option long_options[] = { + {"filename", required_argument, NULL, 'f'}, + {"secs-per-test", required_argument, NULL, 's'}, + {NULL, 0, NULL, 0} + }; + + int option; /* Command line option */ + int optindex = 0; /* used by getopt_long */ + unsigned long optval; /* used for option parsing */ + char *endptr; + + if (argc > 1) + { + if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) + { + printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname); + exit(0); + } + if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) + { + puts("pg_test_fsync (PostgreSQL) " PG_VERSION); + exit(0); + } + } + + while ((option = getopt_long(argc, argv, "f:s:", + long_options, &optindex)) != -1) + { + switch (option) + { + case 'f': + filename = pg_strdup(optarg); + break; + + case 's': + errno = 0; + optval = strtoul(optarg, &endptr, 10); + + if (endptr == optarg || *endptr != '\0' || + errno != 0 || optval != (unsigned int) optval) + { + pg_log_error("invalid argument for option %s", "--secs-per-test"); + pg_log_error_hint("Try \"%s --help\" for more information.", progname); + exit(1); + } + + secs_per_test = (unsigned int) optval; + if (secs_per_test == 0) + pg_fatal("%s must be in range %u..%u", + "--secs-per-test", 1, UINT_MAX); + break; + + default: + /* getopt_long already emitted a complaint */ + pg_log_error_hint("Try \"%s --help\" for more information.", progname); + exit(1); + } + } + + if (argc > optind) + { + pg_log_error("too many command-line arguments (first is \"%s\")", + argv[optind]); + pg_log_error_hint("Try \"%s --help\" for more information.", progname); + exit(1); + } + + printf(ngettext("%u second per test\n", + "%u seconds per test\n", + secs_per_test), + secs_per_test); +#if defined(O_DIRECT) + printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n")); +#elif defined(F_NOCACHE) + printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n")); +#else + printf(_("Direct I/O is not supported on this platform.\n")); +#endif +} + +static void +prepare_buf(void) +{ + int ops; + + /* write random data into buffer */ + for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++) + full_buf[ops] = (char) pg_prng_int32(&pg_global_prng_state); + + buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf); +} + +static void +test_open(void) +{ + int tmpfile; + + /* + * test if we can open the target file + */ + if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1) + die("could not open output file"); + needs_unlink = 1; + if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) != + DEFAULT_XLOG_SEG_SIZE) + die("write failed"); + + /* fsync now so that dirty buffers don't skew later tests */ + if (fsync(tmpfile) != 0) + die("fsync failed"); + + close(tmpfile); +} + +static int +open_direct(const char *path, int flags, mode_t mode) +{ + int fd; + +#ifdef O_DIRECT + flags |= O_DIRECT; +#endif + + fd = open(path, flags, mode); + +#if !defined(O_DIRECT) && defined(F_NOCACHE) + if (fd >= 0 && fcntl(fd, F_NOCACHE, 1) < 0) + { + int save_errno = errno; + + close(fd); + errno = save_errno; + return -1; + } +#endif + + return fd; +} + +static void +test_sync(int writes_per_op) +{ + int tmpfile, + ops, + writes; + bool fs_warning = false; + + if (writes_per_op == 1) + printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K); + else + printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K); + printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n")); + + /* + * Test open_datasync if available + */ + printf(LABEL_FORMAT, "open_datasync"); + fflush(stdout); + +#ifdef OPEN_DATASYNC_FLAG + if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1) + { + printf(NA_FORMAT, _("n/a*")); + fs_warning = true; + } + else + { + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < writes_per_op; writes++) + if (pg_pwrite(tmpfile, + buf, + XLOG_BLCKSZ, + writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + } + STOP_TIMER; + close(tmpfile); + } +#else + printf(NA_FORMAT, _("n/a")); +#endif + +/* + * Test fdatasync if available + */ + printf(LABEL_FORMAT, "fdatasync"); + fflush(stdout); + +#ifdef HAVE_FDATASYNC + if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) + die("could not open output file"); + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < writes_per_op; writes++) + if (pg_pwrite(tmpfile, + buf, + XLOG_BLCKSZ, + writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + fdatasync(tmpfile); + } + STOP_TIMER; + close(tmpfile); +#else + printf(NA_FORMAT, _("n/a")); +#endif + +/* + * Test fsync + */ + printf(LABEL_FORMAT, "fsync"); + fflush(stdout); + + if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) + die("could not open output file"); + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < writes_per_op; writes++) + if (pg_pwrite(tmpfile, + buf, + XLOG_BLCKSZ, + writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + if (fsync(tmpfile) != 0) + die("fsync failed"); + } + STOP_TIMER; + close(tmpfile); + +/* + * If fsync_writethrough is available, test as well + */ + printf(LABEL_FORMAT, "fsync_writethrough"); + fflush(stdout); + +#ifdef HAVE_FSYNC_WRITETHROUGH + if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) + die("could not open output file"); + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < writes_per_op; writes++) + if (pg_pwrite(tmpfile, + buf, + XLOG_BLCKSZ, + writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + if (pg_fsync_writethrough(tmpfile) != 0) + die("fsync failed"); + } + STOP_TIMER; + close(tmpfile); +#else + printf(NA_FORMAT, _("n/a")); +#endif + +/* + * Test open_sync if available + */ + printf(LABEL_FORMAT, "open_sync"); + fflush(stdout); + +#ifdef OPEN_SYNC_FLAG + if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1) + { + printf(NA_FORMAT, _("n/a*")); + fs_warning = true; + } + else + { + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < writes_per_op; writes++) + if (pg_pwrite(tmpfile, + buf, + XLOG_BLCKSZ, + writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) + + /* + * This can generate write failures if the filesystem has + * a large block size, e.g. 4k, and there is no support + * for O_DIRECT writes smaller than the file system block + * size, e.g. XFS. + */ + die("write failed"); + } + STOP_TIMER; + close(tmpfile); + } +#else + printf(NA_FORMAT, _("n/a")); +#endif + + if (fs_warning) + { + printf(_("* This file system and its mount options do not support direct\n" + " I/O, e.g. ext4 in journaled mode.\n")); + } +} + +static void +test_open_syncs(void) +{ + printf(_("\nCompare open_sync with different write sizes:\n")); + printf(_("(This is designed to compare the cost of writing 16kB in different write\n" + "open_sync sizes.)\n")); + + test_open_sync(_(" 1 * 16kB open_sync write"), 16); + test_open_sync(_(" 2 * 8kB open_sync writes"), 8); + test_open_sync(_(" 4 * 4kB open_sync writes"), 4); + test_open_sync(_(" 8 * 2kB open_sync writes"), 2); + test_open_sync(_("16 * 1kB open_sync writes"), 1); +} + +/* + * Test open_sync with different size files + */ +static void +test_open_sync(const char *msg, int writes_size) +{ +#ifdef OPEN_SYNC_FLAG + int tmpfile, + ops, + writes; +#endif + + printf(LABEL_FORMAT, msg); + fflush(stdout); + +#ifdef OPEN_SYNC_FLAG + if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1) + printf(NA_FORMAT, _("n/a*")); + else + { + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < 16 / writes_size; writes++) + if (pg_pwrite(tmpfile, + buf, + writes_size * 1024, + writes * writes_size * 1024) != + writes_size * 1024) + die("write failed"); + } + STOP_TIMER; + close(tmpfile); + } +#else + printf(NA_FORMAT, _("n/a")); +#endif +} + +static void +test_file_descriptor_sync(void) +{ + int tmpfile, + ops; + + /* + * Test whether fsync can sync data written on a different descriptor for + * the same file. This checks the efficiency of multi-process fsyncs + * against the same file. Possibly this should be done with writethrough + * on platforms which support it. + */ + printf(_("\nTest if fsync on non-write file descriptor is honored:\n")); + printf(_("(If the times are similar, fsync() can sync data written on a different\n" + "descriptor.)\n")); + + /* + * first write, fsync and close, which is the normal behavior without + * multiple descriptors + */ + printf(LABEL_FORMAT, "write, fsync, close"); + fflush(stdout); + + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) + die("could not open output file"); + if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + if (fsync(tmpfile) != 0) + die("fsync failed"); + close(tmpfile); + + /* + * open and close the file again to be consistent with the following + * test + */ + if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) + die("could not open output file"); + close(tmpfile); + } + STOP_TIMER; + + /* + * Now open, write, close, open again and fsync This simulates processes + * fsyncing each other's writes. + */ + printf(LABEL_FORMAT, "write, close, fsync"); + fflush(stdout); + + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) + die("could not open output file"); + if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + close(tmpfile); + /* reopen file */ + if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) + die("could not open output file"); + if (fsync(tmpfile) != 0) + die("fsync failed"); + close(tmpfile); + } + STOP_TIMER; +} + +static void +test_non_sync(void) +{ + int tmpfile, + ops; + + /* + * Test a simple write without fsync + */ + printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K); + printf(LABEL_FORMAT, "write"); + fflush(stdout); + + if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) + die("could not open output file"); + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ) + die("write failed"); + } + STOP_TIMER; + close(tmpfile); +} + +static void +signal_cleanup(int signum) +{ + /* Delete the file if it exists. Ignore errors */ + if (needs_unlink) + unlink(filename); + /* Finish incomplete line on stdout */ + puts(""); + exit(signum); +} + +#ifdef HAVE_FSYNC_WRITETHROUGH + +static int +pg_fsync_writethrough(int fd) +{ +#ifdef WIN32 + return _commit(fd); +#elif defined(F_FULLFSYNC) + return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0; +#else + errno = ENOSYS; + return -1; +#endif +} +#endif + +/* + * print out the writes per second for tests + */ +static void +print_elapse(struct timeval start_t, struct timeval stop_t, int ops) +{ + double total_time = (stop_t.tv_sec - start_t.tv_sec) + + (stop_t.tv_usec - start_t.tv_usec) * 0.000001; + double per_second = ops / total_time; + double avg_op_time_us = (total_time / ops) * USECS_SEC; + + printf(_(OPS_FORMAT), per_second, avg_op_time_us); +} + +#ifndef WIN32 +static void +process_alarm(int sig) +{ + alarm_triggered = true; +} +#else +static DWORD WINAPI +process_alarm(LPVOID param) +{ + /* WIN32 doesn't support alarm, so we create a thread and sleep here */ + Sleep(secs_per_test * 1000); + alarm_triggered = true; + ExitThread(0); +} +#endif |