summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/port/win/io_win.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/rocksdb/port/win/io_win.h508
1 files changed, 508 insertions, 0 deletions
diff --git a/src/rocksdb/port/win/io_win.h b/src/rocksdb/port/win/io_win.h
new file mode 100644
index 000000000..a4fee8346
--- /dev/null
+++ b/src/rocksdb/port/win/io_win.h
@@ -0,0 +1,508 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include <stdint.h>
+#include <windows.h>
+
+#include <mutex>
+#include <string>
+
+#include "rocksdb/file_system.h"
+#include "rocksdb/status.h"
+#include "util/aligned_buffer.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace port {
+
+std::string GetWindowsErrSz(DWORD err);
+
+inline IOStatus IOErrorFromWindowsError(const std::string& context, DWORD err) {
+ return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL))
+ ? IOStatus::NoSpace(context, GetWindowsErrSz(err))
+ : ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND))
+ ? IOStatus::PathNotFound(context, GetWindowsErrSz(err))
+ : IOStatus::IOError(context, GetWindowsErrSz(err));
+}
+
+inline IOStatus IOErrorFromLastWindowsError(const std::string& context) {
+ return IOErrorFromWindowsError(context, GetLastError());
+}
+
+inline IOStatus IOError(const std::string& context, int err_number) {
+ return (err_number == ENOSPC)
+ ? IOStatus::NoSpace(context, errnoStr(err_number).c_str())
+ : (err_number == ENOENT)
+ ? IOStatus::PathNotFound(context, errnoStr(err_number).c_str())
+ : IOStatus::IOError(context, errnoStr(err_number).c_str());
+}
+
+class WinFileData;
+
+IOStatus pwrite(const WinFileData* file_data, const Slice& data,
+ uint64_t offset, size_t& bytes_written);
+
+IOStatus pread(const WinFileData* file_data, char* src, size_t num_bytes,
+ uint64_t offset, size_t& bytes_read);
+
+IOStatus fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size);
+
+IOStatus ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize);
+
+size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
+
+class WinFileData {
+ protected:
+ const std::string filename_;
+ HANDLE hFile_;
+ // If true, the I/O issued would be direct I/O which the buffer
+ // will need to be aligned (not sure there is a guarantee that the buffer
+ // passed in is aligned).
+ const bool use_direct_io_;
+ const size_t sector_size_;
+
+ public:
+ // We want this class be usable both for inheritance (prive
+ // or protected) and for containment so __ctor and __dtor public
+ WinFileData(const std::string& filename, HANDLE hFile, bool direct_io);
+
+ virtual ~WinFileData() { this->CloseFile(); }
+
+ bool CloseFile() {
+ bool result = true;
+
+ if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
+ result = ::CloseHandle(hFile_);
+ assert(result);
+ hFile_ = NULL;
+ }
+ return result;
+ }
+
+ const std::string& GetName() const { return filename_; }
+
+ HANDLE GetFileHandle() const { return hFile_; }
+
+ bool use_direct_io() const { return use_direct_io_; }
+
+ size_t GetSectorSize() const { return sector_size_; }
+
+ bool IsSectorAligned(const size_t off) const;
+
+ WinFileData(const WinFileData&) = delete;
+ WinFileData& operator=(const WinFileData&) = delete;
+};
+
+class WinSequentialFile : protected WinFileData, public FSSequentialFile {
+ // Override for behavior change when creating a custom env
+ virtual IOStatus PositionedReadInternal(char* src, size_t numBytes,
+ uint64_t offset,
+ size_t& bytes_read) const;
+
+ public:
+ WinSequentialFile(const std::string& fname, HANDLE f,
+ const FileOptions& options);
+
+ ~WinSequentialFile();
+
+ WinSequentialFile(const WinSequentialFile&) = delete;
+ WinSequentialFile& operator=(const WinSequentialFile&) = delete;
+
+ IOStatus Read(size_t n, const IOOptions& options, Slice* result,
+ char* scratch, IODebugContext* dbg) override;
+ IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options,
+ Slice* result, char* scratch,
+ IODebugContext* dbg) override;
+
+ IOStatus Skip(uint64_t n) override;
+
+ IOStatus InvalidateCache(size_t offset, size_t length) override;
+
+ virtual bool use_direct_io() const override {
+ return WinFileData::use_direct_io();
+ }
+};
+
+// mmap() based random-access
+class WinMmapReadableFile : private WinFileData, public FSRandomAccessFile {
+ HANDLE hMap_;
+
+ const void* mapped_region_;
+ const size_t length_;
+
+ public:
+ // mapped_region_[0,length-1] contains the mmapped contents of the file.
+ WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
+ const void* mapped_region, size_t length);
+
+ ~WinMmapReadableFile();
+
+ WinMmapReadableFile(const WinMmapReadableFile&) = delete;
+ WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete;
+
+ IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+ Slice* result, char* scratch,
+ IODebugContext* dbg) const override;
+
+ virtual IOStatus InvalidateCache(size_t offset, size_t length) override;
+
+ virtual size_t GetUniqueId(char* id, size_t max_size) const override;
+};
+
+// We preallocate and use memcpy to append new
+// data to the file. This is safe since we either properly close the
+// file before reading from it, or for log files, the reading code
+// knows enough to skip zero suffixes.
+class WinMmapFile : private WinFileData, public FSWritableFile {
+ private:
+ HANDLE hMap_;
+
+ const size_t page_size_; // We flush the mapping view in page_size
+ // increments. We may decide if this is a memory
+ // page size or SSD page size
+ const size_t
+ allocation_granularity_; // View must start at such a granularity
+
+ size_t reserved_size_; // Preallocated size
+
+ size_t mapping_size_; // The max size of the mapping object
+ // we want to guess the final file size to minimize the remapping
+ size_t view_size_; // How much memory to map into a view at a time
+
+ char* mapped_begin_; // Must begin at the file offset that is aligned with
+ // allocation_granularity_
+ char* mapped_end_;
+ char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_])
+ char* last_sync_; // Where have we synced up to
+
+ uint64_t file_offset_; // Offset of mapped_begin_ in file
+
+ // Do we have unsynced writes?
+ bool pending_sync_;
+
+ // Can only truncate or reserve to a sector size aligned if
+ // used on files that are opened with Unbuffered I/O
+ IOStatus TruncateFile(uint64_t toSize);
+
+ IOStatus UnmapCurrentRegion();
+
+ IOStatus MapNewRegion(const IOOptions& options, IODebugContext* dbg);
+
+ virtual IOStatus PreallocateInternal(uint64_t spaceToReserve);
+
+ public:
+ WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
+ size_t allocation_granularity, const FileOptions& options);
+
+ ~WinMmapFile();
+
+ WinMmapFile(const WinMmapFile&) = delete;
+ WinMmapFile& operator=(const WinMmapFile&) = delete;
+
+ IOStatus Append(const Slice& data, const IOOptions& options,
+ IODebugContext* dbg) override;
+ IOStatus Append(const Slice& data, const IOOptions& opts,
+ const DataVerificationInfo& /* verification_info */,
+ IODebugContext* dbg) override {
+ return Append(data, opts, dbg);
+ }
+
+ // Means Close() will properly take care of truncate
+ // and it does not need any additional information
+ IOStatus Truncate(uint64_t size, const IOOptions& options,
+ IODebugContext* dbg) override;
+
+ IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
+
+ IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
+
+ // Flush only data
+ IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
+
+ /**
+ * Flush data as well as metadata to stable storage.
+ */
+ IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
+
+ /**
+ * Get the size of valid data in the file. This will not match the
+ * size that is returned from the filesystem because we use mmap
+ * to extend file by map_size every time.
+ */
+ uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override;
+
+ IOStatus InvalidateCache(size_t offset, size_t length) override;
+
+ IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options,
+ IODebugContext* dbg) override;
+
+ virtual size_t GetUniqueId(char* id, size_t max_size) const override;
+};
+
+class WinRandomAccessImpl {
+ protected:
+ WinFileData* file_base_;
+ size_t alignment_;
+
+ // Override for behavior change when creating a custom env
+ virtual IOStatus PositionedReadInternal(char* src, size_t numBytes,
+ uint64_t offset,
+ size_t& bytes_read) const;
+
+ WinRandomAccessImpl(WinFileData* file_base, size_t alignment,
+ const FileOptions& options);
+
+ virtual ~WinRandomAccessImpl() {}
+
+ IOStatus ReadImpl(uint64_t offset, size_t n, Slice* result,
+ char* scratch) const;
+
+ size_t GetAlignment() const { return alignment_; }
+
+ public:
+ WinRandomAccessImpl(const WinRandomAccessImpl&) = delete;
+ WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete;
+};
+
+// pread() based random-access
+class WinRandomAccessFile
+ : private WinFileData,
+ protected WinRandomAccessImpl, // Want to be able to override
+ // PositionedReadInternal
+ public FSRandomAccessFile {
+ public:
+ WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
+ const FileOptions& options);
+
+ ~WinRandomAccessFile();
+
+ IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+ Slice* result, char* scratch,
+ IODebugContext* dbg) const override;
+
+ virtual size_t GetUniqueId(char* id, size_t max_size) const override;
+
+ virtual bool use_direct_io() const override {
+ return WinFileData::use_direct_io();
+ }
+
+ IOStatus InvalidateCache(size_t offset, size_t length) override;
+
+ virtual size_t GetRequiredBufferAlignment() const override;
+};
+
+// This is a sequential write class. It has been mimicked (as others) after
+// the original Posix class. We add support for unbuffered I/O on windows as
+// well
+// we utilize the original buffer as an alignment buffer to write directly to
+// file with no buffering.
+// No buffering requires that the provided buffer is aligned to the physical
+// sector size (SSD page size) and
+// that all SetFilePointer() operations to occur with such an alignment.
+// We thus always write in sector/page size increments to the drive and leave
+// the tail for the next write OR for Close() at which point we pad with zeros.
+// No padding is required for
+// buffered access.
+class WinWritableImpl {
+ protected:
+ WinFileData* file_data_;
+ const uint64_t alignment_;
+ uint64_t
+ next_write_offset_; // Needed because Windows does not support O_APPEND
+ uint64_t reservedsize_; // how far we have reserved space
+
+ virtual IOStatus PreallocateInternal(uint64_t spaceToReserve);
+
+ WinWritableImpl(WinFileData* file_data, size_t alignment);
+
+ ~WinWritableImpl() {}
+
+ uint64_t GetAlignment() const { return alignment_; }
+
+ IOStatus AppendImpl(const Slice& data);
+
+ // Requires that the data is aligned as specified by
+ // GetRequiredBufferAlignment()
+ IOStatus PositionedAppendImpl(const Slice& data, uint64_t offset);
+
+ IOStatus TruncateImpl(uint64_t size);
+
+ IOStatus CloseImpl();
+
+ IOStatus SyncImpl(const IOOptions& options, IODebugContext* dbg);
+
+ uint64_t GetFileNextWriteOffset() {
+ // Double accounting now here with WritableFileWriter
+ // and this size will be wrong when unbuffered access is used
+ // but tests implement their own writable files and do not use
+ // WritableFileWrapper
+ // so we need to squeeze a square peg through
+ // a round hole here.
+ return next_write_offset_;
+ }
+
+ IOStatus AllocateImpl(uint64_t offset, uint64_t len);
+
+ public:
+ WinWritableImpl(const WinWritableImpl&) = delete;
+ WinWritableImpl& operator=(const WinWritableImpl&) = delete;
+};
+
+class WinWritableFile : private WinFileData,
+ protected WinWritableImpl,
+ public FSWritableFile {
+ public:
+ WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
+ size_t capacity, const FileOptions& options);
+
+ ~WinWritableFile();
+
+ IOStatus Append(const Slice& data, const IOOptions& options,
+ IODebugContext* dbg) override;
+ IOStatus Append(const Slice& data, const IOOptions& opts,
+ const DataVerificationInfo& /* verification_info */,
+ IODebugContext* dbg) override {
+ return Append(data, opts, dbg);
+ }
+
+ // Requires that the data is aligned as specified by
+ // GetRequiredBufferAlignment()
+ IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+ const IOOptions& options,
+ IODebugContext* dbg) override;
+ IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+ const IOOptions& opts,
+ const DataVerificationInfo& /* verification_info */,
+ IODebugContext* dbg) override {
+ return PositionedAppend(data, offset, opts, dbg);
+ }
+
+ // Need to implement this so the file is truncated correctly
+ // when buffered and unbuffered mode
+ IOStatus Truncate(uint64_t size, const IOOptions& options,
+ IODebugContext* dbg) override;
+
+ IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
+
+ // write out the cached data to the OS cache
+ // This is now taken care of the WritableFileWriter
+ IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
+
+ IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
+
+ IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
+
+ virtual bool IsSyncThreadSafe() const override;
+
+ // Indicates if the class makes use of direct I/O
+ // Use PositionedAppend
+ virtual bool use_direct_io() const override;
+
+ virtual size_t GetRequiredBufferAlignment() const override;
+
+ uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override;
+
+ IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options,
+ IODebugContext* dbg) override;
+
+ virtual size_t GetUniqueId(char* id, size_t max_size) const override;
+};
+
+class WinRandomRWFile : private WinFileData,
+ protected WinRandomAccessImpl,
+ protected WinWritableImpl,
+ public FSRandomRWFile {
+ public:
+ WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment,
+ const FileOptions& options);
+
+ ~WinRandomRWFile() {}
+
+ // Indicates if the class makes use of direct I/O
+ // If false you must pass aligned buffer to Write()
+ virtual bool use_direct_io() const override;
+
+ // Use the returned alignment value to allocate aligned
+ // buffer for Write() when use_direct_io() returns true
+ virtual size_t GetRequiredBufferAlignment() const override;
+
+ // Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
+ // Pass aligned buffer when use_direct_io() returns true.
+ IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options,
+ IODebugContext* dbg) override;
+
+ // Read up to `n` bytes starting from offset `offset` and store them in
+ // result, provided `scratch` size should be at least `n`.
+ // Returns Status::OK() on success.
+ IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+ Slice* result, char* scratch,
+ IODebugContext* dbg) const override;
+
+ IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
+
+ IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
+
+ IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override {
+ return Sync(options, dbg);
+ }
+
+ IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
+};
+
+class WinMemoryMappedBuffer : public MemoryMappedFileBuffer {
+ private:
+ HANDLE file_handle_;
+ HANDLE map_handle_;
+
+ public:
+ WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base,
+ size_t size)
+ : MemoryMappedFileBuffer(base, size),
+ file_handle_(file_handle),
+ map_handle_(map_handle) {}
+ ~WinMemoryMappedBuffer() override;
+};
+
+class WinDirectory : public FSDirectory {
+ const std::string filename_;
+ HANDLE handle_;
+
+ public:
+ explicit WinDirectory(const std::string& filename, HANDLE h) noexcept
+ : filename_(filename), handle_(h) {
+ assert(handle_ != INVALID_HANDLE_VALUE);
+ }
+ ~WinDirectory() {
+ if (handle_ != NULL) {
+ IOStatus s = WinDirectory::Close(IOOptions(), nullptr);
+ s.PermitUncheckedError();
+ }
+ }
+ const std::string& GetName() const { return filename_; }
+ IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
+ IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
+
+ size_t GetUniqueId(char* id, size_t max_size) const override;
+};
+
+class WinFileLock : public FileLock {
+ public:
+ explicit WinFileLock(HANDLE hFile) : hFile_(hFile) {
+ assert(hFile != NULL);
+ assert(hFile != INVALID_HANDLE_VALUE);
+ }
+
+ ~WinFileLock();
+
+ private:
+ HANDLE hFile_;
+};
+} // namespace port
+} // namespace ROCKSDB_NAMESPACE