diff options
Diffstat (limited to 'src/rocksdb/tools/ldb_test.py')
-rw-r--r-- | src/rocksdb/tools/ldb_test.py | 955 |
1 files changed, 955 insertions, 0 deletions
diff --git a/src/rocksdb/tools/ldb_test.py b/src/rocksdb/tools/ldb_test.py new file mode 100644 index 000000000..e243d69c0 --- /dev/null +++ b/src/rocksdb/tools/ldb_test.py @@ -0,0 +1,955 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from __future__ import absolute_import, division, print_function, unicode_literals + +import glob + +import os +import os.path +import re +import shutil +import subprocess +import tempfile +import time +import unittest + + +def my_check_output(*popenargs, **kwargs): + """ + If we had python 2.7, we should simply use subprocess.check_output. + This is a stop-gap solution for python 2.6 + """ + if "stdout" in kwargs: + raise ValueError("stdout argument not allowed, it will be overridden.") + process = subprocess.Popen( + stderr=subprocess.PIPE, stdout=subprocess.PIPE, *popenargs, **kwargs + ) + output, unused_err = process.communicate() + retcode = process.poll() + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + raise Exception("Exit code is not 0. It is %d. Command: %s" % (retcode, cmd)) + return output.decode("utf-8") + + +def run_err_null(cmd): + return os.system(cmd + " 2>/dev/null ") + + +class LDBTestCase(unittest.TestCase): + def setUp(self): + self.TMP_DIR = tempfile.mkdtemp(prefix="ldb_test_") + self.DB_NAME = "testdb" + + def tearDown(self): + assert ( + self.TMP_DIR.strip() != "/" + and self.TMP_DIR.strip() != "/tmp" + and self.TMP_DIR.strip() != "/tmp/" + ) # Just some paranoia + + shutil.rmtree(self.TMP_DIR) + + def dbParam(self, dbName): + return "--db=%s" % os.path.join(self.TMP_DIR, dbName) + + def assertRunOKFull( + self, params, expectedOutput, unexpected=False, isPattern=False + ): + """ + All command-line params must be specified. + Allows full flexibility in testing; for example: missing db param. + """ + output = my_check_output( + './ldb %s |grep -v "Created bg thread"' % params, shell=True + ) + if not unexpected: + if isPattern: + self.assertNotEqual(expectedOutput.search(output.strip()), None) + else: + self.assertEqual(output.strip(), expectedOutput.strip()) + else: + if isPattern: + self.assertEqual(expectedOutput.search(output.strip()), None) + else: + self.assertNotEqual(output.strip(), expectedOutput.strip()) + + def assertRunFAILFull(self, params): + """ + All command-line params must be specified. + Allows full flexibility in testing; for example: missing db param. + """ + try: + + my_check_output( + './ldb %s >/dev/null 2>&1 |grep -v "Created bg \ + thread"' + % params, + shell=True, + ) + except Exception: + return + self.fail( + "Exception should have been raised for command with params: %s" % params + ) + + def assertRunOK(self, params, expectedOutput, unexpected=False): + """ + Uses the default test db. + """ + self.assertRunOKFull( + "%s %s" % (self.dbParam(self.DB_NAME), params), expectedOutput, unexpected + ) + + def assertRunFAIL(self, params): + """ + Uses the default test db. + """ + self.assertRunFAILFull("%s %s" % (self.dbParam(self.DB_NAME), params)) + + def testSimpleStringPutGet(self): + print("Running testSimpleStringPutGet...") + self.assertRunFAIL("put x1 y1") + self.assertRunOK("put --create_if_missing x1 y1", "OK") + self.assertRunOK("get x1", "y1") + self.assertRunFAIL("get x2") + + self.assertRunOK("put x2 y2", "OK") + self.assertRunOK("get x1", "y1") + self.assertRunOK("get x2", "y2") + self.assertRunFAIL("get x3") + + self.assertRunOK("scan --from=x1 --to=z", "x1 : y1\nx2 : y2") + self.assertRunOK("put x3 y3", "OK") + + self.assertRunOK("scan --from=x1 --to=z", "x1 : y1\nx2 : y2\nx3 : y3") + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3") + self.assertRunOK("scan --from=x", "x1 : y1\nx2 : y2\nx3 : y3") + + self.assertRunOK("scan --to=x2", "x1 : y1") + self.assertRunOK("scan --from=x1 --to=z --max_keys=1", "x1 : y1") + self.assertRunOK("scan --from=x1 --to=z --max_keys=2", "x1 : y1\nx2 : y2") + + self.assertRunOK( + "scan --from=x1 --to=z --max_keys=3", "x1 : y1\nx2 : y2\nx3 : y3" + ) + self.assertRunOK( + "scan --from=x1 --to=z --max_keys=4", "x1 : y1\nx2 : y2\nx3 : y3" + ) + self.assertRunOK("scan --from=x1 --to=x2", "x1 : y1") + self.assertRunOK("scan --from=x2 --to=x4", "x2 : y2\nx3 : y3") + self.assertRunFAIL("scan --from=x4 --to=z") # No results => FAIL + self.assertRunFAIL("scan --from=x1 --to=z --max_keys=foo") + + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3") + + self.assertRunOK("delete x1", "OK") + self.assertRunOK("scan", "x2 : y2\nx3 : y3") + + self.assertRunOK("delete NonExistentKey", "OK") + # It is weird that GET and SCAN raise exception for + # non-existent key, while delete does not + + self.assertRunOK("checkconsistency", "OK") + + def dumpDb(self, params, dumpFile): + return 0 == run_err_null("./ldb dump %s > %s" % (params, dumpFile)) + + def loadDb(self, params, dumpFile): + return 0 == run_err_null("cat %s | ./ldb load %s" % (dumpFile, params)) + + def writeExternSst(self, params, inputDumpFile, outputSst): + return 0 == run_err_null( + "cat %s | ./ldb write_extern_sst %s %s" % (inputDumpFile, outputSst, params) + ) + + def ingestExternSst(self, params, inputSst): + return 0 == run_err_null("./ldb ingest_extern_sst %s %s" % (inputSst, params)) + + def testStringBatchPut(self): + print("Running testStringBatchPut...") + self.assertRunOK("batchput x1 y1 --create_if_missing", "OK") + self.assertRunOK("scan", "x1 : y1") + self.assertRunOK('batchput x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK") + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz") + self.assertRunFAIL("batchput") + self.assertRunFAIL("batchput k1") + self.assertRunFAIL("batchput k1 v1 k2") + + def testBlobBatchPut(self): + print("Running testBlobBatchPut...") + + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK") + self.assertRunOK("scan", "x1 : y1") + self.assertRunOK( + 'batchput --enable_blob_files x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK" + ) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz") + + blob_files = self.getBlobFiles(dbPath) + self.assertTrue(len(blob_files) >= 1) + + def testBlobPut(self): + print("Running testBlobPut...") + + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put --create_if_missing --enable_blob_files x1 y1", "OK") + self.assertRunOK("get x1", "y1") + self.assertRunOK("put --enable_blob_files x2 y2", "OK") + self.assertRunOK("get x1", "y1") + self.assertRunOK("get x2", "y2") + self.assertRunFAIL("get x3") + + blob_files = self.getBlobFiles(dbPath) + self.assertTrue(len(blob_files) >= 1) + + def testBlobStartingLevel(self): + print("Running testBlobStartingLevel...") + + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK( + "put --create_if_missing --enable_blob_files --blob_file_starting_level=10 x1 y1", + "OK", + ) + self.assertRunOK("get x1", "y1") + + blob_files = self.getBlobFiles(dbPath) + self.assertTrue(len(blob_files) == 0) + + self.assertRunOK( + "put --enable_blob_files --blob_file_starting_level=0 x2 y2", "OK" + ) + self.assertRunOK("get x1", "y1") + self.assertRunOK("get x2", "y2") + self.assertRunFAIL("get x3") + + blob_files = self.getBlobFiles(dbPath) + self.assertTrue(len(blob_files) >= 1) + + def testCountDelimDump(self): + print("Running testCountDelimDump...") + self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK") + self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK") + self.assertRunOK( + "dump --count_delim", + "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8", + ) + self.assertRunOK( + 'dump --count_delim="."', + "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8", + ) + self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK") + self.assertRunOK( + 'dump --count_delim=","', + "x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8", + ) + + def testCountDelimIDump(self): + print("Running testCountDelimIDump...") + self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK") + self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK") + self.assertRunOK( + "idump --count_delim", + "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8", + ) + self.assertRunOK( + 'idump --count_delim="."', + "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8", + ) + self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK") + self.assertRunOK( + 'idump --count_delim=","', + "x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8", + ) + + def testInvalidCmdLines(self): + print("Running testInvalidCmdLines...") + # db not specified + self.assertRunFAILFull("put 0x6133 0x6233 --hex --create_if_missing") + # No param called he + self.assertRunFAIL("put 0x6133 0x6233 --he --create_if_missing") + # max_keys is not applicable for put + self.assertRunFAIL("put 0x6133 0x6233 --max_keys=1 --create_if_missing") + # hex has invalid boolean value + + def testHexPutGet(self): + print("Running testHexPutGet...") + self.assertRunOK("put a1 b1 --create_if_missing", "OK") + self.assertRunOK("scan", "a1 : b1") + self.assertRunOK("scan --hex", "0x6131 : 0x6231") + self.assertRunFAIL("put --hex 6132 6232") + self.assertRunOK("put --hex 0x6132 0x6232", "OK") + self.assertRunOK("scan --hex", "0x6131 : 0x6231\n0x6132 : 0x6232") + self.assertRunOK("scan", "a1 : b1\na2 : b2") + self.assertRunOK("get a1", "b1") + self.assertRunOK("get --hex 0x6131", "0x6231") + self.assertRunOK("get a2", "b2") + self.assertRunOK("get --hex 0x6132", "0x6232") + self.assertRunOK("get --key_hex 0x6132", "b2") + self.assertRunOK("get --key_hex --value_hex 0x6132", "0x6232") + self.assertRunOK("get --value_hex a2", "0x6232") + self.assertRunOK( + "scan --key_hex --value_hex", "0x6131 : 0x6231\n0x6132 : 0x6232" + ) + self.assertRunOK( + "scan --hex --from=0x6131 --to=0x6133", "0x6131 : 0x6231\n0x6132 : 0x6232" + ) + self.assertRunOK("scan --hex --from=0x6131 --to=0x6132", "0x6131 : 0x6231") + self.assertRunOK("scan --key_hex", "0x6131 : b1\n0x6132 : b2") + self.assertRunOK("scan --value_hex", "a1 : 0x6231\na2 : 0x6232") + self.assertRunOK("batchput --hex 0x6133 0x6233 0x6134 0x6234", "OK") + self.assertRunOK("scan", "a1 : b1\na2 : b2\na3 : b3\na4 : b4") + self.assertRunOK("delete --hex 0x6133", "OK") + self.assertRunOK("scan", "a1 : b1\na2 : b2\na4 : b4") + self.assertRunOK("checkconsistency", "OK") + + def testTtlPutGet(self): + print("Running testTtlPutGet...") + self.assertRunOK("put a1 b1 --ttl --create_if_missing", "OK") + self.assertRunOK("scan --hex", "0x6131 : 0x6231", True) + self.assertRunOK("dump --ttl ", "a1 ==> b1", True) + self.assertRunOK("dump --hex --ttl ", "0x6131 ==> 0x6231\nKeys in range: 1") + self.assertRunOK("scan --hex --ttl", "0x6131 : 0x6231") + self.assertRunOK("get --value_hex a1", "0x6231", True) + self.assertRunOK("get --ttl a1", "b1") + self.assertRunOK("put a3 b3 --create_if_missing", "OK") + # fails because timstamp's length is greater than value's + self.assertRunFAIL("get --ttl a3") + self.assertRunOK("checkconsistency", "OK") + + def testInvalidCmdLines(self): # noqa: F811 T25377293 Grandfathered in + print("Running testInvalidCmdLines...") + # db not specified + self.assertRunFAILFull("put 0x6133 0x6233 --hex --create_if_missing") + # No param called he + self.assertRunFAIL("put 0x6133 0x6233 --he --create_if_missing") + # max_keys is not applicable for put + self.assertRunFAIL("put 0x6133 0x6233 --max_keys=1 --create_if_missing") + # hex has invalid boolean value + self.assertRunFAIL("put 0x6133 0x6233 --hex=Boo --create_if_missing") + + def testDumpLoad(self): + print("Running testDumpLoad...") + self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", "OK") + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + + # Dump and load without any additional params specified + dumpFilePath = os.path.join(self.TMP_DIR, "dump1") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump1") + self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath)) + self.assertTrue( + self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath) + ) + self.assertRunOKFull( + "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + ) + + # Dump and load in hex + dumpFilePath = os.path.join(self.TMP_DIR, "dump2") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump2") + self.assertTrue(self.dumpDb("--db=%s --hex" % origDbPath, dumpFilePath)) + self.assertTrue( + self.loadDb( + "--db=%s --hex --create_if_missing" % loadedDbPath, dumpFilePath + ) + ) + self.assertRunOKFull( + "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + ) + + # Dump only a portion of the key range + dumpFilePath = os.path.join(self.TMP_DIR, "dump3") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump3") + self.assertTrue( + self.dumpDb("--db=%s --from=x1 --to=x3" % origDbPath, dumpFilePath) + ) + self.assertTrue( + self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath) + ) + self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2") + + # Dump upto max_keys rows + dumpFilePath = os.path.join(self.TMP_DIR, "dump4") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump4") + self.assertTrue(self.dumpDb("--db=%s --max_keys=3" % origDbPath, dumpFilePath)) + self.assertTrue( + self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath) + ) + self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3") + + # Load into an existing db, create_if_missing is not specified + self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath)) + self.assertTrue(self.loadDb("--db=%s" % loadedDbPath, dumpFilePath)) + self.assertRunOKFull( + "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + ) + + # Dump and load with WAL disabled + dumpFilePath = os.path.join(self.TMP_DIR, "dump5") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump5") + self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath)) + self.assertTrue( + self.loadDb( + "--db=%s --disable_wal --create_if_missing" % loadedDbPath, dumpFilePath + ) + ) + self.assertRunOKFull( + "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + ) + + # Dump and load with lots of extra params specified + extraParams = " ".join( + [ + "--bloom_bits=14", + "--block_size=1024", + "--auto_compaction=true", + "--write_buffer_size=4194304", + "--file_size=2097152", + ] + ) + dumpFilePath = os.path.join(self.TMP_DIR, "dump6") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump6") + self.assertTrue( + self.dumpDb("--db=%s %s" % (origDbPath, extraParams), dumpFilePath) + ) + self.assertTrue( + self.loadDb( + "--db=%s %s --create_if_missing" % (loadedDbPath, extraParams), + dumpFilePath, + ) + ) + self.assertRunOKFull( + "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + ) + + # Dump with count_only + dumpFilePath = os.path.join(self.TMP_DIR, "dump7") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump7") + self.assertTrue(self.dumpDb("--db=%s --count_only" % origDbPath, dumpFilePath)) + self.assertTrue( + self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath) + ) + # DB should have atleast one value for scan to work + self.assertRunOKFull("put --db=%s k1 v1" % loadedDbPath, "OK") + self.assertRunOKFull("scan --db=%s" % loadedDbPath, "k1 : v1") + + # Dump command fails because of typo in params + dumpFilePath = os.path.join(self.TMP_DIR, "dump8") + self.assertFalse( + self.dumpDb("--db=%s --create_if_missing" % origDbPath, dumpFilePath) + ) + + # Dump and load with BlobDB enabled + blobParams = " ".join( + ["--enable_blob_files", "--min_blob_size=1", "--blob_file_size=2097152"] + ) + dumpFilePath = os.path.join(self.TMP_DIR, "dump9") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump9") + self.assertTrue(self.dumpDb("--db=%s" % (origDbPath), dumpFilePath)) + self.assertTrue( + self.loadDb( + "--db=%s %s --create_if_missing --disable_wal" + % (loadedDbPath, blobParams), + dumpFilePath, + ) + ) + self.assertRunOKFull( + "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + ) + blob_files = self.getBlobFiles(loadedDbPath) + self.assertTrue(len(blob_files) >= 1) + + def testIDumpBasics(self): + print("Running testIDumpBasics...") + self.assertRunOK("put a val --create_if_missing", "OK") + self.assertRunOK("put b val", "OK") + self.assertRunOK( + "idump", + "'a' seq:1, type:1 => val\n" + "'b' seq:2, type:1 => val\nInternal keys in range: 2", + ) + self.assertRunOK( + "idump --input_key_hex --from=%s --to=%s" % (hex(ord("a")), hex(ord("b"))), + "'a' seq:1, type:1 => val\nInternal keys in range: 1", + ) + + def testIDumpDecodeBlobIndex(self): + print("Running testIDumpDecodeBlobIndex...") + self.assertRunOK("put a val --create_if_missing", "OK") + self.assertRunOK("put b val --enable_blob_files", "OK") + + # Pattern to expect from dump with decode_blob_index flag enabled. + regex = ".*\[blob ref\].*" + expected_pattern = re.compile(regex) + cmd = "idump %s --decode_blob_index" + self.assertRunOKFull( + (cmd) % (self.dbParam(self.DB_NAME)), + expected_pattern, + unexpected=False, + isPattern=True, + ) + + def testMiscAdminTask(self): + print("Running testMiscAdminTask...") + # These tests need to be improved; for example with asserts about + # whether compaction or level reduction actually took place. + self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", "OK") + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + + self.assertTrue(0 == run_err_null("./ldb compact --db=%s" % origDbPath)) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + self.assertTrue( + 0 == run_err_null("./ldb reduce_levels --db=%s --new_levels=2" % origDbPath) + ) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + self.assertTrue( + 0 == run_err_null("./ldb reduce_levels --db=%s --new_levels=3" % origDbPath) + ) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + self.assertTrue( + 0 == run_err_null("./ldb compact --db=%s --from=x1 --to=x3" % origDbPath) + ) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + self.assertTrue( + 0 + == run_err_null( + "./ldb compact --db=%s --hex --from=0x6131 --to=0x6134" % origDbPath + ) + ) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + # TODO(dilip): Not sure what should be passed to WAL.Currently corrupted. + self.assertTrue( + 0 + == run_err_null( + "./ldb dump_wal --db=%s --walfile=%s --header" + % (origDbPath, os.path.join(origDbPath, "LOG")) + ) + ) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + def testCheckConsistency(self): + print("Running testCheckConsistency...") + + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put x1 y1 --create_if_missing", "OK") + self.assertRunOK("put x2 y2", "OK") + self.assertRunOK("get x1", "y1") + self.assertRunOK("checkconsistency", "OK") + + sstFilePath = my_check_output( + "ls %s" % os.path.join(dbPath, "*.sst"), shell=True + ) + + # Modify the file + my_check_output("echo 'evil' > %s" % sstFilePath, shell=True) + self.assertRunFAIL("checkconsistency") + + # Delete the file + my_check_output("rm -f %s" % sstFilePath, shell=True) + self.assertRunFAIL("checkconsistency") + + def dumpLiveFiles(self, params, dumpFile): + return 0 == run_err_null("./ldb dump_live_files %s > %s" % (params, dumpFile)) + + def testDumpLiveFiles(self): + print("Running testDumpLiveFiles...") + + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put x1 y1 --create_if_missing", "OK") + self.assertRunOK("put x2 y2 --enable_blob_files", "OK") + dumpFilePath = os.path.join(self.TMP_DIR, "dump1") + self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath)) + self.assertRunOK("delete x1", "OK") + self.assertRunOK("put x3 y3", "OK") + dumpFilePath = os.path.join(self.TMP_DIR, "dump2") + + # Test that if the user provides a db path that ends with + # a slash '/', there is no double (or more!) slashes in the + # SST and manifest file names. + + # Add a '/' at the end of dbPath (which normally shouldnt contain any) + if dbPath[-1] != "/": + dbPath += "/" + + # Call the dump_live_files function with the edited dbPath name. + self.assertTrue( + self.dumpLiveFiles( + "--db=%s --decode_blob_index --dump_uncompressed_blobs" % dbPath, + dumpFilePath, + ) + ) + + # Investigate the output + with open(dumpFilePath, "r") as tmp: + data = tmp.read() + + # Check that all the SST filenames have a correct full path (no multiple '/'). + sstFileList = re.findall(r"%s.*\d+.sst" % dbPath, data) + self.assertTrue(len(sstFileList) >= 1) + for sstFilename in sstFileList: + filenumber = re.findall(r"\d+.sst", sstFilename)[0] + self.assertEqual(sstFilename, dbPath + filenumber) + + # Check that all the Blob filenames have a correct full path (no multiple '/'). + blobFileList = re.findall(r"%s.*\d+.blob" % dbPath, data) + self.assertTrue(len(blobFileList) >= 1) + for blobFilename in blobFileList: + filenumber = re.findall(r"\d+.blob", blobFilename)[0] + self.assertEqual(blobFilename, dbPath + filenumber) + + # Check that all the manifest filenames + # have a correct full path (no multiple '/'). + manifestFileList = re.findall(r"%s.*MANIFEST-\d+" % dbPath, data) + self.assertTrue(len(manifestFileList) >= 1) + for manifestFilename in manifestFileList: + filenumber = re.findall(r"(?<=MANIFEST-)\d+", manifestFilename)[0] + self.assertEqual(manifestFilename, dbPath + "MANIFEST-" + filenumber) + + # Check that the blob file index is decoded. + decodedBlobIndex = re.findall(r"\[blob ref\]", data) + self.assertTrue(len(decodedBlobIndex) >= 1) + + def listLiveFilesMetadata(self, params, dumpFile): + return 0 == run_err_null( + "./ldb list_live_files_metadata %s > %s" % (params, dumpFile) + ) + + def testListLiveFilesMetadata(self): + print("Running testListLiveFilesMetadata...") + + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put x1 y1 --create_if_missing", "OK") + self.assertRunOK("put x2 y2", "OK") + + # Compare the SST filename and the level of list_live_files_metadata + # with the data collected from dump_live_files. + dumpFilePath1 = os.path.join(self.TMP_DIR, "dump1") + self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath1)) + dumpFilePath2 = os.path.join(self.TMP_DIR, "dump2") + self.assertTrue( + self.listLiveFilesMetadata( + "--sort_by_filename --db=%s" % dbPath, dumpFilePath2 + ) + ) + + # Collect SST filename and level from dump_live_files + with open(dumpFilePath1, "r") as tmp: + data = tmp.read() + filename1 = re.findall(r".*\d+\.sst", data)[0] + level1 = re.findall(r"level:\d+", data)[0].split(":")[1] + + # Collect SST filename and level from list_live_files_metadata + with open(dumpFilePath2, "r") as tmp: + data = tmp.read() + filename2 = re.findall(r".*\d+\.sst", data)[0] + level2 = re.findall(r"level \d+", data)[0].split(" ")[1] + + # Assert equality between filenames and levels. + self.assertEqual(filename1, filename2) + self.assertEqual(level1, level2) + + # Create multiple column families and compare the output + # of list_live_files_metadata with dump_live_files once again. + # Create new CF, and insert data: + self.assertRunOK("create_column_family mycol1", "OK") + self.assertRunOK("put --column_family=mycol1 v1 v2", "OK") + self.assertRunOK("create_column_family mycol2", "OK") + self.assertRunOK("put --column_family=mycol2 h1 h2", "OK") + self.assertRunOK("put --column_family=mycol2 h3 h4", "OK") + + # Call dump_live_files and list_live_files_metadata + # and pipe the output to compare them later. + dumpFilePath3 = os.path.join(self.TMP_DIR, "dump3") + self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath3)) + dumpFilePath4 = os.path.join(self.TMP_DIR, "dump4") + self.assertTrue( + self.listLiveFilesMetadata( + "--sort_by_filename --db=%s" % dbPath, dumpFilePath4 + ) + ) + + # dump_live_files: + # parse the output and create a map: + # [key: sstFilename]->[value:[LSM level, Column Family Name]] + referenceMap = {} + with open(dumpFilePath3, "r") as tmp: + data = tmp.read() + # Note: the following regex are contingent on what the + # dump_live_files outputs. + namesAndLevels = re.findall(r"\d+.sst level:\d+", data) + cfs = re.findall(r"(?<=column family name=)\w+", data) + # re.findall should not reorder the data. + # Therefore namesAndLevels[i] matches the data from cfs[i]. + for count, nameAndLevel in enumerate(namesAndLevels): + sstFilename = re.findall(r"\d+.sst", nameAndLevel)[0] + sstLevel = re.findall(r"(?<=level:)\d+", nameAndLevel)[0] + cf = cfs[count] + referenceMap[sstFilename] = [sstLevel, cf] + + # list_live_files_metadata: + # parse the output and create a map: + # [key: sstFilename]->[value:[LSM level, Column Family Name]] + testMap = {} + with open(dumpFilePath4, "r") as tmp: + data = tmp.read() + # Since for each SST file, all the information is contained + # on one line, the parsing is easy to perform and relies on + # the appearance of an "00xxx.sst" pattern. + sstLines = re.findall(r".*\d+.sst.*", data) + for line in sstLines: + sstFilename = re.findall(r"\d+.sst", line)[0] + sstLevel = re.findall(r"(?<=level )\d+", line)[0] + cf = re.findall(r"(?<=column family \')\w+(?=\')", line)[0] + testMap[sstFilename] = [sstLevel, cf] + + # Compare the map obtained from dump_live_files and the map + # obtained from list_live_files_metadata. Everything should match. + self.assertEqual(referenceMap, testMap) + + def getManifests(self, directory): + return glob.glob(directory + "/MANIFEST-*") + + def getSSTFiles(self, directory): + return glob.glob(directory + "/*.sst") + + def getWALFiles(self, directory): + return glob.glob(directory + "/*.log") + + def getBlobFiles(self, directory): + return glob.glob(directory + "/*.blob") + + def copyManifests(self, src, dest): + return 0 == run_err_null("cp " + src + " " + dest) + + def testManifestDump(self): + print("Running testManifestDump...") + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put 1 1 --create_if_missing", "OK") + self.assertRunOK("put 2 2", "OK") + self.assertRunOK("put 3 3", "OK") + # Pattern to expect from manifest_dump. + num = "[0-9]+" + st = ".*" + subpat = st + " seq:" + num + ", type:" + num + regex = num + ":" + num + "\[" + subpat + ".." + subpat + "\]" + expected_pattern = re.compile(regex) + cmd = "manifest_dump --db=%s" + manifest_files = self.getManifests(dbPath) + self.assertTrue(len(manifest_files) == 1) + # Test with the default manifest file in dbPath. + self.assertRunOKFull( + cmd % dbPath, expected_pattern, unexpected=False, isPattern=True + ) + self.copyManifests(manifest_files[0], manifest_files[0] + "1") + manifest_files = self.getManifests(dbPath) + self.assertTrue(len(manifest_files) == 2) + # Test with multiple manifest files in dbPath. + self.assertRunFAILFull(cmd % dbPath) + # Running it with the copy we just created should pass. + self.assertRunOKFull( + (cmd + " --path=%s") % (dbPath, manifest_files[1]), + expected_pattern, + unexpected=False, + isPattern=True, + ) + # Make sure that using the dump with --path will result in identical + # output as just using manifest_dump. + cmd = "dump --path=%s" + self.assertRunOKFull( + (cmd) % (manifest_files[1]), + expected_pattern, + unexpected=False, + isPattern=True, + ) + + # Check if null characters doesn't infer with output format. + self.assertRunOK("put a1 b1", "OK") + self.assertRunOK("put a2 b2", "OK") + self.assertRunOK("put --hex 0x12000DA0 0x80C0000B", "OK") + self.assertRunOK("put --hex 0x7200004f 0x80000004", "OK") + self.assertRunOK("put --hex 0xa000000a 0xf000000f", "OK") + self.assertRunOK("put a3 b3", "OK") + self.assertRunOK("put a4 b4", "OK") + + # Verifies that all "levels" are printed out. + # There should be 66 mentions of levels. + expected_verbose_output = re.compile("matched") + # Test manifest_dump verbose and verify that key 0x7200004f + # is present. Note that we are forced to use grep here because + # an output with a non-terminating null character in it isn't piped + # correctly through the Python subprocess object. + # Also note that 0x72=r and 0x4f=O, hence the regex \'r.{2}O\' + # (we cannot use null character in the subprocess input either, + # so we have to use '.{2}') + cmd_verbose = ( + "manifest_dump --verbose --db=%s | grep -aq $''r.{2}O'' && echo 'matched' || echo 'not matched'" + % dbPath + ) + + self.assertRunOKFull( + cmd_verbose, expected_verbose_output, unexpected=False, isPattern=True + ) + + def testGetProperty(self): + print("Running testGetProperty...") + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put 1 1 --create_if_missing", "OK") + self.assertRunOK("put 2 2", "OK") + # A "string" property + cmd = "--db=%s get_property rocksdb.estimate-num-keys" + self.assertRunOKFull(cmd % dbPath, "rocksdb.estimate-num-keys: 2") + # A "map" property + # FIXME: why doesn't this pick up two entries? + cmd = "--db=%s get_property rocksdb.aggregated-table-properties" + part = "rocksdb.aggregated-table-properties.num_entries: " + expected_pattern = re.compile(part) + self.assertRunOKFull( + cmd % dbPath, expected_pattern, unexpected=False, isPattern=True + ) + # An invalid property + cmd = "--db=%s get_property rocksdb.this-property-does-not-exist" + self.assertRunFAILFull(cmd % dbPath) + + def testSSTDump(self): + print("Running testSSTDump...") + + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put sst1 sst1_val --create_if_missing", "OK") + self.assertRunOK("put sst2 sst2_val --enable_blob_files", "OK") + self.assertRunOK("get sst1", "sst1_val") + + # Pattern to expect from SST dump. + regex = ".*Sst file format:.*\n.*\[blob ref\].*" + expected_pattern = re.compile(regex) + + sst_files = self.getSSTFiles(dbPath) + self.assertTrue(len(sst_files) >= 1) + cmd = "dump --path=%s --decode_blob_index" + self.assertRunOKFull( + (cmd) % (sst_files[0]), expected_pattern, unexpected=False, isPattern=True + ) + + def testBlobDump(self): + print("Running testBlobDump") + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK") + self.assertRunOK( + 'batchput --enable_blob_files x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK" + ) + + # Pattern to expect from blob file dump. + regex = ".*Blob log header[\s\S]*Blob log footer[\s\S]*Read record[\s\S]*Summary" # noqa + expected_pattern = re.compile(regex) + blob_files = self.getBlobFiles(dbPath) + self.assertTrue(len(blob_files) >= 1) + cmd = "dump --path=%s --dump_uncompressed_blobs" + self.assertRunOKFull( + (cmd) % (blob_files[0]), expected_pattern, unexpected=False, isPattern=True + ) + + def testWALDump(self): + print("Running testWALDump...") + + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put wal1 wal1_val --create_if_missing", "OK") + self.assertRunOK("put wal2 wal2_val", "OK") + self.assertRunOK("get wal1", "wal1_val") + + # Pattern to expect from WAL dump. + regex = "^Sequence,Count,ByteSize,Physical Offset,Key\(s\).*" + expected_pattern = re.compile(regex) + + wal_files = self.getWALFiles(dbPath) + self.assertTrue(len(wal_files) >= 1) + cmd = "dump --path=%s" + self.assertRunOKFull( + (cmd) % (wal_files[0]), expected_pattern, unexpected=False, isPattern=True + ) + + def testListColumnFamilies(self): + print("Running testListColumnFamilies...") + self.assertRunOK("put x1 y1 --create_if_missing", "OK") + cmd = 'list_column_families | grep -v "Column families"' + # Test on valid dbPath. + self.assertRunOK(cmd, "{default}") + # Test on empty path. + self.assertRunFAIL(cmd) + + def testColumnFamilies(self): + print("Running testColumnFamilies...") + _ = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put cf1_1 1 --create_if_missing", "OK") + self.assertRunOK("put cf1_2 2 --create_if_missing", "OK") + self.assertRunOK("put cf1_3 3 --try_load_options", "OK") + # Given non-default column family to single CF DB. + self.assertRunFAIL("get cf1_1 --column_family=two") + self.assertRunOK("create_column_family two", "OK") + self.assertRunOK("put cf2_1 1 --create_if_missing --column_family=two", "OK") + self.assertRunOK("put cf2_2 2 --create_if_missing --column_family=two", "OK") + self.assertRunOK("delete cf1_2", "OK") + self.assertRunOK("create_column_family three", "OK") + self.assertRunOK("delete cf2_2 --column_family=two", "OK") + self.assertRunOK("put cf3_1 3 --create_if_missing --column_family=three", "OK") + self.assertRunOK("get cf1_1 --column_family=default", "1") + self.assertRunOK("dump --column_family=two", "cf2_1 ==> 1\nKeys in range: 1") + self.assertRunOK( + "dump --column_family=two --try_load_options", + "cf2_1 ==> 1\nKeys in range: 1", + ) + self.assertRunOK("dump", "cf1_1 ==> 1\ncf1_3 ==> 3\nKeys in range: 2") + self.assertRunOK("get cf2_1 --column_family=two", "1") + self.assertRunOK("get cf3_1 --column_family=three", "3") + self.assertRunOK("drop_column_family three", "OK") + # non-existing column family. + self.assertRunFAIL("get cf3_1 --column_family=four") + self.assertRunFAIL("drop_column_family four") + + def testIngestExternalSst(self): + print("Running testIngestExternalSst...") + + # Dump, load, write external sst and ingest it in another db + dbPath = os.path.join(self.TMP_DIR, "db1") + self.assertRunOK( + "batchput --db=%s --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4" % dbPath, + "OK", + ) + self.assertRunOK("scan --db=%s" % dbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + dumpFilePath = os.path.join(self.TMP_DIR, "dump1") + with open(dumpFilePath, "w") as f: + f.write("x1 ==> y10\nx2 ==> y20\nx3 ==> y30\nx4 ==> y40") + externSstPath = os.path.join(self.TMP_DIR, "extern_data1.sst") + self.assertTrue( + self.writeExternSst( + "--create_if_missing --db=%s" % dbPath, dumpFilePath, externSstPath + ) + ) + # cannot ingest if allow_global_seqno is false + self.assertFalse( + self.ingestExternSst( + "--create_if_missing --allow_global_seqno=false --db=%s" % dbPath, + externSstPath, + ) + ) + self.assertTrue( + self.ingestExternSst( + "--create_if_missing --allow_global_seqno --db=%s" % dbPath, + externSstPath, + ) + ) + self.assertRunOKFull( + "scan --db=%s" % dbPath, "x1 : y10\nx2 : y20\nx3 : y30\nx4 : y40" + ) + + +if __name__ == "__main__": + unittest.main() |