diff options
Diffstat (limited to 'test/python/file_load_common.py')
-rw-r--r-- | test/python/file_load_common.py | 248 |
1 files changed, 248 insertions, 0 deletions
########################################################################
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
########################################################################

"""Collection of test cases shared between different file format types."""

import os
import os.path
import collections
import orcus


class Address(object):
    """Cell position parsed from a 'sheet/row/column' string.

    The string must contain exactly two '/' separators; the row and column
    parts are converted to int.
    """

    def __init__(self, pos_s):
        self.sheet_name, self.row, self.column = pos_s.split('/')
        self.row = int(self.row)
        self.column = int(self.column)

    def __repr__(self):
        return "(sheet={}; row={}, column={})".format(self.sheet_name, self.row, self.column)


class ExpectedSheet(object):
    """Sparse store of the cells a sheet is expected to contain.

    Cells are kept per row, keyed by column position.  The largest row and
    column positions inserted so far define the data range of the sheet.
    """

    def __init__(self, name):
        self.__name = name
        self.__rows = collections.OrderedDict()
        self.__max_column = 0
        self.__max_row = 0

    @property
    def name(self):
        """Name of the sheet as given to the constructor."""
        return self.__name

    @property
    def data_size(self):
        """Dict with the "column" and "row" counts of the data range."""
        return {"column": self.__max_column+1, "row": self.__max_row+1}

    def get_rows(self):
        """Return the whole data range as a tuple of row tuples.

        Positions that were never inserted are filled with
        (orcus.CellType.EMPTY, None).
        """
        column_count = self.__max_column + 1
        rows = []
        for row_pos in range(self.__max_row + 1):
            row = [(orcus.CellType.EMPTY, None) for _ in range(column_count)]
            for col_pos, cell in self.__rows.get(row_pos, {}).items():
                row[col_pos] = cell
            rows.append(tuple(row))
        return tuple(rows)

    def insert_cell(self, row, column, cell_type, cell_value, result):
        """Store one expected cell and grow the data range to include it.

        :param row: row position of the cell.
        :param column: column position of the cell.
        :param cell_type: one of "numeric", "string", "boolean" or "formula".
        :param cell_value: textual cell value; for a formula cell this is
            the formula expression.
        :param result: expected formula result; only used for formula cells.
        :raises RuntimeError: on an unknown cell type, an invalid boolean
            value, or a string value that is not quoted.
        """
        if row not in self.__rows:
            self.__rows[row] = collections.OrderedDict()

        row_data = self.__rows[row]

        if cell_type == "numeric":
            row_data[column] = (orcus.CellType.NUMERIC, float(cell_value))
        elif cell_type == "string":
            row_data[column] = (orcus.CellType.STRING, self.__unescape_string_cell_value(cell_value))
        elif cell_type == "boolean":
            if cell_value == "true":
                row_data[column] = (orcus.CellType.BOOLEAN, True)
            elif cell_value == "false":
                row_data[column] = (orcus.CellType.BOOLEAN, False)
            else:
                raise RuntimeError("invalid boolean value: {}".format(cell_value))
        elif cell_type == "formula":
            row_data[column] = (orcus.CellType.FORMULA, result, cell_value)
        else:
            raise RuntimeError("unhandled cell value type: {}".format(cell_type))

        # Update the data range.
        self.__max_row = max(self.__max_row, row)
        self.__max_column = max(self.__max_column, column)

    def __unescape_string_cell_value(self, v):
        """Strip the outer double quotes from v and resolve backslash escapes.

        A backslash causes the character that follows it to be taken
        literally.

        :raises RuntimeError: if v is not enclosed in double quotes.
        """
        # The length check rejects an empty value (which would otherwise fail
        # with IndexError) and a lone quote character (whose single character
        # would otherwise count as both the opening and the closing quote).
        if len(v) < 2 or v[0] != '"' or v[-1] != '"':
            raise RuntimeError("string value is expected to be quoted.")

        v = v[1:-1]  # remove the outer quotes.

        buf = []
        escaped_char = False
        for c in v:
            if escaped_char:
                buf.append(c)
                escaped_char = False
                continue

            if c == '\\':
                escaped_char = True
                continue

            buf.append(c)

        return "".join(buf)


class ExpectedDocument(object):
    """Expected document content loaded from a content check file.

    Each non-empty line of the file has the form 'position:type:value'.
    The parsed sheets are available in the 'sheets' list, in the order in
    which their names first appear in consecutive lines.
    """

    def __init__(self, filepath):
        self.sheets = []

        with open(filepath, "r") as f:
            for line in f:
                self.__parse_line(line.strip())

    def __parse_line(self, line):
        """Parse one stripped line and insert its cell into the last sheet.

        :raises RuntimeError: if the line does not have three parts, or if a
            formula line carries no result value.
        """
        if not line:
            return

        # Split the line into 3 parts - position, cell type and the value.
        # Note that a valid formula expression may contain ':', so only the
        # first two ':' are treated as separators.
        parts = line.split(':', 2)

        if len(parts) != 3:
            raise RuntimeError(
                "line is expected to contain 3 parts, but not all parts are identified.")

        pos, cell_type, cell_value = parts

        if cell_type in ("merge-width", "merge-height"):
            # Merge attributes are not checked.
            return

        result = None
        if cell_type == "formula":
            # Split the cell value into formula expression and result.  The
            # result follows the last ':' of the value.
            idx = cell_value.rfind(':')
            if idx < 0:
                raise RuntimeError("formula line is expected to contain a result value.")
            cell_value, result = cell_value[:idx], cell_value[idx+1:]
            try:
                result = float(result)
            except ValueError:
                # Not a number; keep the result as a string.
                pass

        pos = Address(pos)

        if not self.sheets or self.sheets[-1].name != pos.sheet_name:
            self.sheets.append(ExpectedSheet(pos.sheet_name))

        self.sheets[-1].insert_cell(pos.row, pos.column, cell_type, cell_value, result)


def _compare_cells(expected, actual):
    """Return True if the actual cell matches the expected cell tuple.

    :param expected: tuple as produced by ExpectedSheet.get_rows(); the
        first element is the cell type, the second the value (or the formula
        result), and for formula cells the third is the formula expression.
    :param actual: cell object exposing 'type' and 'value', plus 'formula'
        for formula cells.
    """
    cell_type = expected[0]

    if cell_type != actual.type:
        return False

    if cell_type == orcus.CellType.EMPTY:
        return True

    if cell_type in (orcus.CellType.BOOLEAN, orcus.CellType.NUMERIC, orcus.CellType.STRING):
        return expected[1] == actual.value

    if cell_type == orcus.CellType.FORMULA:
        return expected[1] == actual.value and expected[2] == actual.formula

    return False


class DocLoader(object):
    """Load documents through a module object that provides 'read'."""

    def __init__(self, mod_loader):
        self._mod_loader = mod_loader

    def load(self, filepath, recalc):
        """Load a document by passing the opened binary file to 'read'."""
        with open(filepath, "rb") as f:
            return self._mod_loader.read(f, recalc=recalc)

    def load_from_value(self, filepath):
        """Load a document from the file content read into memory first."""
        with open(filepath, "rb") as f:
            content = f.read()
        return self._mod_loader.read(content, recalc=False)


def run_test_dir(self, test_dir, doc_loader):
    """Run test case for loading a file into a document.

    :param self: test case object providing the assert* methods.
    :param test_dir: test directory that contains an input file (whose base
        name is 'input') and a content check file (check.txt).
    :param doc_loader: loader object providing load(filepath, recalc) and
        load_from_value(filepath), e.g. a DocLoader instance.
    """

    print("test directory: {}".format(test_dir))
    expected_doc = ExpectedDocument(os.path.join(test_dir, "check.txt"))

    # Find the input file to load.  The directory listing is sorted so that
    # the choice is deterministic if more than one 'input.*' file exists.
    input_file = None
    for file_name in sorted(os.listdir(test_dir)):
        name, _ext = os.path.splitext(file_name)
        if name == "input":
            input_file = os.path.join(test_dir, file_name)
            break

    print("input file: {}".format(input_file))
    self.assertIsNotNone(input_file)

    doc = doc_loader.load(input_file, True)
    self.assertIsInstance(doc, orcus.Document)

    # Sometimes the actual document contains trailing empty sheets, which the
    # expected document does not store.
    self.assertTrue(expected_doc.sheets)
    self.assertLessEqual(len(expected_doc.sheets), len(doc.sheets))

    expected_sheets = {sh.name: sh for sh in expected_doc.sheets}
    actual_sheets = {sh.name: sh for sh in doc.sheets}

    for sheet_name, actual_sheet in actual_sheets.items():
        if sheet_name in expected_sheets:
            expected_sheet = expected_sheets[sheet_name]
            self.assertEqual(expected_sheet.data_size, actual_sheet.data_size)
            row_pairs = zip(expected_sheet.get_rows(), actual_sheet.get_rows())
            for row_pos, (expected_row, actual_row) in enumerate(row_pairs):
                cell_pairs = zip(expected_row, actual_row)
                for col_pos, (expected_cell, actual_cell) in enumerate(cell_pairs):
                    self.assertTrue(
                        _compare_cells(expected_cell, actual_cell),
                        "cell mismatch (sheet={}; row={}, column={}): expected {}".format(
                            sheet_name, row_pos, col_pos, expected_cell))
        else:
            # This sheet must be empty since it's not in the expected document.
            # Make sure it returns empty row set.
            rows = list(actual_sheet.get_rows())
            self.assertEqual(len(rows), 0)

    # Also make sure the document loads fine without recalc.
    doc = doc_loader.load(input_file, False)
    self.assertIsInstance(doc, orcus.Document)

    # Make sure the document loads from in-memory value.
    doc = doc_loader.load_from_value(input_file)
    self.assertIsInstance(doc, orcus.Document)