1 files changed, 248 insertions, 0 deletions
diff --git a/test/python/file_load_common.py b/test/python/file_load_common.py
new file mode 100644
index 0000000..5587405
--- /dev/null
+++ b/test/python/file_load_common.py
@@ -0,0 +1,248 @@
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+"""Collection of test cases shared between different file format types."""
+
+import os
+import os.path
+import collections
+import orcus
+
+
+class Address(object):
+    """Cell position parsed from a ``sheet/row/column`` string.
+
+    :ivar sheet_name: sheet name, kept as a string.
+    :ivar row: row index converted to ``int``.
+    :ivar column: column index converted to ``int``.
+    """
+
+    def __init__(self, pos_s):
+        """Parse a position string.
+
+        :param pos_s: position string of the form ``sheet/row/column``.
+        :raises ValueError: if fewer than three '/'-separated fields are
+           present, or if the row or column field is not an integer.
+        """
+        # Split from the right so that only the last two separators are
+        # consumed; a sheet name that itself contains '/' stays intact.
+        self.sheet_name, row, column = pos_s.rsplit('/', 2)
+        self.row = int(row)
+        self.column = int(column)
+
+    def __repr__(self):
+        return "(sheet={}; row={}, column={})".format(self.sheet_name, self.row, self.column)
+
+
+class ExpectedSheet(object):
+    """Expected content of a single sheet, built up one cell at a time.
+
+    Cells are stored sparsely per row.  The largest row and column indices
+    inserted so far define the extent reported by :attr:`data_size` and
+    filled in by :meth:`get_rows`.
+    """
+
+    def __init__(self, name):
+        """:param name: name of the sheet."""
+        self.__name = name
+        # row index -> (column index -> cell tuple)
+        self.__rows = collections.OrderedDict()
+        self.__max_column = 0
+        self.__max_row = 0
+
+    @property
+    def name(self):
+        """Sheet name given at construction."""
+        return self.__name
+
+    @property
+    def data_size(self):
+        """Dict with 'column' and 'row' counts of the data range.
+
+        Each count is the largest inserted index plus one.
+        """
+        return {"column": self.__max_column+1, "row": self.__max_row+1}
+
+    def get_rows(self):
+        """Return the whole data range as a tuple of row tuples.
+
+        Positions without an inserted cell are filled with
+        ``(orcus.CellType.EMPTY, None)``.
+        """
+        width = self.__max_column + 1
+        rows = []
+        for i in range(self.__max_row+1):
+            row = [(orcus.CellType.EMPTY, None) for _ in range(width)]
+            cells = self.__rows.get(i)
+            if cells:
+                for col_pos, cell in cells.items():
+                    row[col_pos] = cell
+            rows.append(tuple(row))
+        return tuple(rows)
+
+    def insert_cell(self, row, column, cell_type, cell_value, result):
+        """Store one expected cell and grow the data range to include it.
+
+        :param row: row index of the cell.
+        :param column: column index of the cell.
+        :param cell_type: one of "numeric", "string", "boolean" or "formula".
+        :param cell_value: textual cell value; for formula cells this is the
+           formula expression.
+        :param result: formula result; only stored for formula cells.
+        :raises RuntimeError: on an unknown cell type, a boolean value other
+           than "true"/"false", or a string value that is not quoted.
+        :raises ValueError: if a numeric value cannot be converted to float.
+        """
+        if row not in self.__rows:
+            self.__rows[row] = collections.OrderedDict()
+
+        row_data = self.__rows[row]
+
+        if cell_type == "numeric":
+            row_data[column] = (orcus.CellType.NUMERIC, float(cell_value))
+        elif cell_type == "string":
+            row_data[column] = (orcus.CellType.STRING, self.__unescape_string_cell_value(cell_value))
+        elif cell_type == "boolean":
+            if cell_value == "true":
+                row_data[column] = (orcus.CellType.BOOLEAN, True)
+            elif cell_value == "false":
+                row_data[column] = (orcus.CellType.BOOLEAN, False)
+            else:
+                raise RuntimeError("invalid boolean value: {}".format(cell_value))
+        elif cell_type == "formula":
+            # Formula cells carry three items: type, result, expression.
+            row_data[column] = (orcus.CellType.FORMULA, result, cell_value)
+        else:
+            raise RuntimeError("unhandled cell value type: {}".format(cell_type))
+
+        # Update the data range.
+        if row > self.__max_row:
+            self.__max_row = row
+        if column > self.__max_column:
+            self.__max_column = column
+
+    def __unescape_string_cell_value(self, v):
+        """Strip the outer double quotes from *v* and resolve backslash escapes.
+
+        A backslash makes the character that follows it literal.
+
+        :raises RuntimeError: if *v* is not enclosed in a pair of double
+           quotes.
+        """
+        # The length check rejects the empty string (which would otherwise
+        # fail with IndexError) and a lone '"', whose single character would
+        # otherwise pass as both the opening and the closing quote.
+        if len(v) < 2 or v[0] != '"' or v[-1] != '"':
+            raise RuntimeError("string value is expected to be quoted.")
+
+        v = v[1:-1]  # remove the outer quotes.
+
+        buf = []
+        escaped_char = False
+        for c in v:
+            if escaped_char:
+                buf.append(c)
+                escaped_char = False
+                continue
+
+            if c == '\\':
+                escaped_char = True
+                continue
+
+            buf.append(c)
+
+        return "".join(buf)
+
+
+class ExpectedDocument(object):
+    """Expected document content loaded from a check file.
+
+    Each non-empty line of the file has the form ``position:type:value``
+    where position is ``sheet/row/column``.  For formula lines the value part
+    ends with ``:result``.
+
+    :ivar sheets: list of :class:`ExpectedSheet`.  A new sheet is started
+       whenever a cell line names a sheet other than the last one in the list.
+    """
+
+    def __init__(self, filepath):
+        """:param filepath: path to the check file to parse."""
+        self.sheets = []
+
+        with open(filepath, "r") as f:
+            # Iterate the file lazily rather than materializing every line.
+            for line in f:
+                self.__parse_line(line.strip())
+
+    def __parse_line(self, line):
+        """Parse one stripped line and record the cell it describes.
+
+        Empty lines and "merge-width" / "merge-height" lines are ignored.
+
+        :raises RuntimeError: if the line does not have three ':'-separated
+           parts, or a formula line has no result value.
+        """
+        if not line:
+            return
+
+        # Split the line into 3 parts - position, cell type and the value.
+        # Note that a valid formula expression may contain ':', so only the
+        # first two separators are consumed and the rest is kept intact.
+        parts = line.split(':', 2)
+        if len(parts) != 3:
+            raise RuntimeError(
+                "line is expected to contain 3 parts, but not all parts are identified.")
+
+        pos, cell_type, cell_value = parts
+
+        if cell_type in ("merge-width", "merge-height"):
+            return
+
+        result = None
+        if cell_type == "formula":
+            # Split the cell value into formula expression and result at the
+            # last ':'.
+            cell_value, sep, result = cell_value.rpartition(':')
+            if not sep:
+                raise RuntimeError("formula line is expected to contain a result value.")
+            try:
+                result = float(result)
+            except ValueError:
+                # Not a number; keep the result as the raw string.
+                pass
+
+        pos = Address(pos)
+
+        if not self.sheets or self.sheets[-1].name != pos.sheet_name:
+            self.sheets.append(ExpectedSheet(pos.sheet_name))
+
+        self.sheets[-1].insert_cell(pos.row, pos.column, cell_type, cell_value, result)
+
+
+def _compare_cells(expected, actual):
+    """Tell whether an actual cell matches an expected cell tuple.
+
+    :param expected: tuple whose first item is the cell type and whose second
+       item is the value; formula cells carry the formula expression as a
+       third item.
+    :param actual: cell object exposing ``type`` and ``value`` (and
+       ``formula`` for formula cells).
+    :return: truthy if type and content agree, falsy otherwise.
+    """
+    cell_type = expected[0]
+
+    # A type mismatch settles the comparison regardless of content.
+    if cell_type != actual.type:
+        return False
+
+    # Empty cells have no content to compare.
+    if cell_type == orcus.CellType.EMPTY:
+        return True
+
+    # Formula cells must agree on both the result and the expression.
+    if cell_type == orcus.CellType.FORMULA:
+        return expected[1] == actual.value and expected[2] == actual.formula
+
+    value_types = (orcus.CellType.BOOLEAN, orcus.CellType.NUMERIC, orcus.CellType.STRING)
+    if cell_type in value_types:
+        return expected[1] == actual.value
+
+    # Any other cell type is treated as a mismatch.
+    return False
+
+
+class DocLoader:
+    """Load documents from files through a loader module's ``read`` function."""
+
+    def __init__(self, mod_loader):
+        """:param mod_loader: object that provides ``read(source, recalc=...)``."""
+        self._mod_loader = mod_loader
+
+    def load(self, filepath, recalc):
+        """Open *filepath* in binary mode and pass the open file to ``read``.
+
+        :param filepath: path of the file to load.
+        :param recalc: forwarded to ``read`` as the ``recalc`` keyword.
+        :return: whatever ``read`` returns.
+        """
+        with open(filepath, "rb") as stream:
+            doc = self._mod_loader.read(stream, recalc=recalc)
+        return doc
+
+    def load_from_value(self, filepath):
+        """Read *filepath* fully into memory and pass its content to ``read``.
+
+        ``recalc`` is always False on this path.
+
+        :param filepath: path of the file to load.
+        :return: whatever ``read`` returns.
+        """
+        with open(filepath, "rb") as stream:
+            content = stream.read()
+        return self._mod_loader.read(content, recalc=False)
+
+
+def run_test_dir(self, test_dir, doc_loader):
+    """Run test case for loading a file into a document.
+
+    The input file is loaded with recalc enabled and compared cell by cell
+    against the expected content; it is then loaded again without recalc and
+    once more from an in-memory value, checking only that a document results.
+
+    :param self: test case object that provides the ``assert*`` methods.
+    :param test_dir: test directory that contains an input file (whose base
+       name is 'input') and a content check file (check.txt).
+    :param doc_loader: object that provides ``load(filepath, recalc)`` and
+       ``load_from_value(filepath)``, such as a :class:`DocLoader`.
+    """
+
+    print("test directory: {}".format(test_dir))
+    expected = ExpectedDocument(os.path.join(test_dir, "check.txt"))
+
+    # Find the input file to load.
+    input_file = None
+    for file_name in os.listdir(test_dir):
+        name, _ext = os.path.splitext(file_name)
+        if name == "input":
+            input_file = os.path.join(test_dir, file_name)
+            break
+
+    print("input file: {}".format(input_file))
+    self.assertIsNot(input_file, None)
+
+    doc = doc_loader.load(input_file, True)
+    self.assertIsInstance(doc, orcus.Document)
+
+    # Sometimes the actual document contains trailing empty sheets, which the
+    # expected document does not store.
+    self.assertTrue(len(expected.sheets))
+    self.assertTrue(len(expected.sheets) <= len(doc.sheets))
+
+    expected_sheets = {sh.name: sh for sh in expected.sheets}
+    actual_sheets = {sh.name: sh for sh in doc.sheets}
+
+    # The comparison below walks the actual sheets only, so an expected sheet
+    # that is absent from the loaded document would otherwise go unchecked.
+    for sheet_name in expected_sheets:
+        self.assertTrue(
+            sheet_name in actual_sheets,
+            "expected sheet '{}' is missing from the loaded document".format(sheet_name))
+
+    for sheet_name, actual_sheet in actual_sheets.items():
+        expected_sheet = expected_sheets.get(sheet_name)
+        if expected_sheet is None:
+            # This sheet must be empty since it's not in the expected document.
+            # Make sure it returns empty row set.
+            rows = list(actual_sheet.get_rows())
+            self.assertEqual(len(rows), 0)
+            continue
+
+        self.assertEqual(expected_sheet.data_size, actual_sheet.data_size)
+        row_pairs = zip(expected_sheet.get_rows(), actual_sheet.get_rows())
+        for row_pos, (expected_row, actual_row) in enumerate(row_pairs):
+            cell_pairs = zip(expected_row, actual_row)
+            for col_pos, (expected_cell, actual_cell) in enumerate(cell_pairs):
+                self.assertTrue(
+                    _compare_cells(expected_cell, actual_cell),
+                    "cell mismatch in sheet '{}' at row {}, column {}: expected {}".format(
+                        sheet_name, row_pos, col_pos, expected_cell))
+
+    # Also make sure the document loads fine without recalc.
+    doc = doc_loader.load(input_file, False)
+    self.assertIsInstance(doc, orcus.Document)
+
+    # Make sure the document loads from in-memory value.
+    doc = doc_loader.load_from_value(input_file)
+    self.assertIsInstance(doc, orcus.Document)