summaryrefslogtreecommitdiffstats
path: root/test/python/file_load_common.py
diff options
context:
space:
mode:
Diffstat (limited to 'test/python/file_load_common.py')
-rw-r--r--test/python/file_load_common.py248
1 files changed, 248 insertions, 0 deletions
diff --git a/test/python/file_load_common.py b/test/python/file_load_common.py
new file mode 100644
index 0000000..5587405
--- /dev/null
+++ b/test/python/file_load_common.py
@@ -0,0 +1,248 @@
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+"""Collection of test cases shared between different file format types."""
+
+import os
+import os.path
+import collections
+import orcus
+
+
class Address(object):
    """Cell position parsed from a ``sheet/row/column`` string.

    After construction the instance exposes ``sheet_name`` (str), ``row``
    (int) and ``column`` (int).
    """

    def __init__(self, pos_s):
        """Parse the position string.

        :param pos_s: position string of the form ``sheet/row/column``.
        :raises ValueError: if the string has fewer than three '/'-separated
            fields, or the row or column field is not an integer.
        """
        # Row and column are always the last two fields, so split from the
        # right; this keeps a sheet name that itself contains '/' intact.
        self.sheet_name, row, column = pos_s.rsplit('/', 2)
        self.row = int(row)
        self.column = int(column)

    def __repr__(self):
        return "(sheet={}; row={}, column={})".format(self.sheet_name, self.row, self.column)
+
+
class ExpectedSheet(object):
    """Expected content of a single sheet, built up cell by cell.

    Cells are stored sparsely per row.  Positions that were never inserted
    are reported as ``(orcus.CellType.EMPTY, None)`` by :meth:`get_rows`.
    """

    def __init__(self, name):
        self.__name = name
        # Maps row index -> OrderedDict mapping column index -> cell tuple.
        self.__rows = collections.OrderedDict()
        # Largest column and row indices inserted so far.
        self.__max_column = 0
        self.__max_row = 0

    @property
    def name(self):
        """Name of the sheet as passed to the constructor."""
        return self.__name

    @property
    def data_size(self):
        """Dict with "column" and "row" counts (largest index seen plus one)."""
        return {"column": self.__max_column+1, "row": self.__max_row+1}

    def get_rows(self):
        """Return the sheet as a tuple of rows, each a tuple of cell tuples.

        Every row has ``max column + 1`` cells; positions without an inserted
        cell hold ``(orcus.CellType.EMPTY, None)``.
        """
        column_count = self.__max_column + 1
        rows = []
        for row_pos in range(self.__max_row + 1):
            row = [(orcus.CellType.EMPTY, None) for _ in range(column_count)]
            for col_pos, cell in self.__rows.get(row_pos, {}).items():
                row[col_pos] = cell
            rows.append(tuple(row))
        return tuple(rows)

    def insert_cell(self, row, column, cell_type, cell_value, result):
        """Store one cell and grow the data range to include it.

        :param row: row index of the cell.
        :param column: column index of the cell.
        :param cell_type: one of "numeric", "string", "boolean" or "formula".
        :param cell_value: textual cell value; for a formula cell this is the
            formula expression.
        :param result: formula result; only used when cell_type is "formula".
        :raises RuntimeError: on an unknown cell type, an invalid boolean
            value, or a string value that is not quoted.
        """
        if row not in self.__rows:
            self.__rows[row] = collections.OrderedDict()

        row_data = self.__rows[row]

        if cell_type == "numeric":
            row_data[column] = (orcus.CellType.NUMERIC, float(cell_value))
        elif cell_type == "string":
            row_data[column] = (orcus.CellType.STRING, self.__unescape_string_cell_value(cell_value))
        elif cell_type == "boolean":
            if cell_value == "true":
                row_data[column] = (orcus.CellType.BOOLEAN, True)
            elif cell_value == "false":
                row_data[column] = (orcus.CellType.BOOLEAN, False)
            else:
                raise RuntimeError("invalid boolean value: {}".format(cell_value))
        elif cell_type == "formula":
            # Formula cells carry three items: type, result and expression.
            row_data[column] = (orcus.CellType.FORMULA, result, cell_value)
        else:
            raise RuntimeError("unhandled cell value type: {}".format(cell_type))

        # Update the data range.
        self.__max_row = max(self.__max_row, row)
        self.__max_column = max(self.__max_column, column)

    def __unescape_string_cell_value(self, v):
        """Strip the surrounding double quotes and resolve backslash escapes.

        :param v: quoted string value as it appears in the check file.
        :returns: the unquoted string, with each backslash removed and the
            character following it kept verbatim.
        :raises RuntimeError: if the value is not wrapped in double quotes.
        """
        # Require two distinct quote characters: an empty value or a lone '"'
        # is not a quoted string.
        if len(v) < 2 or not (v.startswith('"') and v.endswith('"')):
            raise RuntimeError("string value is expected to be quoted.")

        v = v[1:-1]  # remove the outer quotes.

        buf = []
        escaped_char = False
        for c in v:
            if escaped_char:
                # Character following a backslash is taken literally.
                buf.append(c)
                escaped_char = False
                continue

            if c == '\\':
                escaped_char = True
                continue

            buf.append(c)

        return "".join(buf)
+
+
class ExpectedDocument(object):
    """Expected document content loaded from a check file.

    Each non-empty line of the file has the form ``position:type:value``.
    The parsed cells are grouped into :class:`ExpectedSheet` objects stored
    in ``self.sheets`` in the order the sheets first appear.
    """

    def __init__(self, filepath):
        """Read and parse the check file.

        :param filepath: path to the check file.
        :raises RuntimeError: if a line cannot be parsed.
        """
        self.sheets = []

        with open(filepath, "r") as f:
            for line in f:
                self.__parse_line(line.strip())

    def __parse_line(self, line):
        """Parse one stripped line and add its cell to the matching sheet.

        Empty lines and "merge-width" / "merge-height" lines are ignored.
        """
        if not line:
            return

        # Split the line into 3 parts - position, cell type and the value.
        # Note that a valid formula expression may contain ':', so only the
        # first two ':' act as separators; the rest belongs to the value.
        parts = line.split(':', 2)
        if len(parts) != 3:
            raise RuntimeError(
                "line is expected to contain 3 parts, but not all parts are identified.")

        pos, cell_type, cell_value = parts

        if cell_type in ("merge-width", "merge-height"):
            return

        result = None
        if cell_type == "formula":
            # Split the cell value into formula expression and result; the
            # result follows the last ':'.
            expression, sep, result = cell_value.rpartition(':')
            if not sep:
                raise RuntimeError("formula line is expected to contain a result value.")
            cell_value = expression
            try:
                result = float(result)
            except ValueError:
                # Non-numeric result; keep it as a string.
                pass

        pos = Address(pos)

        # Start a new sheet whenever the sheet name differs from the one
        # currently being filled.
        if not self.sheets or self.sheets[-1].name != pos.sheet_name:
            self.sheets.append(ExpectedSheet(pos.sheet_name))

        self.sheets[-1].insert_cell(pos.row, pos.column, cell_type, cell_value, result)
+
+
+def _compare_cells(expected, actual):
+ type = expected[0]
+
+ if type != actual.type:
+ return False
+
+ if type == orcus.CellType.EMPTY:
+ return True
+
+ if type in (orcus.CellType.BOOLEAN, orcus.CellType.NUMERIC, orcus.CellType.STRING):
+ return expected[1] == actual.value
+
+ if type == orcus.CellType.FORMULA:
+ return expected[1] == actual.value and expected[2] == actual.formula
+
+ return False
+
+
class DocLoader:
    """Loads documents from disk through a module's ``read`` function."""

    def __init__(self, mod_loader):
        """Remember the loader module.

        :param mod_loader: object exposing ``read(source, recalc=...)``.
        """
        self._mod_loader = mod_loader

    def load(self, filepath, recalc):
        """Load a document by handing the open binary file to ``read``.

        :param filepath: path of the file to load.
        :param recalc: forwarded to ``read`` as the ``recalc`` keyword.
        :returns: whatever ``read`` returns.
        """
        with open(filepath, "rb") as stream:
            doc = self._mod_loader.read(stream, recalc=recalc)
        return doc

    def load_from_value(self, filepath):
        """Load a document from the file's raw content, without recalc.

        :param filepath: path of the file to load.
        :returns: whatever ``read`` returns.
        """
        with open(filepath, "rb") as stream:
            content = stream.read()
        return self._mod_loader.read(content, recalc=False)
+
+
def run_test_dir(self, test_dir, doc_loader):
    """Run test case for loading a file into a document.

    :param self: object providing the assert* methods used below
        (presumably a unittest.TestCase instance; confirm against callers).
    :param test_dir: test directory that contains an input file (whose base
        name is 'input') and a content check file (check.txt).
    :param doc_loader: object providing ``load(filepath, recalc)`` and
        ``load_from_value(filepath)``, such as a DocLoader instance.
    """

    print("test directory: {}".format(test_dir))
    expected_doc = ExpectedDocument(os.path.join(test_dir, "check.txt"))

    # Find the input file to load.
    input_file = None
    for file_name in os.listdir(test_dir):
        name, _ext = os.path.splitext(file_name)
        if name == "input":
            input_file = os.path.join(test_dir, file_name)
            break

    print("input file: {}".format(input_file))
    self.assertIsNot(input_file, None)

    doc = doc_loader.load(input_file, True)
    self.assertIsInstance(doc, orcus.Document)

    # Sometimes the actual document contains trailing empty sheets, which the
    # expected document does not store.
    self.assertTrue(len(expected_doc.sheets))
    self.assertTrue(len(expected_doc.sheets) <= len(doc.sheets))

    expected_sheets = {sh.name: sh for sh in expected_doc.sheets}
    actual_sheets = {sh.name: sh for sh in doc.sheets}

    # The loop below only walks the actual sheets, so an expected sheet that
    # is absent from the loaded document would otherwise go unnoticed.
    for sheet_name in expected_sheets:
        self.assertTrue(
            sheet_name in actual_sheets,
            "expected sheet '{}' is missing from the loaded document".format(sheet_name))

    for sheet_name, actual_sheet in actual_sheets.items():
        expected_sheet = expected_sheets.get(sheet_name)
        if expected_sheet is None:
            # This sheet must be empty since it's not in the expected document.
            # Make sure it returns empty row set.
            rows = list(actual_sheet.get_rows())
            self.assertEqual(len(rows), 0)
            continue

        self.assertEqual(expected_sheet.data_size, actual_sheet.data_size)
        row_pairs = zip(expected_sheet.get_rows(), actual_sheet.get_rows())
        for row_pos, (expected_row, actual_row) in enumerate(row_pairs):
            cell_pairs = zip(expected_row, actual_row)
            for col_pos, (expected_cell, actual_cell) in enumerate(cell_pairs):
                self.assertTrue(
                    _compare_cells(expected_cell, actual_cell),
                    "cell mismatch at (sheet={}; row={}, column={})".format(
                        sheet_name, row_pos, col_pos))

    # Also make sure the document loads fine without recalc.
    doc = doc_loader.load(input_file, False)
    self.assertIsInstance(doc, orcus.Document)

    # Make sure the document loads from in-memory value.
    doc = doc_loader.load_from_value(input_file)
    self.assertIsInstance(doc, orcus.Document)