11 files changed, 1168 insertions, 0 deletions
diff --git a/test/python/env.json.in b/test/python/env.json.in
new file mode 100644
index 0000000..26b82d6
--- /dev/null
+++ b/test/python/env.json.in
@@ -0,0 +1,6 @@
+{
+    "version-major": @ORCUS_MAJOR_VERSION@,
+    "version-minor": @ORCUS_MINOR_VERSION@,
+    "version-micro": @ORCUS_MICRO_VERSION@
+}
+
diff --git a/test/python/file_load_common.py b/test/python/file_load_common.py
new file mode 100644
index 0000000..5587405
--- /dev/null
+++ b/test/python/file_load_common.py
@@ -0,0 +1,248 @@
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+"""Collection of test cases shared between different file format types."""
+
+import os
+import os.path
+import collections
+import orcus
+
+
+class Address(object):
+
+    def __init__(self, pos_s):
+        self.sheet_name, self.row, self.column = pos_s.split('/')
+        self.row = int(self.row)
+        self.column = int(self.column)
+
+    def __repr__(self):
+        return "(sheet={}; row={}, column={})".format(self.sheet_name, self.row, self.column)
+
+
+class ExpectedSheet(object):
+
+    def __init__(self, name):
+        self.__name = name
+        self.__rows = collections.OrderedDict()
+        self.__max_column = 0
+        self.__max_row = 0
+
+    @property
+    def name(self):
+        return self.__name
+
+    @property
+    def data_size(self):
+        return {"column": self.__max_column+1, "row": self.__max_row+1}
+
+    def get_rows(self):
+        rows = list()
+        for i in range(self.__max_row+1):
+            row = [(orcus.CellType.EMPTY, None) for _ in range(self.__max_column+1)]
+            if i in self.__rows:
+                for col_pos, cell in self.__rows[i].items():
+                    row[col_pos] = cell
+            rows.append(tuple(row))
+        return tuple(rows)
+
+    def insert_cell(self, row, column, cell_type, cell_value, result):
+        if row not in self.__rows:
+            self.__rows[row] = collections.OrderedDict()
+
+        row_data = self.__rows[row]
+
+        if cell_type == "numeric":
+            row_data[column] = (orcus.CellType.NUMERIC, float(cell_value))
+        elif cell_type == "string":
+            row_data[column] = (orcus.CellType.STRING, self.__unescape_string_cell_value(cell_value))
+        elif cell_type == "boolean":
+            if cell_value == "true":
+                row_data[column] = (orcus.CellType.BOOLEAN, True)
+            elif cell_value == "false":
+                row_data[column] = (orcus.CellType.BOOLEAN, False)
+            else:
+                raise RuntimeError("invalid boolean value: {}".format(cell_value))
+        elif cell_type == "formula":
+            row_data[column] = (orcus.CellType.FORMULA, result, cell_value)
+        else:
+            raise RuntimeError("unhandled cell value type: {}".format(cell_type))
+
+        # Update the data range.
+        if row > self.__max_row:
+            self.__max_row = row
+        if column > self.__max_column:
+            self.__max_column = column
+
+    def __unescape_string_cell_value(self, v):
+        if v[0] != '"' or v[-1] != '"':
+            raise RuntimeError("string value is expected to be quoted.")
+
+        v = v[1:-1]  # remove the outer quotes.
+
+        buf = []
+        escaped_char = False
+        for c in v:
+            if escaped_char:
+                buf.append(c)
+                escaped_char = False
+                continue
+
+            if c == '\\':
+                escaped_char = True
+                continue
+
+            buf.append(c)
+
+        return "".join(buf)
+
+
+class ExpectedDocument(object):
+
+    def __init__(self, filepath):
+        self.sheets = []
+
+        with open(filepath, "r") as f:
+            for line in f.readlines():
+                line = line.strip()
+                self.__parse_line(line)
+
+    def __parse_line(self, line):
+        if not line:
+            return
+
+        # Split the line into 3 parts - position, cell type and the value.
+        # Note that a valid formula expression may contain ':', so we cannot
+        # simply split the line by ':'.
+
+        parts = list()
+        idx = line.find(':')
+        while idx >= 0:
+            parts.append(line[:idx])
+            line = line[idx+1:]
+            if len(parts) == 2:
+                # Append the rest.
+                parts.append(line)
+                break
+
+            idx = line.find(':')
+
+        if len(parts) != 3:
+            raise RuntimeError(
+                "line is expected to contain 3 parts, but not all parts are identified.")
+
+        if parts[1] in ("merge-width", "merge-height"):
+            return
+
+        pos, cell_type, cell_value = parts[0], parts[1], parts[2]
+        result = None
+        if cell_type == "formula":
+            # Split the cell value into formula expression and result.
+            idx = cell_value.rfind(':')
+            if idx < 0:
+                raise RuntimeError("formula line is expected to contain a result value.")
+            cell_value, result = cell_value[:idx], cell_value[idx+1:]
+            try:
+                result = float(result)
+            except ValueError:
+                pass
+
+        pos = Address(pos)
+
+        if not self.sheets or self.sheets[-1].name != pos.sheet_name:
+            self.sheets.append(ExpectedSheet(pos.sheet_name))
+
+        self.sheets[-1].insert_cell(pos.row, pos.column, cell_type, cell_value, result)
+
+
+def _compare_cells(expected, actual):
+    type = expected[0]
+
+    if type != actual.type:
+        return False
+
+    if type == orcus.CellType.EMPTY:
+        return True
+
+    if type in (orcus.CellType.BOOLEAN, orcus.CellType.NUMERIC, orcus.CellType.STRING):
+        return expected[1] == actual.value
+
+    if type == orcus.CellType.FORMULA:
+        return expected[1] == actual.value and expected[2] == actual.formula
+
+    return False
+
+
+class DocLoader:
+
+    def __init__(self, mod_loader):
+        self._mod_loader = mod_loader
+
+    def load(self, filepath, recalc):
+        with open(filepath, "rb") as f:
+            return self._mod_loader.read(f, recalc=recalc)
+
+    def load_from_value(self, filepath):
+        with open(filepath, "rb") as f:
+            bytes = f.read()
+        return self._mod_loader.read(bytes, recalc=False)
+
+
+def run_test_dir(self, test_dir, doc_loader):
+    """Run test case for loading a file into a document.
+
+    :param test_dir: test directory that contains an input file (whose base
+       name is 'input') and a content check file (check.txt).
+    :param doc_loader: loader object that provides 'load' and 'load_from_value' methods.
+    """
+
+    print("test directory: {}".format(test_dir))
+    expected = ExpectedDocument(os.path.join(test_dir, "check.txt"))
+
+    # Find the input file to load.
+    input_file = None
+    for file_name in os.listdir(test_dir):
+        name, ext = os.path.splitext(file_name)
+        if name == "input":
+            input_file = os.path.join(test_dir, file_name)
+            break
+
+    print("input file: {}".format(input_file))
+    self.assertIsNot(input_file, None)
+
+    doc = doc_loader.load(input_file, True)
+    self.assertIsInstance(doc, orcus.Document)
+
+    # Sometimes the actual document contains trailing empty sheets, which the
+    # expected document does not store.
+    self.assertTrue(len(expected.sheets))
+    self.assertTrue(len(expected.sheets) <= len(doc.sheets))
+
+    expected_sheets = {sh.name: sh for sh in expected.sheets}
+    actual_sheets = {sh.name: sh for sh in doc.sheets}
+
+    for sheet_name, actual_sheet in actual_sheets.items():
+        if sheet_name in expected_sheets:
+            expected_sheet = expected_sheets[sheet_name]
+            self.assertEqual(expected_sheet.data_size, actual_sheet.data_size)
+            for expected_row, actual_row in zip(expected_sheet.get_rows(), actual_sheet.get_rows()):
+                for expected, actual in zip(expected_row, actual_row):
+                    self.assertTrue(_compare_cells(expected, actual))
+        else:
+            # This sheet must be empty since it's not in the expected document.
+            # Make sure it returns empty row set.
+            rows = [row for row in actual_sheet.get_rows()]
+            self.assertEqual(len(rows), 0)
+
+    # Also make sure the document loads fine without recalc.
+    doc = doc_loader.load(input_file, False)
+    self.assertIsInstance(doc, orcus.Document)
+
+    # Make sure the document loads from in-memory value.
+    doc = doc_loader.load_from_value(input_file)
+    self.assertIsInstance(doc, orcus.Document)
diff --git a/test/python/perf/test_json.py b/test/python/perf/test_json.py
new file mode 100755
index 0000000..acd34cc
--- /dev/null
+++ b/test/python/perf/test_json.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python3
+
+from datetime import datetime
+import json
+import orcus.json
+
+swagger_json = """
+{
+    "swagger": "2.0",
+    "info": {
+        "version": "1.0.0",
+        "title": "Swagger Petstore (Simple)",
+        "description": "A sample API that uses a petstore as an example to demonstrate features in the swagger-2.0 specification",
+        "termsOfService": "http://helloreverb.com/terms/",
+        "contact": {
+            "name": "Swagger API team",
+            "email": "foo@example.com",
+            "url": "http://swagger.io"
+        },
+        "license": {
+            "name": "MIT",
+            "url": "http://opensource.org/licenses/MIT"
+        }
+    },
+    "host": "petstore.swagger.wordnik.com",
+    "basePath": "/api",
+    "schemes": [
+        "http"
+    ],
+    "consumes": [
+        "application/json"
+    ],
+    "produces": [
+        "application/json"
+    ],
+    "paths": {
+        "/pets": {
+            "get": {
+                "description": "Returns all pets from the system that the user has access to",
+                "operationId": "findPets",
+                "produces": [
+                    "application/json",
+                    "application/xml",
+                    "text/xml",
+                    "text/html"
+                ],
+                "parameters": [
+                    {
+                        "name": "tags",
+                        "in": "query",
+                        "description": "tags to filter by",
+                        "required": false,
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "collectionFormat": "csv"
+                    },
+                    {
+                        "name": "limit",
+                        "in": "query",
+                        "description": "maximum number of results to return",
+                        "required": false,
+                        "type": "integer",
+                        "format": "int32"
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "pet response",
+                        "schema": {
+                            "type": "array",
+                            "items": {
+                                "$ref": "#/definitions/pet"
+                            }
+                        }
+                    },
+                    "default": {
+                        "description": "unexpected error",
+                        "schema": {
+                            "$ref": "#/definitions/errorModel"
+                        }
+                    }
+                }
+            },
+            "post": {
+                "description": "Creates a new pet in the store.  Duplicates are allowed",
+                "operationId": "addPet",
+                "produces": [
+                    "application/json"
+                ],
+                "parameters": [
+                    {
+                        "name": "pet",
+                        "in": "body",
+                        "description": "Pet to add to the store",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/newPet"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "pet response",
+                        "schema": {
+                            "$ref": "#/definitions/pet"
+                        }
+                    },
+                    "default": {
+                        "description": "unexpected error",
+                        "schema": {
+                            "$ref": "#/definitions/errorModel"
+                        }
+                    }
+                }
+            }
+        },
+        "/pets/{id}": {
+            "get": {
+                "description": "Returns a user based on a single ID, if the user does not have access to the pet",
+                "operationId": "findPetById",
+                "produces": [
+                    "application/json",
+                    "application/xml",
+                    "text/xml",
+                    "text/html"
+                ],
+                "parameters": [
+                    {
+                        "name": "id",
+                        "in": "path",
+                        "description": "ID of pet to fetch",
+                        "required": true,
+                        "type": "integer",
+                        "format": "int64"
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "pet response",
+                        "schema": {
+                            "$ref": "#/definitions/pet"
+                        }
+                    },
+                    "default": {
+                        "description": "unexpected error",
+                        "schema": {
+                            "$ref": "#/definitions/errorModel"
+                        }
+                    }
+                }
+            },
+            "delete": {
+                "description": "deletes a single pet based on the ID supplied",
+                "operationId": "deletePet",
+                "parameters": [
+                    {
+                        "name": "id",
+                        "in": "path",
+                        "description": "ID of pet to delete",
+                        "required": true,
+                        "type": "integer",
+                        "format": "int64"
+                    }
+                ],
+                "responses": {
+                    "204": {
+                        "description": "pet deleted"
+                    },
+                    "default": {
+                        "description": "unexpected error",
+                        "schema": {
+                            "$ref": "#/definitions/errorModel"
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "definitions": {
+        "pet": {
+            "required": [
+                "id",
+                "name"
+            ],
+            "properties": {
+                "id": {
+                    "type": "integer",
+                    "format": "int64"
+                },
+                "name": {
+                    "type": "string"
+                },
+                "tag": {
+                    "type": "string"
+                }
+            }
+        },
+        "newPet": {
+            "required": [
+                "name"
+            ],
+            "properties": {
+                "id": {
+                    "type": "integer",
+                    "format": "int64"
+                },
+                "name": {
+                    "type": "string"
+                },
+                "tag": {
+                    "type": "string"
+                }
+            }
+        },
+        "errorModel": {
+            "required": [
+                "code",
+                "message"
+            ],
+            "properties": {
+                "code": {
+                    "type": "integer",
+                    "format": "int32"
+                },
+                "message": {
+                    "type": "string"
+                }
+            }
+        }
+    }
+}
+"""
+
+class ScopePrinter(object):
+
+    def __init__(self, name):
+        self.start_time = None
+        self.name = name
+
+    def __enter__(self):
+        self.start_time = datetime.now()
+        print("--- {} started".format(self.name))
+
+    def __exit__(self, type, value, traceback):
+        end_time = datetime.now()
+        diff = end_time - self.start_time
+        sec = diff.seconds + diff.microseconds / 1000000.0
+        print("--- {} ended (duration: {} seconds)".format(self.name, sec))
+
+
+def run_builtin():
+    for i in range(20000):
+        o = json.loads(swagger_json)
+
+
+def run_orcus():
+    for i in range(20000):
+        o = orcus.json.loads(swagger_json)
+
+
+def main():
+    with ScopePrinter("builtin"):
+        run_builtin()
+
+    with ScopePrinter("orcus"):
+        run_orcus()
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/test/python/test_csv.py b/test/python/test_csv.py
new file mode 100755
index 0000000..c6f59e6
--- /dev/null
+++ b/test/python/test_csv.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+import os
+import os.path
+
+from orcus import csv
+
+import file_load_common as common
+
+
+class DocLoader:
+
+    def load(self, filepath, recalc):
+        with open(filepath, "r") as f:
+            return csv.read(f)
+
+    def load_from_value(self, filepath):
+        with open(filepath, "r") as f:
+            content = f.read()
+        return csv.read(content)
+
+
+class TestCase(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        # base directory for csv test files.
+        basedir = os.path.join(os.path.dirname(__file__), "..", "csv")
+        cls.basedir = os.path.normpath(basedir)
+
+    def test_import(self):
+        test_dirs = ("simple-numbers", "normal-quotes", "double-quotes", "quoted-with-delim")
+        for test_dir in test_dirs:
+            test_dir = os.path.join(self.basedir, test_dir)
+            common.run_test_dir(self, test_dir, DocLoader())
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/python/test_csv_export.py b/test/python/test_csv_export.py
new file mode 100755
index 0000000..9c6d6b4
--- /dev/null
+++ b/test/python/test_csv_export.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+import os
+import os.path
+import file_load_common as common
+from orcus import FormatType, csv
+
+
+class MockFileObject(object):
+
+    def __init__(self):
+        self._content = None
+
+    def write(self, bytes):
+        self._content = bytes
+
+    def read(self):
+        return self._content
+
+    @property
+    def bytes(self):
+        return self._content
+
+
+class TestCase(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        # base directory for xlsx test files.
+        basedir_xlsx = os.path.join(os.path.dirname(__file__), "..", "xlsx")
+        cls.basedir_xlsx = os.path.normpath(basedir_xlsx)
+
+    @unittest.skipIf(os.environ.get("WITH_PYTHON_XLSX") is None, "python xlsx module is disabled")
+    def test_export_from_xlsx(self):
+        from orcus import xlsx
+
+        test_dirs = (
+            "raw-values-1",
+            "empty-shared-strings",
+            "named-expression",
+        )
+
+        for test_dir in test_dirs:
+            test_dir = os.path.join(self.basedir_xlsx, test_dir)
+            input_file = os.path.join(test_dir, "input.xlsx")
+            with open(input_file, "rb") as f:
+                doc = xlsx.read(f)
+
+            # Build an expected document object from the check file.
+            check_file = os.path.join(test_dir, "check.txt")
+            check_doc = common.ExpectedDocument(check_file)
+
+            # check_doc only contains non-empty sheets.
+            data_sheet_names = set()
+            for sheet in check_doc.sheets:
+                data_sheet_names.add(sheet.name)
+
+            for sheet in doc.sheets:
+                mfo = MockFileObject()
+                sheet.write(mfo, format=FormatType.CSV)
+
+                if mfo.bytes is None:
+                    self.assertFalse(sheet.name in data_sheet_names)
+                    continue
+
+                # Load the csv stream into a document again.
+                doc_reload = csv.read(mfo)
+                self.assertEqual(1, len(doc_reload.sheets))
+                for row1, row2 in zip(sheet.get_rows(), doc_reload.sheets[0].get_rows()):
+                    # Only compare cell values, not cell types.
+                    row1 = [c.value for c in row1]
+                    row2 = [c.value for c in row2]
+                    self.assertEqual(row1, row2)
+
+            # Make sure we raise an exception on invalid format type.
+            # We currently only support exporting sheet as csv.
+
+            invalid_formats = (
+                "foo",
+                FormatType.GNUMERIC,
+                FormatType.JSON,
+                FormatType.ODS,
+                FormatType.XLSX,
+                FormatType.XLS_XML,
+                FormatType.XLS_XML,
+                FormatType.XML,
+                FormatType.YAML,
+            )
+
+            for invalid_format in invalid_formats:
+                mfo = MockFileObject()
+                with self.assertRaises(Exception):
+                    doc.sheets[0].write(mfo, format=invalid_format)
+
+
+if __name__ == '__main__':
+    unittest.main()
+
+
diff --git a/test/python/test_gnumeric.py b/test/python/test_gnumeric.py
new file mode 100755
index 0000000..f715776
--- /dev/null
+++ b/test/python/test_gnumeric.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+import os
+import os.path
+
+from orcus import gnumeric
+
+import file_load_common as common
+
+
+class TestCase(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        # base directory for gnumeric test files.
+        basedir = os.path.join(os.path.dirname(__file__), "..", "gnumeric")
+        cls.basedir = os.path.normpath(basedir)
+
+    def test_import(self):
+
+        test_dirs = (
+            "raw-values-1",
+        )
+
+        for test_dir in test_dirs:
+            test_dir = os.path.join(self.basedir, test_dir)
+            common.run_test_dir(self, test_dir, common.DocLoader(gnumeric))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/python/test_json.py b/test/python/test_json.py
new file mode 100755
index 0000000..2fde74c
--- /dev/null
+++ b/test/python/test_json.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+from orcus import json
+
+
+class JsonTest(unittest.TestCase):
+
+    def test_loads(self):
+        s = '[1,2,3,"foo",[4,5,6], {"a": 12.3, "b": 34.4, "c": [true, false, null]}]'
+        o = json.loads(s)
+        self.assertTrue(isinstance(o, list))
+        self.assertEqual(len(o), 6)
+        self.assertEqual(o[0], 1)
+        self.assertEqual(o[1], 2)
+        self.assertEqual(o[2], 3)
+        self.assertEqual(o[3], "foo")
+
+        self.assertTrue(isinstance(o[4], list))
+        self.assertEqual(o[4][0], 4)
+        self.assertEqual(o[4][1], 5)
+        self.assertEqual(o[4][2], 6)
+
+        d = o[5]
+        self.assertTrue(isinstance(d, dict))
+        self.assertEqual(len(d), 3)
+        self.assertEqual(d["a"], 12.3)
+        self.assertEqual(d["b"], 34.4)
+
+        l = d["c"]
+        self.assertEqual(len(l), 3)
+        self.assertEqual(l[0], True)
+        self.assertEqual(l[1], False)
+        self.assertEqual(l[2], None)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/python/test_module.py b/test/python/test_module.py
new file mode 100755
index 0000000..2d6ca4b
--- /dev/null
+++ b/test/python/test_module.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+import orcus
+import os.path
+import json
+import os
+from pathlib import Path
+
+
+class ModuleTest(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        top_builddir = Path(os.environ["BUILDDIR"])
+        with open(top_builddir / "test" / "python" / "env.json", "r") as f:
+            cls.env = json.load(f)
+
+    def test_version(self):
+        s = orcus.__version__
+        expected = f"{self.env['version-major']}.{self.env['version-minor']}.{self.env['version-micro']}"
+        self.assertEqual(expected, s)
+
+    def test_detect_format(self):
+        test_root_dir = os.path.join(os.path.dirname(__file__), "..")
+
+        checks = (
+            (("ods", "raw-values-1", "input.ods"), orcus.FormatType.ODS),
+            (("xlsx", "raw-values-1", "input.xlsx"), orcus.FormatType.XLSX),
+            (("xls-xml", "raw-values-1", "input.xml"), orcus.FormatType.XLS_XML),
+            (("gnumeric", "raw-values-1", "input.gnumeric"), orcus.FormatType.GNUMERIC),
+        )
+
+        for check in checks:
+            filepath = os.path.join(test_root_dir, *check[0])
+            with open(filepath, "rb") as f:
+                # Pass the file object directly.
+                fmt = orcus.detect_format(f)
+                self.assertEqual(check[1], fmt)
+
+                # Pass the bytes.
+                f.seek(0)
+                bytes = f.read()
+                fmt = orcus.detect_format(bytes)
+                self.assertEqual(check[1], fmt)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/python/test_ods.py b/test/python/test_ods.py
new file mode 100755
index 0000000..e4c0dc9
--- /dev/null
+++ b/test/python/test_ods.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+from pathlib import Path
+
+from orcus import ods, FormulaTokenType, FormulaTokenOp
+
+import file_load_common as common
+
+
+class TestCase(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        # base directory for ods test files.
+        basedir = Path(__file__).parent / ".." / "ods"
+        cls.basedir = basedir.resolve()
+
+    def test_import(self):
+        test_dirs = ("raw-values-1", "formula-1", "formula-2")
+        for test_dir in test_dirs:
+            test_dir = self.basedir / test_dir
+            common.run_test_dir(self, test_dir, common.DocLoader(ods))
+
+    def test_formula_tokens_1(self):
+        filepath = self.basedir / "formula-1" / "input.ods"
+        with open(filepath, "rb") as f:
+            doc = ods.read(f, recalc=False)
+
+        self.assertEqual(len(doc.sheets), 1)
+
+        # The 'Formula' sheet contains 4 formula cells in A1:A4.
+        sheet = doc.sheets[0]
+        self.assertEqual(sheet.name, "Formula")
+        rows = [row for row in sheet.get_rows()]
+        self.assertEqual(len(rows), 4)
+
+        expected = ("1*2", "12/3", "AVERAGE($A1:A$2)", "SUM($A$1:$A$3)")
+        for row, expected_formula in zip(sheet.get_rows(), expected):
+            c = row[0]
+            self.assertEqual(c.formula, expected_formula)
+
+        expected = (
+            (
+                ("1", FormulaTokenType.VALUE, FormulaTokenOp.VALUE),
+                ("*", FormulaTokenType.OPERATOR, FormulaTokenOp.MULTIPLY),
+                ("2", FormulaTokenType.VALUE, FormulaTokenOp.VALUE)
+            ),
+            (
+                ("12", FormulaTokenType.VALUE, FormulaTokenOp.VALUE),
+                ("/", FormulaTokenType.OPERATOR, FormulaTokenOp.DIVIDE),
+                ("3", FormulaTokenType.VALUE, FormulaTokenOp.VALUE)
+            ),
+            (
+                ("AVERAGE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION),
+                ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN),
+                ("$A1:A$2", FormulaTokenType.REFERENCE, FormulaTokenOp.RANGE_REF),
+                (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE)
+            ),
+            (
+                ("SUM", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION),
+                ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN),
+                ("$A$1:$A$3", FormulaTokenType.REFERENCE, FormulaTokenOp.RANGE_REF),
+                (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE)
+            ),
+        )
+
+        for row, expected_formula_tokens in zip(sheet.get_rows(), expected):
+            c = row[0]
+            iter = c.get_formula_tokens()
+            for token, expected_token in zip(iter, expected_formula_tokens):
+                self.assertEqual(str(token), expected_token[0])
+                self.assertEqual(token.type, expected_token[1])
+                self.assertEqual(token.op, expected_token[2])
+
+    def test_formula_tokens_2(self):
+        """Check the formula tokens of the column C cells in formula-2/input.ods."""
+        filepath = self.basedir / "formula-2" / "input.ods"
+        with open(filepath, "rb") as f:
+            doc = ods.read(f, recalc=False)
+
+        self.assertEqual(len(doc.sheets), 1)
+
+        expected = (
+            (
+                ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION),
+                ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN),
+                ("A2", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF),
+                (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP),
+                ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING),
+                (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP),
+                ("B2", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF),
+                (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE),
+            ),
+            (
+                ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION),
+                ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN),
+                ("A3", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF),
+                (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP),
+                ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING),
+                (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP),
+                ("B3", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF),
+                (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE),
+            ),
+            (
+                ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION),
+                ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN),
+                ("A4", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF),
+                (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP),
+                ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING),
+                (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP),
+                ("B4", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF),
+                (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE),
+            ),
+            (
+                ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION),
+                ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN),
+                ("A5", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF),
+                (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP),
+                ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING),
+                (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP),
+                ("B5", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF),
+                (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE),
+            ),
+            (
+                ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION),
+                ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN),
+                ("A6", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF),
+                (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP),
+                ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING),
+                (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP),
+                ("B6", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF),
+                (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE),
+            ),
+            (
+                ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION),
+                ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN),
+                ("A7", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF),
+                (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP),
+                ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING),
+                (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP),
+                ("B7", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF),
+                (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE),
+            ),
+        )
+
+        # Check cells in column C, skipping the header row.
+        rows = list(doc.sheets[0].get_rows())[1:]
+        self.assertGreaterEqual(len(rows), len(expected))
+        for row, expected_tokens in zip(rows, expected):
+            # Compare whole sequences: zip() would hide missing or extra tokens.
+            actual = [(str(t), t.type, t.op) for t in row[2].get_formula_tokens()]
+            self.assertEqual(actual, list(expected_tokens))
+
+
+if __name__ == '__main__':
+    unittest.main()  # run this module's tests when executed as a script
diff --git a/test/python/test_xls_xml.py b/test/python/test_xls_xml.py
new file mode 100755
index 0000000..b8ae918
--- /dev/null
+++ b/test/python/test_xls_xml.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+import os
+import os.path
+
+import orcus
+from orcus import xls_xml
+
+import file_load_common as common
+
+
+class TestCase(unittest.TestCase):
+    """Import tests for the orcus.xls_xml module."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Store the normalized base directory of the xls-xml test files."""
+        basedir = os.path.join(os.path.dirname(__file__), "..", "xls-xml")
+        cls.basedir = os.path.normpath(basedir)
+
+    def test_import(self):
+        """Run common.run_test_dir on each listed xls-xml test directory."""
+
+        test_dirs = (
+            "basic",
+            "bold-and-italic",
+            "colored-text",
+            "empty-rows",
+            "merged-cells",
+            "named-expression",
+            "named-expression-sheet-local",
+            "raw-values-1",
+        )
+
+        for test_dir in test_dirs:
+            test_dir = os.path.join(self.basedir, test_dir)
+            common.run_test_dir(self, test_dir, common.DocLoader(xls_xml))
+
+    def test_skip_error_cells(self):
+        """Check how each error policy treats formulas that fail to parse."""
+        filepath = os.path.join(self.basedir, "formula-cells-parse-error", "input.xml")
+        with open(filepath, "rb") as f:
+            content = f.read()
+
+        # Without an explicit policy the parse error is raised, as with 'fail'.
+        with self.assertRaises(RuntimeError):
+            xls_xml.read(content)
+
+        with self.assertRaises(RuntimeError):  # TODO : should we raise a more specific error?
+            xls_xml.read(content, error_policy="fail")
+
+        # With the 'skip' policy, formula cells with erroneous formulas are
+        # imported as formula cells with error.
+        doc = xls_xml.read(content, error_policy="skip")
+
+        # Make sure cells B2 and A5 are imported as formula cells with error.
+        rows = list(doc.sheets[0].get_rows())
+        for row_pos, col_pos in ((1, 1), (4, 0)):  # B2 and A5 (zero-based)
+            with self.subTest(row=row_pos, column=col_pos):
+                c = rows[row_pos][col_pos]
+                self.assertEqual(c.type, orcus.CellType.FORMULA_WITH_ERROR)
+                self.assertFalse(c.formula)  # formula string should be empty
+                # Error formula tokens consist of: error token, string token
+                # (original formula), string token (error message).
+                formula_tokens = list(c.get_formula_tokens())
+                self.assertEqual(formula_tokens[0].type, orcus.FormulaTokenType.ERROR)
+                self.assertEqual(formula_tokens[1].type, orcus.FormulaTokenType.VALUE)
+                self.assertEqual(formula_tokens[2].type, orcus.FormulaTokenType.VALUE)
+
+
+if __name__ == '__main__':
+    unittest.main()  # run this module's tests when executed as a script
diff --git a/test/python/test_xlsx.py b/test/python/test_xlsx.py
new file mode 100755
index 0000000..1f691da
--- /dev/null
+++ b/test/python/test_xlsx.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+import os
+import os.path
+import mmap
+
+from orcus import xlsx
+
+import file_load_common as common
+
+
+class TestCase(unittest.TestCase):
+    """Import tests for the orcus.xlsx module."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Store the normalized base directory of the xlsx test files."""
+        basedir = os.path.join(os.path.dirname(__file__), "..", "xlsx")
+        cls.basedir = os.path.normpath(basedir)
+
+    def test_import(self):
+        """Run common.run_test_dir on each listed xlsx test directory."""
+
+        test_dirs = (
+            "boolean-values",
+            "empty-shared-strings",
+            "formula-cells",
+            "formula-shared",
+            "named-expression",
+            "named-expression-sheet-local",
+            "raw-values-1",
+        )
+
+        for test_dir in test_dirs:
+            test_dir = os.path.join(self.basedir, test_dir)
+            common.run_test_dir(self, test_dir, common.DocLoader(xlsx))
+
+    def test_named_expression(self):
+        """Check the document-level named expressions and their formula tokens."""
+        filepath = os.path.join(self.basedir, "named-expression", "input.xlsx")
+        with open(filepath, "rb") as f:
+            doc = xlsx.read(f)
+
+        named_exps = doc.get_named_expressions()
+        self.assertEqual(named_exps.names(), {"MyRange", "MyRange2"})
+        self.assertEqual(len(named_exps), 2)
+
+        named_exps_dict = {x[0]: x[1] for x in named_exps}
+        exp = named_exps_dict["MyRange"]
+        self.assertEqual(exp.origin, "Sheet1!$A$1")
+        self.assertEqual(exp.formula, "$A$1:$A$5")
+        token_iter = exp.get_formula_tokens()
+        self.assertEqual(len(token_iter), 1)
+        self.assertEqual([str(t) for t in token_iter], ["$A$1:$A$5"])
+
+        exp = named_exps_dict["MyRange2"]
+        self.assertEqual(exp.origin, "Sheet1!$A$1")
+        self.assertEqual(exp.formula, "$A$1:$B$5")
+        token_iter = exp.get_formula_tokens()
+        self.assertEqual(len(token_iter), 1)
+        self.assertEqual([str(t) for t in token_iter], ["$A$1:$B$5"])
+
+    def test_named_expression_sheet_local(self):
+        """Check the named expressions local to each of the first two sheets."""
+        filepath = os.path.join(self.basedir, "named-expression-sheet-local", "input.xlsx")
+        with open(filepath, "rb") as f:
+            doc = xlsx.read(f)
+
+        sheet = doc.sheets[0]
+        named_exps = sheet.get_named_expressions()
+        self.assertEqual(len(named_exps), 1)
+        self.assertEqual(named_exps.names(), {"MyRange",})
+
+        named_exps_dict = {x[0]: x[1] for x in named_exps}
+        exp = named_exps_dict["MyRange"]
+        self.assertEqual(exp.formula, "$A$1:$B$3")
+        token_iter = exp.get_formula_tokens()
+        self.assertEqual(len(token_iter), 1)
+        self.assertEqual([str(t) for t in token_iter], ["$A$1:$B$3"])
+
+        sheet = doc.sheets[1]
+        named_exps = sheet.get_named_expressions()
+        self.assertEqual(named_exps.names(), {"MyRange",})
+        self.assertEqual(len(named_exps), 1)
+
+        named_exps_dict = {x[0]: x[1] for x in named_exps}
+        exp = named_exps_dict["MyRange"]
+        self.assertEqual(exp.formula, "$A$4:$B$5")
+        token_iter = exp.get_formula_tokens()
+        self.assertEqual(len(token_iter), 1)
+        self.assertEqual([str(t) for t in token_iter], ["$A$4:$B$5"])
+
+
+if __name__ == '__main__':
+    unittest.main()  # run this module's tests when executed as a script