summaryrefslogtreecommitdiffstats
path: root/test/python
diff options
context:
space:
mode:
Diffstat (limited to 'test/python')
-rw-r--r--test/python/env.json.in6
-rw-r--r--test/python/file_load_common.py248
-rwxr-xr-xtest/python/perf/test_json.py273
-rwxr-xr-xtest/python/test_csv.py47
-rwxr-xr-xtest/python/test_csv_export.py107
-rwxr-xr-xtest/python/test_gnumeric.py39
-rwxr-xr-xtest/python/test_json.py45
-rwxr-xr-xtest/python/test_module.py56
-rwxr-xr-xtest/python/test_ods.py164
-rwxr-xr-xtest/python/test_xls_xml.py80
-rwxr-xr-xtest/python/test_xlsx.py103
11 files changed, 1168 insertions, 0 deletions
diff --git a/test/python/env.json.in b/test/python/env.json.in
new file mode 100644
index 0000000..26b82d6
--- /dev/null
+++ b/test/python/env.json.in
@@ -0,0 +1,6 @@
+{
+ "version-major": @ORCUS_MAJOR_VERSION@,
+ "version-minor": @ORCUS_MINOR_VERSION@,
+ "version-micro": @ORCUS_MICRO_VERSION@
+}
+
diff --git a/test/python/file_load_common.py b/test/python/file_load_common.py
new file mode 100644
index 0000000..5587405
--- /dev/null
+++ b/test/python/file_load_common.py
@@ -0,0 +1,248 @@
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+"""Collection of test cases shared between different file format types."""
+
+import os
+import os.path
+import collections
+import orcus
+
+
class Address(object):
    """Cell position parsed from a 'sheet-name/row/column' string.

    Attributes:
        sheet_name: name of the sheet (str).
        row: row index (int).
        column: column index (int).
    """

    def __init__(self, pos_s):
        """Parse the position string.

        :param pos_s: string of the form '<sheet name>/<row>/<column>'.
        :raises ValueError: if fewer than three parts are present or the row
            or column is not an integer.
        """
        # Split from the right so that a sheet name which itself contains
        # '/' is kept intact; the row and column are always the last two
        # fields.
        self.sheet_name, row, column = pos_s.rsplit('/', 2)
        self.row = int(row)
        self.column = int(column)

    def __repr__(self):
        return "(sheet={}; row={}, column={})".format(self.sheet_name, self.row, self.column)
+
+
class ExpectedSheet(object):
    """Expected content of a single sheet, populated one cell at a time."""

    def __init__(self, name):
        self.__name = name
        # Maps row index -> (column index -> cell tuple).
        self.__rows = collections.OrderedDict()
        self.__max_column = 0
        self.__max_row = 0

    @property
    def name(self):
        """Name of the sheet as given to the constructor."""
        return self.__name

    @property
    def data_size(self):
        """Dict with the number of columns and rows covered by inserted cells.

        A sheet with no inserted cells still reports a size of 1 x 1 because
        the maximum indices start at 0.
        """
        return {"column": self.__max_column + 1, "row": self.__max_row + 1}

    def get_rows(self):
        """Return all rows as a tuple of tuples of cell tuples.

        Positions that had no cell inserted are filled with
        ``(orcus.CellType.EMPTY, None)``.
        """
        width = self.__max_column + 1
        rows = []
        for row_index in range(self.__max_row + 1):
            cells = [(orcus.CellType.EMPTY, None)] * width
            for col_pos, cell in self.__rows.get(row_index, {}).items():
                cells[col_pos] = cell
            rows.append(tuple(cells))
        return tuple(rows)

    def insert_cell(self, row, column, cell_type, cell_value, result):
        """Store one cell and grow the recorded data range if needed.

        :param row: row index of the cell.
        :param column: column index of the cell.
        :param cell_type: one of "numeric", "string", "boolean" or "formula".
        :param cell_value: raw textual value; for a formula cell this is the
            formula expression.
        :param result: formula result; only used for formula cells.
        :raises RuntimeError: on an unknown cell type, an invalid boolean
            value or an unquoted string value.
        """
        if row not in self.__rows:
            self.__rows[row] = collections.OrderedDict()

        row_data = self.__rows[row]

        if cell_type == "numeric":
            row_data[column] = (orcus.CellType.NUMERIC, float(cell_value))
        elif cell_type == "string":
            row_data[column] = (
                orcus.CellType.STRING, self.__unescape_string_cell_value(cell_value))
        elif cell_type == "boolean":
            if cell_value == "true":
                row_data[column] = (orcus.CellType.BOOLEAN, True)
            elif cell_value == "false":
                row_data[column] = (orcus.CellType.BOOLEAN, False)
            else:
                raise RuntimeError("invalid boolean value: {}".format(cell_value))
        elif cell_type == "formula":
            row_data[column] = (orcus.CellType.FORMULA, result, cell_value)
        else:
            raise RuntimeError("unhandled cell value type: {}".format(cell_type))

        # Update the data range.
        self.__max_row = max(self.__max_row, row)
        self.__max_column = max(self.__max_column, column)

    def __unescape_string_cell_value(self, v):
        """Strip the surrounding double quotes and resolve backslash escapes.

        :raises RuntimeError: if the value is not enclosed in a pair of
            double quotes.
        """
        # The length check rejects an empty value (which would otherwise
        # raise IndexError) and a lone '"' that is both first and last char.
        if len(v) < 2 or v[0] != '"' or v[-1] != '"':
            raise RuntimeError("string value is expected to be quoted.")

        v = v[1:-1]  # remove the outer quotes.

        buf = []
        escaped_char = False
        for c in v:
            if escaped_char:
                # Character following a backslash is taken literally.
                buf.append(c)
                escaped_char = False
            elif c == '\\':
                escaped_char = True
            else:
                buf.append(c)

        return "".join(buf)
+
+
class ExpectedDocument(object):
    """Expected document content loaded from a check file.

    Each non-empty line of the check file has the form
    ``<sheet>/<row>/<column>:<cell type>:<value>``.  Parsed cells are grouped
    into ExpectedSheet objects stored in ``self.sheets`` in the order the
    sheets first appear.
    """

    def __init__(self, filepath):
        """Read and parse the check file at `filepath`.

        :raises RuntimeError: if a line cannot be split into three parts or a
            formula line lacks a result value.
        """
        self.sheets = []

        with open(filepath, "r") as f:
            for line in f:
                self.__parse_line(line.strip())

    def __parse_line(self, line):
        """Parse one stripped line and insert the cell it describes."""
        if not line:
            return

        # Split the line into 3 parts - position, cell type and the value.
        # Note that a valid formula expression may contain ':', so only the
        # first two ':' are treated as separators.
        parts = line.split(':', 2)

        if len(parts) != 3:
            raise RuntimeError(
                "line is expected to contain 3 parts, but not all parts are identified.")

        pos, cell_type, cell_value = parts

        # Merge information is not part of the cell content being checked.
        if cell_type in ("merge-width", "merge-height"):
            return

        result = None
        if cell_type == "formula":
            # Split the cell value into formula expression and result.
            idx = cell_value.rfind(':')
            if idx < 0:
                raise RuntimeError("formula line is expected to contain a result value.")
            cell_value, result = cell_value[:idx], cell_value[idx+1:]
            try:
                result = float(result)
            except ValueError:
                # Non-numeric result; keep it as a string.
                pass

        pos = Address(pos)

        # Start a new sheet whenever the sheet name differs from the one
        # currently being filled.
        if not self.sheets or self.sheets[-1].name != pos.sheet_name:
            self.sheets.append(ExpectedSheet(pos.sheet_name))

        self.sheets[-1].insert_cell(pos.row, pos.column, cell_type, cell_value, result)
+
+
+def _compare_cells(expected, actual):
+ type = expected[0]
+
+ if type != actual.type:
+ return False
+
+ if type == orcus.CellType.EMPTY:
+ return True
+
+ if type in (orcus.CellType.BOOLEAN, orcus.CellType.NUMERIC, orcus.CellType.STRING):
+ return expected[1] == actual.value
+
+ if type == orcus.CellType.FORMULA:
+ return expected[1] == actual.value and expected[2] == actual.formula
+
+ return False
+
+
class DocLoader:
    """Load documents through a module-like object that has a 'read' function."""

    def __init__(self, mod_loader):
        """:param mod_loader: object providing ``read(source, recalc=...)``."""
        self._mod_loader = mod_loader

    def load(self, filepath, recalc):
        """Load the file by passing an open binary file object to ``read``."""
        with open(filepath, "rb") as f:
            return self._mod_loader.read(f, recalc=recalc)

    def load_from_value(self, filepath):
        """Load the file by passing its whole content as bytes, without recalc."""
        with open(filepath, "rb") as f:
            content = f.read()
        return self._mod_loader.read(content, recalc=False)
+
+
def run_test_dir(self, test_dir, doc_loader):
    """Run test case for loading a file into a document.

    :param self: unittest.TestCase instance whose assert methods are used.
    :param test_dir: test directory that contains an input file (whose base
        name is 'input') and a content check file (check.txt).
    :param doc_loader: loader object that provides ``load(filepath, recalc)``
        and ``load_from_value(filepath)``.
    """

    print("test directory: {}".format(test_dir))
    expected_doc = ExpectedDocument(os.path.join(test_dir, "check.txt"))

    # Find the input file to load.
    input_file = None
    for file_name in os.listdir(test_dir):
        if os.path.splitext(file_name)[0] == "input":
            input_file = os.path.join(test_dir, file_name)
            break

    print("input file: {}".format(input_file))
    self.assertIsNotNone(input_file)

    doc = doc_loader.load(input_file, True)
    self.assertIsInstance(doc, orcus.Document)

    # Sometimes the actual document contains trailing empty sheets, which the
    # expected document does not store.
    self.assertTrue(expected_doc.sheets)
    self.assertLessEqual(len(expected_doc.sheets), len(doc.sheets))

    expected_sheets = {sh.name: sh for sh in expected_doc.sheets}
    actual_sheets = {sh.name: sh for sh in doc.sheets}

    for sheet_name, actual_sheet in actual_sheets.items():
        expected_sheet = expected_sheets.get(sheet_name)
        if expected_sheet is None:
            # This sheet must be empty since it's not in the expected document.
            # Make sure it returns empty row set.
            self.assertEqual(len(list(actual_sheet.get_rows())), 0)
            continue

        self.assertEqual(expected_sheet.data_size, actual_sheet.data_size)
        row_pairs = zip(expected_sheet.get_rows(), actual_sheet.get_rows())
        for row_idx, (expected_row, actual_row) in enumerate(row_pairs):
            cell_pairs = zip(expected_row, actual_row)
            for col_idx, (expected_cell, actual_cell) in enumerate(cell_pairs):
                self.assertTrue(
                    _compare_cells(expected_cell, actual_cell),
                    "cell mismatch at sheet={!r}, row={}, column={}; expected {!r}".format(
                        sheet_name, row_idx, col_idx, expected_cell))

    # Also make sure the document loads fine without recalc.
    doc = doc_loader.load(input_file, False)
    self.assertIsInstance(doc, orcus.Document)

    # Make sure the document loads from in-memory value.
    doc = doc_loader.load_from_value(input_file)
    self.assertIsInstance(doc, orcus.Document)
diff --git a/test/python/perf/test_json.py b/test/python/perf/test_json.py
new file mode 100755
index 0000000..acd34cc
--- /dev/null
+++ b/test/python/perf/test_json.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python3
+
+from datetime import datetime
+import json
+import orcus.json
+
+swagger_json = """
+{
+ "swagger": "2.0",
+ "info": {
+ "version": "1.0.0",
+ "title": "Swagger Petstore (Simple)",
+ "description": "A sample API that uses a petstore as an example to demonstrate features in the swagger-2.0 specification",
+ "termsOfService": "http://helloreverb.com/terms/",
+ "contact": {
+ "name": "Swagger API team",
+ "email": "foo@example.com",
+ "url": "http://swagger.io"
+ },
+ "license": {
+ "name": "MIT",
+ "url": "http://opensource.org/licenses/MIT"
+ }
+ },
+ "host": "petstore.swagger.wordnik.com",
+ "basePath": "/api",
+ "schemes": [
+ "http"
+ ],
+ "consumes": [
+ "application/json"
+ ],
+ "produces": [
+ "application/json"
+ ],
+ "paths": {
+ "/pets": {
+ "get": {
+ "description": "Returns all pets from the system that the user has access to",
+ "operationId": "findPets",
+ "produces": [
+ "application/json",
+ "application/xml",
+ "text/xml",
+ "text/html"
+ ],
+ "parameters": [
+ {
+ "name": "tags",
+ "in": "query",
+ "description": "tags to filter by",
+ "required": false,
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "collectionFormat": "csv"
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "description": "maximum number of results to return",
+ "required": false,
+ "type": "integer",
+ "format": "int32"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "pet response",
+ "schema": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/pet"
+ }
+ }
+ },
+ "default": {
+ "description": "unexpected error",
+ "schema": {
+ "$ref": "#/definitions/errorModel"
+ }
+ }
+ }
+ },
+ "post": {
+ "description": "Creates a new pet in the store. Duplicates are allowed",
+ "operationId": "addPet",
+ "produces": [
+ "application/json"
+ ],
+ "parameters": [
+ {
+ "name": "pet",
+ "in": "body",
+ "description": "Pet to add to the store",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/newPet"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "pet response",
+ "schema": {
+ "$ref": "#/definitions/pet"
+ }
+ },
+ "default": {
+ "description": "unexpected error",
+ "schema": {
+ "$ref": "#/definitions/errorModel"
+ }
+ }
+ }
+ }
+ },
+ "/pets/{id}": {
+ "get": {
+ "description": "Returns a user based on a single ID, if the user does not have access to the pet",
+ "operationId": "findPetById",
+ "produces": [
+ "application/json",
+ "application/xml",
+ "text/xml",
+ "text/html"
+ ],
+ "parameters": [
+ {
+ "name": "id",
+ "in": "path",
+ "description": "ID of pet to fetch",
+ "required": true,
+ "type": "integer",
+ "format": "int64"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "pet response",
+ "schema": {
+ "$ref": "#/definitions/pet"
+ }
+ },
+ "default": {
+ "description": "unexpected error",
+ "schema": {
+ "$ref": "#/definitions/errorModel"
+ }
+ }
+ }
+ },
+ "delete": {
+ "description": "deletes a single pet based on the ID supplied",
+ "operationId": "deletePet",
+ "parameters": [
+ {
+ "name": "id",
+ "in": "path",
+ "description": "ID of pet to delete",
+ "required": true,
+ "type": "integer",
+ "format": "int64"
+ }
+ ],
+ "responses": {
+ "204": {
+ "description": "pet deleted"
+ },
+ "default": {
+ "description": "unexpected error",
+ "schema": {
+ "$ref": "#/definitions/errorModel"
+ }
+ }
+ }
+ }
+ }
+ },
+ "definitions": {
+ "pet": {
+ "required": [
+ "id",
+ "name"
+ ],
+ "properties": {
+ "id": {
+ "type": "integer",
+ "format": "int64"
+ },
+ "name": {
+ "type": "string"
+ },
+ "tag": {
+ "type": "string"
+ }
+ }
+ },
+ "newPet": {
+ "required": [
+ "name"
+ ],
+ "properties": {
+ "id": {
+ "type": "integer",
+ "format": "int64"
+ },
+ "name": {
+ "type": "string"
+ },
+ "tag": {
+ "type": "string"
+ }
+ }
+ },
+ "errorModel": {
+ "required": [
+ "code",
+ "message"
+ ],
+ "properties": {
+ "code": {
+ "type": "integer",
+ "format": "int32"
+ },
+ "message": {
+ "type": "string"
+ }
+ }
+ }
+ }
+}
+"""
+
class ScopePrinter(object):
    """Context manager that prints when a named scope starts and ends.

    The end message includes the wall-clock duration of the scope in seconds.
    """

    def __init__(self, name):
        self.start_time = None
        self.name = name

    def __enter__(self):
        self.start_time = datetime.now()
        print("--- {} started".format(self.name))
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        elapsed = datetime.now() - self.start_time
        # total_seconds() also accounts for the 'days' component of the
        # timedelta, which seconds + microseconds alone would drop.
        sec = elapsed.total_seconds()
        print("--- {} ended (duration: {} seconds)".format(self.name, sec))
+
+
def run_builtin(iterations=20000):
    """Parse the sample JSON document repeatedly with the standard json module.

    :param iterations: number of times to parse the document.
    """
    for _ in range(iterations):
        json.loads(swagger_json)
+
+
def run_orcus(iterations=20000):
    """Parse the sample JSON document repeatedly with orcus.json.

    :param iterations: number of times to parse the document.
    """
    for _ in range(iterations):
        orcus.json.loads(swagger_json)
+
+
def main():
    """Time the builtin parser run, then the orcus parser run."""
    benchmarks = (
        ("builtin", run_builtin),
        ("orcus", run_orcus),
    )
    for label, runner in benchmarks:
        with ScopePrinter(label):
            runner()
+
+
+if __name__ == '__main__':
+ main()
+
diff --git a/test/python/test_csv.py b/test/python/test_csv.py
new file mode 100755
index 0000000..c6f59e6
--- /dev/null
+++ b/test/python/test_csv.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+import os
+import os.path
+
+from orcus import csv
+
+import file_load_common as common
+
+
class DocLoader:
    """Loader for csv files with the interface run_test_dir expects."""

    def load(self, filepath, recalc):
        """Read the csv file via an open text file object.

        The recalc argument is accepted but not passed on to csv.read.
        """
        with open(filepath, "r") as stream:
            document = csv.read(stream)
        return document

    def load_from_value(self, filepath):
        """Read the csv file by passing its whole text content to csv.read."""
        with open(filepath, "r") as stream:
            text = stream.read()
        return csv.read(text)
+
+
class TestCase(unittest.TestCase):
    """Import tests for csv files using the shared directory-based runner."""

    @classmethod
    def setUpClass(cls):
        # Base directory for csv test files.
        script_dir = os.path.dirname(__file__)
        cls.basedir = os.path.normpath(os.path.join(script_dir, "..", "csv"))

    def test_import(self):
        dir_names = (
            "simple-numbers",
            "normal-quotes",
            "double-quotes",
            "quoted-with-delim",
        )
        for dir_name in dir_names:
            common.run_test_dir(self, os.path.join(self.basedir, dir_name), DocLoader())
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/python/test_csv_export.py b/test/python/test_csv_export.py
new file mode 100755
index 0000000..9c6d6b4
--- /dev/null
+++ b/test/python/test_csv_export.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+import os
+import os.path
+import file_load_common as common
+from orcus import FormatType, csv
+
+
class MockFileObject(object):
    """Minimal in-memory stand-in for a file object.

    Content stays None until something is written, which lets a caller tell
    "nothing was written" apart from "empty content was written".
    """

    def __init__(self):
        self._content = None

    def write(self, bytes):
        """Store the written data, appending to any earlier write."""
        # Accumulate rather than overwrite so that a writer emitting its
        # output in several chunks does not lose the earlier ones.
        if self._content is None:
            self._content = bytes
        else:
            self._content += bytes

    def read(self):
        """Return everything written so far, or None if nothing was written."""
        return self._content

    @property
    def bytes(self):
        """Everything written so far, or None if nothing was written."""
        return self._content
+
+
class TestCase(unittest.TestCase):
    """Tests for exporting sheets as csv."""

    @classmethod
    def setUpClass(cls):
        # base directory for xlsx test files.
        basedir_xlsx = os.path.join(os.path.dirname(__file__), "..", "xlsx")
        cls.basedir_xlsx = os.path.normpath(basedir_xlsx)

    @unittest.skipIf(os.environ.get("WITH_PYTHON_XLSX") is None, "python xlsx module is disabled")
    def test_export_from_xlsx(self):
        """Export each sheet of several xlsx files as csv and reload it.

        NOTE(review): the source this was reconstructed from has collapsed
        indentation; the invalid-format check is assumed to run once per
        loaded document - confirm against the original file.
        """
        from orcus import xlsx

        test_dirs = (
            "raw-values-1",
            "empty-shared-strings",
            "named-expression",
        )

        # We currently only support exporting sheet as csv, so each of these
        # must be rejected.
        invalid_formats = (
            "foo",
            FormatType.GNUMERIC,
            FormatType.JSON,
            FormatType.ODS,
            FormatType.XLSX,
            FormatType.XLS_XML,
            FormatType.XML,
            FormatType.YAML,
        )

        for test_dir in test_dirs:
            test_dir = os.path.join(self.basedir_xlsx, test_dir)
            input_file = os.path.join(test_dir, "input.xlsx")
            with open(input_file, "rb") as f:
                doc = xlsx.read(f)

            # Build an expected document object from the check file.
            check_file = os.path.join(test_dir, "check.txt")
            check_doc = common.ExpectedDocument(check_file)

            # check_doc only contains non-empty sheets.
            data_sheet_names = {sheet.name for sheet in check_doc.sheets}

            for sheet in doc.sheets:
                mfo = MockFileObject()
                sheet.write(mfo, format=FormatType.CSV)

                if mfo.bytes is None:
                    # Nothing was written; the sheet must not be one that
                    # the check file lists as having data.
                    self.assertNotIn(sheet.name, data_sheet_names)
                    continue

                # Load the csv stream into a document again.
                doc_reload = csv.read(mfo)
                self.assertEqual(1, len(doc_reload.sheets))
                for row1, row2 in zip(sheet.get_rows(), doc_reload.sheets[0].get_rows()):
                    # Only compare cell values, not cell types.
                    row1 = [c.value for c in row1]
                    row2 = [c.value for c in row2]
                    self.assertEqual(row1, row2)

            # Make sure we raise an exception on invalid format type.
            for invalid_format in invalid_formats:
                mfo = MockFileObject()
                with self.assertRaises(Exception):
                    doc.sheets[0].write(mfo, format=invalid_format)
+
+
+if __name__ == '__main__':
+ unittest.main()
+
+
diff --git a/test/python/test_gnumeric.py b/test/python/test_gnumeric.py
new file mode 100755
index 0000000..f715776
--- /dev/null
+++ b/test/python/test_gnumeric.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+import os
+import os.path
+
+from orcus import gnumeric
+
+import file_load_common as common
+
+
class TestCase(unittest.TestCase):
    """Import tests for gnumeric files using the shared directory-based runner."""

    @classmethod
    def setUpClass(cls):
        # Base directory for gnumeric test files.
        script_dir = os.path.dirname(__file__)
        cls.basedir = os.path.normpath(os.path.join(script_dir, "..", "gnumeric"))

    def test_import(self):
        dir_names = (
            "raw-values-1",
        )

        for dir_name in dir_names:
            full_path = os.path.join(self.basedir, dir_name)
            common.run_test_dir(self, full_path, common.DocLoader(gnumeric))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/python/test_json.py b/test/python/test_json.py
new file mode 100755
index 0000000..2fde74c
--- /dev/null
+++ b/test/python/test_json.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+from orcus import json
+
+
class JsonTest(unittest.TestCase):
    """Tests for json.loads from the orcus package."""

    def test_loads(self):
        """Parse a nested array and check the structure and every value."""
        s = '[1,2,3,"foo",[4,5,6], {"a": 12.3, "b": 34.4, "c": [true, false, null]}]'
        o = json.loads(s)
        self.assertIsInstance(o, list)
        self.assertEqual(len(o), 6)
        self.assertEqual(o[0], 1)
        self.assertEqual(o[1], 2)
        self.assertEqual(o[2], 3)
        self.assertEqual(o[3], "foo")

        self.assertIsInstance(o[4], list)
        self.assertEqual(o[4][0], 4)
        self.assertEqual(o[4][1], 5)
        self.assertEqual(o[4][2], 6)

        d = o[5]
        self.assertIsInstance(d, dict)
        self.assertEqual(len(d), 3)
        self.assertEqual(d["a"], 12.3)
        self.assertEqual(d["b"], 34.4)

        literals = d["c"]
        self.assertEqual(len(literals), 3)
        # Identity checks: assertEqual would also accept 1 for True and 0
        # for False, which would hide a wrong result type.
        self.assertIs(literals[0], True)
        self.assertIs(literals[1], False)
        self.assertIsNone(literals[2])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/python/test_module.py b/test/python/test_module.py
new file mode 100755
index 0000000..2d6ca4b
--- /dev/null
+++ b/test/python/test_module.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+import orcus
+import os.path
+import json
+import os
+from pathlib import Path
+
+
class ModuleTest(unittest.TestCase):
    """Tests for module-level attributes and functions of orcus."""

    @classmethod
    def setUpClass(cls):
        # env.json lives under the build directory named by BUILDDIR.
        env_path = Path(os.environ["BUILDDIR"]) / "test" / "python" / "env.json"
        with open(env_path, "r") as stream:
            cls.env = json.load(stream)

    def test_version(self):
        """orcus.__version__ must equal 'major.minor.micro' from env.json."""
        keys = ("version-major", "version-minor", "version-micro")
        expected = ".".join(str(self.env[key]) for key in keys)
        actual = orcus.__version__
        self.assertEqual(expected, actual)

    def test_detect_format(self):
        """detect_format must identify a file from a file object and from bytes."""
        test_root_dir = os.path.join(os.path.dirname(__file__), "..")

        checks = (
            (("ods", "raw-values-1", "input.ods"), orcus.FormatType.ODS),
            (("xlsx", "raw-values-1", "input.xlsx"), orcus.FormatType.XLSX),
            (("xls-xml", "raw-values-1", "input.xml"), orcus.FormatType.XLS_XML),
            (("gnumeric", "raw-values-1", "input.gnumeric"), orcus.FormatType.GNUMERIC),
        )

        for path_parts, expected_format in checks:
            filepath = os.path.join(test_root_dir, *path_parts)
            with open(filepath, "rb") as stream:
                # Pass the file object directly.
                self.assertEqual(expected_format, orcus.detect_format(stream))

                # Pass the bytes.
                stream.seek(0)
                content = stream.read()
                self.assertEqual(expected_format, orcus.detect_format(content))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/python/test_ods.py b/test/python/test_ods.py
new file mode 100755
index 0000000..e4c0dc9
--- /dev/null
+++ b/test/python/test_ods.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+from pathlib import Path
+
+from orcus import ods, FormulaTokenType, FormulaTokenOp
+
+import file_load_common as common
+
+
class TestCase(unittest.TestCase):
    """Import and formula-token tests for ods files."""

    @classmethod
    def setUpClass(cls):
        # Base directory for ods test files.
        cls.basedir = (Path(__file__).parent / ".." / "ods").resolve()

    def _check_tokens(self, actual_tokens, expected_tokens):
        # Compare tokens pairwise on string form, type and op.  Like the
        # zip() it is built on, this stops at the shorter of the two inputs.
        for token, (text, token_type, token_op) in zip(actual_tokens, expected_tokens):
            self.assertEqual(str(token), text)
            self.assertEqual(token.type, token_type)
            self.assertEqual(token.op, token_op)

    def test_import(self):
        for dir_name in ("raw-values-1", "formula-1", "formula-2"):
            common.run_test_dir(self, self.basedir / dir_name, common.DocLoader(ods))

    def test_formula_tokens_1(self):
        filepath = self.basedir / "formula-1" / "input.ods"
        with open(filepath, "rb") as stream:
            doc = ods.read(stream, recalc=False)

        self.assertEqual(len(doc.sheets), 1)

        # The 'Formula' sheet contains 4 formula cells in A1:A4.
        sheet = doc.sheets[0]
        self.assertEqual(sheet.name, "Formula")
        self.assertEqual(len(list(sheet.get_rows())), 4)

        expected_formulas = ("1*2", "12/3", "AVERAGE($A1:A$2)", "SUM($A$1:$A$3)")
        for row, expected_formula in zip(sheet.get_rows(), expected_formulas):
            self.assertEqual(row[0].formula, expected_formula)

        # Shorthands for the (type, op) pairs and tokens that repeat below.
        value = (FormulaTokenType.VALUE, FormulaTokenOp.VALUE)
        function = (FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION)
        range_ref = (FormulaTokenType.REFERENCE, FormulaTokenOp.RANGE_REF)
        open_paren = ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN)
        close_paren = (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE)

        expected_tokens_per_row = (
            (
                ("1",) + value,
                ("*", FormulaTokenType.OPERATOR, FormulaTokenOp.MULTIPLY),
                ("2",) + value,
            ),
            (
                ("12",) + value,
                ("/", FormulaTokenType.OPERATOR, FormulaTokenOp.DIVIDE),
                ("3",) + value,
            ),
            (
                ("AVERAGE",) + function,
                open_paren,
                ("$A1:A$2",) + range_ref,
                close_paren,
            ),
            (
                ("SUM",) + function,
                open_paren,
                ("$A$1:$A$3",) + range_ref,
                close_paren,
            ),
        )

        for row, expected_tokens in zip(sheet.get_rows(), expected_tokens_per_row):
            self._check_tokens(row[0].get_formula_tokens(), expected_tokens)

    def test_formula_tokens_2(self):
        filepath = self.basedir / "formula-2" / "input.ods"
        with open(filepath, "rb") as stream:
            doc = ods.read(stream, recalc=False)

        self.assertEqual(len(doc.sheets), 1)

        separator = (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP)
        single_ref = (FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF)

        # Expected tokens of CONCATENATE(A<n>," ",B<n>) for n = 2..7.
        expected_tokens_per_row = tuple(
            (
                ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION),
                ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN),
                ("A{}".format(row_no),) + single_ref,
                separator,
                ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING),
                separator,
                ("B{}".format(row_no),) + single_ref,
                (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE),
            )
            for row_no in range(2, 8)
        )

        # Check cells in column C.
        rows = list(doc.sheets[0].get_rows())
        data_rows = rows[1:]  # skip the header row
        for row, expected_tokens in zip(data_rows, expected_tokens_per_row):
            self._check_tokens(row[2].get_formula_tokens(), expected_tokens)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/python/test_xls_xml.py b/test/python/test_xls_xml.py
new file mode 100755
index 0000000..b8ae918
--- /dev/null
+++ b/test/python/test_xls_xml.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+import os
+import os.path
+
+import orcus
+from orcus import xls_xml
+
+import file_load_common as common
+
+
class TestCase(unittest.TestCase):
    """Import and error-policy tests for xls-xml files."""

    @classmethod
    def setUpClass(cls):
        # Base directory for xls-xml test files.
        script_dir = os.path.dirname(__file__)
        cls.basedir = os.path.normpath(os.path.join(script_dir, "..", "xls-xml"))

    def test_import(self):
        dir_names = (
            "basic",
            "bold-and-italic",
            "colored-text",
            "empty-rows",
            "merged-cells",
            "named-expression",
            "named-expression-sheet-local",
            "raw-values-1",
        )

        for dir_name in dir_names:
            full_path = os.path.join(self.basedir, dir_name)
            common.run_test_dir(self, full_path, common.DocLoader(xls_xml))

    def test_skip_error_cells(self):
        filepath = os.path.join(self.basedir, "formula-cells-parse-error", "input.xml")
        with open(filepath, "rb") as stream:
            content = stream.read()

        # Reading without an explicit policy must fail ...
        with self.assertRaises(RuntimeError):
            xls_xml.read(content)

        # ... and so must reading with the 'fail' policy.
        with self.assertRaises(RuntimeError):  # TODO : should we raise a more specific error?
            xls_xml.read(content, error_policy="fail")

        # With the 'skip' policy, formula cells with erroneous formulas are
        # imported as formula cells with error.
        doc = xls_xml.read(content, error_policy="skip")

        # Make sure cells B2 and A5 are imported as formula cells.
        rows = list(doc.sheets[0].get_rows())
        for row_idx, col_idx in ((1, 1), (4, 0)):
            cell = rows[row_idx][col_idx]
            self.assertEqual(cell.type, orcus.CellType.FORMULA_WITH_ERROR)
            self.assertFalse(cell.formula)  # formula string should be empty
            # error formula tokens consist of: error token, string token
            # (original formula), string token (error message).
            formula_tokens = list(cell.get_formula_tokens())
            self.assertEqual(formula_tokens[0].type, orcus.FormulaTokenType.ERROR)
            self.assertEqual(formula_tokens[1].type, orcus.FormulaTokenType.VALUE)
            self.assertEqual(formula_tokens[2].type, orcus.FormulaTokenType.VALUE)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/python/test_xlsx.py b/test/python/test_xlsx.py
new file mode 100755
index 0000000..1f691da
--- /dev/null
+++ b/test/python/test_xlsx.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import unittest
+import os
+import os.path
+import mmap
+
+from orcus import xlsx
+
+import file_load_common as common
+
+
class TestCase(unittest.TestCase):
    """Import and named-expression tests for xlsx files."""

    @classmethod
    def setUpClass(cls):
        # Base directory for xlsx test files.
        script_dir = os.path.dirname(__file__)
        cls.basedir = os.path.normpath(os.path.join(script_dir, "..", "xlsx"))

    def _check_single_token_expression(self, exp, formula):
        # The expression's formula string must equal `formula`, and its
        # token sequence must consist of exactly one token that prints as
        # that same string.
        self.assertEqual(exp.formula, formula)
        token_seq = exp.get_formula_tokens()
        self.assertEqual(len(token_seq), 1)
        tokens = list(token_seq)
        self.assertEqual(str(tokens[0]), formula)

    def test_import(self):
        dir_names = (
            "boolean-values",
            "empty-shared-strings",
            "formula-cells",
            "formula-shared",
            "named-expression",
            "named-expression-sheet-local",
            "raw-values-1",
        )

        for dir_name in dir_names:
            full_path = os.path.join(self.basedir, dir_name)
            common.run_test_dir(self, full_path, common.DocLoader(xlsx))

    def test_named_expression(self):
        filepath = os.path.join(self.basedir, "named-expression", "input.xlsx")
        with open(filepath, "rb") as stream:
            doc = xlsx.read(stream)

        named_exps = doc.get_named_expressions()
        self.assertEqual(named_exps.names(), {"MyRange", "MyRange2"})
        self.assertEqual(len(named_exps), 2)

        by_name = {item[0]: item[1] for item in named_exps}
        for name, formula in (("MyRange", "$A$1:$A$5"), ("MyRange2", "$A$1:$B$5")):
            exp = by_name[name]
            self.assertEqual(exp.origin, "Sheet1!$A$1")
            self._check_single_token_expression(exp, formula)

    def test_named_expression_sheet_local(self):
        filepath = os.path.join(self.basedir, "named-expression-sheet-local", "input.xlsx")
        with open(filepath, "rb") as stream:
            doc = xlsx.read(stream)

        # First sheet: a single sheet-local expression named MyRange.
        named_exps = doc.sheets[0].get_named_expressions()
        self.assertEqual(len(named_exps), 1)
        self.assertEqual(named_exps.names(), {"MyRange"})
        by_name = {item[0]: item[1] for item in named_exps}
        self._check_single_token_expression(by_name["MyRange"], "$A$1:$B$3")

        # Second sheet: the same name bound to a different range.
        named_exps = doc.sheets[1].get_named_expressions()
        self.assertEqual(named_exps.names(), {"MyRange"})
        self.assertEqual(len(named_exps), 1)
        by_name = {item[0]: item[1] for item in named_exps}
        self._check_single_token_expression(by_name["MyRange"], "$A$4:$B$5")
+
+
+if __name__ == '__main__':
+ unittest.main()