diff options
Diffstat (limited to 'test/python')
-rw-r--r-- | test/python/env.json.in | 6 | ||||
-rw-r--r-- | test/python/file_load_common.py | 248 | ||||
-rwxr-xr-x | test/python/perf/test_json.py | 273 | ||||
-rwxr-xr-x | test/python/test_csv.py | 47 | ||||
-rwxr-xr-x | test/python/test_csv_export.py | 107 | ||||
-rwxr-xr-x | test/python/test_gnumeric.py | 39 | ||||
-rwxr-xr-x | test/python/test_json.py | 45 | ||||
-rwxr-xr-x | test/python/test_module.py | 56 | ||||
-rwxr-xr-x | test/python/test_ods.py | 164 | ||||
-rwxr-xr-x | test/python/test_xls_xml.py | 80 | ||||
-rwxr-xr-x | test/python/test_xlsx.py | 103 |
11 files changed, 1168 insertions, 0 deletions
diff --git a/test/python/env.json.in b/test/python/env.json.in new file mode 100644 index 0000000..26b82d6 --- /dev/null +++ b/test/python/env.json.in @@ -0,0 +1,6 @@ +{ + "version-major": @ORCUS_MAJOR_VERSION@, + "version-minor": @ORCUS_MINOR_VERSION@, + "version-micro": @ORCUS_MICRO_VERSION@ +} + diff --git a/test/python/file_load_common.py b/test/python/file_load_common.py new file mode 100644 index 0000000..5587405 --- /dev/null +++ b/test/python/file_load_common.py @@ -0,0 +1,248 @@ +######################################################################## +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +######################################################################## + +"""Collection of test cases shared between different file format types.""" + +import os +import os.path +import collections +import orcus + + +class Address(object): + + def __init__(self, pos_s): + self.sheet_name, self.row, self.column = pos_s.split('/') + self.row = int(self.row) + self.column = int(self.column) + + def __repr__(self): + return "(sheet={}; row={}, column={})".format(self.sheet_name, self.row, self.column) + + +class ExpectedSheet(object): + + def __init__(self, name): + self.__name = name + self.__rows = collections.OrderedDict() + self.__max_column = 0 + self.__max_row = 0 + + @property + def name(self): + return self.__name + + @property + def data_size(self): + return {"column": self.__max_column+1, "row": self.__max_row+1} + + def get_rows(self): + rows = list() + for i in range(self.__max_row+1): + row = [(orcus.CellType.EMPTY, None) for _ in range(self.__max_column+1)] + if i in self.__rows: + for col_pos, cell in self.__rows[i].items(): + row[col_pos] = cell + rows.append(tuple(row)) + return tuple(rows) + + def insert_cell(self, row, column, cell_type, cell_value, result): + if row not in self.__rows: 
+ self.__rows[row] = collections.OrderedDict() + + row_data = self.__rows[row] + + if cell_type == "numeric": + row_data[column] = (orcus.CellType.NUMERIC, float(cell_value)) + elif cell_type == "string": + row_data[column] = (orcus.CellType.STRING, self.__unescape_string_cell_value(cell_value)) + elif cell_type == "boolean": + if cell_value == "true": + row_data[column] = (orcus.CellType.BOOLEAN, True) + elif cell_value == "false": + row_data[column] = (orcus.CellType.BOOLEAN, False) + else: + raise RuntimeError("invalid boolean value: {}".format(cell_value)) + elif cell_type == "formula": + row_data[column] = (orcus.CellType.FORMULA, result, cell_value) + else: + raise RuntimeError("unhandled cell value type: {}".format(cell_type)) + + # Update the data range. + if row > self.__max_row: + self.__max_row = row + if column > self.__max_column: + self.__max_column = column + + def __unescape_string_cell_value(self, v): + if v[0] != '"' or v[-1] != '"': + raise RuntimeError("string value is expected to be quoted.") + + v = v[1:-1] # remove the outer quotes. + + buf = [] + escaped_char = False + for c in v: + if escaped_char: + buf.append(c) + escaped_char = False + continue + + if c == '\\': + escaped_char = True + continue + + buf.append(c) + + return "".join(buf) + + +class ExpectedDocument(object): + + def __init__(self, filepath): + self.sheets = [] + + with open(filepath, "r") as f: + for line in f.readlines(): + line = line.strip() + self.__parse_line(line) + + def __parse_line(self, line): + if not line: + return + + # Split the line into 3 parts - position, cell type and the value. + # Note that a valid formula expression may contain ':', so we cannot + # simply split the line by ':'. + + parts = list() + idx = line.find(':') + while idx >= 0: + parts.append(line[:idx]) + line = line[idx+1:] + if len(parts) == 2: + # Append the rest. 
+ parts.append(line) + break + + idx = line.find(':') + + if len(parts) != 3: + raise RuntimeError( + "line is expected to contain 3 parts, but not all parts are identified.") + + if parts[1] in ("merge-width", "merge-height"): + return + + pos, cell_type, cell_value = parts[0], parts[1], parts[2] + result = None + if cell_type == "formula": + # Split the cell value into formula expression and result. + idx = cell_value.rfind(':') + if idx < 0: + raise RuntimeError("formula line is expected to contain a result value.") + cell_value, result = cell_value[:idx], cell_value[idx+1:] + try: + result = float(result) + except ValueError: + pass + + pos = Address(pos) + + if not self.sheets or self.sheets[-1].name != pos.sheet_name: + self.sheets.append(ExpectedSheet(pos.sheet_name)) + + self.sheets[-1].insert_cell(pos.row, pos.column, cell_type, cell_value, result) + + +def _compare_cells(expected, actual): + type = expected[0] + + if type != actual.type: + return False + + if type == orcus.CellType.EMPTY: + return True + + if type in (orcus.CellType.BOOLEAN, orcus.CellType.NUMERIC, orcus.CellType.STRING): + return expected[1] == actual.value + + if type == orcus.CellType.FORMULA: + return expected[1] == actual.value and expected[2] == actual.formula + + return False + + +class DocLoader: + + def __init__(self, mod_loader): + self._mod_loader = mod_loader + + def load(self, filepath, recalc): + with open(filepath, "rb") as f: + return self._mod_loader.read(f, recalc=recalc) + + def load_from_value(self, filepath): + with open(filepath, "rb") as f: + bytes = f.read() + return self._mod_loader.read(bytes, recalc=False) + + +def run_test_dir(self, test_dir, doc_loader): + """Run test case for loading a file into a document. + + :param test_dir: test directory that contains an input file (whose base + name is 'input') and a content check file (check.txt). + :param doc_loader: loader object that provides 'load' and 'load_from_value' methods. 
+ """ + + print("test directory: {}".format(test_dir)) + expected = ExpectedDocument(os.path.join(test_dir, "check.txt")) + + # Find the input file to load. + input_file = None + for file_name in os.listdir(test_dir): + name, ext = os.path.splitext(file_name) + if name == "input": + input_file = os.path.join(test_dir, file_name) + break + + print("input file: {}".format(input_file)) + self.assertIsNot(input_file, None) + + doc = doc_loader.load(input_file, True) + self.assertIsInstance(doc, orcus.Document) + + # Sometimes the actual document contains trailing empty sheets, which the + # expected document does not store. + self.assertTrue(len(expected.sheets)) + self.assertTrue(len(expected.sheets) <= len(doc.sheets)) + + expected_sheets = {sh.name: sh for sh in expected.sheets} + actual_sheets = {sh.name: sh for sh in doc.sheets} + + for sheet_name, actual_sheet in actual_sheets.items(): + if sheet_name in expected_sheets: + expected_sheet = expected_sheets[sheet_name] + self.assertEqual(expected_sheet.data_size, actual_sheet.data_size) + for expected_row, actual_row in zip(expected_sheet.get_rows(), actual_sheet.get_rows()): + for expected, actual in zip(expected_row, actual_row): + self.assertTrue(_compare_cells(expected, actual)) + else: + # This sheet must be empty since it's not in the expected document. + # Make sure it returns empty row set. + rows = [row for row in actual_sheet.get_rows()] + self.assertEqual(len(rows), 0) + + # Also make sure the document loads fine without recalc. + doc = doc_loader.load(input_file, False) + self.assertIsInstance(doc, orcus.Document) + + # Make sure the document loads from in-memory value. 
+ doc = doc_loader.load_from_value(input_file) + self.assertIsInstance(doc, orcus.Document) diff --git a/test/python/perf/test_json.py b/test/python/perf/test_json.py new file mode 100755 index 0000000..acd34cc --- /dev/null +++ b/test/python/perf/test_json.py @@ -0,0 +1,273 @@ +#!/usr/bin/env python3 + +from datetime import datetime +import json +import orcus.json + +swagger_json = """ +{ + "swagger": "2.0", + "info": { + "version": "1.0.0", + "title": "Swagger Petstore (Simple)", + "description": "A sample API that uses a petstore as an example to demonstrate features in the swagger-2.0 specification", + "termsOfService": "http://helloreverb.com/terms/", + "contact": { + "name": "Swagger API team", + "email": "foo@example.com", + "url": "http://swagger.io" + }, + "license": { + "name": "MIT", + "url": "http://opensource.org/licenses/MIT" + } + }, + "host": "petstore.swagger.wordnik.com", + "basePath": "/api", + "schemes": [ + "http" + ], + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "paths": { + "/pets": { + "get": { + "description": "Returns all pets from the system that the user has access to", + "operationId": "findPets", + "produces": [ + "application/json", + "application/xml", + "text/xml", + "text/html" + ], + "parameters": [ + { + "name": "tags", + "in": "query", + "description": "tags to filter by", + "required": false, + "type": "array", + "items": { + "type": "string" + }, + "collectionFormat": "csv" + }, + { + "name": "limit", + "in": "query", + "description": "maximum number of results to return", + "required": false, + "type": "integer", + "format": "int32" + } + ], + "responses": { + "200": { + "description": "pet response", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/pet" + } + } + }, + "default": { + "description": "unexpected error", + "schema": { + "$ref": "#/definitions/errorModel" + } + } + } + }, + "post": { + "description": "Creates a new pet in the store. 
Duplicates are allowed", + "operationId": "addPet", + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "pet", + "in": "body", + "description": "Pet to add to the store", + "required": true, + "schema": { + "$ref": "#/definitions/newPet" + } + } + ], + "responses": { + "200": { + "description": "pet response", + "schema": { + "$ref": "#/definitions/pet" + } + }, + "default": { + "description": "unexpected error", + "schema": { + "$ref": "#/definitions/errorModel" + } + } + } + } + }, + "/pets/{id}": { + "get": { + "description": "Returns a user based on a single ID, if the user does not have access to the pet", + "operationId": "findPetById", + "produces": [ + "application/json", + "application/xml", + "text/xml", + "text/html" + ], + "parameters": [ + { + "name": "id", + "in": "path", + "description": "ID of pet to fetch", + "required": true, + "type": "integer", + "format": "int64" + } + ], + "responses": { + "200": { + "description": "pet response", + "schema": { + "$ref": "#/definitions/pet" + } + }, + "default": { + "description": "unexpected error", + "schema": { + "$ref": "#/definitions/errorModel" + } + } + } + }, + "delete": { + "description": "deletes a single pet based on the ID supplied", + "operationId": "deletePet", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "ID of pet to delete", + "required": true, + "type": "integer", + "format": "int64" + } + ], + "responses": { + "204": { + "description": "pet deleted" + }, + "default": { + "description": "unexpected error", + "schema": { + "$ref": "#/definitions/errorModel" + } + } + } + } + } + }, + "definitions": { + "pet": { + "required": [ + "id", + "name" + ], + "properties": { + "id": { + "type": "integer", + "format": "int64" + }, + "name": { + "type": "string" + }, + "tag": { + "type": "string" + } + } + }, + "newPet": { + "required": [ + "name" + ], + "properties": { + "id": { + "type": "integer", + "format": "int64" + }, + "name": { + "type": "string" + 
}, + "tag": { + "type": "string" + } + } + }, + "errorModel": { + "required": [ + "code", + "message" + ], + "properties": { + "code": { + "type": "integer", + "format": "int32" + }, + "message": { + "type": "string" + } + } + } + } +} +""" + +class ScopePrinter(object): + + def __init__(self, name): + self.start_time = None + self.name = name + + def __enter__(self): + self.start_time = datetime.now() + print("--- {} started".format(self.name)) + + def __exit__(self, type, value, traceback): + end_time = datetime.now() + diff = end_time - self.start_time + sec = diff.seconds + diff.microseconds / 1000000.0 + print("--- {} ended (duration: {} seconds)".format(self.name, sec)) + + +def run_builtin(): + for i in range(20000): + o = json.loads(swagger_json) + + +def run_orcus(): + for i in range(20000): + o = orcus.json.loads(swagger_json) + + +def main(): + with ScopePrinter("builtin"): + run_builtin() + + with ScopePrinter("orcus"): + run_orcus() + + +if __name__ == '__main__': + main() + diff --git a/test/python/test_csv.py b/test/python/test_csv.py new file mode 100755 index 0000000..c6f59e6 --- /dev/null +++ b/test/python/test_csv.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +######################################################################## +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# +######################################################################## + +import unittest +import os +import os.path + +from orcus import csv + +import file_load_common as common + + +class DocLoader: + + def load(self, filepath, recalc): + with open(filepath, "r") as f: + return csv.read(f) + + def load_from_value(self, filepath): + with open(filepath, "r") as f: + content = f.read() + return csv.read(content) + + +class TestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + # base directory for csv test files. + basedir = os.path.join(os.path.dirname(__file__), "..", "csv") + cls.basedir = os.path.normpath(basedir) + + def test_import(self): + test_dirs = ("simple-numbers", "normal-quotes", "double-quotes", "quoted-with-delim") + for test_dir in test_dirs: + test_dir = os.path.join(self.basedir, test_dir) + common.run_test_dir(self, test_dir, DocLoader()) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/python/test_csv_export.py b/test/python/test_csv_export.py new file mode 100755 index 0000000..9c6d6b4 --- /dev/null +++ b/test/python/test_csv_export.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +######################################################################## +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +######################################################################## + +import unittest +import os +import os.path +import file_load_common as common +from orcus import FormatType, csv + + +class MockFileObject(object): + + def __init__(self): + self._content = None + + def write(self, bytes): + self._content = bytes + + def read(self): + return self._content + + @property + def bytes(self): + return self._content + + +class TestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + # base directory for xlsx test files. 
+ basedir_xlsx = os.path.join(os.path.dirname(__file__), "..", "xlsx") + cls.basedir_xlsx = os.path.normpath(basedir_xlsx) + + @unittest.skipIf(os.environ.get("WITH_PYTHON_XLSX") is None, "python xlsx module is disabled") + def test_export_from_xlsx(self): + from orcus import xlsx + + test_dirs = ( + "raw-values-1", + "empty-shared-strings", + "named-expression", + ) + + for test_dir in test_dirs: + test_dir = os.path.join(self.basedir_xlsx, test_dir) + input_file = os.path.join(test_dir, "input.xlsx") + with open(input_file, "rb") as f: + doc = xlsx.read(f) + + # Build an expected document object from the check file. + check_file = os.path.join(test_dir, "check.txt") + check_doc = common.ExpectedDocument(check_file) + + # check_doc only contains non-empty sheets. + data_sheet_names = set() + for sheet in check_doc.sheets: + data_sheet_names.add(sheet.name) + + for sheet in doc.sheets: + mfo = MockFileObject() + sheet.write(mfo, format=FormatType.CSV) + + if mfo.bytes is None: + self.assertFalse(sheet.name in data_sheet_names) + continue + + # Load the csv stream into a document again. + doc_reload = csv.read(mfo) + self.assertEqual(1, len(doc_reload.sheets)) + for row1, row2 in zip(sheet.get_rows(), doc_reload.sheets[0].get_rows()): + # Only compare cell values, not cell types. + row1 = [c.value for c in row1] + row2 = [c.value for c in row2] + self.assertEqual(row1, row2) + + # Make sure we raise an exception on invalid format type. + # We currently only support exporting sheet as csv. 
+ + invalid_formats = ( + "foo", + FormatType.GNUMERIC, + FormatType.JSON, + FormatType.ODS, + FormatType.XLSX, + FormatType.XLS_XML, + FormatType.XLS_XML, + FormatType.XML, + FormatType.YAML, + ) + + for invalid_format in invalid_formats: + mfo = MockFileObject() + with self.assertRaises(Exception): + doc.sheets[0].write(mfo, format=invalid_format) + + +if __name__ == '__main__': + unittest.main() + + diff --git a/test/python/test_gnumeric.py b/test/python/test_gnumeric.py new file mode 100755 index 0000000..f715776 --- /dev/null +++ b/test/python/test_gnumeric.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +######################################################################## +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +######################################################################## + +import unittest +import os +import os.path + +from orcus import gnumeric + +import file_load_common as common + + +class TestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + # base directory for gnumeric test files. + basedir = os.path.join(os.path.dirname(__file__), "..", "gnumeric") + cls.basedir = os.path.normpath(basedir) + + def test_import(self): + + test_dirs = ( + "raw-values-1", + ) + + for test_dir in test_dirs: + test_dir = os.path.join(self.basedir, test_dir) + common.run_test_dir(self, test_dir, common.DocLoader(gnumeric)) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/python/test_json.py b/test/python/test_json.py new file mode 100755 index 0000000..2fde74c --- /dev/null +++ b/test/python/test_json.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +######################################################################## +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +######################################################################## + +import unittest +from orcus import json + + +class JsonTest(unittest.TestCase): + + def test_loads(self): + s = '[1,2,3,"foo",[4,5,6], {"a": 12.3, "b": 34.4, "c": [true, false, null]}]' + o = json.loads(s) + self.assertTrue(isinstance(o, list)) + self.assertEqual(len(o), 6) + self.assertEqual(o[0], 1) + self.assertEqual(o[1], 2) + self.assertEqual(o[2], 3) + self.assertEqual(o[3], "foo") + + self.assertTrue(isinstance(o[4], list)) + self.assertEqual(o[4][0], 4) + self.assertEqual(o[4][1], 5) + self.assertEqual(o[4][2], 6) + + d = o[5] + self.assertTrue(isinstance(d, dict)) + self.assertEqual(len(d), 3) + self.assertEqual(d["a"], 12.3) + self.assertEqual(d["b"], 34.4) + + l = d["c"] + self.assertEqual(len(l), 3) + self.assertEqual(l[0], True) + self.assertEqual(l[1], False) + self.assertEqual(l[2], None) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/python/test_module.py b/test/python/test_module.py new file mode 100755 index 0000000..2d6ca4b --- /dev/null +++ b/test/python/test_module.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +######################################################################## +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# +######################################################################## + +import unittest +import orcus +import os.path +import json +import os +from pathlib import Path + + +class ModuleTest(unittest.TestCase): + + @classmethod + def setUpClass(cls): + top_builddir = Path(os.environ["BUILDDIR"]) + with open(top_builddir / "test" / "python" / "env.json", "r") as f: + cls.env = json.load(f) + + def test_version(self): + s = orcus.__version__ + expected = f"{self.env['version-major']}.{self.env['version-minor']}.{self.env['version-micro']}" + self.assertEqual(expected, s) + + def test_detect_format(self): + test_root_dir = os.path.join(os.path.dirname(__file__), "..") + + checks = ( + (("ods", "raw-values-1", "input.ods"), orcus.FormatType.ODS), + (("xlsx", "raw-values-1", "input.xlsx"), orcus.FormatType.XLSX), + (("xls-xml", "raw-values-1", "input.xml"), orcus.FormatType.XLS_XML), + (("gnumeric", "raw-values-1", "input.gnumeric"), orcus.FormatType.GNUMERIC), + ) + + for check in checks: + filepath = os.path.join(test_root_dir, *check[0]) + with open(filepath, "rb") as f: + # Pass the file object directly. + fmt = orcus.detect_format(f) + self.assertEqual(check[1], fmt) + + # Pass the bytes. + f.seek(0) + bytes = f.read() + fmt = orcus.detect_format(bytes) + self.assertEqual(check[1], fmt) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/python/test_ods.py b/test/python/test_ods.py new file mode 100755 index 0000000..e4c0dc9 --- /dev/null +++ b/test/python/test_ods.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +######################################################################## +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# +######################################################################## + +import unittest +from pathlib import Path + +from orcus import ods, FormulaTokenType, FormulaTokenOp + +import file_load_common as common + + +class TestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + # base directory for ods test files. + basedir = Path(__file__).parent / ".." / "ods" + cls.basedir = basedir.resolve() + + def test_import(self): + test_dirs = ("raw-values-1", "formula-1", "formula-2") + for test_dir in test_dirs: + test_dir = self.basedir / test_dir + common.run_test_dir(self, test_dir, common.DocLoader(ods)) + + def test_formula_tokens_1(self): + filepath = self.basedir / "formula-1" / "input.ods" + with open(filepath, "rb") as f: + doc = ods.read(f, recalc=False) + + self.assertEqual(len(doc.sheets), 1) + + # The 'Formula' sheet contains 4 formula cells in A1:A4. + sheet = doc.sheets[0] + self.assertEqual(sheet.name, "Formula") + rows = [row for row in sheet.get_rows()] + self.assertEqual(len(rows), 4) + + expected = ("1*2", "12/3", "AVERAGE($A1:A$2)", "SUM($A$1:$A$3)") + for row, expected_formula in zip(sheet.get_rows(), expected): + c = row[0] + self.assertEqual(c.formula, expected_formula) + + expected = ( + ( + ("1", FormulaTokenType.VALUE, FormulaTokenOp.VALUE), + ("*", FormulaTokenType.OPERATOR, FormulaTokenOp.MULTIPLY), + ("2", FormulaTokenType.VALUE, FormulaTokenOp.VALUE) + ), + ( + ("12", FormulaTokenType.VALUE, FormulaTokenOp.VALUE), + ("/", FormulaTokenType.OPERATOR, FormulaTokenOp.DIVIDE), + ("3", FormulaTokenType.VALUE, FormulaTokenOp.VALUE) + ), + ( + ("AVERAGE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION), + ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN), + ("$A1:A$2", FormulaTokenType.REFERENCE, FormulaTokenOp.RANGE_REF), + (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE) + ), + ( + ("SUM", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION), + ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN), + 
("$A$1:$A$3", FormulaTokenType.REFERENCE, FormulaTokenOp.RANGE_REF), + (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE) + ), + ) + + for row, expected_formula_tokens in zip(sheet.get_rows(), expected): + c = row[0] + iter = c.get_formula_tokens() + for token, expected_token in zip(iter, expected_formula_tokens): + self.assertEqual(str(token), expected_token[0]) + self.assertEqual(token.type, expected_token[1]) + self.assertEqual(token.op, expected_token[2]) + + def test_formula_tokens_2(self): + filepath = self.basedir / "formula-2" / "input.ods" + with open(filepath, "rb") as f: + doc = ods.read(f, recalc=False) + + self.assertEqual(len(doc.sheets), 1) + + expected = ( + ( + ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION), + ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN), + ("A2", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF), + (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP), + ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING), + (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP), + ("B2", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF), + (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE), + ), + ( + ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION), + ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN), + ("A3", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF), + (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP), + ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING), + (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP), + ("B3", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF), + (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE), + ), + ( + ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION), + ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN), + ("A4", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF), + (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP), + ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING), + (",", 
FormulaTokenType.OPERATOR, FormulaTokenOp.SEP), + ("B4", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF), + (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE), + ), + ( + ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION), + ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN), + ("A5", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF), + (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP), + ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING), + (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP), + ("B5", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF), + (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE), + ), + ( + ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION), + ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN), + ("A6", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF), + (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP), + ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING), + (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP), + ("B6", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF), + (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE), + ), + ( + ("CONCATENATE", FormulaTokenType.FUNCTION, FormulaTokenOp.FUNCTION), + ("(", FormulaTokenType.OPERATOR, FormulaTokenOp.OPEN), + ("A7", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF), + (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP), + ('" "', FormulaTokenType.VALUE, FormulaTokenOp.STRING), + (",", FormulaTokenType.OPERATOR, FormulaTokenOp.SEP), + ("B7", FormulaTokenType.REFERENCE, FormulaTokenOp.SINGLE_REF), + (")", FormulaTokenType.OPERATOR, FormulaTokenOp.CLOSE), + ), + ) + + # Check cells in column C. 
+ rows = [row for row in doc.sheets[0].get_rows()] + for row, expected_tokens in zip(rows[1:], expected): # skip the header row + tokens = row[2].get_formula_tokens() + for token, expected_token in zip(tokens, expected_tokens): + self.assertEqual(str(token), expected_token[0]) + self.assertEqual(token.type, expected_token[1]) + self.assertEqual(token.op, expected_token[2]) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/python/test_xls_xml.py b/test/python/test_xls_xml.py new file mode 100755 index 0000000..b8ae918 --- /dev/null +++ b/test/python/test_xls_xml.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +######################################################################## +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +######################################################################## + +import unittest +import os +import os.path + +import orcus +from orcus import xls_xml + +import file_load_common as common + + +class TestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + # base directory for xls-xml test files. 
+ basedir = os.path.join(os.path.dirname(__file__), "..", "xls-xml") + cls.basedir = os.path.normpath(basedir) + + def test_import(self): + + test_dirs = ( + "basic", + "bold-and-italic", + "colored-text", + "empty-rows", + "merged-cells", + "named-expression", + "named-expression-sheet-local", + "raw-values-1", + ) + + for test_dir in test_dirs: + test_dir = os.path.join(self.basedir, test_dir) + common.run_test_dir(self, test_dir, common.DocLoader(xls_xml)) + + def test_skip_error_cells(self): + filepath = os.path.join(self.basedir, "formula-cells-parse-error", "input.xml") + with open(filepath, "rb") as f: + bytes = f.read() + + with self.assertRaises(RuntimeError): + doc = xls_xml.read(bytes) + + with self.assertRaises(RuntimeError): # TODO : should we raise a more specific error? + doc = xls_xml.read(bytes, error_policy="fail") + + # With the 'skip' policy, formula cells with erroneous formulas are + # imported as formula cells with error. + doc = xls_xml.read(bytes, error_policy="skip") + + # Make sure cells B2 and A5 are imported as formula cells. + rows = [row for row in doc.sheets[0].get_rows()] + c = rows[1][1] + self.assertEqual(c.type, orcus.CellType.FORMULA_WITH_ERROR) + self.assertFalse(c.formula) # formula string should be empty + # error formula tokens consist of: error token, string token (original formula), string token (error message). 
+ formula_tokens = [t for t in c.get_formula_tokens()] + self.assertEqual(formula_tokens[0].type, orcus.FormulaTokenType.ERROR) + self.assertEqual(formula_tokens[1].type, orcus.FormulaTokenType.VALUE) + self.assertEqual(formula_tokens[2].type, orcus.FormulaTokenType.VALUE) + c = rows[4][0] + self.assertEqual(c.type, orcus.CellType.FORMULA_WITH_ERROR) + self.assertFalse(c.formula) # formula string should be empty + formula_tokens = [t for t in c.get_formula_tokens()] + self.assertEqual(formula_tokens[0].type, orcus.FormulaTokenType.ERROR) + self.assertEqual(formula_tokens[1].type, orcus.FormulaTokenType.VALUE) + self.assertEqual(formula_tokens[2].type, orcus.FormulaTokenType.VALUE) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/python/test_xlsx.py b/test/python/test_xlsx.py new file mode 100755 index 0000000..1f691da --- /dev/null +++ b/test/python/test_xlsx.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +######################################################################## +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +######################################################################## + +import unittest +import os +import os.path +import mmap + +from orcus import xlsx + +import file_load_common as common + + +class TestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + # base directory for xlsx test files. 
+ basedir = os.path.join(os.path.dirname(__file__), "..", "xlsx") + cls.basedir = os.path.normpath(basedir) + + def test_import(self): + + test_dirs = ( + "boolean-values", + "empty-shared-strings", + "formula-cells", + "formula-shared", + "named-expression", + "named-expression-sheet-local", + "raw-values-1", + ) + + for test_dir in test_dirs: + test_dir = os.path.join(self.basedir, test_dir) + common.run_test_dir(self, test_dir, common.DocLoader(xlsx)) + + def test_named_expression(self): + filepath = os.path.join(self.basedir, "named-expression", "input.xlsx") + with open(filepath, "rb") as f: + doc = xlsx.read(f) + + named_exps = doc.get_named_expressions() + self.assertEqual(named_exps.names(), {"MyRange", "MyRange2"}) + self.assertEqual(len(named_exps), 2) + + named_exps_dict = {x[0]: x[1] for x in named_exps} + exp = named_exps_dict["MyRange"] + self.assertEqual(exp.origin, "Sheet1!$A$1") + self.assertEqual(exp.formula, "$A$1:$A$5") + iter = exp.get_formula_tokens() + self.assertEqual(len(iter), 1) + tokens = [t for t in iter] + self.assertEqual(str(tokens[0]), "$A$1:$A$5") + + exp = named_exps_dict["MyRange2"] + self.assertEqual(exp.origin, "Sheet1!$A$1") + self.assertEqual(exp.formula, "$A$1:$B$5") + iter = exp.get_formula_tokens() + self.assertEqual(len(iter), 1) + tokens = [t for t in iter] + self.assertEqual(str(tokens[0]), "$A$1:$B$5") + + def test_named_expression_sheet_local(self): + filepath = os.path.join(self.basedir, "named-expression-sheet-local", "input.xlsx") + with open(filepath, "rb") as f: + doc = xlsx.read(f) + + sheet = doc.sheets[0] + named_exps = sheet.get_named_expressions() + self.assertEqual(len(named_exps), 1) + self.assertEqual(named_exps.names(), {"MyRange",}) + + named_exps_dict = {x[0]: x[1] for x in named_exps} + exp = named_exps_dict["MyRange"] + self.assertEqual(exp.formula, "$A$1:$B$3") + iter = exp.get_formula_tokens() + self.assertEqual(len(iter), 1) + tokens = [t for t in iter] + self.assertEqual(str(tokens[0]), 
"$A$1:$B$3") + + sheet = doc.sheets[1] + named_exps = sheet.get_named_expressions() + self.assertEqual(named_exps.names(), {"MyRange",}) + self.assertEqual(len(named_exps), 1) + + named_exps_dict = {x[0]: x[1] for x in named_exps} + exp = named_exps_dict["MyRange"] + self.assertEqual(exp.formula, "$A$4:$B$5") + iter = exp.get_formula_tokens() + self.assertEqual(len(iter), 1) + tokens = [t for t in iter] + self.assertEqual(str(tokens[0]), "$A$4:$B$5") + + +if __name__ == '__main__': + unittest.main() |