summaryrefslogtreecommitdiffstats
path: root/bin/benchmark-document-loading
diff options
context:
space:
mode:
Diffstat (limited to 'bin/benchmark-document-loading')
-rw-r--r--bin/benchmark-document-loading486
1 files changed, 486 insertions, 0 deletions
diff --git a/bin/benchmark-document-loading b/bin/benchmark-document-loading
new file mode 100644
index 000000000..11611a2b2
--- /dev/null
+++ b/bin/benchmark-document-loading
@@ -0,0 +1,486 @@
+#!/usr/bin/env python # -*- tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4 -*-
+#
+# Version: MPL 1.1 / GPLv3+ / LGPLv3+
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License or as specified alternatively below. You may obtain a copy of
+# the License at http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# Major Contributor(s):
+# Copyright (C) 2012 Red Hat, Inc., Michael Stahl <mstahl@redhat.com>
+# (initial developer)
+#
+# All Rights Reserved.
+#
+# For minor contributions see the git repository.
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 3 or later (the "GPLv3+"), or
+# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
+# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
+# instead of those above.
+
+# Simple script to load a bunch of documents and export them as Flat ODF
+#
+# Personally I run it like this:
+# ~/lo/master-suse/instdir/program/python ~/lo/master-suse/bin/benchmark-document-loading --soffice=path:/home/tml/lo/master-suse/instdir/program/soffice --outdir=file://$PWD/out --userdir=file:///tmp/test $PWD/docs
+#
+
+import argparse
+import datetime
+import os
+import subprocess
+import sys
+import threading
+import time
+import urllib
+try:
+ from urllib.parse import quote
+except ImportError:
+ from urllib import quote
+import uuid
+
+try:
+ import pyuno
+ import uno
+ import unohelper
+except ImportError:
+ print("pyuno not found: try to set PYTHONPATH and URE_BOOTSTRAP variables")
+ print("PYTHONPATH=/installation/opt/program")
+ print("URE_BOOTSTRAP=file:///installation/opt/program/fundamentalrc")
+ raise
+
+try:
+ from com.sun.star.beans import PropertyValue
+ from com.sun.star.document import XDocumentEventListener
+ from com.sun.star.io import IOException, XOutputStream
+except ImportError:
+ print("UNO API class not found: try to set URE_BOOTSTRAP variable")
+ print("URE_BOOTSTRAP=file:///installation/opt/program/fundamentalrc")
+ raise
+
+validCalcFileExtensions = [ ".xlsx", ".xls", ".ods", ".fods" ]
+validWriterFileExtensions = [ ".docx" , ".rtf", ".odt", ".fodt", ".doc" ]
+validImpressFileExtensions = [ ".ppt", ".pptx", ".odp", ".fodp" ]
+validDrawFileExtensions = [ ".odg", ".fodg" ]
+validRevereseFileExtensions = [ ".vsd", ".vdx", ".cdr", ".pub", ".wpd" ]
+validFileExtensions = {"calc": validCalcFileExtensions,
+ "writer": validWriterFileExtensions,
+ "impress": validImpressFileExtensions,
+ "draw": validDrawFileExtensions,
+ "reverse": validRevereseFileExtensions}
+flatODFTypes = {"calc": (".fods", "OpenDocument Spreadsheet Flat XML"),
+ "writer": (".fodt", "OpenDocument Text Flat XML"),
+ "impress": (".fodp", "OpenDocument Presentation Flat XML"),
+ "draw": (".fodg", "OpenDocument Drawing Flat XML")}
+
+outdir = ""
+
+def partition(list, pred):
+ left = []
+ right = []
+ for e in list:
+ if pred(e):
+ left.append(e)
+ else:
+ right.append(e)
+ return (left, right)
+
+def filelist(directory, suffix):
+ if not directory:
+ raise Exception("filelist: empty directory")
+ if directory[-1] != "/":
+ directory += "/"
+ files = [directory + f for f in os.listdir(directory)]
+# print(files)
+ return [f for f in files
+ if os.path.isfile(f) and os.path.splitext(f)[1] == suffix]
+
+def getFiles(dirs, suffix):
+# print( dirs )
+ files = []
+ for d in dirs:
+ files += filelist(d, suffix)
+ return files
+
+### UNO utilities ###
+
+class OutputStream( unohelper.Base, XOutputStream ):
+ def __init__( self ):
+ self.closed = 0
+
+ def closeOutput(self):
+ self.closed = 1
+
+ def writeBytes( self, seq ):
+ sys.stdout.write( seq.value )
+
+ def flush( self ):
+ pass
+
+class OfficeConnection:
+ def __init__(self, args):
+ self.args = args
+ self.soffice = None
+ self.socket = None
+ self.xContext = None
+ self.pro = None
+ def setUp(self):
+ (method, sep, rest) = self.args.soffice.partition(":")
+ if sep != ":":
+ raise Exception("soffice parameter does not specify method")
+ if method == "path":
+ socket = "pipe,name=pytest" + str(uuid.uuid1())
+ userdir = self.args.userdir
+ if not userdir:
+ raise Exception("'path' method requires --userdir")
+ if not userdir.startswith("file://"):
+ raise Exception("--userdir must be file URL")
+ self.soffice = self.bootstrap(rest, userdir, socket)
+ elif method == "connect":
+ socket = rest
+ else:
+ raise Exception("unsupported connection method: " + method)
+ self.xContext = self.connect(socket)
+
+ def bootstrap(self, soffice, userdir, socket):
+ argv = [ soffice, "--accept=" + socket + ";urp",
+ "-env:UserInstallation=" + userdir,
+ "--quickstart=no",
+ "--norestore", "--nologo", "--headless" ]
+ if self.args.valgrind:
+ argv.append("--valgrind")
+ os.putenv("SAL_LOG", "-INFO-WARN")
+ os.putenv("LIBO_ONEWAY_STABLE_ODF_EXPORT", "YES")
+ self.pro = subprocess.Popen(argv)
+# print(self.pro.pid)
+
+ def connect(self, socket):
+ xLocalContext = uno.getComponentContext()
+ xUnoResolver = xLocalContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", xLocalContext)
+ url = "uno:" + socket + ";urp;StarOffice.ComponentContext"
+# print("OfficeConnection: connecting to: " + url)
+ while True:
+ try:
+ xContext = xUnoResolver.resolve(url)
+ return xContext
+# except com.sun.star.connection.NoConnectException
+ except pyuno.getClass("com.sun.star.connection.NoConnectException"):
+# print("NoConnectException: sleeping...")
+ time.sleep(1)
+
+ def tearDown(self):
+ if self.soffice:
+ if self.xContext:
+ try:
+# print("tearDown: calling terminate()...")
+ xMgr = self.xContext.ServiceManager
+ xDesktop = xMgr.createInstanceWithContext("com.sun.star.frame.Desktop", self.xContext)
+ xDesktop.terminate()
+# print("...done")
+# except com.sun.star.lang.DisposedException:
+ except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
+# print("caught UnknownPropertyException while TearDown")
+ pass # ignore, also means disposed
+ except pyuno.getClass("com.sun.star.lang.DisposedException"):
+# print("caught DisposedException while TearDown")
+ pass # ignore
+ else:
+ self.soffice.terminate()
+ ret = self.soffice.wait()
+ self.xContext = None
+ self.socket = None
+ self.soffice = None
+ if ret != 0:
+ raise Exception("Exit status indicates failure: " + str(ret))
+# return ret
+ def kill(self):
+ command = "kill " + str(self.pro.pid)
+ with open("killFile.log", "a") as killFile:
+ killFile.write(command + "\n")
+# print("kill")
+# print(command)
+ os.system(command)
+
+class PersistentConnection:
+ def __init__(self, args):
+ self.args = args
+ self.connection = None
+ def getContext(self):
+ return self.connection.xContext
+ def setUp(self):
+ assert(not self.connection)
+ conn = OfficeConnection(self.args)
+ conn.setUp()
+ self.connection = conn
+ def preTest(self):
+ assert(self.connection)
+ def postTest(self):
+ assert(self.connection)
+ def tearDown(self):
+ if self.connection:
+ try:
+ self.connection.tearDown()
+ finally:
+ self.connection = None
+ def kill(self):
+ if self.connection:
+ self.connection.kill()
+
+def simpleInvoke(connection, test):
+ try:
+ connection.preTest()
+ test.run(connection.getContext(), connection)
+ finally:
+ connection.postTest()
+
+def runConnectionTests(connection, invoker, tests):
+ try:
+ connection.setUp()
+ for test in tests:
+ invoker(connection, test)
+ finally:
+ pass
+ #connection.tearDown()
+
+class EventListener(XDocumentEventListener,unohelper.Base):
+ def __init__(self):
+ self.layoutFinished = False
+ def documentEventOccured(self, event):
+# print(str(event.EventName))
+ if event.EventName == "OnLayoutFinished":
+ self.layoutFinished = True
+ def disposing(event):
+ pass
+
+def mkPropertyValue(name, value):
+ return uno.createUnoStruct("com.sun.star.beans.PropertyValue",
+ name, 0, value, 0)
+
+### tests ###
+
+def logTimeSpent(url, startTime):
+ print(os.path.basename(urllib.parse.urlparse(url).path) + "\t" + str(time.time()-startTime))
+
+def loadFromURL(xContext, url, t, component):
+ xDesktop = xContext.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", xContext)
+ props = [("Hidden", True), ("ReadOnly", True)] # FilterName?
+ loadProps = tuple([mkPropertyValue(name, value) for (name, value) in props])
+ xListener = None
+ if component == "writer":
+ xListener = EventListener()
+ xGEB = xContext.getValueByName(
+ "/singletons/com.sun.star.frame.theGlobalEventBroadcaster")
+ xGEB.addDocumentEventListener(xListener)
+ try:
+ xDoc = None
+ startTime = time.time()
+ xDoc = xDesktop.loadComponentFromURL(url, "_blank", 0, loadProps)
+ if component == "calc":
+ try:
+ if xDoc:
+ xDoc.calculateAll()
+ except AttributeError:
+ pass
+ t.cancel()
+ logTimeSpent(url, startTime)
+ return xDoc
+ elif component == "writer":
+ time_ = 0
+ t.cancel()
+ while time_ < 30:
+ if xListener.layoutFinished:
+ logTimeSpent(url, startTime)
+ return xDoc
+# print("delaying...")
+ time_ += 1
+ time.sleep(1)
+ else:
+ t.cancel()
+ logTimeSpent(url, startTime)
+ return xDoc
+ with open("file.log", "a") as fh:
+ fh.write("layout did not finish\n")
+ return xDoc
+ except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
+ xListener = None
+ raise # means crashed, handle it later
+ except pyuno.getClass("com.sun.star.lang.DisposedException"):
+ xListener = None
+ raise # means crashed, handle it later
+ except pyuno.getClass("com.sun.star.lang.IllegalArgumentException"):
+ pass # means could not open the file, ignore it
+ except:
+ if xDoc:
+# print("CLOSING")
+ xDoc.close(True)
+ raise
+ finally:
+ if xListener:
+ xGEB.removeDocumentEventListener(xListener)
+
+def exportToODF(xContext, xDoc, baseName, t, component):
+ exportFileName = outdir + "/" + os.path.splitext(baseName)[0] + flatODFTypes[component][0]
+ print("exportToODF " + baseName + " => " + exportFileName)
+ props = [("FilterName", flatODFTypes[component][1]),
+ ("Overwrite", True)]
+ storeProps = tuple([mkPropertyValue(name, value) for (name, value) in props])
+ xDoc.storeToURL(exportFileName, tuple(storeProps))
+
+def handleCrash(file, disposed):
+# print("File: " + file + " crashed")
+ with open("crashlog.txt", "a") as crashLog:
+ crashLog.write('Crash:' + file + ' ')
+ if disposed == 1:
+ crashLog.write('through disposed\n')
+# crashed_files.append(file)
+# add here the remaining handling code for crashed files
+
+def alarm_handler(args):
+ args.kill()
+
+class HandleFileTest:
+ def __init__(self, file, state, component):
+ self.file = file
+ self.state = state
+ self.component = component
+ def run(self, xContext, connection):
+# print("Loading document: " + self.file)
+ t = None
+ args = None
+ try:
+ url = "file://" + quote(self.file)
+ with open("file.log", "a") as fh:
+ fh.write(url + "\n")
+ xDoc = None
+ args = [connection]
+ t = threading.Timer(60, alarm_handler, args)
+ t.start()
+ xDoc = loadFromURL(xContext, url, t, self.component)
+ self.state.goodFiles.append(self.file)
+ exportToODF(xContext, xDoc, os.path.basename(urllib.parse.urlparse(url).path), t, self.component)
+ except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
+# print("caught UnknownPropertyException " + self.file)
+ if not t.is_alive():
+# print("TIMEOUT!")
+ self.state.timeoutFiles.append(self.file)
+ else:
+ t.cancel()
+ handleCrash(self.file, 0)
+ self.state.badPropertyFiles.append(self.file)
+ connection.tearDown()
+ connection.setUp()
+ except pyuno.getClass("com.sun.star.lang.DisposedException"):
+# print("caught DisposedException " + self.file)
+ if not t.is_alive():
+# print("TIMEOUT!")
+ self.state.timeoutFiles.append(self.file)
+ else:
+ t.cancel()
+ handleCrash(self.file, 1)
+ self.state.badDisposedFiles.append(self.file)
+ connection.tearDown()
+ connection.setUp()
+ finally:
+ if t.is_alive():
+ t.cancel()
+ try:
+ if xDoc:
+ t = threading.Timer(10, alarm_handler, args)
+ t.start()
+ xDoc.close(True)
+ t.cancel()
+ except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
+ print("caught UnknownPropertyException while closing")
+ self.state.badPropertyFiles.append(self.file)
+ connection.tearDown()
+ connection.setUp()
+ except pyuno.getClass("com.sun.star.lang.DisposedException"):
+ print("caught DisposedException while closing")
+ if t.is_alive():
+ t.cancel()
+ else:
+ self.state.badDisposedFiles.append(self.file)
+ connection.tearDown()
+ connection.setUp()
+# print("...done with: " + self.file)
+
+class State:
+ def __init__(self):
+ self.goodFiles = []
+ self.badDisposedFiles = []
+ self.badPropertyFiles = []
+ self.timeoutFiles = []
+
+
+def write_state_report(files_list, start_time, report_filename, description):
+ with open(report_filename, "w") as fh:
+ fh.write("%s:\n" % description)
+ fh.write("Starttime: %s\n" % start_time.isoformat())
+ for f in files_list:
+ fh.write("%s\n" % f)
+
+
+def writeReport(state, startTime):
+ write_state_report(state.goodFiles, startTime, "goodFiles.log",
+ "Files which loaded perfectly")
+ write_state_report(state.badDisposedFiles, startTime, "badDisposedFiles.log",
+ "Files which crashed with DisposedException")
+ write_state_report(state.badPropertyFiles, startTime, "badPropertyFiles.log",
+ "Files which crashed with UnknownPropertyException")
+ write_state_report(state.timeoutFiles, startTime, "timeoutFiles.log",
+ "Files which timed out")
+
+def runHandleFileTests(opts):
+ startTime = datetime.datetime.now()
+ connection = PersistentConnection(opts)
+ global outdir
+ outdir = os.path.join(opts.outdir, startTime.strftime('%Y%m%d.%H%M%S'))
+ try:
+ tests = []
+ state = State()
+# print("before map")
+ for component, validExtension in validFileExtensions.items():
+ files = []
+ for suffix in validExtension:
+ files.extend(getFiles(opts.dirs, suffix))
+ files.sort()
+ tests.extend( (HandleFileTest(file, state, component) for file in files) )
+ runConnectionTests(connection, simpleInvoke, tests)
+ finally:
+ connection.kill()
+ writeReport(state, startTime)
+
+def parseArgs(argv):
+ epilog = "'location' is a pathname, not a URL. 'outdir' and 'userdir' are URLs.\n" \
+ "The 'directory' parameters should be full absolute pathnames, not URLs."
+
+ parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
+ epilog=epilog)
+ parser.add_argument('--soffice', metavar='method:location', required=True,
+ help="specify soffice instance to connect to\n"
+ "supported methods: 'path', 'connect'")
+ parser.add_argument('--outdir', metavar='URL', required=True,
+ help="specify the output directory for flat ODF exports")
+ parser.add_argument('--userdir', metavar='URL',
+ help="specify user installation directory for 'path' method")
+ parser.add_argument('--valgrind', action='store_true',
+ help="pass --valgrind to soffice for 'path' method")
+ parser.add_argument('dirs', metavar='directory', nargs='+')
+
+ args = parser.parse_args(argv[1:])
+
+ return args
+
+
+if __name__ == "__main__":
+ opts = parseArgs(sys.argv)
+ runHandleFileTests(opts)
+
+# vim:set shiftwidth=4 softtabstop=4 expandtab: