summaryrefslogtreecommitdiffstats
path: root/pyuno/demo/ooextract.py
blob: d6cce94e80d59fc656c7ffdb4f53a737625d1def (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# -*- tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4 -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# This file incorporates work covered by the following license notice:
#
#   Licensed to the Apache Software Foundation (ASF) under one or more
#   contributor license agreements. See the NOTICE file distributed
#   with this work for additional information regarding copyright
#   ownership. The ASF licenses this file to you under the Apache
#   License, Version 2.0 (the "License"); you may not use this file
#   except in compliance with the License. You may obtain a copy of
#   the License at http://www.apache.org/licenses/LICENSE-2.0 .
#

import getopt,sys
import uno
from unohelper import Base,systemPathToFileUrl, absolutize
from os import getcwd

from com.sun.star.beans import PropertyValue
from com.sun.star.beans.PropertyState import DIRECT_VALUE
from com.sun.star.uno import Exception as UnoException
from com.sun.star.io import IOException,XInputStream, XOutputStream

class OutputStream(Base, XOutputStream):
    def __init__(self):
        self.closed = 0

    def closeOutput(self):
        self.closed = 1

    def writeBytes(self, seq):
        sys.stdout.write(seq.value)

    def flush(self):
        pass

def main():
    retVal = 0
    doc = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hc:", ["help", "connection-string=", "html"])
        format = None
        url = "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
        filterName = "Text (Encoded)"
        for o, a in opts:
            if o in ("-h", "--help"):
                usage()
                sys.exit()
            if o in ("-c", "--connection-string"):
                url = "uno:" + a + ";urp;StarOffice.ComponentContext"
            if o == "--html":
                filterName = "HTML (StarWriter)"

        print(filterName)
        if not len(args):
            usage()
            sys.exit()

        ctxLocal = uno.getComponentContext()
        smgrLocal = ctxLocal.ServiceManager

        resolver = smgrLocal.createInstanceWithContext(
                 "com.sun.star.bridge.UnoUrlResolver", ctxLocal)
        ctx = resolver.resolve(url)
        smgr = ctx.ServiceManager

        desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)

        cwd = systemPathToFileUrl(getcwd())
        outProps = (
            PropertyValue("FilterName" , 0, filterName, 0),
            PropertyValue("OutputStream", 0, OutputStream(), 0))
        inProps = PropertyValue("Hidden", 0 , True, 0),
        for path in args:
            try:
                fileUrl = uno.absolutize(cwd, systemPathToFileUrl(path))
                doc = desktop.loadComponentFromURL(fileUrl , "_blank", 0, inProps)

                if not doc:
                    raise UnoException("Could not open stream for unknown reason", None)

                doc.storeToURL("private:stream", outProps)
            except IOException as e:
                sys.stderr.write("Error during conversion: " + e.Message + "\n")
                retVal = 1
            except UnoException as e:
                sys.stderr.write("Error (" + repr(e.__class__) + ") during conversion: " + e.Message + "\n")
                retVal = 1
            if doc:
                doc.dispose()

    except UnoException as e:
        sys.stderr.write("Error (" + repr(e.__class__) + "): " + e.Message + "\n")
        retVal = 1
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + "\n")
        usage()
        retVal = 1

    sys.exit(retVal)

def usage():
    sys.stderr.write("usage: ooextract.py --help |\n"+
                  "       [-c <connection-string> | --connection-string=<connection-string>\n"+
                  "       file1 file2 ...\n"+
                  "\n" +
                  "Extracts plain text from documents and prints it to stdout.\n" +
                  "Requires an OpenOffice.org instance to be running. The script and the\n"+
                  "running OpenOffice.org instance must be able to access the file with\n"+
                  "by the same system path.\n"
                  "\n"+
                  "-c <connection-string> | --connection-string=<connection-string>\n" +
                  "        The connection-string part of a UNO URL to where the\n" +
                  "        the script should connect to in order to do the conversion.\n" +
                  "        The strings defaults to socket,host=localhost,port=2002\n"
                  "--html \n"
                  "        Instead of the text filter, the writer html filter is used\n"
                  )

main()

# vim: set shiftwidth=4 softtabstop=4 expandtab: