summaryrefslogtreecommitdiffstats
path: root/writerperfect/source/common/DocumentHandler.cxx
blob: 28730ab5ee99a936714295110eb4d54579183498 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * For further information visit http://libwpd.sourceforge.net
 */

#include <DocumentHandler.hxx>

#include <string.h>

#include <com/sun/star/uno/Reference.hxx>
#include <com/sun/star/xml/sax/XDocumentHandler.hpp>
#include <com/sun/star/xml/sax/XAttributeList.hpp>
#include <comphelper/attributelist.hxx>
#include <xmloff/xmlimp.hxx>

namespace writerperfect
{
// Lookup table indexed by a byte value; librvng_utf8_next_char adds the entry for the
// byte it is pointed at to that pointer in order to step to the following character.
//
// Entries by index range:
//   0x00-0xBF -> 1
//   0xC0-0xDF -> 2
//   0xE0-0xEF -> 3
//   0xF0-0xF7 -> 4
//   0xF8-0xFB -> 5
//   0xFC-0xFD -> 6
//   0xFE-0xFF -> 1
const unsigned char librvng_utf8_skip_data[256]
    = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 };

// Return the position reached by advancing p by the librvng_utf8_skip_data entry
// selected by the byte p points at. The caller must check the result against the end
// of its buffer; no bounds are known here.
static const char* librvng_utf8_next_char(const char* p)
{
    const unsigned char firstByte = static_cast<unsigned char>(*p);
    const unsigned char step = librvng_utf8_skip_data[firstByte];
    return p + step;
}

// Decode the XML entities &amp; , &apos; , &gt; , &lt; and &quot; found in the sz bytes
// starting at s, appending the decoded text to res.
//
// Everything that is not one of these five entities is appended byte for byte.
// Processing stops silently at the first character whose length, as reported by
// librvng_utf8_next_char, would reach past the end of the input.
static void unescapeXML(const char* s, const unsigned long sz, librevenge::RVNGString& res)
{
    struct Entity
    {
        const char* text; // escaped form, including '&' and ';'
        size_t length; // number of bytes in text
        char replacement; // character the entity stands for
    };
    static const Entity aEntities[] = { { "&amp;", 5, '&' },
                                        { "&apos;", 6, '\'' },
                                        { "&gt;", 4, '>' },
                                        { "&lt;", 4, '<' },
                                        { "&quot;", 6, '"' } };

    const char* const end = s + sz;
    const char* cur = s;
    while (cur != end)
    {
        const char* const next = librvng_utf8_next_char(cur);
        if (next > end)
            break; // oops, the string is invalid

        if (*cur == '&')
        {
            // an entity is only recognised when all of its bytes lie inside the input
            const size_t remaining = static_cast<size_t>(end - cur);
            const Entity* pMatch = nullptr;
            for (const Entity& rEntity : aEntities)
            {
                if (rEntity.length <= remaining
                    && strncmp(cur, rEntity.text, rEntity.length) == 0)
                {
                    pMatch = &rEntity;
                    break;
                }
            }
            if (pMatch != nullptr)
            {
                res.append(pMatch->replacement);
                cur += pMatch->length;
                continue;
            }
        }

        // not an entity: copy the bytes of the current character unchanged
        for (; cur != next; ++cur)
            res.append(*cur);
    }
}

using com::sun::star::uno::Reference;
using com::sun::star::xml::sax::XDocumentHandler;

// Store xHandler as the target of all forwarded calls. If that handler is an
// SvXMLImport, it is replaced by an SvXMLLegacyToFastDocHandler constructed from it.
DocumentHandler::DocumentHandler(Reference<XDocumentHandler> const& xHandler)
    : mxHandler(xHandler)
{
    SvXMLImport* const pImport = dynamic_cast<SvXMLImport*>(mxHandler.get());
    if (pImport != nullptr)
    {
        mxHandler.set(new SvXMLLegacyToFastDocHandler(pImport));
    }
}

// Forward the start-of-document notification to the wrapped handler.
void DocumentHandler::startDocument()
{
    mxHandler->startDocument();
}

// Forward the end-of-document notification to the wrapped handler.
void DocumentHandler::endDocument()
{
    mxHandler->endDocument();
}

// Forward the start of element psName to the wrapped handler, turning xPropList into
// the element's attribute list.
//
// Properties whose key starts with "librevenge" are dropped. The element name, the
// attribute names and the attribute values are converted from UTF-8. For the attributes
// named in listEncoded the value is passed through unescapeXML before conversion.
void DocumentHandler::startElement(const char* psName,
                                   const librevenge::RVNGPropertyList& xPropList)
{
    rtl::Reference<comphelper::AttributeList> pAttrList = new comphelper::AttributeList();
    librevenge::RVNGPropertyList::Iter i(xPropList);
    for (i.rewind(); i.next();)
    {
        // filter out librevenge elements
        if (strncmp(i.key(), "librevenge", 10) != 0)
        {
            size_t keyLength = strlen(i.key());
            OUString sName(i.key(), keyLength, RTL_TEXTENCODING_UTF8);

            // Fetch the value once and measure it in bytes with strlen(), exactly as
            // characters() does. RVNGString::len() must not be used here: it counts
            // UTF-8 characters, so a value containing multi-byte characters would be
            // cut short when handed to OUString or unescapeXML as a byte length.
            const librevenge::RVNGString sRawValue(i()->getStr());
            const size_t rawLength = strlen(sRawValue.cstr());
            OUString sValue(sRawValue.cstr(), rawLength, RTL_TEXTENCODING_UTF8);

            // libodfgen xml-encodes some attribute's value, so check if the value is encoded or not
            for (int j = 0; j < 9; ++j)
            {
                // list of the encoded attributes followed by their lengths
                static char const* listEncoded[9]
                    = { "draw:name",        "svg:font-family",  "style:condition",
                        "style:num-prefix", "style:num-suffix", "table:formula",
                        "text:bullet-char", "text:label",       "xlink:href" };
                static size_t const listEncodedLength[9] = { 9, 15, 15, 16, 16, 13, 16, 10, 10 };
                if (keyLength == listEncodedLength[j]
                    && strncmp(i.key(), listEncoded[j], keyLength) == 0)
                {
                    librevenge::RVNGString decodedValue("");
                    unescapeXML(sRawValue.cstr(), static_cast<unsigned long>(rawLength),
                                decodedValue);
                    // again a byte length, not a character count
                    sValue = OUString(decodedValue.cstr(), strlen(decodedValue.cstr()),
                                      RTL_TEXTENCODING_UTF8);
                    break;
                }
            }
            pAttrList->AddAttribute(sName, sValue);
        }
    }

    OUString sElementName(psName, strlen(psName), RTL_TEXTENCODING_UTF8);
    mxHandler->startElement(sElementName, pAttrList);
}

void DocumentHandler::endElement(const char* psName)
{
    OUString sElementName(psName, strlen(psName), RTL_TEXTENCODING_UTF8);
    mxHandler->endElement(sElementName);
}

// Forward character data to the wrapped handler. The text is converted from UTF-8,
// with its length taken as the number of bytes up to the terminating NUL.
void DocumentHandler::characters(const librevenge::RVNGString& sCharacters)
{
    const char* const pUtf8 = sCharacters.cstr();
    const size_t byteLength = strlen(pUtf8);
    mxHandler->characters(OUString(pUtf8, byteLength, RTL_TEXTENCODING_UTF8));
}
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */