summary | refs | log | tree | commit | diff | stats
path: root/include/xmlreader
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- include/xmlreader/README 28
-rw-r--r-- include/xmlreader/detail/xmlreaderdllapi.hxx 35
-rw-r--r-- include/xmlreader/pad.hxx 59
-rw-r--r-- include/xmlreader/span.hxx 85
-rw-r--r-- include/xmlreader/xmlreader.hxx 189
5 files changed, 396 insertions, 0 deletions
diff --git a/include/xmlreader/README b/include/xmlreader/README
new file mode 100644
index 000000000..41b43e7e8
--- /dev/null
+++ b/include/xmlreader/README
@@ -0,0 +1,28 @@
+#**************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#**************************************************************
+
+<http://qa.openoffice.org/issues/show_bug.cgi?id=115203>: "Issue 113189
+extracted xmlreader from configmgr, to make the former available within URE.
+The xmlreader library is for now considered a private part of URE (cf.
+ure/source/README), for simplicity uses OOO_DLLPUBLIC_XMLREADER-based symbol
+visibility (and thus no symbol versioning), but is of course used from outside
+URE in configmgr. This works as long as its ABI does not change. If it ever
+changes, symbol versioning will have to be added (in some form or other)."
diff --git a/include/xmlreader/detail/xmlreaderdllapi.hxx b/include/xmlreader/detail/xmlreaderdllapi.hxx
new file mode 100644
index 000000000..e917a9553
--- /dev/null
+++ b/include/xmlreader/detail/xmlreaderdllapi.hxx
@@ -0,0 +1,35 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_XMLREADER_DETAIL_XMLREADERDLLAPI_HXX
+#define INCLUDED_XMLREADER_DETAIL_XMLREADERDLLAPI_HXX
+
+#include <sal/config.h>
+
+#include <sal/types.h>
+
+// Symbol visibility for the xmlreader library: export when the library itself
+// is being built (OOO_DLLIMPLEMENTATION_XMLREADER is defined), import for
+// every other translation unit that includes this header.
+#ifdef OOO_DLLIMPLEMENTATION_XMLREADER
+#   define OOO_DLLPUBLIC_XMLREADER SAL_DLLPUBLIC_EXPORT
+#else
+#   define OOO_DLLPUBLIC_XMLREADER SAL_DLLPUBLIC_IMPORT
+#endif
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/xmlreader/pad.hxx b/include/xmlreader/pad.hxx
new file mode 100644
index 000000000..d450f7a54
--- /dev/null
+++ b/include/xmlreader/pad.hxx
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_XMLREADER_PAD_HXX
+#define INCLUDED_XMLREADER_PAD_HXX
+
+#include <sal/config.h>
+
+#include <cstddef>
+
+#include <rtl/strbuf.hxx>
+#include <sal/types.h>
+#include <xmlreader/detail/xmlreaderdllapi.hxx>
+#include <xmlreader/span.hxx>
+
+namespace xmlreader {
+
+// Accumulator that is fed character data through add()/addEphemeral() and
+// hands the result back as a Span through get().
+//
+// NOTE(review): all non-template member functions are defined outside this
+// header; the per-member remarks below are limited to what the declarations
+// show and should be checked against the implementation.
+class SAL_WARN_UNUSED OOO_DLLPUBLIC_XMLREADER Pad {
+public:
+    // Adds the `length` chars starting at `begin`.
+    void add(const char * begin, sal_Int32 length);
+
+    // Convenience overload for char arrays (typically string literals):
+    // forwards everything but the last array element, which for a literal is
+    // the terminating NUL.
+    template< std::size_t N >
+    void add(const char (& literal)[N])
+    {
+        add(&literal[0], N - 1);
+    }
+
+    // Adds the `length` chars starting at `begin`; presumably for data that
+    // will not outlive the call -- TODO confirm against the implementation.
+    void addEphemeral(const char * begin, sal_Int32 length);
+
+    void clear();
+
+    // Returns the data gathered so far as a Span.
+    Span get() const;
+
+private:
+    SAL_DLLPRIVATE void flushSpan();
+
+    // The class is exported, so the order of the data members is part of its
+    // binary layout and must not be changed.
+    Span span_;
+    OStringBuffer buffer_ {256};
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/xmlreader/span.hxx b/include/xmlreader/span.hxx
new file mode 100644
index 000000000..bc2dbaacb
--- /dev/null
+++ b/include/xmlreader/span.hxx
@@ -0,0 +1,85 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_XMLREADER_SPAN_HXX
+#define INCLUDED_XMLREADER_SPAN_HXX
+
+#include <sal/config.h>
+
+#include <cstddef>
+#include <cstring>
+
+#include <sal/types.h>
+#include <xmlreader/detail/xmlreaderdllapi.hxx>
+
+namespace rtl { class OUString; }
+
+namespace xmlreader {
+
+// Non-owning view of `length` chars starting at `begin`.
+//
+// The struct holds a raw pointer only: it never copies or frees the chars, so
+// they must stay alive for as long as the Span is used.  A Span whose `begin`
+// is null counts as "unset" (see is()).
+struct SAL_WARN_UNUSED OOO_DLLPUBLIC_XMLREADER Span {
+    char const * begin;
+    sal_Int32 length;
+
+    // Creates an unset Span (is() returns false).
+    Span(): begin(nullptr), length(0) {}
+        // init length to avoid compiler warnings
+
+    // Views the `theLength` chars starting at `theBegin`; nothing is copied.
+    Span(char const * theBegin, sal_Int32 theLength):
+        begin(theBegin), length(theLength) {}
+
+    // Views a char array (typically a string literal).  The last array
+    // element -- the terminating NUL of a literal -- is not part of the span.
+    template< std::size_t N > explicit Span(char const (& literal)[N]):
+        begin(literal), length(N - 1)
+    {}
+
+    // Marks the Span as unset.  Only `begin` is reset; `length` keeps whatever
+    // value it had.
+    void clear() noexcept { begin = nullptr; }
+
+    // True if the Span is set, i.e. `begin` is non-null.
+    bool is() const { return begin != nullptr; }
+
+    // Byte-wise equality: same length and same content.
+    bool operator==(Span const & text) const {
+        // Zero-length spans compare equal without looking at the pointers:
+        // default-constructed Spans have a null `begin`, and handing a null
+        // pointer to std::memcmp is undefined behaviour even for a count of 0.
+        return length == text.length
+            && (length == 0
+                || std::memcmp(begin, text.begin, text.length) == 0);
+    }
+
+    bool operator!=(Span const & text) const {
+        return !(operator==(text));
+    }
+
+    // Same as operator==, for a pointer/length pair.
+    bool equals(char const * textBegin, sal_Int32 textLength) const {
+        return operator==(Span(textBegin, textLength));
+    }
+
+    // Comparison with a char array (typically a string literal); as in the
+    // array constructor, the last array element is excluded.
+    template< std::size_t N > bool operator==(char const (& literal)[N])
+        const
+    {
+        return operator==(Span(literal, N - 1));
+    }
+
+    template< std::size_t N > bool operator!=(char const (& literal)[N])
+        const
+    {
+        return operator!=(Span(literal, N - 1));
+    }
+
+    // Defined out of line; judging by the name it decodes the chars as UTF-8
+    // into an OUString -- TODO confirm error handling in the implementation.
+    rtl::OUString convertFromUtf8() const;
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/xmlreader/xmlreader.hxx b/include/xmlreader/xmlreader.hxx
new file mode 100644
index 000000000..4e9f1347a
--- /dev/null
+++ b/include/xmlreader/xmlreader.hxx
@@ -0,0 +1,189 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_XMLREADER_XMLREADER_HXX
+#define INCLUDED_XMLREADER_XMLREADER_HXX
+
+#include <sal/config.h>
+
+#include <stack>
+#include <vector>
+
+#include <osl/file.h>
+#include <rtl/ustring.hxx>
+#include <sal/types.h>
+#include <xmlreader/detail/xmlreaderdllapi.hxx>
+#include <xmlreader/pad.hxx>
+#include <xmlreader/span.hxx>
+
+namespace xmlreader {
+
+// Reader for the XML file identified by a URL.  Callers step through the
+// document with nextItem() and, for start tags, nextAttribute() /
+// getAttributeValue().
+//
+// NOTE(review): apart from getUrl(), read() and peek(), every member function
+// is defined outside this header; the comments below only restate what the
+// declarations and the pre-existing remarks establish.
+class OOO_DLLPUBLIC_XMLREADER XmlReader {
+public:
+    explicit XmlReader(OUString const & fileUrl);
+
+    ~XmlReader();
+
+    // Namespace ids with a fixed value; namespace ids are passed around as
+    // plain int throughout this interface.
+    enum {
+        NAMESPACE_NONE = -2,
+        NAMESPACE_UNKNOWN = -1,
+        NAMESPACE_XML = 0
+    };
+
+    // Selects how nextItem() is asked to report text (its reportText
+    // argument).
+    enum class Text { NONE, Raw, Normalized };
+
+    // What nextItem() found.
+    enum class Result { Begin, End, Text, Done };
+
+    int registerNamespaceIri(Span const & iri);
+
+    // Result::Begin: data = localName, nsId = namespace id
+    // Result::End:   data, nsId unused
+    // Result::Text:  data = text, nsId unused
+    Result nextItem(Text reportText, Span * data, int * nsId);
+
+    bool nextAttribute(int * nsId, Span * localName);
+
+    // The span returned by getAttributeValue is only valid until the next
+    // call to nextItem or getAttributeValue.
+    Span getAttributeValue(bool fullyNormalize);
+
+    int getNamespaceId(Span const & prefix) const;
+
+    // The file URL stored in fileUrl_.
+    const OUString& getUrl() const
+    {
+        return fileUrl_;
+    }
+
+private:
+    // Not copyable.
+    XmlReader(const XmlReader&) = delete;
+    XmlReader& operator=(const XmlReader&) = delete;
+
+    using NamespaceIris = std::vector< Span >;
+
+    // If NamespaceData (and similarly ElementData and AttributeData) is made
+    // SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about
+    // "'xmlreader::XmlReader' declared with greater visibility than the type
+    // of its field 'xmlreader::XmlReader::namespaces_'" (and similarly for
+    // elements_ and attributes_):
+
+    // A namespace prefix together with the namespace id it stands for.
+    struct NamespaceData {
+        Span prefix;
+        int nsId;
+
+        // Leaves prefix default-constructed (unset) and uses -1 as the id.
+        NamespaceData()
+            : nsId(-1)
+        {}
+
+        NamespaceData(Span const & thePrefix, int theNsId)
+            : prefix(thePrefix)
+            , nsId(theNsId)
+        {}
+    };
+
+    using NamespaceList = std::vector< NamespaceData >;
+
+    // Per-element record kept on the elements_ stack.
+    struct ElementData {
+        Span name;
+        NamespaceList::size_type inheritedNamespaces;
+        int defaultNamespaceId;
+
+        ElementData(
+            Span const & theName,
+            NamespaceList::size_type theInheritedNamespaces,
+            int theDefaultNamespaceId)
+            : name(theName)
+            , inheritedNamespaces(theInheritedNamespaces)
+            , defaultNamespaceId(theDefaultNamespaceId)
+        {}
+    };
+
+    using ElementStack = std::stack< ElementData >;
+
+    // Raw pointers delimiting one attribute's name (plus the position recorded
+    // for its colon) and its value.
+    struct AttributeData {
+        char const * nameBegin;
+        char const * nameEnd;
+        char const * nameColon;
+        char const * valueBegin;
+        char const * valueEnd;
+
+        AttributeData(
+            char const * theNameBegin, char const * theNameEnd,
+            char const * theNameColon, char const * theValueBegin,
+            char const * theValueEnd)
+            : nameBegin(theNameBegin)
+            , nameEnd(theNameEnd)
+            , nameColon(theNameColon)
+            , valueBegin(theValueBegin)
+            , valueEnd(theValueEnd)
+        {}
+    };
+
+    using Attributes = std::vector< AttributeData >;
+
+    enum class State { Content, StartTag, EndTag, EmptyElementTag, Done };
+
+    // Returns the char at pos_ and advances pos_ past it; once pos_ has
+    // reached end_, returns '\0' and leaves pos_ alone.
+    SAL_DLLPRIVATE char read()
+    {
+        if (pos_ == end_) {
+            return '\0';
+        }
+        return *pos_++;
+    }
+
+    // Like read(), but never advances pos_.
+    SAL_DLLPRIVATE char peek() const
+    {
+        if (pos_ == end_) {
+            return '\0';
+        }
+        return *pos_;
+    }
+
+    SAL_DLLPRIVATE void normalizeLineEnds(Span const & text);
+
+    SAL_DLLPRIVATE void skipSpace();
+
+    SAL_DLLPRIVATE bool skipComment();
+
+    SAL_DLLPRIVATE void skipProcessingInstruction();
+
+    SAL_DLLPRIVATE void skipDocumentTypeDeclaration();
+
+    SAL_DLLPRIVATE Span scanCdataSection();
+
+    SAL_DLLPRIVATE bool scanName(char const ** nameColon);
+
+    SAL_DLLPRIVATE int scanNamespaceIri(
+        char const * begin, char const * end);
+
+    SAL_DLLPRIVATE char const * handleReference(
+        char const * position, char const * end);
+
+    SAL_DLLPRIVATE Span handleAttributeValue(
+        char const * begin, char const * end, bool fullyNormalize);
+
+    SAL_DLLPRIVATE Result handleStartTag(int * nsId, Span * localName);
+
+    SAL_DLLPRIVATE Result handleEndTag();
+
+    SAL_DLLPRIVATE void handleElementEnd();
+
+    SAL_DLLPRIVATE Result handleSkippedText(Span * data, int * nsId);
+
+    SAL_DLLPRIVATE Result handleRawText(Span * text);
+
+    SAL_DLLPRIVATE Result handleNormalizedText(Span * text);
+
+    SAL_DLLPRIVATE static int toNamespaceId(NamespaceIris::size_type pos);
+
+    // The class is exported, so the order of the data members is part of its
+    // binary layout and must not be changed.
+    OUString const fileUrl_;
+    oslFileHandle fileHandle_;
+    sal_uInt64 fileSize_;
+    void * fileAddress_;
+    NamespaceIris namespaceIris_;
+    NamespaceList namespaces_;
+    ElementStack elements_;
+    char const * pos_;  // current read position, see read()/peek()
+    char const * end_;  // position at which read()/peek() report '\0'
+    State state_;
+    Attributes attributes_;
+    Attributes::iterator currentAttribute_;
+    bool firstAttribute_;
+    Pad pad_;
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */