diff options
Diffstat (limited to 'include/xmlreader')
-rw-r--r-- | include/xmlreader/README | 28 | ||||
-rw-r--r-- | include/xmlreader/detail/xmlreaderdllapi.hxx | 35 | ||||
-rw-r--r-- | include/xmlreader/pad.hxx | 59 | ||||
-rw-r--r-- | include/xmlreader/span.hxx | 85 | ||||
-rw-r--r-- | include/xmlreader/xmlreader.hxx | 189 |
5 files changed, 396 insertions, 0 deletions
diff --git a/include/xmlreader/README b/include/xmlreader/README new file mode 100644 index 000000000..41b43e7e8 --- /dev/null +++ b/include/xmlreader/README @@ -0,0 +1,28 @@ +#************************************************************** +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#************************************************************** + +<http://qa.openoffice.org/issues/show_bug.cgi?id=115203>: "Issue 113189 +extracted xmlreader from configmgr, to make the former available within URE. +The xmlreader library is for now considered a private part of URE (cf. +ure/source/README), for simplicity uses OOO_DLLPUBLIC_XMLREADER-based symbol +visibility (and thus no symbol versioning), but is of course used from outside +URE in configmgr. This works as long as its ABI does not change. If it ever +changes, symbol versioning will have to be added (in some form or other)." diff --git a/include/xmlreader/detail/xmlreaderdllapi.hxx b/include/xmlreader/detail/xmlreaderdllapi.hxx new file mode 100644 index 000000000..e917a9553 --- /dev/null +++ b/include/xmlreader/detail/xmlreaderdllapi.hxx @@ -0,0 +1,35 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_XMLREADER_DETAIL_XMLREADERDLLAPI_HXX +#define INCLUDED_XMLREADER_DETAIL_XMLREADERDLLAPI_HXX + +#include <sal/config.h> + +#include <sal/types.h> + +#if defined OOO_DLLIMPLEMENTATION_XMLREADER +#define OOO_DLLPUBLIC_XMLREADER SAL_DLLPUBLIC_EXPORT +#else +#define OOO_DLLPUBLIC_XMLREADER SAL_DLLPUBLIC_IMPORT +#endif + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/xmlreader/pad.hxx b/include/xmlreader/pad.hxx new file mode 100644 index 000000000..d450f7a54 --- /dev/null +++ b/include/xmlreader/pad.hxx @@ -0,0 +1,59 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_XMLREADER_PAD_HXX +#define INCLUDED_XMLREADER_PAD_HXX + +#include <sal/config.h> + +#include <cstddef> + +#include <rtl/strbuf.hxx> +#include <sal/types.h> +#include <xmlreader/detail/xmlreaderdllapi.hxx> +#include <xmlreader/span.hxx> + +namespace xmlreader { + +class SAL_WARN_UNUSED OOO_DLLPUBLIC_XMLREADER Pad { +public: + void add(char const * begin, sal_Int32 length); + + template< std::size_t N > void add(char const (& literal)[N]) { + add(literal, N - 1); + } + + void addEphemeral(char const * begin, sal_Int32 length); + + void clear(); + + Span get() const; + +private: + SAL_DLLPRIVATE void flushSpan(); + + Span span_; + OStringBuffer buffer_ {256}; +}; + +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/xmlreader/span.hxx b/include/xmlreader/span.hxx new file mode 100644 index 000000000..bc2dbaacb --- /dev/null +++ b/include/xmlreader/span.hxx @@ -0,0 +1,85 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_XMLREADER_SPAN_HXX +#define INCLUDED_XMLREADER_SPAN_HXX + +#include <sal/config.h> + +#include <cstddef> +#include <cstring> + +#include <sal/types.h> +#include <xmlreader/detail/xmlreaderdllapi.hxx> + +namespace rtl { class OUString; } + +namespace xmlreader { + +struct SAL_WARN_UNUSED OOO_DLLPUBLIC_XMLREADER Span { + char const * begin; + sal_Int32 length; + + Span(): begin(nullptr), length(0) {} + // init length to avoid compiler warnings + + Span(char const * theBegin, sal_Int32 theLength): + begin(theBegin), length(theLength) {} + + template< std::size_t N > explicit Span(char const (& literal)[N]): + begin(literal), length(N - 1) + {} + + void clear() throw() { begin = nullptr; } + + bool is() const { return begin != nullptr; } + + bool operator==(Span const & text) const { + return length == text.length + && std::memcmp(begin, text.begin, text.length) == 0; + } + + bool operator!=(Span const & text) const { + return !(operator==(text)); + } + + bool equals(char const * textBegin, sal_Int32 textLength) const { + return operator==(Span(textBegin, textLength)); + } + + template< std::size_t N > bool operator==(char const (& literal)[N]) + const + { + return operator==(Span(literal, N - 1)); + } + + template< std::size_t N > bool operator!=(char const (& literal)[N]) + const + { + return operator!=(Span(literal, N - 1)); + } + + rtl::OUString convertFromUtf8() const; +}; + +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/xmlreader/xmlreader.hxx b/include/xmlreader/xmlreader.hxx new file mode 100644 index 000000000..4e9f1347a --- /dev/null +++ b/include/xmlreader/xmlreader.hxx @@ -0,0 +1,189 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_XMLREADER_XMLREADER_HXX +#define INCLUDED_XMLREADER_XMLREADER_HXX + +#include <sal/config.h> + +#include <stack> +#include <vector> + +#include <osl/file.h> +#include <rtl/ustring.hxx> +#include <sal/types.h> +#include <xmlreader/detail/xmlreaderdllapi.hxx> +#include <xmlreader/pad.hxx> +#include <xmlreader/span.hxx> + +namespace xmlreader { + +class OOO_DLLPUBLIC_XMLREADER XmlReader { +public: + explicit XmlReader(OUString const & fileUrl); + + ~XmlReader(); + + enum { NAMESPACE_NONE = -2, NAMESPACE_UNKNOWN = -1, NAMESPACE_XML = 0 }; + + enum class Text { NONE, Raw, Normalized }; + + enum class Result { Begin, End, Text, Done }; + + int registerNamespaceIri(Span const & iri); + + // RESULT_BEGIN: data = localName, ns = ns + // RESULT_END: data, ns unused + // RESULT_TEXT: data = text, ns unused + Result nextItem(Text reportText, Span * data, int * nsId); + + bool nextAttribute(int * nsId, Span * localName); + + // the span returned by getAttributeValue is only valid until the next call + // to nextItem or getAttributeValue + Span getAttributeValue(bool fullyNormalize); + + int getNamespaceId(Span const & prefix) const; + + const OUString& getUrl() const { return fileUrl_;} + +private: + XmlReader(const XmlReader&) = delete; + XmlReader& operator=(const XmlReader&) = delete; + + typedef std::vector< Span > NamespaceIris; + + // If NamespaceData (and similarly ElementData and AttributeData) is made + // SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about + // "'xmlreader::XmlReader' declared with greater visibility than the type of + // its field 'xmlreader::XmlReader::namespaces_'" (and similarly for + // elements_ and attributes_): + + struct NamespaceData { + Span prefix; + int nsId; + + NamespaceData(): + nsId(-1) {} + + NamespaceData(Span const & thePrefix, int theNsId): + prefix(thePrefix), nsId(theNsId) {} + }; + + typedef std::vector< NamespaceData > NamespaceList; + + struct ElementData { + Span name; + NamespaceList::size_type inheritedNamespaces; + int defaultNamespaceId; + + ElementData( + Span const & theName, + NamespaceList::size_type theInheritedNamespaces, + int theDefaultNamespaceId): + name(theName), inheritedNamespaces(theInheritedNamespaces), + defaultNamespaceId(theDefaultNamespaceId) + {} + }; + + typedef std::stack< ElementData > ElementStack; + + struct AttributeData { + char const * nameBegin; + char const * nameEnd; + char const * nameColon; + char const * valueBegin; + char const * valueEnd; + + AttributeData( + char const * theNameBegin, char const * theNameEnd, + char const * theNameColon, char const * theValueBegin, + char const * theValueEnd): + nameBegin(theNameBegin), nameEnd(theNameEnd), + nameColon(theNameColon), valueBegin(theValueBegin), + valueEnd(theValueEnd) + {} + }; + + typedef std::vector< AttributeData > Attributes; + + enum class State { Content, StartTag, EndTag, EmptyElementTag, Done }; + + SAL_DLLPRIVATE char read() { return pos_ == end_ ? '\0' : *pos_++; } + + SAL_DLLPRIVATE char peek() const { return pos_ == end_ ? '\0' : *pos_; } + + SAL_DLLPRIVATE void normalizeLineEnds(Span const & text); + + SAL_DLLPRIVATE void skipSpace(); + + SAL_DLLPRIVATE bool skipComment(); + + SAL_DLLPRIVATE void skipProcessingInstruction(); + + SAL_DLLPRIVATE void skipDocumentTypeDeclaration(); + + SAL_DLLPRIVATE Span scanCdataSection(); + + SAL_DLLPRIVATE bool scanName(char const ** nameColon); + + SAL_DLLPRIVATE int scanNamespaceIri( + char const * begin, char const * end); + + SAL_DLLPRIVATE char const * handleReference( + char const * position, char const * end); + + SAL_DLLPRIVATE Span handleAttributeValue( + char const * begin, char const * end, bool fullyNormalize); + + SAL_DLLPRIVATE Result handleStartTag(int * nsId, Span * localName); + + SAL_DLLPRIVATE Result handleEndTag(); + + SAL_DLLPRIVATE void handleElementEnd(); + + SAL_DLLPRIVATE Result handleSkippedText(Span * data, int * nsId); + + SAL_DLLPRIVATE Result handleRawText(Span * text); + + SAL_DLLPRIVATE Result handleNormalizedText(Span * text); + + SAL_DLLPRIVATE static int toNamespaceId(NamespaceIris::size_type pos); + + OUString const fileUrl_; + oslFileHandle fileHandle_; + sal_uInt64 fileSize_; + void * fileAddress_; + NamespaceIris namespaceIris_; + NamespaceList namespaces_; + ElementStack elements_; + char const * pos_; + char const * end_; + State state_; + Attributes attributes_; + Attributes::iterator currentAttribute_; + bool firstAttribute_; + Pad pad_; +}; + +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |