/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include <sal/config.h>

#include <cassert>
#include <climits>

#include <com/sun/star/container/NoSuchElementException.hpp>
#include <com/sun/star/uno/RuntimeException.hpp>
#include <osl/file.h>
#include <rtl/character.hxx>
#include <rtl/string.h>
#include <rtl/ustring.hxx>
#include <sal/log.hxx>
#include <sal/types.h>
#include <xmlreader/pad.hxx>
#include <xmlreader/span.hxx>
#include <xmlreader/xmlreader.hxx>

namespace xmlreader {

namespace {

bool isSpace(char c) {
    switch (c) {
    case '\x09':
    case '\x0A':
    case '\x0D':
    case ' ':
        return true;
    default:
        return false;
    }
}

}

XmlReader::XmlReader(OUString const & fileUrl)
    : fileUrl_(fileUrl)
    , fileHandle_(nullptr)
{
    oslFileError e = osl_openFile(
        fileUrl_.pData, &fileHandle_, osl_File_OpenFlag_Read);
    switch (e)
    {
    case osl_File_E_None:
        break;
    case osl_File_E_NOENT:
        throw css::container::NoSuchElementException( fileUrl_ );
    default:
        throw css::uno::RuntimeException(
            "cannot open " + fileUrl_ + ": " + OUString::number(e));
    }
    e = osl_getFileSize(fileHandle_, &fileSize_);
    if (e == osl_File_E_None) {
        e = osl_mapFile(
            fileHandle_, &fileAddress_, fileSize_, 0,
            osl_File_MapFlag_WillNeed);
    }
    if (e != osl_File_E_None) {
        oslFileError e2 = osl_closeFile(fileHandle_);
        if (e2 != osl_File_E_None) {
            SAL_WARN(
                "xmlreader",
                "osl_closeFile of \"" << fileUrl_ << "\" failed with " << +e2);
        }
        throw css::uno::RuntimeException(
            "cannot mmap " + fileUrl_ + " (" + OUString::number(e) + ")" );
    }
    namespaceIris_.emplace_back("http://www.w3.org/XML/1998/namespace");
    namespaces_.emplace_back(Span("xml"), NAMESPACE_XML);
    pos_ = static_cast< char * >(fileAddress_);
    end_ = pos_ + fileSize_;
    state_ = State::Content;
    firstAttribute_ = true;
}

XmlReader::~XmlReader() {
    if (!fileHandle_)
        return;
    oslFileError e = osl_unmapMappedFile(fileHandle_, fileAddress_, fileSize_);
    if (e != osl_File_E_None) {
        SAL_WARN(
            "xmlreader",
            "osl_unmapMappedFile of \"" << fileUrl_ << "\" failed with " << +e);
    }
    e = osl_closeFile(fileHandle_);
    if (e != osl_File_E_None) {
        SAL_WARN(
            "xmlreader",
            "osl_closeFile of \"" << fileUrl_ << "\" failed with " << +e);
    }
}

int XmlReader::registerNamespaceIri(Span const & iri) {
    int id = toNamespaceId(namespaceIris_.size());
    namespaceIris_.push_back(iri);
    if (iri == "http://www.w3.org/2001/XMLSchema-instance") {
        // Old user layer .xcu files used the xsi namespace prefix without
        // declaring a corresponding namespace binding, see issue 77174; reading
        // those files during migration would fail without this hack that can be
        // removed once migration is no longer relevant (see
        // configmgr::Components::parseModificationLayer):
        namespaces_.emplace_back(Span("xsi"), id);
    }
    return id;
}

XmlReader::Result XmlReader::nextItem(Text reportText, Span * data, int * nsId)
{
    switch (state_) {
    case State::Content:
        switch (reportText) {
        case Text::NONE:
            return handleSkippedText(data, nsId);
        case Text::Raw:
            return handleRawText(data);
        default: // Text::Normalized
            return handleNormalizedText(data);
        }
    case State::StartTag:
        return handleStartTag(nsId, data);
    case State::EndTag:
        return handleEndTag();
    case State::EmptyElementTag:
        handleElementEnd();
        return Result::End;
    default: // State::Done
        return Result::Done;
    }
}

bool XmlReader::nextAttribute(int * nsId, Span * localName) {
    assert(nsId != nullptr && localName != nullptr);
    if (firstAttribute_) {
        currentAttribute_ = attributes_.begin();
        firstAttribute_ = false;
    } else {
        ++currentAttribute_;
    }
    if (currentAttribute_ == attributes_.end()) {
        return false;
    }
    if (currentAttribute_->nameColon == nullptr) {
        *nsId = NAMESPACE_NONE;
        *localName = Span(
            currentAttribute_->nameBegin,
            currentAttribute_->nameEnd - currentAttribute_->nameBegin);
    } else {
        *nsId = getNamespaceId(
            Span(
                currentAttribute_->nameBegin,
                currentAttribute_->nameColon - currentAttribute_->nameBegin));
        *localName = Span(
            currentAttribute_->nameColon + 1,
            currentAttribute_->nameEnd - (currentAttribute_->nameColon + 1));
    }
    return true;
}

Span XmlReader::getAttributeValue(bool fullyNormalize) {
    return handleAttributeValue(
        currentAttribute_->valueBegin, currentAttribute_->valueEnd,
        fullyNormalize);
}

int XmlReader::getNamespaceId(Span const & prefix) const {
    auto i = std::find_if(namespaces_.crbegin(), namespaces_.crend(),
        [&prefix](const NamespaceData& rNamespaceData) { return prefix == rNamespaceData.prefix; });

    if (i != namespaces_.rend())
        return i->nsId;

    return NAMESPACE_UNKNOWN;
}


void XmlReader::normalizeLineEnds(Span const & text) {
    char const * p = text.begin;
    sal_Int32 n = text.length;
    for (;;) {
        sal_Int32 i = rtl_str_indexOfChar_WithLength(p, n, '\x0D');
        if (i < 0) {
            break;
        }
        pad_.add(p, i);
        p += i + 1;
        n -= i + 1;
        if (n == 0 || *p != '\x0A') {
            pad_.add("\x0A");
        }
    }
    pad_.add(p, n);
}

void XmlReader::skipSpace() {
    while (isSpace(peek())) {
        ++pos_;
    }
}

bool XmlReader::skipComment() {
    if (rtl_str_shortenedCompare_WithLength(
            pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("--"),
            RTL_CONSTASCII_LENGTH("--")) !=
        0)
    {
        return false;
    }
    pos_ += RTL_CONSTASCII_LENGTH("--");
    sal_Int32 i = rtl_str_indexOfStr_WithLength(
        pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("--"));
    if (i < 0) {
        throw css::uno::RuntimeException(
            "premature end (within comment) of " + fileUrl_ );
    }
    pos_ += i + RTL_CONSTASCII_LENGTH("--");
    if (read() != '>') {
        throw css::uno::RuntimeException(
            "illegal \"--\" within comment in " + fileUrl_ );
    }
    return true;
}

void XmlReader::skipProcessingInstruction() {
    sal_Int32 i = rtl_str_indexOfStr_WithLength(
        pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("?>"));
    if (i < 0) {
        throw css::uno::RuntimeException(
            "bad '<?' in " + fileUrl_ );
    }
    pos_ += i + RTL_CONSTASCII_LENGTH("?>");
}

void XmlReader::skipDocumentTypeDeclaration() {
    // Neither is it checked that the doctypedecl is at the correct position in
    // the document, nor that it is well-formed:
    for (;;) {
        char c = read();
        switch (c) {
        case '\0': // i.e., EOF
            throw css::uno::RuntimeException(
                "premature end (within DTD) of " + fileUrl_ );
        case '"':
        case '\'':
            {
                sal_Int32 i = rtl_str_indexOfChar_WithLength(
                    pos_, end_ - pos_, c);
                if (i < 0) {
                    throw css::uno::RuntimeException(
                        "premature end (within DTD) of " + fileUrl_ );
                }
                pos_ += i + 1;
            }
            break;
        case '>':
            return;
        case '[':
            for (;;) {
                c = read();
                switch (c) {
                case '\0': // i.e., EOF
                    throw css::uno::RuntimeException(
                        "premature end (within DTD) of " + fileUrl_ );
                case '"':
                case '\'':
                    {
                        sal_Int32 i = rtl_str_indexOfChar_WithLength(
                            pos_, end_ - pos_, c);
                        if (i < 0) {
                            throw css::uno::RuntimeException(
                                "premature end (within DTD) of " + fileUrl_ );
                        }
                        pos_ += i + 1;
                    }
                    break;
                case '<':
                    switch (read()) {
                    case '\0': // i.e., EOF
                        throw css::uno::RuntimeException(
                            "premature end (within DTD) of " + fileUrl_ );
                    case '!':
                        skipComment();
                        break;
                    case '?':
                        skipProcessingInstruction();
                        break;
                    default:
                        break;
                    }
                    break;
                case ']':
                    skipSpace();
                    if (read() != '>') {
                        throw css::uno::RuntimeException(
                            "missing \">\" of DTD in " + fileUrl_ );
                    }
                    return;
                default:
                    break;
                }
            }
        default:
            break;
        }
    }
}

Span XmlReader::scanCdataSection() {
    if (rtl_str_shortenedCompare_WithLength(
            pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("[CDATA["),
            RTL_CONSTASCII_LENGTH("[CDATA[")) !=
        0)
    {
        return Span();
    }
    pos_ += RTL_CONSTASCII_LENGTH("[CDATA[");
    char const * begin = pos_;
    sal_Int32 i = rtl_str_indexOfStr_WithLength(
        pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("]]>"));
    if (i < 0) {
        throw css::uno::RuntimeException(
            "premature end (within CDATA section) of " + fileUrl_ );
    }
    pos_ += i + RTL_CONSTASCII_LENGTH("]]>");
    return Span(begin, i);
}

bool XmlReader::scanName(char const ** nameColon) {
    assert(nameColon != nullptr && *nameColon == nullptr);
    for (char const * begin = pos_;; ++pos_) {
        switch (peek()) {
        case '\0': // i.e., EOF
        case '\x09':
        case '\x0A':
        case '\x0D':
        case ' ':
        case '/':
        case '=':
        case '>':
            return pos_ != begin;
        case ':':
            *nameColon = pos_;
            break;
        default:
            break;
        }
    }
}

int XmlReader::scanNamespaceIri(char const * begin, char const * end) {
    assert(begin != nullptr && begin <= end);
    Span iri(handleAttributeValue(begin, end, false));
    for (NamespaceIris::size_type i = 0; i < namespaceIris_.size(); ++i) {
        if (namespaceIris_[i] == iri) {
            return toNamespaceId(i);
        }
    }
    return XmlReader::NAMESPACE_UNKNOWN;
}

char const * XmlReader::handleReference(char const * position, char const * end)
{
    assert(position != nullptr && *position == '&' && position < end);
    ++position;
    if (*position == '#') {
        ++position;
        sal_uInt32 val = 0;
        char const * p;
        if (*position == 'x') {
            ++position;
            p = position;
            for (;; ++position) {
                char c = *position;
                if (c >= '0' && c <= '9') {
                    val = 16 * val + (c - '0');
                } else if (c >= 'A' && c <= 'F') {
                    val = 16 * val + (c - 'A') + 10;
                } else if (c >= 'a' && c <= 'f') {
                    val = 16 * val + (c - 'a') + 10;
                } else {
                    break;
                }
                if (!rtl::isUnicodeCodePoint(val)) { // avoid overflow
                    throw css::uno::RuntimeException(
                        "'&#x...' too large in " + fileUrl_ );
                }
            }
        } else {
            p = position;
            for (;; ++position) {
                char c = *position;
                if (c >= '0' && c <= '9') {
                    val = 10 * val + (c - '0');
                } else {
                    break;
                }
                if (!rtl::isUnicodeCodePoint(val)) { // avoid overflow
                    throw css::uno::RuntimeException(
                        "'&#...' too large in " + fileUrl_ );
                }
            }
        }
        if (position == p || *position++ != ';') {
            throw css::uno::RuntimeException(
                "'&#...' missing ';' in " + fileUrl_ );
        }
        assert(rtl::isUnicodeCodePoint(val));
        if ((val < 0x20 && val != 0x9 && val != 0xA && val != 0xD) ||
            (val >= 0xD800 && val <= 0xDFFF) || val == 0xFFFE || val == 0xFFFF)
        {
            throw css::uno::RuntimeException(
                "character reference denoting invalid character in " + fileUrl_ );
        }
        char buf[4];
        sal_Int32 len;
        if (val < 0x80) {
            buf[0] = static_cast< char >(val);
            len = 1;
        } else if (val < 0x800) {
            buf[0] = static_cast< char >((val >> 6) | 0xC0);
            buf[1] = static_cast< char >((val & 0x3F) | 0x80);
            len = 2;
        } else if (val < 0x10000) {
            buf[0] = static_cast< char >((val >> 12) | 0xE0);
            buf[1] = static_cast< char >(((val >> 6) & 0x3F) | 0x80);
            buf[2] = static_cast< char >((val & 0x3F) | 0x80);
            len = 3;
        } else {
            buf[0] = static_cast< char >((val >> 18) | 0xF0);
            buf[1] = static_cast< char >(((val >> 12) & 0x3F) | 0x80);
            buf[2] = static_cast< char >(((val >> 6) & 0x3F) | 0x80);
            buf[3] = static_cast< char >((val & 0x3F) | 0x80);
            len = 4;
        }
        pad_.addEphemeral(buf, len);
        return position;
    } else {
        struct EntityRef {
            char const * inBegin;
            sal_Int32 const inLength;
            char const * outBegin;
            sal_Int32 const outLength;
        };
        static EntityRef const refs[] = {
            { RTL_CONSTASCII_STRINGPARAM("amp;"),
              RTL_CONSTASCII_STRINGPARAM("&") },
            { RTL_CONSTASCII_STRINGPARAM("lt;"),
              RTL_CONSTASCII_STRINGPARAM("<") },
            { RTL_CONSTASCII_STRINGPARAM("gt;"),
              RTL_CONSTASCII_STRINGPARAM(">") },
            { RTL_CONSTASCII_STRINGPARAM("apos;"),
              RTL_CONSTASCII_STRINGPARAM("'") },
            { RTL_CONSTASCII_STRINGPARAM("quot;"),
              RTL_CONSTASCII_STRINGPARAM("\"") } };
        for (const auto & ref : refs) {
            if (rtl_str_shortenedCompare_WithLength(
                    position, end - position, ref.inBegin, ref.inLength,
                    ref.inLength) ==
                0)
            {
                position += ref.inLength;
                pad_.add(ref.outBegin, ref.outLength);
                return position;
            }
        }
        throw css::uno::RuntimeException(
            "unknown entity reference in " + fileUrl_ );
    }
}

Span XmlReader::handleAttributeValue(
    char const * begin, char const * end, bool fullyNormalize)
{
    pad_.clear();
    if (fullyNormalize) {
        while (begin != end && isSpace(*begin)) {
            ++begin;
        }
        while (end != begin && isSpace(end[-1])) {
            --end;
        }
        char const * p = begin;
        enum Space { SPACE_NONE, SPACE_SPAN, SPACE_BREAK };
            // a single true space character can go into the current span,
            // everything else breaks the span
        Space space = SPACE_NONE;
        while (p != end) {
            switch (*p) {
            case '\x09':
            case '\x0A':
            case '\x0D':
                switch (space) {
                case SPACE_NONE:
                    pad_.add(begin, p - begin);
                    pad_.add(" ");
                    space = SPACE_BREAK;
                    break;
                case SPACE_SPAN:
                    pad_.add(begin, p - begin);
                    space = SPACE_BREAK;
                    break;
                case SPACE_BREAK:
                    break;
                }
                begin = ++p;
                break;
            case ' ':
                switch (space) {
                case SPACE_NONE:
                    ++p;
                    space = SPACE_SPAN;
                    break;
                case SPACE_SPAN:
                    pad_.add(begin, p - begin);
                    begin = ++p;
                    space = SPACE_BREAK;
                    break;
                case SPACE_BREAK:
                    begin = ++p;
                    break;
                }
                break;
            case '&':
                pad_.add(begin, p - begin);
                p = handleReference(p, end);
                begin = p;
                space = SPACE_NONE;
                break;
            default:
                ++p;
                space = SPACE_NONE;
                break;
            }
        }
        pad_.add(begin, p - begin);
    } else {
        char const * p = begin;
        while (p != end) {
            switch (*p) {
            case '\x09':
            case '\x0A':
                pad_.add(begin, p - begin);
                begin = ++p;
                pad_.add(" ");
                break;
            case '\x0D':
                pad_.add(begin, p - begin);
                ++p;
                if (peek() == '\x0A') {
                    ++p;
                }
                begin = p;
                pad_.add(" ");
                break;
            case '&':
                pad_.add(begin, p - begin);
                p = handleReference(p, end);
                begin = p;
                break;
            default:
                ++p;
                break;
            }
        }
        pad_.add(begin, p - begin);
    }
    return pad_.get();
}

XmlReader::Result XmlReader::handleStartTag(int * nsId, Span * localName) {
    assert(nsId != nullptr && localName);
    char const * nameBegin = pos_;
    char const * nameColon = nullptr;
    if (!scanName(&nameColon)) {
        throw css::uno::RuntimeException(
            "bad tag name in " + fileUrl_ );
    }
    char const * nameEnd = pos_;
    NamespaceList::size_type inheritedNamespaces = namespaces_.size();
    bool hasDefaultNs = false;
    int defaultNsId = NAMESPACE_NONE;
    attributes_.clear();
    for (;;) {
        char const * p = pos_;
        skipSpace();
        if (peek() == '/' || peek() == '>') {
            break;
        }
        if (pos_ == p) {
            throw css::uno::RuntimeException(
                "missing whitespace before attribute in " + fileUrl_ );
        }
        char const * attrNameBegin = pos_;
        char const * attrNameColon = nullptr;
        if (!scanName(&attrNameColon)) {
            throw css::uno::RuntimeException(
                "bad attribute name in " + fileUrl_ );
        }
        char const * attrNameEnd = pos_;
        skipSpace();
        if (read() != '=') {
            throw css::uno::RuntimeException(
                "missing '=' in " + fileUrl_ );
        }
        skipSpace();
        char del = read();
        if (del != '\'' && del != '"') {
            throw css::uno::RuntimeException(
                "bad attribute value in " + fileUrl_ );
        }
        char const * valueBegin = pos_;
        sal_Int32 i = rtl_str_indexOfChar_WithLength(pos_, end_ - pos_, del);
        if (i < 0) {
            throw css::uno::RuntimeException(
                "unterminated attribute value in " + fileUrl_ );
        }
        char const * valueEnd = pos_ + i;
        pos_ += i + 1;
        if (attrNameColon == nullptr &&
            Span(attrNameBegin, attrNameEnd - attrNameBegin) == "xmlns")
        {
            hasDefaultNs = true;
            defaultNsId = scanNamespaceIri(valueBegin, valueEnd);
        } else if (attrNameColon != nullptr &&
                   Span(attrNameBegin, attrNameColon - attrNameBegin) ==
                       "xmlns")
        {
            namespaces_.emplace_back(
                    Span(attrNameColon + 1, attrNameEnd - (attrNameColon + 1)),
                    scanNamespaceIri(valueBegin, valueEnd));
        } else {
            attributes_.emplace_back(
                    attrNameBegin, attrNameEnd, attrNameColon, valueBegin,
                    valueEnd);
        }
    }
    if (!hasDefaultNs && !elements_.empty()) {
        defaultNsId = elements_.top().defaultNamespaceId;
    }
    firstAttribute_ = true;
    if (peek() == '/') {
        state_ = State::EmptyElementTag;
        ++pos_;
    } else {
        state_ = State::Content;
    }
    if (peek() != '>') {
        throw css::uno::RuntimeException(
            "missing '>' in " + fileUrl_ );
    }
    ++pos_;
    elements_.push(
        ElementData(
            Span(nameBegin, nameEnd - nameBegin), inheritedNamespaces,
            defaultNsId));
    if (nameColon == nullptr) {
        *nsId = defaultNsId;
        *localName = Span(nameBegin, nameEnd - nameBegin);
    } else {
        *nsId = getNamespaceId(Span(nameBegin, nameColon - nameBegin));
        *localName = Span(nameColon + 1, nameEnd - (nameColon + 1));
    }
    return Result::Begin;
}

XmlReader::Result XmlReader::handleEndTag() {
    if (elements_.empty()) {
        throw css::uno::RuntimeException(
            "spurious end tag in " + fileUrl_ );
    }
    char const * nameBegin = pos_;
    char const * nameColon = nullptr;
    if (!scanName(&nameColon) ||
        !elements_.top().name.equals(nameBegin, pos_ - nameBegin))
    {
        throw css::uno::RuntimeException(
            "tag mismatch in " + fileUrl_ );
    }
    handleElementEnd();
    skipSpace();
    if (peek() != '>') {
        throw css::uno::RuntimeException(
            "missing '>' in " + fileUrl_ );
    }
    ++pos_;
    return Result::End;
}

void XmlReader::handleElementEnd() {
    assert(!elements_.empty());
    auto end = elements_.top().inheritedNamespaces;
    namespaces_.resize(end);
    elements_.pop();
    state_ = elements_.empty() ? State::Done : State::Content;
}

XmlReader::Result XmlReader::handleSkippedText(Span * data, int * nsId) {
    for (;;) {
        auto i = static_cast<const char*>(std::memchr(pos_, '<', end_ - pos_));
        if (!i) {
            throw css::uno::RuntimeException(
                "premature end of " + fileUrl_ );
        }
        pos_ = i + 1;
        switch (peek()) {
        case '!':
            ++pos_;
            if (!skipComment() && !scanCdataSection().is()) {
                skipDocumentTypeDeclaration();
            }
            break;
        case '/':
            ++pos_;
            return handleEndTag();
        case '?':
            ++pos_;
            skipProcessingInstruction();
            break;
        default:
            return handleStartTag(nsId, data);
        }
    }
}

XmlReader::Result XmlReader::handleRawText(Span * text) {
    pad_.clear();
    for (char const * begin = pos_;;) {
        switch (peek()) {
        case '\0': // i.e., EOF
            throw css::uno::RuntimeException(
                "premature end of " + fileUrl_ );
        case '\x0D':
            pad_.add(begin, pos_ - begin);
            ++pos_;
            if (peek() != '\x0A') {
                pad_.add("\x0A");
            }
            begin = pos_;
            break;
        case '&':
            pad_.add(begin, pos_ - begin);
            pos_ = handleReference(pos_, end_);
            begin = pos_;
            break;
        case '<':
            pad_.add(begin, pos_ - begin);
            ++pos_;
            switch (peek()) {
            case '!':
                ++pos_;
                if (!skipComment()) {
                    Span cdata(scanCdataSection());
                    if (cdata.is()) {
                        normalizeLineEnds(cdata);
                    } else {
                        skipDocumentTypeDeclaration();
                    }
                }
                begin = pos_;
                break;
            case '/':
                *text = pad_.get();
                ++pos_;
                state_ = State::EndTag;
                return Result::Text;
            case '?':
                ++pos_;
                skipProcessingInstruction();
                begin = pos_;
                break;
            default:
                *text = pad_.get();
                state_ = State::StartTag;
                return Result::Text;
            }
            break;
        default:
            ++pos_;
            break;
        }
    }
}

XmlReader::Result XmlReader::handleNormalizedText(Span * text) {
    pad_.clear();
    char const * flowBegin = pos_;
    char const * flowEnd = pos_;
    enum Space { SPACE_START, SPACE_NONE, SPACE_SPAN, SPACE_BREAK };
        // a single true space character can go into the current flow,
        // everything else breaks the flow
    Space space = SPACE_START;
    for (;;) {
        switch (peek()) {
        case '\0': // i.e., EOF
            throw css::uno::RuntimeException(
                "premature end of " + fileUrl_ );
        case '\x09':
        case '\x0A':
        case '\x0D':
            switch (space) {
            case SPACE_START:
            case SPACE_BREAK:
                break;
            case SPACE_NONE:
            case SPACE_SPAN:
                space = SPACE_BREAK;
                break;
            }
            ++pos_;
            break;
        case ' ':
            switch (space) {
            case SPACE_START:
            case SPACE_BREAK:
                break;
            case SPACE_NONE:
                space = SPACE_SPAN;
                break;
            case SPACE_SPAN:
                space = SPACE_BREAK;
                break;
            }
            ++pos_;
            break;
        case '&':
            switch (space) {
            case SPACE_START:
                break;
            case SPACE_NONE:
            case SPACE_SPAN:
                pad_.add(flowBegin, pos_ - flowBegin);
                break;
            case SPACE_BREAK:
                pad_.add(flowBegin, flowEnd - flowBegin);
                pad_.add(" ");
                break;
            }
            pos_ = handleReference(pos_, end_);
            flowBegin = pos_;
            flowEnd = pos_;
            space = SPACE_NONE;
            break;
        case '<':
            ++pos_;
            switch (peek()) {
            case '!':
                ++pos_;
                if (skipComment()) {
                    space = SPACE_BREAK;
                } else {
                    Span cdata(scanCdataSection());
                    if (cdata.is()) {
                        // CDATA is not normalized (similar to character
                        // references; it keeps the code simple), but it might
                        // arguably be better to normalize it:
                        switch (space) {
                        case SPACE_START:
                            break;
                        case SPACE_NONE:
                        case SPACE_SPAN:
                            pad_.add(flowBegin, pos_ - flowBegin);
                            break;
                        case SPACE_BREAK:
                            pad_.add(flowBegin, flowEnd - flowBegin);
                            pad_.add(" ");
                            break;
                        }
                        normalizeLineEnds(cdata);
                        flowBegin = pos_;
                        flowEnd = pos_;
                        space = SPACE_NONE;
                    } else {
                        skipDocumentTypeDeclaration();
                    }
                }
                break;
            case '/':
                ++pos_;
                pad_.add(flowBegin, flowEnd - flowBegin);
                *text = pad_.get();
                state_ = State::EndTag;
                return Result::Text;
            case '?':
                ++pos_;
                skipProcessingInstruction();
                space = SPACE_BREAK;
                break;
            default:
                pad_.add(flowBegin, flowEnd - flowBegin);
                *text = pad_.get();
                state_ = State::StartTag;
                return Result::Text;
            }
            break;
        default:
            switch (space) {
            case SPACE_START:
                flowBegin = pos_;
                break;
            case SPACE_NONE:
            case SPACE_SPAN:
                break;
            case SPACE_BREAK:
                pad_.add(flowBegin, flowEnd - flowBegin);
                pad_.add(" ");
                flowBegin = pos_;
                break;
            }
            flowEnd = ++pos_;
            space = SPACE_NONE;
            break;
        }
    }
}

int XmlReader::toNamespaceId(NamespaceIris::size_type pos) {
    assert(pos <= INT_MAX);
    return static_cast< int >(pos);
}

}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */