summaryrefslogtreecommitdiffstats
path: root/writerfilter/documentation/ooxml
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:06:44 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:06:44 +0000
commited5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch)
tree7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /writerfilter/documentation/ooxml
parentInitial commit. (diff)
downloadlibreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.tar.xz
libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.zip
Adding upstream version 4:7.4.7.upstream/4%7.4.7upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'writerfilter/documentation/ooxml')
-rw-r--r--writerfilter/documentation/ooxml/model.rng436
1 files changed, 436 insertions, 0 deletions
diff --git a/writerfilter/documentation/ooxml/model.rng b/writerfilter/documentation/ooxml/model.rng
new file mode 100644
index 000000000..a7d298991
--- /dev/null
+++ b/writerfilter/documentation/ooxml/model.rng
@@ -0,0 +1,436 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+-->
+<!--
+This file is both a relax-ng schema for writerfilter/source/ooxml/model.xml and
+documentation for that file. The schema has two parts:
+
+- first part: a subset of the relax-ng grammar to define *what* we expect as
+ the input in a DOCX file
+- second part: additional annotation on top of that to define *how* to handle
+ that expected input
+-->
+<grammar xmlns="http://relaxng.org/ns/structure/1.0">
+ <!--
+ First part: a subset of the relax-ng XML markup.
+
+ The order of elements in this part follow a bottom-up approach.
+ -->
+
+ <!-- Basic building blocks: element, attribute and their contents. -->
+
+ <!--
+ Describes an XML element.
+
+ Example:
+
+ <element name="charset">
+ <ref name="CT_Charset"/>
+ </element>
+ -->
+ <define name="element-element">
+ <element name="element" ns="http://relaxng.org/ns/structure/1.0">
+ <optional>
+ <attribute name="name"/>
+ </optional>
+ <oneOrMore>
+ <choice>
+ <ref name="attribute-element"/>
+ <ref name="data-element"/>
+ <ref name="ref-element"/>
+ </choice>
+ </oneOrMore>
+ </element>
+ </define>
+
+ <!--
+ Describes an attribute.
+
+ Example:
+
+ <attribute name="name">
+ <data type="string"/>
+ </attribute>
+ -->
+ <define name="attribute-element">
+ <element name="attribute" ns="http://relaxng.org/ns/structure/1.0">
+ <optional>
+ <attribute name="name"/>
+ </optional>
+ <zeroOrMore>
+ <choice>
+ <ref name="data-element"/>
+ <ref name="ref-element"/>
+ </choice>
+ </zeroOrMore>
+ </element>
+ </define>
+
+ <!--
+ Describes the type of the data contained in an attribute. Possible values:
+ boolean, integer or string. See also <text>.
+ -->
+ <define name="data-element">
+ <element name="data" ns="http://relaxng.org/ns/structure/1.0">
+ <attribute name="type"/>
+ </element>
+ </define>
+
+ <!--
+ Describes an enumeration element: a possible value for an attribute.
+ -->
+ <define name="value-element">
+ <element name="value" ns="http://relaxng.org/ns/structure/1.0">
+ <text/>
+ </element>
+ </define>
+
+ <!--
+ This element is ignored during parsing, it just helps readability.
+
+ Example:
+
+ <choice>
+ <value>true</value>
+ <value>false</value>
+ </choice>
+ -->
+ <define name="choice-element">
+ <element name="choice" ns="http://relaxng.org/ns/structure/1.0">
+ <oneOrMore>
+ <choice>
+ <ref name="data-element"/>
+ <ref name="element-element"/>
+ <ref name="ref-element"/>
+ <ref name="value-element"/>
+ </choice>
+ </oneOrMore>
+ </element>
+ </define>
+
+ <!-- Grouping elements: define and grammar. -->
+
+ <!--
+ A define is named definition of its contents, so that multiple <ref> elements
+ can refer to it, to avoid copy&paste. OOXML named (complex and simple) types
+ are described using defines.
+ -->
+ <define name="define-element">
+ <element name="define" ns="http://relaxng.org/ns/structure/1.0">
+ <attribute name="name"/>
+ <oneOrMore>
+ <choice>
+ <ref name="choice-element"/>
+ <ref name="attribute-element"/>
+ <ref name="element-element"/>
+ <ref name="data-element"/>
+ <ref name="ref-element"/>
+ <empty/>
+ </choice>
+ </oneOrMore>
+ </element>
+ </define>
+
+ <!--
+ A reference to a define.
+ -->
+ <define name="ref-element">
+ <element name="ref" ns="http://relaxng.org/ns/structure/1.0">
+ <attribute name="name"/>
+ </element>
+ </define>
+
+ <!--
+ A grammar is a set of defines, one grammar is equivalent to one .xsd file
+ from the OOXML spec.
+ -->
+ <define name="grammar-element">
+ <element name="grammar" ns="http://relaxng.org/ns/structure/1.0">
+ <attribute name="ns"/>
+ <optional>
+ <attribute name="attributeFormDefault"/>
+ </optional>
+ <zeroOrMore>
+ <ref name="include-element"/>
+ </zeroOrMore>
+ <oneOrMore>
+ <ref name="define-element"/>
+ </oneOrMore>
+ </element>
+ </define>
+
+ <!--
+ Controls the resolution of <ref> elements. The order is:
+
+ - the current grammar
+ - included grammars, if there are any
+ - the first define in the whole model
+ -->
+ <define name="include-element">
+ <element name="include" ns="http://relaxng.org/ns/structure/1.0">
+ <attribute name="href"/>
+ </element>
+ </define>
+
+ <!--
+ Second part: custom markup, building on top of the first one.
+
+ The order of elements in this part follow a top-down approach.
+
+ The output of the code generated from these elements is a token stream. There
+ are two types of tokens: SPRM tokens and attribute ones. SPRM refers to
+ Single PRoperty Modifier, in this context it means a token that contains other
+ tokens. It's used to represent an XML element. That means that SPRM tokens
+ can contain other SPRM tokens, and also attribute tokens, while attribute
+ tokens only contain simple types (boolean, integer, string).
+
+ More terminology: the types in the OOXML schema have two typical prefixes:
+
+ - CT_something: complex type, used to describe an XML element
+ - ST_something: simple type, used to describe the contents of an attribute
+
+ For tokens the following abbreviations are used:
+
+ - NS_something: namespace
+ - LN_something: local name
+ -->
+
+ <!--
+ The model element is the toplevel container for the XML element /
+ attribute mapping definition. It contains namespace aliases, direct token
+ definitions and mapping definitions for each namespace.
+ -->
+ <define name="model-element">
+ <element name="model">
+ <oneOrMore>
+ <ref name="token-element"/>
+ </oneOrMore>
+ <oneOrMore>
+ <ref name="namespace-element"/>
+ </oneOrMore>
+ </element>
+ </define>
+
+ <!--
+ A token element can explicitly define a token. This allows generating
+ such a token in the tokenizers and handling it in the domain mapper. Ideally
+ tokens are *not* defined this way, they are mapped to an XML element or
+ attribute from the OOXML specification.
+ -->
+ <define name="token-element">
+ <element name="token">
+ <!--
+ The token name must be ooxml:something, then in C++ it'll be the
+ NS_ooxml::LN_something ("OOXML namespace, something local name")
+ constant.
+ -->
+ <attribute name="tokenid"/>
+ </element>
+ </define>
+
+ <!--
+ A namespace element is a container for a subset of the relax-ng grammar
+ of a part of the OOXML specification. It also contains the resource
+ definitions, which specify how XML elements and attributes are mapped to
+ tokens.
+ -->
+ <define name="namespace-element">
+ <element name="namespace">
+ <attribute name="name"/>
+ <zeroOrMore>
+ <ref name="start-element"/>
+ </zeroOrMore>
+ <ref name="grammar-element"/>
+ <zeroOrMore>
+ <ref name="resource-element"/>
+ </zeroOrMore>
+ </element>
+ </define>
+
+ <!--
+ A start element is similar to the relax-ng start element, but this one has a
+ name attribute to refer to a define, while the relax-ng one has a ref child
+ element to do the same.
+ -->
+ <define name="start-element">
+ <element name="start">
+ <attribute name="name"/>
+ </element>
+ </define>
+
+ <!--
+ A resource element always matches (by its name attribute) a define from the
+ grammar of the namespace. It describes how that (simple or complex) type is
+ parsed during import.
+
+ Example:
+
+ <resource name="CT_Font" resource="Properties">
+ ...
+ </resource>
+
+ or
+
+ <resource name="CT_OMathPara" resource="Stream"/>
+ -->
+ <define name="resource-element">
+ <element name="resource">
+ <!-- There should be a define element with the same name attribute. -->
+ <attribute name="name"/>
+ <!--
+ This means the resource element will be handled by the
+ OOXMLFastContextHandler<resource> class.
+
+ The two most important resources:
+
+ - Properties: this maps elements/attributes to SPRM/attribute tokens
+ - Stream: If the element itself does not require any special handling,
+ but the subelements are interesting, use this resource. If no
+ explicit resource element is available, then a null context will be
+ created and the element and all its subelements will be ignored.
+ -->
+ <attribute name="resource"/>
+ <optional>
+ <attribute name="tokenid"/>
+ </optional>
+ <zeroOrMore>
+ <choice>
+ <ref name="resource-element-element"/>
+ <ref name="resource-attribute-element"/>
+ <ref name="resource-value-element"/>
+ <ref name="resource-action-element"/>
+ </choice>
+ </zeroOrMore>
+ </element>
+ </define>
+
+ <!--
+ The <element> child of a <resource> defines what element name will be handled
+ via what token.
+
+ Example:
+
+ <element name="charset" tokenid="ooxml:CT_Font_charset"/>
+
+ Means the <charset> element will be handled in the sprm() function of the handler
+ class as a NS_ooxml::LN_CT_Font_charset case. (sprm() is a logging wrapper
+ around lcl_sprm(), which is the real implementation.)
+ -->
+ <define name="resource-element-element">
+ <element name="element">
+ <attribute name="name"/>
+ <attribute name="tokenid"/>
+ </element>
+ </define>
+
+ <!--
+ The <attribute> child of a <resource> defines what attribute name will be
+ handled via what token.
+
+ Example:
+
+ <attribute name="name" tokenid="ooxml:CT_Font_name"/>
+
+ Means the <name> attribute will be handled in the attribute() (real
+ implementation in lcl_attribute()) function of the handler class as a
+ NS_ooxml::LN_CT_Font_name case.
+ -->
+ <define name="resource-attribute-element">
+ <element name="attribute">
+ <attribute name="name"/>
+ <optional>
+ <attribute name="tokenid"/>
+ </optional>
+ <optional>
+ <attribute name="action"/>
+ </optional>
+ </element>
+ </define>
+
+ <!--
+ A <value> inside a <resource> defines how to map the string data of a value
+ to a token. The tokenid attribute defines the token name, the text of the
+ element defines the string. This is useful in case the value of an attribute
+ is a choice from a predefined list.
+ -->
+ <define name="resource-value-element">
+ <element name="value">
+ <attribute name="tokenid"/>
+ <text/>
+ </element>
+ </define>
+
+ <!--
+ An <action> inside a <resource> can perform additional actions in the
+ following situations:
+
+ - start of the element
+ - end of the element
+ - character data of the element
+
+ The tokenid attribute restricts the action to a particular element.
+
+ Example:
+
+ <resource name="CT_TxbxContent" resource="Stream">
+ <action name="start" action="startTxbxContent"/>
+ <action name="end" action="endTxbxContent"/>
+ </resource>
+
+ That means that when:
+
+ - <txbxContent> starts, OOXMLFastContextHandler::startTxbxContent() will be called
+ - <txbxContent> ends, OOXMLFastContextHandler::endTxbxContent() will be called
+ -->
+ <define name="resource-action-element">
+ <element name="action">
+ <attribute name="name"/>
+ <attribute name="action"/>
+ <optional>
+ <attribute name="tokenid"/>
+ </optional>
+ <optional>
+ <attribute name="sendtokenid"/>
+ </optional>
+ <optional>
+ <ref name="resource-action-cond-element"/>
+ </optional>
+ </element>
+ </define>
+
+ <!--
+ Some actions take parameters, which can be defined by the <cond> element.
+
+ Example:
+
+ <resource name="CT_FldChar" resource="Stream">
+ <action name="start" action="fieldstart">
+ <cond tokenid="ooxml:CT_FldChar_fldCharType" value="ooxml:Value_ST_FldCharType_begin"/>
+ </action>
+ </resource>
+
+ That means:
+
+ - if the <fldChar> starts with an fldCharType attribute being "begin"
+ - then perform the "fieldstart" action.
+ -->
+ <define name="resource-action-cond-element">
+ <element name="cond">
+ <attribute name="tokenid"/>
+ <attribute name="value"/>
+ </element>
+ </define>
+
+ <!-- The entry point of the schema. -->
+ <start>
+ <ref name="model-element"/>
+ </start>
+</grammar>
+<!-- vim: ft=xml shiftwidth=2 softtabstop=2 expandtab:
+-->