diff options
Diffstat (limited to 'i18npool/source/localedata/data/locale.dtd')
-rw-r--r-- | i18npool/source/localedata/data/locale.dtd | 692 |
1 files changed, 692 insertions, 0 deletions
diff --git a/i18npool/source/localedata/data/locale.dtd b/i18npool/source/localedata/data/locale.dtd new file mode 100644 index 0000000000..81a8bdfbb3 --- /dev/null +++ b/i18npool/source/localedata/data/locale.dtd @@ -0,0 +1,692 @@ +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . +--> + +<!-- ............................................................... --> +<!-- Locale data specification DTD ................................. --> +<!-- ............................................................... --> + +<!-- + =========================================================================== + ATTENTION! PLEASE! HEADS UP! IMPORTANT! + =========================================================================== + + Please validate your locale data contribution. + + A quick validation check can be done if you have xmllint installed and the + current locale.dtd file at hand, you can download the locale.dtd file from + https://cgit.freedesktop.org/libreoffice/core/plain/i18npool/source/localedata/data/locale.dtd + + xmllint -dtdvalid locale.dtd -noout your_data.xml + + (Note that instead of one - hyphen minus two consecutive hyphen minus + characters should be given to start an option, but a double hyphen in a XML + comment technically ends a comment (though parsers look for a matching + closing one with > as well) and let xmllint complain already about the .dtd + itself. Some versions of xmllint also accept a single hyphen minus). + + + Second, using a validating parser. A validating parser, for example, may be found at + http://unicode.org/cldr/data/tools/java/org/unicode/cldr/util/XMLValidator.java + Compile it into a class-jar and call it in the + i18npool/source/localedata/data/ directory: + java -cp <your_path>/XMLValidator.jar org.unicode.cldr.util.XMLValidator your_data.xml + + + A third possibility is: + + - temporarily (!) change the DOCTYPE of your file to read (all on one line) + <!DOCTYPE Locale SYSTEM "https://cgit.freedesktop.org/libreoffice/core/plain/i18npool/source/localedata/data/locale.dtd"> + + - upload it to the form available at http://www.validome.org/xml/ + + This will validate the file against the HEAD revision of locale.dtd + + + Please test locale data files either in an enable-dbgutil build, which + implements some checks and pops up assertion message boxes if the tests + fail, or by setting the environment variable OOO_ENABLE_LOCALE_DATA_CHECKS + to 'Y' or 'Yes' (or any other string starting with 'Y') or '1' before + starting the application, which outputs the same messages to stderr and + also works in a product build. + + Then follow this procedure: + 1. Create a new spreadsheet document. + 2. On a cell use context menu -> Format Cells -> Numbers. + 3. Select the locale in the Language list box => MUST be assertion free. + 3.a. Assertions are only shown at the very first time a number formatter + is created, respectively the first time a specific locale data is + used. To repeat steps 1.-3. you'd need to create another spreadsheet + document. + + =========================================================================== + NOTE the FormatElement comments further down. + =========================================================================== + +--> + +<!-- ............................................................... --> +<!-- Entities for characters and symbols ........................... --> + +<!ENTITY % UNOModule + 'unoid CDATA #IMPLIED'> + +<!ENTITY % MessageID + 'msgid CDATA #REQUIRED'> + +<!ENTITY % RefLocale + 'ref CDATA #IMPLIED'> +<!-- Where given, an element can be inherited from another locale, e.g. + ref="en_US" --> + +<!ENTITY % LIBModule + 'module CDATA #IMPLIED'> +<!-- The locale referred to for the sub categories, implementation detail. --> + +<!ENTITY % replaceFrom 'replaceFrom CDATA #IMPLIED'> +<!ENTITY % replaceTo 'replaceTo CDATA #IMPLIED'> +<!-- See below for the LC_FORMAT element. --> + +<!ELEMENT DefaultName (#PCDATA)> + +<!-- Locale is made of different sub categories --> +<!ELEMENT Locale (LC_INFO, LC_CTYPE, LC_FORMAT, LC_FORMAT_1?, LC_COLLATION, LC_SEARCH, LC_INDEX, LC_CALENDAR, LC_CURRENCY, LC_TRANSLITERATION, LC_MISC, LC_NumberingLevel, LC_OutLineNumberingLevel)> +<!ATTLIST Locale versionDTD CDATA #FIXED "2.0.3"> +<!-- Version identifier to prevent mismatching data files being submitted + because of older files being copied as templates. The version should be + less than or equal to the LibreOffice release, or the LibreOffice release + number plus some extension, like "2.0.enhanced", to be able to easily + determine the corresponding LibreOffice version. New versions of the DTD + with new required elements SHOULD REALLY result in a new versionDTD here + and LOCALE_VERSION_DTD in ../LocaleNode.cxx, and *.xml files MUST be + adapted then, otherwise building the data or checking it with a validating + parser will throw an error. +--> +<!ATTLIST Locale allowUpdateFromCLDR (yes|no) #REQUIRED> +<!-- Whether some data elements may be (automatically) updated from the Common + Locale Data Repository, see http://cldr.unicode.org/ + Note: This mechanism currently (2010-02-21) is outdated. +--> +<!ATTLIST Locale version CDATA #REQUIRED> +<!-- Valid number, may designate versioned data --> + + +<!ELEMENT LC_INFO (Language, Country, Platform?, Variant?)> + +<!ELEMENT Language (LangID, DefaultName) > +<!ELEMENT LangID (#PCDATA) > +<!-- LangID must be a valid two or three letter language identifier defined by + ISO 639. Use ISO 639-1 two letter code where available, else ISO 639-2 or + 639-3 three letter code. + + If the Variant element designates a BCP 47 language tag (because the + locale is not expressible as a Language,Country pair, for example contains + a script tag) then the LangID value MUST be the code 'qlt' (reserved by + ISO 639-3 for private use) to tell the language tag processor that the + actual language tag is in the Variant element. +--> + +<!ELEMENT Country (CountryID, DefaultName) > +<!ELEMENT CountryID (#PCDATA) > +<!-- CountryID must be a valid two letter country identifier defined by ISO 3166. --> + +<!ELEMENT Platform (PlatformID) > +<!ELEMENT PlatformID (#PCDATA) > +<!-- Unused, deprecated, can be generic|unix|win32|macos, best Platform element + be absent. +--> + +<!ELEMENT Variant (#PCDATA) > +<!-- If the LangID element contains the value 'qlt' then the Variant element + MUST contain the valid BCP 47 language tag of the locale. If LangID is + another ISO 639 code than 'qlt' then the Variant element must be empty or + not present. +--> + + +<!-- The LC_FORMAT element contains number format codes and may actually appear + twice (with the second occurrence named LC_FORMAT_1). One section is + mandatory and MUST contain at least all required format codes + formatindex="0" to formatindex="47" (except 10 and 11 MUST NOT be + defined), MUST NOT contain formatindex 48 and 49, MUST contain formatindex + 50, and MAY contain other format codes. See below ATTLIST FormatElement + formatindex. + + A second LC_FORMAT_1 section may follow containing other format codes. The + difference between the two sections is that they are inherited + independently if a locale uses the RefLocale mechanism (ref="..."). This + may be used to not offer locale dependent format codes to other locales + that otherwise inherit the format codes. + +--> +<!ELEMENT LC_FORMAT (DateAcceptancePattern*, FormatElement*) > +<!-- All FormatElement elements must be given if the RefLocale mechanism is not used! --> +<!ATTLIST LC_FORMAT %RefLocale;> +<!ATTLIST LC_FORMAT %replaceFrom;> +<!-- Define placeholder for currency code, usually "[CURRENCY]" --> +<!ATTLIST LC_FORMAT %replaceTo;> +<!-- Currency code to be used to replace the placeholder, e.g. "[$R-1C09]". + Note: The brackets and the leading $ character are mandatory, the + hyphen-minus separates the currency symbol from the hexagesimal MS-LCID, + letters contained in the LCID have to be in upper case, leading zeros are + to be omitted. LCIDs are defined in include/i18nlangtag/lang.h ( + https://opengrok.libreoffice.org/xref/core/include/i18nlangtag/lang.h ) +--> + +<!ELEMENT DateAcceptancePattern (#PCDATA)> +<!-- Pattern that defines an input sequence match to be accepted as + (abbreviated) date. For example, in en_US locale "M/D" accepts an input of + 11/23 as CurrentYear-November-23 whereas 11/23/ would not be a date. In + de_DE locale "D.M." accepts an input of 23.11. as CurrentYear-November-23 + whereas 23.11 would not be a date. + + For each locale one pattern that matches a full date is automatically + generated from FormatElement formatIndex="21" and does not need to be + defined, for example "M/D/Y" or "D.M.Y". + + At least one pattern for abbreviated date input must be defined, i.e. + contain D and M but not Y. + + NOTE: use only single letter D,M,Y. Multiple patterns can be defined. +--> + +<!ELEMENT LC_FORMAT_1 (FormatElement*) > +<!ATTLIST LC_FORMAT_1 %RefLocale;> +<!ATTLIST LC_FORMAT_1 %replaceFrom;> +<!ATTLIST LC_FORMAT_1 %replaceTo;> + +<!ELEMENT FormatElement ( FormatCode, DefaultName?)> +<!ATTLIST FormatElement %MessageID;> +<!ATTLIST FormatElement default (true|false) #REQUIRED > +<!ATTLIST FormatElement type (short|medium|long) #REQUIRED > +<!-- + There may be up to three groups (type="short", type="medium", type="long") + for each usage category defined. Each group, if defined, needs exactly one + default. The type roughly determines the display string length, for example + short, medium, and long date formats. + + The number formatter determines an ultimate default format of a specific + usage category by looking at the medium, long, and short default formats, + in that very particular order. +--> +<!ATTLIST FormatElement usage (FIXED_NUMBER|FRACTION_NUMBER|PERCENT_NUMBER|SCIENTIFIC_NUMBER|CURRENCY|DATE|TIME|DATE_TIME) #REQUIRED > +<!ATTLIST FormatElement formatindex CDATA #REQUIRED> +<!-- + The following FormatElements must follow specific rules: + + All: + The format indices 0..65 are reserved and, for backwards compatibility, + indices 0..49 MUST be used as stated in + offapi/com/sun/star/i18n/NumberFormatIndex.idl ( + https://opengrok.libreoffice.org/xref/core/offapi/com/sun/star/i18n/NumberFormatIndex.idl ) + Note that indices 10 ("# ?/?"), 11 ("# ??/??"), 48 (BOOLEAN) and 49 (@ + Text) are generated internally, as they aren't locale dependent, and + must not be used in locale data XML files. All other formats have to be + present. + + Note also that "must be used as stated" does not mean that the format + codes must be identical, of course the meaning of a format code should + match, for example en_US MM/DD/YY matches de_DE DD.MM.YY + Just imagine the same index being used with another locale shouldn't + change the meaning of representation significantly. + + You'll notice differences of non-matching format codes only if + documents use the Default language in number formats and either are + stored in old SO5 binary file format and loaded on another system where + languages/locales aren't the same, or if the default locale is switched + under menu Tools.Options.LanguageSettings.Languages.LocaleSetting + dialog, which exchanges formats on the fly in the spreadsheet + application. Please check it out! So far only very few locale data file + we received got that right, especially not in date formats! + + For easier comparison between locales in future please sort the + FormatElements by their formatindex="..." value within a usage group. + This isn't necessary to be technically correct and isn't done in many + locales yet, but will certainly help. + + If you want to define yet more formats than the preset and reserved + 0..49 range that's fine, only make sure those formatindex="..." values + are each >=66 and all values are unique within one locale. + + Of usage="FIXED_NUMBER": + formatindex="0" MUST be the format containing the 'General' keyword. + The keyword itself may be localized, it is good practice though to + stick with a wording known from another spreadsheet application for + better user experience. Like other format codes it may be prepended + with a [NatNum1] modifier if values are to be displayed using native + numbering if no specific format was applied. The format must have the + default="true" and type="medium" attributes. + + Of usage="DATE": + formatindex="21" is used to edit already existing date data. In order + to always edit the full century the long year YYYY code must be used. + Furthermore, the format has to be of an editable type, of course, which + means parseable. Therefore it should only contain DD, MM, YYYY and date + separators, and the YMD default order is determined from the order + encountered in this format. + TODO: Future versions should make use of an edit="true" attribute + instead of relying on this special index requirement. + + formatindex="33" must be ISO 8601 YYYY-MM-DD format code. + + Of usage="DATE_TIME": + formatindex="46" should contain a short year YY code and only HH and MM + without SS seconds. + + formatindex="47" is used to edit already existing combined date/time + data. The requirements are the same as for formatindex="21" above, plus + HH and MM and SS codes. + + formatindex="50" must contain a long year YYYY code and only HH and MM + without SS seconds. + + Of usage="TIME": + * The FormatElement with default="true" type="medium" is used to edit + times and thus must contain all HH and MM and SS codes, e.g. HH:MM:SS + * Formatindices 43, 44, 45 are special in the sense that they are + programmatically used to automatically display values that meet + certain criteria: + * Formatindex="43" contains the [HH] format code that displays hour + values greater than or equal to 24 (as opposed to a simple HH that + displays modulo 24). + * Formatindex="44" uses no hour code but 100th seconds and the + Time100SecSeparator. + * Formatindex="45" uses both [HH] hour code and 100th seconds to be + able to edit such time values without losing information. + + Of usage="CURRENCY": + formatindices 12, 13, 14, 15, 17 with [$xxx-yyy] notation must use the + xxx currency symbol that has the attribute + usedInCompatibleFormatCodes="true". The hexadecimal yyy LANGID must be + properly set. It can be found in the file include/i18nlangtag/lang.h ( + https://opengrok.libreoffice.org/xref/core/include/i18nlangtag/lang.h ) + You may verify the proper use of the xxx currency symbol with the AWK + script i18npool/source/localedata/data/currency-check.awk, it mustn't + display any output. If it does, then there's something wrong. + +--> +<!ELEMENT FormatCode (#PCDATA)> + + +<!-- The LC_CALENDAR element defines calendars used with a locale. --> +<!ELEMENT LC_CALENDAR (Calendar* ) > +<!-- At least one Calendar element must be given if the RefLocale mechanism is not used! --> +<!ATTLIST LC_CALENDAR %RefLocale;> + +<!ELEMENT Calendar (DaysOfWeek, MonthsOfYear, GenitiveMonths*, PartitiveMonths*, Eras, StartDayOfWeek, MinimalDaysInFirstWeek) > +<!ATTLIST Calendar %UNOModule;> +<!-- The unoid of a gregorian calendar MUST be lower case "gregorian", + calendars MUST match the names defined in the OASIS OpenDocument Format + (ODF) 1.2 or later specification. The implementation name registered with + the LibreOffice service registry MUST match, e.g. + com.sun.star.i18n.Calendar_gregorian +--> +<!ATTLIST Calendar default (true|false) #REQUIRED > +<!-- Exactly one Calendar element has to be the default calendar. --> + +<!ELEMENT DaysOfWeek (Day*)> +<!-- All Day elements of a Calendar must be given if the RefLocale mechanism is not used! --> +<!ATTLIST DaysOfWeek %RefLocale;> +<!-- Sequence of days is important, MUST start with Sunday. --> +<!ELEMENT Day (DayID, DefaultAbbrvName, DefaultFullName, DefaultNarrowName*)> +<!ELEMENT DayID (#PCDATA)> +<!-- Preferably the lower case abbreviated English name like sun for Sunday. --> +<!ELEMENT DefaultAbbrvName (#PCDATA)> +<!-- The abbreviated day name, e.g. Sun for Sunday. --> +<!ELEMENT DefaultFullName (#PCDATA)> +<!-- The full day name, e.g. Sunday for Sunday. --> +<!ELEMENT DefaultNarrowName (#PCDATA)> +<!-- The narrow day name, e.g. S for Sunday. + If not specified, the first letter of the corresponding DefaultFullName is taken. + --> + +<!ELEMENT MonthsOfYear (Month*)> +<!-- Nominative month names. + All Month elements of a Calendar must be given if the RefLocale mechanism is not used! + --> +<!ATTLIST MonthsOfYear %RefLocale;> +<!-- Sequence of months is important, MUST start with the first month of a + year, e.g. January in a Gregorian calendar. + --> + +<!ELEMENT GenitiveMonths (Month*)> +<!-- Possessive genitive case month names, for example in Slavic locales. The + element is optional, but if present all Month elements of a Calendar must + be given if the RefLocale mechanism is not used! If not specified, + the MonthsOfYear names will be used in the context of the number + formatter's genitive case. --> +<!ATTLIST GenitiveMonths %RefLocale;> +<!-- Sequence of months is important, MUST start with the first month of a + year, e.g. January in a Gregorian calendar. + --> + +<!ELEMENT PartitiveMonths (Month*)> +<!-- Partitive case month names, for example in Finnish locales. The + element is optional, but if present all Month elements of a Calendar must + be given if the RefLocale mechanism is not used! If not specified, + GenitiveMonths names will be used, or if those are not specified then + MonthsOfYear, in the context of the number formatter's partitive case. --> +<!ATTLIST PartitiveMonths %RefLocale;> +<!-- Sequence of months is important, MUST start with the first month of a + year, e.g. January in a Gregorian calendar. + --> + +<!-- Rules for use of nominative / genitive / partitive case month names in + number formatter when encountering MMM or MMMM: + + * MMM or MMMM immediately preceded or followed by a literal character + other than space => nominative month name (noun), for Excel and + backwards compatibility such as Finnish MMMM"ta" + * no day of month (D or DD) present in format code => nominative name + * day of month (D or DD) after MMM or MMMM => genitive name + * no genitive names defined => nominative name + * day of month (D or DD) before MMM or MMMM => partitive name + * no partitive names defined => genitive name + * no genitive names defined => nominative name + + NOTE: + + If only <MonthsOfYear> and <PartitiveMonths> are specified but not + <GenitiveMonths>, then for MMM(M) D(D) formats the <MonthsOfYear> + nominative name is displayed. Only for D(D) MMM(M) formats the + <PartitiveMonths> name is displayed. + + If only for MMM(M) D(D) formats the <GenitiveMonths> are to be displayed + but nominative names for D(D) MMM(M), then specify <PartitiveMonths> + identical to <MonthsOfYear>, do not omit it as otherwise it would inherit + from <GenitiveMonths> again. + + --> + +<!ELEMENT Month (MonthID, DefaultAbbrvName, DefaultFullName, DefaultNarrowName*)> +<!ELEMENT MonthID (#PCDATA)> +<!-- Preferably the lower case abbreviated English name like jan for January. --> + +<!ELEMENT Eras (Era*)> +<!-- All Era elements of a Calendar must be given if the RefLocale mechanism is not used! --> +<!ATTLIST Eras %RefLocale;> +<!-- The eras MUST be in chronological order, e.g. first BC then AC. --> +<!ELEMENT Era (EraID, DefaultAbbrvName, DefaultFullName)> +<!ELEMENT EraID (#PCDATA)> +<!-- If a calendar has special eras (like zh_TW ROC or ja_JP Gengou calendar) + and a date before those eras is undefined, a leading (first) dummy era + with EraID="Dummy" has to be defined to enable the number formatter to + fall back to a Gregorian calendar for those date values if the XCalendar + implementation returns an era value of 0. +--> + +<!ELEMENT StartDayOfWeek (DayID)> +<!-- MUST exactly match (case significant!) one of the DayID of DaysOfWeek --> + +<!ELEMENT MinimalDaysInFirstWeek (#PCDATA)> +<!-- The number of days of a week that must reside in the beginning of a year + to make a week the first week of the year. For example, a value of 4 means + that at least 4 days of a week must be in the new year. So if the week + starts on Monday, the first week of a year will be the week where Thursday + is in the new year. +--> + + +<!-- The LC_CURRENCY element defines currencies used with a locale. --> +<!ELEMENT LC_CURRENCY (Currency* ) > +<!-- At least one Currency element must be given if the RefLocale mechanism is not used! --> +<!ATTLIST LC_CURRENCY %RefLocale;> +<!ELEMENT Currency (CurrencyID, CurrencySymbol, BankSymbol, CurrencyName, DecimalPlaces)> +<!ATTLIST Currency default (true|false) #REQUIRED > +<!-- Exactly one Currency element has to be the default currency. --> +<!ATTLIST Currency usedInCompatibleFormatCodes (true|false) #REQUIRED > +<!-- If this currency is the one used in compatible number format codes with + <member>FormatElement::formatIndex</member> values in the range 12..17. + Those format codes are used to generate some old style currency format + codes for compatibility with StarOffice5 and StarOffice4. + Every locale data file MUST contain exactly one currency having this set to "true", + and that currency MUST be used in format codes 12..17. + For European countries using EUR it MUST be the old currency, for example, DM. +--> +<!ATTLIST Currency legacyOnly (true|false) #IMPLIED > +<!-- If this Currency element exists only to be able to correctly load legacy + documents and is not selectable in the UI otherwise. Defaults to "false" + if not specified. If this attribute is "true", 'default' and + 'usedInCompatibleFormatCodes' must both be "false". + + Currency elements must not be changed to contain only a different + CurrencySymbol element without changing the CurrencyID and BankSymbol + elements, instead the entire Currency element must be duplicated, the old + element needs this 'legacyOnly' attribute be added and 'default' and + 'usedInCompatibleFormatCodes' attributes must be set to "false", and only + in the new duplicated Currency element the CurrencySymbol element be + changed. +--> +<!ELEMENT CurrencyID (#PCDATA)> +<!-- The ISO 4217 three letter currency code, e.g. USD or EUR. --> +<!ELEMENT CurrencySymbol (#PCDATA)> +<!-- The currency symbol, e.g. $ or €. --> +<!ELEMENT BankSymbol (#PCDATA)> +<!-- The ISO 4217 three letter currency code, e.g. USD or EUR. --> +<!ELEMENT CurrencyName (#PCDATA)> +<!-- The native currency name, e.g. Dollar or Euro. --> +<!ELEMENT DecimalPlaces (#PCDATA)> +<!-- Number of decimal places used with the currency, usually 2 or 0, e.g. 2 + for cents. +--> + + +<!ELEMENT LC_CTYPE (Separators?, Markers?, TimeAM?, TimePM?, MeasurementSystem?)> +<!-- All elements must be given if the RefLocale mechanism is not used! --> +<!ATTLIST LC_CTYPE %RefLocale;> +<!ATTLIST LC_CTYPE %UNOModule;> + +<!ELEMENT Separators (DateSeparator, ThousandSeparator, DecimalSeparator, DecimalSeparatorAlternative?, TimeSeparator, Time100SecSeparator, ListSeparator, LongDateDayOfWeekSeparator, LongDateDaySeparator, LongDateMonthSeparator, LongDateYearSeparator)> +<!ELEMENT DateSeparator (#PCDATA)> +<!ELEMENT ThousandSeparator (#PCDATA)> +<!ELEMENT DecimalSeparator (#PCDATA)> +<!ELEMENT DecimalSeparatorAlternative (#PCDATA)> +<!ELEMENT TimeSeparator (#PCDATA)> +<!ELEMENT Time100SecSeparator (#PCDATA)> +<!ELEMENT ListSeparator (#PCDATA)> +<!ELEMENT LongDateDayOfWeekSeparator (#PCDATA)> +<!ELEMENT LongDateDaySeparator (#PCDATA)> +<!ELEMENT LongDateMonthSeparator (#PCDATA)> +<!ELEMENT LongDateYearSeparator (#PCDATA)> + +<!ELEMENT Markers (QuotationStart, QuotationEnd, DoubleQuotationStart, DoubleQuotationEnd)> +<!ELEMENT QuotationStart (#PCDATA)> +<!ELEMENT QuotationEnd (#PCDATA)> +<!ELEMENT DoubleQuotationStart (#PCDATA)> +<!ELEMENT DoubleQuotationEnd (#PCDATA)> + +<!ELEMENT TimeAM (#PCDATA)> +<!ELEMENT TimePM (#PCDATA)> +<!ELEMENT MeasurementSystem (#PCDATA)> + + +<!ELEMENT LC_COLLATION (Collator*, CollationOptions?)> +<!-- All elements must be given if the RefLocale mechanism is not used! --> +<!ATTLIST LC_COLLATION %RefLocale;> +<!-- + Optional ICU tailoring. + + See Collation Customization in ICU User Guide for syntax, + https://unicode-org.github.io/icu/userguide/collation/customization/ + + There are two ways to add language specific tailoring in LibreOffice. + For small tailoring, you can directly add it in locale data here. For + large tailoring, it is suggested to put the data under collator/data, so + it will be compiled to a binary format in build time and improve performance + in run time. + +--> +<!ELEMENT Collator (#PCDATA)> +<!ATTLIST Collator %UNOModule;> +<!ATTLIST Collator default (true|false) #REQUIRED > +<!ELEMENT CollationOptions (TransliterationModules+)> +<!ELEMENT TransliterationModules (#PCDATA)> + + +<!ELEMENT LC_SEARCH (SearchOptions?)> +<!-- All elements must be given if the RefLocale mechanism is not used! --> +<!ATTLIST LC_SEARCH %RefLocale;> +<!ELEMENT SearchOptions (TransliterationModules+)> + + +<!ELEMENT LC_INDEX (IndexKey*, UnicodeScript*, FollowPageWord*)> +<!ATTLIST LC_INDEX %RefLocale;> + +<!-- + The IndexKey element is optional, but should be given if the locale + requires a specific sort order in Writer's index tables or entries are to + be combined under keys. + + Index key for the algorithm and language, like >A-Z< for English => A, B, + C, ..., Y, Z. The letters specify under which key an entry goes and the + order the keys are sorted. Keys may be reordered or letters inserted to + form a specific order, for example (ve_ZA) >A-D Ḓ E-L Ḽ M N Ṋ Ṅ O-T Ṱ U-Z< + Entries that don't match a defined key are appended to the index list. Used + in Writer textprocessor. + + The initial data was setup according to the ICU collation chart at + http://oss.software.ibm.com/icu/charts/collation/ + Note: ICU site was moved to https://icu.unicode.org/ and as ICU per + default uses CLDR, collation charts are available at + https://www.unicode.org/cldr/charts/latest/ + + Possible notations in the syntax of the IndexKey element are: + + '-' (dash): Ellipsis, all letters elided by the ellipsis are included as + index keys in alphabetic order. For example, 'A-Z' includes all ASCII + letters A to Z. + + '[]' (square brackets): all letters included in square brackets are + skipping letters. It is used for CTL languages, for example in Thai + (th_TH), to skip prefix vowels or signs. For example, if ["] double quote + is defined as skipping letter, index item '"Index"' will be under 'I', not + '"'. + + '{}' (curly brackets): define multiple letters index key, for example + '{Cs}' is one of the index keys for Hungarian (hu_HU). + + '()' (parentheses): define optional description for index key. If defined, + the description will be shown as index key title, instead of the index key + itself. For example, 'E(E, É)' in Hungarian adds entries with 'E' to the + description 'E, É'. + +--> +<!ELEMENT IndexKey (#PCDATA)> +<!ATTLIST IndexKey %UNOModule;> +<!ATTLIST IndexKey %LIBModule;> +<!ATTLIST IndexKey default (true|false) #REQUIRED > +<!ATTLIST IndexKey phonetic (true|false) #REQUIRED > + +<!-- + The Unicode script types are those of + offapi/com/sun/star/i18n/UnicodeScript.idl, they define the code range for + the language. +--> +<!ELEMENT UnicodeScript (#PCDATA)> + +<!-- + The FollowPageWord entries were originally hard-coded in + ../../indexentry/indexentrysupplier.cxx, most locales used the English + ``p.'' and ``pp.'', valid data should be provided by native speakers. + These words or abbreviations are used in the Writer's index table. The + first FollowPageWord element is the abbreviation for "page" (p.), the + second FollowPageWord element the abbreviation for "page and following + pages" (pp.). +--> +<!ELEMENT FollowPageWord (#PCDATA)> + + +<!ELEMENT LC_TRANSLITERATION (Transliteration*)> +<!ATTLIST LC_TRANSLITERATION %RefLocale;> +<!ELEMENT Transliteration EMPTY> +<!ATTLIST Transliteration %UNOModule;> + +<!ELEMENT LC_MISC (ForbiddenCharacters?, BreakIteratorRules?, ReservedWords?)> +<!ATTLIST LC_MISC %RefLocale;> +<!ELEMENT ForbiddenCharacters (ForbiddenLineBeginCharacters, ForbiddenLineEndCharacters, LineBreakHangingCharacters)> +<!ELEMENT ForbiddenLineBeginCharacters (#PCDATA)> +<!ELEMENT ForbiddenLineEndCharacters (#PCDATA)> + +<!-- optional break iterator rules for the languages. + if defined, 5 rules should be all listed, each of them can be blank and default will be used. + order of the rules is significant! +--> +<!ELEMENT BreakIteratorRules (EditMode, DictionaryMode, WordCountMode, CharacterMode, LineMode)> +<!ELEMENT EditMode (#PCDATA)> +<!ELEMENT DictionaryMode (#PCDATA)> +<!ELEMENT WordCountMode (#PCDATA)> +<!ELEMENT CharacterMode (#PCDATA)> +<!ELEMENT LineMode (#PCDATA)> + +<!ELEMENT ReservedWords (trueWord, falseWord, quarter1Word, quarter2Word, quarter3Word, quarter4Word, aboveWord, belowWord, quarter1Abbreviation, quarter2Abbreviation, quarter3Abbreviation, quarter4Abbreviation)> +<!-- order is significant! --> +<!-- trueWord and falseWord are displayed (uppercased) for the BOOLEAN number + format keyword. "true", "false" --> +<!ELEMENT trueWord (#PCDATA)> +<!ELEMENT falseWord (#PCDATA)> +<!-- quarter1Word,... are displayed for the QQ number format keyword. + "1st quarter", "2nd quarter", ...; "1er trimestre", "2e trimestre", ... --> +<!ELEMENT quarter1Word (#PCDATA)> +<!ELEMENT quarter2Word (#PCDATA)> +<!ELEMENT quarter3Word (#PCDATA)> +<!ELEMENT quarter4Word (#PCDATA)> +<!-- aboveWord and belowWord are used as reference field content in Writer, it + is a "physical" object relative position, not "numerical" relative. + Like section "above" and section "below". --> +<!ELEMENT aboveWord (#PCDATA)> +<!ELEMENT belowWord (#PCDATA)> +<!-- quarter1Abbreviation,... are displayed for the Q number format keyword. + "Q1" (quarter), "T1" (trimestre), ... --> +<!ELEMENT quarter1Abbreviation (#PCDATA)> +<!ELEMENT quarter2Abbreviation (#PCDATA)> +<!ELEMENT quarter3Abbreviation (#PCDATA)> +<!ELEMENT quarter4Abbreviation (#PCDATA)> + + +<!-- + In numbering levels, the NumType attribute is a value of the constants + defined in offapi/com/sun/star/style/NumberingType.idl ( + https://opengrok.libreoffice.org/xref/core/offapi/com/sun/star/style/NumberingType.idl + ) +--> + +<!ELEMENT LC_NumberingLevel (NumberingLevel* )> +<!ATTLIST LC_NumberingLevel %RefLocale;> +<!ELEMENT NumberingLevel EMPTY> +<!ATTLIST NumberingLevel Prefix CDATA #REQUIRED> +<!ATTLIST NumberingLevel NumType CDATA #REQUIRED> +<!ATTLIST NumberingLevel Suffix CDATA #REQUIRED> +<!ATTLIST NumberingLevel Transliteration CDATA #IMPLIED> +<!ATTLIST NumberingLevel NatNum CDATA #IMPLIED> + + +<!ELEMENT LC_OutLineNumberingLevel (OutlineStyle* )> +<!ATTLIST LC_OutLineNumberingLevel %RefLocale;> +<!ELEMENT OutlineStyle (OutLineNumberingLevel+)> +<!ELEMENT OutLineNumberingLevel EMPTY> +<!ATTLIST OutLineNumberingLevel Prefix CDATA #REQUIRED > +<!ATTLIST OutLineNumberingLevel NumType CDATA #REQUIRED > +<!ATTLIST OutLineNumberingLevel Suffix CDATA #REQUIRED > +<!ATTLIST OutLineNumberingLevel BulletChar CDATA #REQUIRED > +<!ATTLIST OutLineNumberingLevel BulletFontName CDATA #REQUIRED> +<!ATTLIST OutLineNumberingLevel ParentNumbering CDATA #REQUIRED > +<!ATTLIST OutLineNumberingLevel LeftMargin CDATA #REQUIRED> +<!ATTLIST OutLineNumberingLevel SymbolTextDistance CDATA #REQUIRED > +<!ATTLIST OutLineNumberingLevel FirstLineOffset CDATA #REQUIRED > +<!ATTLIST OutLineNumberingLevel Adjust CDATA #IMPLIED > +<!ATTLIST OutLineNumberingLevel Transliteration CDATA #IMPLIED > +<!ATTLIST OutLineNumberingLevel NatNum CDATA #IMPLIED> |