summaryrefslogtreecommitdiffstats
path: root/misc
diff options
context:
space:
mode:
Diffstat (limited to 'misc')
-rw-r--r--misc/arrow-parquet/arrow-test.cpp155
-rwxr-xr-xmisc/arrow-parquet/dump-parquet.py52
-rwxr-xr-xmisc/arrow-parquet/gen-parquet-test-files.py92
-rw-r--r--misc/character-set/data.csv631
-rwxr-xr-xmisc/character-set/gen-enum.py101
-rw-r--r--misc/file-processor-modules/preview.py33
-rw-r--r--misc/notes/column-width-and-row-height.txt69
-rw-r--r--misc/notes/zlib-in-memory-gzip.txt8
-rwxr-xr-xmisc/xml-tokens/dump-xsd-keys.py145
-rwxr-xr-xmisc/xml-tokens/gen-gnumeric-tokens.py81
-rwxr-xr-xmisc/xml-tokens/gen-odf-tokens.py192
-rwxr-xr-xmisc/xml-tokens/gen-ooxml-tokens.py84
-rwxr-xr-xmisc/xml-tokens/gen-tokens.py47
-rwxr-xr-xmisc/xml-tokens/gnumeric.xsd1296
-rw-r--r--misc/xml-tokens/ooxml-extra-tokens.txt3
-rw-r--r--misc/xml-tokens/token_util.py70
-rw-r--r--misc/xml-tokens/xls-xml-tokens.txt991
17 files changed, 4050 insertions, 0 deletions
diff --git a/misc/arrow-parquet/arrow-test.cpp b/misc/arrow-parquet/arrow-test.cpp
new file mode 100644
index 0000000..f29fa9b
--- /dev/null
+++ b/misc/arrow-parquet/arrow-test.cpp
@@ -0,0 +1,155 @@
#include <arrow/io/file.h>
#include <parquet/stream_reader.h>

#include <cstdint>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+int main(int argc, char** argv)
+{
+ if (argc < 2)
+ return EXIT_FAILURE;
+
+ const char* filepath = argv[1];
+ std::shared_ptr<arrow::io::ReadableFile> infile;
+
+ PARQUET_ASSIGN_OR_THROW(
+ infile,
+ arrow::io::ReadableFile::Open(filepath));
+
+ auto file_reader = parquet::ParquetFileReader::Open(infile);
+ auto file_md = file_reader->metadata();
+ const parquet::FileMetaData& r = *file_md;
+
+ cout << "num-columns: " << r.num_columns() << endl;
+ cout << "num-rows: " << r.num_rows() << endl;
+ cout << "num-row-groups: " << r.num_row_groups() << endl;
+ cout << "num-schema-elements: " << r.num_schema_elements() << endl;
+ cout << "can-decompress: " << r.can_decompress() << endl;
+
+ for (int i = 0; i < r.num_row_groups(); ++i)
+ {
+ cout << "row-group " << i << ":" << endl;
+ auto rg = r.RowGroup(i);
+ cout << " num rows: " << rg->num_rows() << endl;
+ cout << " total byte size: " << rg->total_byte_size() << endl;
+ cout << " total compressed size: " << rg->total_compressed_size() << endl;
+ cout << " file offset: " << rg->file_offset() << endl;
+ cout << " num columns: " << rg->num_columns() << endl;
+
+ for (int j = 0; j < rg->num_columns(); ++j)
+ {
+ cout << " column chunk " << j << ":" << endl;
+ auto cc = rg->ColumnChunk(j);
+ cout << " file path: " << cc->file_path() << endl;
+ cout << " num values: " << cc->num_values() << endl;
+ cout << " type: " << cc->type() << endl;
+ cout << " data page offset: " << std::dec << cc->data_page_offset() << endl;
+ cout << " has dictionary page: " << cc->has_dictionary_page() << endl;
+ cout << " compression: " << cc->compression() << endl;
+ if (cc->has_dictionary_page())
+ cout << " dictionary page offset: " << cc->dictionary_page_offset() << endl;
+ cout << " has index page: " << cc->has_index_page() << endl;
+ }
+ }
+
+ cout << "schema:" << endl;
+ const parquet::SchemaDescriptor* p = r.schema();
+ cout << " name: " << p->name() << endl;
+ cout << " num-columns: " << p->num_columns() << endl;
+
+ std::vector<const parquet::ColumnDescriptor*> column_types;
+ column_types.reserve(p->num_columns());
+
+ for (int i = 0; i < p->num_columns(); ++i)
+ {
+ cout << "column " << i << ":" << endl;
+ const parquet::ColumnDescriptor* col_desc = p->Column(i);
+ column_types.push_back(col_desc);
+
+ cout << " name: " << col_desc->name() << endl;
+ cout << " physical type: " << col_desc->physical_type() << endl;
+ cout << " converted type: " << col_desc->converted_type() << endl;
+ cout << " type length: " << col_desc->type_length() << endl;
+ }
+
+ parquet::StreamReader stream{std::move(file_reader)};
+
+ if (stream.eof())
+ return EXIT_SUCCESS;
+
+ cout << "row values:" << endl;
+
+ // print column labels
+ for (const parquet::ColumnDescriptor* p : column_types)
+ cout << p->name() << ' ';
+ cout << endl;
+
+ for (int i = 0; i < r.num_rows(); ++i)
+ {
+ for (const parquet::ColumnDescriptor* p : column_types)
+ {
+ switch (p->physical_type())
+ {
+ case parquet::Type::BYTE_ARRAY:
+ {
+ switch (p->converted_type())
+ {
+ case parquet::ConvertedType::UTF8:
+ {
+ std::string v;
+ stream >> v;
+ cout << v << ' ';
+ break;
+ }
+ default:
+ throw std::runtime_error("WIP: unhandled converted type for BYTE_ARRAY");
+ }
+ break;
+ }
+ case parquet::Type::INT64:
+ {
+ switch (p->converted_type())
+ {
+ case parquet::ConvertedType::NONE:
+ {
+ int64_t v;
+ stream >> v;
+ cout << v << ' ';
+ break;
+ }
+ default:
+ throw std::runtime_error("WIP: unhandled converted type for INT64");
+ }
+ break;
+ }
+ case parquet::Type::BOOLEAN:
+ {
+ if (p->converted_type() != parquet::ConvertedType::NONE)
+ throw std::runtime_error("WIP: unhandled covnerted type for BOOLEAN");
+
+ bool v;
+ stream >> v;
+ cout << v << ' ';
+ break;
+ }
+ default:
+ {
+ std::ostringstream os;
+ os << "WIP: not handled type: physical=" << p->physical_type() << "; converted=" << p->converted_type();
+ throw std::runtime_error(os.str());
+ }
+ }
+ }
+
+ stream >> parquet::EndRow;
+ cout << endl;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/misc/arrow-parquet/dump-parquet.py b/misc/arrow-parquet/dump-parquet.py
new file mode 100755
index 0000000..742f37e
--- /dev/null
+++ b/misc/arrow-parquet/dump-parquet.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import argparse
+from pathlib import Path
+
+import pyarrow.parquet as pq
+
+
def main():
    """Print a summary of a Parquet file to stdout.

    The summary consists of the row-group, row and column counts, every
    public non-callable attribute of each schema column, and up to ``--num``
    leading values of each column taken from the first chunk of the first
    row group.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("path", type=Path)
    parser.add_argument("--num", "-n", type=int, default=10, help="Number of rows to print.")
    args = parser.parse_args()

    parquet = pq.ParquetFile(args.path)
    metadata = parquet.metadata
    print(f"num-row-groups: {metadata.num_row_groups}")
    print(f"num-rows: {metadata.num_rows}")
    print(f"num-columns: {metadata.num_columns}")
    print("schema:")
    for i in range(len(metadata.schema.names)):
        col = metadata.schema.column(i)
        print(f" column {i}:")
        for attr_name in dir(col):
            if attr_name.startswith("_"):
                continue
            attr_value = getattr(col, attr_name)
            if callable(attr_value):
                continue
            print(f" {attr_name}: {attr_value}")

    if metadata.num_row_groups == 0:
        # There is no row group 0 to read, so there are no values to dump.
        return

    for icol, (name, chunked_array) in enumerate(zip(metadata.schema.names, parquet.read_row_group(0))):
        print(f"column {icol}:")
        print(f" name: {name}")
        print(f" type: {chunked_array.type}")
        print(f" num-chunks: {chunked_array.num_chunks}")
        print(" data:")
        if chunked_array.num_chunks == 0:
            # Nothing to index into; avoid an IndexError on chunks[0].
            continue
        for i, v in enumerate(chunked_array.chunks[0]):
            if i == args.num:
                break
            print(f" - {v}")


if __name__ == "__main__":
    main()
+
diff --git a/misc/arrow-parquet/gen-parquet-test-files.py b/misc/arrow-parquet/gen-parquet-test-files.py
new file mode 100755
index 0000000..a066536
--- /dev/null
+++ b/misc/arrow-parquet/gen-parquet-test-files.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import pandas as pd
+from pathlib import Path
+
+
def gen_str(pos):
    """Return the fixed test word stored at index ``pos`` of ``gen_str.values``."""
    words = gen_str.values
    return words[pos]


# Fixed pool of eight-letter words used as string test data.
gen_str.values = tuple(
    """
    ubergeek thwarter ironfist turkoman mesozoan seatsale hardtack phyllary
    hydriads stranger cistuses capelets headband dudesses aminases eggwhite
    boxscore upsurges blowlamp dionysia rejecter keratome diasters juddocks
    gownsman sweepsaw chuckeys partyers weredogs exabytes
    """.split()
)
+
+
def main():
    """Write the Parquet test files into ``../../test/parquet/basic``.

    One file holds a single float64 column containing missing values; the
    others hold the same ten-row table of basic column types, written once
    uncompressed and once per compression codec.
    """
    outdir = Path("../../test/parquet/basic")

    # A float64 column with missing values.
    nan_column = "float64 with nan"
    df = pd.DataFrame(data={nan_column: [1.2, 3.4, None, None, 5.6]})
    df[nan_column] = df[nan_column].astype("float64")

    print(df)
    print(df.dtypes)

    df.to_parquet(outdir / "float-with-non.parquet", engine="pyarrow", compression=None)

    # A table covering the basic column types.
    row_size = 10
    indices = range(row_size)
    df = pd.DataFrame(
        data={
            "int32": list(indices),
            "int64": [v * 10 + v for v in indices],
            "float32": [-v for v in indices],
            "float64": [-v - 21 for v in indices],
            "boolean": [(v & 0x01) != 0 for v in indices],
            "string": [gen_str(pos) for pos in indices],
        }
    )
    # Each numeric column is named after the dtype it must be stored as.
    for dtype_name in ("int32", "int64", "float32", "float64"):
        df[dtype_name] = df[dtype_name].astype(dtype_name)

    print(df)
    print(df.dtypes)

    df.to_parquet(outdir / "basic-nocomp.parquet", engine="pyarrow", compression=None)
    for comp in ("gzip", "snappy", "zstd"):
        df.to_parquet(outdir / f"basic-{comp}.parquet", engine="pyarrow", compression=comp)


if __name__ == "__main__":
    main()
+
diff --git a/misc/character-set/data.csv b/misc/character-set/data.csv
new file mode 100644
index 0000000..9a2eead
--- /dev/null
+++ b/misc/character-set/data.csv
@@ -0,0 +1,631 @@
+Preferred MIME Name,Name,Aliases
+US-ASCII,US-ASCII,iso-ir-6
+,,ANSI_X3.4-1968
+,,ANSI_X3.4-1986
+,,ISO_646.irv:1991
+,,ISO646-US
+,,US-ASCII
+,,us
+,,IBM367
+,,cp367
+,,csASCII
+ISO-8859-1,ISO_8859-1:1987,iso-ir-100
+,,ISO_8859-1
+,,ISO-8859-1
+,,latin1
+,,l1
+,,IBM819
+,,CP819
+,,csISOLatin1
+ISO-8859-2,ISO_8859-2:1987,iso-ir-101
+,,ISO_8859-2
+,,ISO-8859-2
+,,latin2
+,,l2
+,,csISOLatin2
+ISO-8859-3,ISO_8859-3:1988,iso-ir-109
+,,ISO_8859-3
+,,ISO-8859-3
+,,latin3
+,,l3
+,,csISOLatin3
+ISO-8859-4,ISO_8859-4:1988,iso-ir-110
+,,ISO_8859-4
+,,ISO-8859-4
+,,latin4
+,,l4
+,,csISOLatin4
+ISO-8859-5,ISO_8859-5:1988,iso-ir-144
+,,ISO_8859-5
+,,ISO-8859-5
+,,cyrillic
+,,csISOLatinCyrillic
+ISO-8859-6,ISO_8859-6:1987,iso-ir-127
+,,ISO_8859-6
+,,ISO-8859-6
+,,ECMA-114
+,,ASMO-708
+,,arabic
+,,csISOLatinArabic
+ISO-8859-7,ISO_8859-7:1987,iso-ir-126
+,,ISO_8859-7
+,,ISO-8859-7
+,,ELOT_928
+,,ECMA-118
+,,greek
+,,greek8
+,,csISOLatinGreek
+ISO-8859-8,ISO_8859-8:1988,iso-ir-138
+,,ISO_8859-8
+,,ISO-8859-8
+,,hebrew
+,,csISOLatinHebrew
+ISO-8859-9,ISO_8859-9:1989,iso-ir-148
+,,ISO_8859-9
+,,ISO-8859-9
+,,latin5
+,,l5
+,,csISOLatin5
+ISO-8859-10,ISO-8859-10,iso-ir-157
+,,l6
+,,ISO_8859-10:1992
+,,csISOLatin6
+,,latin6
+,ISO_6937-2-add,iso-ir-142
+,,csISOTextComm
+,JIS_X0201,X0201
+,,csHalfWidthKatakana
+,JIS_Encoding,csJISEncoding
+Shift_JIS,Shift_JIS,MS_Kanji
+,,csShiftJIS
+EUC-JP,Extended_UNIX_Code_Packed_Format_for_Japanese,csEUCPkdFmtJapanese
+,,EUC-JP
+,Extended_UNIX_Code_Fixed_Width_for_Japanese,csEUCFixWidJapanese
+,BS_4730,iso-ir-4
+,,ISO646-GB
+,,gb
+,,uk
+,,csISO4UnitedKingdom
+,SEN_850200_C,iso-ir-11
+,,ISO646-SE2
+,,se2
+,,csISO11SwedishForNames
+,IT,iso-ir-15
+,,ISO646-IT
+,,csISO15Italian
+,ES,iso-ir-17
+,,ISO646-ES
+,,csISO17Spanish
+,DIN_66003,iso-ir-21
+,,de
+,,ISO646-DE
+,,csISO21German
+,NS_4551-1,iso-ir-60
+,,ISO646-NO
+,,no
+,,csISO60DanishNorwegian
+,,csISO60Norwegian1
+,NF_Z_62-010,iso-ir-69
+,,ISO646-FR
+,,fr
+,,csISO69French
+,ISO-10646-UTF-1,csISO10646UTF1
+,ISO_646.basic:1983,ref
+,,csISO646basic1983
+,INVARIANT,csINVARIANT
+,ISO_646.irv:1983,iso-ir-2
+,,irv
+,,csISO2IntlRefVersion
+,NATS-SEFI,iso-ir-8-1
+,,csNATSSEFI
+,NATS-SEFI-ADD,iso-ir-8-2
+,,csNATSSEFIADD
+,NATS-DANO,iso-ir-9-1
+,,csNATSDANO
+,NATS-DANO-ADD,iso-ir-9-2
+,,csNATSDANOADD
+,SEN_850200_B,iso-ir-10
+,,FI
+,,ISO646-FI
+,,ISO646-SE
+,,se
+,,csISO10Swedish
+,KS_C_5601-1987,iso-ir-149
+,,KS_C_5601-1989
+,,KSC_5601
+,,korean
+,,csKSC56011987
+ISO-2022-KR,ISO-2022-KR,csISO2022KR
+EUC-KR,EUC-KR,csEUCKR
+ISO-2022-JP,ISO-2022-JP,csISO2022JP
+ISO-2022-JP-2,ISO-2022-JP-2,csISO2022JP2
+,JIS_C6220-1969-jp,JIS_C6220-1969
+,,iso-ir-13
+,,katakana
+,,x0201-7
+,,csISO13JISC6220jp
+,JIS_C6220-1969-ro,iso-ir-14
+,,jp
+,,ISO646-JP
+,,csISO14JISC6220ro
+,PT,iso-ir-16
+,,ISO646-PT
+,,csISO16Portuguese
+,greek7-old,iso-ir-18
+,,csISO18Greek7Old
+,latin-greek,iso-ir-19
+,,csISO19LatinGreek
+,NF_Z_62-010_(1973),iso-ir-25
+,,ISO646-FR1
+,,csISO25French
+,Latin-greek-1,iso-ir-27
+,,csISO27LatinGreek1
+,ISO_5427,iso-ir-37
+,,csISO5427Cyrillic
+,JIS_C6226-1978,iso-ir-42
+,,csISO42JISC62261978
+,BS_viewdata,iso-ir-47
+,,csISO47BSViewdata
+,INIS,iso-ir-49
+,,csISO49INIS
+,INIS-8,iso-ir-50
+,,csISO50INIS8
+,INIS-cyrillic,iso-ir-51
+,,csISO51INISCyrillic
+,ISO_5427:1981,iso-ir-54
+,,ISO5427Cyrillic1981
+,,csISO54271981
+,ISO_5428:1980,iso-ir-55
+,,csISO5428Greek
+,GB_1988-80,iso-ir-57
+,,cn
+,,ISO646-CN
+,,csISO57GB1988
+,GB_2312-80,iso-ir-58
+,,chinese
+,,csISO58GB231280
+,NS_4551-2,ISO646-NO2
+,,iso-ir-61
+,,no2
+,,csISO61Norwegian2
+,videotex-suppl,iso-ir-70
+,,csISO70VideotexSupp1
+,PT2,iso-ir-84
+,,ISO646-PT2
+,,csISO84Portuguese2
+,ES2,iso-ir-85
+,,ISO646-ES2
+,,csISO85Spanish2
+,MSZ_7795.3,iso-ir-86
+,,ISO646-HU
+,,hu
+,,csISO86Hungarian
+,JIS_C6226-1983,iso-ir-87
+,,x0208
+,,JIS_X0208-1983
+,,csISO87JISX0208
+,greek7,iso-ir-88
+,,csISO88Greek7
+,ASMO_449,ISO_9036
+,,arabic7
+,,iso-ir-89
+,,csISO89ASMO449
+,iso-ir-90,csISO90
+,JIS_C6229-1984-a,iso-ir-91
+,,jp-ocr-a
+,,csISO91JISC62291984a
+,JIS_C6229-1984-b,iso-ir-92
+,,ISO646-JP-OCR-B
+,,jp-ocr-b
+,,csISO92JISC62991984b
+,JIS_C6229-1984-b-add,iso-ir-93
+,,jp-ocr-b-add
+,,csISO93JIS62291984badd
+,JIS_C6229-1984-hand,iso-ir-94
+,,jp-ocr-hand
+,,csISO94JIS62291984hand
+,JIS_C6229-1984-hand-add,iso-ir-95
+,,jp-ocr-hand-add
+,,csISO95JIS62291984handadd
+,JIS_C6229-1984-kana,iso-ir-96
+,,csISO96JISC62291984kana
+,ISO_2033-1983,iso-ir-98
+,,e13b
+,,csISO2033
+,ANSI_X3.110-1983,iso-ir-99
+,,CSA_T500-1983
+,,NAPLPS
+,,csISO99NAPLPS
+,T.61-7bit,iso-ir-102
+,,csISO102T617bit
+,T.61-8bit,T.61
+,,iso-ir-103
+,,csISO103T618bit
+,ECMA-cyrillic,iso-ir-111
+,,KOI8-E
+,,csISO111ECMACyrillic
+,CSA_Z243.4-1985-1,iso-ir-121
+,,ISO646-CA
+,,csa7-1
+,,csa71
+,,ca
+,,csISO121Canadian1
+,CSA_Z243.4-1985-2,iso-ir-122
+,,ISO646-CA2
+,,csa7-2
+,,csa72
+,,csISO122Canadian2
+,CSA_Z243.4-1985-gr,iso-ir-123
+,,csISO123CSAZ24341985gr
+ISO-8859-6-E,ISO_8859-6-E,csISO88596E
+,,ISO-8859-6-E
+ISO-8859-6-I,ISO_8859-6-I,csISO88596I
+,,ISO-8859-6-I
+,T.101-G2,iso-ir-128
+,,csISO128T101G2
+ISO-8859-8-E,ISO_8859-8-E,csISO88598E
+,,ISO-8859-8-E
+ISO-8859-8-I,ISO_8859-8-I,csISO88598I
+,,ISO-8859-8-I
+,CSN_369103,iso-ir-139
+,,csISO139CSN369103
+,JUS_I.B1.002,iso-ir-141
+,,ISO646-YU
+,,js
+,,yu
+,,csISO141JUSIB1002
+,IEC_P27-1,iso-ir-143
+,,csISO143IECP271
+,JUS_I.B1.003-serb,iso-ir-146
+,,serbian
+,,csISO146Serbian
+,JUS_I.B1.003-mac,macedonian
+,,iso-ir-147
+,,csISO147Macedonian
+,greek-ccitt,iso-ir-150
+,,csISO150
+,,csISO150GreekCCITT
+,NC_NC00-10:81,cuba
+,,iso-ir-151
+,,ISO646-CU
+,,csISO151Cuba
+,ISO_6937-2-25,iso-ir-152
+,,csISO6937Add
+,GOST_19768-74,ST_SEV_358-88
+,,iso-ir-153
+,,csISO153GOST1976874
+,ISO_8859-supp,iso-ir-154
+,,latin1-2-5
+,,csISO8859Supp
+,ISO_10367-box,iso-ir-155
+,,csISO10367Box
+,latin-lap,lap
+,,iso-ir-158
+,,csISO158Lap
+,JIS_X0212-1990,x0212
+,,iso-ir-159
+,,csISO159JISX02121990
+,DS_2089,DS2089
+,,ISO646-DK
+,,dk
+,,csISO646Danish
+,us-dk,csUSDK
+,dk-us,csDKUS
+,KSC5636,ISO646-KR
+,,csKSC5636
+,UNICODE-1-1-UTF-7,csUnicode11UTF7
+,ISO-2022-CN,csISO2022CN
+,ISO-2022-CN-EXT,csISO2022CNEXT
+,UTF-8,csUTF8
+,ISO-8859-13,csISO885913
+,ISO-8859-14,iso-ir-199
+,,ISO_8859-14:1998
+,,ISO_8859-14
+,,latin8
+,,iso-celtic
+,,l8
+,,csISO885914
+,ISO-8859-15,ISO_8859-15
+,,Latin-9
+,,csISO885915
+,ISO-8859-16,iso-ir-226
+,,ISO_8859-16:2001
+,,ISO_8859-16
+,,latin10
+,,l10
+,,csISO885916
+,GBK,CP936
+,,MS936
+,,windows-936
+,,csGBK
+,GB18030,csGB18030
+,OSD_EBCDIC_DF04_15,csOSDEBCDICDF0415
+,OSD_EBCDIC_DF03_IRV,csOSDEBCDICDF03IRV
+,OSD_EBCDIC_DF04_1,csOSDEBCDICDF041
+,ISO-11548-1,ISO_11548-1
+,,ISO_TR_11548-1
+,,csISO115481
+,KZ-1048,STRK1048-2002
+,,RK1048
+,,csKZ1048
+,ISO-10646-UCS-2,csUnicode
+,ISO-10646-UCS-4,csUCS4
+,ISO-10646-UCS-Basic,csUnicodeASCII
+,ISO-10646-Unicode-Latin1,csUnicodeLatin1
+,,ISO-10646
+,ISO-10646-J-1,csUnicodeJapanese
+,ISO-Unicode-IBM-1261,csUnicodeIBM1261
+,ISO-Unicode-IBM-1268,csUnicodeIBM1268
+,ISO-Unicode-IBM-1276,csUnicodeIBM1276
+,ISO-Unicode-IBM-1264,csUnicodeIBM1264
+,ISO-Unicode-IBM-1265,csUnicodeIBM1265
+,UNICODE-1-1,csUnicode11
+,SCSU,csSCSU
+,UTF-7,csUTF7
+,UTF-16BE,csUTF16BE
+,UTF-16LE,csUTF16LE
+,UTF-16,csUTF16
+,CESU-8,csCESU8
+,,csCESU-8
+,UTF-32,csUTF32
+,UTF-32BE,csUTF32BE
+,UTF-32LE,csUTF32LE
+,BOCU-1,csBOCU1
+,,csBOCU-1
+,UTF-7-IMAP,csUTF7IMAP
+,ISO-8859-1-Windows-3.0-Latin-1,csWindows30Latin1
+,ISO-8859-1-Windows-3.1-Latin-1,csWindows31Latin1
+,ISO-8859-2-Windows-Latin-2,csWindows31Latin2
+,ISO-8859-9-Windows-Latin-5,csWindows31Latin5
+,hp-roman8,roman8
+,,r8
+,,csHPRoman8
+,Adobe-Standard-Encoding,csAdobeStandardEncoding
+,Ventura-US,csVenturaUS
+,Ventura-International,csVenturaInternational
+,DEC-MCS,dec
+,,csDECMCS
+,IBM850,cp850
+,,850
+,,csPC850Multilingual
+,PC8-Danish-Norwegian,csPC8DanishNorwegian
+,IBM862,cp862
+,,862
+,,csPC862LatinHebrew
+,PC8-Turkish,csPC8Turkish
+,IBM-Symbols,csIBMSymbols
+,IBM-Thai,csIBMThai
+,HP-Legal,csHPLegal
+,HP-Pi-font,csHPPiFont
+,HP-Math8,csHPMath8
+,Adobe-Symbol-Encoding,csHPPSMath
+,HP-DeskTop,csHPDesktop
+,Ventura-Math,csVenturaMath
+,Microsoft-Publishing,csMicrosoftPublishing
+,Windows-31J,csWindows31J
+GB2312,GB2312,csGB2312
+Big5,Big5,csBig5
+,macintosh,mac
+,,csMacintosh
+,IBM037,cp037
+,,ebcdic-cp-us
+,,ebcdic-cp-ca
+,,ebcdic-cp-wt
+,,ebcdic-cp-nl
+,,csIBM037
+,IBM038,EBCDIC-INT
+,,cp038
+,,csIBM038
+,IBM273,CP273
+,,csIBM273
+,IBM274,EBCDIC-BE
+,,CP274
+,,csIBM274
+,IBM275,EBCDIC-BR
+,,cp275
+,,csIBM275
+,IBM277,EBCDIC-CP-DK
+,,EBCDIC-CP-NO
+,,csIBM277
+,IBM278,CP278
+,,ebcdic-cp-fi
+,,ebcdic-cp-se
+,,csIBM278
+,IBM280,CP280
+,,ebcdic-cp-it
+,,csIBM280
+,IBM281,EBCDIC-JP-E
+,,cp281
+,,csIBM281
+,IBM284,CP284
+,,ebcdic-cp-es
+,,csIBM284
+,IBM285,CP285
+,,ebcdic-cp-gb
+,,csIBM285
+,IBM290,cp290
+,,EBCDIC-JP-kana
+,,csIBM290
+,IBM297,cp297
+,,ebcdic-cp-fr
+,,csIBM297
+,IBM420,cp420
+,,ebcdic-cp-ar1
+,,csIBM420
+,IBM423,cp423
+,,ebcdic-cp-gr
+,,csIBM423
+,IBM424,cp424
+,,ebcdic-cp-he
+,,csIBM424
+,IBM437,cp437
+,,437
+,,csPC8CodePage437
+,IBM500,CP500
+,,ebcdic-cp-be
+,,ebcdic-cp-ch
+,,csIBM500
+,IBM851,cp851
+,,851
+,,csIBM851
+,IBM852,cp852
+,,852
+,,csPCp852
+,IBM855,cp855
+,,855
+,,csIBM855
+,IBM857,cp857
+,,857
+,,csIBM857
+,IBM860,cp860
+,,860
+,,csIBM860
+,IBM861,cp861
+,,861
+,,cp-is
+,,csIBM861
+,IBM863,cp863
+,,863
+,,csIBM863
+,IBM864,cp864
+,,csIBM864
+,IBM865,cp865
+,,865
+,,csIBM865
+,IBM868,CP868
+,,cp-ar
+,,csIBM868
+,IBM869,cp869
+,,869
+,,cp-gr
+,,csIBM869
+,IBM870,CP870
+,,ebcdic-cp-roece
+,,ebcdic-cp-yu
+,,csIBM870
+,IBM871,CP871
+,,ebcdic-cp-is
+,,csIBM871
+,IBM880,cp880
+,,EBCDIC-Cyrillic
+,,csIBM880
+,IBM891,cp891
+,,csIBM891
+,IBM903,cp903
+,,csIBM903
+,IBM904,cp904
+,,904
+,,csIBBM904
+,IBM905,CP905
+,,ebcdic-cp-tr
+,,csIBM905
+,IBM918,CP918
+,,ebcdic-cp-ar2
+,,csIBM918
+,IBM1026,CP1026
+,,csIBM1026
+,EBCDIC-AT-DE,csIBMEBCDICATDE
+,EBCDIC-AT-DE-A,csEBCDICATDEA
+,EBCDIC-CA-FR,csEBCDICCAFR
+,EBCDIC-DK-NO,csEBCDICDKNO
+,EBCDIC-DK-NO-A,csEBCDICDKNOA
+,EBCDIC-FI-SE,csEBCDICFISE
+,EBCDIC-FI-SE-A,csEBCDICFISEA
+,EBCDIC-FR,csEBCDICFR
+,EBCDIC-IT,csEBCDICIT
+,EBCDIC-PT,csEBCDICPT
+,EBCDIC-ES,csEBCDICES
+,EBCDIC-ES-A,csEBCDICESA
+,EBCDIC-ES-S,csEBCDICESS
+,EBCDIC-UK,csEBCDICUK
+,EBCDIC-US,csEBCDICUS
+,UNKNOWN-8BIT,csUnknown8BiT
+,MNEMONIC,csMnemonic
+,MNEM,csMnem
+,VISCII,csVISCII
+,VIQR,csVIQR
+KOI8-R,KOI8-R,csKOI8R
+,HZ-GB-2312,
+,IBM866,cp866
+,,866
+,,csIBM866
+,IBM775,cp775
+,,csPC775Baltic
+,KOI8-U,csKOI8U
+,IBM00858,CCSID00858
+,,CP00858
+,,PC-Multilingual-850+euro
+,,csIBM00858
+,IBM00924,CCSID00924
+,,CP00924
+,,ebcdic-Latin9--euro
+,,csIBM00924
+,IBM01140,CCSID01140
+,,CP01140
+,,ebcdic-us-37+euro
+,,csIBM01140
+,IBM01141,CCSID01141
+,,CP01141
+,,ebcdic-de-273+euro
+,,csIBM01141
+,IBM01142,CCSID01142
+,,CP01142
+,,ebcdic-dk-277+euro
+,,ebcdic-no-277+euro
+,,csIBM01142
+,IBM01143,CCSID01143
+,,CP01143
+,,ebcdic-fi-278+euro
+,,ebcdic-se-278+euro
+,,csIBM01143
+,IBM01144,CCSID01144
+,,CP01144
+,,ebcdic-it-280+euro
+,,csIBM01144
+,IBM01145,CCSID01145
+,,CP01145
+,,ebcdic-es-284+euro
+,,csIBM01145
+,IBM01146,CCSID01146
+,,CP01146
+,,ebcdic-gb-285+euro
+,,csIBM01146
+,IBM01147,CCSID01147
+,,CP01147
+,,ebcdic-fr-297+euro
+,,csIBM01147
+,IBM01148,CCSID01148
+,,CP01148
+,,ebcdic-international-500+euro
+,,csIBM01148
+,IBM01149,CCSID01149
+,,CP01149
+,,ebcdic-is-871+euro
+,,csIBM01149
+,Big5-HKSCS,csBig5HKSCS
+,IBM1047,IBM-1047
+,,csIBM1047
+,PTCP154,csPTCP154
+,,PT154
+,,CP154
+,,Cyrillic-Asian
+,Amiga-1251,Ami1251
+,,Amiga1251
+,,Ami-1251
+,KOI7-switched,csKOI7switched
+,BRF,csBRF
+,TSCII,csTSCII
+,CP51932,csCP51932
+,windows-874,cswindows874
+,windows-1250,cswindows1250
+,windows-1251,cswindows1251
+,windows-1252,cswindows1252
+,windows-1253,cswindows1253
+,windows-1254,cswindows1254
+,windows-1255,cswindows1255
+,windows-1256,cswindows1256
+,windows-1257,cswindows1257
+,windows-1258,cswindows1258
+,TIS-620,csTIS620
+,,ISO-8859-11
+,CP50220,csCP50220
diff --git a/misc/character-set/gen-enum.py b/misc/character-set/gen-enum.py
new file mode 100755
index 0000000..ccb6f95
--- /dev/null
+++ b/misc/character-set/gen-enum.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import argparse
+import csv
+import io
+from pathlib import Path
+
+
+def _cleanse_symbol(s):
+ s = s.replace("-", "_")
+ s = s.replace(":", "_")
+ s = s.replace(".", "_")
+ s = s.replace("(", "")
+ s = s.replace(")", "")
+ return s.lower()
+
+
+def _generate_enum(enum_symbols, outpath):
+ enum_symbols = sorted(enum_symbols)
+ buf = list()
+ buf.append("enum class character_set_t")
+ buf.append("{")
+ buf.append(" unspecified = 0,")
+
+ for entry in enum_symbols:
+ buf.append(f" {entry[0]},")
+
+ buf.append("};")
+
+ outpath.write_text("\n".join(buf))
+
+
+def _generate_map_entries(aliases, outpath):
+ entries = list()
+ for symbol, mapped_strs in aliases.items():
+ for mapped_str in mapped_strs:
+ entries.append((mapped_str.lower(), symbol))
+
+ entries = sorted(entries, key=lambda x: x[0])
+ buf = ["constexpr map_type::entry entries[] = {",]
+
+ for entry in entries:
+ buf.append(f' {{ "{entry[0]}", character_set_t::{entry[1]} }},')
+
+ buf.append("};")
+
+ outpath.write_text("\n".join(buf))
+
+
def main():
    """Generate the enum and map-entry include files from the CSV table.

    Each CSV row has three columns: preferred MIME name, name and alias. A
    row with a MIME name or a name starts a new symbol; a row with neither
    adds one more alias to the symbol started by an earlier row.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--enum-out", type=Path, default=Path("./enum.inl"))
    parser.add_argument("--map-out", type=Path, default=Path("./map-entries.inl"))
    parser.add_argument("filepath", type=Path)
    args = parser.parse_args()

    rows = csv.reader(io.StringIO(args.filepath.read_text()))
    next(rows)  # skip the header row

    aliases = {}
    enum_symbols = []
    symbol = None
    for mime_name, name, alias in rows:
        if mime_name:
            # The MIME name takes precedence as the source of the symbol.
            symbol = _cleanse_symbol(mime_name)
            aliases[symbol] = {mime_name, name}
            enum_symbols.append((symbol, mime_name, name))
        elif name:
            # No MIME name; derive the symbol from the name instead.
            symbol = _cleanse_symbol(name)
            aliases[symbol] = {name}
            enum_symbols.append((symbol, name))
        elif not alias:
            # An alias-only row must actually carry an alias.
            raise RuntimeError("alias must be present.")

        if alias:
            aliases[symbol].add(alias)

    _generate_enum(enum_symbols, args.enum_out)
    _generate_map_entries(aliases, args.map_out)


if __name__ == "__main__":
    main()
+
diff --git a/misc/file-processor-modules/preview.py b/misc/file-processor-modules/preview.py
new file mode 100644
index 0000000..244d8ce
--- /dev/null
+++ b/misc/file-processor-modules/preview.py
@@ -0,0 +1,33 @@
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import orcus
+from orcus.tools.file_processor import config
+
+
def process_document(filepath, doc):
    """Build a short text preview of every sheet in ``doc``.

    For each sheet the preview holds a ``sheet: <name>`` line followed by up
    to ten ``row <i>: <comma-separated values>`` lines, and a ``...`` line
    when the sheet has more rows than that. A cell whose value is ``None``
    is rendered as an empty field; every other value, including ``0`` and
    ``False``, is rendered with ``str()``.

    An exception raised while reading a sheet is recorded as a line in the
    preview rather than propagated, so that one bad sheet does not hide the
    remaining ones.

    Args:
        filepath: path of the document being processed (not used here).
        doc: document object exposing ``sheets``; each sheet exposes ``name``
            and ``get_rows()``, and each cell exposes ``value``.

    Returns:
        list of preview lines.
    """
    buf = list()
    for sh in doc.sheets:
        try:
            buf.append(f"sheet: {sh.name}")
            for i, row in enumerate(sh.get_rows()):
                if i > 9:
                    # Only display the first 10 rows.
                    buf.append("...")
                    break

                # Test against None explicitly so that falsy but meaningful
                # values such as 0 or False are not blanked out.
                row_s = ",".join("" if cell.value is None else str(cell.value) for cell in row)
                buf.append(f"row {i}: {row_s}")
        except Exception as e:
            buf.append(f"???: (exception: {e})")

    return buf
diff --git a/misc/notes/column-width-and-row-height.txt b/misc/notes/column-width-and-row-height.txt
new file mode 100644
index 0000000..336d9c9
--- /dev/null
+++ b/misc/notes/column-width-and-row-height.txt
@@ -0,0 +1,69 @@
+== XLSX ==
+
+Spec name: Office Open XML Part 4 - Markup Language Reference.pdf
+
+=== Column Width ===
+
+<cols>
+ <col min="1" max="2" width="13.140625" customWidth="1"/>
+ <col min="4" max="10" width="6.7109375" customWidth="1"/>
+</cols>
+
+Spec location: 3.3.1.12 col (Column Width & Formatting) (page 1946)
+
+ Column width measured as the number of characters of the maximum
+ digit width of the numbers 0, 1, 2, ..., 9 as rendered in the normal
+ style's font. There are 4 pixels of margin padding (two on each
+ side), plus 1 pixel padding for the gridlines.
+
+ width = Truncate([{Number of Characters} * {Maximum Digit Width} +
+ {5 pixel padding}]/{Maximum Digit Width}*256)/256
+
+ Using the Calibri font as an example, the maximum digit width of 11
+ point font size is 7 pixels (at 96 dpi). In fact, each digit is the
+ same width for this font. Therefore if the cell width is 8
+ characters wide, the value of this attribute shall be
+ Truncate([8*7+5]/7*256)/256 = 8.7109375.
+
+=== Row Height ===
+
+<row r="10" spans="1:10" ht="40.5" customHeight="1">
+ <c r="D10">
+ <v>11.1</v>
+ </c>
+</row>
+
+Spec location: 3.3.1.71 row (Row) (page 2012)
+
+ Row height measured in point size. There is no margin padding on
+ row height.
+
+== ODS ==
+
+=== Column Width & Row Height ===
+
+<style:style style:name="co1" style:family="table-column">
+ <style:table-column-properties fo:break-before="auto" style:column-width="0.8925in"/>
+</style:style>
+<style:style style:name="ro1" style:family="table-row">
+ <style:table-row-properties style:row-height="0.178in" fo:break-before="auto" style:use-optimal-row-height="true"/>
+</style:style>
+
+<table:table table:name="Test1" table:style-name="ta1" table:print="false">
+ <table:table-column table:style-name="co2" table:default-cell-style-name="Default"/>
+ <table:table-column table:style-name="co1" table:default-cell-style-name="Default"/>
+ <table:table-column table:style-name="co4" table:number-columns-repeated="2" table:default-cell-style-name="Default"/>
+ <table:table-row table:style-name="ro2">
+ <table:table-cell office:value-type="string">
+ <text:p>Name</text:p>
+ </table:table-cell>
+ <table:table-cell office:value-type="string">
+ <text:p>Value</text:p>
+ </table:table-cell>
+ <table:table-cell table:number-columns-repeated="6"/>
+ </table:table-row>
+</table:table>
+
+Spec doesn't say much, but it appears that the order of these
+<table:table-column> elements is significant & determines which column
+gets which automatic style, from left to right.
diff --git a/misc/notes/zlib-in-memory-gzip.txt b/misc/notes/zlib-in-memory-gzip.txt
new file mode 100644
index 0000000..acba449
--- /dev/null
+++ b/misc/notes/zlib-in-memory-gzip.txt
@@ -0,0 +1,8 @@
+Some notes on future implementation of in-memory compression and decompression
+of gzip format.
+
+http://www.experts-exchange.com/Programming/System/Windows__Programming/A_3189-In-Memory-Compression-and-Decompression-Using-ZLIB.html
+
+http://www.gzip.org/zlib/zlib_faq.html#faq20
+
+http://stackoverflow.com/questions/16682719/uncompress-data-in-memory-using-boost-gzip-decompressor
diff --git a/misc/xml-tokens/dump-xsd-keys.py b/misc/xml-tokens/dump-xsd-keys.py
new file mode 100755
index 0000000..c1e5012
--- /dev/null
+++ b/misc/xml-tokens/dump-xsd-keys.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+########################################################################
+#
+# Copyright (c) 2013 Kohei Yoshida
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+########################################################################
+
+import xml.parsers.expat, argparse, sys
+import token_util
+
class XMLParser:
    """Collect element/attribute names and namespace declarations from XSD text.

    After ``parse()`` has run, ``tokens`` holds the non-empty ``name``
    attribute values of the element and attribute declarations in document
    order, and ``ns_tokens`` holds one ``[alias, value]`` pair per ``xmlns``
    attribute found on a schema element (``alias`` is the empty string for
    the default namespace).
    """

    # Qualified names of the declarations whose 'name' attribute is collected.
    _DECL_NAMES = ('xs:element', 'xs:attribute', 'xsd:element', 'xsd:attribute')

    def __init__ (self, strm):
        """Store the XML text ``strm`` to be parsed later by ``parse()``."""
        self.__strm = strm
        self.__elem = None
        self.tokens = []
        self.ns_tokens = []

    def start_element(self, name, attrs):
        """Handle an opening tag ``name`` with its attribute dict ``attrs``."""
        self.__elem = name
        # 'in' works on both Python 2 and 3; dict.has_key() is Python 2 only.
        if name in self._DECL_NAMES and 'name' in attrs:
            token = attrs['name']
            if len(token) > 0:
                self.tokens.append(token)

        if name.endswith(':schema'):
            # Check for namespace entries.
            for attr_name in attrs.keys():
                if attr_name == 'xmlns':
                    self.ns_tokens.append(['', attrs[attr_name]])
                elif attr_name.startswith('xmlns:'):
                    vals = attr_name.split(':')
                    self.ns_tokens.append([vals[1], attrs[attr_name]])

    def end_element(self, name):
        """Closing tags carry no information of interest."""
        pass

    def character(self, data):
        """Character data carries no information of interest."""
        pass

    def parse (self):
        """Parse the stored XML text, filling ``tokens`` and ``ns_tokens``."""
        p = xml.parsers.expat.ParserCreate()
        p.StartElementHandler = self.start_element
        p.EndElementHandler = self.end_element
        p.CharacterDataHandler = self.character
        p.Parse(self.__strm, 1)
+
+
def parse_files(filenames):
    """Return the sorted, de-duplicated tokens found in the given XSD files.

    Args:
        filenames: iterable of paths to XML Schema files.

    Returns:
        sorted list of the distinct tokens collected by XMLParser from all
        the files.
    """
    tokens = set()
    for filename in filenames:
        # The context manager closes the file even if reading fails.
        with open(filename, 'r') as file:
            chars = file.read()

        parser = XMLParser(chars)
        parser.parse()
        tokens.update(parser.tokens)

    # sorted() works on both Python 2 and 3, unlike dict.keys().sort().
    return sorted(tokens)
+
+
def parse_files_ns(filenames):
    """Return the namespace values declared in the given XSD files.

    Args:
        filenames: iterable of paths to XML Schema files.

    Returns:
        list of strings of the form ``<value> (<alias>, <alias>, ...)``,
        sorted by namespace value. The aliases of a value are listed in the
        order they were first seen; the empty (default-namespace) alias is
        not listed.
    """
    tokens = {}
    for filename in filenames:
        # The context manager closes the file even if reading fails.
        with open(filename, 'r') as file:
            chars = file.read()

        parser = XMLParser(chars)
        parser.parse()
        for alias, value in parser.ns_tokens:
            # each namespace token consists of an alias and a value.
            aliases = tokens.setdefault(value, [])
            if len(alias) > 0 and alias not in aliases:
                aliases.append(alias)

    # sorted() works on both Python 2 and 3, unlike dict.keys().sort().
    return [key + " (" + ", ".join(tokens[key]) + ")" for key in sorted(tokens)]
+
+
desc = "Given an arbitrary XML Schema file (.xsd), dump all its keys specified in the schema to stdout."

def main ():
    """Print the keys of the XSD files named on the command line.

    By default the element and attribute names are printed, one per line.
    With ``--ns`` the namespace values and their aliases are printed instead.
    """
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('file', nargs='*', help='XML Schema file (.xsd)')
    parser.add_argument('--ns', dest='ns_mode', action='store_true', default=False)
    args = parser.parse_args(sys.argv[1:])
    ns_mode = args.ns_mode

    if ns_mode:
        keys = parse_files_ns(args.file)
    else:
        keys = parse_files(args.file)

    for key in keys:
        print(key)

if __name__ == '__main__':
    main()
+
diff --git a/misc/xml-tokens/gen-gnumeric-tokens.py b/misc/xml-tokens/gen-gnumeric-tokens.py
new file mode 100755
index 0000000..6212a83
--- /dev/null
+++ b/misc/xml-tokens/gen-gnumeric-tokens.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+#************************************************************************
+#
+# Copyright (c) 2010-2012 Kohei Yoshida
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#***********************************************************************
+
+import xml.parsers.expat, sys
+import token_util
+
+class XMLParser:
+    """Collects the 'name' attribute of element/attribute declarations in an XSD.
+
+    After parse() has run, 'tokens' holds the non-empty names in document
+    order; duplicates are kept.
+    """
+
+    def __init__ (self, strm):
+        """strm -- the complete schema document to parse."""
+        self.__strm = strm
+        self.__elem = None
+        self.tokens = []
+
+    def start_element(self, name, attrs):
+        """Record the 'name' attribute of an xs:/xsd: element or attribute declaration."""
+        self.__elem = name
+        # 'in' works on both Python 2 and 3; dict.has_key() was removed in Python 3.
+        if name in ['xs:element', 'xs:attribute', 'xsd:element', 'xsd:attribute'] and 'name' in attrs:
+            token = attrs['name']
+            if len(token) > 0:
+                self.tokens.append(token)
+
+    def end_element(self, name):
+        """End-of-element callback; nothing to do."""
+        pass
+
+    def character(self, data):
+        """Character-data callback; text content is not used."""
+        pass
+
+    def parse (self):
+        """Parse the stored document in a single pass, filling 'tokens'."""
+        p = xml.parsers.expat.ParserCreate()
+        p.StartElementHandler = self.start_element
+        p.EndElementHandler = self.end_element
+        p.CharacterDataHandler = self.character
+        p.Parse(self.__strm, 1)
+
+
+def parse_file(filename):
+    """Return the sorted list of unique tokens found in one schema file.
+
+    filename -- path of the file to read and run through XMLParser.
+    """
+    # 'with' closes the handle even when read() raises.
+    with open(filename, 'r') as file:
+        chars = file.read()
+
+    parser = XMLParser(chars)
+    parser.parse()
+    # sorted(set(...)) de-duplicates and returns a list on Python 2 and 3;
+    # dict.keys().sort() only works on Python 2.
+    return sorted(set(parser.tokens))
+
+
+def main ():
+    """Read the schema named by argv[1] and hand its tokens to token_util.
+
+    argv[2] is passed to gen_token_constants and argv[3] to gen_token_names.
+    """
+    argv = sys.argv
+    collected = parse_file(argv[1])
+    token_util.gen_token_constants(argv[2], collected)
+    token_util.gen_token_names(argv[3], collected)
+
+if __name__ == '__main__':
+    main()
diff --git a/misc/xml-tokens/gen-odf-tokens.py b/misc/xml-tokens/gen-odf-tokens.py
new file mode 100755
index 0000000..2838604
--- /dev/null
+++ b/misc/xml-tokens/gen-odf-tokens.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import xml.parsers.expat
+import sys
+import argparse
+from pathlib import Path
+
+import token_util
+
+
+NS_RNG = "http://relaxng.org/ns/structure/1.0"
+
+
+class TokenParser:
+    """Collects token names from a RELAX NG schema document.
+
+    Tokens come from two places: the 'name' attribute of element/attribute
+    declarations (both the prefix and the local part of "prefix:local" are
+    recorded), and the text content of value elements.
+
+    After parse() has run, 'tokens' is a sorted list of the unique tokens.
+    """
+
+    def __init__ (self, strm):
+        """strm -- the complete schema document to parse."""
+        self.__strm = strm
+        self.__elem = None
+        self.tokens = set()
+
+    def start_element(self, name, attrs):
+        """Record the name parts of an element or attribute declaration.
+
+        Exits the process with status 1 when the name is not of the form
+        "prefix:local".
+        """
+        self.__elem = name
+        if name in {f"{NS_RNG}:element", f"{NS_RNG}:attribute"} and "name" in attrs:
+            parts = attrs['name'].split(':')
+            if len(parts) != 2:
+                sys.stderr.write("unrecognized token type: " + attrs['name'] + "\n")
+                sys.exit(1)
+
+            # Both the prefix and the local name become tokens.
+            self.tokens.update(parts)
+
+    def end_element(self, name):
+        """Forget the current element so that text following a closed
+        element is not attributed to it."""
+        self.__elem = None
+
+    def character(self, data):
+        """Record the stripped, non-empty text content of a value element."""
+        if self.__elem == f"{NS_RNG}:value":
+            s = data.strip()
+            if len(s) > 0:
+                self.tokens.add(s)
+
+    def parse(self):
+        """Parse the stored document and replace 'tokens' with a sorted list."""
+        p = xml.parsers.expat.ParserCreate(encoding="utf-8", namespace_separator=":")
+        # Expat may report one text node in several pieces; buffering makes
+        # the character handler see each value's text as a whole.
+        p.buffer_text = True
+        p.StartElementHandler = self.start_element
+        p.EndElementHandler = self.end_element
+        p.CharacterDataHandler = self.character
+        p.Parse(self.__strm, 1)
+
+        self.tokens = sorted(self.tokens)
+
+
+class NSParser:
+    """Extracts the prefixed namespace declarations from a schema's grammar element.
+
+    After parse() has run, 'ns_values' is a list of (prefix, namespace)
+    pairs ordered by prefix, with the RELAX NG namespace itself left out.
+    """
+
+    def __init__ (self, strm):
+        """strm -- the complete schema document to parse."""
+        self.__strm = strm
+        self.__elem = None
+        self.ns_values = dict() # namespace values
+
+    def start_element(self, name, attrs):
+        """Pick up every 'xmlns:<prefix>' attribute of an element whose name ends with 'grammar'."""
+        self.__elem = name
+        if not name.endswith("grammar"):
+            return
+
+        for attr_name, attr_value in attrs.items():
+            pieces = attr_name.split(':')
+            if len(pieces) >= 2 and pieces[0] == "xmlns":
+                self.ns_values[pieces[1]] = attr_value
+
+    def parse(self):
+        """Parse the stored document and replace 'ns_values' with the sorted pair list."""
+        expat_parser = xml.parsers.expat.ParserCreate(encoding="utf-8")
+        expat_parser.StartElementHandler = self.start_element
+        expat_parser.Parse(self.__strm, 1)
+
+        # skip the relaxNG namespace since it is only used in the schema document.
+        kept = [
+            (prefix, uri)
+            for prefix, uri in self.ns_values.items()
+            if uri != "http://relaxng.org/ns/structure/1.0"
+        ]
+        kept.sort(key=lambda pair: pair[0])
+        self.ns_values = kept
+
+
+def gen_namespace_tokens(filepath, ns_values):
+    """Write the C++ namespace-constant include files.
+
+    Two files are produced: '<filepath>_hpp.inl' with the extern
+    declarations and '<filepath>_cpp.inl' with the definitions plus a
+    nullptr-terminated array of all the constants.
+
+    filepath  -- file name prefix shared by both outputs.
+    ns_values -- sequence of (key, value) pairs; key becomes the constant's
+                 suffix (NS_odf_<key>) and value its string content.
+    """
+    # header (.hpp)
+    # 'with' guarantees the file is closed even if a write fails.
+    with open(filepath + "_hpp.inl", 'w') as outfile:
+        outfile.write("namespace orcus {\n\n")
+        for key, _ in ns_values:
+            outfile.write(f"extern const xmlns_id_t NS_odf_{key};\n")
+        outfile.write("\nextern const xmlns_id_t* NS_odf_all;\n")
+        outfile.write("\n}\n\n")
+
+    # source (.cpp)
+    with open(filepath + "_cpp.inl", 'w') as outfile:
+        outfile.write("namespace orcus {\n\n")
+        for key, value in ns_values:
+            outfile.write(f"const xmlns_id_t NS_odf_{key} = \"{value}\";\n")
+
+        outfile.write("\n")
+        outfile.write("namespace {\n\n")
+        outfile.write("const xmlns_id_t odf_ns[] = {\n")
+        for key, _ in ns_values:
+            outfile.write(f" NS_odf_{key},\n")
+        outfile.write(" nullptr\n")
+        outfile.write("};\n\n")
+        outfile.write("} // anonymous\n\n")
+
+        outfile.write("const xmlns_id_t* NS_odf_all = odf_ns;\n\n")
+
+        outfile.write("}\n\n")
+
+
+def main():
+    """Command-line entry point: extract tokens and namespaces from an ODF RNG schema.
+
+    The schema is parsed twice, once for tokens (TokenParser) and once for
+    namespace declarations (NSParser). Each output is written only when
+    its corresponding option is given.
+    """
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        "--ns-file-prefix", type=str,
+        help="file name prefix for optional namespace constant files")
+    arg_parser.add_argument(
+        "--summary-output", type=Path,
+        help="optional output file to write collected token data summary")
+    arg_parser.add_argument(
+        "--token-constants", type=Path,
+        help="path to C++ output file where token constants are to be written to")
+    arg_parser.add_argument(
+        "--token-names", type=Path,
+        help="path to C++ output file where token names are to be written to")
+    arg_parser.add_argument(
+        "odf_schema", metavar="ODF-SCHEMA", type=Path, help="path to RNG ODF schema file")
+    args = arg_parser.parse_args()
+
+    if not args.odf_schema.is_file():
+        print(f"{args.odf_schema} is not a valid file.", file=sys.stderr)
+        sys.exit(1)
+
+    schema_content = args.odf_schema.read_text()
+
+    token_parser = TokenParser(schema_content)
+    token_parser.parse()
+    tokens = token_parser.tokens
+
+    ns_parser = NSParser(schema_content)
+    ns_parser.parse()
+    ns_values = ns_parser.ns_values
+
+    if args.summary_output:
+        summary_content_buf = list()
+        summary_content_buf.append("list of tokens:")
+
+        for token in tokens:
+            summary_content_buf.append(f"- \"{token}\"")
+
+        summary_content_buf.append("list of namespaces:")
+
+        for ns, value in ns_values:
+            summary_content_buf.append(f"- {ns}: \"{value}\"")
+
+        args.summary_output.write_text("\n".join(summary_content_buf))
+
+    if args.token_constants:
+        with open(args.token_constants, "w") as f:
+            token_util.gen_token_constants(f, tokens)
+
+    if args.token_names:
+        with open(args.token_names, "w") as f:
+            token_util.gen_token_names(f, tokens)
+
+    if args.ns_file_prefix is not None:
+        gen_namespace_tokens(args.ns_file_prefix, ns_values)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/misc/xml-tokens/gen-ooxml-tokens.py b/misc/xml-tokens/gen-ooxml-tokens.py
new file mode 100755
index 0000000..cee4d6f
--- /dev/null
+++ b/misc/xml-tokens/gen-ooxml-tokens.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import xml.parsers.expat
+import zipfile
+import argparse
+import sys
+import token_util
+
+
+class XMLParser:
+    """Gathers the 'name' attribute of XSD element and attribute declarations.
+
+    Once parse() has run, 'tokens' lists the non-empty names in document
+    order, duplicates included.
+    """
+
+    def __init__ (self, strm):
+        """strm -- the complete schema document to parse."""
+        self.__strm = strm
+        self.__elem = None
+        self.tokens = []
+
+    def start_element(self, name, attrs):
+        """Append the declaration's name when the tag is an xs:/xsd: element or attribute."""
+        self.__elem = name
+        if name not in ('xs:element', 'xs:attribute', 'xsd:element', 'xsd:attribute'):
+            return
+
+        declared = attrs.get("name")
+        # A missing attribute (None) and an empty name are both skipped.
+        if declared:
+            self.tokens.append(declared)
+
+    def end_element(self, name):
+        """End-of-element callback; no action is taken."""
+
+    def character(self, data):
+        """Character-data callback; the text is not used."""
+
+    def parse (self):
+        """Run expat over the stored document in one pass, filling 'tokens'."""
+        expat_parser = xml.parsers.expat.ParserCreate()
+        expat_parser.CharacterDataHandler = self.character
+        expat_parser.EndElementHandler = self.end_element
+        expat_parser.StartElementHandler = self.start_element
+        expat_parser.Parse(self.__strm, 1)
+
+
+def get_all_tokens_from_zip(fpath):
+    """Return the set of tokens found across every member of a zip archive.
+
+    fpath -- the zip file to open (passed straight to zipfile.ZipFile).
+
+    Every entry listed by the archive is read and run through XMLParser;
+    the tokens of all entries are merged into one set.
+    """
+    tokens = set()
+    # 'archive' avoids shadowing the builtin zip().
+    with zipfile.ZipFile(fpath, 'r') as archive:
+        for item in archive.namelist():
+            # 'with' closes the member handle even when read() raises.
+            with archive.open(item, 'r') as fd:
+                content = fd.read()
+
+            parser = XMLParser(content)
+            parser.parse()
+            tokens.update(parser.tokens)
+
+    return tokens
+
+
+def main():
+    """Command-line entry point: generate token files from a zip of schemas.
+
+    Tokens are gathered from the input archive, optionally extended with
+    the stripped lines of the extra-input file, sorted, and handed to
+    token_util together with the two output files.
+    """
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        "-i", "--input", required=True, type=str,
+        help="Zip file containing schemas.")
+    arg_parser.add_argument(
+        "--extra-input", type=argparse.FileType("r"),
+        help="Optional input file containing extra token names.")
+    arg_parser.add_argument(
+        "constant_file", metavar="CONSTANT-FILE", nargs=1, type=argparse.FileType("w"),
+        help="Output file to store constant values.")
+    arg_parser.add_argument(
+        "name_file", metavar="NAME-FILE", nargs=1, type=argparse.FileType("w"),
+        help="Output file to store constant string names.")
+    args = arg_parser.parse_args()
+
+    collected = get_all_tokens_from_zip(args.input)
+
+    if args.extra_input:
+        collected.update(line.strip() for line in args.extra_input)
+
+    ordered = sorted(collected)
+    # nargs=1 wraps each output file in a one-element list.
+    token_util.gen_token_constants(args.constant_file[0], ordered)
+    token_util.gen_token_names(args.name_file[0], ordered)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/misc/xml-tokens/gen-tokens.py b/misc/xml-tokens/gen-tokens.py
new file mode 100755
index 0000000..f344e2e
--- /dev/null
+++ b/misc/xml-tokens/gen-tokens.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import token_util
+import argparse
+import sys
+from pathlib import Path
+
+
+# Program description shown verbatim by argparse (RawTextHelpFormatter).
+desc = """Generate C++ source files from a list of tokens.
+
+To generate tokens files for Excel 2003 XML (xls-xml), run
+
+ %(prog)s xls-xml-tokens.txt \\
+ ../../src/liborcus/xls_xml_token_constants.inl \\
+ ../../src/liborcus/xls_xml_tokens.inl \\
+"""
+
+def main ():
+    """Command-line entry point: turn a plain-text token list into token files.
+
+    Each line of the token list is stripped of surrounding whitespace; the
+    unique results are sorted and passed to token_util along with the two
+    output paths.
+    """
+    arg_parser = argparse.ArgumentParser(
+        description=desc,
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+    arg_parser.add_argument("tokenlist", type=Path, help="plain-text file that contains a list of tokens.")
+    arg_parser.add_argument("output1", type=Path, help="output file that will contain XML token values.")
+    arg_parser.add_argument("output2", type=Path, help="output file that will contain XML token names.")
+    args = arg_parser.parse_args()
+
+    with open(args.tokenlist, "r") as f:
+        unique_tokens = {line.strip() for line in f}
+
+    ordered = sorted(unique_tokens)
+    token_util.gen_token_constants(args.output1, ordered)
+    token_util.gen_token_names(args.output2, ordered)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/misc/xml-tokens/gnumeric.xsd b/misc/xml-tokens/gnumeric.xsd
new file mode 100755
index 0000000..a2c79cf
--- /dev/null
+++ b/misc/xml-tokens/gnumeric.xsd
@@ -0,0 +1,1296 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
+ xmlns:gnm="http://www.gnumeric.org/v10.dtd"
+ xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
+ targetNamespace="http://www.gnumeric.org/v10.dtd"
+ elementFormDefault="qualified"
+ attributeFormDefault="unqualified">
+
+ <xs:annotation>
+ <xs:documentation xml:lang="en">
+ Schema for gnumeric spreadsheet documents
+ Created for Gnumeric 1.2.2,
+ Based on gnumeric source code and sample gnumeric documents
+ Author: Marc Johnson (marc_johnson27591@hotmail.com)
+
+ updated for 1.4.2 in Feb 2005
+ updated for 1.6.0 in Sept 2005
+ updated for 1.7.1 in Jun 2006
+ updated for 1.7.11 in Jun 2007 jody@gnome.org
+ updated for 1.7.91 in Nov 2007 jody@gnome.org
+ partially updated for 1.10.17 in July 2011 aguelzow@pyrshep.ca
+ </xs:documentation>
+ </xs:annotation>
+
+ <xs:element name="Workbook" type="gnm:Workbook"/>
+
+ <xs:complexType name="Version">
+ <xs:attribute name="Epoch" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="Major" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="Minor" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="Full" type="xs:string" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:complexType name="Calculation">
+ <xs:attribute name="ManualRecalc" type="xs:boolean"/>
+ <xs:attribute name="EnableIteration" type="xs:boolean"/>
+ <xs:attribute name="MaxIterations" type="xs:nonNegativeInteger"/>
+ <xs:attribute name="FloatRadix" type="xs:positiveInteger"/>
+ <xs:attribute name="FloatDigits" type="xs:nonNegativeInteger"/>
+ <xs:attribute name="IterationTolerance" type="xs:double"/>
+ <xs:attribute name="DateConvention" type="gnm:DateConvention" use="optional" default="Lotus:1900"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:group name="WorkbookContentBlockA">
+ <xs:sequence>
+ <!-- ancient files lack this, modern files require it -->
+ <xs:element name="SheetNameIndex" type="gnm:SheetNameIndex" minOccurs="0" maxOccurs="1"/>
+
+ <xs:element name="Names" type="gnm:Names" minOccurs="0" maxOccurs="1"/>
+
+ <!-- Deprecated, moved to Calculation and expanded in 1.7.11
+ Valid value == 1904, anything else == Lotus:1900 -->
+ <xs:element name="DateConvention" type="xs:int" minOccurs="0" maxOccurs="1"/>
+
+ <!-- preferred height and width -->
+ <xs:element name="Geometry">
+ <xs:complexType>
+ <xs:attribute name="Width" type="xs:nonNegativeInteger" use="optional"/>
+ <xs:attribute name="Height" type="xs:nonNegativeInteger" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="Sheets" type="gnm:Sheets"/>
+ <!-- which sheet was selected when the spreadsheet was saved? -->
+ <xs:element name="UIData">
+ <xs:complexType>
+ <xs:attribute name="SelectedTab" type="xs:nonNegativeInteger" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ </xs:group>
+
+ <xs:complexType name="Workbook">
+ <xs:sequence>
+ <xs:element name="Version" type="gnm:Version" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Attributes" type="gnm:Attributes"/>
+
+ <!-- ignore office:document-meta -->
+ <xs:any minOccurs="0" maxOccurs="unbounded"
+ namespace="##other" processContents="lax"/>
+
+ <xs:choice>
+ <xs:sequence>
+ <!-- In new files the Calculation element comes first, in old files last -->
+ <xs:element name="Calculation" type="gnm:Calculation" minOccurs="0" maxOccurs="1"/>
+ <xs:group ref="gnm:WorkbookContentBlockA"/>
+ </xs:sequence>
+ <xs:sequence>
+ <xs:group ref="gnm:WorkbookContentBlockA"/>
+ <xs:element name="Calculation" type="gnm:Calculation" minOccurs="0" maxOccurs="1"/>
+ </xs:sequence>
+ </xs:choice>
+ </xs:sequence>
+ <!-- version number should be v10 - - &gt;
+ <xs:attribute name="gnm" type="gnm:namespace" use="required"/> -->
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <!-- How values are mapped to dates. All date functions are affected -->
+ <xs:simpleType name="DateConvention">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="Lotus:1900"/>
+ <xs:enumeration value="Apple:1904"/>
+
+ <!-- future values. currently treated as Lotus:1900 -->
+ <xs:enumeration value="ODF:1899"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="CellRef">
+ <xs:restriction base="xs:string">
+ <xs:pattern value="[A-Z]+\d+"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:complexType name="Names">
+ <xs:sequence>
+ <xs:element name="Name" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:sequence>
+ <!-- an oversight in the sax exporter (fixed in 1.4.3)
+ leaves off the namespace. Handle both formats.
+ -->
+ <xs:choice>
+ <xs:element name="name" type="xs:string"/>
+ <xs:element name="name" type="xs:string" form="unqualified"/>
+ </xs:choice>
+ <xs:choice>
+ <xs:element name="value" type="xs:string"/>
+ <xs:element name="value" type="xs:string" form="unqualified"/>
+ </xs:choice>
+ <xs:choice>
+ <xs:element name="position" type="xs:string"/>
+ <xs:element name="position" type="xs:string" form="unqualified"/>
+ </xs:choice>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ </xs:complexType>
+
+ <xs:complexType name="Attributes">
+ <xs:sequence>
+ <xs:element name="Attribute" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="type" minOccurs="0" maxOccurs="1" type="gnm:AttributeType"/>
+ <!-- must be 4 -->
+ <xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/>
+ <xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ </xs:complexType>
+ <xs:simpleType name="AttributeType">
+ <xs:restriction base="xs:integer">
+ <xs:minInclusive value="4"/>
+ <xs:maxInclusive value="4"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:complexType name="SheetNameIndex">
+ <xs:sequence>
+ <xs:element name="SheetName" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:simpleContent>
+ <xs:extension base="xs:string">
+ <xs:attribute name="Cols" type="xs:positiveInteger" form="qualified"/>
+ <xs:attribute name="Rows" type="xs:positiveInteger" form="qualified"/>
+ </xs:extension>
+ </xs:simpleContent>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ </xs:complexType>
+
+ <xs:complexType name="Sheets">
+ <xs:sequence>
+ <xs:element name="Sheet" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:sequence>
+ <!-- name of the sheet -->
+ <xs:element name="Name" type="xs:string"/>
+ <!-- maximum column used -->
+ <xs:element name="MaxCol" type="xs:integer" minOccurs="0" maxOccurs="1"/>
+ <!-- maximum row used -->
+ <xs:element name="MaxRow" type="xs:integer" minOccurs="0" maxOccurs="1"/>
+ <!-- most recently used zoom factor -->
+ <xs:element name="Zoom" type="xs:double"/>
+ <xs:element name="Names" type="gnm:Names" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="PrintInformation" type="gnm:PrintInformation"/>
+ <xs:element name="Styles" type="gnm:Styles"/>
+ <xs:element name="Cols">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="ColInfo" type="gnm:Col_Row" minOccurs="0" maxOccurs="unbounded"/>
+ </xs:sequence>
+ <xs:attribute name="DefaultSizePts" type="xs:double" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="Rows">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="RowInfo" type="gnm:Col_Row" minOccurs="0" maxOccurs="unbounded"/>
+ </xs:sequence>
+ <xs:attribute name="DefaultSizePts" type="xs:double" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="Selections" type="gnm:Selections"/>
+ <xs:element name="Objects" type="gnm:Objects" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Cells" type="gnm:Cells"/>
+ <xs:element name="MergedRegions" type="gnm:MergedRegions" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="SheetLayout" type="gnm:SheetLayout" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Filters" type="gnm:Filters" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Solver" type="gnm:Solver" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Scenarios" type="gnm:Scenarios" minOccurs="0" maxOccurs="1"/>
+ </xs:sequence>
+ <!-- note: xs:boolean values can be true, false, 1, 0;
+ gnumeric, in this element, always generates 'true' or
+ 'false' for its attributes
+ -->
+ <xs:attribute name="DisplayFormulas" type="xs:boolean" use="optional" default="false"/>
+ <xs:attribute name="HideZero" type="xs:boolean" use="optional" default="false"/>
+ <xs:attribute name="HideGrid" type="xs:boolean" use="optional" default="false"/>
+ <xs:attribute name="HideColHeader" type="xs:boolean" use="optional" default="false"/>
+ <xs:attribute name="HideRowHeader" type="xs:boolean" use="optional" default="false"/>
+ <xs:attribute name="DisplayOutlines" type="xs:boolean" use="optional" default="true"/>
+ <xs:attribute name="OutlineSymbolsBelow" type="xs:boolean" use="optional" default="true"/>
+ <xs:attribute name="OutlineSymbolsRight" type="xs:boolean" use="optional" default="true"/>
+ <xs:attribute name="Visibility" type="gnm:SheetVisibility" use="optional" default="GNM_SHEET_VISIBILITY_VISIBLE"/>
+ <xs:attribute name="RTL_Layout" type="xs:boolean" use="optional" default="false"/>
+ <xs:attribute name="Protected" type="xs:boolean" use="optional" default="false"/>
+ <xs:attribute name="TabColor" type="gnm:color" use="optional"/>
+ <xs:attribute name="TabTextColor" type="gnm:color" use="optional"/>
+ <xs:attribute name="GridColor" type="gnm:color" use="optional"/>
+ <xs:attribute name="ExprConvention" type="gnm:ExprConvention" use="optional" default="gnumeric:A1"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ </xs:complexType>
+
+ <!-- How expressions are _displayed_ no impact on evaluation -->
+ <xs:simpleType name="ExprConvention">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="gnumeric:A1"/>
+ <xs:enumeration value="gnumeric:R1C1"/>
+
+ <!-- future values, currently displayed as gnumeric:A1 -->
+ <xs:enumeration value="ODF:A1"/>
+ <xs:enumeration value="Lotus:A1"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:complexType name="PrintInformation">
+ <xs:choice minOccurs="0" maxOccurs="unbounded">
+ <xs:element name="Margins">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="top" type="gnm:margin"/>
+ <xs:element name="bottom" type="gnm:margin"/>
+ <xs:element name="left" type="gnm:margin" minOccurs="0" maxOccurs="unbounded"/>
+ <xs:element name="right" type="gnm:margin" minOccurs="0" maxOccurs="unbounded"/>
+ <xs:element name="header" type="gnm:margin" minOccurs="0" maxOccurs="unbounded"/>
+ <xs:element name="footer" type="gnm:margin" minOccurs="0" maxOccurs="unbounded"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="Scale">
+ <xs:complexType>
+ <xs:attribute name="type" type="xs:string" use="required"/>
+ <xs:attribute name="percentage" type="xs:double" use="optional"/>
+ <xs:attribute name="cols" type="xs:integer" />
+ <xs:attribute name="rows" type="xs:integer" />
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <!-- center vertically -->
+ <xs:element name="vcenter">
+ <xs:complexType>
+ <xs:attribute name="value" type="xs:boolean" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <!-- center horizontally -->
+ <xs:element name="hcenter">
+ <xs:complexType>
+ <xs:attribute name="value" type="xs:boolean" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <!-- print grid lines -->
+ <xs:element name="grid">
+ <xs:complexType>
+ <xs:attribute name="value" type="xs:boolean" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <!-- print even if only styles -->
+ <xs:element name="even_if_only_styles">
+ <xs:complexType>
+ <xs:attribute name="value" type="xs:boolean" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <!-- print in monochrome -->
+ <xs:element name="monochrome">
+ <xs:complexType>
+ <xs:attribute name="value" type="xs:boolean" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <!-- print in draft mode -->
+ <xs:element name="draft">
+ <xs:complexType>
+ <xs:attribute name="value" type="xs:boolean" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <!-- print titles -->
+ <xs:element name="titles">
+ <xs:complexType>
+ <xs:attribute name="value" type="xs:boolean" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+
+ <!-- repeat range -->
+ <xs:element name="repeat_top" minOccurs="0" maxOccurs="1">
+ <xs:complexType>
+ <xs:attribute name="value" type="xs:string" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <!-- repeat range -->
+ <xs:element name="repeat_left" minOccurs="0" maxOccurs="1">
+ <xs:complexType>
+ <xs:attribute name="value" type="xs:string" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="order" type="gnm:print_ordering"/>
+ <xs:element name="orientation" type="gnm:print_orientation"/>
+ <xs:element name="Header" type="gnm:HeaderFooter"/>
+ <xs:element name="Footer" type="gnm:HeaderFooter"/>
+ <!-- typical values are A4, US-Letter -->
+ <xs:element name="paper" type="xs:string" minOccurs="0" maxOccurs="1"/>
+ <!-- do not print this sheet when "all" is selected in the job -->
+ <xs:element name="do_not_print" minOccurs="0" maxOccurs="1">
+ <xs:complexType>
+ <xs:attribute name="value" type="xs:boolean" use="required"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="print-to-uri" type="xs:string" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="vPageBreaks" type="gnm:PageBreaks" minOccurs="0" maxOccurs="1"/>
+ <!-- between rows -->
+ <xs:element name="hPageBreaks" type="gnm:PageBreaks" minOccurs="0" maxOccurs="1"/>
+ <!-- between cols -->
+ <xs:element name="print_range" minOccurs="0" maxOccurs="1">
+ <xs:complexType>
+ <xs:attribute name="value" use="required">
+ <xs:simpleType>
+ <xs:restriction base="xs:integer">
+ <xs:enumeration value="-1"/>
+ <!-- PRINT_SAVED_INFO -->
+ <xs:enumeration value="0"/>
+ <!-- PRINT_ACTIVE_SHEET -->
+ <xs:enumeration value="1"/>
+ <!-- PRINT_ALL_SHEETS -->
+ <xs:enumeration value="2"/>
+ <!-- PRINT_ALL_SHEETS_INCLUDING_HIDDEN -->
+ <xs:enumeration value="3"/>
+ <!-- PRINT_SHEET_RANGE -->
+ <xs:enumeration value="4"/>
+ <!-- PRINT_SHEET_SELECTION -->
+ <xs:enumeration value="5"/>
+ <!-- PRINT_IGNORE_PRINTAREA -->
+ <xs:enumeration value="6"/>
+ <!-- PRINT_SHEET_SELECTION_IGNORE_PRINTAREA -->
+ </xs:restriction>
+ </xs:simpleType>
+ </xs:attribute>
+ </xs:complexType>
+ </xs:element>
+ </xs:choice>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:complexType name="HeaderFooter">
+ <xs:attribute name="Left" type="xs:string" use="optional"/>
+ <xs:attribute name="Middle" type="xs:string" use="optional"/>
+ <xs:attribute name="Right" type="xs:string" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:complexType name="PageBreaks">
+ <xs:sequence>
+ <xs:element name="break" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:attribute name="pos" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="type" type="gnm:PageBreakType" use="optional" default="auto"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ <xs:attribute name="count" type="xs:nonNegativeInteger" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:simpleType name="PageBreakType">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="auto"/>
+ <xs:enumeration value="manual"/>
+ <xs:enumeration value="data-slice"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:complexType name="margin">
+ <xs:attribute name="Points" type="xs:double"/>
+ <xs:attribute name="PrefUnit" type="gnm:print_units"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:simpleType name="print_units">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="mm"/>
+ <xs:enumeration value="millimeter"/>
+ <xs:enumeration value="cm"/>
+ <xs:enumeration value="centimeter"/>
+ <xs:enumeration value="in"/>
+ <xs:enumeration value="inch"/>
+ <xs:enumeration value="inches"/>
+ <xs:enumeration value="Pt"/>
+ <xs:enumeration value="Pts"/>
+ <xs:enumeration value="points"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="print_ordering">
+ <xs:restriction base="xs:string">
+ <!-- right, then down -->
+ <xs:enumeration value="r_then_d"/>
+ <!-- down, then right -->
+ <xs:enumeration value="d_then_r"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="print_orientation">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="landscape"/>
+ <xs:enumeration value="portrait"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:complexType name="Styles">
+ <xs:sequence>
+ <xs:element name="StyleRegion" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="Style" type="gnm:Style" minOccurs="1" maxOccurs="1"/>
+ </xs:sequence>
+ <xs:attribute name="startCol" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="startRow" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="endCol" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="endRow" type="xs:nonNegativeInteger" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ </xs:complexType>
+
+ <xs:complexType name="Style">
+ <xs:sequence>
+ <xs:element name="Font" minOccurs="0" maxOccurs="1">
+ <xs:complexType>
+ <xs:simpleContent>
+ <!-- Since these might be conditional styles, the attributes are
+ not required -->
+ <xs:extension base="xs:string">
+ <xs:attribute name="Unit" type="xs:double" use="optional"/>
+ <xs:attribute name="Bold" type="xs:boolean" use="optional"/>
+ <xs:attribute name="Italic" type="xs:boolean" use="optional"/>
+ <xs:attribute name="Underline" type="gnm:underline" use="optional"/>
+ <xs:attribute name="StrikeThrough" type="xs:boolean" use="optional"/>
+ <xs:attribute name="Script" type="gnm:script" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:extension>
+ </xs:simpleContent>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="HyperLink" minOccurs="0" maxOccurs="1">
+ <xs:complexType>
+ <xs:attribute name="type" type="xs:string" use="required"/>
+ <xs:attribute name="target" type="xs:string" use="optional"/>
+ <xs:attribute name="tip" type="xs:string" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="StyleBorder" minOccurs="0" maxOccurs="1">
+ <xs:complexType>
+ <xs:choice minOccurs="0" maxOccurs="6">
+ <!-- The order of the last two elements appears to be
+ version dependent-->
+ <xs:element name="Top" type="gnm:StyleBorderElement" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Bottom" type="gnm:StyleBorderElement" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Left" type="gnm:StyleBorderElement" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Right" type="gnm:StyleBorderElement" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Diagonal" type="gnm:StyleBorderElement" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Rev-Diagonal" type="gnm:StyleBorderElement" minOccurs="0" maxOccurs="1"/>
+ </xs:choice>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="Validation" minOccurs="0" maxOccurs="1">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="Expression0" type="xs:string" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Expression1" type="xs:string" minOccurs="0" maxOccurs="1"/>
+ </xs:sequence>
+ <xs:attribute name="Style" type="xs:integer" use="required"/>
+ <xs:attribute name="Type" type="xs:integer" use="required"/>
+ <xs:attribute name="Operator" type="xs:integer" use="optional"/>
+ <xs:attribute name="AllowBlank" type="xs:boolean" use="optional"/>
+ <xs:attribute name="UseDropdown" type="xs:boolean" use="optional"/>
+ <xs:attribute name="Title" type="xs:string" use="optional"/>
+ <xs:attribute name="Message" type="xs:string" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name="InputMessage" minOccurs="0" maxOccurs="1">
+ <xs:complexType>
+ <xs:attribute name="Title" type="xs:string" use="optional"/>
+ <xs:attribute name="Message" type="xs:string" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="Condition" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="Expression0" type="xs:string" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Expression1" type="xs:string" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="Style" type="gnm:Style" minOccurs="1" maxOccurs="1"/>
+ </xs:sequence>
+ <xs:attribute name="Operator" type="gnm:CondOp" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ <xs:attribute name="HAlign" type="gnm:horizontal_alignment"/>
+ <xs:attribute name="VAlign" type="gnm:vertical_alignment"/>
+ <xs:attribute name="WrapText" type="xs:boolean" />
+
+ <!-- should be bool, but some files have odd truth values -->
+ <xs:attribute name="ShrinkToFit" type="xs:integer" />
+
+ <xs:attribute name="Rotation" type="xs:integer" use="optional"/>
+ <xs:attribute name="Orient" type="xs:integer" use="optional"/>
+ <xs:attribute name="Shade" type="gnm:Stipple" use="optional"/>
+ <xs:attribute name="Indent" type="xs:integer" use="optional"/>
+ <xs:attribute name="Locked" type="xs:boolean" use="optional"/>
+ <xs:attribute name="Hidden" type="xs:boolean" use="optional"/>
+ <xs:attribute name="Fore" type="gnm:color" use="optional"/>
+ <xs:attribute name="Back" type="gnm:color" use="optional"/>
+ <xs:attribute name="PatternColor" type="gnm:color" use="optional"/>
+ <xs:attribute name="Format" type="xs:string" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:complexType name="StyleBorderElement">
+ <xs:attribute name="Style" type="gnm:border_style" use="required"/>
+ <!-- Color is present when Style is not 0 -->
+ <xs:attribute name="Color" type="gnm:color" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:simpleType name="border_style">
+ <xs:restriction base="xs:integer">
+ <!-- 0 = NONE
+ 1 = THIN
+ 2 = MEDIUM
+ 3 = DASHED
+ 4 = DOTTED
+ 5 = THICK
+ 6 = DOUBLE
+ 7 = HAIR
+ 8 = MEDIUM_DASH
+ 9 = DASH_DOT
+ 10 = MEDIUM_DASH_DOT
+ 11 = DASH_DOT_DOT
+ 12 = MEDIUM_DASH_DOT_DOT
+ 13 = SLANTED_DASH_DOT
+ -->
+ <xs:minInclusive value="0"/>
+ <xs:maxInclusive value="13"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="underline">
+ <xs:restriction base="xs:integer">
+ <!-- 0 = NONE
+ 1 = SINGLE
+ 2 = DOUBLE
+ -->
+ <xs:minInclusive value="0"/>
+ <xs:maxInclusive value="2"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="script">
+ <xs:restriction base="xs:integer">
+ <!-- GO_FONT_SCRIPT_SUB = -1,
+ GO_FONT_SCRIPT_STANDARD = 0,
+ GO_FONT_SCRIPT_SUPER = 1
+ -->
+ <xs:minInclusive value="-1"/>
+ <xs:maxInclusive value="1"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="horizontal_alignment">
+ <xs:restriction base="xs:integer">
+ <!-- this is a bit map as follows:
+ 1 = GENERAL
+ 2 = LEFT
+ 4 = RIGHT
+ 8 = CENTER
+ 16 = FILL
+ 32 = JUSTIFY
+ 64 = CENTER ACROSS SELECTION
+ 128 = DISTRIBUTED
+ -->
+ <xs:minInclusive value="1"/>
+ <xs:maxInclusive value="128"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="vertical_alignment">
+ <xs:restriction base="xs:integer">
+ <!-- this is a bit map as follows:
+ 1 = TOP
+ 2 = BOTTOM
+ 4 = CENTER
+ 8 = JUSTIFY
+ 16 = DISTRIBUTED
+ -->
+ <xs:minInclusive value="1"/>
+ <xs:maxInclusive value="16"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="Stipple">
+ <xs:restriction base="xs:integer">
+ <!-- the values are defined as follows:
+ 0 = NONE
+ 1 = Solid
+ 2 = 75%
+ 3 = 50%
+ 4 = 25%
+ 5 = 12.5%
+ 6 = 6.25%
+ 7 = Horizontal Stripe
+ 8 = Vertical Stripe
+ 9 = Reverse Diagonal Stripe
+ 10 = Diagonal Stripe
+ 11 = Diagonal Crosshatch
+ 12 = Thick Diagonal Crosshatch
+ 13 = Thin Horizontal Stripe
+ 14 = Thin Vertical Stripe
+ 15 = Thin Reverse Diagonal Stripe
+ 16 = Thin Diagonal Stripe
+ 17 = Thin Crosshatch
+ 18 = Thin Diagonal Crosshatch
+ 19 = Applix small circle
+ 20 = Applix semicircle
+ 21 = Applix small thatch
+ 22 = Applix round thatch
+ 23 = Applix Brick
+ 24 = 100%
+ 25 = 87.5%
+ -->
+ <xs:minInclusive value="0"/>
+ <xs:maxInclusive value="25"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="color">
+ <xs:restriction base="xs:string">
+ <!-- colors in hex, 16 bits red, green, then blue -->
+ <xs:pattern value="[0-9A-F]{1,4}:[0-9A-F]{1,4}:[0-9A-F]{1,4}"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:complexType name="Col_Row">
+ <!-- column/row number -->
+ <xs:attribute name="No" type="xs:nonNegativeInteger" use="required"/>
+ <!-- size in points -->
+ <xs:attribute name="Unit" type="xs:double" use="required"/>
+ <!-- DEPRECATED in 1.7.1 : top/left margin (no unit it does not scale) -->
+ <xs:attribute name="MarginA" type="gnm:col_row_margin" use="optional"/>
+ <!-- DEPRECATED in 1.7.1 : bottom/right margin (no unit it does not scale) -->
+ <xs:attribute name="MarginB" type="gnm:col_row_margin" use="optional"/>
+ <!-- true if size is explicitly set -->
+ <xs:attribute name="HardSize" type="xs:boolean" use="optional" default="0"/>
+ <xs:attribute name="Hidden" type="xs:boolean" use="optional" default="0"/>
+ <xs:attribute name="Collapsed" type="xs:boolean" use="optional" default="0"/>
+ <xs:attribute name="OutlineLevel" type="xs:integer" use="optional" default="0"/>
+ <!-- The number of consecutive identically sized cols/rows -->
+ <xs:attribute name="Count" type="xs:integer" use="optional" default="1"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:simpleType name="col_row_margin">
+ <xs:restriction base="xs:integer">
+ <xs:minInclusive value="0"/>
+ <xs:maxInclusive value="7"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:complexType name="Selections">
+ <xs:sequence>
+ <xs:element name="Selection" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:attribute name="startCol" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="startRow" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="endCol" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="endRow" type="xs:nonNegativeInteger" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ <xs:attribute name="CursorCol" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="CursorRow" type="xs:nonNegativeInteger" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:attributeGroup name="ObjectAnchor">
+ <xs:attribute name="ObjectBound" type="xs:string" use="required"/>
+ <xs:attribute name="ObjectOffset" type="gnm:offsets" use="required"/>
+ <xs:attribute name="ObjectAnchorType" type="gnm:anchor_type" use="optional"/>
+ <xs:attribute name="Direction" type="gnm:direction" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:attributeGroup>
+
+ <xs:attributeGroup name="ObjectProperties">
+ <xs:attribute name="Print" type="xs:boolean" use="optional"/>
+ <xs:attribute name="Name" type="xs:string" use="optional"/>
+ </xs:attributeGroup>
+
+ <xs:attributeGroup name="ObjectAdjustmentProps">
+ <xs:attribute name="Min" type="xs:double" use="required"/>
+ <xs:attribute name="Max" type="xs:double" use="required"/>
+ <xs:attribute name="Inc" type="xs:double" use="required"/>
+ <xs:attribute name="Page" type="xs:double" use="required"/>
+ <xs:attribute name="Value" type="xs:double" use="required"/>
+ <xs:attribute name="Input" type="xs:string" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:attributeGroup>
+
+ <xs:complexType name="Objects">
+ <xs:choice minOccurs="0" maxOccurs="unbounded">
+ <xs:element name="CellComment">
+ <xs:complexType>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:attribute name="Author" type="xs:string" use="optional"/>
+ <xs:attribute name="Text" type="xs:string" use="required"/>
+ <xs:attribute name="TextFormat" type="xs:string" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="SheetObjectFilled">
+ <xs:complexType>
+ <xs:choice minOccurs="0" maxOccurs="1">
+ <xs:any namespace="##local" processContents="lax"/>
+ </xs:choice>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:attribute name="OutlineColor" type="gnm:color" use="required"/>
+ <xs:attribute name="FillColor" type="gnm:color" use="required"/>
+ <xs:attribute name="Type" type="gnm:objectfilled" use="required"/>
+ <xs:attribute name="Width" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="Label" type="xs:string" use="optional"/>
+ <xs:attribute name="LabelFormat" type="xs:string" use="optional"/>
+
+ <!-- the arrow shape attributes are only used if the type is
+ arrow (Type="2")
+ -->
+ <xs:attribute name="ArrowShapeA" type="xs:double" use="optional"/>
+ <xs:attribute name="ArrowShapeB" type="xs:double" use="optional"/>
+ <xs:attribute name="ArrowShapeC" type="xs:double" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+
+ <!-- widgets -->
+ <xs:element name="SheetWidgetButton">
+ <xs:complexType>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:attribute name="Label" type="xs:string" use="required"/>
+ <xs:attribute name="Value" type="xs:integer" />
+ <xs:attribute name="Input" type="xs:string" />
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="SheetWidgetCheckbox">
+ <xs:complexType>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:attribute name="Label" type="xs:string" use="required"/>
+ <xs:attribute name="Value" type="xs:integer" use="required"/>
+ <xs:attribute name="Input" type="xs:string" />
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="SheetWidgetToggleButton" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:attribute name="Label" type="xs:string" use="required"/>
+ <xs:attribute name="Value" type="xs:integer" use="required"/>
+ <xs:attribute name="Input" type="xs:string" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name="SheetWidgetScrollbar">
+ <xs:complexType>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectAdjustmentProps"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="SheetWidgetSpinbutton">
+ <xs:complexType>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectAdjustmentProps"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="SheetWidgetSlider" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectAdjustmentProps"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name="SheetObjectImage">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="Content" form="unqualified">
+ <xs:complexType mixed="true">
+ <xs:attribute name="image-type" type="xs:string" use="required"/>
+ <xs:attribute name="size-bytes" type="xs:int" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:attribute name="crop-top" type="xs:double" use="required"/>
+ <xs:attribute name="crop-bottom" type="xs:double" use="required"/>
+ <xs:attribute name="crop-left" type="xs:double" use="required"/>
+ <xs:attribute name="crop-right" type="xs:double" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="SheetObjectGraph">
+ <xs:complexType>
+ <xs:choice minOccurs="1" maxOccurs="1">
+ <xs:element name="GogObject" type="gnm:GogObject"/>
+ <xs:element name="GogObject" type="gnm:GogObject" form="unqualified"/>
+ </xs:choice>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="SheetWidgetFrame">
+ <xs:complexType>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="SheetWidgetLabel">
+ <xs:complexType>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="SheetWidgetList">
+ <xs:complexType>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:attribute name="Content" type="xs:string" use="required"/>
+ <xs:attribute name="Output" type="xs:string" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ <xs:element name="SheetWidgetCombo">
+ <xs:complexType>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ <xs:attributeGroup ref="gnm:ObjectProperties"/>
+ <xs:attribute name="Content" type="xs:string" use="required"/>
+ <xs:attribute name="Output" type="xs:string" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+
+ <!-- no longer supported -->
+ <xs:element name="SheetObjectBonobo">
+ <xs:complexType>
+ <xs:attributeGroup ref="gnm:ObjectAnchor"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:choice>
+ </xs:complexType>
+
+ <xs:complexType name="GogObject">
+ <xs:choice minOccurs="0" maxOccurs="unbounded">
+ <xs:element name="property" type="gnm:GogProperty"/>
+ <xs:element name="property" type="gnm:GogProperty" form="unqualified"/>
+ <xs:element name="data" type="gnm:GOData"/>
+ <xs:element name="data" type="gnm:GOData" form="unqualified"/>
+ <xs:element name="GogObject" type="gnm:GogObject"/>
+ <xs:element name="GogObject" type="gnm:GogObject" form="unqualified"/>
+ </xs:choice>
+ <xs:attribute name="type" type="xs:string" use="optional"/>
+ <xs:attribute name="role" type="xs:string" use="optional"/>
+ </xs:complexType>
+
+ <xs:complexType name="GogProperty" mixed="true">
+ <xs:sequence>
+ <!-- children are unstructured -->
+ <xs:any minOccurs="0" maxOccurs="unbounded" processContents="lax"/>
+ </xs:sequence>
+ <xs:attribute name="name" type="xs:string" use="required"/>
+ <xs:attribute name="type" type="xs:string" use="optional"/>
+ </xs:complexType>
+
+ <xs:complexType name="GOData">
+ <xs:choice minOccurs="0" maxOccurs="unbounded">
+ <xs:element name="dimension" type="gnm:GODimension"/>
+ <xs:element name="dimension" type="gnm:GODimension" form="unqualified"/>
+ </xs:choice>
+ </xs:complexType>
+ <xs:complexType name="GODimension">
+ <xs:simpleContent>
+ <xs:extension base="xs:string">
+ <xs:attribute name="id" type="xs:integer" use="required"/>
+ <xs:attribute name="type" type="xs:string" use="required"/>
+ </xs:extension>
+ </xs:simpleContent>
+ </xs:complexType>
+
+ <xs:simpleType name="offsets">
+ <xs:restriction base="xs:string">
+ <!-- four space-separated doubles; the decimal point is escaped so
+ that only a literal '.' is accepted between the digit groups
+ -->
+ <xs:pattern value="[+\-]?\d+(\.\d+)?([Ee]([+\-])?\d+)? [+\-]?\d+(\.\d+)?([Ee]([+\-])?\d+)? [+\-]?\d+(\.\d+)?([Ee]([+\-])?\d+)? [+\-]?\d+(\.\d+)?([Ee]([+\-])?\d+)?"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <!-- As of 1.7.7 this is deprecated -->
+ <xs:simpleType name="anchor_type">
+ <xs:restriction base="xs:string">
+ <!-- if this seems hard to read, it purportedly describes 4
+ space-separated positive numbers
+
+ each number needs to be one of the following:
+ 0 = UNKNOWN
+ 16 = PERCENTAGE_FROM_COLROW_START
+ 32 = PTS_FROM_COLROW_START
+ 33 = PTS_FROM_COLROW_END
+ 48 = PTS_ABSOLUTE
+ -->
+ <xs:pattern value="\d+ \d+ \d+ \d+"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="direction">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="-1"/>
+ <!-- unknown -->
+ <xs:enumeration value="0"/>
+ <!-- up right -->
+ <xs:enumeration value="1"/>
+ <!-- up left -->
+ <xs:enumeration value="16"/>
+ <!-- down right -->
+ <xs:enumeration value="17"/>
+ <!-- down left -->
+ <xs:enumeration value="255"/>
+ <!-- unknown -->
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="objectfilled">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="1"/>
+ <!-- line -->
+ <xs:enumeration value="2"/>
+ <!-- arrow -->
+ <xs:enumeration value="101"/>
+ <!-- box -->
+ <xs:enumeration value="102"/>
+ <!-- oval -->
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:complexType name="Cells">
+ <xs:sequence>
+ <xs:element name="Cell" minOccurs="0" maxOccurs="unbounded">
+ <!-- expressions are stored as entered, with a leading '='.
+ Once a shared expression has been defined, subsequent
+ Cell elements using that same shared expression simply
+ use the "ExprID" attribute to indicate which shared
+ expression is used, and no Content element is included
+ -->
+ <xs:complexType mixed="true">
+ <!-- Compatibility for ancient 0.x format -->
+ <xs:sequence>
+ <xs:element name="Content" type="xs:string" minOccurs="0" maxOccurs="1"/>
+ </xs:sequence>
+
+ <!-- Col and Row are sufficient for a cell that is an element of
+ an array of cells as long as it's not the top left cell -->
+ <xs:attribute name="Col" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="Row" type="xs:nonNegativeInteger" use="required"/>
+ <!-- ExprID is only used to refer to a shared expression -->
+ <xs:attribute name="ExprID" type="xs:nonNegativeInteger" use="optional"/>
+ <!-- ValueType is not used if the cell contains an expression -->
+ <xs:attribute name="ValueType" type="gnm:ValueType" use="optional"/>
+ <!-- ValueFormat is apparently used only for cell-by-cell format overrides -->
+ <xs:attribute name="ValueFormat" type="xs:string" use="optional"/>
+ <!-- Cols and Rows are used to define an array of cells -->
+ <xs:attribute name="Cols" type="xs:positiveInteger" use="optional"/>
+ <xs:attribute name="Rows" type="xs:positiveInteger" use="optional"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ </xs:complexType>
+
+ <xs:simpleType name="ValueType">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="10"/>
+ <!-- empty -->
+ <xs:enumeration value="20"/>
+ <!-- boolean -->
+ <xs:enumeration value="30"/>
+ <!-- integer -->
+ <xs:enumeration value="40"/>
+ <!-- float -->
+ <xs:enumeration value="50"/>
+ <!-- error -->
+ <xs:enumeration value="60"/>
+ <!-- string -->
+ <xs:enumeration value="70"/>
+ <!-- cellrange -->
+ <xs:enumeration value="80"/>
+ <!-- array -->
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="CondOp">
+ <xs:restriction base="xs:nonNegativeInteger">
+ <xs:enumeration value="0"/>
+ <!-- GNM_STYLE_COND_BETWEEN -->
+ <xs:enumeration value="1"/>
+ <!-- GNM_STYLE_COND_NOT_BETWEEN -->
+ <xs:enumeration value="2"/>
+ <!-- GNM_STYLE_COND_EQUAL -->
+ <xs:enumeration value="3"/>
+ <!-- GNM_STYLE_COND_NOT_EQUAL -->
+ <xs:enumeration value="4"/>
+ <!-- GNM_STYLE_COND_GT -->
+ <xs:enumeration value="5"/>
+ <!-- GNM_STYLE_COND_LT -->
+ <xs:enumeration value="6"/>
+ <!-- GNM_STYLE_COND_GTE -->
+ <xs:enumeration value="7"/>
+ <!-- GNM_STYLE_COND_LTE -->
+
+ <!-- Arbitrary expr evaluated at EvalPos -->
+ <xs:enumeration value="8"/>
+ <!-- GNM_STYLE_COND_CUSTOM -->
+
+ <!-- New in Gnumeric 1.8 -->
+ <xs:enumeration value="16"/>
+ <!-- GNM_STYLE_COND_CONTAINS_STR -->
+ <xs:enumeration value="17"/>
+ <!-- GNM_STYLE_COND_NOT_CONTAINS_STR -->
+ <xs:enumeration value="18"/>
+ <!-- GNM_STYLE_COND_BEGINS_WITH_STR -->
+ <xs:enumeration value="19"/>
+ <!-- GNM_STYLE_COND_NOT_BEGINS_WITH_STR -->
+ <xs:enumeration value="20"/>
+ <!-- GNM_STYLE_COND_ENDS_WITH_STR -->
+ <xs:enumeration value="21"/>
+ <!-- GNM_STYLE_COND_NOT_ENDS_WITH_STR -->
+
+ <xs:enumeration value="22"/>
+ <!-- GNM_STYLE_COND_CONTAINS_ERR -->
+ <xs:enumeration value="23"/>
+ <!-- GNM_STYLE_COND_NOT_CONTAINS_ERR -->
+
+ <xs:enumeration value="24"/>
+ <!-- GNM_STYLE_COND_CONTAINS_BLANKS -->
+ <xs:enumeration value="25"/>
+ <!-- GNM_STYLE_COND_NOT_CONTAINS_BLANKS -->
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="FilterFieldType">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="expr"/>
+ <xs:enumeration value="blanks"/>
+ <xs:enumeration value="nonblanks"/>
+ <xs:enumeration value="bucket"/>
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="FilterFieldOp">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="eq"/>
+ <xs:enumeration value="gt"/>
+ <xs:enumeration value="lt"/>
+ <xs:enumeration value="gte"/>
+ <xs:enumeration value="lte"/>
+ <xs:enumeration value="ne"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:complexType name="FilterField">
+ <xs:attribute name="Index" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="Type" type="gnm:FilterFieldType" use="required"/>
+
+ <!-- WARNING WARNING WARNING
+ Value and ValueType have their senses
+ reversed due to an old typo -->
+ <!-- Only valid for Type=expr -->
+ <xs:attribute name="Value0" type="gnm:ValueType" use="optional"/>
+ <xs:attribute name="ValueType0" type="xs:string" use="optional"/>
+ <xs:attribute name="Op0" type="gnm:FilterFieldOp" use="optional"/>
+ <xs:attribute name="Value1" type="gnm:ValueType" use="optional"/>
+ <xs:attribute name="ValueType1" type="xs:string" use="optional"/>
+ <xs:attribute name="Op1" type="gnm:FilterFieldOp" use="optional"/>
+
+ <!-- Only valid for Type=bucket -->
+ <xs:attribute name="top" type="xs:boolean" use="optional"/>
+ <!-- top vs bottom -->
+ <xs:attribute name="items" type="xs:boolean" use="optional"/>
+ <!-- top n vs top n% -->
+ <xs:attribute name="count" type="xs:double" use="optional"/>
+ <!-- top COUNT -->
+
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:complexType name="Filters">
+ <xs:sequence>
+ <xs:element name="Filter" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="Field" type="gnm:FilterField" minOccurs="0" maxOccurs="unbounded"/>
+ </xs:sequence>
+ <xs:attribute name="Area" type="xs:string" use="required"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ </xs:complexType>
+
+ <xs:complexType name="MergedRegions">
+ <xs:sequence>
+ <xs:element name="Merge" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
+ </xs:sequence>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:complexType name="Solver">
+ <xs:sequence>
+ <xs:element name="Constr" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:attribute name="Lcol" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="Lrow" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="Rcol" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="Rrow" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="Cols" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="Rows" type="xs:nonNegativeInteger" use="required"/>
+ <xs:attribute name="Type" type="gnm:solver_constraint_t" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ <xs:attribute name="TargetCol" type="xs:integer" use="optional"/>
+ <xs:attribute name="TargetRow" type="xs:integer" use="optional"/>
+ <xs:attribute name="ProblemType" type="gnm:solver_problem_t" use="required"/>
+ <xs:attribute name="ModelType" type="gnm:model_type_t"/>
+ <xs:attribute name="Inputs" type="xs:string"/>
+ <xs:attribute name="MaxTime" type="xs:integer" use="required"/>
+ <xs:attribute name="MaxIter" type="xs:integer" use="required"/>
+ <xs:attribute name="NonNeg" type="xs:boolean" use="required"/>
+ <xs:attribute name="Discr" type="xs:boolean" use="required"/>
+ <xs:attribute name="AutoScale" type="xs:boolean" use="required"/>
+ <xs:attribute name="ShowIter" type="xs:boolean"/>
+ <xs:attribute name="AnswerR" type="xs:boolean"/>
+ <xs:attribute name="SensitivityR" type="xs:boolean"/>
+ <xs:attribute name="LimitsR" type="xs:boolean"/>
+ <xs:attribute name="PerformR" type="xs:boolean"/>
+ <xs:attribute name="ProgramR" type="xs:boolean" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+
+ <xs:simpleType name="solver_constraint_t">
+ <xs:restriction base="xs:integer">
+ <xs:enumeration value="0" />
+ <!-- none -->
+ <xs:enumeration value="1" />
+ <!-- &lt;= -->
+ <xs:enumeration value="2" />
+ <!-- &gt;= -->
+ <xs:enumeration value="4" />
+ <!-- = -->
+ <xs:enumeration value="8" />
+ <!-- Int -->
+ <xs:enumeration value="16"/>
+ <!-- boolean -->
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:simpleType name="solver_problem_t">
+ <xs:restriction base="xs:integer">
+ <xs:enumeration value="0" />
+ <!-- SolverMinimize -->
+ <xs:enumeration value="1" />
+ <!-- SolverMaximize -->
+ <xs:enumeration value="2" />
+ <!-- SolverEqualTo -->
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="model_type_t">
+ <xs:restriction base="xs:integer">
+ <xs:enumeration value="0" />
+ <!-- GNM_SOLVER_LP -->
+ <xs:enumeration value="1" />
+ <!-- GNM_SOLVER_QP -->
+ <xs:enumeration value="2" />
+ <!-- GNM_SOLVER_NLP -->
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="SheetVisibility">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="GNM_SHEET_VISIBILITY_VISIBLE"/>
+ <xs:enumeration value="GNM_SHEET_VISIBILITY_HIDDEN"/>
+ <xs:enumeration value="GNM_SHEET_VISIBILITY_VERY_HIDDEN"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <xs:complexType name="Scenarios">
+ <xs:sequence>
+ <xs:element name="Scenario" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="Name" type="xs:string"/>
+ <xs:element name="Comment" type="xs:string"/>
+ <xs:element name="CellsStr" type="xs:string"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ </xs:complexType>
+
+ <xs:complexType name="SheetLayout">
+ <xs:sequence>
+ <xs:element name="FreezePanes" minOccurs="0" maxOccurs="1">
+ <xs:complexType>
+ <xs:attribute name="FrozenTopLeft" type="xs:string" use="required"/>
+ <xs:attribute name="UnfrozenTopLeft" type="xs:string" use="required"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ <xs:attribute name="TopLeft" type="xs:string" use="required"/>
+ <xs:anyAttribute namespace="##other" processContents="lax"/>
+ </xs:complexType>
+</xs:schema>
diff --git a/misc/xml-tokens/ooxml-extra-tokens.txt b/misc/xml-tokens/ooxml-extra-tokens.txt
new file mode 100644
index 0000000..a39f3d0
--- /dev/null
+++ b/misc/xml-tokens/ooxml-extra-tokens.txt
@@ -0,0 +1,3 @@
+AlternateContent
+Choice
+Fallback
diff --git a/misc/xml-tokens/token_util.py b/misc/xml-tokens/token_util.py
new file mode 100644
index 0000000..19ba436
--- /dev/null
+++ b/misc/xml-tokens/token_util.py
@@ -0,0 +1,70 @@
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import sys
+
+
+unknown_token_name = "??"
+
+
+def normalize_name(old):
+ """Return `old` with every '.' and '-' replaced by '_'."""
+ new = ''
+ for c in old:
+ if c in '.-': # '.' and '-' are not allowed in C++ symbols.
+ c = '_'
+ new += c
+ return new
+
+
+def gen_token_list(filepath, tokens, ns_tokens):
+ dic = {}
+ for t in tokens:
+ dic[t] = True
+ for t in ns_tokens:
+ dic[t] = True
+
+ keys = dic.keys()
+ keys.sort()
+ file = open(filepath, 'w')
+ for key in keys:
+ file.write(key + "\n")
+ file.close()
+
+
+def get_auto_gen_warning():
+ """Return the '//' comment line that the generators in this module write first."""
+ return "// This file has been auto-generated. Do not hand-edit this."
+
+
+def gen_token_constants(outfile, tokens):
+ """Write one `const xml_token_t XML_<name> = <id>;` line per token to `outfile`.
+
+ Names are passed through normalize_name(); ids follow the order of
+ `tokens`, starting at 1.
+ """
+
+ with open(outfile, "w") as f:
+ print(get_auto_gen_warning(), file=f)
+ print(file=f)
+
+ for i, token in enumerate(tokens):
+ token = normalize_name(token)
+ print(f"const xml_token_t XML_{token} = {i+1};", file=f)
+
+
+def gen_token_names(outfile, tokens):
+ """Write a `token_names` string array and its `token_name_count` to `outfile`.
+
+ Index 0 holds `unknown_token_name`; the tokens follow from index 1 in
+ the order given, written verbatim (not passed through normalize_name()).
+ """
+
+ with open(outfile, "w") as f:
+ print(get_auto_gen_warning(), file=f)
+ print(file=f)
+
+ print("const char* token_names[] = {", file=f)
+ print(f" \"{unknown_token_name}\", // 0", file=f)
+
+ for i, token in enumerate(tokens):
+ s = ','
+ if i == len(tokens) - 1:
+ s = ' ' # no trailing comma after the last entry
+ print(f" \"{token}\"{s} // {i+1}", file=f)
+ print("};", file=f)
+ print(file=f)
+ # +1 accounts for the unknown-token entry written at index 0.
+ print(f"size_t token_name_count = {len(tokens)+1};", file=f)
diff --git a/misc/xml-tokens/xls-xml-tokens.txt b/misc/xml-tokens/xls-xml-tokens.txt
new file mode 100644
index 0000000..c7aa93c
--- /dev/null
+++ b/misc/xml-tokens/xls-xml-tokens.txt
@@ -0,0 +1,991 @@
+AcceptLabelsInFormulas
+Action
+ActiveChart
+ActiveCol
+ActiveColumn
+ActivePane
+ActiveRow
+ActiveRows
+ActiveSheet
+Aggregate
+AlertVersion
+Alignment
+AllItemName
+AllowDeleteCols
+AllowDeleteRows
+AllowFilter
+AllowFormatCells
+AllowInsertCols
+AllowInsertHyperlinks
+AllowInsertRows
+AllowPNG
+AllowSizeCols
+AllowSizeRows
+AllowSort
+AllowUsePivotTables
+AlternateMethod
+AppName
+Append
+ApplyAutomaticOutlineStyles
+Area
+ArrayRange
+Async
+Attribute
+AttributeType
+Authentication
+Author
+AutoFilter
+AutoFilterAnd
+AutoFilterColumn
+AutoFilterCondition
+AutoFilterOr
+AutoFitHeight
+AutoFitWidth
+AutoFormatAlignment
+AutoFormatBorder
+AutoFormatFont
+AutoFormatName
+AutoFormatNumber
+AutoFormatPattern
+AutoFormatWidth
+AutoRepublish
+AutoShowCount
+AutoShowField
+AutoShowRange
+AutoShowType
+AutoSortField
+AutoSortOrder
+B
+BackgroundQuery
+BaseField
+BaseItem
+Basic
+Behavior
+Binding
+BlackAndWhite
+BlankLineAfterItems
+BlockTotal
+Bold
+Boolean
+Border
+Borders
+Bottom
+BoundField
+Bytes
+CacheDetails
+CacheFile
+CacheIndex
+CachePosition
+CalculatedMember
+Calculation
+CantGetUniqueItems
+Caption
+CaptionAlignment
+CaseSensitive
+Category
+Cell
+CellRangeList
+CellsExpanded
+CellsExpandedSeqNum
+CellsNotExpanded
+CenterHorizontal
+CenterVertical
+Cf
+CharSet
+Characters
+CharactersWithSpaces
+ClientParameter
+ClientParameterBinding
+ClientParameterBindings
+ClientParameterValue
+Clipped
+CodeName
+Col1
+Col10
+Col11
+Col12
+Col13
+Col14
+Col15
+Col16
+Col17
+Col18
+Col19
+Col2
+Col20
+Col3
+Col4
+Col5
+Col6
+Col7
+Col8
+Col9
+ColBreak
+ColBreaks
+ColFirst
+ColLast
+Color
+Column
+ColumnInfo
+ColumnInputCell
+ColumnName
+ComboHide
+Comma
+CommandText
+CommandTextOrignal
+CommandType
+Comment
+CommentsLayout
+Company
+ComponentOptions
+Condition
+ConditionalFormatting
+Connection
+ConnectionInfo
+ConnectionString
+Consecutive
+ConsolidationReference
+Count
+CountOfSameItems
+CreateBackup
+Created
+Credential
+CredentialBinding
+CredentialValue
+Crn
+CubeField
+CubeSource
+CurrentPage
+Custom
+CustomDocumentProperties
+Data
+DataAxisEmpty
+DataField
+DataMember
+DataSource
+DataTable
+DataType
+DataValidation
+DataValueEditing
+Date1904
+Decimal
+DefaultColumnWidth
+DefaultItem
+DefaultRowHeight
+DefaultValue
+DefaultVersion
+DeletedTitle
+Delimiters
+Descending
+Description
+DetailFormat
+DetailMaxHeight
+DetailMaxWidth
+DetailRowHeight
+DetailSortOrder
+DetailWidth
+Dimension
+DisableDateRecognition
+DisableDrillDown
+DisableEdit
+DisableFieldDialog
+DisableRefresh
+DisableWizard
+DisplayDrawingObjects
+DisplayEmptyMembers
+DisplayErrorString
+DisplayFieldList
+DisplayFormulas
+DisplayIn
+DisplayInkNotes
+DisplayPageBreak
+DisplayRightToLeft
+DivID
+DoNotCalculateBeforeSave
+DoNotDisplayColHeaders
+DoNotDisplayGridlines
+DoNotDisplayHeadings
+DoNotDisplayOutline
+DoNotDisplayRowHeaders
+DoNotDisplayZeros
+DoNotJoinDelimiters
+DoNotPersist
+DoNotPersistSort
+DoNotPersstAF
+DoNotPromptForFile
+DoNotSaveLinkValues
+DocumentProperties
+DontShowInFieldList
+DownloadComponents
+DraftQuality
+DrawAspect
+DrilledLevel
+DrilledMember
+EditWebPage
+ElementType
+EmbedSaveSmartTags
+EnableMultiplePageItems
+EnableRedirections
+EnableSelection
+Encode
+EntirePage
+Entry
+Error
+ErrorHide
+ErrorMessage
+ErrorString
+ErrorStyle
+ErrorTitle
+ExcelName
+ExcelType
+ExcelWorkbook
+ExcelWorksheetType
+Expanded
+ExpandedColumnCount
+ExpandedRowCount
+ExternName
+ExtraLeftColumns
+ExtraRightColumns
+Face
+Family
+Family
+Field
+FieldLabelFormat
+FieldListBottom
+FieldListLeft
+FieldListRight
+FieldListTop
+FieldStart
+FieldType
+File
+FileName
+FillDown
+Filled
+FilterCaption
+FilterMember
+FilterOn
+FirstVisibleSheet
+FitHeight
+FitToPage
+FitWidth
+Font
+FontName
+Footer
+Format
+FormatSettings
+FormatType
+Formula
+FormulaIndex
+FormulaV10
+FreezePanes
+FrozenNoSplit
+FullColumns
+FullRows
+Function
+FunctionGroup
+FunctionGroupIndex
+FuturePersist
+FutureVer
+GrandTotalString
+GridlineColor
+GridlineColorIndex
+Gridlines
+GroupBy
+GroupDefinition
+GroupEnd
+GroupEndAuto
+GroupLevel
+GroupMember
+GroupNumber
+GroupStart
+GroupStartAuto
+GroupType
+GroupedWidth
+Guid
+HRef
+HRefScreenTip
+HTMLFormat
+HTMLTables
+HasNoAutoFormat
+HasNoRecords
+Header
+HeaderRange
+HeaderRow
+Height
+Hidden
+HideDetail
+HideDropDowns
+HideFormula
+HideHorizontalScrollBar
+HideInactiveListBorder
+HideOfficeLogo
+HidePivotTableFieldList
+HideTotalsAnnotation
+HideVerticalScrollBar
+HideWorkbookTabs
+Horizontal
+HorizontalResolution
+Href
+HtmlType
+HyperlinkBase
+I
+ID
+IMEMode
+Id
+IdWrapped
+If
+ImmediateItemsOnDrop
+Indent
+Index
+IndividualCellBorders
+InputHide
+InputMessage
+InputTitle
+InsertEntireRows
+InstanceShape
+Interior
+IntlMacro
+Invalid
+InvertedColumnMember
+InvertedRowMember
+IsGroupLevel
+IsMemberProperty
+IsNotFiltered
+Italic
+Item
+ItemType
+Iteration
+KeyboardShortcut
+Keywords
+Label
+LastAuthor
+LastPrinted
+LastSaved
+Layout
+LayoutForm
+LayoutPageBreak
+LayoutSubtotalLocation
+LeafColumnMember
+LeafRowMember
+Left
+LeftCell
+LeftColumnRightPane
+LeftColumnVisible
+LeftToRight
+LengthLevelUniqueName
+Level
+LineStyle
+Lines
+LoadMode
+LocalConnection
+Location
+LocationOfComponents
+Macro
+MainFile
+Maintain
+MajorVersion
+Manager
+Map
+MapChildItems
+MapID
+MapInfo
+Mapdata
+Margin
+Max
+MaxChange
+MaxHeight
+MaxIterations
+MaxWidth
+Measure
+Member
+MemberExpand
+MemberFormat
+MemberName
+MemberPropertiesOrder
+MemberProperty
+MemberPropertyParent
+MergeAcross
+MergeDown
+MergeLabels
+Min
+MinorVersion
+Missing
+MissingItemsLimit
+Moper
+MoveAfterReturn
+Name
+NamedCell
+NamedRange
+Names
+Namespace
+NewAsync
+NewItemsHidden
+NextId
+NextSheetNumber
+NoAutoFit
+NoAutoFormatWidth
+NoAutoPage
+NoAutoRecover
+NoAutofit
+NoColumnGrand
+NoDetailAutoFit
+NoDisplayNullString
+NoDragToColumn
+NoDragToData
+NoDragToHide
+NoDragToPage
+NoDragToRow
+NoInserts
+NoPreserveFormatting
+NoPrintRepeatItems
+NoPrinterInfo
+NoRefreshCache
+NoRowGrand
+NoSaveData
+NoSummaryColumnsRightDetail
+NoSummaryRowsBelowDetail
+NoTextToColumns
+NoTitles
+NoToggleDataHeader
+NoViewCalculatedMembers
+NonDefaultName
+NotInverted
+NotVisible
+NullString
+Number
+NumberFormat
+NumberOfCopies
+OLEObject
+OWCVersion
+ObjectID
+OfficeDocumentSettings
+OleLink
+Operator
+OptimizeCache
+Orientation
+Outline
+OverwriteCells
+PLCaption
+PLDataOrientation
+PLExport
+PLGroupType
+PLName
+PLPivotField
+PLPosition
+PLSubtotal
+PLTPivotItem
+PLTotal
+PTFormat
+PTFormula
+PTLineItem
+PTLineItems
+PTPivotData
+PTRule
+PTSource
+PageBreakZoom
+PageBreaks
+PageFieldOrder
+PageFieldStyle
+PageFieldWrapCount
+PageMargins
+PageSetup
+Pages
+Pane
+Panes
+PaperSizeIndex
+Paragraphs
+Parameter
+ParameterType
+ParameterValue
+Parent
+ParentField
+ParentIsOther
+ParentName
+ParentUniqueName
+ParseFormulaAsV10
+ParseRuleAsV10
+PasteFormula
+PasteRefersTo
+Path
+Pattern
+PatternColor
+PivotAxis
+PivotCache
+PivotField
+PivotItem
+PivotTable
+PivotView
+Position
+PrecisionAsDisplayed
+PresentationFormat
+Print
+PrintErrors
+PrintSetTitles
+ProgID
+PromptString
+ProtectContents
+ProtectObjects
+ProtectScenarios
+ProtectStructure
+ProtectWindows
+Protected
+Protection
+Proxy
+PublishObject
+PublishObjects
+Purpose
+QTSource
+Qualifier
+Query97
+QuerySource
+QueryTable
+QueryType
+Range
+RangeSelection
+ReadOnly
+ReadingOrder
+RefModeR1C1
+Reference
+RefersTo
+RefreshDate
+RefreshDateCopy
+RefreshInfo
+RefreshName
+RefreshOnChange
+RefreshOnFileOpen
+RefreshTimeSpan
+RefreshedInXl9
+Resource
+Revision
+Right
+RightToLeft
+RobustConnect
+RootElement
+Rotate
+Rotation
+Row
+RowBreak
+RowBreaks
+RowColHeadings
+RowInputCell
+RowLast
+RowNumbers
+Rule
+RuleType
+RuleV10
+S
+SOAPAction
+SQLType
+Scale
+Schema
+SchemaID
+SchemaRef
+Selected
+SelectedSheets
+Selection
+SelectionNamespaces
+SemiColon
+SeqNum
+Sequence
+ServerBased
+ServerSortOrder
+Set
+Shadow
+ShapeID
+SheetIndex
+SheetName
+ShowAllItems
+ShowAlways
+ShowCellBackgroundFromOLAP
+ShowImportExportValidationErrors
+ShowPageBreakZoom
+ShowPageMultipleItemLabel
+ShowTotals
+ShrinkToFit
+Size
+SmallGrid
+SmartTagType
+SmartTags
+SolveOrder
+Sort
+SortKey
+SortOrder
+Sorting
+Source
+SourceConnectionFile
+SourceConsolidation
+SourceDataFile
+SourceHierarchy
+SourceHierarchyLevel
+SourceName
+SourceType
+Space
+SpaceAbove
+SpaceBelow
+Span
+SplitHorizontal
+SplitVertical
+SpreadsheetAutoFit
+StandardWidth
+StartPageNumber
+StartRow
+StrikeThrough
+Style
+StyleID
+Styles
+Sub
+SubType
+Subject
+Subtotal
+SubtotalFormat
+SubtotalHiddenPageItems
+SubtotalName
+Sup
+SupBook
+Synchronous
+Tab
+TabColorIndex
+TabRatio
+Table
+TableStyle
+Tag
+Text
+TextQualifier
+TextWizardSettings
+ThousandSeparator
+Ticked
+Title
+Toolbar
+TooltipInfo
+Top
+TopCell
+TopRowBottomPane
+TopRowVisible
+TotalAlignment
+TotalAllMembers
+TotalCaptionAlignment
+TotalFormat
+TotalTime
+TotalWidth
+TransitionExpressionEvaluation
+TransitionFormulaEntry
+Type
+U
+URLString
+Uncalced
+Underline
+UniqueName
+Unsynced
+UseBlank
+UseLocalConnection
+UseSameSettings
+User
+VMLFrame
+VacatedStyle
+ValidPrinterInfo
+Value
+Value1
+Value2
+Version
+VersionLastEdit
+VersionLastRefresh
+VersionLastUpdate
+VersionRefreshableMin
+VersionUpdateableMin
+Vertical
+VerticalAlign
+VerticalResolution
+VerticalText
+ViewableRange
+Visible
+VisualTotals
+WantAdvise
+WantPict
+Watch
+Watches
+WebPostString
+Weight
+Width
+WindowHeight
+WindowHidden
+WindowIconic
+WindowTopX
+WindowTopY
+WindowWidth
+Windows
+Words
+Workbook
+WorkbookOptions
+Worksheet
+WorksheetOptions
+WorksheetSource
+WrapText
+XPath
+XSDType
+Xct
+ZeroHeight
+Zoom
+accentbar
+adj
+adjusthandles
+alignshape
+allowincell
+allowoverlap
+alt
+althref
+angle
+arc
+arcsize
+arrowok
+aspect
+aspectratio
+attribute
+autorotationcenter
+backdepth
+background
+bilevel
+blacklevel
+borderbottomcolor
+borderleftcolor
+borderrightcolor
+bordertopcolor
+brightness
+bullet
+button
+bwmode
+bwnormal
+bwpure
+callout
+caption
+chromakey
+class
+clip
+color
+color2
+colormenu
+colormode
+colormru
+colors
+complex
+connectangles
+connectloc
+connectlocs
+connectortype
+connecttype
+content
+control1
+control2
+coordorigin
+coordsize
+cropbottom
+cropleft
+cropping
+cropright
+croptop
+curve
+dashstyle
+data
+datatype
+detectmouseclick
+diffusity
+displaycustomheaders
+distance
+doubleclicknotify
+drop
+dropauto
+dt
+edge
+editas
+embosscolor
+end
+endAngle
+endarrow
+endarrowlength
+endarrowwidth
+endcap
+entry
+eqn
+ext
+extends
+extrusion
+extrusioncolor
+extrusionok
+f
+facet
+fill
+fillcolor
+filled
+fillok
+filltype
+fitpath
+fitshape
+focus
+focusposition
+focussize
+forcedash
+foredepth
+formulas
+from
+gain
+gamma
+gap
+gradientshapeok
+grayscale
+group
+grouping
+h
+handles
+hidden
+how
+hr
+hralign
+href
+hrheight
+hrnoshade
+hrpct
+hrstd
+hrwidth
+id
+idmap
+idref
+image
+imagealignshape
+imageaspect
+imagedata
+imagesize
+inset
+insetmode
+invx
+invy
+joinstyle
+length
+lengthspecified
+lightface
+lightharsh
+lightharsh2
+lightlevel
+lightlevel2
+lightposition
+lightposition2
+limo
+line
+linestyle
+lock
+lockrotationcenter
+map
+master
+matrix
+maxLength
+metal
+method
+minusx
+minusy
+miterlimit
+movie
+name
+namespaceuri
+new
+obscured
+offset
+offset2
+old
+ole
+oleicon
+oleid
+on
+oned
+onmouseover
+opacity
+opacity2
+orientation
+orientationangle
+origin
+oval
+password
+path
+phonetictext
+plane
+points
+polar
+polyline
+position
+preferrelative
+print
+proxy
+r
+radiusrange
+rect
+regroupid
+regrouptable
+relativeposition
+render
+rotation
+rotationangle
+rotationcenter
+roundrect
+row
+ruleinitiator
+ruleproxy
+rules
+selection
+shadow
+shadowcolor
+shadowok
+shape
+shapedefaults
+shapelayout
+shapetype
+shininess
+singleclick
+size
+skew
+skewamt
+skewangle
+specularity
+spid
+spidmax
+spt
+src
+start
+startAngle
+startarrow
+startarrowlength
+startarrowwidth
+string
+stroke
+strokecolor
+stroked
+strokeok
+strokeweight
+style
+switch
+tablelimits
+tableproperties
+target
+targetscreensize
+text
+textborder
+textbox
+textboxrect
+textpath
+textpathok
+title
+to
+trim
+type
+url
+useExplicit
+userId
+userdrawn
+userhidden
+v
+verticies
+viewpoint
+viewpointorigin
+visible
+weight
+worksheetoptions
+wrapcoords
+xrange
+xscale
+yrange