diff options
Diffstat (limited to 'doc/notes')
-rw-r--r-- | doc/notes/index.rst | 9 | ||||
-rw-r--r-- | doc/notes/json_map/example.json | 203 | ||||
-rw-r--r-- | doc/notes/json_map/index.rst | 326 | ||||
-rw-r--r-- | doc/notes/xml_map/example.xml | 183 | ||||
-rw-r--r-- | doc/notes/xml_map/index.rst | 314 |
5 files changed, 1035 insertions, 0 deletions
diff --git a/doc/notes/index.rst b/doc/notes/index.rst new file mode 100644 index 0000000..a474bab --- /dev/null +++ b/doc/notes/index.rst @@ -0,0 +1,9 @@ + +Notes +===== + +.. toctree:: + :maxdepth: 1 + + xml_map/index.rst + json_map/index.rst diff --git a/doc/notes/json_map/example.json b/doc/notes/json_map/example.json new file mode 100644 index 0000000..e7a2110 --- /dev/null +++ b/doc/notes/json_map/example.json @@ -0,0 +1,203 @@ +[ + { + "id": 1, + "name": [ + "Tab", + "Limpenny" + ], + "active": true, + "gender": "Male", + "language": "Kazakh" + }, + { + "id": 2, + "name": [ + "Manda", + "Hadgraft" + ], + "active": false, + "gender": "Female", + "language": "Bislama" + }, + { + "id": 3, + "name": [ + "Mickie", + "Boreham" + ], + "active": false, + "gender": "Male", + "language": "Swahili" + }, + { + "id": 4, + "name": [ + "Celinka", + "Brookfield" + ], + "active": false, + "gender": "Female", + "language": "Gagauz" + }, + { + "id": 5, + "name": [ + "Muffin", + "Bleas" + ], + "active": false, + "gender": "Female", + "language": "Hiri Motu" + }, + { + "id": 6, + "name": [ + "Jackelyn", + "Crumb" + ], + "active": false, + "gender": "Female", + "language": "Northern Sotho" + }, + { + "id": 7, + "name": [ + "Tessie", + "Hollingsbee" + ], + "active": true, + "gender": "Female", + "language": "Fijian" + }, + { + "id": 8, + "name": [ + "Yank", + "Wernham" + ], + "active": false, + "gender": "Male", + "language": "Tok Pisin" + }, + { + "id": 9, + "name": [ + "Brendan", + "Lello" + ], + "active": true, + "gender": "Male", + "language": "Fijian" + }, + { + "id": 10, + "name": [ + "Arabel", + "Rigg" + ], + "active": false, + "gender": "Female", + "language": "Kyrgyz" + }, + { + "id": 11, + "name": [ + "Carolann", + "McElory" + ], + "active": false, + "gender": "Female", + "language": "Pashto" + }, + { + "id": 12, + "name": [ + "Gasparo", + "Flack" + ], + "active": false, + "gender": "Male", + "language": "Telugu" + }, + { + "id": 13, + "name": [ + "Eolanda", + "Polendine" + ], + "active": false, + "gender": "Female", + "language": "Kashmiri" + }, + { + "id": 14, + "name": [ + "Brock", + "McCaw" + ], + "active": false, + "gender": "Male", + "language": "Tsonga" + }, + { + "id": 15, + "name": [ + "Wenda", + "Espinas" + ], + "active": false, + "gender": "Female", + "language": "Bulgarian" + }, + { + "id": 16, + "name": [ + "Zachary", + "Banane" + ], + "active": true, + "gender": "Male", + "language": "Persian" + }, + { + "id": 17, + "name": [ + "Sallyanne", + "Mengue" + ], + "active": false, + "gender": "Female", + "language": "Latvian" + }, + { + "id": 18, + "name": [ + "Elizabet", + "Hoofe" + ], + "active": true, + "gender": "Female", + "language": "Tswana" + }, + { + "id": 19, + "name": [ + "Alastair", + "Hutchence" + ], + "active": true, + "gender": "Male", + "language": "Ndebele" + }, + { + "id": 20, + "name": [ + "Minor", + "Worland" + ], + "active": true, + "gender": "Male", + "language": "Dutch" + } +] + diff --git a/doc/notes/json_map/index.rst b/doc/notes/json_map/index.rst new file mode 100644 index 0000000..00d6626 --- /dev/null +++ b/doc/notes/json_map/index.rst @@ -0,0 +1,326 @@ + +Mapping JSON to spreadsheet +=========================== + +This tutorial covers how to map JSON document to a spreadsheet document, very +similar to what we covered in :ref:`this tutorial <mapping-xml-to-ss>` +where we illustrated how to map XML document to a spreadsheet document. + +Throughout this tutorial, we will be using :download:`this sample JSON document <example.json>` +to illustrate how to achieve it using the ``orcus-json`` command. The structure +of this tutorial will be similar to the structure of the XML mapping counterpart, +since the steps are very similar. + +Examining the structure of the input JSON document +-------------------------------------------------- + +Let's first take a look at the sample JSON document: + +.. code-block:: javascript + + [ + { + "id": 1, + "name": [ + "Tab", + "Limpenny" + ], + "active": true, + "gender": "Male", + "language": "Kazakh" + }, + { + "id": 2, + "name": [ + "Manda", + "Hadgraft" + ], + "active": false, + "gender": "Female", + "language": "Bislama" + }, + { + "id": 3, + "name": [ + "Mickie", + "Boreham" + ], + "active": false, + "gender": "Male", + "language": "Swahili" + }, + + ... + +This is essentially the same content as the XML sample document we used in the +:ref:`last tutorial <mapping-xml-to-ss>` but re-formatted in JSON. + +Let run the following command: + +.. code-block:: + + orcus-json --mode structure example.json + +to analyze the structure of this JSON document. The command will generate the +following output: + +.. code-block:: + + $array[20].object(*)['active'].value + $array[20].object(*)['gender'].value + $array[20].object(*)['id'].value + $array[20].object(*)['language'].value + $array[20].object(*)['name'].array[2].value[0,1] + +This structure output resembles a variant of JSONPath but some modifications +are applied. It has the following characteristics: + +* The ``$`` symbol represents the root of the structure. +* Array node takes the form of either ``array[N]``, where the value of ``N`` + represents the number of elements. +* Object node takes the form of ``object['key']``. +* Value node, which is always a leaf node, is represented by ``value`` except + when the leaf node is an array containing values, it takes the form of ``value[0,1,2,...]``. +* The ``.`` symbols represent the node boundaries. +* The ``(*)`` symbols represent recurring nodes, which can be either array or + object. + +Auto-mapping the JSON document +------------------------------ + +Let's map this JSON document to a spreadsheet document by running: + +.. code-block:: + + orcus-json --mode map -o out -f flat example.json + +This is very similar to what we did in the XML mapping tutorial, except that +the command used is ``orcus-json`` and the input file is ``example.json``. +This will create file named ``out/range-0.txt`` which contains the following: + +.. code-block:: + + --- + Sheet name: range-0 + rows: 21 cols: 6 + +--------+-----------+-------------+-----------+--------+----------------+ + | id | field 0 | field 1 | active | gender | language | + +--------+-----------+-------------+-----------+--------+----------------+ + | 1 [v] | Tab | Limpenny | true [b] | Male | Kazakh | + +--------+-----------+-------------+-----------+--------+----------------+ + | 2 [v] | Manda | Hadgraft | false [b] | Female | Bislama | + +--------+-----------+-------------+-----------+--------+----------------+ + | 3 [v] | Mickie | Boreham | false [b] | Male | Swahili | + +--------+-----------+-------------+-----------+--------+----------------+ + | 4 [v] | Celinka | Brookfield | false [b] | Female | Gagauz | + +--------+-----------+-------------+-----------+--------+----------------+ + | 5 [v] | Muffin | Bleas | false [b] | Female | Hiri Motu | + +--------+-----------+-------------+-----------+--------+----------------+ + | 6 [v] | Jackelyn | Crumb | false [b] | Female | Northern Sotho | + +--------+-----------+-------------+-----------+--------+----------------+ + | 7 [v] | Tessie | Hollingsbee | true [b] | Female | Fijian | + +--------+-----------+-------------+-----------+--------+----------------+ + | 8 [v] | Yank | Wernham | false [b] | Male | Tok Pisin | + +--------+-----------+-------------+-----------+--------+----------------+ + | 9 [v] | Brendan | Lello | true [b] | Male | Fijian | + +--------+-----------+-------------+-----------+--------+----------------+ + | 10 [v] | Arabel | Rigg | false [b] | Female | Kyrgyz | + +--------+-----------+-------------+-----------+--------+----------------+ + | 11 [v] | Carolann | McElory | false [b] | Female | Pashto | + +--------+-----------+-------------+-----------+--------+----------------+ + | 12 [v] | Gasparo | Flack | false [b] | Male | Telugu | + +--------+-----------+-------------+-----------+--------+----------------+ + | 13 [v] | Eolanda | Polendine | false [b] | Female | Kashmiri | + +--------+-----------+-------------+-----------+--------+----------------+ + | 14 [v] | Brock | McCaw | false [b] | Male | Tsonga | + +--------+-----------+-------------+-----------+--------+----------------+ + | 15 [v] | Wenda | Espinas | false [b] | Female | Bulgarian | + +--------+-----------+-------------+-----------+--------+----------------+ + | 16 [v] | Zachary | Banane | true [b] | Male | Persian | + +--------+-----------+-------------+-----------+--------+----------------+ + | 17 [v] | Sallyanne | Mengue | false [b] | Female | Latvian | + +--------+-----------+-------------+-----------+--------+----------------+ + | 18 [v] | Elizabet | Hoofe | true [b] | Female | Tswana | + +--------+-----------+-------------+-----------+--------+----------------+ + | 19 [v] | Alastair | Hutchence | true [b] | Male | Ndebele | + +--------+-----------+-------------+-----------+--------+----------------+ + | 20 [v] | Minor | Worland | true [b] | Male | Dutch | + +--------+-----------+-------------+-----------+--------+----------------+ + +Again, this is very similar to what we saw in the XML-mapping example. Note +that cell values with ``[v]`` and ``[b]`` indicate numeric and boolean values, +respectively. Cells with no suffixes are string cells. + +Custom-mapping using map file +----------------------------- + +This process is also very similar to the process we followed for XML mapping. +We first auto-generate a map file, modify it, and use it to do the mapping again. +Since there isn't much difference between XML mapping and JSON mapping, let's +just go through this very quick. + +First step is to generate a map file for the auto-detected range by running: + +.. code-block:: + + orcus-json --mode map-gen -o map.json example.json + +which will write the mapping rules to ``map.json`` file. When you open the generated +map file, you will see something like the following: + +.. code-block:: javascript + + { + "sheets": [ + "range-0" + ], + "ranges": [ + { + "sheet": "range-0", + "row": 0, + "column": 0, + "row-header": true, + "fields": [ + { + "path": "$[]['id']" + }, + { + "path": "$[]['name'][0]" + }, + { + "path": "$[]['name'][1]" + }, + { + "path": "$[]['active']" + }, + { + "path": "$[]['gender']" + }, + { + "path": "$[]['language']" + } + ], + "row-groups": [ + { + "path": "$" + } + ] + } + ] + } + +The structure and content of the map file should look similar to the XML counterpart, +except that it is now in JSON format, and the paths are expressed in slightly +modified JSONPath bracket notation, where ``[]`` represents an array node with +no position specified. + +Now that we have a map file, let's modify this and use it to do the mapping once +again. Just like the XML mapping example, we are going to: + +* insert two blank rows above, +* drop the ``id`` and ``active`` fields, +* specify labels for the fields, and +* change the sheet name from ``range-0`` to ``My Data``. + +This is what we've come up with: + +.. code-block:: javascript + + { + "sheets": [ + "My Data" + ], + "ranges": [ + { + "sheet": "My Data", + "row": 2, + "column": 0, + "row-header": true, + "fields": [ + { + "path": "$[]['name'][0]", "label": "First Name" + }, + { + "path": "$[]['name'][1]", "label": "Last Name" + }, + { + "path": "$[]['gender']", "label": "Gender" + }, + { + "path": "$[]['language']", "label": "Language" + } + ], + "row-groups": [ + { + "path": "$" + } + ] + } + ] + } + +We'll save this file as ``map-modified.json``, and pass it to the ``orcus-json`` +command via ``--map`` or ``-m`` option: + +.. code-block:: + + orcus-json --mode map -o out -f flat -m map-modified.json example.json + +Let's check the output in ``out/My Data.txt`` and see what it contains: + +.. code-block:: + + --- + Sheet name: My Data + rows: 23 cols: 4 + +------------+-------------+--------+----------------+ + | | | | | + +------------+-------------+--------+----------------+ + | | | | | + +------------+-------------+--------+----------------+ + | First Name | Last Name | Gender | Language | + +------------+-------------+--------+----------------+ + | Tab | Limpenny | Male | Kazakh | + +------------+-------------+--------+----------------+ + | Manda | Hadgraft | Female | Bislama | + +------------+-------------+--------+----------------+ + | Mickie | Boreham | Male | Swahili | + +------------+-------------+--------+----------------+ + | Celinka | Brookfield | Female | Gagauz | + +------------+-------------+--------+----------------+ + | Muffin | Bleas | Female | Hiri Motu | + +------------+-------------+--------+----------------+ + | Jackelyn | Crumb | Female | Northern Sotho | + +------------+-------------+--------+----------------+ + | Tessie | Hollingsbee | Female | Fijian | + +------------+-------------+--------+----------------+ + | Yank | Wernham | Male | Tok Pisin | + +------------+-------------+--------+----------------+ + | Brendan | Lello | Male | Fijian | + +------------+-------------+--------+----------------+ + | Arabel | Rigg | Female | Kyrgyz | + +------------+-------------+--------+----------------+ + | Carolann | McElory | Female | Pashto | + +------------+-------------+--------+----------------+ + | Gasparo | Flack | Male | Telugu | + +------------+-------------+--------+----------------+ + | Eolanda | Polendine | Female | Kashmiri | + +------------+-------------+--------+----------------+ + | Brock | McCaw | Male | Tsonga | + +------------+-------------+--------+----------------+ + | Wenda | Espinas | Female | Bulgarian | + +------------+-------------+--------+----------------+ + | Zachary | Banane | Male | Persian | + +------------+-------------+--------+----------------+ + | Sallyanne | Mengue | Female | Latvian | + +------------+-------------+--------+----------------+ + | Elizabet | Hoofe | Female | Tswana | + +------------+-------------+--------+----------------+ + | Alastair | Hutchence | Male | Ndebele | + +------------+-------------+--------+----------------+ + | Minor | Worland | Male | Dutch | + +------------+-------------+--------+----------------+ + +The ``id`` and ``active`` fields are gone, the remaining fields have custom +labels we specified, and there are two blank rows above. It appears that all +the changes we have intended have been properly applied. diff --git a/doc/notes/xml_map/example.xml b/doc/notes/xml_map/example.xml new file mode 100644 index 0000000..0dbb6d4 --- /dev/null +++ b/doc/notes/xml_map/example.xml @@ -0,0 +1,183 @@ +<?xml version="1.0" encoding="UTF-8"?> +<dataset> + <record id="1"> + <name> + <first>Tab</first> + <last>Limpenny</last> + </name> + <active>true</active> + <gender>Male</gender> + <language>Kazakh</language> + </record> + <record id="2"> + <name> + <first>Manda</first> + <last>Hadgraft</last> + </name> + <active>false</active> + <gender>Female</gender> + <language>Bislama</language> + </record> + <record id="3"> + <name> + <first>Mickie</first> + <last>Boreham</last> + </name> + <active>false</active> + <gender>Male</gender> + <language>Swahili</language> + </record> + <record id="4"> + <name> + <first>Celinka</first> + <last>Brookfield</last> + </name> + <active>false</active> + <gender>Female</gender> + <language>Gagauz</language> + </record> + <record id="5"> + <name> + <first>Muffin</first> + <last>Bleas</last> + </name> + <active>false</active> + <gender>Female</gender> + <language>Hiri Motu</language> + </record> + <record id="6"> + <name> + <first>Jackelyn</first> + <last>Crumb</last> + </name> + <active>false</active> + <gender>Female</gender> + <language>Northern Sotho</language> + </record> + <record id="7"> + <name> + <first>Tessie</first> + <last>Hollingsbee</last> + </name> + <active>true</active> + <gender>Female</gender> + <language>Fijian</language> + </record> + <record id="8"> + <name> + <first>Yank</first> + <last>Wernham</last> + </name> + <active>false</active> + <gender>Male</gender> + <language>Tok Pisin</language> + </record> + <record id="9"> + <name> + <first>Brendan</first> + <last>Lello</last> + </name> + <active>true</active> + <gender>Male</gender> + <language>Fijian</language> + </record> + <record id="10"> + <name> + <first>Arabel</first> + <last>Rigg</last> + </name> + <active>false</active> + <gender>Female</gender> + <language>Kyrgyz</language> + </record> + <record id="11"> + <name> + <first>Carolann</first> + <last>McElory</last> + </name> + <active>false</active> + <gender>Female</gender> + <language>Pashto</language> + </record> + <record id="12"> + <name> + <first>Gasparo</first> + <last>Flack</last> + </name> + <active>false</active> + <gender>Male</gender> + <language>Telugu</language> + </record> + <record id="13"> + <name> + <first>Eolanda</first> + <last>Polendine</last> + </name> + <active>false</active> + <gender>Female</gender> + <language>Kashmiri</language> + </record> + <record id="14"> + <name> + <first>Brock</first> + <last>McCaw</last> + </name> + <active>false</active> + <gender>Male</gender> + <language>Tsonga</language> + </record> + <record id="15"> + <name> + <first>Wenda</first> + <last>Espinas</last> + </name> + <active>false</active> + <gender>Female</gender> + <language>Bulgarian</language> + </record> + <record id="16"> + <name> + <first>Zachary</first> + <last>Banane</last> + </name> + <active>true</active> + <gender>Male</gender> + <language>Persian</language> + </record> + <record id="17"> + <name> + <first>Sallyanne</first> + <last>Mengue</last> + </name> + <active>false</active> + <gender>Female</gender> + <language>Latvian</language> + </record> + <record id="18"> + <name> + <first>Elizabet</first> + <last>Hoofe</last> + </name> + <active>true</active> + <gender>Female</gender> + <language>Tswana</language> + </record> + <record id="19"> + <name> + <first>Alastair</first> + <last>Hutchence</last> + </name> + <active>true</active> + <gender>Male</gender> + <language>Ndebele</language> + </record> + <record id="20"> + <name> + <first>Minor</first> + <last>Worland</last> + </name> + <active>true</active> + <gender>Male</gender> + <language>Dutch</language> + </record> +</dataset> diff --git a/doc/notes/xml_map/index.rst b/doc/notes/xml_map/index.rst new file mode 100644 index 0000000..258920a --- /dev/null +++ b/doc/notes/xml_map/index.rst @@ -0,0 +1,314 @@ + +.. _mapping-xml-to-ss: + +Mapping XML to spreadsheet +========================== + +In this tutorial, we will go over how to use the ``orcus-xml`` command to map an +XML content into a spreadsheet document. We will be using :download:`this sample XML +document <example.xml>` throughout this tutorial. + +Examining the structure of input XML document +--------------------------------------------- + +First, let's examine the general structure of this XML document: + +.. code-block:: XML + + <?xml version="1.0" encoding="UTF-8"?> + <dataset> + <record id="1"> + <name> + <first>Tab</first> + <last>Limpenny</last> + </name> + <active>true</active> + <gender>Male</gender> + <language>Kazakh</language> + </record> + <record id="2"> + <name> + <first>Manda</first> + <last>Hadgraft</last> + </name> + <active>false</active> + <gender>Female</gender> + <language>Bislama</language> + </record> + <record id="3"> + + ... + + +It starts with the ``<dataset>`` element as its root element, which contains +recurring ``<record>`` elements each of which contains multiple fields. By +looking at each ``<record>`` element structure, you can easily infer how the +record content is structured. You can also run ``orcus-xml`` in structure +mode in order to detect the structure of its content. + +Running the following command + +.. code-block:: + + orcus-xml --mode structure example.xml + +should generate the following output: + +.. code-block:: + + /dataset + /dataset/record[*] + /dataset/record[*]/@id + /dataset/record[*]/name + /dataset/record[*]/name/first + /dataset/record[*]/name/last + /dataset/record[*]/active + /dataset/record[*]/gender + /dataset/record[*]/language + +This output lists the paths of all encountered "leaf node" items one item per +line, in order of occurrence. Each path is expressed in a XPath-like format, +except for recurring "anchor" elements which are suffixed with the ``[*]`` +symbols. An anchor element in this context is defined as a recurring non-leaf +element that contains either an attribute or a leaf element. You can think of +anchor elements as elements that define the individual record boundaries. + +Auto-mapping the XML document +----------------------------- + +Mapping this XML document to a spreadsheet document can be done by simply running +``orcus-xml`` in map mode. You also need to specify the output format type and +the output directory in order to see the content of the mapped spreadsheet +document. Running the command: + +.. code-block:: + + orcus-xml --mode map -f flat -o out example.xml + +will create an output file named ``out/range-0.txt`` which contains the following: + +.. code-block:: + + --- + Sheet name: range-0 + rows: 21 cols: 6 + +--------+-----------+-------------+--------+--------+----------------+ + | id | first | last | active | gender | language | + +--------+-----------+-------------+--------+--------+----------------+ + | 1 [v] | Tab | Limpenny | true | Male | Kazakh | + +--------+-----------+-------------+--------+--------+----------------+ + | 2 [v] | Manda | Hadgraft | false | Female | Bislama | + +--------+-----------+-------------+--------+--------+----------------+ + | 3 [v] | Mickie | Boreham | false | Male | Swahili | + +--------+-----------+-------------+--------+--------+----------------+ + | 4 [v] | Celinka | Brookfield | false | Female | Gagauz | + +--------+-----------+-------------+--------+--------+----------------+ + | 5 [v] | Muffin | Bleas | false | Female | Hiri Motu | + +--------+-----------+-------------+--------+--------+----------------+ + | 6 [v] | Jackelyn | Crumb | false | Female | Northern Sotho | + +--------+-----------+-------------+--------+--------+----------------+ + | 7 [v] | Tessie | Hollingsbee | true | Female | Fijian | + +--------+-----------+-------------+--------+--------+----------------+ + | 8 [v] | Yank | Wernham | false | Male | Tok Pisin | + +--------+-----------+-------------+--------+--------+----------------+ + | 9 [v] | Brendan | Lello | true | Male | Fijian | + +--------+-----------+-------------+--------+--------+----------------+ + | 10 [v] | Arabel | Rigg | false | Female | Kyrgyz | + +--------+-----------+-------------+--------+--------+----------------+ + | 11 [v] | Carolann | McElory | false | Female | Pashto | + +--------+-----------+-------------+--------+--------+----------------+ + | 12 [v] | Gasparo | Flack | false | Male | Telugu | + +--------+-----------+-------------+--------+--------+----------------+ + | 13 [v] | Eolanda | Polendine | false | Female | Kashmiri | + +--------+-----------+-------------+--------+--------+----------------+ + | 14 [v] | Brock | McCaw | false | Male | Tsonga | + +--------+-----------+-------------+--------+--------+----------------+ + | 15 [v] | Wenda | Espinas | false | Female | Bulgarian | + +--------+-----------+-------------+--------+--------+----------------+ + | 16 [v] | Zachary | Banane | true | Male | Persian | + +--------+-----------+-------------+--------+--------+----------------+ + | 17 [v] | Sallyanne | Mengue | false | Female | Latvian | + +--------+-----------+-------------+--------+--------+----------------+ + | 18 [v] | Elizabet | Hoofe | true | Female | Tswana | + +--------+-----------+-------------+--------+--------+----------------+ + | 19 [v] | Alastair | Hutchence | true | Male | Ndebele | + +--------+-----------+-------------+--------+--------+----------------+ + | 20 [v] | Minor | Worland | true | Male | Dutch | + +--------+-----------+-------------+--------+--------+----------------+ + +We are using the ``flat`` format type which writes the data range of a sheet +in a human-readable grid output. + +The mapped sheet content is the result of the automatic mapping of the original +XML document. In automatic mapping, all attributes and element contents that +can be mapped as field values will be mapped, and the sheet name will be automatically +generated. + +Although not applicable to this particular example, if the source XML document +contains multiple mappable ranges, they will get mapped to multiple sheets, one +sheet per range. + +Custom-mapping using map file +----------------------------- + +Generating map file +^^^^^^^^^^^^^^^^^^^ + +Automatic-mapping should work reasonably well in many cases, but sometime you +may need to customize how you map your data, and this section will go over how +you could do just that. + +The short answer is that you will need to create a map definition file and pass +it to the ``orcus-xml`` command via ``-m`` or ``--map`` option. The easiest +way to go about it is to have one generated for you. + +Running the following command: + +.. code-block:: + + orcus-xml --mode map-gen -o map.xml example.xml + +will generate a map file ``map.xml`` which contains the mapping definition based +on the auto-detected structure. The content of ``map.xml`` generated from the +example XML document should look like this: + +.. code-block:: XML + + <?xml version="1.0"?> + <map xmlns="https://gitlab.com/orcus/orcus/xml-map-definition"> + <sheet name="range-0"/> + <range sheet="range-0" row="0" column="0"> + <field path="/dataset/record/@id"/> + <field path="/dataset/record/name/first"/> + <field path="/dataset/record/name/last"/> + <field path="/dataset/record/active"/> + <field path="/dataset/record/gender"/> + <field path="/dataset/record/language"/> + <row-group path="/dataset/record"/> + </range> + </map> + +Note that since the original map file content does not include any line breaks, +you may want to run it through an XML reformatting tool such as +`xmllint <http://xmlsoft.org/xmllint.html>`_ to "prettify" its content before +viewing. + +Map file structure +^^^^^^^^^^^^^^^^^^ + +Hopefully the structure of the map file is self-explanatory, but let us go over +it a little. The ``map`` element is the root element which contains one or +more ``sheet`` elements and one or more ``range`` elements. The ``sheet`` +elements specify how many sheets should be created in the spreadsheet model, +and what their names should be via their ``name`` attributes. The ordering of +the ``sheet`` elements will reflect the ordering of the sheets in the final +spreadsheet document. + +Each ``range`` element defines one mapped range of the source XML document, and +this element itself stores the top-left position of the range in the final +spreadsheet document via ``sheet``, ``row`` and ``column`` attributes. The ``range`` +element then contains one or more ``field`` elements, and one or more ``row-group`` +elements. + +Each ``field`` element defines one field within the mapped range and the path of +the value in the source XML document. The path is expressed in XPath format. +The ordering of the ``field`` elements reflects the ordering of the field columns +in the final spreadsheet document. + +Each ``row-group`` element defines the path of an anchor element. For a simple +XML document such as our current example, you only need one ``row-group`` +element. But an XML document with more complex structure may need more than one +``row-group`` element to properly map nested recurring elements. + +Modifying map file +^^^^^^^^^^^^^^^^^^ + +Let's make some changes to this map file. First, the default sheet name ``range-0`` +doesn't look very good, so we'll change it to ``My Data``. Also, let's assume +we aren't really interested in the ID values or the "active" values (whatever +they may mean), so we'll drop those two fields. Additionally, since we don't like +the default field labels, which are taken literally from the names of the corresponding +attributes or elements, we'll define custom field labels. And finally, we'll add +two empty rows above the data range so that we can edit in some nice title afterward. + +The modified map file will look like this: + +.. code-block:: XML + + <?xml version="1.0"?> + <map xmlns="https://gitlab.com/orcus/orcus/xml-map-definition"> + <sheet name="My Data"/> + <range sheet="My Data" row="2" column="0"> + <field path="/dataset/record/name/first" label="First Name"/> + <field path="/dataset/record/name/last" label="Last Name"/> + <field path="/dataset/record/gender" label="Gender"/> + <field path="/dataset/record/language" label="Language"/> + <row-group path="/dataset/record"/> + </range> + </map> + +We'll save this as ``map-modified.xml``, and pass it to the ``orcus-xml`` command +this time around like so: + +.. code-block:: + + ./src/orcus-xml --mode map -m map-modified.xml -o out -f flat example.xml + +This will output the content of the sheet to ``out/My Data.txt``, which will +look like this: + +.. code-block:: + + --- + Sheet name: My Data + rows: 23 cols: 4 + +------------+-------------+--------+----------------+ + | | | | | + +------------+-------------+--------+----------------+ + | | | | | + +------------+-------------+--------+----------------+ + | First Name | Last Name | Gender | Language | + +------------+-------------+--------+----------------+ + | Tab | Limpenny | Male | Kazakh | + +------------+-------------+--------+----------------+ + | Manda | Hadgraft | Female | Bislama | + +------------+-------------+--------+----------------+ + | Mickie | Boreham | Male | Swahili | + +------------+-------------+--------+----------------+ + | Celinka | Brookfield | Female | Gagauz | + +------------+-------------+--------+----------------+ + | Muffin | Bleas | Female | Hiri Motu | + +------------+-------------+--------+----------------+ + | Jackelyn | Crumb | Female | Northern Sotho | + +------------+-------------+--------+----------------+ + | Tessie | Hollingsbee | Female | Fijian | + +------------+-------------+--------+----------------+ + | Yank | Wernham | Male | Tok Pisin | + +------------+-------------+--------+----------------+ + | Brendan | Lello | Male | Fijian | + +------------+-------------+--------+----------------+ + | Arabel | Rigg | Female | Kyrgyz | + +------------+-------------+--------+----------------+ + | Carolann | McElory | Female | Pashto | + +------------+-------------+--------+----------------+ + | Gasparo | Flack | Male | Telugu | + +------------+-------------+--------+----------------+ + | Eolanda | Polendine | Female | Kashmiri | + +------------+-------------+--------+----------------+ + | Brock | McCaw | Male | Tsonga | + +------------+-------------+--------+----------------+ + | Wenda | Espinas | Female | Bulgarian | + +------------+-------------+--------+----------------+ + | Zachary | Banane | Male | Persian | + +------------+-------------+--------+----------------+ + | Sallyanne | Mengue | Female | Latvian | + +------------+-------------+--------+----------------+ + | Elizabet | Hoofe | Female | Tswana | + +------------+-------------+--------+----------------+ + | Alastair | Hutchence | Male | Ndebele | + +------------+-------------+--------+----------------+ + | Minor | Worland | Male | Dutch | + +------------+-------------+--------+----------------+ + +The new output now only contains four fields, with custom labels at the top, and +now we have two empty rows above just like we intended. |