summaryrefslogtreecommitdiffstats
path: root/testing/web-platform/tests/mathml/tools/operator-dictionary.py
blob: 8de654df15547393b21eb53c5d537bb36a6e8ee0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python3

from lxml import etree
from utils.misc import downloadWithProgressBar, UnicodeXMLURL, InlineAxisOperatorsURL
import json
import re
from utils import mathfont

NonBreakingSpace = 0x00A0


def parseHexaNumber(string):
    return int("0x%s" % string, 16)


def parseHexaSequence(string):
    return tuple(map(parseHexaNumber, string[1:].split("-")))


def parseSpaces(value, entry, names):
    for name in names:
        attributeValue = entry.get(name)
        if attributeValue is not None:
            value[name] = int(attributeValue)


def parseProperties(value, entry, names):
    attributeValue = entry.get("properties")
    if attributeValue is not None:
        for name in names:
            if attributeValue.find(name) >= 0:
                value[name] = True


def buildKeyAndValueFrom(characters, form):
    # Concatenate characters and form to build the key.
    key = ""
    for c in characters:
        key += chr(c)
    key += " " + form
    # But save characters as an individual property for easier manipulation in
    # this Python script.
    value = {
        "characters": characters,
    }
    return key, value


# Retrieve the spec files.
inlineAxisOperatorsTXT = downloadWithProgressBar(InlineAxisOperatorsURL)
unicodeXML = downloadWithProgressBar(UnicodeXMLURL)

# Extract the operator dictionary.
xsltTransform = etree.XSLT(etree.parse("./operator-dictionary.xsl"))

# Put the operator dictionary into a Python structure.
inlineAxisOperators = {}
with open(inlineAxisOperatorsTXT, mode="r") as f:
    for line in f:
        hexaString = re.match(r"^U\+([0-9A-F]+)", line).group(1)
        inlineAxisOperators[parseHexaNumber(hexaString)] = True

operatorDictionary = {}
root = xsltTransform(etree.parse(unicodeXML)).getroot()
for entry in root:
    characters = parseHexaSequence(entry.get("unicode"))
    assert characters != (NonBreakingSpace)
    key, value = buildKeyAndValueFrom(characters, entry.get("form"))
    # There is no dictionary-specified minsize/maxsize values, so no need to
    # parse them.
    # The fence, separator and priority properties don't have any effect on math
    # layout, so they are not added to the JSON file.
    parseSpaces(value, entry, ["lspace", "rspace"])
    parseProperties(value, entry, ["stretchy", "symmetric", "largeop",
                                   "movablelimits", "accent"])
    if (len(characters) == 1 and characters[0] in inlineAxisOperators):
        value["horizontal"] = True
    operatorDictionary[key] = value

# Create entries for the non-breaking space in all forms in order to test the
# default for operators outside the official dictionary.
for form in ["infix", "prefix", "suffix"]:
    key, value = buildKeyAndValueFrom(tuple([NonBreakingSpace]), form)
    operatorDictionary[key] = value

# Create a WOFF font with glyphs for all the operator strings.
font = mathfont.create("operators", "Copyright (c) 2019 Igalia S.L.")

# Set parameters for largeop and stretchy tests.
font.math.DisplayOperatorMinHeight = 2 * mathfont.em
font.math.MinConnectorOverlap = mathfont.em // 2

# Set parameters for accent tests so that we only have large gap when
# overscript is an accent.
font.math.UpperLimitBaselineRiseMin = 0
font.math.StretchStackTopShiftUp = 0
font.math.AccentBaseHeight = 2 * mathfont.em
font.math.OverbarVerticalGap = 0

mathfont.createSizeVariants(font, True)

# Ensure a glyph exists for the combining characters that are handled specially
# in the specification:
# U+0338 COMBINING LONG SOLIDUS OVERLAY
# U+20D2 COMBINING LONG VERTICAL LINE OVERLAY
for combining_character in [0x338, 0x20D2]:
    mathfont.createSquareGlyph(font, combining_character)

for key in operatorDictionary:
    value = operatorDictionary[key]
    for c in value["characters"]:
        if c in font:
            continue
        if c == NonBreakingSpace:
            g = font.createChar(c)
            mathfont.drawRectangleGlyph(g, mathfont.em, mathfont.em // 3, 0)
        else:
            mathfont.createSquareGlyph(font, c)
        mathfont.createStretchy(font, c, c in inlineAxisOperators)
mathfont.save(font)

# Generate the python file.
for key in operatorDictionary:
    del operatorDictionary[key]["characters"]  # Remove this temporary value.
JSON = {
    "comment": "This file was automatically generated by operator-dictionary.py. Do not edit.",
    "dictionary": operatorDictionary
}
with open('../support/operator-dictionary.json', 'w') as fp:
    json.dump(JSON, fp, sort_keys=True, ensure_ascii=True)