summaryrefslogtreecommitdiffstats
path: root/offapi/com/sun/star/i18n/KParseTokens.idl
blob: accc425660a05cdc09f2aba3a13749518eb6169d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
#ifndef __com_sun_star_i18n_KParseTokens_idl__
#define __com_sun_star_i18n_KParseTokens_idl__


module com {  module sun {  module star {  module i18n {


/**
    These constants specify the characters a name or identifier token to
    be parsed can have.

    <p> They are passed to
    XCharacterClassification::parseAnyToken() and
    XCharacterClassification::parsePredefinedToken().
    They are also set in the ParseResult::StartFlags
    and ParseResult::ContFlags. </p>

    <p> Each single-valued constant below occupies a bit of its own, so
    values can be combined with a bitwise OR. Frequently needed
    combinations are predefined at the end of this group. </p>
 */

published constants KParseTokens
{
    /// ASCII A-Z upper alpha
    const long ASC_UPALPHA          = 0x00000001;

    /// ASCII a-z lower alpha
    const long ASC_LOALPHA          = 0x00000002;

    /// ASCII 0-9 digit
    const long ASC_DIGIT            = 0x00000004;

    /// ASCII '_' underscore
    const long ASC_UNDERSCORE       = 0x00000008;

    /// ASCII '$' dollar
    const long ASC_DOLLAR           = 0x00000010;

    /// ASCII '.' dot/point
    const long ASC_DOT              = 0x00000020;

    /// ASCII ':' colon
    const long ASC_COLON            = 0x00000040;

    // NOTE(review): the bit values 0x00000080 and 0x00000100 are not used
    // by any constant in this group -- presumably reserved; confirm before
    // assigning them.

    /// Special value to allow control characters (0x00 &lt; char &lt; 0x20)
    const long ASC_CONTROL          = 0x00000200;

    /** Special value to allow anything below 128 except control
        characters. <strong>Not</strong> set in
        ParseResult. */
    const long ASC_ANY_BUT_CONTROL  = 0x00000400;

    /** Additional flag set in ParseResult::StartFlags
        or ParseResult::ContFlags. Set if none of the
        above ASC_... (except ASC_ANY_...) single values match an ASCII
        character parsed. */
    const long ASC_OTHER            = 0x00000800;

    /// Unicode (above 127) upper case letter
    const long UNI_UPALPHA          = 0x00001000;

    /// Unicode (above 127) lower case letter
    const long UNI_LOALPHA          = 0x00002000;

    /// Unicode (above 127) decimal digit number
    const long UNI_DIGIT            = 0x00004000;

    /// Unicode (above 127) title case letter
    const long UNI_TITLE_ALPHA      = 0x00008000;

    /// Unicode (above 127) modifier letter
    const long UNI_MODIFIER_LETTER  = 0x00010000;

    /// Unicode (above 127) other letter
    const long UNI_OTHER_LETTER     = 0x00020000;

    /// Unicode (above 127) letter number
    const long UNI_LETTER_NUMBER    = 0x00040000;

    /// Unicode (above 127) other number
    const long UNI_OTHER_NUMBER     = 0x00080000;

    // NOTE(review): the bit values 0x00100000 through 0x04000000 are not
    // used by any constant in this group -- presumably reserved; confirm
    // before assigning them.

    /** If this bit is set in <em>nContCharFlags</em> parameters, the
        locale's group separator characters in numbers are accepted and
        ignored/skipped. Else a group separator in a number ends the
        current token. A leading group separator is never accepted. If
        an accepted group separator was encountered in a number
        (ParseResult::TokenType is KParseType::ASC_NUMBER or
        KParseType::UNI_NUMBER) this bit is also set in
        ParseResult::ContFlags.

        <p> <strong>NOTE:</strong> absence of this bit in
        <em>nContCharFlags</em> changes the default behaviour that in
        prior releases accepted numbers with group separators but led
        to unexpected results when parsing formula expressions where the
        user entered a (wrong) separator that happened to be the group
        separator instead of an intended decimal separator. Usually
        inline numbers in a formula expression do not contain group
        separators. </p>

        @since LibreOffice 6.2
     */
    const long GROUP_SEPARATOR_IN_NUMBER = 0x08000000;

    /** If this bit is set in <em>nContCharFlags</em> parameters and a
        string enclosed in double quotes is parsed and two consecutive
        double quotes are encountered, the string is ended. If this bit
        is not set, the two double quotes are parsed as one escaped
        double quote and string parsing continues. The bit is ignored in
        <em>nStartCharFlags</em> parameters.

        <p> Example: <br/>
        "abc""def"  -->  bit not set  =>  abc"def <br/>
        "abc""def"  -->  bit set  =>  abc </p>
     */
    const long TWO_DOUBLE_QUOTES_BREAK_STRING   = 0x10000000;

    /** Additional flag set in ParseResult::StartFlags
        or ParseResult::ContFlags. Set if none of the
        above UNI_... single values match a Unicode character parsed. */
    const long UNI_OTHER            = 0x20000000;

    /** Only valid for <em>nStartCharFlags</em> parameter to
        XCharacterClassification::parseAnyToken() and
        XCharacterClassification::parsePredefinedToken(),
        ignored on <em>nContCharFlags</em> parameter.
        <strong>Not</strong> set in ParseResult. */
    const long IGNORE_LEADING_WS    = 0x40000000;


    // Useful combinations: each constant below is the bitwise OR of
    // constants defined earlier in this group and introduces no new bit.

    /// ASCII a-zA-Z lower or upper alpha
    const long ASC_ALPHA            = ASC_UPALPHA | ASC_LOALPHA;

    /// ASCII a-zA-Z0-9 alphanumeric
    const long ASC_ALNUM            = ASC_ALPHA | ASC_DIGIT;

    /// Unicode (above 127) lower or upper or title case alpha
    const long UNI_ALPHA            = UNI_UPALPHA | UNI_LOALPHA | UNI_TITLE_ALPHA;

    /// Unicode (above 127) alphanumeric
    const long UNI_ALNUM            = UNI_ALPHA | UNI_DIGIT;

    /// Unicode (above 127) alpha or letter
    const long UNI_LETTER           = UNI_ALPHA | UNI_MODIFIER_LETTER |
                                        UNI_OTHER_LETTER;

    /// Unicode (above 127) number
    const long UNI_NUMBER           = UNI_DIGIT | UNI_LETTER_NUMBER |
                                        UNI_OTHER_NUMBER;

    /// any (ASCII or Unicode) alpha
    const long ANY_ALPHA            = ASC_ALPHA | UNI_ALPHA;

    /// any (ASCII or Unicode) digit
    const long ANY_DIGIT            = ASC_DIGIT | UNI_DIGIT;

    /// any (ASCII or Unicode) alphanumeric
    const long ANY_ALNUM            = ASC_ALNUM | UNI_ALNUM;

    /// any (ASCII or Unicode) letter
    const long ANY_LETTER           = ASC_ALPHA | UNI_LETTER;

    /// any (ASCII or Unicode) number
    const long ANY_NUMBER           = ASC_DIGIT | UNI_NUMBER;

    /// any (ASCII or Unicode) letter or number
    const long ANY_LETTER_OR_NUMBER = ANY_LETTER | ANY_NUMBER;
};

}; }; }; };

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */