1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
module com { module sun { module star { module i18n {
/*
Possible tokens to be parsed with parse...Token():
UPASCALPHA=[A-Z]
LOASCALPHA=[a-z]
ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
ASCDIGIT=[0-9]
ASC_UNDERSCORE='_'
ASC_SPACE=' '
ASC_HT='\0x9'
ASC_VT='\0xb'
ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
ASC_DBL_QUOTE=\";
ASC_QUOTE=\'
UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
ALNUM=ALPHA|DIGIT
CHAR=any character
WS=isWhiteSpace()
SIGN='+'|'-'
DECSEP=<locale dependent decimal separator>
GRPSEP=<locale dependent thousand separator>
EXPONENT=(E|e)[SIGN]1*ASCDIGIT
IDENTIFIER=ALPHA *ALNUM
UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
ANY_NAME=1*(ALNUM|DEFCHARS)
SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
ASC_NUMBER=[SIGN]*(1*ASCDIGIT *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
*/
/**
Character classification (upper, lower, digit, letter, number, ...)
and generic Unicode enabled parser.
<p> The interface groups three kinds of operations: case conversion
(toUpper(), toLower(), toTitle()), per-character and per-string
classification (getType(), getCharacterDirection(), getScript(),
getCharacterType(), getStringType()), and token parsing
(parseAnyToken(), parsePredefinedToken()). </p>
*/
published interface XCharacterClassification : com::sun::star::uno::XInterface
{
/** Convert lower case alpha to upper case alpha, starting at
position <em>nPos</em> for <em>nCount</em> code points.
@param aText
Text containing the characters to be converted.
@param nPos
Position in <em>aText</em> where the conversion starts.
@param nCount
Number of code points to convert.
@param aLocale
The locale to be used for the conversion.
@returns
The converted string.
*/
string toUpper( [in] string aText, [in] long nPos, [in] long nCount,
[in] com::sun::star::lang::Locale aLocale );
/** Convert upper case alpha to lower case alpha, starting at
position <em>nPos</em> for <em>nCount</em> code points.
@param aText
Text containing the characters to be converted.
@param nPos
Position in <em>aText</em> where the conversion starts.
@param nCount
Number of code points to convert.
@param aLocale
The locale to be used for the conversion.
@returns
The converted string.
*/
string toLower( [in] string aText, [in] long nPos, [in] long nCount,
[in] com::sun::star::lang::Locale aLocale );
/** Convert to title case, starting at
position <em>nPos</em> for <em>nCount</em> code points.
@param aText
Text containing the characters to be converted.
@param nPos
Position in <em>aText</em> where the conversion starts.
@param nCount
Number of code points to convert.
@param aLocale
The locale to be used for the conversion.
@returns
The converted string.
*/
string toTitle( [in] string aText, [in] long nPos, [in] long nCount,
[in] com::sun::star::lang::Locale aLocale );
/** Get UnicodeType of character at position <em>nPos</em>.
@param aText
Text containing the character to be examined.
@param nPos
Position of the character in <em>aText</em>.
@returns
The UnicodeType value of the character.
*/
short getType( [in] string aText, [in] long nPos );
/** Get DirectionProperty of character at position
<em>nPos</em>.
@param aText
Text containing the character to be examined.
@param nPos
Position of the character in <em>aText</em>.
@returns
The DirectionProperty value of the character.
*/
short getCharacterDirection( [in] string aText, [in] long nPos );
/** Get UnicodeScript of character at position <em>nPos</em>.
@param aText
Text containing the character to be examined.
@param nPos
Position of the character in <em>aText</em>.
@returns
The UnicodeScript value of the character.
*/
short getScript( [in] string aText, [in] long nPos );
/** Get KCharacterType of character at position <em>nPos</em>.
@param aText
Text containing the character to be examined.
@param nPos
Position of the character in <em>aText</em>.
@param aLocale
The locale to be used for the classification.
@returns
The KCharacterType flags of the character.
*/
long getCharacterType( [in] string aText, [in] long nPos,
[in] com::sun::star::lang::Locale aLocale );
/** Get accumulated KCharacterTypes of string starting
at position <em>nPos</em> of length <em>nCount</em> code points.
@param aText
Text containing the characters to be examined.
@param nPos
Position in <em>aText</em> where the examination starts.
@param nCount
Number of code points to examine.
@param aLocale
The locale to be used for the classification.
@returns
A number with appropriate flags set to indicate what type of
characters the string contains, each flag value being one of
KCharacterType values.
@note The accumulated bits of several characters are meaningless
as soon as characters of different classifications are
involved, which even may have a common subset like
KCharacterType::LETTER or KCharacterType::PRINTABLE, unless
it is to be determined what overall character properties are
present in the string. Use getCharacterType() of single
characters instead and handle bits as needed if sets of
character properties are to be obtained.
*/
long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
[in] com::sun::star::lang::Locale aLocale );
/**
Parse a string for a token starting at position <em>nPos</em>.
<p> A name or identifier must match the
KParseTokens criteria passed in
<em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
additionally contain characters of
<em>aUserDefinedCharactersStart</em> and/or
<em>aUserDefinedCharactersCont</em>. </p>
@returns
A filled ParseResult structure. If no
unambiguous token could be parsed,
ParseResult::TokenType will be set to
<b>0</b> (zero), other fields will contain the values parsed
so far.
<p> If a token may represent either a numeric value or a
name according to the passed Start/Cont-Flags/Chars, both
KParseType::ASC_NUMBER (or
KParseType::UNI_NUMBER) and
KParseType::IDENTNAME are set in
ParseResult::TokenType. </p>
@param aText
Text to be parsed.
@param nPos
Position where parsing starts.
@param aLocale
The locale, for example, for decimal and group separator or
character type determination.
@param nStartCharFlags
A set of KParseTokens constants determining the
allowed characters a name or identifier may start with.
@param aUserDefinedCharactersStart
A set of additionally allowed characters a name or
identifier may start with.
@param nContCharFlags
A set of KParseTokens constants determining the
allowed characters a name or identifier may continue with.
@param aUserDefinedCharactersCont
A set of additionally allowed characters a name or
identifier may continue with.
@code{.cpp}
using namespace ::com::sun::star::i18n;
// First character of an identifier may be any alphabetic or underscore.
sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
// Continuing characters may be any alphanumeric or underscore or dot.
sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
// No further characters assumed to be contained in an identifier
OUString aEmptyString;
// Parse any token.
ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
nStartFlags, aEmptyString, nContFlags, aEmptyString );
// Get parsed token.
if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
fValue = rRes.Value;
if ( rRes.TokenType & KParseType::IDENTNAME )
aName = aText.copy( nPos, rRes.EndPos - nPos );
else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
aName = rRes.DequotedNameOrString;
else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
aString = rRes.DequotedNameOrString;
else if ( rRes.TokenType & KParseType::BOOLEAN )
aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
@endcode
*/
ParseResult parseAnyToken(
[in] string aText,
[in] long nPos,
[in] com::sun::star::lang::Locale aLocale,
[in] long nStartCharFlags,
[in] string aUserDefinedCharactersStart,
[in] long nContCharFlags,
[in] string aUserDefinedCharactersCont
);
/**
Parse a string for a token of type <em>nTokenType</em> starting
at position <em>nPos</em>.
<p> Other parameters are the same as in
parseAnyToken(). If the actual token does not
match the passed <em>nTokenType</em> a
ParseResult::TokenType set to <b>0</b> (zero)
is returned. </p>
@returns
A ParseResult structure; ParseResult::TokenType is
<b>0</b> (zero) if the actual token does not match
<em>nTokenType</em>.
@param nTokenType
One or more of the KParseType constants.
@param aText
See #parseAnyToken
@param nPos
See #parseAnyToken
@param aLocale
See #parseAnyToken
@param nStartCharFlags
See #parseAnyToken
@param aUserDefinedCharactersStart
See #parseAnyToken
@param nContCharFlags
See #parseAnyToken
@param aUserDefinedCharactersCont
See #parseAnyToken
@code{.cpp}
// Determine if a given name is a valid name (not quoted) and contains
// only allowed characters.
using namespace ::com::sun::star::i18n;
// First character of an identifier may be any alphanumeric or underscore.
sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
// No further characters assumed to be contained in an identifier start.
OUString aEmptyString;
// Continuing characters may be any alphanumeric or underscore.
sal_Int32 nContFlags = nStartFlags;
// Additionally, continuing characters may contain a blank.
OUString aContChars( " " );
// Parse predefined (must be an IDENTNAME) token.
ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
nStartFlags, aEmptyString, nContFlags, aContChars );
// Test that it is an identifier name and that it spans the whole
// string, i.e. nothing else follows it.
bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
@endcode
*/
ParseResult parsePredefinedToken(
[in] long nTokenType,
[in] string aText,
[in] long nPos,
[in] com::sun::star::lang::Locale aLocale,
[in] long nStartCharFlags,
[in] string aUserDefinedCharactersStart,
[in] long nContCharFlags,
[in] string aUserDefinedCharactersCont
);
};
}; }; }; };
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|