diff options
Diffstat (limited to 'offapi/com/sun/star/i18n/XCharacterClassification.idl')
-rw-r--r-- | offapi/com/sun/star/i18n/XCharacterClassification.idl | 274 |
1 files changed, 274 insertions, 0 deletions
diff --git a/offapi/com/sun/star/i18n/XCharacterClassification.idl b/offapi/com/sun/star/i18n/XCharacterClassification.idl new file mode 100644 index 000000000..1bc8c7c2a --- /dev/null +++ b/offapi/com/sun/star/i18n/XCharacterClassification.idl @@ -0,0 +1,274 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef __com_sun_star_i18n_XCharacterClassification_idl__ +#define __com_sun_star_i18n_XCharacterClassification_idl__ + +#include <com/sun/star/i18n/ParseResult.idl> +#include <com/sun/star/lang/Locale.idl> +#include <com/sun/star/uno/XInterface.idl> + + +module com { module sun { module star { module i18n { + + +/* + +Possible tokens to be parsed with parse...Token(): + +UPASCALPHA=[A-Z] +LOASCALPHA=[a-z] +ASCALPHA=1*(UPASCALPHA|LOASCALPHA) +ASCDIGIT=[0-9] +ASC_UNDERSCORE='_' +ASC_SPACE=' ' +ASC_HT='\0x9' +ASC_VT='\0xb' +ASC_WS=ASC_SPACE|ASC_HT|ASC_VT +ASC_DBL_QUOTE=\" +ASC_QUOTE=\' +UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE) + +ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit +ALNUM=ALPHA|DIGIT +CHAR=anycharacter +WS=isWhiteSpace() +SIGN='+'|'-' +DECSEP=<locale dependent decimal separator> +GRPSEP=<locale dependent thousand separator> +EXPONENT=(E|e)[SIGN]1*ASCDIGIT + +IDENTIFIER=ALPHA *ALNUM +UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE) +ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS) +ANY_NAME=1*(ALNUM|DEFCHARS) +SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE +DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE +ASC_NUMBER=[SIGN]*(1*ASCDIGIT *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT] +NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT] + +*/ + + +/** + Character classification (upper, lower, digit, letter, number, ...) + and generic Unicode enabled parser. + */ + +published interface XCharacterClassification : com::sun::star::uno::XInterface +{ + /** Convert lower case alpha to upper case alpha, starting at + position <em>nPos</em> for <em>nCount</em> code points. + */ + string toUpper( [in] string aText, [in] long nPos, [in] long nCount, + [in] com::sun::star::lang::Locale aLocale ); + + /** Convert upper case alpha to lower case alpha, starting at + position <em>nPos</em> for <em>nCount</em> code points. 
+ */ + string toLower( [in] string aText, [in] long nPos, [in] long nCount, + [in] com::sun::star::lang::Locale aLocale ); + + /** Convert to title case, starting at + position <em>nPos</em> for <em>nCount</em> code points. + */ + string toTitle( [in] string aText, [in] long nPos, [in] long nCount, + [in] com::sun::star::lang::Locale aLocale ); + + /// Get UnicodeType of character at position <em>nPos</em>. + short getType( [in] string aText, [in] long nPos ); + + /** Get DirectionProperty of character at position + <em>nPos</em>. + */ + short getCharacterDirection( [in] string aText, [in] long nPos ); + + /// Get UnicodeScript of character at position <em>nPos</em>. + short getScript( [in] string aText, [in] long nPos ); + + /// Get KCharacterType of character at position <em>nPos</em>. + long getCharacterType( [in] string aText, [in] long nPos, + [in] com::sun::star::lang::Locale aLocale ); + + /** Get accumulated KCharacterTypes of string starting + at position <em>nPos</em> of length <em>nCount</em> code points. + + @returns + A number with appropriate flags set to indicate what type of + characters the string contains, each flag value being one of + KCharacterType values. + */ + long getStringType( [in] string aText, [in] long nPos, [in] long nCount, + [in] com::sun::star::lang::Locale aLocale ); + + + /** + Parse a string for a token starting at position <em>nPos</em>. + + <p> A name or identifier must match the + KParseTokens criteria passed in + <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may + additionally contain characters of + <em>aUserDefinedCharactersStart</em> and/or + <em>aUserDefinedCharactersCont</em>. </p> + + + @returns + A filled ParseResult structure. If no + unambiguous token could be parsed, + ParseResult::TokenType will be set to + <b>0</b> (zero), other fields will contain the values parsed + so far. 
+ + <p> If a token may represent either a numeric value or a + name according to the passed Start/Cont-Flags/Chars, both + KParseType::ASC_NUMBER (or + KParseType::UNI_NUMBER) and + KParseType::IDENTNAME are set in + ParseResult::TokenType. </p> + + @param aText + Text to be parsed. + + @param nPos + Position where parsing starts. + + @param aLocale + The locale, for example, for decimal and group separator or + character type determination. + + @param nStartCharFlags + A set of KParseTokens constants determining the + allowed characters a name or identifier may start with. + + @param aUserDefinedCharactersStart + A set of additionally allowed characters a name or + identifier may start with. + + @param nContCharFlags + A set of KParseTokens constants determining the + allowed characters a name or identifier may continue with. + + @param aUserDefinedCharactersCont + A set of additionally allowed characters a name or + identifier may continue with. + + @code{.cpp} + using namespace ::com::sun::star::i18n; + // First character of an identifier may be any alphabetic or underscore. + sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE; + // Continuing characters may be any alphanumeric or underscore or dot. + sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT; + // No further characters assumed to be contained in an identifier + OUString aEmptyString; + // Parse any token. + ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale, + nStartFlags, aEmptyString, nContFlags, aEmptyString ); + // Get parsed token. 
+ if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) ) + fValue = rRes.Value; + if ( rRes.TokenType & KParseType::IDENTNAME ) + aName = aText.copy( nPos, rRes.EndPos - nPos ); + else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME ) + aName = rRes.DequotedNameOrString; + else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING ) + aString = rRes.DequotedNameOrString; + else if ( rRes.TokenType & KParseType::BOOLEAN ) + aSymbol = aText.copy( nPos, rRes.EndPos - nPos ); + else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR ) + aSymbol = aText.copy( nPos, rRes.EndPos - nPos ); + @endcode + */ + + ParseResult parseAnyToken( + [in] string aText, + [in] long nPos, + [in] com::sun::star::lang::Locale aLocale, + [in] long nStartCharFlags, + [in] string aUserDefinedCharactersStart, + [in] long nContCharFlags, + [in] string aUserDefinedCharactersCont + ); + + /** + Parse a string for a token of type <em>nTokenType</em> starting + at position <em>nPos</em>. + + <p> Other parameters are the same as in + parseAnyToken(). If the actual token does not + match the passed <em>nTokenType</em> a + ParseResult::TokenType set to <b>0</b> (zero) + is returned. </p> + + @param nTokenType + One or more of the KParseType constants. + + @param aText + See #parseAnyToken + @param nPos + See #parseAnyToken + @param aLocale + See #parseAnyToken + @param nStartCharFlags + See #parseAnyToken + @param aUserDefinedCharactersStart + See #parseAnyToken + @param nContCharFlags + See #parseAnyToken + @param aUserDefinedCharactersCont + See #parseAnyToken + + @code{.cpp} + // Determine if a given name is a valid name (not quoted) and contains + // only allowed characters. + using namespace ::com::sun::star::i18n; + // First character of an identifier may be any alphanumeric or underscore. + sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE; + // No further characters assumed to be contained in an identifier start. 
+ OUString aEmptyString; + // Continuing characters may be any alphanumeric or underscore. + sal_Int32 nContFlags = nStartFlags; + // Additionally, continuing characters may contain a blank. + OUString aContChars( " " ); + // Parse predefined (must be an IDENTNAME) token. + ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale, + nStartFlags, aEmptyString, nContFlags, aContChars ); + // Test if it is an identifier name and if it is the only token, + // i.e. nothing else follows it. + bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len(); + @endcode + */ + + ParseResult parsePredefinedToken( + [in] long nTokenType, + [in] string aText, + [in] long nPos, + [in] com::sun::star::lang::Locale aLocale, + [in] long nStartCharFlags, + [in] string aUserDefinedCharactersStart, + [in] long nContCharFlags, + [in] string aUserDefinedCharactersCont + ); +}; + +}; }; }; }; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |