1 files changed, 274 insertions, 0 deletions
diff --git a/offapi/com/sun/star/i18n/XCharacterClassification.idl b/offapi/com/sun/star/i18n/XCharacterClassification.idl
new file mode 100644
index 000000000..1bc8c7c2a
--- /dev/null
+++ b/offapi/com/sun/star/i18n/XCharacterClassification.idl
@@ -0,0 +1,274 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
+#define __com_sun_star_i18n_XCharacterClassification_idl__
+
+#include <com/sun/star/i18n/ParseResult.idl>
+#include <com/sun/star/lang/Locale.idl>
+#include <com/sun/star/uno/XInterface.idl>
+
+
+module com { module sun { module star { module i18n {
+
+
+/*
+
+Possible tokens to be parsed with  parse...Token():
+
+UPASCALPHA=[A-Z]
+LOASCALPHA=[a-z]
+ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
+ASCDIGIT=[0-9]
+ASC_UNDERSCORE='_'
+ASC_SPACE=' '
+ASC_HT='\0x9'
+ASC_VT='\0xb'
+ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
+ASC_DBL_QUOTE=\";
+ASC_QUOTE=\'
+UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
+
+ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
+ALNUM=ALPHA|DIGIT
+CHAR=anycharacter
+WS=isWhiteSpace()
+SIGN='+'|'-'
+DECSEP=<locale dependent decimal separator>
+GRPSEP=<locale dependent thousand separator>
+EXPONENT=(E|e)[SIGN]1*ASC_DIGIT
+
+IDENTIFIER=ALPHA *ALNUM
+UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
+ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
+ANY_NAME=1*(ALNUM|DEFCHARS)
+SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
+DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
+ASC_NUMBER=[SIGN]*(1*ASC_DIGIT  *(GRPSEP 1*ASC_DIGIT))[DECSEP]1*ASC_DIGIT[EXPONENT]
+NUMBER=[SIGN]*(1*DIGIT  *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
+
+*/
+
+
+/**
+    Character classification (upper, lower, digit, letter, number, ...)
+    and generic Unicode enabled parser.
+ */
+
+published interface XCharacterClassification : com::sun::star::uno::XInterface
+{
+    /** Convert lower case alpha to upper case alpha, starting at
+        position <em>nPos</em> for <em>nCount</em> code points.
+     */
+    string   toUpper( [in] string aText, [in] long nPos, [in] long nCount,
+                      [in] com::sun::star::lang::Locale aLocale );
+
+    /** Convert upper case alpha to lower case alpha, starting at
+        position <em>nPos</em> for <em>nCount</em> code points.
+     */
+    string   toLower( [in] string aText, [in] long nPos, [in] long nCount,
+                      [in] com::sun::star::lang::Locale aLocale );
+
+    /** Convert to title case, starting at
+        position <em>nPos</em> for <em>nCount</em> code points.
+     */
+    string   toTitle( [in] string aText, [in] long nPos, [in] long nCount,
+                      [in] com::sun::star::lang::Locale aLocale );
+
+    /// Get UnicodeType of character at position <em>nPos</em>.
+    short    getType( [in] string aText, [in] long nPos );
+
+    /** Get DirectionProperty of character at position
+        <em>nPos</em>.
+     */
+    short    getCharacterDirection( [in] string aText, [in] long nPos );
+
+    /// Get UnicodeScript of character at position <em>nPos</em>.
+    short    getScript( [in] string aText, [in] long nPos );
+
+    /// Get KCharacterType of character at position <em>nPos</em>.
+    long getCharacterType( [in] string aText, [in] long nPos,
+                           [in] com::sun::star::lang::Locale aLocale );
+
+    /** Get accumulated KCharacterTypes of string starting
+        at position <em>nPos</em> of length <em>nCount</em> code points.
+
+        @returns
+            A number with appropriate flags set to indicate what type of
+            characters the string contains, each flag value being one of
+            KCharacterType values.
+    */
+    long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
+                        [in] com::sun::star::lang::Locale aLocale );
+
+
+    /**
+        Parse a string for a token starting at position <em>nPos</em>.
+
+        <p> A name or identifier must match the
+        KParseTokens criteria passed in
+        <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
+        additionally contain characters of
+        <em>aUserDefinedCharactersStart</em> and/or
+        <em>aUserDefinedCharactersCont</em>. </p>
+
+
+        @returns
+            A filled ParseResult structure. If no
+            unambiguous token could be parsed,
+            ParseResult::TokenType will be set to
+            <b>0</b> (zero), other fields will contain the values parsed
+            so far.
+
+            <p> If a token may represent either a numeric value or a
+            name according to the passed Start/Cont-Flags/Chars, both
+            KParseType::ASC_NUM (or
+            KParseType::UNI_NUM) and
+            KParseType::IDENTNAME are set in
+            ParseResult::TokenType.
+
+        @param  aText
+            Text to be parsed.
+
+        @param  nPos
+            Position where parsing starts.
+
+        @param  aLocale
+            The locale, for example, for decimal and group separator or
+            character type determination.
+
+        @param  nStartCharFlags
+            A set of KParseTokens constants determining the
+            allowed characters a name or identifier may start with.
+
+        @param  aUserDefinedCharactersStart
+            A set of additionally allowed characters a name or
+            identifier may start with.
+
+        @param  nContCharFlags
+            A set of KParseTokens constants determining the
+            allowed characters a name or identifier may continue with.
+
+        @param  aUserDefinedCharactersCont
+            A set of additionally allowed characters a name or
+            identifier may continue with.
+
+        @code{.cpp}
+            using namespace ::com::sun::star::i18n;
+            // First character of an identifier may be any alphabetic or underscore.
+            sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
+            // Continuing characters may be any alphanumeric or underscore or dot.
+            sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
+            // No further characters assumed to be contained in an identifier
+            OUString aEmptyString;
+            // Parse any token.
+            ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
+                nStartFlags, aEmptyString, nContFlags, aEmptyString );
+            // Get parsed token.
+            if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
+                fValue = rRes.Value;
+            if ( rRes.TokenType & KParseType::IDENTNAME )
+                aName = aText.copy( nPos, rRes.EndPos - nPos );
+            else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
+                aName = rRes.DequotedNameOrString;
+            else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
+                aString = rRes.DequotedNameOrString;
+            else if ( rRes.TokenType & KParseType::BOOLEAN )
+                aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
+            else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
+                aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
+        @endcode
+     */
+
+    ParseResult parseAnyToken(
+                            [in] string aText,
+                            [in] long nPos,
+                            [in] com::sun::star::lang::Locale aLocale,
+                            [in] long nStartCharFlags,
+                            [in] string aUserDefinedCharactersStart,
+                            [in] long nContCharFlags,
+                            [in] string aUserDefinedCharactersCont
+                            );
+
+    /**
+        Parse a string for a token of type <em>nTokenType</em> starting
+        at position <em>nPos</em>.
+
+        <p> Other parameters are the same as in
+        parseAnyToken(). If the actual token does not
+        match the passed <em>nTokenType</em> a
+        ParseResult::TokenType set to <b>0</b> (zero)
+        is returned. </p>
+
+        @param  nTokenType
+            One or more of the KParseType constants.
+
+        @param aText
+            See #parseAnyToken
+        @param nPos
+            See #parseAnyToken
+        @param aLocale
+            See #parseAnyToken
+        @param nStartCharFlags
+            See #parseAnyToken
+        @param aUserDefinedCharactersStart
+            See #parseAnyToken
+        @param nContCharFlags
+            See #parseAnyToken
+        @param aUserDefinedCharactersCont
+            See #parseAnyToken
+
+        @code{.cpp}
+            // Determine if a given name is a valid name (not quoted) and contains
+            // only allowed characters.
+            using namespace ::com::sun::star::i18n;
+            // First character of an identifier may be any alphanumeric or underscore.
+            sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
+            // No further characters assumed to be contained in an identifier start.
+            OUString aEmptyString;
+            // Continuing characters may be any alphanumeric or underscore.
+            sal_Int32 nContFlags = nStartFlags;
+            // Additionally, continuing characters may contain a blank.
+            OUString aContChars( " " );
+            // Parse predefined (must be an IDENTNAME) token.
+            ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
+                nStartFlags, aEmptyString, nContFlags, aContChars );
+            // Test if it is an identifier name and if it only is one
+            // and no more else is following it.
+            bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
+        @endcode
+     */
+
+    ParseResult parsePredefinedToken(
+                            [in] long nTokenType,
+                            [in] string aText,
+                            [in] long nPos,
+                            [in] com::sun::star::lang::Locale aLocale,
+                            [in] long nStartCharFlags,
+                            [in] string aUserDefinedCharactersStart,
+                            [in] long nContCharFlags,
+                            [in] string aUserDefinedCharactersCont
+                            );
+};
+
+}; }; }; };
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */