summaryrefslogtreecommitdiffstats
path: root/include/formula/FormulaCompiler.hxx
blob: 08710f561b5aa048919d7935fd5671eca996c4b3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#ifndef INCLUDED_FORMULA_FORMULACOMPILER_HXX
#define INCLUDED_FORMULA_FORMULACOMPILER_HXX

#include <memory>
#include <unordered_map>
#include <vector>

#include <com/sun/star/uno/Sequence.hxx>
#include <formula/formuladllapi.h>
#include <formula/grammar.hxx>
#include <formula/opcode.hxx>
#include <formula/tokenarray.hxx>
#include <formula/types.hxx>
#include <formula/paramclass.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/ustring.hxx>
#include <sal/types.h>
#include <tools/debug.hxx>

/* Size limits of the formula compiler; see the comment on each macro. */
#define FORMULA_MAXJUMPCOUNT    32  /* maximum number of jumps (ocChoose) */
#define FORMULA_MAXTOKENS     8192  /* maximum number of tokens in formula */
#define FORMULA_MAXPARAMS      255  /* maximum number of parameters per function (byte) */
#define FORMULA_MAXPARAMSII      8  /* maximum number of parameters for functions that have implicit intersection ranges */


namespace com::sun::star {
    namespace sheet {
        struct FormulaOpCodeMapEntry;
        struct FormulaToken;
    }
}

class CharClass;
enum class FormulaError : sal_uInt16;
enum class SvNumFormatType : sal_Int16;

namespace formula
{

/** One entry of a singly linked stack of token arrays (linked via pNext).

    FormulaCompiler keeps a pointer to such an entry in its pStack member.
    NOTE(review): the entries are presumably created and released by
    FormulaCompiler::PushTokenArray() / PopTokenArray(); their definitions
    are not part of this header -- confirm.

    All members have default initializers so that a default-constructed entry
    never carries indeterminate pointers or flags.
 */
struct FormulaArrayStack
{
    FormulaArrayStack*  pNext = nullptr;    ///< Next entry in the stack, nullptr if none.
    FormulaTokenArray*  pArr = nullptr;     ///< Token array of this entry (raw pointer).
    FormulaTokenRef     mpLastToken;        ///< Token reference stored with this entry.
    sal_uInt16          nIndex = 0;         ///< Index stored with this entry; presumably a position in pArr -- TODO confirm.
    bool bTemp = false;                     ///< Flag; presumably marks pArr as temporary -- TODO confirm.
};

/// Hash map of symbol string -> OpCode.
using OpCodeHashMap = std::unordered_map< OUString, OpCode >;
/// Hash map of string -> string, used for the ocExternal (AddIn) name mappings.
using ExternalHashMap = std::unordered_map< OUString, OUString >;

class FORMULA_DLLPUBLIC FormulaCompiler
{
private:
    // Not copyable: copy construction and copy assignment are deleted.
    FormulaCompiler(const FormulaCompiler&) = delete;
    FormulaCompiler& operator=(const FormulaCompiler&) = delete;
public:
    /** Construct a compiler without passing a token array.

        @param bComputeII
            Presumably the initial value of the implicit intersection compute
            flag (see SetComputeIIFlag()) -- confirm in the implementation.
        @param bMatrixFlag
            Presumably the initial value of the matrix flag (see
            SetMatrixFlag()) -- confirm in the implementation.
     */
    FormulaCompiler(bool bComputeII = false, bool bMatrixFlag = false);

    /** Construct a compiler for the token array _rArr.

        @param _rArr
            Token array, passed by non-const reference.
        @param bComputeII
            See the constructor above.
        @param bMatrixFlag
            See the constructor above.
     */
    FormulaCompiler(FormulaTokenArray& _rArr, bool bComputeII = false, bool bMatrixFlag = false);

    /// Virtual destructor; the class declares virtual functions meant to be overridden.
    virtual ~FormulaCompiler();

    /** Mappings from strings to OpCodes and vice versa.

        Holds, for one grammar, a hash map symbol -> OpCode, a table
        OpCode -> symbol, and the two hash maps translating ocExternal
        (AddIn) names in both directions. Not copyable.
     */
    class FORMULA_DLLPUBLIC OpCodeMap final
    {
        OpCodeHashMap           maHashMap;                  /// Hash map of symbols, OUString -> OpCode
        std::unique_ptr<OUString[]> mpTable;                /// Array of symbols, OpCode -> OUString, offset==OpCode
        ExternalHashMap         maExternalHashMap;          /// Hash map of ocExternal, Filter String -> AddIn String
        ExternalHashMap         maReverseExternalHashMap;   /// Hash map of ocExternal, AddIn String -> Filter String
        FormulaGrammar::Grammar meGrammar;                  /// Grammar, language and reference convention
        sal_uInt16              mnSymbols;                  /// Count of OpCode symbols
        bool                    mbCore          : 1;        /// If mapping was setup by core, not filters
        bool                    mbEnglish       : 1;        /// If English symbols and external names
        bool                    mbEnglishLocale : 1;        /// If English locale for numbers

        OpCodeMap( const OpCodeMap& ) = delete;
        OpCodeMap& operator=( const OpCodeMap& ) = delete;

    public:

        /** Create a map with an empty symbol table of nSymbols entries.

            @param nSymbols
                Number of entries of the OpCode -> symbol table; also passed
                to the symbol hash map as its initial bucket count.
            @param bCore
                Whether the mapping is set up by core, not filters.
            @param eGrammar
                Grammar of this map. The English symbols flag is derived from
                it via FormulaGrammar::isEnglish(), and the English locale
                flag starts out with the same value.
         */
        OpCodeMap(sal_uInt16 nSymbols, bool bCore, FormulaGrammar::Grammar eGrammar ) :
            maHashMap(nSymbols),
            mpTable( std::make_unique<OUString[]>( nSymbols ) ),
            meGrammar( eGrammar),
            mnSymbols( nSymbols),
            mbCore( bCore),
            mbEnglish ( FormulaGrammar::isEnglish(eGrammar) ),
            // Reads mbEnglish, which is declared (and therefore initialized) before mbEnglishLocale.
            mbEnglishLocale ( mbEnglish )
        {
        }

        /** Copy mappings from r into this map, effectively replacing this map.

            Override known legacy bad function names with
            correct ones if the conditions can be derived from the
            current maps.
         */
        void copyFrom( const OpCodeMap& r );

        /// Get the symbol String -> OpCode hash map for finds.
        const OpCodeHashMap& getHashMap() const { return maHashMap; }

        /// Get the symbol String -> AddIn String hash map for finds.
        const ExternalHashMap& getExternalHashMap() const { return maExternalHashMap; }

        /// Get the AddIn String -> symbol String hash map for finds.
        const ExternalHashMap& getReverseExternalHashMap() const { return maReverseExternalHashMap; }

        /** Get the symbol string matching an OpCode.

            @return The table entry for eOp, or a reference to an empty
                string if eOp is not less than the symbol count (which also
                triggers a DBG_ASSERT).
         */
        const OUString& getSymbol( const OpCode eOp ) const
        {
            DBG_ASSERT( sal_uInt16(eOp) < mnSymbols, "OpCodeMap::getSymbol: OpCode out of range");
            if (sal_uInt16(eOp) < mnSymbols)
                return mpTable[ eOp ];
            // Shared fallback for out-of-range OpCodes; never modified.
            static const OUString s_sEmpty;
            return s_sEmpty;
        }

        /** Get the first character of the symbol string matching an OpCode.

            @return The first character of the symbol, or 0 if the symbol is
                empty (no symbol set for eOp, or eOp out of range). The
                explicit check avoids indexing an empty string.
         */
        sal_Unicode getSymbolChar( const OpCode eOp ) const
        {
            const OUString& rSymbol = getSymbol(eOp);
            return rSymbol.isEmpty() ? sal_Unicode(0) : rSymbol[0];
        }

        /// Get the grammar.
        FormulaGrammar::Grammar getGrammar() const { return meGrammar; }

        /// Get the symbol count.
        sal_uInt16 getSymbolCount() const { return mnSymbols; }

        /** Are these English symbols, as opposed to native language (which may
            be English as well)? */
        bool isEnglish() const { return mbEnglish; }

        /** Are inline numbers parsed/formatted in en-US locale, as opposed
            to default locale? */
        bool isEnglishLocale() const { return mbEnglishLocale; }

        /// Is it an ODF 1.1 compatibility mapping?
        bool isPODF() const { return FormulaGrammar::isPODF( meGrammar); }

        /* TODO: add isAPI() once a FormulaLanguage was added. */

        /// Is it an ODFF / ODF 1.2 mapping?
        bool isODFF() const { return FormulaGrammar::isODFF( meGrammar); }

        /// Is it an OOXML mapping?
        bool isOOXML() const { return FormulaGrammar::isOOXML( meGrammar); }

        /// Does it have external symbol/name mappings?
        bool hasExternals() const { return !maExternalHashMap.empty(); }

        /// Put entry of symbol String and OpCode pair.
        void putOpCode( const OUString & rStr, const OpCode eOp, const CharClass* pCharClass );

        /// Put entry of symbol String and AddIn international String pair.
        void putExternal( const OUString & rSymbol, const OUString & rAddIn );

        /** Put entry of symbol String and AddIn international String pair,
            not warning just info as used for AddIn collection and setting up
            alias names. */
        void putExternalSoftly( const OUString & rSymbol, const OUString & rAddIn );

        /// Core implementation of XFormulaOpCodeMapper::getMappings()
        css::uno::Sequence< css::sheet::FormulaToken >
            createSequenceOfFormulaTokens(const FormulaCompiler& _rCompiler,
                    const css::uno::Sequence< OUString >& rNames ) const;

        /// Core implementation of XFormulaOpCodeMapper::getAvailableMappings()
        css::uno::Sequence< css::sheet::FormulaOpCodeMapEntry >
            createSequenceOfAvailableMappings( const FormulaCompiler& _rCompiler,const sal_Int32 nGroup ) const;

        /** The value used in createSequenceOfAvailableMappings() and thus in
            XFormulaOpCodeMapper::getMappings() for an unknown symbol. */
        static sal_Int32 getOpCodeUnknown() { return -1; }

    private:

        /** Conditionally put a mapping in copyFrom() context.

            Does NOT check eOp range!
         */
        void putCopyOpCode( const OUString& rSymbol, OpCode eOp );
    };

public:
    typedef std::shared_ptr< const OpCodeMap >  OpCodeMapPtr;
    typedef std::shared_ptr< OpCodeMap >        NonConstOpCodeMapPtr;

protected:
    /** Get finalized OpCodeMap for formula language.

        Creates/returns a singleton instance of an OpCodeMap that contains
        external AddIn mappings if the derived class supports them. Do not call
        at this base class as it results in a permanent mapping without AddIns
        even for derived classes (unless it is for the implementation of the
        temporary GetOpCodeMap()).

        @param nLanguage
            One of css::sheet::FormulaLanguage constants.
        @return Map for nLanguage. If nLanguage is unknown, a NULL map is returned.
     */
    OpCodeMapPtr GetFinalOpCodeMap( const sal_Int32 nLanguage ) const;

public:
    /** Get OpCodeMap for formula language.

        Returns either the finalized OpCodeMap (created by GetFinalOpCodeMap()
        of a derived class) for nLanguage if there is such, or if not then a
        temporary map of which its singleton is reset immediately and the
        temporary will get destroyed by the caller's scope. A temporary map
        created at this base class does *not* contain AddIn mappings.

        @param nLanguage
            One of css::sheet::FormulaLanguage constants.
        @return Map for nLanguage. If nLanguage is unknown, a NULL map is returned.
     */
    OpCodeMapPtr GetOpCodeMap( const sal_Int32 nLanguage ) const;

    /** Destroy the singleton OpCodeMap for formula language.

        This unconditionally destroys the underlying singleton instance of the
        map to be reinitialized again later on the next GetOpCodeMap() call.
        Use if the base class FormulaCompiler::GetOpCodeMap() was called and
        created the map (i.e. HasOpCodeMap() before returned false) and later a
        derived class like ScCompiler shall initialize it including AddIns.

        @param nLanguage
            One of css::sheet::FormulaLanguage constants.
     */
    void DestroyOpCodeMap( const sal_Int32 nLanguage );

    /** Whether the singleton OpCodeMap for formula language exists already.

        @param nLanguage
            One of css::sheet::FormulaLanguage constants.
     */
    bool HasOpCodeMap( const sal_Int32 nLanguage ) const;

    /** Create an internal symbol map from API mapping.
        @param bEnglish
            Use English number parser / formatter instead of native.
     */
    static OpCodeMapPtr CreateOpCodeMap(
            const css::uno::Sequence< const css::sheet::FormulaOpCodeMapEntry > & rMapping,
            bool bEnglish );

    /** Get current OpCodeMap in effect. */
    const OpCodeMapPtr& GetCurrentOpCodeMap() const { return mxSymbols; }

    /** Get OpCode for English symbol.
        Used in XFunctionAccess to create token array.
        @param rName
            Symbol to lookup. MUST be upper case.
     */
    OpCode GetEnglishOpCode( const OUString& rName ) const;

    /** Get the FormulaError for the name rName.
        NOTE(review): presumably rName is the textual error constant of the
        current symbol map; the lookup itself is not visible in this header. */
    FormulaError GetErrorConstant( const OUString& rName ) const;

    /** Append to rBuffer for the error nError.
        NOTE(review): presumably appends the textual error constant matching
        nError; confirm in the implementation. */
    void AppendErrorConstant( OUStringBuffer& rBuffer, FormulaError nError ) const;

    /** NOTE(review): presumably sets mbJumpCommandReorder, i.e. whether or
        not to reorder RPN for jump commands; confirm in the implementation. */
    void EnableJumpCommandReorder( bool bEnable );

    /** NOTE(review): presumably sets mbStopOnError, i.e. whether to stop
        compilation on first encountered error; confirm in the implementation. */
    void EnableStopOnError( bool bEnable );

    static bool IsOpCodeVolatile( OpCode eOp );
    static bool IsOpCodeJumpCommand( OpCode eOp );

    static bool DeQuote( OUString& rStr );


    static const OUString&  GetNativeSymbol( OpCode eOp );
    static sal_Unicode      GetNativeSymbolChar( OpCode eOp );
    static  bool            IsMatrixFunction(OpCode _eOpCode);   // if a function _always_ returns a Matrix

    SvNumFormatType GetNumFormatType() const { return nNumFmt; }
    bool  CompileTokenArray();

    void CreateStringFromTokenArray( OUString& rFormula );
    void CreateStringFromTokenArray( OUStringBuffer& rBuffer );
    const FormulaToken* CreateStringFromToken( OUString& rFormula, const FormulaToken* pToken );
    const FormulaToken* CreateStringFromToken( OUStringBuffer& rBuffer, const FormulaToken* pToken,
                                    bool bAllowArrAdvance = false );

    void AppendBoolean( OUStringBuffer& rBuffer, bool bVal ) const;
    void AppendDouble( OUStringBuffer& rBuffer, double fVal ) const;
    static void AppendString( OUStringBuffer& rBuffer, const OUString & rStr );

    /** Get the grammar used by this compiler, i.e. one of
        formula::FormulaGrammar::Grammar, language plus address reference
        convention. */
    FormulaGrammar::Grammar   GetGrammar() const { return meGrammar; }

    /** Whether current symbol set and grammar need transformation of Table
        structured references to A1 style references when writing / exporting
        (creating strings).
     */
    bool NeedsTableRefTransformation() const;

    /** If a parameter nParam (0-based) is to be forced to array for OpCode
        eOp, i.e. classified as ParamClass::ForceArray or
        ParamClass::ReferenceOrForceArray type. */
    virtual formula::ParamClass GetForceArrayParameter( const FormulaToken* pToken, sal_uInt16 nParam ) const;

    static void UpdateSeparatorsNative( const OUString& rSep, const OUString& rArrayColSep, const OUString& rArrayRowSep );
    static void ResetNativeSymbols();
    static void SetNativeSymbols( const OpCodeMapPtr& xMap );

    /** Sets the implicit intersection compute flag, i.e. whether to attempt
        computing implicit intersection ranges while building the RPN array. */
    void SetComputeIIFlag(bool bSet) { mbComputeII = bSet; }

    /** Sets the matrix flag for the formula, i.e. whether the formula is a
        matrix formula (needed for implicit intersection computation). */
    void SetMatrixFlag(bool bSet) { mbMatrixFlag = bSet; }

    /** Separators mapped when loading opcodes from the resource, values other
        than RESOURCE_BASE may override the resource strings. Used by OpCodeList
        implementation via loadSymbols().
     */
    enum class SeparatorType
    {
        RESOURCE_BASE,      ///< Does not override the resource strings.
        SEMICOLON_BASE      ///< May override the resource strings; presumably semicolon based separators -- TODO confirm.
    };

protected:
    virtual OUString FindAddInFunction( const OUString& rUpperName, bool bLocalFirst ) const;
    virtual void fillFromAddInCollectionUpperName( const NonConstOpCodeMapPtr& xMap ) const;
    virtual void fillFromAddInMap( const NonConstOpCodeMapPtr& xMap, FormulaGrammar::Grammar _eGrammar ) const;
    virtual void fillFromAddInCollectionEnglishName( const NonConstOpCodeMapPtr& xMap ) const;
    virtual void fillAddInToken(::std::vector< css::sheet::FormulaOpCodeMapEntry >& _rVec, bool _bIsEnglish) const;

    virtual void SetError(FormulaError nError);
    virtual FormulaTokenRef ExtendRangeReference( FormulaToken & rTok1, FormulaToken & rTok2 );
    virtual bool HandleExternalReference(const FormulaToken& _aToken);
    virtual bool HandleRange();
    virtual bool HandleColRowName();
    virtual bool HandleDbData();
    virtual bool HandleTableRef();

    virtual void CreateStringFromExternal( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
    virtual void CreateStringFromSingleRef( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
    virtual void CreateStringFromDoubleRef( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
    virtual void CreateStringFromMatrix( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
    virtual void CreateStringFromIndex( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
    virtual void LocalizeString( OUString& rName ) const;   // modify rName - input: exact name

    bool   GetToken();
    OpCode NextToken();
    void PutCode( FormulaTokenRef& );
    void Factor();
    void RangeLine();
    void UnionLine();
    void IntersectionLine();
    void UnaryLine();
    void PostOpLine();
    void PowLine();
    void MulDivLine();
    void AddSubLine();
    void ConcatLine();
    void CompareLine();
    OpCode Expression();
    void PopTokenArray();
    void PushTokenArray( FormulaTokenArray*, bool );

    bool MergeRangeReference( FormulaToken * * const pCode1, FormulaToken * const * const pCode2 );

    // Hook to possibly handle implicit intersection for the parameters of an opcode.
    // Called for (most) opcodes. Returns nothing; this base implementation is empty.
    // NOTE(review): the parameter meanings are only indicated by the commented-out
    // names (token, pppToken, nNumParams); confirm against an overriding class.
    virtual void HandleIIOpCode(FormulaToken* /*token*/,
                                FormulaToken*** /*pppToken*/, sal_uInt8 /*nNumParams*/) {}

    // Called from CompileTokenArray() after RPN code generation is done.
    virtual void PostProcessCode() {}

    // Hook for derived classes; this base implementation is empty.
    // NOTE(review): the call site is not visible in this header.
    virtual void AnnotateOperands() {}

    OUString            aCorrectedFormula;      // autocorrected Formula
    OUString            aCorrectedSymbol;       // autocorrected Symbol

    OpCodeMapPtr        mxSymbols;              // which symbols are used

    FormulaTokenRef     mpToken;                // current token
    FormulaTokenRef     pCurrentFactorToken;    // current factor token (of Factor() method)
    sal_uInt16          nCurrentFactorParam;    // current factor token's parameter, 1-based
    FormulaTokenArray*  pArr;
    FormulaTokenArrayPlainIterator maArrIterator;
    FormulaTokenRef     mpLastToken;            // last token

    FormulaToken**      pCode;
    FormulaArrayStack*  pStack;

    OpCode              eLastOp;
    short               nRecursion;             // GetToken() recursions
    SvNumFormatType     nNumFmt;                // set during CompileTokenArray()
    sal_uInt16          pc;                     // program counter

    FormulaGrammar::Grammar meGrammar;          // The grammar used, language plus convention.

    bool                bAutoCorrect;           // whether to apply AutoCorrection
    bool                bCorrected;             // AutoCorrection was applied
    bool                glSubTotal;             // if code contains one or more subtotal functions
    bool                needsRPNTokenCheck;     // whether to make FormulaTokenArray check all tokens at the end

    bool mbJumpCommandReorder; /// Whether or not to reorder RPN for jump commands.
    bool mbStopOnError;        /// Whether to stop compilation on first encountered error.

    bool mbComputeII;  // whether to attempt computing implicit intersection ranges while building the RPN array.
    bool mbMatrixFlag; // whether the formula is a matrix formula (needed for II computation)

public:
    /** Argument type of the private InitSymbols*() member functions.
        NOTE(review): presumably selects whether to only query for, create, or
        destroy the corresponding symbol map; confirm in the implementation. */
    enum InitSymbols
    {
        ASK = 0,
        INIT,
        DESTROY
    };

private:
    bool InitSymbolsNative( InitSymbols ) const;    /// only SymbolsNative, on first document creation
    bool InitSymbolsEnglish( InitSymbols ) const;   /// only SymbolsEnglish, maybe later
    bool InitSymbolsPODF( InitSymbols ) const;      /// only SymbolsPODF, on demand
    bool InitSymbolsAPI( InitSymbols ) const;       /// only SymbolsAPI, on demand
    bool InitSymbolsODFF( InitSymbols ) const;      /// only SymbolsODFF, on demand
    bool InitSymbolsEnglishXL( InitSymbols ) const; /// only SymbolsEnglishXL, on demand
    bool InitSymbolsOOXML( InitSymbols ) const;     /// only SymbolsOOXML, on demand

    void loadSymbols(const std::pair<const char*, int>* pSymbols, FormulaGrammar::Grammar eGrammar, NonConstOpCodeMapPtr& rxMap,
            SeparatorType eSepType = SeparatorType::SEMICOLON_BASE) const;

    /** Check pCurrentFactorToken for nParam's (0-based) ForceArray types and
        set ForceArray at rCurr if so. Set nParam+1 as 1-based
        nCurrentFactorParam for subsequent ForceArrayOperator() calls.
     */
    void CheckSetForceArrayParameter( FormulaTokenRef const & rCurr, sal_uInt8 nParam );

    void ForceArrayOperator( FormulaTokenRef const & rCurr );

    class CurrentFactor
    {
        FormulaTokenRef  pPrevFac;
        sal_uInt16       nPrevParam;
        FormulaCompiler* pCompiler;
        CurrentFactor( const CurrentFactor& ) = delete;
        CurrentFactor& operator=( const CurrentFactor& ) = delete;
    public:
        explicit CurrentFactor( FormulaCompiler* pComp )
            : pPrevFac( pComp->pCurrentFactorToken )
            , nPrevParam( pComp->nCurrentFactorParam )
            , pCompiler( pComp )
            {}
        ~CurrentFactor()
            {
                pCompiler->pCurrentFactorToken = pPrevFac;
                pCompiler->nCurrentFactorParam = nPrevParam;
            }
        // yes, this operator= may modify the RValue
        void operator=( FormulaTokenRef const & r )
            {
                pCompiler->ForceArrayOperator( r );
                pCompiler->pCurrentFactorToken = r;
                pCompiler->nCurrentFactorParam = 0;
            }
        void operator=( FormulaToken* p )
            {
                FormulaTokenRef xTemp( p );
                *this = xTemp;
            }
        operator FormulaTokenRef&()
            { return pCompiler->pCurrentFactorToken; }
        FormulaToken* operator->()
            { return pCompiler->pCurrentFactorToken.operator->(); }
        operator FormulaToken*()
            { return operator->(); }
    };


    mutable NonConstOpCodeMapPtr  mxSymbolsODFF;      // ODFF symbols
    mutable NonConstOpCodeMapPtr  mxSymbolsPODF;      // ODF 1.1 symbols
    mutable NonConstOpCodeMapPtr  mxSymbolsAPI;       // XFunctionAccess API symbols
    mutable NonConstOpCodeMapPtr  mxSymbolsNative;    // native symbols
    mutable NonConstOpCodeMapPtr  mxSymbolsEnglish;   // English symbols
    mutable NonConstOpCodeMapPtr  mxSymbolsEnglishXL; // English Excel symbols (for VBA formula parsing)
    mutable NonConstOpCodeMapPtr  mxSymbolsOOXML;     // Excel OOXML symbols

    static FormulaTokenArray smDummyTokenArray;
};

} // formula


#endif // INCLUDED_FORMULA_FORMULACOMPILER_HXX


/* vim:set shiftwidth=4 softtabstop=4 expandtab: */